miri/alloc_addresses/
mod.rs

1//! This module is responsible for managing the absolute addresses that allocations are located at,
2//! and for casting between pointers and integers based on those addresses.
3
4mod reuse_pool;
5
6use std::cell::RefCell;
7use std::cmp::max;
8
9use rand::Rng;
10use rustc_abi::{Align, Size};
11use rustc_data_structures::fx::{FxHashMap, FxHashSet};
12
13use self::reuse_pool::ReusePool;
14use crate::concurrency::VClock;
15use crate::*;
16
/// Selects how integer-to-pointer casts (`with_exposed_provenance`) are treated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProvenanceMode {
    /// `expose_provenance`/`with_exposed_provenance` are supported by means of "wildcard"
    /// provenance. Because that loses precision, `with_exposed_provenance` triggers a warning.
    Default,
    /// Same behavior as `Default`, except that the warning is not emitted.
    Permissive,
    /// `with_exposed_provenance` is an error, which guarantees that no precision is lost.
    Strict,
}
27
/// Interior-mutable handle to [`GlobalStateInner`]; it is accessed through
/// `borrow()`/`borrow_mut()` (or `get_mut()` when exclusive access is available).
pub type GlobalState = RefCell<GlobalStateInner>;
29
/// Bookkeeping for the absolute addresses assigned to allocations.
#[derive(Debug)]
pub struct GlobalStateInner {
    /// Maps the base address of each allocation to its `AllocId`. It is always sorted by
    /// address. We cannot use a `HashMap` since we can be given an address that is offset
    /// from the base address, and we need to find the `AllocId` it belongs to. This is not the
    /// *full* inverse of `base_addr`: entries of dead allocations have been removed, and
    /// allocations at address 0 are never inserted.
    int_to_ptr_map: Vec<(u64, AllocId)>,
    /// The base address for each allocation. We cannot put that into
    /// `AllocExtra` because function pointers also have a base address, and
    /// they do not have an `AllocExtra`.
    /// For live allocations with a non-zero address this is the inverse of `int_to_ptr_map`;
    /// unlike that map, entries here survive deallocation and are only dropped by
    /// `remove_unreachable_allocs`.
    base_addr: FxHashMap<AllocId, u64>,
    /// Temporarily stores prepared memory for global allocations the first time their memory
    /// address is required. This is used to ensure that the memory is allocated before Miri
    /// assigns it an internal address, which is important for matching the internal address to
    /// the machine address so FFI can read from pointers.
    prepared_alloc_bytes: FxHashMap<AllocId, MiriAllocBytes>,
    /// A pool of addresses we can reuse for future allocations.
    reuse: ReusePool,
    /// The set of allocations that have been exposed. This cannot be put
    /// into `AllocExtra` for the same reason as `base_addr`.
    exposed: FxHashSet<AllocId>,
    /// The starting point for picking the next fresh (non-reused) base address. It
    /// is always larger than any address that was previously made part of a block.
    next_base_addr: u64,
    /// How int2ptr casts are treated; see `ProvenanceMode`.
    provenance_mode: ProvenanceMode,
}
58
59impl VisitProvenance for GlobalStateInner {
60    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
61        let GlobalStateInner {
62            int_to_ptr_map: _,
63            base_addr: _,
64            prepared_alloc_bytes: _,
65            reuse: _,
66            exposed: _,
67            next_base_addr: _,
68            provenance_mode: _,
69        } = self;
70        // Though base_addr, int_to_ptr_map, and exposed contain AllocIds, we do not want to visit them.
71        // int_to_ptr_map and exposed must contain only live allocations, and those
72        // are never garbage collected.
73        // base_addr is only relevant if we have a pointer to an AllocId and need to look up its
74        // base address; so if an AllocId is not reachable from somewhere else we can remove it
75        // here.
76    }
77}
78
79impl GlobalStateInner {
80    pub fn new(config: &MiriConfig, stack_addr: u64) -> Self {
81        GlobalStateInner {
82            int_to_ptr_map: Vec::default(),
83            base_addr: FxHashMap::default(),
84            prepared_alloc_bytes: FxHashMap::default(),
85            reuse: ReusePool::new(config),
86            exposed: FxHashSet::default(),
87            next_base_addr: stack_addr,
88            provenance_mode: config.provenance_mode,
89        }
90    }
91
92    pub fn remove_unreachable_allocs(&mut self, allocs: &LiveAllocs<'_, '_>) {
93        // `exposed` and `int_to_ptr_map` are cleared immediately when an allocation
94        // is freed, so `base_addr` is the only one we have to clean up based on the GC.
95        self.base_addr.retain(|id, _| allocs.is_live(*id));
96    }
97}
98
/// Rounds `addr` up to the smallest multiple of `align` that is greater than or equal to
/// `addr`. An `addr` that is already a multiple of `align` is returned unchanged.
///
/// # Panics
///
/// Panics if `align` is zero, or if the rounded-up address does not fit into a `u64`.
fn align_addr(addr: u64, align: u64) -> u64 {
    match addr % align {
        0 => addr,
        // `rem < align`, so `align - rem` cannot underflow. Adding only the missing distance
        // (instead of computing `addr + align` first) means we overflow exactly when the
        // aligned result itself is not representable, never on an intermediate value.
        rem => addr.checked_add(align - rem).expect("aligned address overflows u64"),
    }
}
107
108impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
109trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
110    fn addr_from_alloc_id_uncached(
111        &self,
112        global_state: &mut GlobalStateInner,
113        alloc_id: AllocId,
114        memory_kind: MemoryKind,
115    ) -> InterpResult<'tcx, u64> {
116        let this = self.eval_context_ref();
117        let info = this.get_alloc_info(alloc_id);
118
119        // This is either called immediately after allocation (and then cached), or when
120        // adjusting `tcx` pointers (which never get freed). So assert that we are looking
121        // at a live allocation. This also ensures that we never re-assign an address to an
122        // allocation that previously had an address, but then was freed and the address
123        // information was removed.
124        assert!(!matches!(info.kind, AllocKind::Dead));
125
126        // TypeId allocations always have a "base address" of 0 (i.e., the relative offset is the
127        // hash fragment and therefore equal to the actual integer value).
128        if matches!(info.kind, AllocKind::TypeId) {
129            return interp_ok(0);
130        }
131
132        // Miri's address assignment leaks state across thread boundaries, which is incompatible
133        // with GenMC execution. So we instead let GenMC assign addresses to allocations.
134        if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
135            let addr = genmc_ctx.handle_alloc(&this.machine, info.size, info.align, memory_kind)?;
136            return interp_ok(addr);
137        }
138
139        // This allocation does not have a base address yet, pick or reuse one.
140        if !this.machine.native_lib.is_empty() {
141            // In native lib mode, we use the "real" address of the bytes for this allocation.
142            // This ensures the interpreted program and native code have the same view of memory.
143            let params = this.machine.get_default_alloc_params();
144            let base_ptr = match info.kind {
145                AllocKind::LiveData => {
146                    if memory_kind == MiriMemoryKind::Global.into() {
147                        // For new global allocations, we always pre-allocate the memory to be able use the machine address directly.
148                        let prepared_bytes = MiriAllocBytes::zeroed(info.size, info.align, params)
149                            .unwrap_or_else(|| {
150                                panic!("Miri ran out of memory: cannot create allocation of {size:?} bytes", size = info.size)
151                            });
152                        let ptr = prepared_bytes.as_ptr();
153                        // Store prepared allocation to be picked up for use later.
154                        global_state
155                            .prepared_alloc_bytes
156                            .try_insert(alloc_id, prepared_bytes)
157                            .unwrap();
158                        ptr
159                    } else {
160                        // Non-global allocations are already in memory at this point so
161                        // we can just get a pointer to where their data is stored.
162                        this.get_alloc_bytes_unchecked_raw(alloc_id)?
163                    }
164                }
165                AllocKind::Function | AllocKind::VTable => {
166                    // Allocate some dummy memory to get a unique address for this function/vtable.
167                    let alloc_bytes = MiriAllocBytes::from_bytes(
168                        &[0u8; 1],
169                        Align::from_bytes(1).unwrap(),
170                        params,
171                    );
172                    let ptr = alloc_bytes.as_ptr();
173                    // Leak the underlying memory to ensure it remains unique.
174                    std::mem::forget(alloc_bytes);
175                    ptr
176                }
177                AllocKind::TypeId | AllocKind::Dead => unreachable!(),
178            };
179            // We don't have to expose this pointer yet, we do that in `prepare_for_native_call`.
180            return interp_ok(base_ptr.addr().to_u64());
181        }
182        // We are not in native lib mode, so we control the addresses ourselves.
183        let mut rng = this.machine.rng.borrow_mut();
184        if let Some((reuse_addr, clock)) = global_state.reuse.take_addr(
185            &mut *rng,
186            info.size,
187            info.align,
188            memory_kind,
189            this.active_thread(),
190        ) {
191            if let Some(clock) = clock {
192                this.acquire_clock(&clock);
193            }
194            interp_ok(reuse_addr)
195        } else {
196            // We have to pick a fresh address.
197            // Leave some space to the previous allocation, to give it some chance to be less aligned.
198            // We ensure that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
199            let slack = rng.random_range(0..16);
200            // From next_base_addr + slack, round up to adjust for alignment.
201            let base_addr = global_state
202                .next_base_addr
203                .checked_add(slack)
204                .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
205            let base_addr = align_addr(base_addr, info.align.bytes());
206
207            // Remember next base address.  If this allocation is zero-sized, leave a gap of at
208            // least 1 to avoid two allocations having the same base address. (The logic in
209            // `alloc_id_from_addr` assumes unique addresses, and different function/vtable pointers
210            // need to be distinguishable!)
211            global_state.next_base_addr = base_addr
212                .checked_add(max(info.size.bytes(), 1))
213                .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
214            // Even if `Size` didn't overflow, we might still have filled up the address space.
215            if global_state.next_base_addr > this.target_usize_max() {
216                throw_exhaust!(AddressSpaceFull);
217            }
218            // If we filled up more than half the address space, start aggressively reusing
219            // addresses to avoid running out.
220            if global_state.next_base_addr > u64::try_from(this.target_isize_max()).unwrap() {
221                global_state.reuse.address_space_shortage();
222            }
223
224            interp_ok(base_addr)
225        }
226    }
227}
228
229impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
230pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
231    // Returns the `AllocId` that corresponds to the specified addr,
232    // or `None` if the addr is out of bounds.
233    // Setting `only_exposed_allocations` selects whether only exposed allocations are considered.
234    fn alloc_id_from_addr(
235        &self,
236        addr: u64,
237        size: i64,
238        only_exposed_allocations: bool,
239    ) -> Option<AllocId> {
240        let this = self.eval_context_ref();
241        let global_state = this.machine.alloc_addresses.borrow();
242        assert!(global_state.provenance_mode != ProvenanceMode::Strict);
243
244        // We always search the allocation to the right of this address. So if the size is strictly
245        // negative, we have to search for `addr-1` instead.
246        let addr = if size >= 0 { addr } else { addr.saturating_sub(1) };
247        let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr);
248
249        // Determine the in-bounds provenance for this pointer.
250        let alloc_id = match pos {
251            Ok(pos) => Some(global_state.int_to_ptr_map[pos].1),
252            Err(0) => None,
253            Err(pos) => {
254                // This is the largest of the addresses smaller than `int`,
255                // i.e. the greatest lower bound (glb)
256                let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
257                // This never overflows because `addr >= glb`
258                let offset = addr - glb;
259                // We require this to be strict in-bounds of the allocation. This arm is only
260                // entered for addresses that are not the base address, so even zero-sized
261                // allocations will get recognized at their base address -- but all other
262                // allocations will *not* be recognized at their "end" address.
263                let size = this.get_alloc_info(alloc_id).size;
264                if offset < size.bytes() { Some(alloc_id) } else { None }
265            }
266        }?;
267
268        // We only use this provenance if it has been exposed, or if the caller requested also non-exposed allocations
269        if !only_exposed_allocations || global_state.exposed.contains(&alloc_id) {
270            // This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
271            debug_assert!(this.is_alloc_live(alloc_id));
272            Some(alloc_id)
273        } else {
274            None
275        }
276    }
277
278    /// Returns the base address of an allocation, or an error if no base address could be found
279    ///
280    /// # Panics
281    /// If `memory_kind = None` and the `alloc_id` is not cached, meaning that the first call to this function per `alloc_id` must get the `memory_kind`.
282    fn addr_from_alloc_id(
283        &self,
284        alloc_id: AllocId,
285        memory_kind: Option<MemoryKind>,
286    ) -> InterpResult<'tcx, u64> {
287        let this = self.eval_context_ref();
288        let mut global_state = this.machine.alloc_addresses.borrow_mut();
289        let global_state = &mut *global_state;
290
291        match global_state.base_addr.get(&alloc_id) {
292            Some(&addr) => interp_ok(addr),
293            None => {
294                // First time we're looking for the absolute address of this allocation.
295                let memory_kind =
296                    memory_kind.expect("memory_kind is required since alloc_id is not cached");
297                let base_addr =
298                    this.addr_from_alloc_id_uncached(global_state, alloc_id, memory_kind)?;
299                trace!("Assigning base address {:#x} to allocation {:?}", base_addr, alloc_id);
300
301                // Store address in cache.
302                global_state.base_addr.try_insert(alloc_id, base_addr).unwrap();
303
304                // Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it
305                // sorted. We have a fast-path for the common case that this address is bigger than
306                // all previous ones. We skip this for allocations at address 0; those can't be
307                // real, they must be TypeId "fake allocations".
308                if base_addr != 0 {
309                    let pos = if global_state
310                        .int_to_ptr_map
311                        .last()
312                        .is_some_and(|(last_addr, _)| *last_addr < base_addr)
313                    {
314                        global_state.int_to_ptr_map.len()
315                    } else {
316                        global_state
317                            .int_to_ptr_map
318                            .binary_search_by_key(&base_addr, |(addr, _)| *addr)
319                            .unwrap_err()
320                    };
321                    global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id));
322                }
323
324                interp_ok(base_addr)
325            }
326        }
327    }
328
329    fn expose_provenance(&self, provenance: Provenance) -> InterpResult<'tcx> {
330        let this = self.eval_context_ref();
331        let mut global_state = this.machine.alloc_addresses.borrow_mut();
332
333        let (alloc_id, tag) = match provenance {
334            Provenance::Concrete { alloc_id, tag } => (alloc_id, tag),
335            Provenance::Wildcard => {
336                // No need to do anything for wildcard pointers as
337                // their provenances have already been previously exposed.
338                return interp_ok(());
339            }
340        };
341
342        // In strict mode, we don't need this, so we can save some cycles by not tracking it.
343        if global_state.provenance_mode == ProvenanceMode::Strict {
344            return interp_ok(());
345        }
346        // Exposing a dead alloc is a no-op, because it's not possible to get a dead allocation
347        // via int2ptr.
348        if !this.is_alloc_live(alloc_id) {
349            return interp_ok(());
350        }
351        trace!("Exposing allocation id {alloc_id:?}");
352        global_state.exposed.insert(alloc_id);
353        // Release the global state before we call `expose_tag`, which may call `get_alloc_info_extra`,
354        // which may need access to the global state.
355        drop(global_state);
356        if this.machine.borrow_tracker.is_some() {
357            this.expose_tag(alloc_id, tag)?;
358        }
359        interp_ok(())
360    }
361
362    fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer> {
363        trace!("Casting {:#x} to a pointer", addr);
364
365        let this = self.eval_context_ref();
366        let global_state = this.machine.alloc_addresses.borrow();
367
368        // Potentially emit a warning.
369        match global_state.provenance_mode {
370            ProvenanceMode::Default => {
371                // The first time this happens at a particular location, print a warning.
372                let mut int2ptr_warned = this.machine.int2ptr_warned.borrow_mut();
373                let first = int2ptr_warned.is_empty();
374                if int2ptr_warned.insert(this.cur_span()) {
375                    // Newly inserted, so first time we see this span.
376                    this.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
377                }
378            }
379            ProvenanceMode::Strict => {
380                throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
381            }
382            ProvenanceMode::Permissive => {}
383        }
384
385        // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
386        // completely legal to do a cast and then `wrapping_offset` to another allocation and only
387        // *then* do a memory access. So the allocation that the pointer happens to point to on a
388        // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
389        // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
390        // allocation it might be referencing.
391        interp_ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
392    }
393
394    /// Convert a relative (tcx) pointer to a Miri pointer.
395    fn adjust_alloc_root_pointer(
396        &self,
397        ptr: interpret::Pointer<CtfeProvenance>,
398        tag: BorTag,
399        kind: MemoryKind,
400    ) -> InterpResult<'tcx, interpret::Pointer<Provenance>> {
401        let this = self.eval_context_ref();
402
403        let (prov, offset) = ptr.prov_and_relative_offset();
404        let alloc_id = prov.alloc_id();
405
406        // Get a pointer to the beginning of this allocation.
407        let base_addr = this.addr_from_alloc_id(alloc_id, Some(kind))?;
408        let base_ptr = interpret::Pointer::new(
409            Provenance::Concrete { alloc_id, tag },
410            Size::from_bytes(base_addr),
411        );
412        // Add offset with the right kind of pointer-overflowing arithmetic.
413        interp_ok(base_ptr.wrapping_offset(offset, this))
414    }
415
416    // This returns some prepared `MiriAllocBytes`, either because `addr_from_alloc_id` reserved
417    // memory space in the past, or by doing the pre-allocation right upon being called.
418    fn get_global_alloc_bytes(
419        &self,
420        id: AllocId,
421        bytes: &[u8],
422        align: Align,
423    ) -> InterpResult<'tcx, MiriAllocBytes> {
424        let this = self.eval_context_ref();
425        assert!(this.tcx.try_get_global_alloc(id).is_some());
426        if !this.machine.native_lib.is_empty() {
427            // In native lib mode, MiriAllocBytes for global allocations are handled via `prepared_alloc_bytes`.
428            // This additional call ensures that some `MiriAllocBytes` are always prepared, just in case
429            // this function gets called before the first time `addr_from_alloc_id` gets called.
430            this.addr_from_alloc_id(id, Some(MiriMemoryKind::Global.into()))?;
431            // The memory we need here will have already been allocated during an earlier call to
432            // `addr_from_alloc_id` for this allocation. So don't create a new `MiriAllocBytes` here, instead
433            // fetch the previously prepared bytes from `prepared_alloc_bytes`.
434            let mut global_state = this.machine.alloc_addresses.borrow_mut();
435            let mut prepared_alloc_bytes = global_state
436                .prepared_alloc_bytes
437                .remove(&id)
438                .unwrap_or_else(|| panic!("alloc bytes for {id:?} have not been prepared"));
439            // Sanity-check that the prepared allocation has the right size and alignment.
440            assert!(prepared_alloc_bytes.as_ptr().is_aligned_to(align.bytes_usize()));
441            assert_eq!(prepared_alloc_bytes.len(), bytes.len());
442            // Copy allocation contents into prepared memory.
443            prepared_alloc_bytes.copy_from_slice(bytes);
444            interp_ok(prepared_alloc_bytes)
445        } else {
446            let params = this.machine.get_default_alloc_params();
447            interp_ok(MiriAllocBytes::from_bytes(std::borrow::Cow::Borrowed(bytes), align, params))
448        }
449    }
450
451    /// When a pointer is used for a memory access, this computes where in which allocation the
452    /// access is going.
453    fn ptr_get_alloc(
454        &self,
455        ptr: interpret::Pointer<Provenance>,
456        size: i64,
457    ) -> Option<(AllocId, Size)> {
458        let this = self.eval_context_ref();
459
460        let (tag, addr) = ptr.into_raw_parts(); // addr is absolute (Miri provenance)
461
462        let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
463            alloc_id
464        } else {
465            // A wildcard pointer.
466            let only_exposed_allocations = true;
467            this.alloc_id_from_addr(addr.bytes(), size, only_exposed_allocations)?
468        };
469
470        // This cannot fail: since we already have a pointer with that provenance, adjust_alloc_root_pointer
471        // must have been called in the past, so we can just look up the address in the map.
472        let base_addr = *this.machine.alloc_addresses.borrow().base_addr.get(&alloc_id).unwrap();
473
474        // Wrapping "addr - base_addr"
475        let rel_offset = this.truncate_to_target_usize(addr.bytes().wrapping_sub(base_addr));
476        Some((alloc_id, Size::from_bytes(rel_offset)))
477    }
478
479    /// Return a list of all exposed allocations.
480    fn exposed_allocs(&self) -> Vec<AllocId> {
481        let this = self.eval_context_ref();
482        this.machine.alloc_addresses.borrow().exposed.iter().copied().collect()
483    }
484}
485
486impl<'tcx> MiriMachine<'tcx> {
487    pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align, kind: MemoryKind) {
488        let global_state = self.alloc_addresses.get_mut();
489        let rng = self.rng.get_mut();
490
491        // We can *not* remove this from `base_addr`, since the interpreter design requires that we
492        // be able to retrieve an AllocId + offset for any memory access *before* we check if the
493        // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
494        // access to determine the allocation ID and offset -- and there can still be pointers with
495        // `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return
496        // `None` only if the pointer truly has no provenance (this ensures consistent error
497        // messages).
498        // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
499        // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
500        // returns a dead allocation.
501        // To avoid a linear scan we first look up the address in `base_addr`, and then find it in
502        // `int_to_ptr_map`.
503        let addr = *global_state.base_addr.get(&dead_id).unwrap();
504        let pos =
505            global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
506        let removed = global_state.int_to_ptr_map.remove(pos);
507        assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing
508        // We can also remove it from `exposed`, since this allocation can anyway not be returned by
509        // `alloc_id_from_addr` any more.
510        global_state.exposed.remove(&dead_id);
511        // Also remember this address for future reuse.
512        let thread = self.threads.active_thread();
513        global_state.reuse.add_addr(rng, addr, size, align, kind, thread, || {
514            if let Some(data_race) = self.data_race.as_vclocks_ref() {
515                data_race.release_clock(&self.threads, |clock| clock.clone())
516            } else {
517                VClock::default()
518            }
519        })
520    }
521}
522
#[cfg(test)]
mod tests {
    use super::*;

    /// `align_addr` rounds up to the next multiple of the alignment and leaves addresses that
    /// are already aligned untouched.
    #[test]
    fn test_align_addr() {
        // One unaligned and one already-aligned address.
        assert_eq!(align_addr(37, 4), 40);
        assert_eq!(align_addr(44, 4), 44);
        // Zero is a multiple of every alignment.
        assert_eq!(align_addr(0, 8), 0);
        // Just above and just below a multiple.
        assert_eq!(align_addr(1, 8), 8);
        assert_eq!(align_addr(7, 8), 8);
        assert_eq!(align_addr(9, 8), 16);
        // An alignment of 1 never changes the address.
        assert_eq!(align_addr(123, 1), 123);
    }
}