miri/alloc/isolated_alloc.rs

use std::alloc::{self, Layout};

use rustc_index::bit_set::DenseBitSet;

/// How many bytes of memory each bit in the bitset represents.
const COMPRESSION_FACTOR: usize = 4;

/// A dedicated allocator for interpreter memory contents, ensuring they are stored on dedicated
/// pages (not mixed with Miri's own memory). This is used in native-lib mode.
#[derive(Debug)]
pub struct IsolatedAlloc {
    /// Pointers to page-aligned memory that has been claimed by the allocator.
    /// Every pointer here must point to a page-sized allocation claimed via
    /// the global allocator. These pointers are used for "small" allocations.
    page_ptrs: Vec<*mut u8>,
    /// Metadata about which bytes have been allocated on each page. The length
    /// of this vector must be the same as that of `page_ptrs`, and the domain
    /// size of the bitset must be exactly `page_size / COMPRESSION_FACTOR`.
    ///
    /// Conceptually, each bit of the bitset represents the allocation status of
    /// one n-byte chunk on the corresponding element of `page_ptrs`. Thus,
    /// indexing into it should be done with a value one-nth of the corresponding
    /// offset on the matching `page_ptrs` element (n = `COMPRESSION_FACTOR`).
    page_infos: Vec<DenseBitSet<usize>>,
    /// Pointers to multiple-page-sized allocations. These must also be page-aligned,
    /// with their size stored as the second element of the vector.
    huge_ptrs: Vec<(*mut u8, usize)>,
    /// The host (not emulated) page size.
    page_size: usize,
}
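
// Illustrative example of the `page_infos` encoding above (added for exposition,
// assuming a hypothetical 4096-byte host page): each page's bitset has a domain size
// of 4096 / COMPRESSION_FACTOR = 1024 bits, and a 32-byte allocation placed at page
// offset 64 is tracked by bits 64/4 .. (64 + 32)/4, i.e. the range 16..24.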

impl IsolatedAlloc {
    /// Creates an empty allocator.
    pub fn new() -> Self {
        Self {
            page_ptrs: Vec::new(),
            huge_ptrs: Vec::new(),
            page_infos: Vec::new(),
            // SAFETY: `sysconf(_SC_PAGESIZE)` is always safe to call at runtime
            // See https://www.man7.org/linux/man-pages/man3/sysconf.3.html
            page_size: unsafe { libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap() },
        }
    }

    /// For simplicity, we serve small allocations in multiples of COMPRESSION_FACTOR
    /// bytes with at least that alignment.
    #[inline]
    fn normalized_layout(layout: Layout) -> Layout {
        let align =
            if layout.align() < COMPRESSION_FACTOR { COMPRESSION_FACTOR } else { layout.align() };
        let size = layout.size().next_multiple_of(COMPRESSION_FACTOR);
        Layout::from_size_align(size, align).unwrap()
    }
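
    // Worked example (added for exposition): a request of (size = 5, align = 2) is
    // normalized to (size = 8, align = 4), while (size = 16, align = 8) is already a
    // multiple of COMPRESSION_FACTOR with sufficient alignment and is returned
    // unchanged. See also the `layout_normalization_examples` sketch in the tests
    // module below.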

    /// Returns the layout used to allocate the pages that hold small allocations.
    #[inline]
    fn page_layout(&self) -> Layout {
        Layout::from_size_align(self.page_size, self.page_size).unwrap()
    }

    /// If the allocation is greater than a page, then round its size up to the next
    /// multiple of the page size.
    #[inline]
    fn huge_normalized_layout(layout: Layout, page_size: usize) -> Layout {
        // Allocate in page-sized chunks
        let size = layout.size().next_multiple_of(page_size);
        // And make sure the align is at least one page
        let align = std::cmp::max(layout.align(), page_size);
        Layout::from_size_align(size, align).unwrap()
    }
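
    // Worked example (added for exposition, assuming a 4096-byte host page): a request
    // of (size = 5000, align = 16) becomes (size = 8192, align = 4096), i.e. two whole
    // pages with page alignment.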

    /// Determines whether a given normalized (size, align) should be sent to
    /// `alloc_huge` / `dealloc_huge`.
    #[inline]
    fn is_huge_alloc(&self, layout: &Layout) -> bool {
        layout.align() > self.page_size / 2 || layout.size() >= self.page_size / 2
    }
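
    // Example (added for exposition, assuming a 4096-byte host page): a 2048-byte or
    // page-aligned request takes the huge path, while a 512-byte, 16-aligned request
    // is served from a shared page via `alloc_small`.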

    /// Allocates memory as described in `Layout`. This memory should be deallocated
    /// by calling `dealloc` on this same allocator.
    ///
    /// SAFETY: See `alloc::alloc()`
    pub unsafe fn alloc(&mut self, layout: Layout) -> *mut u8 {
        // SAFETY: Upheld by caller
        unsafe { self.allocate(layout, false) }
    }

    /// Same as `alloc`, but zeroes out the memory.
    ///
    /// SAFETY: See `alloc::alloc_zeroed()`
    pub unsafe fn alloc_zeroed(&mut self, layout: Layout) -> *mut u8 {
        // SAFETY: Upheld by caller
        unsafe { self.allocate(layout, true) }
    }
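
    // Illustrative usage sketch of the public API above (hypothetical caller, added
    // for exposition; mirrors what the tests below do):
    //
    //     let mut isolated = IsolatedAlloc::new();
    //     let layout = Layout::from_size_align(64, 8).unwrap();
    //     // SAFETY: `layout` has a nonzero size.
    //     let ptr = unsafe { isolated.alloc_zeroed(layout) };
    //     // ... hand `ptr` to native code ...
    //     // SAFETY: `ptr` was allocated above with exactly this `layout`.
    //     unsafe { isolated.dealloc(ptr, layout) };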

    /// Abstracts over the logic of `alloc_zeroed` vs `alloc`, as determined by
    /// the `zeroed` argument.
    ///
    /// SAFETY: See `alloc::alloc()`, with the added restriction that `page_size`
    /// corresponds to the host pagesize.
    unsafe fn allocate(&mut self, layout: Layout, zeroed: bool) -> *mut u8 {
        let layout = IsolatedAlloc::normalized_layout(layout);
        if self.is_huge_alloc(&layout) {
            // SAFETY: Validity of `layout` upheld by caller; we checked that
            // the size and alignment are appropriate for being a huge alloc
            unsafe { self.alloc_huge(layout, zeroed) }
        } else {
            for (&mut page, pinfo) in std::iter::zip(&mut self.page_ptrs, &mut self.page_infos) {
                // SAFETY: The value in `self.page_size` is used to allocate
                // `page`, with page alignment
                if let Some(ptr) =
                    unsafe { Self::alloc_small(self.page_size, layout, page, pinfo, zeroed) }
                {
                    return ptr;
                }
            }

            // We get here only if there's no space in our existing pages
            let page_size = self.page_size;
            // Add another page and allocate from it; this cannot fail since the
            // new page is empty and we already asserted it fits into a page
            let (page, pinfo) = self.add_page();

            // SAFETY: See comment on the `alloc_small` call above
            unsafe { Self::alloc_small(page_size, layout, page, pinfo, zeroed).unwrap() }
        }
    }

    /// Used internally by `allocate` to abstract over some logic.
    ///
    /// SAFETY: `page` must be a page-aligned pointer to an allocated page,
    /// where the allocation is (at least) `page_size` bytes.
    unsafe fn alloc_small(
        page_size: usize,
        layout: Layout,
        page: *mut u8,
        pinfo: &mut DenseBitSet<usize>,
        zeroed: bool,
    ) -> Option<*mut u8> {
        // Check every alignment-sized block and see if there exists a `size`
        // chunk of empty space i.e. forall idx . !pinfo.contains(idx / n)
        for offset in (0..page_size).step_by(layout.align()) {
            let offset_pinfo = offset / COMPRESSION_FACTOR;
            let size_pinfo = layout.size() / COMPRESSION_FACTOR;
            // DenseBitSet::contains() panics if the index is out of bounds
            if pinfo.domain_size() < offset_pinfo + size_pinfo {
                break;
            }
            // FIXME: is there a more efficient way to check whether the entire range is unset
            // in the bitset?
            let range_avail = !(offset_pinfo..offset_pinfo + size_pinfo).any(|i| pinfo.contains(i));
            if range_avail {
                pinfo.insert_range(offset_pinfo..offset_pinfo + size_pinfo);
                // SAFETY: We checked the available bytes after `offset` in the call
                // to `domain_size` above and asserted there are at least `offset +
                // layout.size()` bytes available and unallocated after it.
                // `page` must point to the start of the page, so adding `offset`
                // is safe per the above.
                unsafe {
                    let ptr = page.add(offset);
                    if zeroed {
                        // Only write the bytes we were specifically asked to
                        // zero out, even if we allocated more
                        ptr.write_bytes(0, layout.size());
                    }
                    return Some(ptr);
                }
            }
        }
        None
    }
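
    // Worked example of the scan above (added for exposition): for a normalized layout
    // of (size = 8, align = 8), `alloc_small` probes offsets 0, 8, 16, ... and, at the
    // first offset whose bits offset/4 .. offset/4 + 2 are all clear, marks that range
    // as allocated and returns `page.add(offset)`.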

    /// Expands the available memory pool by adding one page.
    fn add_page(&mut self) -> (*mut u8, &mut DenseBitSet<usize>) {
        // SAFETY: The system page size, which is the layout size, cannot be 0
        let page_ptr = unsafe { alloc::alloc(self.page_layout()) };
        // `page_infos` has to have one bit for each `COMPRESSION_FACTOR`-sized chunk of bytes in the page.
        assert!(self.page_size % COMPRESSION_FACTOR == 0);
        self.page_infos.push(DenseBitSet::new_empty(self.page_size / COMPRESSION_FACTOR));
        self.page_ptrs.push(page_ptr);
        (page_ptr, self.page_infos.last_mut().unwrap())
    }

    /// Allocates in multiples of one page on the host system.
    ///
    /// SAFETY: Same as `alloc()`.
    unsafe fn alloc_huge(&mut self, layout: Layout, zeroed: bool) -> *mut u8 {
        let layout = IsolatedAlloc::huge_normalized_layout(layout, self.page_size);
        // SAFETY: Upheld by caller
        let ret =
            unsafe { if zeroed { alloc::alloc_zeroed(layout) } else { alloc::alloc(layout) } };
        self.huge_ptrs.push((ret, layout.size()));
        ret
    }

    /// Deallocates a pointer from this allocator.
    ///
    /// SAFETY: This pointer must have been allocated by calling `alloc()` (or
    /// `alloc_zeroed()`) with the same layout as the one passed on this same
    /// `IsolatedAlloc`.
    pub unsafe fn dealloc(&mut self, ptr: *mut u8, layout: Layout) {
        let layout = IsolatedAlloc::normalized_layout(layout);

        if self.is_huge_alloc(&layout) {
            // SAFETY: Partly upheld by caller, and we checked the size and
            // alignment, so this must have been allocated via `alloc_huge`
            unsafe {
                self.dealloc_huge(ptr, layout);
            }
        } else {
            // SAFETY: It's not a huge allocation, therefore it is a small one.
            let idx = unsafe { self.dealloc_small(ptr, layout) };

            // This may have been the last allocation on this page. If so, free the entire page.
            // FIXME: this can lead to threshold effects, we should probably add some form
            // of hysteresis.
            if self.page_infos[idx].is_empty() {
                self.page_infos.remove(idx);
                let page_ptr = self.page_ptrs.remove(idx);
                // SAFETY: We checked that there are no outstanding allocations
                // from us pointing to this page, and we know it was allocated
                // with this layout
                unsafe {
                    alloc::dealloc(page_ptr, self.page_layout());
                }
            }
        }
    }

    /// Returns the index of the page that this was deallocated from.
    ///
    /// SAFETY: the pointer must have been allocated with `alloc_small`.
    unsafe fn dealloc_small(&mut self, ptr: *mut u8, layout: Layout) -> usize {
        // Offset of the pointer in the current page
        let offset = ptr.addr() % self.page_size;
        // And then the page's base address
        let page_addr = ptr.addr() - offset;

        // Find the page this allocation belongs to.
        // This could be made faster if the list was sorted -- the allocator isn't fully optimized at the moment.
        let pinfo = std::iter::zip(&mut self.page_ptrs, &mut self.page_infos)
            .enumerate()
            .find(|(_, (page, _))| page.addr() == page_addr);
        let Some((idx_of_pinfo, (_, pinfo))) = pinfo else {
            panic!("Freeing in an unallocated page: {ptr:?}\nHolding pages {:?}", self.page_ptrs)
        };
        // Mark this range as available in the page.
        let ptr_idx_pinfo = offset / COMPRESSION_FACTOR;
        let size_pinfo = layout.size() / COMPRESSION_FACTOR;
        for idx in ptr_idx_pinfo..ptr_idx_pinfo + size_pinfo {
            pinfo.remove(idx);
        }
        idx_of_pinfo
    }
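
    // Worked example (added for exposition, assuming a 4096-byte host page): freeing a
    // 32-byte allocation at address `page_base + 96` computes offset 96, finds the page
    // whose base address is `page_base`, and clears bits 96/4 .. (96 + 32)/4, i.e. 24..32.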

    /// SAFETY: Same as `dealloc()` with the added requirement that `layout`
    /// must ask for a size larger than the host pagesize.
    unsafe fn dealloc_huge(&mut self, ptr: *mut u8, layout: Layout) {
        let layout = IsolatedAlloc::huge_normalized_layout(layout, self.page_size);
        // Find the pointer matching in address with the one we got
        let idx = self
            .huge_ptrs
            .iter()
            .position(|pg| ptr.addr() == pg.0.addr())
            .expect("Freeing unallocated pages");
        // And kick it from the list
        self.huge_ptrs.remove(idx);
        // SAFETY: Caller ensures validity of the layout
        unsafe {
            alloc::dealloc(ptr, layout);
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Helper function to assert that all bytes from `ptr` to `ptr.add(layout.size())`
    /// are zeroes.
    ///
    /// SAFETY: `ptr` must have been allocated with `layout`.
    unsafe fn assert_zeroes(ptr: *mut u8, layout: Layout) {
        // SAFETY: Caller ensures this is valid
        unsafe {
            for ofs in 0..layout.size() {
                assert_eq!(0, ptr.add(ofs).read());
            }
        }
    }
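
    /// Illustrative sketch added for exposition (not part of the original test suite):
    /// spot-checks the layout normalization arithmetic documented on `normalized_layout`
    /// and `huge_normalized_layout`, using 4096 as an assumed example page size.
    #[test]
    fn layout_normalization_examples() {
        // Small requests are padded to multiples of `COMPRESSION_FACTOR` bytes
        // with at least that alignment.
        let small = IsolatedAlloc::normalized_layout(Layout::from_size_align(5, 2).unwrap());
        assert_eq!((small.size(), small.align()), (8, 4));
        // Huge requests are rounded up to whole pages with page alignment.
        let page_size = 4096;
        let huge = IsolatedAlloc::huge_normalized_layout(
            Layout::from_size_align(5000, 16).unwrap(),
            page_size,
        );
        assert_eq!((huge.size(), huge.align()), (2 * page_size, page_size));
    }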

    /// Check that small (sub-pagesize) allocations are properly zeroed out.
    #[test]
    fn small_zeroes() {
        let mut alloc = IsolatedAlloc::new();
        // 256 should be less than the pagesize on *any* system
        let layout = Layout::from_size_align(256, 32).unwrap();
        // SAFETY: layout size is the constant above, not 0
        let ptr = unsafe { alloc.alloc_zeroed(layout) };
        // SAFETY: `ptr` was just allocated with `layout`
        unsafe {
            assert_zeroes(ptr, layout);
            alloc.dealloc(ptr, layout);
        }
    }

    /// Check that huge (> 1 page) allocations are properly zeroed out also.
    #[test]
    fn huge_zeroes() {
        let mut alloc = IsolatedAlloc::new();
        // 16k is about as big as pages get e.g. on macos aarch64
        let layout = Layout::from_size_align(16 * 1024, 128).unwrap();
        // SAFETY: layout size is the constant above, not 0
        let ptr = unsafe { alloc.alloc_zeroed(layout) };
        // SAFETY: `ptr` was just allocated with `layout`
        unsafe {
            assert_zeroes(ptr, layout);
            alloc.dealloc(ptr, layout);
        }
    }

    /// Check that repeatedly reallocating the same memory will still zero out
    /// everything properly.
    #[test]
    fn repeated_allocs() {
        let mut alloc = IsolatedAlloc::new();
        // Try both sub-pagesize allocs and those larger than / equal to a page
        for sz in (1..=(16 * 1024)).step_by(128) {
            let layout = Layout::from_size_align(sz, 1).unwrap();
            // SAFETY: all sizes in the range above are nonzero as we start from 1
            let ptr = unsafe { alloc.alloc_zeroed(layout) };
            // SAFETY: `ptr` was just allocated with `layout`, which was used
            // to bound the access size
            unsafe {
                assert_zeroes(ptr, layout);
                ptr.write_bytes(255, sz);
                alloc.dealloc(ptr, layout);
            }
        }
    }

    /// Checks that allocations of different sizes do not overlap, then checks for
    /// memory leaks that might have occurred.
    #[test]
    fn check_leaks_and_overlaps() {
        let mut alloc = IsolatedAlloc::new();

        // Some random sizes and aligns
        let mut sizes = vec![32; 10];
        sizes.append(&mut vec![15; 4]);
        sizes.append(&mut vec![256; 12]);
        // Give it some multi-page ones too
        sizes.append(&mut vec![32 * 1024; 4]);

        // Matching aligns for the sizes
        let mut aligns = vec![16; 12];
        aligns.append(&mut vec![256; 2]);
        aligns.append(&mut vec![64; 12]);
        aligns.append(&mut vec![4096; 4]);

        // Make sure we didn't mess up in the test itself!
        assert_eq!(sizes.len(), aligns.len());

        // Aggregate the sizes and aligns into a vec of layouts, then allocate them
        let layouts: Vec<_> = std::iter::zip(sizes, aligns)
            .map(|(sz, al)| Layout::from_size_align(sz, al).unwrap())
            .collect();
        // SAFETY: all sizes specified in `sizes` are nonzero
        let ptrs: Vec<_> =
            layouts.iter().map(|layout| unsafe { alloc.alloc_zeroed(*layout) }).collect();

        for (&ptr, &layout) in std::iter::zip(&ptrs, &layouts) {
            // We requested zeroed allocations, so check that that's true.
            // Then write to the end of the current size, so if the allocs
            // overlap (or the zeroing is wrong) then `assert_zeroes` will panic.
            // Also check that the alignment we asked for was respected.
            assert_eq!(ptr.addr().strict_rem(layout.align()), 0);
            // SAFETY: each `ptr` was allocated with its corresponding `layout`,
            // which is used to bound the access size
            unsafe {
                assert_zeroes(ptr, layout);
                ptr.write_bytes(255, layout.size());
                alloc.dealloc(ptr, layout);
            }
        }

        // And then verify that no memory was leaked after all that
        assert!(alloc.page_ptrs.is_empty() && alloc.huge_ptrs.is_empty());
    }
}