miri/shims/native_lib/
mod.rs

1//! Implements calling functions from a native library.
2
3use std::ops::Deref;
4
5use libffi::low::CodePtr;
6use libffi::middle::Type as FfiType;
7use rustc_abi::{HasDataLayout, Size};
8use rustc_middle::ty::{self as ty, IntTy, Ty, UintTy};
9use rustc_span::Symbol;
10use serde::{Deserialize, Serialize};
11
12mod ffi;
13
/// Tracing of memory accesses performed by native code. The real
/// implementation is only available on x86/x86_64 Linux with glibc; every
/// other target gets the no-op stub at `trace/stub.rs` via the `path`
/// attribute below.
#[cfg_attr(
    not(all(
        target_os = "linux",
        target_env = "gnu",
        any(target_arch = "x86", target_arch = "x86_64")
    )),
    path = "trace/stub.rs"
)]
pub mod trace;
23
24use self::ffi::OwnedArg;
25use crate::*;
26
/// The final results of an FFI trace, containing every relevant event detected
/// by the tracer.
#[derive(Serialize, Deserialize, Debug)]
pub struct MemEvents {
    /// A list of memory accesses that occurred, in the order they occurred in.
    pub acc_events: Vec<AccessEvent>,
}
34
/// A single memory access, as observed by the tracer.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub enum AccessEvent {
    /// A read occurred on this memory range.
    Read(AccessRange),
    /// A write may have occurred on this memory range.
    /// Some instructions *may* write memory without *always* doing that,
    /// so this can be an over-approximation.
    /// The range info, however, is reliable if the access did happen.
    /// If the second field is true, the access definitely happened.
    Write(AccessRange, bool),
}
47
48impl AccessEvent {
49    fn get_range(&self) -> AccessRange {
50        match self {
51            AccessEvent::Read(access_range) => access_range.clone(),
52            AccessEvent::Write(access_range, _) => access_range.clone(),
53        }
54    }
55}
56
/// The memory touched by a given access.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct AccessRange {
    /// The base address in memory where an access occurred.
    pub addr: usize,
    /// The number of bytes affected from the base.
    pub size: usize,
}
65
66impl AccessRange {
67    fn end(&self) -> usize {
68        self.addr.strict_add(self.size)
69    }
70}
71
impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Call native host function and return the output as an immediate.
    ///
    /// `dest` is only used for its layout, which determines as which type the
    /// native return value is read back; `link_name` only appears in error
    /// messages. The second component of the return value is whatever memory
    /// event trace `Supervisor::do_ffi` collected around the call
    /// (NOTE(review): presumably `None` when tracing is unavailable — decided
    /// inside `do_ffi`, not here).
    fn call_native_with_args(
        &mut self,
        link_name: Symbol,
        dest: &MPlaceTy<'tcx>,
        fun: CodePtr,
        libffi_args: &mut [OwnedArg],
    ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
        let this = self.eval_context_mut();
        // The supervisor gets a handle to Miri's allocator; it only exists on
        // Linux, so all other targets hand over a unit placeholder.
        #[cfg(target_os = "linux")]
        let alloc = this.machine.allocator.as_ref().unwrap();
        #[cfg(not(target_os = "linux"))]
        // Placeholder value.
        let alloc = ();

        trace::Supervisor::do_ffi(alloc, || {
            // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value
            // as the specified primitive integer type.
            let scalar = match dest.layout.ty.kind() {
                // ints
                ty::Int(IntTy::I8) => {
                    // Unsafe because of the call to native code.
                    // Because this is calling a C function it is not necessarily sound,
                    // but there is no way around this and we've checked as much as we can.
                    let x = unsafe { ffi::call::<i8>(fun, libffi_args) };
                    Scalar::from_i8(x)
                }
                ty::Int(IntTy::I16) => {
                    let x = unsafe { ffi::call::<i16>(fun, libffi_args) };
                    Scalar::from_i16(x)
                }
                ty::Int(IntTy::I32) => {
                    let x = unsafe { ffi::call::<i32>(fun, libffi_args) };
                    Scalar::from_i32(x)
                }
                ty::Int(IntTy::I64) => {
                    let x = unsafe { ffi::call::<i64>(fun, libffi_args) };
                    Scalar::from_i64(x)
                }
                ty::Int(IntTy::Isize) => {
                    let x = unsafe { ffi::call::<isize>(fun, libffi_args) };
                    // Convert the host `isize` to the *target's* pointer-sized int.
                    Scalar::from_target_isize(x.try_into().unwrap(), this)
                }
                // uints
                ty::Uint(UintTy::U8) => {
                    let x = unsafe { ffi::call::<u8>(fun, libffi_args) };
                    Scalar::from_u8(x)
                }
                ty::Uint(UintTy::U16) => {
                    let x = unsafe { ffi::call::<u16>(fun, libffi_args) };
                    Scalar::from_u16(x)
                }
                ty::Uint(UintTy::U32) => {
                    let x = unsafe { ffi::call::<u32>(fun, libffi_args) };
                    Scalar::from_u32(x)
                }
                ty::Uint(UintTy::U64) => {
                    let x = unsafe { ffi::call::<u64>(fun, libffi_args) };
                    Scalar::from_u64(x)
                }
                ty::Uint(UintTy::Usize) => {
                    let x = unsafe { ffi::call::<usize>(fun, libffi_args) };
                    Scalar::from_target_usize(x.try_into().unwrap(), this)
                }
                // Functions with no declared return type (i.e., the default return)
                // have the output_type `Tuple([])`.
                ty::Tuple(t_list) if (*t_list).deref().is_empty() => {
                    unsafe { ffi::call::<()>(fun, libffi_args) };
                    return interp_ok(ImmTy::uninit(dest.layout));
                }
                ty::RawPtr(..) => {
                    let x = unsafe { ffi::call::<*const ()>(fun, libffi_args) };
                    // We cannot know where the returned pointer came from, so give
                    // it wildcard provenance.
                    let ptr = StrictPointer::new(Provenance::Wildcard, Size::from_bytes(x.addr()));
                    Scalar::from_pointer(ptr, this)
                }
                _ =>
                    return Err(err_unsup_format!(
                        "unsupported return type for native call: {:?}",
                        link_name
                    ))
                    .into(),
            };
            interp_ok(ImmTy::from_scalar(scalar, dest.layout))
        })
    }

    /// Get the pointer to the function of the specified name in the shared object file,
    /// if it exists. The function must be in one of the shared object files specified:
    /// we do *not* return pointers to functions in dependencies of libraries.
    ///
    /// Returns `None` if no library listed in `machine.native_lib` exports
    /// `link_name` itself (symbols found only via a library's dependencies are
    /// rejected by the `dladdr` path check below).
    fn get_func_ptr_explicitly_from_lib(&mut self, link_name: Symbol) -> Option<CodePtr> {
        let this = self.eval_context_mut();
        // Try getting the function from one of the shared libraries.
        for (lib, lib_path) in &this.machine.native_lib {
            let Ok(func): Result<libloading::Symbol<'_, unsafe extern "C" fn()>, _> =
                (unsafe { lib.get(link_name.as_str().as_bytes()) })
            else {
                continue;
            };
            #[expect(clippy::as_conversions)] // fn-ptr to raw-ptr cast needs `as`.
            let fn_ptr = *func.deref() as *mut std::ffi::c_void;

            // FIXME: this is a hack!
            // The `libloading` crate will automatically load system libraries like `libc`.
            // On linux `libloading` is based on `dlsym`: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#202
            // and `dlsym`(https://linux.die.net/man/3/dlsym) looks through the dependency tree of the
            // library if it can't find the symbol in the library itself.
            // So, in order to check if the function was actually found in the specified
            // `machine.external_so_lib` we need to check its `dli_fname` and compare it to
            // the specified SO file path.
            // This code is a reimplementation of the mechanism for getting `dli_fname` in `libloading`,
            // from: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#411
            // using the `libc` crate where this interface is public.
            let mut info = std::mem::MaybeUninit::<libc::Dl_info>::zeroed();
            unsafe {
                let res = libc::dladdr(fn_ptr, info.as_mut_ptr());
                assert!(res != 0, "failed to load info about function we already loaded");
                let info = info.assume_init();
                // On Cygwin `dli_fname` is an inline array rather than a pointer.
                #[cfg(target_os = "cygwin")]
                let fname_ptr = info.dli_fname.as_ptr();
                #[cfg(not(target_os = "cygwin"))]
                let fname_ptr = info.dli_fname;
                assert!(!fname_ptr.is_null());
                if std::ffi::CStr::from_ptr(fname_ptr).to_str().unwrap()
                    != lib_path.to_str().unwrap()
                {
                    // The function is not actually in this .so, check the next one.
                    continue;
                }
            }

            // Return a pointer to the function.
            return Some(CodePtr(fn_ptr));
        }
        None
    }

    /// Applies the `events` to Miri's internal state. The event vector must be
    /// ordered sequentially by when the accesses happened, and the sizes are
    /// assumed to be exact.
    ///
    /// Reads expose any provenance stored in the touched bytes; writes mark the
    /// touched bytes as initialized (and clobber their provenance) via
    /// `process_native_write`. Throws UB if an access does not land in an
    /// exposed allocation.
    fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();

        for evt in events.acc_events {
            let evt_rg = evt.get_range();
            // LLVM at least permits vectorising accesses to adjacent allocations,
            // so we cannot assume 1 access = 1 allocation. :(
            // Hence we walk the access range, peeling off one allocation's worth
            // of bytes per iteration.
            let mut rg = evt_rg.addr..evt_rg.end();
            while let Some(curr) = rg.next() {
                let Some(alloc_id) = this.alloc_id_from_addr(
                    curr.to_u64(),
                    rg.len().try_into().unwrap(),
                    /* only_exposed_allocations */ true,
                ) else {
                    throw_ub_format!("Foreign code did an out-of-bounds access!")
                };
                let alloc = this.get_alloc_raw(alloc_id)?;
                // The logical and physical address of the allocation coincide, so we can use
                // this instead of `addr_from_alloc_id`.
                let alloc_addr = alloc.get_bytes_unchecked_raw().addr();

                // Determine the range inside the allocation that this access covers. This range is
                // in terms of offsets from the start of `alloc`. The start of the overlap range
                // will be `curr`; the end will be the minimum of the end of the allocation and the
                // end of the access' range.
                let overlap = curr.strict_sub(alloc_addr)
                    ..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
                // Skip forward however many bytes of the access are contained in the current
                // allocation, subtracting 1 since the overlap range includes the current addr
                // that was already popped off of the range.
                rg.advance_by(overlap.len().strict_sub(1)).unwrap();

                match evt {
                    AccessEvent::Read(_) => {
                        // If a provenance was read by the foreign code, expose it.
                        for prov in alloc.provenance().get_range(this, overlap.into()) {
                            this.expose_provenance(prov)?;
                        }
                    }
                    AccessEvent::Write(_, certain) => {
                        // Sometimes we aren't certain if a write happened, in which case we
                        // only initialise that data if the allocation is mutable.
                        if certain || alloc.mutability.is_mut() {
                            let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
                            alloc.process_native_write(
                                &cx.tcx,
                                Some(AllocRange {
                                    start: Size::from_bytes(overlap.start),
                                    size: Size::from_bytes(overlap.len()),
                                }),
                            )
                        }
                    }
                }
            }
        }

        interp_ok(())
    }

    /// Extract the value from the result of reading an operand from the machine
    /// and convert it to a `OwnedArg`.
    ///
    /// Any pointer found in the argument bytes has its provenance exposed
    /// (emitting a one-time warning that memory is shared with native code);
    /// `tracing` only affects the wording of that diagnostic. Throws unsupported
    /// for types without a libffi equivalent and for ZST arguments.
    fn op_to_ffi_arg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, OwnedArg> {
        let this = self.eval_context_ref();

        // This should go first so that we emit unsupported before doing a bunch
        // of extra work for types that aren't supported yet.
        let ty = this.ty_to_ffitype(v.layout.ty)?;

        // Helper to print a warning when a pointer is shared with the native code.
        let expose = |prov: Provenance| -> InterpResult<'tcx> {
            // The first time this happens, print a warning.
            if !this.machine.native_call_mem_warned.replace(true) {
                // Newly set, so first time we get here.
                this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing });
            }

            this.expose_provenance(prov)?;
            interp_ok(())
        };

        // Compute the byte-level representation of the argument. If there's a pointer in there, we
        // expose it inside the AM. Later in `visit_reachable_allocs`, the "meta"-level provenance
        // for accessing the pointee gets exposed; this is crucial to justify the C code effectively
        // casting the integer in `byte` to a pointer and using that.
        let bytes = match v.as_mplace_or_imm() {
            either::Either::Left(mplace) => {
                // Get the alloc id corresponding to this mplace, alongside
                // a pointer that's offset to point to this particular
                // mplace (not one at the base addr of the allocation).
                let sz = mplace.layout.size.bytes_usize();
                if sz == 0 {
                    throw_unsup_format!("attempting to pass a ZST over FFI");
                }
                let (id, ofs, _) = this.ptr_get_alloc_id(mplace.ptr(), sz.try_into().unwrap())?;
                let ofs = ofs.bytes_usize();
                let range = ofs..ofs.strict_add(sz);
                // Expose all provenances in the allocation within the byte range of the struct, if
                // any. These pointers are being directly passed to native code by-value.
                let alloc = this.get_alloc_raw(id)?;
                for prov in alloc.provenance().get_range(this, range.clone().into()) {
                    expose(prov)?;
                }
                // Read the bytes that make up this argument. We cannot use the normal getter as
                // those would fail if any part of the argument is uninitialized. Native code
                // is kind of outside the interpreter, after all...
                Box::from(alloc.inspect_with_uninit_and_ptr_outside_interpreter(range))
            }
            either::Either::Right(imm) => {
                let mut bytes: Box<[u8]> = vec![0; imm.layout.size.bytes_usize()].into();

                // A little helper to write scalars to our byte array.
                let mut write_scalar = |this: &MiriInterpCx<'tcx>, sc: Scalar, pos: usize| {
                    // If a scalar is a pointer, then expose its provenance.
                    if let interpret::Scalar::Ptr(p, _) = sc {
                        expose(p.provenance)?;
                    }
                    // Write the raw bits at `pos` using the target's endianness.
                    write_target_uint(
                        this.data_layout().endian,
                        &mut bytes[pos..][..sc.size().bytes_usize()],
                        sc.to_scalar_int()?.to_bits_unchecked(),
                    )
                    .unwrap();
                    interp_ok(())
                };

                // Write the scalar into the `bytes` buffer.
                match *imm {
                    Immediate::Scalar(sc) => write_scalar(this, sc, 0)?,
                    Immediate::ScalarPair(sc_first, sc_second) => {
                        // The first scalar has an offset of zero; compute the offset of the 2nd.
                        let ofs_second = {
                            let rustc_abi::BackendRepr::ScalarPair(a, b) = imm.layout.backend_repr
                            else {
                                span_bug!(
                                    this.cur_span(),
                                    "op_to_ffi_arg: invalid scalar pair layout: {:#?}",
                                    imm.layout
                                )
                            };
                            a.size(this).align_to(b.align(this).abi).bytes_usize()
                        };

                        write_scalar(this, sc_first, 0)?;
                        write_scalar(this, sc_second, ofs_second)?;
                    }
                    Immediate::Uninit => {
                        // Nothing to write.
                    }
                }

                bytes
            }
        };
        interp_ok(OwnedArg::new(ty, bytes))
    }

    /// Parses an ADT to construct the matching libffi type.
    ///
    /// Only `#[repr(C)]` structs are supported; enums and unions (and non-C
    /// reprs) throw unsupported. Fields are converted recursively via
    /// `ty_to_ffitype`.
    fn adt_to_ffitype(
        &self,
        orig_ty: Ty<'_>,
        adt_def: ty::AdtDef<'tcx>,
        args: &'tcx ty::List<ty::GenericArg<'tcx>>,
    ) -> InterpResult<'tcx, FfiType> {
        // TODO: Certain non-C reprs should be okay also.
        if !adt_def.repr().c() {
            throw_unsup_format!("passing a non-#[repr(C)] struct over FFI: {orig_ty}")
        }
        // TODO: unions, etc.
        if !adt_def.is_struct() {
            throw_unsup_format!(
                "unsupported argument type for native call: {orig_ty} is an enum or union"
            );
        }

        let this = self.eval_context_ref();
        let mut fields = vec![];
        for field in &adt_def.non_enum_variant().fields {
            fields.push(this.ty_to_ffitype(field.ty(*this.tcx, args))?);
        }

        interp_ok(FfiType::structure(fields))
    }

    /// Gets the matching libffi type for a given Ty.
    ///
    /// Supports the fixed-width and pointer-sized integers, raw pointers, and
    /// `#[repr(C)]` structs (via `adt_to_ffitype`); anything else throws
    /// unsupported.
    fn ty_to_ffitype(&self, ty: Ty<'tcx>) -> InterpResult<'tcx, FfiType> {
        interp_ok(match ty.kind() {
            // the ints
            ty::Int(IntTy::I8) => FfiType::i8(),
            ty::Int(IntTy::I16) => FfiType::i16(),
            ty::Int(IntTy::I32) => FfiType::i32(),
            ty::Int(IntTy::I64) => FfiType::i64(),
            ty::Int(IntTy::Isize) => FfiType::isize(),
            // the uints
            ty::Uint(UintTy::U8) => FfiType::u8(),
            ty::Uint(UintTy::U16) => FfiType::u16(),
            ty::Uint(UintTy::U32) => FfiType::u32(),
            ty::Uint(UintTy::U64) => FfiType::u64(),
            ty::Uint(UintTy::Usize) => FfiType::usize(),
            ty::RawPtr(..) => FfiType::pointer(),
            ty::Adt(adt_def, args) => self.adt_to_ffitype(ty, *adt_def, args)?,
            _ => throw_unsup_format!("unsupported argument type for native call: {}", ty),
        })
    }
}
417
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Call the native host function, with supplied arguments.
    /// Needs to convert all the arguments from their Miri representations to
    /// a native form (through `libffi` call).
    /// Then, convert the return value from the native form into something that
    /// can be stored in Miri's internal memory.
    ///
    /// Returns `false` if no loaded library exports `link_name`, so the caller
    /// can fall back to Miri's own shims; returns `true` after a successful
    /// native call whose result was written to `dest`.
    fn call_native_fn(
        &mut self,
        link_name: Symbol,
        dest: &MPlaceTy<'tcx>,
        args: &[OpTy<'tcx>],
    ) -> InterpResult<'tcx, bool> {
        let this = self.eval_context_mut();
        // Get the pointer to the function in the shared object file if it exists.
        let code_ptr = match this.get_func_ptr_explicitly_from_lib(link_name) {
            Some(ptr) => ptr,
            None => {
                // Shared object file does not export this function -- try the shims next.
                return interp_ok(false);
            }
        };

        // Do we have ptrace?
        let tracing = trace::Supervisor::is_enabled();

        // Get the function arguments, copy them, and prepare the type descriptions.
        let mut libffi_args = Vec::<OwnedArg>::with_capacity(args.len());
        for arg in args.iter() {
            libffi_args.push(this.op_to_ffi_arg(arg, tracing)?);
        }

        // Prepare all exposed memory (both previously exposed, and just newly exposed since a
        // pointer was passed as argument). Uninitialised memory is left as-is, but any data
        // exposed this way is garbage anyway.
        this.visit_reachable_allocs(this.exposed_allocs(), |this, alloc_id, info| {
            // If there is no data behind this pointer, skip this.
            if !matches!(info.kind, AllocKind::LiveData) {
                return interp_ok(());
            }
            // It's okay to get raw access, what we do does not correspond to any actual
            // AM operation, it just approximates the state to account for the native call.
            let alloc = this.get_alloc_raw(alloc_id)?;
            // Also expose the provenance of the interpreter-level allocation, so it can
            // be read by FFI. The `black_box` is defensive programming as LLVM likes
            // to (incorrectly) optimize away ptr2int casts whose result is unused.
            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());

            if !tracing {
                // Expose all provenances in this allocation, since the native code can do
                // $whatever. Can be skipped when tracing; in that case we'll expose just the
                // actually-read parts later.
                for prov in alloc.provenance().provenances() {
                    this.expose_provenance(prov)?;
                }
            }

            // Prepare for possible write from native code if mutable.
            if info.mutbl.is_mut() {
                let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
                // These writes could initialize everything and wreak havoc with the pointers.
                // We can skip that when tracing; in that case we'll later do that only for the
                // memory that got actually written.
                if !tracing {
                    alloc.process_native_write(&cx.tcx, None);
                }
                // Also expose *mutable* provenance for the interpreter-level allocation.
                std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
            }

            interp_ok(())
        })?;

        // Call the function and store output, depending on return type in the function signature.
        let (ret, maybe_memevents) =
            this.call_native_with_args(link_name, dest, code_ptr, &mut libffi_args)?;

        if tracing {
            // When tracing, we deferred exposure/initialisation above; now apply
            // exactly the accesses the tracer observed.
            this.tracing_apply_accesses(maybe_memevents.unwrap())?;
        }

        this.write_immediate(*ret, dest)?;
        interp_ok(true)
    }
}
502}