miri/shims/native_lib/mod.rs
//! Implements calling functions from a native library.
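//!
//! A native library is one passed to Miri via `-Zmiri-native-lib`. When a call to one of its
//! exported functions is intercepted, the arguments are lowered into libffi values, all exposed
//! interpreter memory is prepared for access from outside the interpreter, the call is performed
//! through libffi, and (where ptrace-based tracing is supported) the memory accesses observed
//! during the call are applied back to Miri's own state.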

use std::ops::Deref;

use libffi::low::CodePtr;
use libffi::middle::Type as FfiType;
use rustc_abi::{HasDataLayout, Size};
use rustc_middle::ty::{self as ty, IntTy, Ty, UintTy};
use rustc_span::Symbol;
use serde::{Deserialize, Serialize};

mod ffi;

#[cfg_attr(
    not(all(
        target_os = "linux",
        target_env = "gnu",
        any(target_arch = "x86", target_arch = "x86_64")
    )),
    path = "trace/stub.rs"
)]
pub mod trace;

use self::ffi::OwnedArg;
use crate::*;

/// The final results of an FFI trace, containing every relevant event detected
/// by the tracer.
#[derive(Serialize, Deserialize, Debug)]
pub struct MemEvents {
    /// A list of memory accesses that occurred, in the order in which they occurred.
    pub acc_events: Vec<AccessEvent>,
}

/// A single memory access.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub enum AccessEvent {
    /// A read occurred on this memory range.
    Read(AccessRange),
    /// A write may have occurred on this memory range.
    /// Some instructions *may* write memory without *always* doing so,
    /// so this can be an over-approximation.
    /// The range info, however, is reliable if the access did happen.
    /// If the second field is true, the access definitely happened.
    Write(AccessRange, bool),
}

impl AccessEvent {
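    /// Returns the memory range touched by this access, for reads and writes alike.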
    fn get_range(&self) -> AccessRange {
        match self {
            AccessEvent::Read(access_range) => access_range.clone(),
            AccessEvent::Write(access_range, _) => access_range.clone(),
        }
    }
}

/// The memory touched by a given access.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct AccessRange {
    /// The base address in memory where an access occurred.
    pub addr: usize,
    /// The number of bytes affected from the base.
    pub size: usize,
}

impl AccessRange {
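    /// One-past-the-end address of the access; `strict_add` panics on overflow rather than wrapping.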
    fn end(&self) -> usize {
        self.addr.strict_add(self.size)
    }
}

impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Call the native host function and return the output as an immediate.
    fn call_native_with_args(
        &mut self,
        link_name: Symbol,
        dest: &MPlaceTy<'tcx>,
        fun: CodePtr,
        libffi_args: &mut [OwnedArg],
    ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
        let this = self.eval_context_mut();
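        // On Linux, hand the tracing supervisor Miri's isolated allocator; on other targets the
        // stubbed-out tracer gets a unit placeholder instead.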
        #[cfg(target_os = "linux")]
        let alloc = this.machine.allocator.as_ref().unwrap();
        #[cfg(not(target_os = "linux"))]
        // Placeholder value.
        let alloc = ();

        trace::Supervisor::do_ffi(alloc, || {
            // Call the function (`fun`) with arguments `libffi_args`, and obtain the return value
            // as the specified primitive integer type.
            let scalar = match dest.layout.ty.kind() {
                // ints
                ty::Int(IntTy::I8) => {
                    // Unsafe because of the call to native code.
                    // Because this is calling a C function it is not necessarily sound,
                    // but there is no way around this and we've checked as much as we can.
                    let x = unsafe { ffi::call::<i8>(fun, libffi_args) };
                    Scalar::from_i8(x)
                }
                ty::Int(IntTy::I16) => {
                    let x = unsafe { ffi::call::<i16>(fun, libffi_args) };
                    Scalar::from_i16(x)
                }
                ty::Int(IntTy::I32) => {
                    let x = unsafe { ffi::call::<i32>(fun, libffi_args) };
                    Scalar::from_i32(x)
                }
                ty::Int(IntTy::I64) => {
                    let x = unsafe { ffi::call::<i64>(fun, libffi_args) };
                    Scalar::from_i64(x)
                }
                ty::Int(IntTy::Isize) => {
                    let x = unsafe { ffi::call::<isize>(fun, libffi_args) };
                    Scalar::from_target_isize(x.try_into().unwrap(), this)
                }
                // uints
                ty::Uint(UintTy::U8) => {
                    let x = unsafe { ffi::call::<u8>(fun, libffi_args) };
                    Scalar::from_u8(x)
                }
                ty::Uint(UintTy::U16) => {
                    let x = unsafe { ffi::call::<u16>(fun, libffi_args) };
                    Scalar::from_u16(x)
                }
                ty::Uint(UintTy::U32) => {
                    let x = unsafe { ffi::call::<u32>(fun, libffi_args) };
                    Scalar::from_u32(x)
                }
                ty::Uint(UintTy::U64) => {
                    let x = unsafe { ffi::call::<u64>(fun, libffi_args) };
                    Scalar::from_u64(x)
                }
                ty::Uint(UintTy::Usize) => {
                    let x = unsafe { ffi::call::<usize>(fun, libffi_args) };
                    Scalar::from_target_usize(x.try_into().unwrap(), this)
                }
                // Functions with no declared return type (i.e., the default return)
                // have the output_type `Tuple([])`.
                ty::Tuple(t_list) if (*t_list).deref().is_empty() => {
                    unsafe { ffi::call::<()>(fun, libffi_args) };
                    return interp_ok(ImmTy::uninit(dest.layout));
                }
                ty::RawPtr(..) => {
                    let x = unsafe { ffi::call::<*const ()>(fun, libffi_args) };
                    let ptr = StrictPointer::new(Provenance::Wildcard, Size::from_bytes(x.addr()));
                    Scalar::from_pointer(ptr, this)
                }
                _ =>
                    return Err(err_unsup_format!(
                        "unsupported return type for native call: {:?}",
                        link_name
                    ))
                    .into(),
            };
            interp_ok(ImmTy::from_scalar(scalar, dest.layout))
        })
    }

    /// Get the pointer to the function of the specified name in the shared object file,
    /// if it exists. The function must be in one of the shared object files specified:
    /// we do *not* return pointers to functions in dependencies of libraries.
    fn get_func_ptr_explicitly_from_lib(&mut self, link_name: Symbol) -> Option<CodePtr> {
        let this = self.eval_context_mut();
        // Try getting the function from one of the shared libraries.
        for (lib, lib_path) in &this.machine.native_lib {
            let Ok(func): Result<libloading::Symbol<'_, unsafe extern "C" fn()>, _> =
                (unsafe { lib.get(link_name.as_str().as_bytes()) })
            else {
                continue;
            };
            #[expect(clippy::as_conversions)] // fn-ptr to raw-ptr cast needs `as`.
            let fn_ptr = *func.deref() as *mut std::ffi::c_void;

            // FIXME: this is a hack!
            // The `libloading` crate will automatically load system libraries like `libc`.
            // On Linux, `libloading` is based on `dlsym`: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#202
            // and `dlsym` (https://linux.die.net/man/3/dlsym) looks through the dependency tree of the
            // library if it can't find the symbol in the library itself.
            // So, in order to check if the function was actually found in the specified
            // `machine.native_lib`, we need to check its `dli_fname` and compare it to
            // the specified SO file path.
            // This code is a reimplementation of the mechanism for getting `dli_fname` in `libloading`,
            // from: https://docs.rs/libloading/0.7.3/src/libloading/os/unix/mod.rs.html#411
            // using the `libc` crate where this interface is public.
            let mut info = std::mem::MaybeUninit::<libc::Dl_info>::zeroed();
            unsafe {
                let res = libc::dladdr(fn_ptr, info.as_mut_ptr());
                assert!(res != 0, "failed to load info about function we already loaded");
                let info = info.assume_init();
                #[cfg(target_os = "cygwin")]
                let fname_ptr = info.dli_fname.as_ptr();
                #[cfg(not(target_os = "cygwin"))]
                let fname_ptr = info.dli_fname;
                assert!(!fname_ptr.is_null());
                if std::ffi::CStr::from_ptr(fname_ptr).to_str().unwrap()
                    != lib_path.to_str().unwrap()
                {
                    // The function is not actually in this .so, check the next one.
                    continue;
                }
            }

            // Return a pointer to the function.
            return Some(CodePtr(fn_ptr));
        }
        None
    }

    /// Applies the `events` to Miri's internal state. The event vector must be
    /// ordered sequentially by when the accesses happened, and the sizes are
    /// assumed to be exact.
    fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();

        for evt in events.acc_events {
            let evt_rg = evt.get_range();
            // LLVM at least permits vectorising accesses to adjacent allocations,
            // so we cannot assume 1 access = 1 allocation. :(
            let mut rg = evt_rg.addr..evt_rg.end();
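            // Walk the access one allocation at a time: each iteration attributes the prefix of
            // `rg` that falls into a single allocation, then skips past it.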
            while let Some(curr) = rg.next() {
                let Some(alloc_id) = this.alloc_id_from_addr(
                    curr.to_u64(),
                    rg.len().try_into().unwrap(),
                    /* only_exposed_allocations */ true,
                ) else {
                    throw_ub_format!("Foreign code did an out-of-bounds access!")
                };
                let alloc = this.get_alloc_raw(alloc_id)?;
                // The logical and physical address of the allocation coincide, so we can use
                // this instead of `addr_from_alloc_id`.
                let alloc_addr = alloc.get_bytes_unchecked_raw().addr();

                // Determine the range inside the allocation that this access covers. This range is
                // in terms of offsets from the start of `alloc`. The start of the overlap range
                // is `curr`, translated into an offset within the allocation; the end is the
                // minimum of the end of the allocation and the end of the access' range.
                let overlap = curr.strict_sub(alloc_addr)
                    ..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
                // Skip forward however many bytes of the access are contained in the current
                // allocation, subtracting 1 since the overlap range includes the current addr
                // that was already popped off of the range.
                rg.advance_by(overlap.len().strict_sub(1)).unwrap();

                match evt {
                    AccessEvent::Read(_) => {
                        // If a provenance was read by the foreign code, expose it.
                        for prov in alloc.provenance().get_range(this, overlap.into()) {
                            this.expose_provenance(prov)?;
                        }
                    }
                    AccessEvent::Write(_, certain) => {
                        // Sometimes we aren't certain if a write happened, in which case we
                        // only initialise that data if the allocation is mutable.
                        if certain || alloc.mutability.is_mut() {
                            let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
                            alloc.process_native_write(
                                &cx.tcx,
                                Some(AllocRange {
                                    start: Size::from_bytes(overlap.start),
                                    size: Size::from_bytes(overlap.len()),
                                }),
                            )
                        }
                    }
                }
            }
        }

        interp_ok(())
    }

    /// Extract the value from the result of reading an operand from the machine
    /// and convert it to an `OwnedArg`.
    fn op_to_ffi_arg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, OwnedArg> {
        let this = self.eval_context_ref();

        // This should go first so that we emit the "unsupported" error before doing a bunch
        // of extra work for types that aren't supported yet.
        let ty = this.ty_to_ffitype(v.layout.ty)?;

        // Helper to print a warning when a pointer is shared with the native code.
        let expose = |prov: Provenance| -> InterpResult<'tcx> {
            // The first time this happens, print a warning.
            if !this.machine.native_call_mem_warned.replace(true) {
                // Newly set, so first time we get here.
                this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing });
            }

            this.expose_provenance(prov)?;
            interp_ok(())
        };

        // Compute the byte-level representation of the argument. If there's a pointer in there, we
        // expose it inside the AM. Later in `visit_reachable_allocs`, the "meta"-level provenance
        // for accessing the pointee gets exposed; this is crucial to justify the C code effectively
        // casting the integer in `bytes` to a pointer and using that.
        let bytes = match v.as_mplace_or_imm() {
            either::Either::Left(mplace) => {
                // Get the alloc id corresponding to this mplace, alongside
                // a pointer that's offset to point to this particular
                // mplace (not one at the base addr of the allocation).
                let sz = mplace.layout.size.bytes_usize();
                if sz == 0 {
                    throw_unsup_format!("attempting to pass a ZST over FFI");
                }
                let (id, ofs, _) = this.ptr_get_alloc_id(mplace.ptr(), sz.try_into().unwrap())?;
                let ofs = ofs.bytes_usize();
                let range = ofs..ofs.strict_add(sz);
                // Expose all provenances in the allocation within the byte range of the struct, if
                // any. These pointers are being directly passed to native code by-value.
                let alloc = this.get_alloc_raw(id)?;
                for prov in alloc.provenance().get_range(this, range.clone().into()) {
                    expose(prov)?;
                }
                // Read the bytes that make up this argument. We cannot use the normal getters as
                // those would fail if any part of the argument is uninitialized. Native code
                // is kind of outside the interpreter, after all...
                Box::from(alloc.inspect_with_uninit_and_ptr_outside_interpreter(range))
            }
            either::Either::Right(imm) => {
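                // Start from a zeroed buffer of the argument's size; the scalar(s) below are
                // written into it at their layout offsets.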
                let mut bytes: Box<[u8]> = vec![0; imm.layout.size.bytes_usize()].into();

                // A little helper to write scalars to our byte array.
                let mut write_scalar = |this: &MiriInterpCx<'tcx>, sc: Scalar, pos: usize| {
                    // If a scalar is a pointer, then expose its provenance.
                    if let interpret::Scalar::Ptr(p, _) = sc {
                        expose(p.provenance)?;
                    }
                    write_target_uint(
                        this.data_layout().endian,
                        &mut bytes[pos..][..sc.size().bytes_usize()],
                        sc.to_scalar_int()?.to_bits_unchecked(),
                    )
                    .unwrap();
                    interp_ok(())
                };

                // Write the scalar into the `bytes` buffer.
                match *imm {
                    Immediate::Scalar(sc) => write_scalar(this, sc, 0)?,
                    Immediate::ScalarPair(sc_first, sc_second) => {
                        // The first scalar has an offset of zero; compute the offset of the 2nd.
                        let ofs_second = {
                            let rustc_abi::BackendRepr::ScalarPair(a, b) = imm.layout.backend_repr
                            else {
                                span_bug!(
                                    this.cur_span(),
                                    "op_to_ffi_arg: invalid scalar pair layout: {:#?}",
                                    imm.layout
                                )
                            };
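                            // The second field starts after the first, rounded up to the second
                            // field's ABI alignment.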
                            a.size(this).align_to(b.align(this).abi).bytes_usize()
                        };

                        write_scalar(this, sc_first, 0)?;
                        write_scalar(this, sc_second, ofs_second)?;
                    }
                    Immediate::Uninit => {
                        // Nothing to write.
                    }
                }

                bytes
            }
        };
        interp_ok(OwnedArg::new(ty, bytes))
    }

    /// Parses an ADT to construct the matching libffi type.
    fn adt_to_ffitype(
        &self,
        orig_ty: Ty<'_>,
        adt_def: ty::AdtDef<'tcx>,
        args: &'tcx ty::List<ty::GenericArg<'tcx>>,
    ) -> InterpResult<'tcx, FfiType> {
        // TODO: Certain non-C reprs should be okay also.
        if !adt_def.repr().c() {
            throw_unsup_format!("passing a non-#[repr(C)] struct over FFI: {orig_ty}")
        }
        // TODO: unions, etc.
        if !adt_def.is_struct() {
            throw_unsup_format!(
                "unsupported argument type for native call: {orig_ty} is an enum or union"
            );
        }

        let this = self.eval_context_ref();
        let mut fields = vec![];
        for field in &adt_def.non_enum_variant().fields {
            fields.push(this.ty_to_ffitype(field.ty(*this.tcx, args))?);
        }

        interp_ok(FfiType::structure(fields))
    }

    /// Gets the matching libffi type for a given Ty.
    fn ty_to_ffitype(&self, ty: Ty<'tcx>) -> InterpResult<'tcx, FfiType> {
        interp_ok(match ty.kind() {
            ty::Int(IntTy::I8) => FfiType::i8(),
            ty::Int(IntTy::I16) => FfiType::i16(),
            ty::Int(IntTy::I32) => FfiType::i32(),
            ty::Int(IntTy::I64) => FfiType::i64(),
            ty::Int(IntTy::Isize) => FfiType::isize(),
            // the uints
            ty::Uint(UintTy::U8) => FfiType::u8(),
            ty::Uint(UintTy::U16) => FfiType::u16(),
            ty::Uint(UintTy::U32) => FfiType::u32(),
            ty::Uint(UintTy::U64) => FfiType::u64(),
            ty::Uint(UintTy::Usize) => FfiType::usize(),
            ty::RawPtr(..) => FfiType::pointer(),
            ty::Adt(adt_def, args) => self.adt_to_ffitype(ty, *adt_def, args)?,
            _ => throw_unsup_format!("unsupported argument type for native call: {}", ty),
        })
    }
}

impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Call the native host function, with supplied arguments.
    /// Needs to convert all the arguments from their Miri representations to
    /// a native form (through a `libffi` call).
    /// Then, convert the return value from the native form into something that
    /// can be stored in Miri's internal memory.
    fn call_native_fn(
        &mut self,
        link_name: Symbol,
        dest: &MPlaceTy<'tcx>,
        args: &[OpTy<'tcx>],
    ) -> InterpResult<'tcx, bool> {
        let this = self.eval_context_mut();
        // Get the pointer to the function in the shared object file if it exists.
        let code_ptr = match this.get_func_ptr_explicitly_from_lib(link_name) {
            Some(ptr) => ptr,
            None => {
                // Shared object file does not export this function -- try the shims next.
                return interp_ok(false);
            }
        };

        // Do we have ptrace?
        let tracing = trace::Supervisor::is_enabled();

        // Get the function arguments, copy them, and prepare the type descriptions.
        let mut libffi_args = Vec::<OwnedArg>::with_capacity(args.len());
        for arg in args.iter() {
            libffi_args.push(this.op_to_ffi_arg(arg, tracing)?);
        }

        // Prepare all exposed memory (both previously exposed, and just newly exposed since a
        // pointer was passed as argument). Uninitialised memory is left as-is, but any data
        // exposed this way is garbage anyway.
        this.visit_reachable_allocs(this.exposed_allocs(), |this, alloc_id, info| {
            // If there is no data behind this pointer, skip this.
            if !matches!(info.kind, AllocKind::LiveData) {
                return interp_ok(());
            }
            // It's okay to get raw access; what we do does not correspond to any actual
            // AM operation, it just approximates the state to account for the native call.
            let alloc = this.get_alloc_raw(alloc_id)?;
            // Also expose the provenance of the interpreter-level allocation, so it can
            // be read by FFI. The `black_box` is defensive programming as LLVM likes
            // to (incorrectly) optimize away ptr2int casts whose result is unused.
            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());

            if !tracing {
                // Expose all provenances in this allocation, since the native code can do
                // $whatever. Can be skipped when tracing; in that case we'll expose just the
                // actually-read parts later.
                for prov in alloc.provenance().provenances() {
                    this.expose_provenance(prov)?;
                }
            }

            // Prepare for possible write from native code if mutable.
            if info.mutbl.is_mut() {
                let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
                // These writes could initialize everything and wreak havoc with the pointers.
                // We can skip that when tracing; in that case we'll later do that only for the
                // memory that got actually written.
                if !tracing {
                    alloc.process_native_write(&cx.tcx, None);
                }
                // Also expose *mutable* provenance for the interpreter-level allocation.
                std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
            }

            interp_ok(())
        })?;

        // Call the function and store the output, depending on the return type in the function signature.
        let (ret, maybe_memevents) =
            this.call_native_with_args(link_name, dest, code_ptr, &mut libffi_args)?;

        if tracing {
            this.tracing_apply_accesses(maybe_memevents.unwrap())?;
        }

        this.write_immediate(*ret, dest)?;
        interp_ok(true)
    }
}