miri/
eval.rs

1//! Main evaluator loop and setting up the initial stack frame.
2
3use std::ffi::{OsStr, OsString};
4use std::panic::{self, AssertUnwindSafe};
5use std::path::PathBuf;
6use std::rc::Rc;
7use std::task::Poll;
8use std::{iter, thread};
9
10use rustc_abi::ExternAbi;
11use rustc_data_structures::fx::{FxHashMap, FxHashSet};
12use rustc_hir::def::Namespace;
13use rustc_hir::def_id::DefId;
14use rustc_middle::ty::layout::{HasTyCtxt, HasTypingEnv, LayoutCx};
15use rustc_middle::ty::{self, Ty, TyCtxt};
16use rustc_session::config::EntryFnType;
17
18use crate::concurrency::GenmcCtx;
19use crate::concurrency::thread::TlsAllocAction;
20use crate::diagnostics::report_leaks;
21use crate::shims::{global_ctor, tls};
22use crate::*;
23
/// The kind of entry point the interpreted program is started through.
#[derive(Copy, Clone, Debug)]
pub enum MiriEntryFnType {
    /// The entry function is called directly, with `(argc, argv)` as its arguments.
    MiriStart,
    /// An entry point as classified by rustc. For `EntryFnType::Main`, the entry function is
    /// not called directly but passed to the `start` lang item.
    Rustc(EntryFnType),
}
29
/// When the main thread would exit, we will yield to any other thread that is ready to execute.
/// But we must only do that a finite number of times, or a background thread running `loop {}`
/// will hang the program.
/// This is the initial value of the `remaining` counter of `MainThreadState::Yield`.
const MAIN_THREAD_YIELDS_AT_SHUTDOWN: u32 = 256;
34
/// Controls how (and whether) alignment is checked.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum AlignmentCheck {
    /// Do not check alignment.
    None,
    /// Check alignment "symbolically", i.e., using only the requested alignment for an allocation and not its real base address.
    Symbolic,
    /// Check alignment on the actual physical integer address.
    Int,
}
44
/// What happens when an isolated op (one requiring communication with the host) is rejected.
///
/// With `Abort`, the machine is aborted. With any other variant, Miri returns an error for
/// the isolated op, and the variant determines if the user should be warned about that error.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum RejectOpWith {
    /// Isolated op is rejected with an abort of the machine.
    Abort,

    /// Do not print a warning about the rejected isolated op.
    NoWarning,

    /// Print a warning about the rejected isolated op, with backtrace.
    Warning,

    /// Print a warning about the rejected isolated op, without backtrace.
    WarningWithoutBacktrace,
}
61
/// Action to take for an op requiring communication with the host.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum IsolatedOp {
    /// Reject an op requiring communication with the host. By
    /// default, miri rejects the op with an abort. If not, it returns
    /// an error code, and prints a warning about it. Warning levels
    /// are controlled by the `RejectOpWith` enum.
    Reject(RejectOpWith),

    /// Execute op requiring communication with the host, i.e. disable isolation.
    Allow,
}
73
/// Which style to use for printing backtraces.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BacktraceStyle {
    /// Prints a terser backtrace which ideally only contains relevant information.
    Short,
    /// Prints a backtrace with all possible information.
    Full,
    /// Prints only the frame that the error occurs in.
    Off,
}
83
/// How thoroughly values are checked for validity.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ValidationMode {
    /// Do not perform any kind of validation.
    No,
    /// Validate the interior of the value, but not things behind references.
    Shallow,
    /// Fully recursively validate references.
    Deep,
}
93
/// Configuration needed to spawn a Miri instance.
///
/// See the `Default` impl for the default value of each setting.
#[derive(Clone)]
pub struct MiriConfig {
    /// The host environment snapshot to use as basis for what is provided to the interpreted program.
    /// (This is still subject to isolation as well as `forwarded_env_vars`.)
    pub env: Vec<(OsString, OsString)>,
    /// Determine if validity checking is enabled.
    pub validation: ValidationMode,
    /// Determines if Stacked Borrows or Tree Borrows is enabled.
    pub borrow_tracker: Option<BorrowTrackerMethod>,
    /// Controls alignment checking.
    pub check_alignment: AlignmentCheck,
    /// Action for an op requiring communication with the host.
    pub isolated_op: IsolatedOp,
    /// Determines if memory leaks should be ignored.
    pub ignore_leaks: bool,
    /// Environment variables that should always be forwarded from the host.
    pub forwarded_env_vars: Vec<String>,
    /// Additional environment variables that should be set in the interpreted program.
    pub set_env_vars: FxHashMap<String, String>,
    /// Command-line arguments passed to the interpreted program.
    pub args: Vec<String>,
    /// The seed to use when non-determinism or randomness are required (e.g. ptr-to-int cast, `getrandom()`).
    pub seed: Option<u64>,
    /// The stacked borrows pointer ids to report about.
    pub tracked_pointer_tags: FxHashSet<BorTag>,
    /// The allocation ids to report about.
    pub tracked_alloc_ids: FxHashSet<AllocId>,
    /// For the tracked alloc ids, also report read/write accesses.
    pub track_alloc_accesses: bool,
    /// Determine if data race detection should be enabled.
    pub data_race_detector: bool,
    /// Determine if weak memory emulation should be enabled. Requires data race detection to be enabled.
    pub weak_memory_emulation: bool,
    /// Determine if we are running in GenMC mode and with which settings. In GenMC mode, Miri will explore multiple concurrent executions of the given program.
    pub genmc_config: Option<GenmcConfig>,
    /// Track when an outdated (weak memory) load happens.
    pub track_outdated_loads: bool,
    /// Rate of spurious failures for compare_exchange_weak atomic operations,
    /// between 0.0 and 1.0, defaulting to 0.8 (80% chance of failure).
    pub cmpxchg_weak_failure_rate: f64,
    /// If `Some`, enable the `measureme` profiler, writing results to a file
    /// with the specified prefix.
    pub measureme_out: Option<String>,
    /// Which style to use for printing backtraces.
    pub backtrace_style: BacktraceStyle,
    /// Which provenance to use for int2ptr casts.
    pub provenance_mode: ProvenanceMode,
    /// Whether to ignore any output by the program. This is helpful when debugging miri
    /// as its messages don't get intermingled with the program messages.
    pub mute_stdout_stderr: bool,
    /// The probability of the active thread being preempted at the end of each basic block.
    pub preemption_rate: f64,
    /// Report the current instruction being executed every N basic blocks.
    pub report_progress: Option<u32>,
    /// Whether Stacked Borrows and Tree Borrows retagging should recurse into fields of datatypes.
    pub retag_fields: RetagFields,
    /// The location of the shared object files to load when calling external functions.
    pub native_lib: Vec<PathBuf>,
    /// Whether to enable the new native lib tracing system.
    pub native_lib_enable_tracing: bool,
    /// Run a garbage collector for BorTags every N basic blocks.
    pub gc_interval: u32,
    /// The number of CPUs to be reported by miri.
    pub num_cpus: u32,
    /// Requires Miri to emulate pages of a certain size.
    pub page_size: Option<u64>,
    /// Whether to collect a backtrace when each allocation is created, just in case it leaks.
    pub collect_leak_backtraces: bool,
    /// Probability for address reuse.
    pub address_reuse_rate: f64,
    /// Probability for address reuse across threads.
    pub address_reuse_cross_thread_rate: f64,
    /// Use round-robin scheduling with no preemption.
    pub fixed_scheduling: bool,
    /// Always prefer the intrinsic fallback body over the native Miri implementation.
    pub force_intrinsic_fallback: bool,
    /// Whether floating-point operations can behave non-deterministically.
    pub float_nondet: bool,
    /// Whether floating-point operations can have a non-deterministic rounding error.
    pub float_rounding_error: bool,
}
176
177impl Default for MiriConfig {
178    fn default() -> MiriConfig {
179        MiriConfig {
180            env: vec![],
181            validation: ValidationMode::Shallow,
182            borrow_tracker: Some(BorrowTrackerMethod::StackedBorrows),
183            check_alignment: AlignmentCheck::Int,
184            isolated_op: IsolatedOp::Reject(RejectOpWith::Abort),
185            ignore_leaks: false,
186            forwarded_env_vars: vec![],
187            set_env_vars: FxHashMap::default(),
188            args: vec![],
189            seed: None,
190            tracked_pointer_tags: FxHashSet::default(),
191            tracked_alloc_ids: FxHashSet::default(),
192            track_alloc_accesses: false,
193            data_race_detector: true,
194            weak_memory_emulation: true,
195            genmc_config: None,
196            track_outdated_loads: false,
197            cmpxchg_weak_failure_rate: 0.8, // 80%
198            measureme_out: None,
199            backtrace_style: BacktraceStyle::Short,
200            provenance_mode: ProvenanceMode::Default,
201            mute_stdout_stderr: false,
202            preemption_rate: 0.01, // 1%
203            report_progress: None,
204            retag_fields: RetagFields::Yes,
205            native_lib: vec![],
206            native_lib_enable_tracing: false,
207            gc_interval: 10_000,
208            num_cpus: 1,
209            page_size: None,
210            collect_leak_backtraces: true,
211            address_reuse_rate: 0.5,
212            address_reuse_cross_thread_rate: 0.1,
213            fixed_scheduling: false,
214            force_intrinsic_fallback: false,
215            float_nondet: true,
216            float_rounding_error: true,
217        }
218    }
219}
220
/// The state of the main thread. Implementation detail of `on_main_stack_empty`.
///
/// The states are visited in declaration order; each call to `on_main_stack_empty`
/// performs at most one transition.
#[derive(Debug)]
enum MainThreadState<'tcx> {
    /// Initial state: `ctor_state` is driven until it reports completion, then the entry
    /// function is called and we move on to `Running`.
    GlobalCtors {
        ctor_state: global_ctor::GlobalCtorState<'tcx>,
        /// The main function to call.
        entry_id: DefId,
        entry_type: MiriEntryFnType,
        /// Arguments passed to `main`.
        argc: ImmTy<'tcx>,
        argv: ImmTy<'tcx>,
    },
    /// The entry function has been called. The next time the stack is empty, we move on to
    /// `TlsDtors`.
    Running,
    /// The wrapped `tls::TlsDtorsState` is driven until it reports completion.
    TlsDtors(tls::TlsDtorsState<'tcx>),
    /// The main thread yields to other threads; `remaining` is how many more yields will be
    /// performed before moving on to `Done`.
    Yield {
        remaining: u32,
    },
    /// The exit code is read from the main function's return place and the interpreter loop
    /// is stopped.
    Done,
}
240
241impl<'tcx> MainThreadState<'tcx> {
242    fn on_main_stack_empty(
243        &mut self,
244        this: &mut MiriInterpCx<'tcx>,
245    ) -> InterpResult<'tcx, Poll<()>> {
246        use MainThreadState::*;
247        match self {
248            GlobalCtors { ctor_state, entry_id, entry_type, argc, argv } => {
249                match ctor_state.on_stack_empty(this)? {
250                    Poll::Pending => {} // just keep going
251                    Poll::Ready(()) => {
252                        call_main(this, *entry_id, *entry_type, argc.clone(), argv.clone())?;
253                        *self = Running;
254                    }
255                }
256            }
257            Running => {
258                *self = TlsDtors(Default::default());
259            }
260            TlsDtors(state) =>
261                match state.on_stack_empty(this)? {
262                    Poll::Pending => {} // just keep going
263                    Poll::Ready(()) => {
264                        if this.machine.data_race.as_genmc_ref().is_some() {
265                            // In GenMC mode, we don't yield at the end of the main thread.
266                            // Instead, the `GenmcCtx` will ensure that unfinished threads get a chance to run at this point.
267                            *self = Done;
268                        } else {
269                            // Give background threads a chance to finish by yielding the main thread a
270                            // couple of times -- but only if we would also preempt threads randomly.
271                            if this.machine.preemption_rate > 0.0 {
272                                // There is a non-zero chance they will yield back to us often enough to
273                                // make Miri terminate eventually.
274                                *self = Yield { remaining: MAIN_THREAD_YIELDS_AT_SHUTDOWN };
275                            } else {
276                                // The other threads did not get preempted, so no need to yield back to
277                                // them.
278                                *self = Done;
279                            }
280                        }
281                    }
282                },
283            Yield { remaining } =>
284                match remaining.checked_sub(1) {
285                    None => *self = Done,
286                    Some(new_remaining) => {
287                        *remaining = new_remaining;
288                        this.yield_active_thread();
289                    }
290                },
291            Done => {
292                // Figure out exit code.
293                let ret_place = this.machine.main_fn_ret_place.clone().unwrap();
294                let exit_code = this.read_target_isize(&ret_place)?;
295                // Rust uses `isize` but the underlying type of an exit code is `i32`.
296                // Do a saturating cast.
297                let exit_code = i32::try_from(exit_code).unwrap_or(if exit_code >= 0 {
298                    i32::MAX
299                } else {
300                    i32::MIN
301                });
302                // Deal with our thread-local memory. We do *not* want to actually free it, instead we consider TLS
303                // to be like a global `static`, so that all memory reached by it is considered to "not leak".
304                this.terminate_active_thread(TlsAllocAction::Leak)?;
305
306                // Stop interpreter loop.
307                throw_machine_stop!(TerminationInfo::Exit { code: exit_code, leak_check: true });
308            }
309        }
310        interp_ok(Poll::Pending)
311    }
312}
313
/// Returns a freshly created `InterpCx`.
/// Public because this is also used by `priroda`.
///
/// This sets up the machine, checks that MIR is available for libcore, places `argc`/`argv`
/// (and, on Windows targets, the UTF-16 command line) into interpreter memory, and registers
/// the main thread's state machine via `MiriMachine::late_init`. The entry function itself is
/// not called here; that happens later through `MainThreadState::on_main_stack_empty`.
pub fn create_ecx<'tcx>(
    tcx: TyCtxt<'tcx>,
    entry_id: DefId,
    entry_type: MiriEntryFnType,
    config: &MiriConfig,
    genmc_ctx: Option<Rc<GenmcCtx>>,
) -> InterpResult<'tcx, InterpCx<'tcx, MiriMachine<'tcx>>> {
    let typing_env = ty::TypingEnv::fully_monomorphized();
    let layout_cx = LayoutCx::new(tcx, typing_env);
    let mut ecx = InterpCx::new(
        tcx,
        rustc_span::DUMMY_SP,
        typing_env,
        MiriMachine::new(config, layout_cx, genmc_ctx),
    );

    // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore.
    let sentinel =
        helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS);
    if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) {
        tcx.dcx().fatal(
            "the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing.\n\
            Note that directly invoking the `miri` binary is not supported; please use `cargo miri` instead."
        );
    }

    // Compute argc and argv from `config.args`.
    let argc =
        ImmTy::from_int(i64::try_from(config.args.len()).unwrap(), ecx.machine.layouts.isize);
    let argv = {
        // Put each argument in memory, collect pointers.
        let mut argvs = Vec::<Immediate<Provenance>>::with_capacity(config.args.len());
        for arg in config.args.iter() {
            // Make space for `0` terminator.
            let size = u64::try_from(arg.len()).unwrap().strict_add(1);
            let arg_type = Ty::new_array(tcx, tcx.types.u8, size);
            let arg_place =
                ecx.allocate(ecx.layout_of(arg_type)?, MiriMemoryKind::Machine.into())?;
            ecx.write_os_str_to_c_str(OsStr::new(arg), arg_place.ptr(), size)?;
            ecx.mark_immutable(&arg_place);
            argvs.push(arg_place.to_ref(&ecx));
        }
        // Make an array with all these pointers, in the Miri memory.
        let u8_ptr_type = Ty::new_imm_ptr(tcx, tcx.types.u8);
        let u8_ptr_ptr_type = Ty::new_imm_ptr(tcx, u8_ptr_type);
        let argvs_layout =
            ecx.layout_of(Ty::new_array(tcx, u8_ptr_type, u64::try_from(argvs.len()).unwrap()))?;
        let argvs_place = ecx.allocate(argvs_layout, MiriMemoryKind::Machine.into())?;
        for (arg, idx) in argvs.into_iter().zip(0..) {
            let place = ecx.project_index(&argvs_place, idx)?;
            ecx.write_immediate(arg, &place)?;
        }
        ecx.mark_immutable(&argvs_place);
        // Store `argc` and `argv` for macOS `_NSGetArg{c,v}`, and for the GC to see them.
        {
            let argc_place =
                ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
            ecx.write_immediate(*argc, &argc_place)?;
            ecx.mark_immutable(&argc_place);
            ecx.machine.argc = Some(argc_place.ptr());

            // This allocation holds a pointer to the pointer array (i.e. it is a `*const *const u8`).
            let argv_place =
                ecx.allocate(ecx.layout_of(u8_ptr_ptr_type)?, MiriMemoryKind::Machine.into())?;
            ecx.write_pointer(argvs_place.ptr(), &argv_place)?;
            ecx.mark_immutable(&argv_place);
            ecx.machine.argv = Some(argv_place.ptr());
        }
        // Store command line as UTF-16 for Windows `GetCommandLineW`.
        if tcx.sess.target.os == "windows" {
            // Construct a command string with all the arguments.
            let cmd_utf16: Vec<u16> = args_to_utf16_command_string(config.args.iter());

            let cmd_type =
                Ty::new_array(tcx, tcx.types.u16, u64::try_from(cmd_utf16.len()).unwrap());
            let cmd_place =
                ecx.allocate(ecx.layout_of(cmd_type)?, MiriMemoryKind::Machine.into())?;
            ecx.machine.cmd_line = Some(cmd_place.ptr());
            // Store the UTF-16 string. We just allocated so we know the bounds are fine.
            for (&c, idx) in cmd_utf16.iter().zip(0..) {
                let place = ecx.project_index(&cmd_place, idx)?;
                ecx.write_scalar(Scalar::from_u16(c), &place)?;
            }
            ecx.mark_immutable(&cmd_place);
        }
        // The `argv` value handed to the entry function points at the pointer array itself.
        let imm = argvs_place.to_ref(&ecx);
        let layout = ecx.layout_of(u8_ptr_ptr_type)?;
        ImmTy::from_immediate(imm, layout)
    };

    // Some parts of initialization require a full `InterpCx`.
    MiriMachine::late_init(&mut ecx, config, {
        // The main thread starts out running global constructors; the entry function is
        // only called once those are done.
        let mut main_thread_state = MainThreadState::GlobalCtors {
            entry_id,
            entry_type,
            argc,
            argv,
            ctor_state: global_ctor::GlobalCtorState::default(),
        };

        // Cannot capture anything GC-relevant here.
        // `argc` and `argv` *are* GC_relevant, but they also get stored in `machine.argc` and
        // `machine.argv` so we are good.
        Box::new(move |m| main_thread_state.on_main_stack_empty(m))
    })?;

    interp_ok(ecx)
}
423
424// Call the entry function.
425fn call_main<'tcx>(
426    ecx: &mut MiriInterpCx<'tcx>,
427    entry_id: DefId,
428    entry_type: MiriEntryFnType,
429    argc: ImmTy<'tcx>,
430    argv: ImmTy<'tcx>,
431) -> InterpResult<'tcx, ()> {
432    let tcx = ecx.tcx();
433
434    // Setup first stack frame.
435    let entry_instance = ty::Instance::mono(tcx, entry_id);
436
437    // Return place (in static memory so that it does not count as leak).
438    let ret_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
439    ecx.machine.main_fn_ret_place = Some(ret_place.clone());
440
441    // Call start function.
442    match entry_type {
443        MiriEntryFnType::Rustc(EntryFnType::Main { .. }) => {
444            let start_id = tcx.lang_items().start_fn().unwrap_or_else(|| {
445                tcx.dcx().fatal("could not find start lang item");
446            });
447            let main_ret_ty = tcx.fn_sig(entry_id).no_bound_vars().unwrap().output();
448            let main_ret_ty = main_ret_ty.no_bound_vars().unwrap();
449            let start_instance = ty::Instance::try_resolve(
450                tcx,
451                ecx.typing_env(),
452                start_id,
453                tcx.mk_args(&[ty::GenericArg::from(main_ret_ty)]),
454            )
455            .unwrap()
456            .unwrap();
457
458            let main_ptr = ecx.fn_ptr(FnVal::Instance(entry_instance));
459
460            // Always using DEFAULT is okay since we don't support signals in Miri anyway.
461            // (This means we are effectively ignoring `-Zon-broken-pipe`.)
462            let sigpipe = rustc_session::config::sigpipe::DEFAULT;
463
464            ecx.call_function(
465                start_instance,
466                ExternAbi::Rust,
467                &[
468                    ImmTy::from_scalar(
469                        Scalar::from_pointer(main_ptr, ecx),
470                        // FIXME use a proper fn ptr type
471                        ecx.machine.layouts.const_raw_ptr,
472                    ),
473                    argc,
474                    argv,
475                    ImmTy::from_uint(sigpipe, ecx.machine.layouts.u8),
476                ],
477                Some(&ret_place),
478                ReturnContinuation::Stop { cleanup: true },
479            )?;
480        }
481        MiriEntryFnType::MiriStart => {
482            ecx.call_function(
483                entry_instance,
484                ExternAbi::Rust,
485                &[argc, argv],
486                Some(&ret_place),
487                ReturnContinuation::Stop { cleanup: true },
488            )?;
489        }
490    }
491
492    interp_ok(())
493}
494
495/// Evaluates the entry function specified by `entry_id`.
496/// Returns `Some(return_code)` if program execution completed.
497/// Returns `None` if an evaluation error occurred.
498pub fn eval_entry<'tcx>(
499    tcx: TyCtxt<'tcx>,
500    entry_id: DefId,
501    entry_type: MiriEntryFnType,
502    config: &MiriConfig,
503    genmc_ctx: Option<Rc<GenmcCtx>>,
504) -> Option<i32> {
505    // Copy setting before we move `config`.
506    let ignore_leaks = config.ignore_leaks;
507
508    if let Some(genmc_ctx) = &genmc_ctx {
509        genmc_ctx.handle_execution_start();
510    }
511
512    let mut ecx = match create_ecx(tcx, entry_id, entry_type, config, genmc_ctx).report_err() {
513        Ok(v) => v,
514        Err(err) => {
515            let (kind, backtrace) = err.into_parts();
516            backtrace.print_backtrace();
517            panic!("Miri initialization error: {kind:?}")
518        }
519    };
520
521    // Perform the main execution.
522    let res: thread::Result<InterpResult<'_, !>> =
523        panic::catch_unwind(AssertUnwindSafe(|| ecx.run_threads()));
524    let res = res.unwrap_or_else(|panic_payload| {
525        ecx.handle_ice();
526        panic::resume_unwind(panic_payload)
527    });
528    // `Ok` can never happen; the interpreter loop always exits with an "error"
529    // (but that "error" might be just "regular program termination").
530    let Err(err) = res.report_err();
531
532    // Show diagnostic, if any.
533    let (return_code, leak_check) = report_error(&ecx, err)?;
534
535    // We inform GenMC that the execution is complete.
536    if let Some(genmc_ctx) = ecx.machine.data_race.as_genmc_ref()
537        && let Err(error) = genmc_ctx.handle_execution_end(&ecx)
538    {
539        // FIXME(GenMC): Improve error reporting.
540        tcx.dcx().err(format!("GenMC returned an error: \"{error}\""));
541        return None;
542    }
543
544    // If we get here there was no fatal error.
545
546    // Possibly check for memory leaks.
547    if leak_check && !ignore_leaks {
548        // Check for thread leaks.
549        if !ecx.have_all_terminated() {
550            tcx.dcx().err("the main thread terminated without waiting for all remaining threads");
551            tcx.dcx().note("set `MIRIFLAGS=-Zmiri-ignore-leaks` to disable this check");
552            return None;
553        }
554        // Check for memory leaks.
555        info!("Additional static roots: {:?}", ecx.machine.static_roots);
556        let leaks = ecx.take_leaked_allocations(|ecx| &ecx.machine.static_roots);
557        if !leaks.is_empty() {
558            report_leaks(&ecx, leaks);
559            tcx.dcx().note("set `MIRIFLAGS=-Zmiri-ignore-leaks` to disable this check");
560            // Ignore the provided return code - let the reported error
561            // determine the return code.
562            return None;
563        }
564    }
565    Some(return_code)
566}
567
/// Builds a Windows command line string from a list of arguments.
///
/// The result is UTF-16 encoded and NUL terminated. If `args` yields nothing, the result
/// consists of just the NUL terminator.
///
/// `argv[0]` is always wrapped in double quotes and otherwise copied verbatim. Each further
/// argument is preceded by a space, and is quoted only if it is empty or contains a space,
/// a tab, or a double quote.
///
/// # Panics
///
/// Panics if the zeroth argument contains the `"` character, because doublequotes
/// in `argv[0]` cannot be encoded using the standard command line parsing rules.
/// Also panics if the assembled command line contains a NUL character.
///
/// Further reading:
/// * [Parsing C++ command-line arguments](https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-160#parsing-c-command-line-arguments)
/// * [The C/C++ Parameter Parsing Rules](https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES)
fn args_to_utf16_command_string<I, T>(mut args: I) -> Vec<u16>
where
    I: Iterator<Item = T>,
    T: AsRef<str>,
{
    // Without an `argv[0]`, the command line is empty.
    let Some(program) = args.next() else {
        return vec![0];
    };
    let program = program.as_ref();
    // `argv[0]` follows its own parsing rules: backslashes are not escape characters, and
    // there is no way to express a literal double quote.
    if program.contains('"') {
        panic!("argv[0] cannot contain a doublequote (\") character");
    }
    // `argv[0]` is unconditionally quoted.
    let mut line = format!("\"{program}\"");

    for arg in args {
        let arg = arg.as_ref();
        line.push(' ');

        let needs_quotes = arg.is_empty() || arg.chars().any(|c| matches!(c, '"' | '\t' | ' '));
        if !needs_quotes {
            // Nothing in here is special to the parser, so copy it as-is.
            line.push_str(arg);
            continue;
        }

        // Whitespace is protected by the surrounding quotes, and a literal quote is written
        // as `\"`. A run of backslashes is only special when it is directly followed by a
        // quote (a literal one, or the closing one we add); in that case every backslash of
        // the run has to be doubled. All other backslashes are taken literally.
        line.push('"');
        let mut pending_backslashes = 0;
        for c in arg.chars() {
            if c == '\\' {
                // We cannot emit these yet: it depends on what follows the run.
                pending_backslashes += 1;
                continue;
            }
            let backslashes =
                if c == '"' { pending_backslashes * 2 + 1 } else { pending_backslashes };
            line.extend(iter::repeat_n('\\', backslashes));
            line.push(c);
            pending_backslashes = 0;
        }
        // A trailing run is followed by the closing quote.
        line.extend(iter::repeat_n('\\', pending_backslashes * 2));
        line.push('"');
    }

    if line.contains('\0') {
        panic!("interior null in command line arguments");
    }
    let mut encoded: Vec<u16> = line.encode_utf16().collect();
    encoded.push(0);
    encoded
}
652
#[cfg(test)]
mod tests {
    use super::*;

    /// A double quote in `argv[0]` cannot be encoded and must be rejected.
    #[test]
    #[should_panic(expected = "argv[0] cannot contain a doublequote (\") character")]
    fn windows_argv0_panic_on_quote() {
        let args = ["\""];
        args_to_utf16_command_string(args.iter());
    }

    /// A trailing backslash in `argv[0]` must not be escaped, while the remaining arguments
    /// get quoted and escaped as needed.
    #[test]
    fn windows_argv0_no_escape() {
        let args = [r"C:\Program Files\", "arg1", "arg 2", "arg \" 3"];
        let encoded = args_to_utf16_command_string(args.iter());
        let cmd = String::from_utf16_lossy(&encoded);
        let expected = r#""C:\Program Files\" arg1 "arg 2" "arg \" 3""#;
        assert_eq!(cmd.trim_end_matches('\0'), expected);
    }
}