From 43453793692646a62576dcb034d8eaa26f8a58f0 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 13 Jun 2024 17:51:54 +0200 Subject: [PATCH 1/2] cargo miri: add support for '--many-seeds' to run the program / tests many times with different seeds --- src/tools/miri/cargo-miri/src/phases.rs | 200 ++++++++++++++---------- src/tools/miri/cargo-miri/src/util.rs | 36 ++++- src/tools/miri/miri-script/src/main.rs | 7 +- 3 files changed, 155 insertions(+), 88 deletions(-) diff --git a/src/tools/miri/cargo-miri/src/phases.rs b/src/tools/miri/cargo-miri/src/phases.rs index e2fc2a0c2779b..6773cff89abf8 100644 --- a/src/tools/miri/cargo-miri/src/phases.rs +++ b/src/tools/miri/cargo-miri/src/phases.rs @@ -1,10 +1,10 @@ //! Implements the various phases of `cargo miri run/test`. -use std::env; use std::fs::{self, File}; -use std::io::BufReader; +use std::io::{BufReader, Write}; use std::path::{Path, PathBuf}; use std::process::Command; +use std::{env, thread}; use rustc_version::VersionMeta; @@ -119,7 +119,7 @@ pub fn phase_cargo_miri(mut args: impl Iterator) { // describes an alternative // approach that uses `cargo check`, making that part easier but target and binary handling // harder. - let cargo_miri_path = std::env::current_exe() + let cargo_miri_path = env::current_exe() .expect("current executable path invalid") .into_os_string() .into_string() @@ -163,14 +163,22 @@ pub fn phase_cargo_miri(mut args: impl Iterator) { let target_dir = get_target_dir(&metadata); cmd.arg("--target-dir").arg(target_dir); + // Store many-seeds argument. + let mut many_seeds = None; // *After* we set all the flags that need setting, forward everything else. Make sure to skip - // `--target-dir` (which would otherwise be set twice). + // `--target-dir` (which would otherwise be set twice) and `--many-seeds` (which is our flag, not cargo's). for arg in ArgSplitFlagValue::from_string_iter(&mut args, "--target-dir").filter_map(Result::err) { - cmd.arg(arg); + if arg == "--many-seeds" { + many_seeds = Some(format!("0..256")); + } else if let Some(val) = arg.strip_prefix("--many-seeds=") { + many_seeds = Some(val.to_owned()); + } else { + cmd.arg(arg); + } } - // Forward all further arguments (not consumed by `ArgSplitFlagValue`) to cargo. + // Forward all further arguments after `--` (not consumed by `ArgSplitFlagValue`) to cargo. cmd.args(args); // Set `RUSTC_WRAPPER` to ourselves. Cargo will prepend that binary to its usual invocation, @@ -222,6 +230,9 @@ pub fn phase_cargo_miri(mut args: impl Iterator) { // Forward some crucial information to our own re-invocations. cmd.env("MIRI_SYSROOT", miri_sysroot); cmd.env("MIRI_LOCAL_CRATES", local_crates(&metadata)); + if let Some(many_seeds) = many_seeds { + cmd.env("MIRI_MANY_SEEDS", many_seeds); + } if verbose > 0 { cmd.env("MIRI_VERBOSE", verbose.to_string()); // This makes the other phases verbose. } @@ -309,7 +320,7 @@ pub fn phase_rustc(mut args: impl Iterator, phase: RustcPhase) { } } - let verbose = std::env::var("MIRI_VERBOSE") + let verbose = env::var("MIRI_VERBOSE") .map_or(0, |verbose| verbose.parse().expect("verbosity flag must be an integer")); let target_crate = is_target_crate(); @@ -489,7 +500,7 @@ pub fn phase_rustc(mut args: impl Iterator, phase: RustcPhase) { // This is a host crate. // When we're running `cargo-miri` from `x.py` we need to pass the sysroot explicitly // due to bootstrap complications. - if let Some(sysroot) = std::env::var_os("MIRI_HOST_SYSROOT") { + if let Some(sysroot) = env::var_os("MIRI_HOST_SYSROOT") { cmd.arg("--sysroot").arg(sysroot); } @@ -532,7 +543,7 @@ pub enum RunnerPhase { } pub fn phase_runner(mut binary_args: impl Iterator, phase: RunnerPhase) { - let verbose = std::env::var("MIRI_VERBOSE") + let verbose = env::var("MIRI_VERBOSE") .map_or(0, |verbose| verbose.parse().expect("verbosity flag must be an integer")); let binary = binary_args.next().unwrap(); @@ -541,6 +552,7 @@ pub fn phase_runner(mut binary_args: impl Iterator, phase: Runner "file {:?} not found or `cargo-miri` invoked incorrectly; please only invoke this binary through `cargo miri`", binary )); let file = BufReader::new(file); + let binary_args = binary_args.collect::>(); let info = serde_json::from_reader(file).unwrap_or_else(|_| { show_error!("file {:?} contains outdated or invalid JSON; try `cargo clean`", binary) @@ -555,84 +567,114 @@ pub fn phase_runner(mut binary_args: impl Iterator, phase: Runner } }; - let mut cmd = miri(); - - // Set missing env vars. We prefer build-time env vars over run-time ones; see - // for the kind of issue that fixes. - for (name, val) in info.env { - // `CARGO_MAKEFLAGS` contains information about how to reach the jobserver, but by the time - // the program is being run, that jobserver no longer exists (cargo only runs the jobserver - // for the build portion of `cargo run`/`cargo test`). Hence we shouldn't forward this. - // Also see . - if name == "CARGO_MAKEFLAGS" { - continue; - } - if let Some(old_val) = env::var_os(&name) { - if old_val == val { - // This one did not actually change, no need to re-set it. - // (This keeps the `debug_cmd` below more manageable.) + let many_seeds = env::var("MIRI_MANY_SEEDS"); + run_many_seeds(many_seeds.ok(), |seed| { + let mut cmd = miri(); + + // Set missing env vars. We prefer build-time env vars over run-time ones; see + // for the kind of issue that fixes. + for (name, val) in &info.env { + // `CARGO_MAKEFLAGS` contains information about how to reach the jobserver, but by the time + // the program is being run, that jobserver no longer exists (cargo only runs the jobserver + // for the build portion of `cargo run`/`cargo test`). Hence we shouldn't forward this. + // Also see . + if name == "CARGO_MAKEFLAGS" { continue; - } else if verbose > 0 { - eprintln!( - "[cargo-miri runner] Overwriting run-time env var {name:?}={old_val:?} with build-time value {val:?}" - ); } + if let Some(old_val) = env::var_os(name) { + if *old_val == *val { + // This one did not actually change, no need to re-set it. + // (This keeps the `debug_cmd` below more manageable.) + continue; + } else if verbose > 0 { + eprintln!( + "[cargo-miri runner] Overwriting run-time env var {name:?}={old_val:?} with build-time value {val:?}" + ); + } + } + cmd.env(name, val); } - cmd.env(name, val); - } - if phase != RunnerPhase::Rustdoc { - // Set the sysroot. Not necessary in rustdoc, where we already set the sysroot in - // `phase_rustdoc`. rustdoc will forward that flag when invoking rustc (i.e., us), so the - // flag is present in `info.args`. - cmd.arg("--sysroot").arg(env::var_os("MIRI_SYSROOT").unwrap()); - } - // Forward rustc arguments. - // We need to patch "--extern" filenames because we forced a check-only - // build without cargo knowing about that: replace `.rlib` suffix by - // `.rmeta`. - // We also need to remove `--error-format` as cargo specifies that to be JSON, - // but when we run here, cargo does not interpret the JSON any more. `--json` - // then also needs to be dropped. - let mut args = info.args.into_iter(); - while let Some(arg) = args.next() { - if arg == "--extern" { - forward_patched_extern_arg(&mut args, &mut cmd); - } else if let Some(suffix) = arg.strip_prefix("--error-format") { - assert!(suffix.starts_with('=')); - // Drop this argument. - } else if let Some(suffix) = arg.strip_prefix("--json") { - assert!(suffix.starts_with('=')); - // Drop this argument. - } else { - cmd.arg(arg); + if phase != RunnerPhase::Rustdoc { + // Set the sysroot. Not necessary in rustdoc, where we already set the sysroot in + // `phase_rustdoc`. rustdoc will forward that flag when invoking rustc (i.e., us), so the + // flag is present in `info.args`. + cmd.arg("--sysroot").arg(env::var_os("MIRI_SYSROOT").unwrap()); + } + // Forward rustc arguments. + // We need to patch "--extern" filenames because we forced a check-only + // build without cargo knowing about that: replace `.rlib` suffix by + // `.rmeta`. + // We also need to remove `--error-format` as cargo specifies that to be JSON, + // but when we run here, cargo does not interpret the JSON any more. `--json` + // then also needs to be dropped. + let mut args = info.args.iter(); + while let Some(arg) = args.next() { + if arg == "--extern" { + forward_patched_extern_arg(&mut (&mut args).cloned(), &mut cmd); + } else if let Some(suffix) = arg.strip_prefix("--error-format") { + assert!(suffix.starts_with('=')); + // Drop this argument. + } else if let Some(suffix) = arg.strip_prefix("--json") { + assert!(suffix.starts_with('=')); + // Drop this argument. + } else { + cmd.arg(arg); + } + } + // Respect `MIRIFLAGS`. + if let Ok(a) = env::var("MIRIFLAGS") { + let args = flagsplit(&a); + cmd.args(args); + } + // Set the current seed. + if let Some(seed) = seed { + eprintln!("Trying seed: {seed}"); + cmd.arg(format!("-Zmiri-seed={seed}")); } - } - // Respect `MIRIFLAGS`. - if let Ok(a) = env::var("MIRIFLAGS") { - let args = flagsplit(&a); - cmd.args(args); - } - - // Then pass binary arguments. - cmd.arg("--"); - cmd.args(binary_args); - - // Make sure we use the build-time working directory for interpreting Miri/rustc arguments. - // But then we need to switch to the run-time one, which we instruct Miri to do by setting `MIRI_CWD`. - cmd.current_dir(info.current_dir); - cmd.env("MIRI_CWD", env::current_dir().unwrap()); - // Run it. - debug_cmd("[cargo-miri runner]", verbose, &cmd); - match phase { - RunnerPhase::Rustdoc => exec_with_pipe(cmd, &info.stdin, format!("{binary}.stdin")), - RunnerPhase::Cargo => exec(cmd), - } + // Then pass binary arguments. + cmd.arg("--"); + cmd.args(&binary_args); + + // Make sure we use the build-time working directory for interpreting Miri/rustc arguments. + // But then we need to switch to the run-time one, which we instruct Miri to do by setting `MIRI_CWD`. + cmd.current_dir(&info.current_dir); + cmd.env("MIRI_CWD", env::current_dir().unwrap()); + + // Run it. + debug_cmd("[cargo-miri runner]", verbose, &cmd); + + match phase { + RunnerPhase::Rustdoc => { + cmd.stdin(std::process::Stdio::piped()); + let mut child = cmd.spawn().expect("failed to spawn process"); + let child_stdin = child.stdin.take().unwrap(); + // Write stdin in a background thread, as it may block. + let exit_status = thread::scope(|s| { + s.spawn(|| { + let mut child_stdin = child_stdin; + // Ignore failure, it is most likely due to the process having terminated. + let _ = child_stdin.write_all(&info.stdin); + }); + child.wait().expect("failed to run command") + }); + if !exit_status.success() { + std::process::exit(exit_status.code().unwrap_or(-1)); + } + } + RunnerPhase::Cargo => { + let exit_status = cmd.status().expect("failed to run command"); + if !exit_status.success() { + std::process::exit(exit_status.code().unwrap_or(-1)); + } + } + } + }); } pub fn phase_rustdoc(mut args: impl Iterator) { - let verbose = std::env::var("MIRI_VERBOSE") + let verbose = env::var("MIRI_VERBOSE") .map_or(0, |verbose| verbose.parse().expect("verbosity flag must be an integer")); // phase_cargo_miri sets the RUSTDOC env var to ourselves, and puts a backup @@ -681,7 +723,7 @@ pub fn phase_rustdoc(mut args: impl Iterator) { cmd.arg("--cfg").arg("miri"); // Make rustdoc call us back. - let cargo_miri_path = std::env::current_exe().expect("current executable path invalid"); + let cargo_miri_path = env::current_exe().expect("current executable path invalid"); cmd.arg("--test-builder").arg(&cargo_miri_path); // invoked by forwarding most arguments cmd.arg("--runtool").arg(&cargo_miri_path); // invoked with just a single path argument diff --git a/src/tools/miri/cargo-miri/src/util.rs b/src/tools/miri/cargo-miri/src/util.rs index f36cff1f7981e..5f2794e2244f4 100644 --- a/src/tools/miri/cargo-miri/src/util.rs +++ b/src/tools/miri/cargo-miri/src/util.rs @@ -171,11 +171,16 @@ where drop(path); // We don't need the path, we can pipe the bytes directly cmd.stdin(std::process::Stdio::piped()); let mut child = cmd.spawn().expect("failed to spawn process"); - { - let stdin = child.stdin.as_mut().expect("failed to open stdin"); - stdin.write_all(input).expect("failed to write out test source"); - } - let exit_status = child.wait().expect("failed to run command"); + let child_stdin = child.stdin.take().unwrap(); + // Write stdin in a background thread, as it may block. + let exit_status = std::thread::scope(|s| { + s.spawn(|| { + let mut child_stdin = child_stdin; + // Ignore failure, it is most likely due to the process having terminated. + let _ = child_stdin.write_all(input); + }); + child.wait().expect("failed to run command") + }); std::process::exit(exit_status.code().unwrap_or(-1)) } } @@ -317,3 +322,24 @@ pub fn clean_target_dir(meta: &Metadata) { remove_dir_all_idem(&target_dir).unwrap_or_else(|err| show_error!("{}", err)) } + +/// Run `f` according to the many-seeds argument. In single-seed mode, `f` will only +/// be called once, with `None`. +pub fn run_many_seeds(many_seeds: Option, f: impl Fn(Option)) { + let Some(many_seeds) = many_seeds else { + return f(None); + }; + let (from, to) = many_seeds + .split_once("..") + .unwrap_or_else(|| show_error!("invalid format for `--many-seeds`: expected `from..to`")); + let from: u32 = if from.is_empty() { + 0 + } else { + from.parse().unwrap_or_else(|_| show_error!("invalid `from` in `--many-seeds=from..to")) + }; + let to: u32 = + to.parse().unwrap_or_else(|_| show_error!("invalid `to` in `--many-seeds=from..to")); + for seed in from..to { + f(Some(seed)); + } +} diff --git a/src/tools/miri/miri-script/src/main.rs b/src/tools/miri/miri-script/src/main.rs index d436ef0c5aaa3..f4448146c55c5 100644 --- a/src/tools/miri/miri-script/src/main.rs +++ b/src/tools/miri/miri-script/src/main.rs @@ -182,15 +182,14 @@ fn main() -> Result<()> { verbose = true; } else if let Some(val) = args.get_long_opt_with_default("many-seeds", "0..256")? { let (from, to) = val.split_once("..").ok_or_else(|| { - anyhow!("invalid format for `--many-seeds-range`: expected `from..to`") + anyhow!("invalid format for `--many-seeds`: expected `from..to`") })?; let from: u32 = if from.is_empty() { 0 } else { - from.parse().context("invalid `from` in `--many-seeds-range=from..to")? + from.parse().context("invalid `from` in `--many-seeds=from..to")? }; - let to: u32 = - to.parse().context("invalid `to` in `--many-seeds-range=from..to")?; + let to: u32 = to.parse().context("invalid `to` in `--many-seeds=from..to")?; many_seeds = Some(from..to); } else if let Some(val) = args.get_long_opt("target")? { target = Some(val); From cfcea21074f922aa2fd184751842513e574b2e37 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 13 Jun 2024 20:02:02 +0200 Subject: [PATCH 2/2] document --many-seeds; set the default range to 0..64 --- src/tools/miri/README.md | 32 ++++++++++++------------- src/tools/miri/cargo-miri/src/phases.rs | 4 +++- src/tools/miri/miri-script/src/main.rs | 4 ++-- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/tools/miri/README.md b/src/tools/miri/README.md index c437619a76eab..4b4f2f83062f0 100644 --- a/src/tools/miri/README.md +++ b/src/tools/miri/README.md @@ -151,6 +151,21 @@ platform. For example `cargo miri test --target s390x-unknown-linux-gnu` will run your test suite on a big-endian target, which is useful for testing endian-sensitive code. +### Testing multiple different executions + +Certain parts of the execution are picked randomly by Miri, such as the exact base address +allocations are stored at and the interleaving of concurrently executing threads. Sometimes, it can +be useful to explore multiple different execution, e.g. to make sure that your code does not depend +on incidental "super-alignment" of new allocations and to test different thread interleavings. +This can be done with the `--many-seeds` flag: + +``` +cargo miri test --many-seeds # tries the seeds in 0..64 +cargo miri test --many-seeds=0..16 +``` + +The default of 64 different seeds is quite slow, so you probably want to specify a smaller range. + ### Running Miri on CI When running Miri on CI, use the following snippet to install a nightly toolchain with the Miri @@ -183,23 +198,6 @@ Here is an example job for GitHub Actions: The explicit `cargo miri setup` helps to keep the output of the actual test step clean. -### Testing for alignment issues - -Miri can sometimes miss misaligned accesses since allocations can "happen to be" -aligned just right. You can use `-Zmiri-symbolic-alignment-check` to definitely -catch all such issues, but that flag will also cause false positives when code -does manual pointer arithmetic to account for alignment. Another alternative is -to call Miri with various values for `-Zmiri-seed`; that will alter the -randomness that is used to determine allocation base addresses. The following -snippet calls Miri in a loop with different values for the seed: - -``` -for SEED in $(seq 0 255); do - echo "Trying seed: $SEED" - MIRIFLAGS=-Zmiri-seed=$SEED cargo miri test || { echo "Failing seed: $SEED"; break; }; -done -``` - ### Supported targets Miri does not support all targets supported by Rust. The good news, however, is diff --git a/src/tools/miri/cargo-miri/src/phases.rs b/src/tools/miri/cargo-miri/src/phases.rs index 6773cff89abf8..3c36f606d8452 100644 --- a/src/tools/miri/cargo-miri/src/phases.rs +++ b/src/tools/miri/cargo-miri/src/phases.rs @@ -34,6 +34,8 @@ Examples: "; +const DEFAULT_MANY_SEEDS: &str = "0..64"; + fn show_help() { println!("{CARGO_MIRI_HELP}"); } @@ -171,7 +173,7 @@ pub fn phase_cargo_miri(mut args: impl Iterator) { ArgSplitFlagValue::from_string_iter(&mut args, "--target-dir").filter_map(Result::err) { if arg == "--many-seeds" { - many_seeds = Some(format!("0..256")); + many_seeds = Some(DEFAULT_MANY_SEEDS.to_owned()); } else if let Some(val) = arg.strip_prefix("--many-seeds=") { many_seeds = Some(val.to_owned()); } else { diff --git a/src/tools/miri/miri-script/src/main.rs b/src/tools/miri/miri-script/src/main.rs index f4448146c55c5..c4f0d808d93e1 100644 --- a/src/tools/miri/miri-script/src/main.rs +++ b/src/tools/miri/miri-script/src/main.rs @@ -98,7 +98,7 @@ Build miri, set up a sysroot and then run the test suite. Build miri, set up a sysroot and then run the driver with the given . (Also respects MIRIFLAGS environment variable.) If `--many-seeds` is present, Miri is run many times in parallel with different seeds. -The range defaults to `0..256`. +The range defaults to `0..64`. ./miri fmt : Format all sources and tests. are passed to `rustfmt`. @@ -180,7 +180,7 @@ fn main() -> Result<()> { dep = true; } else if args.get_long_flag("verbose")? || args.get_short_flag('v')? { verbose = true; - } else if let Some(val) = args.get_long_opt_with_default("many-seeds", "0..256")? { + } else if let Some(val) = args.get_long_opt_with_default("many-seeds", "0..64")? { let (from, to) = val.split_once("..").ok_or_else(|| { anyhow!("invalid format for `--many-seeds`: expected `from..to`") })?;