From 43453793692646a62576dcb034d8eaa26f8a58f0 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Thu, 13 Jun 2024 17:51:54 +0200
Subject: [PATCH 1/2] cargo miri: add support for '--many-seeds' to run the
 program / tests many times with different seeds

---
 src/tools/miri/cargo-miri/src/phases.rs | 200 ++++++++++++++----------
 src/tools/miri/cargo-miri/src/util.rs   |  36 ++++-
 src/tools/miri/miri-script/src/main.rs  |   7 +-
 3 files changed, 155 insertions(+), 88 deletions(-)
diff --git a/src/tools/miri/cargo-miri/src/phases.rs b/src/tools/miri/cargo-miri/src/phases.rs
index e2fc2a0c2779b..6773cff89abf8 100644
--- a/src/tools/miri/cargo-miri/src/phases.rs
+++ b/src/tools/miri/cargo-miri/src/phases.rs
@@ -1,10 +1,10 @@
 //! Implements the various phases of `cargo miri run/test`.
 
-use std::env;
 use std::fs::{self, File};
-use std::io::BufReader;
+use std::io::{BufReader, Write};
 use std::path::{Path, PathBuf};
 use std::process::Command;
+use std::{env, thread};
 
 use rustc_version::VersionMeta;
 
@@ -119,7 +119,7 @@ pub fn phase_cargo_miri(mut args: impl Iterator<Item = String>) {
     // </~https://github.com/rust-lang/miri/pull/1540#issuecomment-693553191> describes an alternative
     // approach that uses `cargo check`, making that part easier but target and binary handling
     // harder.
-    let cargo_miri_path = std::env::current_exe()
+    let cargo_miri_path = env::current_exe()
         .expect("current executable path invalid")
         .into_os_string()
         .into_string()
@@ -163,14 +163,22 @@ pub fn phase_cargo_miri(mut args: impl Iterator<Item = String>) {
     let target_dir = get_target_dir(&metadata);
     cmd.arg("--target-dir").arg(target_dir);
 
+    // Store many-seeds argument.
+    let mut many_seeds = None;
     // *After* we set all the flags that need setting, forward everything else. Make sure to skip
-    // `--target-dir` (which would otherwise be set twice).
+    // `--target-dir` (which would otherwise be set twice) and `--many-seeds` (which is our flag, not cargo's).
     for arg in
         ArgSplitFlagValue::from_string_iter(&mut args, "--target-dir").filter_map(Result::err)
     {
-        cmd.arg(arg);
+        if arg == "--many-seeds" {
+            many_seeds = Some(format!("0..256"));
+        } else if let Some(val) = arg.strip_prefix("--many-seeds=") {
+            many_seeds = Some(val.to_owned());
+        } else {
+            cmd.arg(arg);
+        }
     }
-    // Forward all further arguments (not consumed by `ArgSplitFlagValue`) to cargo.
+    // Forward all further arguments after `--` (not consumed by `ArgSplitFlagValue`) to cargo.
     cmd.args(args);
 
     // Set `RUSTC_WRAPPER` to ourselves.  Cargo will prepend that binary to its usual invocation,
@@ -222,6 +230,9 @@ pub fn phase_cargo_miri(mut args: impl Iterator<Item = String>) {
     // Forward some crucial information to our own re-invocations.
     cmd.env("MIRI_SYSROOT", miri_sysroot);
     cmd.env("MIRI_LOCAL_CRATES", local_crates(&metadata));
+    if let Some(many_seeds) = many_seeds {
+        cmd.env("MIRI_MANY_SEEDS", many_seeds);
+    }
     if verbose > 0 {
         cmd.env("MIRI_VERBOSE", verbose.to_string()); // This makes the other phases verbose.
     }
@@ -309,7 +320,7 @@ pub fn phase_rustc(mut args: impl Iterator<Item = String>, phase: RustcPhase) {
         }
     }
 
-    let verbose = std::env::var("MIRI_VERBOSE")
+    let verbose = env::var("MIRI_VERBOSE")
         .map_or(0, |verbose| verbose.parse().expect("verbosity flag must be an integer"));
     let target_crate = is_target_crate();
 
@@ -489,7 +500,7 @@ pub fn phase_rustc(mut args: impl Iterator<Item = String>, phase: RustcPhase) {
         // This is a host crate.
         // When we're running `cargo-miri` from `x.py` we need to pass the sysroot explicitly
         // due to bootstrap complications.
-        if let Some(sysroot) = std::env::var_os("MIRI_HOST_SYSROOT") {
+        if let Some(sysroot) = env::var_os("MIRI_HOST_SYSROOT") {
             cmd.arg("--sysroot").arg(sysroot);
         }
 
@@ -532,7 +543,7 @@ pub enum RunnerPhase {
 }
 
 pub fn phase_runner(mut binary_args: impl Iterator<Item = String>, phase: RunnerPhase) {
-    let verbose = std::env::var("MIRI_VERBOSE")
+    let verbose = env::var("MIRI_VERBOSE")
         .map_or(0, |verbose| verbose.parse().expect("verbosity flag must be an integer"));
 
     let binary = binary_args.next().unwrap();
@@ -541,6 +552,7 @@ pub fn phase_runner(mut binary_args: impl Iterator<Item = String>, phase: Runner
             "file {:?} not found or `cargo-miri` invoked incorrectly; please only invoke this binary through `cargo miri`", binary
         ));
     let file = BufReader::new(file);
+    let binary_args = binary_args.collect::<Vec<_>>();
 
     let info = serde_json::from_reader(file).unwrap_or_else(|_| {
         show_error!("file {:?} contains outdated or invalid JSON; try `cargo clean`", binary)
@@ -555,84 +567,114 @@ pub fn phase_runner(mut binary_args: impl Iterator<Item = String>, phase: Runner
         }
     };
 
-    let mut cmd = miri();
-
-    // Set missing env vars. We prefer build-time env vars over run-time ones; see
-    // </~https://github.com/rust-lang/miri/issues/1661> for the kind of issue that fixes.
-    for (name, val) in info.env {
-        // `CARGO_MAKEFLAGS` contains information about how to reach the jobserver, but by the time
-        // the program is being run, that jobserver no longer exists (cargo only runs the jobserver
-        // for the build portion of `cargo run`/`cargo test`). Hence we shouldn't forward this.
-        // Also see </~https://github.com/rust-lang/rust/pull/113730>.
-        if name == "CARGO_MAKEFLAGS" {
-            continue;
-        }
-        if let Some(old_val) = env::var_os(&name) {
-            if old_val == val {
-                // This one did not actually change, no need to re-set it.
-                // (This keeps the `debug_cmd` below more manageable.)
+    let many_seeds = env::var("MIRI_MANY_SEEDS");
+    run_many_seeds(many_seeds.ok(), |seed| {
+        let mut cmd = miri();
+
+        // Set missing env vars. We prefer build-time env vars over run-time ones; see
+        // </~https://github.com/rust-lang/miri/issues/1661> for the kind of issue that fixes.
+        for (name, val) in &info.env {
+            // `CARGO_MAKEFLAGS` contains information about how to reach the jobserver, but by the time
+            // the program is being run, that jobserver no longer exists (cargo only runs the jobserver
+            // for the build portion of `cargo run`/`cargo test`). Hence we shouldn't forward this.
+            // Also see </~https://github.com/rust-lang/rust/pull/113730>.
+            if name == "CARGO_MAKEFLAGS" {
                 continue;
-            } else if verbose > 0 {
-                eprintln!(
-                    "[cargo-miri runner] Overwriting run-time env var {name:?}={old_val:?} with build-time value {val:?}"
-                );
             }
+            if let Some(old_val) = env::var_os(name) {
+                if *old_val == *val {
+                    // This one did not actually change, no need to re-set it.
+                    // (This keeps the `debug_cmd` below more manageable.)
+                    continue;
+                } else if verbose > 0 {
+                    eprintln!(
+                        "[cargo-miri runner] Overwriting run-time env var {name:?}={old_val:?} with build-time value {val:?}"
+                    );
+                }
+            }
+            cmd.env(name, val);
         }
-        cmd.env(name, val);
-    }
 
-    if phase != RunnerPhase::Rustdoc {
-        // Set the sysroot. Not necessary in rustdoc, where we already set the sysroot in
-        // `phase_rustdoc`. rustdoc will forward that flag when invoking rustc (i.e., us), so the
-        // flag is present in `info.args`.
-        cmd.arg("--sysroot").arg(env::var_os("MIRI_SYSROOT").unwrap());
-    }
-    // Forward rustc arguments.
-    // We need to patch "--extern" filenames because we forced a check-only
-    // build without cargo knowing about that: replace `.rlib` suffix by
-    // `.rmeta`.
-    // We also need to remove `--error-format` as cargo specifies that to be JSON,
-    // but when we run here, cargo does not interpret the JSON any more. `--json`
-    // then also needs to be dropped.
-    let mut args = info.args.into_iter();
-    while let Some(arg) = args.next() {
-        if arg == "--extern" {
-            forward_patched_extern_arg(&mut args, &mut cmd);
-        } else if let Some(suffix) = arg.strip_prefix("--error-format") {
-            assert!(suffix.starts_with('='));
-            // Drop this argument.
-        } else if let Some(suffix) = arg.strip_prefix("--json") {
-            assert!(suffix.starts_with('='));
-            // Drop this argument.
-        } else {
-            cmd.arg(arg);
+        if phase != RunnerPhase::Rustdoc {
+            // Set the sysroot. Not necessary in rustdoc, where we already set the sysroot in
+            // `phase_rustdoc`. rustdoc will forward that flag when invoking rustc (i.e., us), so the
+            // flag is present in `info.args`.
+            cmd.arg("--sysroot").arg(env::var_os("MIRI_SYSROOT").unwrap());
+        }
+        // Forward rustc arguments.
+        // We need to patch "--extern" filenames because we forced a check-only
+        // build without cargo knowing about that: replace `.rlib` suffix by
+        // `.rmeta`.
+        // We also need to remove `--error-format` as cargo specifies that to be JSON,
+        // but when we run here, cargo does not interpret the JSON any more. `--json`
+        // then also needs to be dropped.
+        let mut args = info.args.iter();
+        while let Some(arg) = args.next() {
+            if arg == "--extern" {
+                forward_patched_extern_arg(&mut (&mut args).cloned(), &mut cmd);
+            } else if let Some(suffix) = arg.strip_prefix("--error-format") {
+                assert!(suffix.starts_with('='));
+                // Drop this argument.
+            } else if let Some(suffix) = arg.strip_prefix("--json") {
+                assert!(suffix.starts_with('='));
+                // Drop this argument.
+            } else {
+                cmd.arg(arg);
+            }
+        }
+        // Respect `MIRIFLAGS`.
+        if let Ok(a) = env::var("MIRIFLAGS") {
+            let args = flagsplit(&a);
+            cmd.args(args);
+        }
+        // Set the current seed.
+        if let Some(seed) = seed {
+            eprintln!("Trying seed: {seed}");
+            cmd.arg(format!("-Zmiri-seed={seed}"));
         }
-    }
-    // Respect `MIRIFLAGS`.
-    if let Ok(a) = env::var("MIRIFLAGS") {
-        let args = flagsplit(&a);
-        cmd.args(args);
-    }
-
-    // Then pass binary arguments.
-    cmd.arg("--");
-    cmd.args(binary_args);
-
-    // Make sure we use the build-time working directory for interpreting Miri/rustc arguments.
-    // But then we need to switch to the run-time one, which we instruct Miri to do by setting `MIRI_CWD`.
-    cmd.current_dir(info.current_dir);
-    cmd.env("MIRI_CWD", env::current_dir().unwrap());
 
-    // Run it.
-    debug_cmd("[cargo-miri runner]", verbose, &cmd);
-    match phase {
-        RunnerPhase::Rustdoc => exec_with_pipe(cmd, &info.stdin, format!("{binary}.stdin")),
-        RunnerPhase::Cargo => exec(cmd),
-    }
+        // Then pass binary arguments.
+        cmd.arg("--");
+        cmd.args(&binary_args);
+
+        // Make sure we use the build-time working directory for interpreting Miri/rustc arguments.
+        // But then we need to switch to the run-time one, which we instruct Miri to do by setting `MIRI_CWD`.
+        cmd.current_dir(&info.current_dir);
+        cmd.env("MIRI_CWD", env::current_dir().unwrap());
+
+        // Run it.
+        debug_cmd("[cargo-miri runner]", verbose, &cmd);
+
+        match phase {
+            RunnerPhase::Rustdoc => {
+                cmd.stdin(std::process::Stdio::piped());
+                let mut child = cmd.spawn().expect("failed to spawn process");
+                let child_stdin = child.stdin.take().unwrap();
+                // Write stdin in a background thread, as it may block.
+                let exit_status = thread::scope(|s| {
+                    s.spawn(|| {
+                        let mut child_stdin = child_stdin;
+                        // Ignore failure, it is most likely due to the process having terminated.
+                        let _ = child_stdin.write_all(&info.stdin);
+                    });
+                    child.wait().expect("failed to run command")
+                });
+                if !exit_status.success() {
+                    std::process::exit(exit_status.code().unwrap_or(-1));
+                }
+            }
+            RunnerPhase::Cargo => {
+                let exit_status = cmd.status().expect("failed to run command");
+                if !exit_status.success() {
+                    std::process::exit(exit_status.code().unwrap_or(-1));
+                }
+            }
+        }
+    });
 }
 
 pub fn phase_rustdoc(mut args: impl Iterator<Item = String>) {
-    let verbose = std::env::var("MIRI_VERBOSE")
+    let verbose = env::var("MIRI_VERBOSE")
         .map_or(0, |verbose| verbose.parse().expect("verbosity flag must be an integer"));
 
     // phase_cargo_miri sets the RUSTDOC env var to ourselves, and puts a backup
@@ -681,7 +723,7 @@ pub fn phase_rustdoc(mut args: impl Iterator<Item = String>) {
     cmd.arg("--cfg").arg("miri");
 
     // Make rustdoc call us back.
-    let cargo_miri_path = std::env::current_exe().expect("current executable path invalid");
+    let cargo_miri_path = env::current_exe().expect("current executable path invalid");
     cmd.arg("--test-builder").arg(&cargo_miri_path); // invoked by forwarding most arguments
     cmd.arg("--runtool").arg(&cargo_miri_path); // invoked with just a single path argument
 
diff --git a/src/tools/miri/cargo-miri/src/util.rs b/src/tools/miri/cargo-miri/src/util.rs
index f36cff1f7981e..5f2794e2244f4 100644
--- a/src/tools/miri/cargo-miri/src/util.rs
+++ b/src/tools/miri/cargo-miri/src/util.rs
@@ -171,11 +171,16 @@ where
         drop(path); // We don't need the path, we can pipe the bytes directly
         cmd.stdin(std::process::Stdio::piped());
         let mut child = cmd.spawn().expect("failed to spawn process");
-        {
-            let stdin = child.stdin.as_mut().expect("failed to open stdin");
-            stdin.write_all(input).expect("failed to write out test source");
-        }
-        let exit_status = child.wait().expect("failed to run command");
+        let child_stdin = child.stdin.take().unwrap();
+        // Write stdin in a background thread, as it may block.
+        let exit_status = std::thread::scope(|s| {
+            s.spawn(|| {
+                let mut child_stdin = child_stdin;
+                // Ignore failure, it is most likely due to the process having terminated.
+                let _ = child_stdin.write_all(input);
+            });
+            child.wait().expect("failed to run command")
+        });
         std::process::exit(exit_status.code().unwrap_or(-1))
     }
 }
@@ -317,3 +322,24 @@ pub fn clean_target_dir(meta: &Metadata) {
 
     remove_dir_all_idem(&target_dir).unwrap_or_else(|err| show_error!("{}", err))
 }
+
+/// Run `f` according to the many-seeds argument. In single-seed mode, `f` will only
+/// be called once, with `None`.
+pub fn run_many_seeds(many_seeds: Option<String>, f: impl Fn(Option<u32>)) {
+    let Some(many_seeds) = many_seeds else {
+        return f(None);
+    };
+    let (from, to) = many_seeds
+        .split_once("..")
+        .unwrap_or_else(|| show_error!("invalid format for `--many-seeds`: expected `from..to`"));
+    let from: u32 = if from.is_empty() {
+        0
+    } else {
+        from.parse().unwrap_or_else(|_| show_error!("invalid `from` in `--many-seeds=from..to"))
+    };
+    let to: u32 =
+        to.parse().unwrap_or_else(|_| show_error!("invalid `to` in `--many-seeds=from..to"));
+    for seed in from..to {
+        f(Some(seed));
+    }
+}
diff --git a/src/tools/miri/miri-script/src/main.rs b/src/tools/miri/miri-script/src/main.rs
index d436ef0c5aaa3..f4448146c55c5 100644
--- a/src/tools/miri/miri-script/src/main.rs
+++ b/src/tools/miri/miri-script/src/main.rs
@@ -182,15 +182,14 @@ fn main() -> Result<()> {
                     verbose = true;
                 } else if let Some(val) = args.get_long_opt_with_default("many-seeds", "0..256")? {
                     let (from, to) = val.split_once("..").ok_or_else(|| {
-                        anyhow!("invalid format for `--many-seeds-range`: expected `from..to`")
+                        anyhow!("invalid format for `--many-seeds`: expected `from..to`")
                     })?;
                     let from: u32 = if from.is_empty() {
                         0
                     } else {
-                        from.parse().context("invalid `from` in `--many-seeds-range=from..to")?
+                        from.parse().context("invalid `from` in `--many-seeds=from..to")?
                     };
-                    let to: u32 =
-                        to.parse().context("invalid `to` in `--many-seeds-range=from..to")?;
+                    let to: u32 = to.parse().context("invalid `to` in `--many-seeds=from..to")?;
                     many_seeds = Some(from..to);
                 } else if let Some(val) = args.get_long_opt("target")? {
                     target = Some(val);

From cfcea21074f922aa2fd184751842513e574b2e37 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Thu, 13 Jun 2024 20:02:02 +0200
Subject: [PATCH 2/2] document --many-seeds; set the default range to 0..64

---
 src/tools/miri/README.md                | 32 ++++++++++++-------------
 src/tools/miri/cargo-miri/src/phases.rs |  4 +++-
 src/tools/miri/miri-script/src/main.rs  |  4 ++--
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/tools/miri/README.md b/src/tools/miri/README.md
index c437619a76eab..4b4f2f83062f0 100644
--- a/src/tools/miri/README.md
+++ b/src/tools/miri/README.md
@@ -151,6 +151,21 @@ platform. For example `cargo miri test --target s390x-unknown-linux-gnu`
 will run your test suite on a big-endian target, which is useful for testing
 endian-sensitive code.
 
+### Testing multiple different executions
+
+Certain parts of the execution are picked randomly by Miri, such as the exact base address
+allocations are stored at and the interleaving of concurrently executing threads. Sometimes, it can
+be useful to explore multiple different execution, e.g. to make sure that your code does not depend
+on incidental "super-alignment" of new allocations and to test different thread interleavings.
+This can be done with the `--many-seeds` flag:
+
+```
+cargo miri test --many-seeds # tries the seeds in 0..64
+cargo miri test --many-seeds=0..16
+```
+
+The default of 64 different seeds is quite slow, so you probably want to specify a smaller range.
+
 ### Running Miri on CI
 
 When running Miri on CI, use the following snippet to install a nightly toolchain with the Miri
@@ -183,23 +198,6 @@ Here is an example job for GitHub Actions:
 The explicit `cargo miri setup` helps to keep the output of the actual test step
 clean.
 
-### Testing for alignment issues
-
-Miri can sometimes miss misaligned accesses since allocations can "happen to be"
-aligned just right. You can use `-Zmiri-symbolic-alignment-check` to definitely
-catch all such issues, but that flag will also cause false positives when code
-does manual pointer arithmetic to account for alignment. Another alternative is
-to call Miri with various values for `-Zmiri-seed`; that will alter the
-randomness that is used to determine allocation base addresses. The following
-snippet calls Miri in a loop with different values for the seed:
-
-```
-for SEED in $(seq 0 255); do
-  echo "Trying seed: $SEED"
-  MIRIFLAGS=-Zmiri-seed=$SEED cargo miri test || { echo "Failing seed: $SEED"; break; };
-done
-```
-
 ### Supported targets
 
 Miri does not support all targets supported by Rust. The good news, however, is
diff --git a/src/tools/miri/cargo-miri/src/phases.rs b/src/tools/miri/cargo-miri/src/phases.rs
index 6773cff89abf8..3c36f606d8452 100644
--- a/src/tools/miri/cargo-miri/src/phases.rs
+++ b/src/tools/miri/cargo-miri/src/phases.rs
@@ -34,6 +34,8 @@ Examples:
 
 ";
 
+const DEFAULT_MANY_SEEDS: &str = "0..64";
+
 fn show_help() {
     println!("{CARGO_MIRI_HELP}");
 }
@@ -171,7 +173,7 @@ pub fn phase_cargo_miri(mut args: impl Iterator<Item = String>) {
         ArgSplitFlagValue::from_string_iter(&mut args, "--target-dir").filter_map(Result::err)
     {
         if arg == "--many-seeds" {
-            many_seeds = Some(format!("0..256"));
+            many_seeds = Some(DEFAULT_MANY_SEEDS.to_owned());
         } else if let Some(val) = arg.strip_prefix("--many-seeds=") {
             many_seeds = Some(val.to_owned());
         } else {
diff --git a/src/tools/miri/miri-script/src/main.rs b/src/tools/miri/miri-script/src/main.rs
index f4448146c55c5..c4f0d808d93e1 100644
--- a/src/tools/miri/miri-script/src/main.rs
+++ b/src/tools/miri/miri-script/src/main.rs
@@ -98,7 +98,7 @@ Build miri, set up a sysroot and then run the test suite.
 Build miri, set up a sysroot and then run the driver with the given <flags>.
 (Also respects MIRIFLAGS environment variable.)
 If `--many-seeds` is present, Miri is run many times in parallel with different seeds.
-The range defaults to `0..256`.
+The range defaults to `0..64`.
 
 ./miri fmt <flags>:
 Format all sources and tests. <flags> are passed to `rustfmt`.
@@ -180,7 +180,7 @@ fn main() -> Result<()> {
                     dep = true;
                 } else if args.get_long_flag("verbose")? || args.get_short_flag('v')? {
                     verbose = true;
-                } else if let Some(val) = args.get_long_opt_with_default("many-seeds", "0..256")? {
+                } else if let Some(val) = args.get_long_opt_with_default("many-seeds", "0..64")? {
                     let (from, to) = val.split_once("..").ok_or_else(|| {
                         anyhow!("invalid format for `--many-seeds`: expected `from..to`")
                     })?;