Skip to content

Commit

Permalink
Try to hit line lengths limits on windows in testing (#53)
Browse files Browse the repository at this point in the history
* Try to hit line lengths limits on windows in testing

* Initial basic support for splitting arguments

* Fix windows build

* Fallback to `@-file` when invoking lld
  • Loading branch information
alexcrichton authored Dec 2, 2024
1 parent e00ddf3 commit a9f0a7b
Show file tree
Hide file tree
Showing 5 changed files with 364 additions and 71 deletions.
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,10 @@ wat = "1.219.0"
wit-component = "0.219.0"
wit-parser = "0.219.0"
wasi-preview1-component-adapter-provider = "24.0.0"

[target.'cfg(unix)'.dependencies]
libc = "0.2"

[target.'cfg(windows)'.dependencies]
winsplit = "0.1"
windows-sys = { version = "0.59", features = ['Win32_Foundation'] }
136 changes: 136 additions & 0 deletions src/argfile.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
use anyhow::{Context, Result};
use std::ffi::{OsStr, OsString};

/// Returns this process's command-line arguments with every `@file` argument
/// replaced by the arguments read from `file`.
///
/// Arguments are fed, in order, through an [`Expander`]; ordinary arguments
/// are kept as-is.
///
/// # Errors
///
/// Returns the first error produced while expanding an argument (for example
/// a response file that cannot be read).
pub fn expand() -> Result<Vec<OsString>> {
    let mut expander = Expander::default();
    std::env::args_os().try_for_each(|arg| expander.push(arg))?;
    Ok(expander.args)
}

/// Accumulator used while expanding `@file` arguments into a flat argument
/// list.
#[derive(Default)]
struct Expander {
/// Arguments collected so far, in the order they were pushed.
args: Vec<OsString>,
}

impl Expander {
fn push(&mut self, arg: OsString) -> Result<()> {
let bytes = arg.as_encoded_bytes();
match bytes.split_first() {
Some((b'@', rest)) => {
self.push_file(unsafe { OsStr::from_encoded_bytes_unchecked(rest) })
}
_ => {
self.args.push(arg);
Ok(())
}
}
}

fn push_file(&mut self, file: &OsStr) -> Result<()> {
let contents =
std::fs::read_to_string(file).with_context(|| format!("failed to read {file:?}"))?;

for part in imp::split(&contents) {
self.push(part.into())?;
}
Ok(())
}
}

#[cfg(not(windows))]
use gnu as imp;
#[cfg(not(windows))]
/// Response-file tokenizer following GNU/posix quoting rules.
///
/// * Arguments are separated by unquoted, unescaped whitespace.
/// * A backslash makes the following character literal, both inside and
///   outside of quotes. A trailing backslash is kept as-is.
/// * `'...'` and `"..."` group characters (including whitespace) and may
///   appear anywhere within an argument, so `-o"a b"` is the single argument
///   `-oa b`.
/// * An unterminated quote extends to the end of the input.
mod gnu {
    /// Splits `s` into arguments, yielding each argument with quotes and
    /// escapes already removed.
    pub fn split(s: &str) -> impl Iterator<Item = String> + '_ {
        Split { iter: s.chars() }
    }

    /// Iterator state: the characters of the input not yet consumed.
    struct Split<'a> {
        iter: std::str::Chars<'a>,
    }

    impl Iterator for Split<'_> {
        type Item = String;

        fn next(&mut self) -> Option<String> {
            // Skip the whitespace separating arguments; running out of input
            // here means there are no more arguments.
            let first = self.iter.find(|c| !c.is_whitespace())?;

            let mut ret = String::new();
            // `Some(q)` while inside a quoted section opened by `q`.
            let mut quote = None;
            let mut cur = Some(first);
            while let Some(c) = cur {
                match (quote, c) {
                    // An escape takes the next character verbatim; a
                    // backslash at the very end of the input stands for
                    // itself.
                    (_, '\\') => ret.push(self.iter.next().unwrap_or('\\')),
                    (Some(q), c) if c == q => quote = None,
                    (Some(_), c) => ret.push(c),
                    (None, '"' | '\'') => quote = Some(c),
                    (None, c) if c.is_whitespace() => break,
                    (None, c) => ret.push(c),
                }
                cur = self.iter.next();
            }
            // Note that `''` / `""` on their own yield an empty argument.
            Some(ret)
        }
    }

    #[test]
    fn tests() {
        assert_eq!(split("x").collect::<Vec<_>>(), ["x"]);
        assert_eq!(split("\\x").collect::<Vec<_>>(), ["x"]);
        assert_eq!(split("'x'").collect::<Vec<_>>(), ["x"]);
        assert_eq!(split("\"x\"").collect::<Vec<_>>(), ["x"]);

        assert_eq!(split("x y").collect::<Vec<_>>(), ["x", "y"]);
        assert_eq!(split("x\ny").collect::<Vec<_>>(), ["x", "y"]);
        assert_eq!(split("\\x y").collect::<Vec<_>>(), ["x", "y"]);
        assert_eq!(split("'x y'").collect::<Vec<_>>(), ["x y"]);
        assert_eq!(split("\"x y\"").collect::<Vec<_>>(), ["x y"]);
        assert_eq!(split("\"x 'y'\"\n'y'").collect::<Vec<_>>(), ["x 'y'", "y"]);
        assert_eq!(
            split(
                r#"
                    a\ \\b
                    z
                    "x y \\z"
                "#
            )
            .collect::<Vec<_>>(),
            ["a \\b", "z", "x y \\z"]
        );

        // Quotes may start or end in the middle of an argument.
        assert_eq!(split("a\"b c\"d").collect::<Vec<_>>(), ["ab cd"]);
        assert_eq!(split("-o'x y' z").collect::<Vec<_>>(), ["-ox y", "z"]);
        assert_eq!(split("'' x").collect::<Vec<_>>(), ["", "x"]);
    }
}

#[cfg(windows)]
use windows as imp;
#[cfg(windows)]
/// Response-file tokenizer used on Windows; all splitting is delegated to the
/// `winsplit` crate.
mod windows {
/// Splits `s` into arguments using `winsplit::split`.
///
/// NOTE(review): presumably this follows the Windows command-line quoting
/// rules rather than posix ones — confirm against the `winsplit` docs.
pub fn split(s: &str) -> impl Iterator<Item = String> {
winsplit::split(s).into_iter()
}
}
150 changes: 129 additions & 21 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ use lexopt::Arg;
use std::env;
use std::ffi::OsString;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::process::{Command, ExitStatus};
use std::str::FromStr;
use wasmparser::Payload;
use wit_component::StringEncoding;
use wit_parser::{Resolve, WorldId};

mod argfile;

/// Representation of a flag passed to `wasm-ld`
///
/// Note that the parsing of flags in `wasm-ld` is not as uniform as parsing
Expand Down Expand Up @@ -391,7 +393,7 @@ impl App {
/// in fact `lexopt` is used to filter out `wasm-ld` arguments and `clap`
/// only parses arguments specific to `wasm-component-ld`.
fn parse() -> Result<App> {
let mut args = env::args_os().collect::<Vec<_>>();
let mut args = argfile::expand().context("failed to expand @-response files")?;

// First remove `-flavor wasm` in case this is invoked as a generic LLD
// driver. We can safely ignore that going forward.
Expand Down Expand Up @@ -526,8 +528,7 @@ impl App {
}

fn run(&mut self) -> Result<()> {
let mut cmd = self.lld();
let linker = cmd.get_program().to_owned();
let mut lld = self.lld();

// If a temporary output is needed make sure it has the same file name
// as the output of our command itself since LLD will embed this file
Expand All @@ -549,16 +550,14 @@ impl App {
// temporary location for wit-component to read and then the real output
// is created after wit-component runs.
if self.skip_wit_component() {
cmd.arg("-o").arg(&self.component.output);
lld.output(&self.component.output);
} else {
cmd.arg("-o").arg(&temp_output);
lld.output(&temp_output);
}

if self.component.verbose {
eprintln!("running LLD: {cmd:?}");
}
let status = cmd
.status()
let linker = &lld.exe;
let status = lld
.status(&temp_dir, &self.lld_args)
.with_context(|| format!("failed to spawn {linker:?}"))?;
if !status.success() {
bail!("failed to invoke LLD: {status}");
Expand Down Expand Up @@ -676,38 +675,147 @@ impl App {
|| self.shared
}

fn lld(&self) -> Command {
fn lld(&self) -> Lld {
let mut lld = self.find_lld();
lld.args(&self.lld_args);
if self.component.verbose {
lld.arg("--verbose");
lld.verbose = true
}
lld
}

fn find_lld(&self) -> Command {
fn find_lld(&self) -> Lld {
if let Some(path) = &self.component.wasm_ld_path {
return Command::new(path);
return Lld::new(path);
}

// Search for the first of `wasm-ld` or `rust-lld` in `$PATH`
let wasm_ld = format!("wasm-ld{}", env::consts::EXE_SUFFIX);
let rust_lld = format!("rust-lld{}", env::consts::EXE_SUFFIX);
for entry in env::split_paths(&env::var_os("PATH").unwrap_or_default()) {
if entry.join(&wasm_ld).is_file() {
return Command::new(wasm_ld);
return Lld::new(wasm_ld);
}
if entry.join(&rust_lld).is_file() {
let mut ret = Command::new(rust_lld);
ret.arg("-flavor").arg("wasm");
return ret;
let mut lld = Lld::new(rust_lld);
lld.needs_flavor = true;
return lld;
}
}

// If the search failed, fall back to `wasm-ld` anyway so that the
// resulting error message shows that `wasm-ld` was looked for but could
// not be found.
Command::new("wasm-ld")
Lld::new("wasm-ld")
}
}

/// Helper structure representing an `lld` invocation.
///
/// The linker arguments themselves are not stored here; they are supplied to
/// `Lld::status` when the linker is actually run.
struct Lld {
/// Program that is spawned to run the linker.
exe: PathBuf,
/// When set, `-flavor wasm` is passed ahead of all other arguments.
needs_flavor: bool,
/// When set, `--verbose` is passed to the linker and the command being run
/// is printed to stderr.
verbose: bool,
/// Destination passed via `-o`, if one has been configured.
output: Option<PathBuf>,
}

impl Lld {
    /// Creates an invocation of the linker at `exe` with no output configured,
    /// no `-flavor` prefix and verbosity disabled.
    fn new(exe: impl Into<PathBuf>) -> Lld {
        Lld {
            exe: exe.into(),
            needs_flavor: false,
            verbose: false,
            output: None,
        }
    }

    /// Sets the path passed to the linker via `-o`.
    fn output(&mut self, dst: impl Into<PathBuf>) {
        self.output = Some(dst.into());
    }

    /// Runs the linker with `args` and returns its exit status.
    ///
    /// The arguments are passed directly on the command line when possible.
    /// If they are expected to be too large, or the OS rejects the command
    /// line as too long, they are written to a response file inside `tmpdir`
    /// and passed as `@file` instead.
    ///
    /// # Errors
    ///
    /// Fails if the process cannot be spawned or the response file cannot be
    /// written.
    fn status(&self, tmpdir: &tempfile::TempDir, args: &[OsString]) -> Result<ExitStatus> {
        // If we can probably pass `args` natively, try to do so. In some cases
        // though just skip this entirely and go straight to below.
        if !self.probably_too_big(args) {
            match self.run(args) {
                // If this subprocess failed to spawn because the arguments
                // were too large, fall through to below.
                Err(ref e) if self.command_line_too_big(e) => {
                    if self.verbose {
                        eprintln!("command line was too large, trying again...");
                    }
                }
                other => return Ok(other?),
            }
        } else if self.verbose {
            eprintln!("arguments probably too large {args:?}");
        }

        // The `args` are too big to be passed via the command line itself so
        // encode them all using "posix quoting" into an "argfile". This gets
        // passed as `@foo` to lld and we also pass `--rsp-quoting=posix` to
        // ensure that LLD always uses posix quoting. That means that we don't
        // have to implement the dual nature of both posix and windows encoding
        // here.
        let argfile = Self::encode_argfile(args);
        let path = tmpdir.path().join("argfile_tmp");
        std::fs::write(&path, &argfile).with_context(|| format!("failed to write {path:?}"))?;
        let mut argfile_arg = OsString::from("@");
        argfile_arg.push(&path);
        let status = self.run(&["--rsp-quoting=posix".into(), argfile_arg])?;
        Ok(status)
    }

    /// Encodes `args` as the contents of a posix-quoted response file, one
    /// argument per line.
    ///
    /// Every byte that a posix tokenizer would treat specially — backslashes,
    /// single and double quotes, and ASCII whitespace (including newlines) —
    /// is preceded by a backslash so that each argument is read back exactly
    /// as it was given.
    fn encode_argfile(args: &[OsString]) -> Vec<u8> {
        let mut argfile = Vec::new();
        for arg in args {
            for &byte in arg.as_encoded_bytes() {
                if matches!(byte, b'\\' | b'"' | b'\'') || byte.is_ascii_whitespace() {
                    argfile.push(b'\\');
                }
                argfile.push(byte);
            }
            argfile.push(b'\n');
        }
        argfile
    }

    /// Tests whether the `args` array is too large to execute natively.
    ///
    /// Windows `cmd.exe` has a very small limit of around 8k so perform a
    /// guess up to 6k. This isn't 100% accurate.
    fn probably_too_big(&self, args: &[OsString]) -> bool {
        let args_size = args
            .iter()
            .map(|s| s.as_encoded_bytes().len())
            .sum::<usize>();
        cfg!(windows) && args_size > 6 * 1024
    }

    /// Test if the OS failed to spawn a process because the arguments were too
    /// long.
    fn command_line_too_big(&self, err: &std::io::Error) -> bool {
        #[cfg(unix)]
        return err.raw_os_error() == Some(libc::E2BIG);
        #[cfg(windows)]
        return err.raw_os_error()
            == Some(windows_sys::Win32::Foundation::ERROR_FILENAME_EXCED_RANGE as i32);
        #[cfg(not(any(unix, windows)))]
        {
            let _ = err;
            return false;
        }
    }

    /// Spawns the linker with `args` and waits for it to finish.
    ///
    /// The final command line is, in order: `-flavor wasm` (if needed),
    /// `args`, `--verbose` (if enabled) and `-o <output>` (if configured).
    fn run(&self, args: &[OsString]) -> std::io::Result<ExitStatus> {
        let mut cmd = Command::new(&self.exe);
        if self.needs_flavor {
            cmd.arg("-flavor").arg("wasm");
        }
        cmd.args(args);
        if self.verbose {
            cmd.arg("--verbose");
        }
        if let Some(output) = &self.output {
            cmd.arg("-o").arg(output);
        }
        if self.verbose {
            eprintln!("running {cmd:?}");
        }
        cmd.status()
    }
}

Expand Down
Loading

0 comments on commit a9f0a7b

Please sign in to comment.