diff --git a/Cargo.lock b/Cargo.lock index 885e73d3e..6029cc1a0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -136,16 +136,6 @@ version = "1.0.7" source = "registry+/~https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "gethostname" -version = "0.4.3" -source = "registry+/~https://github.com/rust-lang/crates.io-index" -checksum = "0176e0459c2e4a1fe232f984bca6890e681076abb9934f6cea7c326f3fc47818" -dependencies = [ - "libc", - "windows-targets", -] - [[package]] name = "glob" version = "0.3.1" @@ -216,10 +206,10 @@ version = "0.1.7" dependencies = [ "base64", "bstr", - "gethostname", "grep-matcher", "grep-regex", "grep-searcher", + "log", "serde", "serde_json", "termcolor", @@ -621,60 +611,3 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+/~https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-targets" -version = "0.48.0" -source = "registry+/~https://github.com/rust-lang/crates.io-index" -checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.0" -source = "registry+/~https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.0" -source = "registry+/~https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.0" -source = "registry+/~https://github.com/rust-lang/crates.io-index" -checksum = 
"622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.0" -source = "registry+/~https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.0" -source = "registry+/~https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.0" -source = "registry+/~https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.0" -source = "registry+/~https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" diff --git a/complete/_rg b/complete/_rg index be8d18bac..7fd6c542b 100644 --- a/complete/_rg +++ b/complete/_rg @@ -305,6 +305,7 @@ _rg() { '--debug[show debug messages]' '--field-context-separator[set string to delimit fields in context lines]' '--field-match-separator[set string to delimit fields in matching lines]' + '--hostname-bin=[executable for getting system hostname]:hostname executable:_command_names -e' '--hyperlink-format=[specify pattern for hyperlinks]:pattern' '--trace[show more verbose debug messages]' '--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)' diff --git a/crates/core/app.rs b/crates/core/app.rs index 9c5234796..d0dfc8d37 100644 --- a/crates/core/app.rs +++ b/crates/core/app.rs @@ -580,6 +580,7 @@ pub fn all_args_and_flags() -> Vec { flag_glob_case_insensitive(&mut args); flag_heading(&mut args); flag_hidden(&mut args); + flag_hostname_bin(&mut args); flag_hyperlink_format(&mut args); flag_iglob(&mut args); flag_ignore_case(&mut args); @@ -1495,19 +1496,93 @@ This flag can be disabled with --no-hidden. 
args.push(arg); } +fn flag_hostname_bin(args: &mut Vec) { + const SHORT: &str = "Run a program to get this system's hostname."; + const LONG: &str = long!( + "\ +This flag controls how ripgrep determines this system's hostname. The flag's +value should correspond to an executable (either a path or something that can +be found via your system's *PATH* environment variable). When set, ripgrep will +run this executable, with no arguments, and treat its output (with leading and +trailing whitespace stripped) as your system's hostname. + +When not set (the default, or the empty string), ripgrep will try to +automatically detect your system's hostname. On Unix, this corresponds +to calling *gethostname*. On Windows, this corresponds to calling +*GetComputerNameExW* to fetch the system's \"physical DNS hostname.\" + +ripgrep uses your system's hostname for producing hyperlinks. +" + ); + let arg = + RGArg::flag("hostname-bin", "COMMAND").help(SHORT).long_help(LONG); + args.push(arg); +} + fn flag_hyperlink_format(args: &mut Vec) { const SHORT: &str = "Set the format of hyperlinks to match results."; const LONG: &str = long!( "\ -Set the format of hyperlinks to match results. This defines a pattern which -can contain the following placeholders: {file}, {line}, {column}, and {host}. -An empty pattern or 'none' disables hyperlinks. +Set the format of hyperlinks to match results. Hyperlinks make certain elements +of ripgrep's output, such as file paths, clickable. This generally only works +in terminal emulators that support OSC-8 hyperlinks. For example, the format +*file://{host}{file}* will emit an RFC 8089 hyperlink. + +The following variables are available in the format string: + +*{path}*: Required. This is replaced with a path to a matching file. The +path is guaranteed to be absolute and percent encoded such that it is valid to +put into a URI. Note that a path is guaranteed to start with a */*. + +*{host}*: Optional. This is replaced with your system's hostname. 
On Unix, +this corresponds to calling *gethostname*. On Windows, this corresponds to +calling *GetComputerNameExW* to fetch the system's \"physical DNS hostname.\" +Alternatively, if --hostname-bin was provided, then the hostname returned from +the output of that program will be returned. If no hostname could be found, +then this variable is replaced with the empty string. + +*{line}*: Optional. If appropriate, this is replaced with the line number of +a match. If no line number is available (for example, if --no-line-number was +given), then it is automatically replaced with the value *1*. + +*{column}*: Optional, but requires the presence of **{line}**. If appropriate, +this is replaced with the column number of a match. If no column number is +available (for example, if --no-column was given), then it is automatically +replaced with the value *1*. + +*{wslprefix}*: Optional. This is a special value that is set to +*wsl$/WSL_DISTRO_NAME*, where *WSL_DISTRO_NAME* corresponds to the value of +the equivalent environment variable. If the system is not Unix or if the +*WSL_DISTRO_NAME* environment variable is not set, then this is replaced with +the empty string. + +Alternatively, a format string may correspond to one of the following +aliases: default, file, grep+, kitty, macvim, none, subl, textmate, vscode, +vscode-insiders, vscodium. + +A format string may be empty. An empty format string is equivalent to the +*none* alias. In this case, hyperlinks will be disabled. + +At present, the default format when ripgrep detects a tty on stdout all systems +is *default*. This is an alias that expands to *file://{host}{path}* on Unix +and *file://{path}* on Windows. When stdout is not a tty, then the default +format behaves as if it were *none*. That is, hyperlinks are disabled. + +Note that hyperlinks are only written when colors are enabled. 
To write +hyperlinks without colors, you'll need to configure ripgrep to not colorize +anything without actually disabling all ANSI escape codes completely: + + --colors 'path:none' --colors 'line:none' --colors 'column:none' --colors 'match:none' -The {file} placeholder is required, and will be replaced with the absolute -file path with a few adjustments: The leading '/' on Unix is removed, -and '\\' is replaced with '/' on Windows. +ripgrep works this way because it treats the *--color=(never|always|auto)* flag +as a proxy for whether ANSI escape codes should be used at all. This means +that environment variables like *NO_COLOR=1* and *TERM=dumb* not only disable +colors, but hyperlinks as well. Similarly, colors and hyperlinks are disabled +when ripgrep is not writing to a tty. (Unless one forces the issue by setting +*--color=always*.) -As an example, the default pattern on Unix systems is: 'file://{host}/{file}' +For more information on hyperlinks in terminal emulators, see: +https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda " ); let arg = diff --git a/crates/core/args.rs b/crates/core/args.rs index 0f8d1f18c..75029a050 100644 --- a/crates/core/args.rs +++ b/crates/core/args.rs @@ -18,9 +18,9 @@ use grep::pcre2::{ RegexMatcherBuilder as PCRE2RegexMatcherBuilder, }; use grep::printer::{ - default_color_specs, ColorSpecs, HyperlinkPattern, JSONBuilder, - PathPrinter, PathPrinterBuilder, Standard, StandardBuilder, Stats, - Summary, SummaryBuilder, SummaryKind, JSON, + default_color_specs, ColorSpecs, HyperlinkConfig, HyperlinkEnvironment, + HyperlinkFormat, JSONBuilder, PathPrinter, PathPrinterBuilder, Standard, + StandardBuilder, Stats, Summary, SummaryBuilder, SummaryKind, JSON, }; use grep::regex::{ RegexMatcher as RustRegexMatcher, @@ -236,7 +236,7 @@ impl Args { let mut builder = PathPrinterBuilder::new(); builder .color_specs(self.matches().color_specs()?) - .hyperlink_pattern(self.matches().hyperlink_pattern()?) 
+ .hyperlink(self.matches().hyperlink_config()?) .separator(self.matches().path_separator()?) .terminator(self.matches().path_terminator().unwrap_or(b'\n')); Ok(builder.build(wtr)) @@ -774,7 +774,7 @@ impl ArgMatches { let mut builder = StandardBuilder::new(); builder .color_specs(self.color_specs()?) - .hyperlink_pattern(self.hyperlink_pattern()?) + .hyperlink(self.hyperlink_config()?) .stats(self.stats()) .heading(self.heading()) .path(self.with_filename(paths)) @@ -814,7 +814,7 @@ impl ArgMatches { builder .kind(self.summary_kind().expect("summary format")) .color_specs(self.color_specs()?) - .hyperlink_pattern(self.hyperlink_pattern()?) + .hyperlink(self.hyperlink_config()?) .stats(self.stats()) .path(self.with_filename(paths)) .max_matches(self.max_count()?) @@ -1126,11 +1126,21 @@ impl ArgMatches { /// for the current system is used if the value is not set. /// /// If an invalid pattern is provided, then an error is returned. - fn hyperlink_pattern(&self) -> Result { - Ok(match self.value_of_lossy("hyperlink-format") { - Some(pattern) => HyperlinkPattern::from_str(&pattern)?, - None => HyperlinkPattern::default_file_scheme(), - }) + fn hyperlink_config(&self) -> Result { + let mut env = HyperlinkEnvironment::new(); + env.host(hostname(self.value_of_os("hostname-bin"))) + .wsl_prefix(wsl_prefix()); + let fmt = match self.value_of_lossy("hyperlink-format") { + None => HyperlinkFormat::from_str("default").unwrap(), + Some(format) => match HyperlinkFormat::from_str(&format) { + Ok(format) => format, + Err(err) => { + let msg = format!("invalid hyperlink format: {err}"); + return Err(msg.into()); + } + }, + }; + Ok(HyperlinkConfig::new(env, fmt)) } /// Returns true if ignore files should be processed case insensitively. @@ -1838,6 +1848,107 @@ fn current_dir() -> Result { .into()) } +/// Retrieves the hostname that ripgrep should use wherever a hostname is +/// required. Currently, that's just in the hyperlink format. 
+/// +/// This works by first running the given binary program (if present and with +/// no arguments) to get the hostname after trimming leading and trailing +/// whitespace. If that fails for any reason, then it falls back to getting +/// the hostname via platform specific means (e.g., `gethostname` on Unix). +/// +/// The purpose of `bin` is to make it possible for end users to override how +/// ripgrep determines the hostname. +fn hostname(bin: Option<&OsStr>) -> Option { + let Some(bin) = bin else { return platform_hostname() }; + let bin = match grep::cli::resolve_binary(bin) { + Ok(bin) => bin, + Err(err) => { + log::debug!( + "failed to run command '{bin:?}' to get hostname \ + (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let mut cmd = process::Command::new(&bin); + cmd.stdin(process::Stdio::null()); + let rdr = match grep::cli::CommandReader::new(&mut cmd) { + Ok(rdr) => rdr, + Err(err) => { + log::debug!( + "failed to spawn command '{bin:?}' to get \ + hostname (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let out = match io::read_to_string(rdr) { + Ok(out) => out, + Err(err) => { + log::debug!( + "failed to read output from command '{bin:?}' to get \ + hostname (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let hostname = out.trim(); + if hostname.is_empty() { + log::debug!( + "output from command '{bin:?}' is empty after trimming \ + leading and trailing whitespace (falling back to \ + platform hostname)", + ); + return platform_hostname(); + } + Some(hostname.to_string()) +} + +/// Attempts to get the hostname by using platform specific routines. For +/// example, this will do `gethostname` on Unix and `GetComputerNameExW` on +/// Windows. 
+fn platform_hostname() -> Option { + let hostname_os = match grep::cli::hostname() { + Ok(x) => x, + Err(err) => { + log::debug!("could not get hostname: {}", err); + return None; + } + }; + let Some(hostname) = hostname_os.to_str() else { + log::debug!( + "got hostname {:?}, but it's not valid UTF-8", + hostname_os + ); + return None; + }; + Some(hostname.to_string()) +} + +/// Returns a value that is meant to fill in the `{wslprefix}` variable for +/// a user given hyperlink format. A WSL prefix is a share/network like thing +/// that is meant to permit Windows applications to open files stored within +/// a WSL drive. +/// +/// If a WSL distro name is unavailable, not valid UTF-8 or this isn't running +/// in a Unix environment, then this returns None. +/// +/// See: +fn wsl_prefix() -> Option { + if !cfg!(unix) { + return None; + } + let distro_os = env::var_os("WSL_DISTRO_NAME")?; + let Some(distro) = distro_os.to_str() else { + log::debug!( + "found WSL_DISTRO_NAME={:?}, but value is not UTF-8", + distro_os + ); + return None; + }; + Some(format!("wsl$/{distro}")) +} + /// Tries to assign a timestamp to every `Subject` in the vector to help with /// sorting Subjects by time. 
fn load_timestamps( diff --git a/crates/printer/Cargo.toml b/crates/printer/Cargo.toml index 69e03d651..dc63a6cc7 100644 --- a/crates/printer/Cargo.toml +++ b/crates/printer/Cargo.toml @@ -21,9 +21,9 @@ serde = ["dep:base64", "dep:serde", "dep:serde_json"] [dependencies] base64 = { version = "0.21.4", optional = true } bstr = "1.6.2" -gethostname = "0.4.3" grep-matcher = { version = "0.1.6", path = "../matcher" } grep-searcher = { version = "0.1.11", path = "../searcher" } +log = "0.4.5" termcolor = "1.3.0" serde = { version = "1.0.188", optional = true, features = ["derive"] } serde_json = { version = "1.0.107", optional = true } diff --git a/crates/printer/src/hyperlink.rs b/crates/printer/src/hyperlink.rs index fa38b5c28..7e6be6e49 100644 --- a/crates/printer/src/hyperlink.rs +++ b/crates/printer/src/hyperlink.rs @@ -1,394 +1,710 @@ -use std::{ - io::{self, Write}, - path::Path, -}; +use std::{cell::RefCell, io, path::Path, sync::Arc}; use { bstr::ByteSlice, termcolor::{HyperlinkSpec, WriteColor}, }; -use crate::hyperlink_aliases::HYPERLINK_PATTERN_ALIASES; +use crate::hyperlink_aliases; -/// A builder for `HyperlinkPattern`. +/// Hyperlink configuration. /// -/// Once a `HyperlinkPattern` is built, it is immutable. -#[derive(Debug)] -pub struct HyperlinkPatternBuilder { - parts: Vec, +/// This configuration specifies both the [hyperlink format](HyperlinkFormat) +/// and an [environment](HyperlinkEnvironment) for interpolating a subset of +/// variables. The specific subset includes variables that are intended to +/// be invariant throughout the lifetime of a process, such as a machine's +/// hostname. +/// +/// A hyperlink configuration can be provided to printer builders such as +/// [`StandardBuilder::hyperlink`](crate::StandardBuilder::hyperlink).
+#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct HyperlinkConfig(Arc); + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +struct HyperlinkConfigInner { + env: HyperlinkEnvironment, + format: HyperlinkFormat, } -/// A hyperlink pattern with placeholders. +impl HyperlinkConfig { + /// Create a new configuration from an environment and a format. + pub fn new( + env: HyperlinkEnvironment, + format: HyperlinkFormat, + ) -> HyperlinkConfig { + HyperlinkConfig(Arc::new(HyperlinkConfigInner { env, format })) + } + + /// Returns the hyperlink environment in this configuration. + pub(crate) fn environment(&self) -> &HyperlinkEnvironment { + &self.0.env + } + + /// Returns the hyperlink format in this configuration. + pub(crate) fn format(&self) -> &HyperlinkFormat { + &self.0.format + } +} + +/// A hyperlink format with variables. +/// +/// This can be created by parsing a string using `HyperlinkFormat::from_str`. +/// +/// The default format is empty. An empty format is valid and effectively +/// disables hyperlinks. +/// +/// # Example +/// +/// ``` +/// use grep_printer::HyperlinkFormat; +/// +/// let fmt = "vscode".parse::()?; +/// assert_eq!(fmt.to_string(), "vscode://file{path}:{line}:{column}"); /// -/// This can be created with `HyperlinkPatternBuilder` or from a string -/// using `HyperlinkPattern::from_str`. +/// # Ok::<(), Box>(()) +/// ``` #[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct HyperlinkPattern { +pub struct HyperlinkFormat { parts: Vec, is_line_dependent: bool, } -/// A hyperlink pattern part. -#[derive(Clone, Debug, Eq, PartialEq)] -enum Part { - /// Static text. Can include invariant values such as the hostname. - Text(Vec), - /// Placeholder for the file path. - File, - /// Placeholder for the line number. - Line, - /// Placeholder for the column number. - Column, +impl HyperlinkFormat { + /// Creates an empty hyperlink format.
+ pub fn empty() -> HyperlinkFormat { + HyperlinkFormat::default() + } + + /// Returns true if this format is empty. + pub fn is_empty(&self) -> bool { + self.parts.is_empty() + } + + /// Creates a [`HyperlinkConfig`] from this format and the environment + /// given. + pub fn into_config(self, env: HyperlinkEnvironment) -> HyperlinkConfig { + HyperlinkConfig::new(env, self) + } + + /// Returns true if the format can produce line-dependent hyperlinks. + pub(crate) fn is_line_dependent(&self) -> bool { + self.is_line_dependent + } } -/// An error that can occur when parsing a hyperlink pattern. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum HyperlinkPatternError { - /// This occurs when the pattern syntax is not valid. - InvalidSyntax, - /// This occurs when the {file} placeholder is missing. - NoFilePlaceholder, - /// This occurs when the {line} placeholder is missing, - /// while the {column} placeholder is present. - NoLinePlaceholder, - /// This occurs when an unknown placeholder is used. - InvalidPlaceholder(String), - /// The pattern doesn't start with a valid scheme. 
- InvalidScheme, +impl std::str::FromStr for HyperlinkFormat { + type Err = HyperlinkFormatError; + + fn from_str(s: &str) -> Result { + use self::HyperlinkFormatErrorKind::*; + + #[derive(Debug)] + enum State { + Verbatim, + VerbatimCloseVariable, + OpenVariable, + InVariable, + } + + let mut builder = FormatBuilder::new(); + let input = match hyperlink_aliases::find(s) { + Some(format) => format, + None => s, + }; + let mut name = String::new(); + let mut state = State::Verbatim; + let err = |kind| HyperlinkFormatError { kind }; + for ch in input.chars() { + state = match state { + State::Verbatim => { + if ch == '{' { + State::OpenVariable + } else if ch == '}' { + State::VerbatimCloseVariable + } else { + builder.append_char(ch); + State::Verbatim + } + } + State::VerbatimCloseVariable => { + if ch == '}' { + builder.append_char('}'); + State::Verbatim + } else { + return Err(err(InvalidCloseVariable)); + } + } + State::OpenVariable => { + if ch == '{' { + builder.append_char('{'); + State::Verbatim + } else { + name.clear(); + if ch == '}' { + builder.append_var(&name)?; + State::Verbatim + } else { + name.push(ch); + State::InVariable + } + } + } + State::InVariable => { + if ch == '}' { + builder.append_var(&name)?; + State::Verbatim + } else { + name.push(ch); + State::InVariable + } + } + }; + } + match state { + State::Verbatim => builder.build(), + State::VerbatimCloseVariable => Err(err(InvalidCloseVariable)), + State::OpenVariable | State::InVariable => { + Err(err(UnclosedVariable)) + } + } + } } -/// The values to replace the pattern placeholders with. -#[derive(Clone, Debug)] -pub(crate) struct HyperlinkValues<'a> { - file: &'a HyperlinkPath, - line: u64, - column: u64, +impl std::fmt::Display for HyperlinkFormat { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + for part in self.parts.iter() { + part.fmt(f)?; + } + Ok(()) + } } -/// Represents the {file} part of a hyperlink. +/// A static environment for hyperlink interpolation. 
/// -/// This is the value to use as-is in the hyperlink, converted from an OS file -/// path. -#[derive(Clone, Debug)] -pub(crate) struct HyperlinkPath(Vec); +/// This environment permits setting the values of variables used in hyperlink +/// interpolation that are not expected to change for the lifetime of a program. +/// That is, these values are invariant. +/// +/// Currently, this includes the hostname and a WSL distro prefix. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct HyperlinkEnvironment { + host: Option, + wsl_prefix: Option, +} -impl HyperlinkPatternBuilder { - /// Creates a new hyperlink pattern builder. - pub fn new() -> Self { - Self { parts: vec![] } +impl HyperlinkEnvironment { + /// Create a new empty hyperlink environment. + pub fn new() -> HyperlinkEnvironment { + HyperlinkEnvironment::default() } - /// Appends static text. - pub fn append_text(&mut self, text: &[u8]) -> &mut Self { - if let Some(Part::Text(contents)) = self.parts.last_mut() { - contents.extend_from_slice(text); - } else if !text.is_empty() { - self.parts.push(Part::Text(text.to_vec())); - } + /// Set the `{host}` variable, which fills in any hostname components of + /// a hyperlink. + /// + /// One can get the hostname in the current environment via the `hostname` + /// function in the `grep-cli` crate. + pub fn host(&mut self, host: Option) -> &mut HyperlinkEnvironment { + self.host = host; self } - /// Appends the hostname. - /// - /// On WSL, appends `wsl$/{distro}` instead. - pub fn append_hostname(&mut self) -> &mut Self { - self.append_text(Self::get_hostname().as_bytes()) + /// Set the `{wslprefix}` variable, which contains the WSL distro prefix. + /// An example value is `wsl$/Ubuntu`. The distro name can typically be + /// discovered from the `WSL_DISTRO_NAME` environment variable.
+ pub fn wsl_prefix( + &mut self, + wsl_prefix: Option, + ) -> &mut HyperlinkEnvironment { + self.wsl_prefix = wsl_prefix; + self } +} - /// Returns the hostname to use in the pattern. - /// - /// On WSL, returns `wsl$/{distro}`. - fn get_hostname() -> String { - if cfg!(unix) { - if let Ok(mut wsl_distro) = std::env::var("WSL_DISTRO_NAME") { - wsl_distro.insert_str(0, "wsl$/"); - return wsl_distro; +/// An error that can occur when parsing a hyperlink format. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct HyperlinkFormatError { + kind: HyperlinkFormatErrorKind, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +enum HyperlinkFormatErrorKind { + /// This occurs when there are zero variables in the format. + NoVariables, + /// This occurs when the {path} variable is missing. + NoPathVariable, + /// This occurs when the {line} variable is missing, while the {column} + /// variable is present. + NoLineVariable, + /// This occurs when an unknown variable is used. + InvalidVariable(String), + /// The format doesn't start with a valid scheme. + InvalidScheme, + /// This occurs when an unescaped `}` is found without a corresponding + /// `{` preceding it. + InvalidCloseVariable, + /// This occurs when a `{` is found without a corresponding `}` following + /// it. 
+ UnclosedVariable, +} + +impl std::error::Error for HyperlinkFormatError {} + +impl std::fmt::Display for HyperlinkFormatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use self::HyperlinkFormatErrorKind::*; + + match self.kind { + NoVariables => { + let aliases = hyperlink_aliases::iter() + .map(|(name, _)| name) + .collect::>() + .join(", "); + write!( + f, + "at least a {{path}} variable is required in a \ + hyperlink format, or otherwise use a valid alias: {}", + aliases, + ) + } + NoPathVariable => { + write!( + f, + "the {{path}} variable is required in a hyperlink format", + ) + } + NoLineVariable => { + write!( + f, + "the hyperlink format contains a {{column}} variable, \ + but no {{line}} variable is present", + ) + } + InvalidVariable(ref name) => { + write!( + f, + "invalid hyperlink format variable: '{name}', choose \ + from: path, line, column, host", + ) + } + InvalidScheme => { + write!( + f, + "the hyperlink format must start with a valid URL scheme, \ + i.e., [0-9A-Za-z+-.]+:", + ) + } + InvalidCloseVariable => { + write!( + f, + "unopened variable: found '}}' without a \ + corresponding '{{' preceding it", + ) + } + UnclosedVariable => { + write!( + f, + "unclosed variable: found '{{' without a \ + corresponding '}}' following it", + ) } } - - gethostname::gethostname().to_string_lossy().to_string() } +} - /// Appends a placeholder for the file path. - pub fn append_file(&mut self) -> &mut Self { - self.parts.push(Part::File); - self +/// A builder for `HyperlinkFormat`. +/// +/// Once a `HyperlinkFormat` is built, it is immutable. +#[derive(Debug)] +struct FormatBuilder { + parts: Vec, +} + +impl FormatBuilder { + /// Creates a new hyperlink format builder. + fn new() -> FormatBuilder { + FormatBuilder { parts: vec![] } } - /// Appends a placeholder for the line number. - pub fn append_line(&mut self) -> &mut Self { - self.parts.push(Part::Line); + /// Appends static text.
+ fn append_slice(&mut self, text: &[u8]) -> &mut FormatBuilder { + if let Some(Part::Text(contents)) = self.parts.last_mut() { + contents.extend_from_slice(text); + } else if !text.is_empty() { + self.parts.push(Part::Text(text.to_vec())); + } self } - /// Appends a placeholder for the column number. - pub fn append_column(&mut self) -> &mut Self { - self.parts.push(Part::Column); - self + /// Appends a single character. + fn append_char(&mut self, ch: char) -> &mut FormatBuilder { + self.append_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()) + } + + /// Appends a variable with the given name. If the name isn't recognized, + /// then this returns an error. + fn append_var( + &mut self, + name: &str, + ) -> Result<&mut FormatBuilder, HyperlinkFormatError> { + let part = match name { + "host" => Part::Host, + "wslprefix" => Part::WSLPrefix, + "path" => Part::Path, + "line" => Part::Line, + "column" => Part::Column, + unknown => { + let err = HyperlinkFormatError { + kind: HyperlinkFormatErrorKind::InvalidVariable( + unknown.to_string(), + ), + }; + return Err(err); + } + }; + self.parts.push(part); + Ok(self) } - /// Builds the pattern. - pub fn build(&self) -> Result { + /// Builds the format. + fn build(&self) -> Result { self.validate()?; - - Ok(HyperlinkPattern { + Ok(HyperlinkFormat { parts: self.parts.clone(), is_line_dependent: self.parts.contains(&Part::Line), }) } - /// Validate that the pattern is well-formed. - fn validate(&self) -> Result<(), HyperlinkPatternError> { + /// Validate that the format is well-formed. + fn validate(&self) -> Result<(), HyperlinkFormatError> { + use self::HyperlinkFormatErrorKind::*; + + let err = |kind| HyperlinkFormatError { kind }; + // An empty format is fine. It just means hyperlink support is + // disabled. if self.parts.is_empty() { return Ok(()); } - - if !self.parts.contains(&Part::File) { - return Err(HyperlinkPatternError::NoFilePlaceholder); + // If all parts are just text, then there are no variables. 
It's + // likely a reference to an invalid alias. + if self.parts.iter().all(|p| matches!(*p, Part::Text(_))) { + return Err(err(NoVariables)); } - + // Even if we have other variables, no path variable means the + // hyperlink can't possibly work the way it is intended. + if !self.parts.contains(&Part::Path) { + return Err(err(NoPathVariable)); + } + // If the {column} variable is used, then we also need a {line} + // variable or else {column} can't possibly work. if self.parts.contains(&Part::Column) && !self.parts.contains(&Part::Line) { - return Err(HyperlinkPatternError::NoLinePlaceholder); + return Err(err(NoLineVariable)); } - self.validate_scheme() } - /// Validate that the pattern starts with a valid scheme. + /// Validate that the format starts with a valid scheme. Validation is done + /// according to how a scheme is defined in RFC 1738 sections 2.1[1] and + /// 5[2]. In short, a scheme is this: /// - /// A valid scheme starts with an alphabetic character, continues with - /// a sequence of alphanumeric characters, periods, hyphens or plus signs, - /// and ends with a colon. - fn validate_scheme(&self) -> Result<(), HyperlinkPatternError> { - if let Some(Part::Text(value)) = self.parts.first() { - if let Some(colon_index) = value.find_byte(b':') { - if value[0].is_ascii_alphabetic() - && value.iter().take(colon_index).all(|c| { - c.is_ascii_alphanumeric() - || matches!(c, b'.' | b'-' | b'+') - }) - { - return Ok(()); - } + /// scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] + /// + /// but is case insensitive.
+ /// + /// [1]: https://datatracker.ietf.org/doc/html/rfc1738#section-2.1 + /// [2]: https://datatracker.ietf.org/doc/html/rfc1738#section-5 + fn validate_scheme(&self) -> Result<(), HyperlinkFormatError> { + let err_invalid_scheme = HyperlinkFormatError { + kind: HyperlinkFormatErrorKind::InvalidScheme, + }; + let Some(Part::Text(ref part)) = self.parts.first() else { + return Err(err_invalid_scheme); + }; + let Some(colon) = part.find_byte(b':') else { + return Err(err_invalid_scheme); + }; + let scheme = &part[..colon]; + if scheme.is_empty() { + return Err(err_invalid_scheme); + } + let is_valid_scheme_char = |byte| match byte { + b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'+' | b'-' | b'.' => { + true } + _ => false, + }; + if !scheme.iter().all(|&b| is_valid_scheme_char(b)) { + return Err(err_invalid_scheme); } - - Err(HyperlinkPatternError::InvalidScheme) + Ok(()) } } -impl HyperlinkPattern { - /// Creates an empty hyperlink pattern. - pub fn empty() -> Self { - HyperlinkPattern::default() - } - - /// Creates a default pattern suitable for Unix. +/// A hyperlink format part. +/// +/// A sequence of these corresponds to a complete format. (Not all sequences +/// are valid.) +#[derive(Clone, Debug, Eq, PartialEq)] +enum Part { + /// Static text. /// - /// The returned pattern is `file://{host}/{file}` - #[cfg(unix)] - pub fn default_file_scheme() -> Self { - HyperlinkPatternBuilder::new() - .append_text(b"file://") - .append_hostname() - .append_text(b"/") - .append_file() - .build() - .unwrap() + /// We use `Vec` here (and more generally treat a format string as a + /// sequence of bytes) because file paths may be arbitrary bytes. A rare + /// case, but one for which there is no good reason to choke on. + Text(Vec), + /// Variable for the hostname. + Host, + /// Variable for a WSL path prefix. + WSLPrefix, + /// Variable for the file path. + Path, + /// Variable for the line number. + Line, + /// Variable for the column number. 
+ Column, +} + +impl Part { + /// Interpolate this part using the given `env` and `values`, and write + /// the result of interpolation to the buffer provided. + fn interpolate_to( + &self, + env: &HyperlinkEnvironment, + values: &Values, + dest: &mut Vec, + ) { + match self { + Part::Text(ref text) => dest.extend_from_slice(text), + Part::Host => dest.extend_from_slice( + env.host.as_ref().map(|s| s.as_bytes()).unwrap_or(b""), + ), + Part::WSLPrefix => dest.extend_from_slice( + env.wsl_prefix.as_ref().map(|s| s.as_bytes()).unwrap_or(b""), + ), + Part::Path => dest.extend_from_slice(&values.path.0), + Part::Line => { + let line = values.line.unwrap_or(1).to_string(); + dest.extend_from_slice(line.as_bytes()); + } + Part::Column => { + let column = values.column.unwrap_or(1).to_string(); + dest.extend_from_slice(column.as_bytes()); + } + } } +} - /// Creates a default pattern suitable for Windows. - /// - /// The returned pattern is `file:///{file}` - #[cfg(windows)] - pub fn default_file_scheme() -> Self { - HyperlinkPatternBuilder::new() - .append_text(b"file:///") - .append_file() - .build() - .unwrap() +impl std::fmt::Display for Part { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Part::Text(text) => write!(f, "{}", String::from_utf8_lossy(text)), + Part::Host => write!(f, "{{host}}"), + Part::WSLPrefix => write!(f, "{{wslprefix}}"), + Part::Path => write!(f, "{{path}}"), + Part::Line => write!(f, "{{line}}"), + Part::Column => write!(f, "{{column}}"), + } } +} - /// Returns true if this pattern is empty. - pub fn is_empty(&self) -> bool { - self.parts.is_empty() +/// The values to replace the format variables with. +/// +/// This only consists of values that depend on each path or match printed. +/// Values that are invariant throughout the lifetime of the process are set +/// via a [`HyperlinkEnvironment`]. 
+#[derive(Clone, Debug)] +pub(crate) struct Values<'a> { + path: &'a HyperlinkPath, + line: Option, + column: Option, +} + +impl<'a> Values<'a> { + /// Creates a new set of values, starting with the path given. + /// + /// Callers may also set the line and column number using the mutator + /// methods. + pub(crate) fn new(path: &'a HyperlinkPath) -> Values<'a> { + Values { path, line: None, column: None } } - /// Returns true if the pattern can produce line-dependent hyperlinks. - pub fn is_line_dependent(&self) -> bool { - self.is_line_dependent + /// Sets the line number for these values. + /// + /// If a line number is not set and a hyperlink format contains a `{line}` + /// variable, then it is interpolated with the value of `1` automatically. + pub(crate) fn line(mut self, line: Option) -> Values<'a> { + self.line = line; + self } - /// Renders this pattern with the given values to the given output. - pub(crate) fn render( - &self, - values: &HyperlinkValues, - output: &mut impl Write, - ) -> io::Result<()> { - for part in &self.parts { - part.render(values, output)?; - } - Ok(()) + /// Sets the column number for these values. + /// + /// If a column number is not set and a hyperlink format contains a + /// `{column}` variable, then it is interpolated with the value of `1` + /// automatically. + pub(crate) fn column(mut self, column: Option) -> Values<'a> { + self.column = column; + self } } -impl std::str::FromStr for HyperlinkPattern { - type Err = HyperlinkPatternError; +/// An abstraction for interpolating a hyperlink format with values for every +/// variable. +/// +/// Interpolation of variables occurs through two different sources. The +/// first is via a `HyperlinkEnvironment` for values that are expected to +/// be invariant. This comes from the `HyperlinkConfig` used to build this +/// interpolator. The second source is via `Values`, which is provided to +/// `Interpolator::begin`. 
The `Values` contains things like the file path, +/// line number and column number. +#[derive(Clone, Debug)] +pub(crate) struct Interpolator { + config: HyperlinkConfig, + buf: RefCell>, +} - fn from_str(s: &str) -> Result { - let mut builder = HyperlinkPatternBuilder::new(); - let mut input = s.as_bytes(); +impl Interpolator { + /// Create a new interpolator for the given hyperlink format configuration. + pub(crate) fn new(config: &HyperlinkConfig) -> Interpolator { + Interpolator { config: config.clone(), buf: RefCell::new(vec![]) } + } - if let Ok(index) = HYPERLINK_PATTERN_ALIASES - .binary_search_by_key(&input, |&(name, _)| name.as_bytes()) + /// Start interpolation with the given values by writing a hyperlink + /// to `wtr`. Subsequent writes to `wtr`, until `Interpolator::finish` is + /// called, are the label for the hyperlink. + /// + /// This returns an interpolator status which indicates whether the + /// hyperlink was written. It might not be written, for example, if the + /// underlying writer doesn't support hyperlinks or if the hyperlink + /// format is empty. The status should be provided to `Interpolator::finish` + /// as an instruction for whether to close the hyperlink or not.
+ pub(crate) fn begin( + &self, + values: &Values, + mut wtr: W, + ) -> io::Result { + if self.config.format().is_empty() + || !wtr.supports_hyperlinks() + || !wtr.supports_color() { - input = HYPERLINK_PATTERN_ALIASES[index].1.as_bytes(); + return Ok(InterpolatorStatus::inactive()); } - - while !input.is_empty() { - if input[0] == b'{' { - // Placeholder - let end = input - .find_byte(b'}') - .ok_or(HyperlinkPatternError::InvalidSyntax)?; - - match &input[1..end] { - b"file" => builder.append_file(), - b"line" => builder.append_line(), - b"column" => builder.append_column(), - b"host" => builder.append_hostname(), - other => { - return Err(HyperlinkPatternError::InvalidPlaceholder( - String::from_utf8_lossy(other).to_string(), - )) - } - }; - - input = &input[(end + 1)..]; - } else { - // Static text - let end = input.find_byte(b'{').unwrap_or(input.len()); - builder.append_text(&input[..end]); - input = &input[end..]; - } + let mut buf = self.buf.borrow_mut(); + buf.clear(); + for part in self.config.format().parts.iter() { + part.interpolate_to(self.config.environment(), values, &mut buf); } - - builder.build() - } -} - -impl ToString for HyperlinkPattern { - fn to_string(&self) -> String { - self.parts.iter().map(|p| p.to_string()).collect() + let spec = HyperlinkSpec::open(&buf); + wtr.set_hyperlink(&spec)?; + Ok(InterpolatorStatus { active: true }) } -} -impl Part { - fn render( + /// Writes the correct escape sequences to `wtr` to close any extant + /// hyperlink, marking the end of a hyperlink's label. + /// + /// The status given should be returned from a corresponding + /// `Interpolator::begin` call. Since `begin` may not write a hyperlink + /// (e.g., if the underlying writer doesn't support hyperlinks), it follows + /// that `finish` must not close a hyperlink that was never opened. The + /// status indicates whether the hyperlink was opened or not. 
+ pub(crate) fn finish( &self, - values: &HyperlinkValues, - output: &mut impl Write, + status: InterpolatorStatus, + mut wtr: W, ) -> io::Result<()> { - match self { - Part::Text(text) => output.write_all(text), - Part::File => output.write_all(&values.file.0), - Part::Line => write!(output, "{}", values.line), - Part::Column => write!(output, "{}", values.column), + if !status.active { + return Ok(()); } + wtr.set_hyperlink(&HyperlinkSpec::close()) } } -impl ToString for Part { - fn to_string(&self) -> String { - match self { - Part::Text(text) => String::from_utf8_lossy(text).to_string(), - Part::File => "{file}".to_string(), - Part::Line => "{line}".to_string(), - Part::Column => "{column}".to_string(), - } - } +/// A status indicating whether a hyperlink was written or not. +/// +/// This is created by `Interpolator::begin` and used by `Interpolator::finish` +/// to determine whether a hyperlink was actually opened or not. If it wasn't +/// opened, then finishing interpolation is a no-op. 
+#[derive(Debug)] +pub(crate) struct InterpolatorStatus { + active: bool, } -impl std::fmt::Display for HyperlinkPatternError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - HyperlinkPatternError::InvalidSyntax => { - write!(f, "invalid hyperlink pattern syntax") - } - HyperlinkPatternError::NoFilePlaceholder => { - write!( - f, - "the {{file}} placeholder is required in hyperlink \ - patterns", - ) - } - HyperlinkPatternError::NoLinePlaceholder => { - write!( - f, - "the hyperlink pattern contains a {{column}} placeholder, \ - but no {{line}} placeholder is present", - ) - } - HyperlinkPatternError::InvalidPlaceholder(name) => { - write!( - f, - "invalid hyperlink pattern placeholder: '{}', choose \ - from: file, line, column, host", - name - ) - } - HyperlinkPatternError::InvalidScheme => { - write!( - f, - "the hyperlink pattern must start with a valid URL scheme" - ) - } - } +impl InterpolatorStatus { + /// Create an inactive interpolator status. + pub(crate) fn inactive() -> InterpolatorStatus { + InterpolatorStatus { active: false } } } -impl std::error::Error for HyperlinkPatternError {} - -impl<'a> HyperlinkValues<'a> { - /// Creates a new set of hyperlink values. - pub(crate) fn new( - file: &'a HyperlinkPath, - line: Option, - column: Option, - ) -> Self { - HyperlinkValues { - file, - line: line.unwrap_or(1), - column: column.unwrap_or(1), - } - } -} +/// Represents the `{path}` part of a hyperlink. +/// +/// This is the value to use as-is in the hyperlink, converted from an OS file +/// path. +#[derive(Clone, Debug)] +pub(crate) struct HyperlinkPath(Vec); impl HyperlinkPath { /// Returns a hyperlink path from an OS path. 
#[cfg(unix)] - pub(crate) fn from_path(path: &Path) -> Option { - // On Unix, this function returns the absolute file path without the - // leading slash, as it makes for more natural hyperlink patterns, for - // instance: - // file://{host}/{file} instead of file://{host}{file} - // vscode://file/{file} instead of vscode://file{file} - // It also allows for patterns to be multi-platform. - - let path = path.canonicalize().ok()?; - let path = path.to_str()?.as_bytes(); - let path = if path.starts_with(b"/") { &path[1..] } else { path }; - Some(Self::encode(path)) + pub(crate) fn from_path(original_path: &Path) -> Option { + use std::os::unix::ffi::OsStrExt; + + // We canonicalize the path in order to get an absolute version of it + // without any `.` or `..` or superfluous separators. Unfortunately, + // this does also remove symlinks, and in theory, it would be nice to + // retain them. Perhaps even simpler, we could just join the current + // working directory with the path and be done with it. There was + // some discussion about this on PR#2483[1], and there generally appears + // to be some uncertainty about the extent to which hyperlinks with + // things like `..` in them actually work. So for now, we do the safest + // thing possible even though I think it can result in worse user + // experience. (Because it means the path you click on and the actual + // path that gets followed are different, even though they ostensibly + // refer to the same file.) + // + // There's also the potential issue that path canonicalization is + // expensive since it can touch the file system. That is probably + // less of an issue since hyperlinks are only created when they're + // supported, i.e., when writing to a tty.
+ // + // [1]: /~https://github.com/BurntSushi/ripgrep/pull/2483 + let path = match original_path.canonicalize() { + Ok(path) => path, + Err(err) => { + log::debug!( + "hyperlink creation for {:?} failed, error occurred \ + during path canonicalization: {}", + original_path, + err, + ); + return None; + } + }; + let bytes = path.as_os_str().as_bytes(); + // This should not be possible since one imagines that canonicalization + // should always return an absolute path. But it doesn't actually + // appear guaranteed by POSIX, so we check whether it's true or not and + // refuse to create a hyperlink from a relative path if it isn't. + if !bytes.starts_with(b"/") { + log::debug!( + "hyperlink creation for {:?} failed, canonicalization \ + returned {:?}, which does not start with a slash", + original_path, + path, + ); + return None; + } + Some(HyperlinkPath::encode(bytes)) } /// Returns a hyperlink path from an OS path. #[cfg(windows)] - pub fn from_path(path: &Path) -> Option { + pub(crate) fn from_path(original_path: &Path) -> Option { // On Windows, Path::canonicalize returns the result of // GetFinalPathNameByHandleW with VOLUME_NAME_DOS, // which produces paths such as the following: + // // \\?\C:\dir\file.txt (local path) // \\?\UNC\server\dir\file.txt (network share) // @@ -396,55 +712,102 @@ impl HyperlinkPath { // It is followed either by the drive letter, or by UNC\ // (universal naming convention), which denotes a network share. 
// - // Given that the default URL pattern on Windows is file:///{file} + // Given that the default URL format on Windows is file://{path} // we need to return the following from this function: - // C:/dir/file.txt (local path) - // /server/dir/file.txt (network share) + // + // /C:/dir/file.txt (local path) + // //server/dir/file.txt (network share) // // Which produces the following links: + // // file:///C:/dir/file.txt (local path) // file:////server/dir/file.txt (network share) // - // This substitutes the {file} placeholder with the expected value - // for the most common DOS paths, but on the other hand, - // network paths start with a single slash, which may be unexpected. - // It produces correct URLs though. + // This substitutes the {path} variable with the expected value for + // the most common DOS paths, but on the other hand, network paths + // start with two slashes, which may be unexpected. It seems to work + // though? + // + // Note that the following URL syntax also seems to be valid? // - // Note that the following URL syntax is also valid for network shares: // file://server/dir/file.txt - // It is also more consistent with the Unix case, but in order to - // use it, the pattern would have to be file://{file} and - // the {file} placeholder would have to be replaced with - // /C:/dir/file.txt - // for local files, which is not ideal, and it is certainly unexpected. + // + // But the initial implementation of this routine went for the format + // above. // // Also note that the file://C:/dir/file.txt syntax is not correct, // even though it often works in practice.
// - // In the end, this choice was confirmed by VSCode, whose pattern is - // vscode://file/{file}:{line}:{column} and which correctly understands - // the following URL format for network drives: + // In the end, this choice was confirmed by VSCode, whose format is + // + // vscode://file{path}:{line}:{column} + // + // and which correctly understands the following URL format for network + // drives: + // // vscode://file//server/dir/file.txt:1:1 + // // It doesn't parse any other number of slashes in "file//server" as a // network path. - const WIN32_NAMESPACE_PREFIX: &[u8] = br"\\?\"; - const UNC_PREFIX: &[u8] = br"UNC\"; - - let path = path.canonicalize().ok()?; - let mut path = path.to_str()?.as_bytes(); - - if path.starts_with(WIN32_NAMESPACE_PREFIX) { - path = &path[WIN32_NAMESPACE_PREFIX.len()..]; - - if path.starts_with(UNC_PREFIX) { - path = &path[(UNC_PREFIX.len() - 1)..]; + const WIN32_NAMESPACE_PREFIX: &str = r"\\?\"; + const UNC_PREFIX: &str = r"UNC\"; + + // As for Unix, we canonicalize the path to make sure we have an + // absolute path. + let path = match original_path.canonicalize() { + Ok(path) => path, + Err(err) => { + log::debug!( + "hyperlink creation for {:?} failed, error occurred \ + during path canonicalization: {}", + original_path, + err, + ); + return None; + } + }; + // We convert the path to a string for easier manipulation. If it + // wasn't valid UTF-16 (and thus could not be non-lossily transcoded + // to UTF-8), then we just give up. It's not clear we could make + // a meaningful hyperlink from it anyway. And this should be an + // exceptionally rare case. + let mut string = match path.to_str() { + Some(string) => string, + None => { + log::debug!( + "hyperlink creation for {:?} failed, path is not \ + valid UTF-8", + original_path, + ); + return None; } - } else { + }; + // As the comment above says, we expect all canonicalized paths to + // begin with a \\?\. 
If it doesn't, then something weird is happening + // and we should just give up. + if !string.starts_with(WIN32_NAMESPACE_PREFIX) { + log::debug!( + "hyperlink creation for {:?} failed, canonicalization \ + returned {:?}, which does not start with \\\\?\\", + original_path, + path, + ); return None; } + string = &string[WIN32_NAMESPACE_PREFIX.len()..]; - Some(Self::encode(path)) + // And as above, drop the UNC prefix too, but keep the leading slash. + if string.starts_with(UNC_PREFIX) { + string = &string[(UNC_PREFIX.len() - 1)..]; + } + // Finally, add a leading slash. In the local file case, this turns + // C:\foo\bar into /C:\foo\bar (and then percent encoding turns it into + // /C:/foo/bar). In the network share case, this turns \share\foo\bar + // into /\share\foo\bar (and then percent encoding turns it into + // //share/foo/bar). + let with_slash = format!("/{string}"); + Some(HyperlinkPath::encode(with_slash.as_bytes())) } /// Percent-encodes a path. @@ -461,9 +824,8 @@ impl HyperlinkPath { /// creates invalid file:// URLs on that platform. fn encode(input: &[u8]) -> HyperlinkPath { let mut result = Vec::with_capacity(input.len()); - - for &c in input { - match c { + for &byte in input.iter() { + match byte { b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' @@ -474,7 +836,7 @@ impl HyperlinkPath { | b'_' | b'~' | 128.. => { - result.push(c); + result.push(byte); } #[cfg(windows)] b'\\' => { @@ -483,60 +845,12 @@ impl HyperlinkPath { _ => { const HEX: &[u8] = b"0123456789ABCDEF"; result.push(b'%'); - result.push(HEX[(c >> 4) as usize]); - result.push(HEX[(c & 0xF) as usize]); + result.push(HEX[(byte >> 4) as usize]); + result.push(HEX[(byte & 0xF) as usize]); } } } - - Self(result) - } -} - -impl std::fmt::Display for HyperlinkPath { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - std::str::from_utf8(&self.0).unwrap_or("invalid utf-8") - ) - } -} - -/// A simple abstraction over a hyperlink span written to the terminal.
This -/// helps tracking whether a hyperlink has been started, and should be ended. -#[derive(Debug, Default)] -pub(crate) struct HyperlinkSpan { - active: bool, -} - -impl HyperlinkSpan { - /// Starts a hyperlink and returns a span which tracks whether it is still - /// in effect. - pub(crate) fn start( - wtr: &mut impl WriteColor, - hyperlink: &HyperlinkSpec, - ) -> io::Result { - if wtr.supports_hyperlinks() && hyperlink.uri().is_some() { - wtr.set_hyperlink(hyperlink)?; - Ok(HyperlinkSpan { active: true }) - } else { - Ok(HyperlinkSpan { active: false }) - } - } - - /// Ends the hyperlink span if it is active. - pub(crate) fn end(&mut self, wtr: &mut impl WriteColor) -> io::Result<()> { - if self.is_active() { - wtr.set_hyperlink(&HyperlinkSpec::close())?; - self.active = false; - } - Ok(()) - } - - /// Returns true if there is currently an active hyperlink. - pub(crate) fn is_active(&self) -> bool { - self.active + HyperlinkPath(result) } } @@ -547,135 +861,141 @@ mod tests { use super::*; #[test] - fn build_pattern() { - let pattern = HyperlinkPatternBuilder::new() - .append_text(b"foo://") - .append_text(b"bar-") - .append_text(b"baz") - .append_file() + fn build_format() { + let format = FormatBuilder::new() + .append_slice(b"foo://") + .append_slice(b"bar-") + .append_slice(b"baz") + .append_var("path") + .unwrap() .build() .unwrap(); - assert_eq!(pattern.to_string(), "foo://bar-baz{file}"); - assert_eq!(pattern.parts[0], Part::Text(b"foo://bar-baz".to_vec())); - assert!(!pattern.is_empty()); + assert_eq!(format.to_string(), "foo://bar-baz{path}"); + assert_eq!(format.parts[0], Part::Text(b"foo://bar-baz".to_vec())); + assert!(!format.is_empty()); } #[test] - fn build_empty_pattern() { - let pattern = HyperlinkPatternBuilder::new().build().unwrap(); + fn build_empty_format() { + let format = FormatBuilder::new().build().unwrap(); - assert!(pattern.is_empty()); - assert_eq!(pattern, HyperlinkPattern::empty()); - assert_eq!(pattern, 
HyperlinkPattern::default()); + assert!(format.is_empty()); + assert_eq!(format, HyperlinkFormat::empty()); + assert_eq!(format, HyperlinkFormat::default()); } #[test] fn handle_alias() { - assert!(HyperlinkPattern::from_str("file").is_ok()); - assert!(HyperlinkPattern::from_str("none").is_ok()); - assert!(HyperlinkPattern::from_str("none").unwrap().is_empty()); + assert!(HyperlinkFormat::from_str("file").is_ok()); + assert!(HyperlinkFormat::from_str("none").is_ok()); + assert!(HyperlinkFormat::from_str("none").unwrap().is_empty()); } #[test] - fn parse_pattern() { - let pattern = HyperlinkPattern::from_str( - "foo://{host}/bar/{file}:{line}:{column}", + fn parse_format() { + let format = HyperlinkFormat::from_str( + "foo://{host}/bar/{path}:{line}:{column}", ) .unwrap(); assert_eq!( - pattern.to_string(), - "foo://{host}/bar/{file}:{line}:{column}" - .replace("{host}", &HyperlinkPatternBuilder::get_hostname()) + format.to_string(), + "foo://{host}/bar/{path}:{line}:{column}" ); - assert_eq!(pattern.parts.len(), 6); - assert!(pattern.parts.contains(&Part::File)); - assert!(pattern.parts.contains(&Part::Line)); - assert!(pattern.parts.contains(&Part::Column)); + assert_eq!(format.parts.len(), 8); + assert!(format.parts.contains(&Part::Path)); + assert!(format.parts.contains(&Part::Line)); + assert!(format.parts.contains(&Part::Column)); } #[test] fn parse_valid() { - assert!(HyperlinkPattern::from_str("").unwrap().is_empty()); + assert!(HyperlinkFormat::from_str("").unwrap().is_empty()); assert_eq!( - HyperlinkPattern::from_str("foo://{file}").unwrap().to_string(), - "foo://{file}" + HyperlinkFormat::from_str("foo://{path}").unwrap().to_string(), + "foo://{path}" ); assert_eq!( - HyperlinkPattern::from_str("foo://{file}/bar") - .unwrap() - .to_string(), - "foo://{file}/bar" + HyperlinkFormat::from_str("foo://{path}/bar").unwrap().to_string(), + "foo://{path}/bar" ); - HyperlinkPattern::from_str("f://{file}").unwrap(); - 
HyperlinkPattern::from_str("f:{file}").unwrap(); - HyperlinkPattern::from_str("f-+.:{file}").unwrap(); - HyperlinkPattern::from_str("f42:{file}").unwrap(); + HyperlinkFormat::from_str("f://{path}").unwrap(); + HyperlinkFormat::from_str("f:{path}").unwrap(); + HyperlinkFormat::from_str("f-+.:{path}").unwrap(); + HyperlinkFormat::from_str("f42:{path}").unwrap(); + HyperlinkFormat::from_str("42:{path}").unwrap(); + HyperlinkFormat::from_str("+:{path}").unwrap(); + HyperlinkFormat::from_str("F42:{path}").unwrap(); + HyperlinkFormat::from_str("F42://foo{{bar}}{path}").unwrap(); } #[test] fn parse_invalid() { + use super::HyperlinkFormatErrorKind::*; + + let err = |kind| HyperlinkFormatError { kind }; assert_eq!( - HyperlinkPattern::from_str("foo://bar").unwrap_err(), - HyperlinkPatternError::NoFilePlaceholder + HyperlinkFormat::from_str("foo://bar").unwrap_err(), + err(NoVariables), ); assert_eq!( - HyperlinkPattern::from_str("foo://{bar}").unwrap_err(), - HyperlinkPatternError::InvalidPlaceholder("bar".to_string()) + HyperlinkFormat::from_str("foo://{line}").unwrap_err(), + err(NoPathVariable), ); assert_eq!( - HyperlinkPattern::from_str("foo://{file").unwrap_err(), - HyperlinkPatternError::InvalidSyntax + HyperlinkFormat::from_str("foo://{path").unwrap_err(), + err(UnclosedVariable), ); assert_eq!( - HyperlinkPattern::from_str("foo://{file}:{column}").unwrap_err(), - HyperlinkPatternError::NoLinePlaceholder + HyperlinkFormat::from_str("foo://{path}:{column}").unwrap_err(), + err(NoLineVariable), ); assert_eq!( - HyperlinkPattern::from_str("{file}").unwrap_err(), - HyperlinkPatternError::InvalidScheme + HyperlinkFormat::from_str("{path}").unwrap_err(), + err(InvalidScheme), ); assert_eq!( - HyperlinkPattern::from_str(":{file}").unwrap_err(), - HyperlinkPatternError::InvalidScheme + HyperlinkFormat::from_str(":{path}").unwrap_err(), + err(InvalidScheme), ); assert_eq!( - HyperlinkPattern::from_str("f*:{file}").unwrap_err(), - HyperlinkPatternError::InvalidScheme + 
HyperlinkFormat::from_str("f*:{path}").unwrap_err(), + err(InvalidScheme), ); - } - - #[test] - fn aliases_are_valid() { - for (name, definition) in HYPERLINK_PATTERN_ALIASES { - assert!( - HyperlinkPattern::from_str(definition).is_ok(), - "invalid hyperlink alias: {}", - name - ); - } - } - - #[test] - fn aliases_are_sorted() { - let mut names = HYPERLINK_PATTERN_ALIASES.iter().map(|(name, _)| name); - - let Some(mut previous_name) = names.next() else { - return; - }; - - for name in names { - assert!( - name > previous_name, - "'{}' should be sorted before '{}' \ - in HYPERLINK_PATTERN_ALIASES", - name, - previous_name - ); - previous_name = name; - } + assert_eq!( + HyperlinkFormat::from_str("foo://{bar}").unwrap_err(), + err(InvalidVariable("bar".to_string())), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{}}bar}").unwrap_err(), + err(InvalidVariable("".to_string())), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{b}}ar}").unwrap_err(), + err(InvalidVariable("b".to_string())), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{bar}}}").unwrap_err(), + err(InvalidVariable("bar".to_string())), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{{bar}").unwrap_err(), + err(InvalidCloseVariable), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{{{bar}").unwrap_err(), + err(InvalidVariable("bar".to_string())), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{b{{ar}").unwrap_err(), + err(InvalidVariable("b{{ar".to_string())), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{bar{{}").unwrap_err(), + err(InvalidVariable("bar{{".to_string())), + ); } } diff --git a/crates/printer/src/hyperlink_aliases.rs b/crates/printer/src/hyperlink_aliases.rs index 6d429bf80..c98bc0b06 100644 --- a/crates/printer/src/hyperlink_aliases.rs +++ b/crates/printer/src/hyperlink_aliases.rs @@ -1,23 +1,87 @@ /// Aliases to well-known hyperlink schemes. /// /// These need to be sorted by name. 
-pub(crate) const HYPERLINK_PATTERN_ALIASES: &[(&str, &str)] = &[ - #[cfg(unix)] - ("file", "file://{host}/{file}"), +const HYPERLINK_PATTERN_ALIASES: &[(&str, &str)] = &[ + #[cfg(not(windows))] + ("default", "file://{host}{path}"), #[cfg(windows)] - ("file", "file:///{file}"), + ("default", "file://{path}"), + ("file", "file://{host}{path}"), // /~https://github.com/misaki-web/grepp - ("grep+", "grep+:///{file}:{line}"), - ("kitty", "file://{host}/{file}#{line}"), + ("grep+", "grep+://{path}:{line}"), + ("kitty", "file://{host}{path}#{line}"), // https://macvim.org/docs/gui_mac.txt.html#mvim%3A%2F%2F - ("macvim", "mvim://open?url=file:///{file}&line={line}&column={column}"), + ("macvim", "mvim://open?url=file://{path}&line={line}&column={column}"), ("none", ""), // /~https://github.com/inopinatus/sublime_url - ("subl", "subl://open?url=file:///{file}&line={line}&column={column}"), + ("subl", "subl://open?url=file://{path}&line={line}&column={column}"), // https://macromates.com/blog/2007/the-textmate-url-scheme/ - ("textmate", "txmt://open?url=file:///{file}&line={line}&column={column}"), + ("textmate", "txmt://open?url=file://{path}&line={line}&column={column}"), // https://code.visualstudio.com/docs/editor/command-line#_opening-vs-code-with-urls - ("vscode", "vscode://file/{file}:{line}:{column}"), - ("vscode-insiders", "vscode-insiders://file/{file}:{line}:{column}"), - ("vscodium", "vscodium://file/{file}:{line}:{column}"), + ("vscode", "vscode://file{path}:{line}:{column}"), + ("vscode-insiders", "vscode-insiders://file{path}:{line}:{column}"), + ("vscodium", "vscodium://file{path}:{line}:{column}"), ]; + +/// Look for the hyperlink format defined by the given alias name. +/// +/// If one does not exist, `None` is returned. 
+pub(crate) fn find(name: &str) -> Option<&str> { + HYPERLINK_PATTERN_ALIASES + .binary_search_by_key(&name, |&(name, _)| name) + .map(|i| HYPERLINK_PATTERN_ALIASES[i].1) + .ok() +} + +/// Return an iterator over all available alias names and their definitions. +pub(crate) fn iter() -> impl Iterator { + HYPERLINK_PATTERN_ALIASES.iter().copied() +} + +#[cfg(test)] +mod tests { + use crate::HyperlinkFormat; + + use super::*; + + #[test] + fn is_sorted() { + let mut prev = HYPERLINK_PATTERN_ALIASES + .get(0) + .expect("aliases should be non-empty") + .0; + for &(name, _) in HYPERLINK_PATTERN_ALIASES.iter().skip(1) { + assert!( + name > prev, + "'{prev}' should come before '{name}' in \ + HYPERLINK_PATTERN_ALIASES", + ); + prev = name; + } + } + + #[test] + fn alias_names_are_reasonable() { + for &(name, _) in HYPERLINK_PATTERN_ALIASES.iter() { + // There's no hard rule here, but if we want to define an alias + // with a name that doesn't pass this assert, then we should + // probably flag it as worthy of consideration. For example, we + // really do not want to define an alias that contains `{` or `}`, + // which might confuse it for a variable. 
+ assert!(name.chars().all(|c| c.is_alphanumeric() + || c == '+' + || c == '-' + || c == '.')); + } + } + + #[test] + fn aliases_are_valid_formats() { + for (name, definition) in HYPERLINK_PATTERN_ALIASES { + assert!( + definition.parse::().is_ok(), + "invalid hyperlink alias '{name}': {definition}", + ); + } + } +} diff --git a/crates/printer/src/lib.rs b/crates/printer/src/lib.rs index b2869d99f..6c4a37352 100644 --- a/crates/printer/src/lib.rs +++ b/crates/printer/src/lib.rs @@ -60,12 +60,13 @@ assert_eq!(output, expected); */ #![deny(missing_docs)] -#![cfg_attr(feature = "pattern", feature(pattern))] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] pub use crate::{ color::{default_color_specs, ColorError, ColorSpecs, UserColorSpec}, hyperlink::{ - HyperlinkPattern, HyperlinkPatternBuilder, HyperlinkPatternError, + HyperlinkConfig, HyperlinkEnvironment, HyperlinkFormat, + HyperlinkFormatError, }, path::{PathPrinter, PathPrinterBuilder}, standard::{Standard, StandardBuilder, StandardSink}, diff --git a/crates/printer/src/path.rs b/crates/printer/src/path.rs index c25956bc3..38a2c9ecb 100644 --- a/crates/printer/src/path.rs +++ b/crates/printer/src/path.rs @@ -4,7 +4,7 @@ use termcolor::WriteColor; use crate::{ color::ColorSpecs, - hyperlink::{HyperlinkPattern, HyperlinkSpan}, + hyperlink::{self, HyperlinkConfig}, util::PrinterPath, }; @@ -12,7 +12,7 @@ use crate::{ #[derive(Clone, Debug)] struct Config { colors: ColorSpecs, - hyperlink_pattern: HyperlinkPattern, + hyperlink: HyperlinkConfig, separator: Option, terminator: u8, } @@ -21,7 +21,7 @@ impl Default for Config { fn default() -> Config { Config { colors: ColorSpecs::default(), - hyperlink_pattern: HyperlinkPattern::default(), + hyperlink: HyperlinkConfig::default(), separator: None, terminator: b'\n', } @@ -43,7 +43,9 @@ impl PathPrinterBuilder { /// Create a new path printer with the current configuration that writes /// paths to the given writer. 
pub fn build(&self, wtr: W) -> PathPrinter { - PathPrinter { config: self.config.clone(), wtr, buf: vec![] } + let interpolator = + hyperlink::Interpolator::new(&self.config.hyperlink); + PathPrinter { config: self.config.clone(), wtr, interpolator } } /// Set the user color specifications to use for coloring in this printer. @@ -73,7 +75,7 @@ impl PathPrinterBuilder { self } - /// Set the hyperlink pattern to use for hyperlinks output by this printer. + /// Set the configuration to use for hyperlinks output by this printer. /// /// Regardless of the hyperlink format provided here, whether hyperlinks /// are actually used or not is determined by the implementation of @@ -83,12 +85,12 @@ impl PathPrinterBuilder { /// /// This completely overrides any previous hyperlink format. /// - /// The default pattern format results in not emitting any hyperlinks. - pub fn hyperlink_pattern( + /// The default configuration results in not emitting any hyperlinks. + pub fn hyperlink( &mut self, - pattern: HyperlinkPattern, + config: HyperlinkConfig, ) -> &mut PathPrinterBuilder { - self.config.hyperlink_pattern = pattern; + self.config.hyperlink = config; self } @@ -140,40 +142,35 @@ impl PathPrinterBuilder { pub struct PathPrinter { config: Config, wtr: W, - buf: Vec, + interpolator: hyperlink::Interpolator, } impl PathPrinter { /// Write the given path to the underlying writer. 
pub fn write(&mut self, path: &Path) -> io::Result<()> { - let ppath = PrinterPath::with_separator(path, self.config.separator); + let ppath = PrinterPath::new(path.as_ref()) + .with_separator(self.config.separator); if !self.wtr.supports_color() { self.wtr.write_all(ppath.as_bytes())?; } else { - let mut hyperlink = self.start_hyperlink_span(&ppath)?; + let status = self.start_hyperlink(&ppath)?; self.wtr.set_color(self.config.colors.path())?; self.wtr.write_all(ppath.as_bytes())?; self.wtr.reset()?; - hyperlink.end(&mut self.wtr)?; + self.interpolator.finish(status, &mut self.wtr)?; } self.wtr.write_all(&[self.config.terminator]) } /// Starts a hyperlink span when applicable. - fn start_hyperlink_span( + fn start_hyperlink( &mut self, path: &PrinterPath, - ) -> io::Result { - if self.wtr.supports_hyperlinks() { - if let Some(spec) = path.create_hyperlink_spec( - &self.config.hyperlink_pattern, - None, - None, - &mut self.buf, - ) { - return Ok(HyperlinkSpan::start(&mut self.wtr, &spec)?); - } - } - Ok(HyperlinkSpan::default()) + ) -> io::Result { + let Some(hyperpath) = path.as_hyperlink() else { + return Ok(hyperlink::InterpolatorStatus::inactive()); + }; + let values = hyperlink::Values::new(hyperpath); + self.interpolator.begin(&values, &mut self.wtr) } } diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs index aa925546c..cd6a4e549 100644 --- a/crates/printer/src/standard.rs +++ b/crates/printer/src/standard.rs @@ -20,7 +20,7 @@ use { use crate::{ color::ColorSpecs, counter::CounterWriter, - hyperlink::{HyperlinkPattern, HyperlinkSpan}, + hyperlink::{self, HyperlinkConfig}, stats::Stats, util::{ find_iter_at_in_context, trim_ascii_prefix, trim_line_terminator, @@ -36,7 +36,7 @@ use crate::{ #[derive(Debug, Clone)] struct Config { colors: ColorSpecs, - hyperlink_pattern: HyperlinkPattern, + hyperlink: HyperlinkConfig, stats: bool, heading: bool, path: bool, @@ -62,7 +62,7 @@ impl Default for Config { fn default() -> Config { Config { 
colors: ColorSpecs::default(), - hyperlink_pattern: HyperlinkPattern::default(), + hyperlink: HyperlinkConfig::default(), stats: false, heading: false, path: true, @@ -131,7 +131,6 @@ impl StandardBuilder { Standard { config: self.config.clone(), wtr: RefCell::new(CounterWriter::new(wtr)), - buf: RefCell::new(vec![]), matches: vec![], } } @@ -170,7 +169,7 @@ impl StandardBuilder { self } - /// Set the hyperlink pattern to use for hyperlinks output by this printer. + /// Set the configuration to use for hyperlinks output by this printer. /// /// Regardless of the hyperlink format provided here, whether hyperlinks /// are actually used or not is determined by the implementation of @@ -180,12 +179,12 @@ impl StandardBuilder { /// /// This completely overrides any previous hyperlink format. /// - /// The default pattern format results in not emitting any hyperlinks. - pub fn hyperlink_pattern( + /// The default configuration results in not emitting any hyperlinks. + pub fn hyperlink( &mut self, - pattern: HyperlinkPattern, + config: HyperlinkConfig, ) -> &mut StandardBuilder { - self.config.hyperlink_pattern = pattern; + self.config.hyperlink = config; self } @@ -496,7 +495,6 @@ impl StandardBuilder { pub struct Standard { config: Config, wtr: RefCell>, - buf: RefCell>, matches: Vec, } @@ -533,12 +531,15 @@ impl Standard { &'s mut self, matcher: M, ) -> StandardSink<'static, 's, M, W> { + let interpolator = + hyperlink::Interpolator::new(&self.config.hyperlink); let stats = if self.config.stats { Some(Stats::new()) } else { None }; let needs_match_granularity = self.needs_match_granularity(); StandardSink { matcher, standard: self, replacer: Replacer::new(), + interpolator, path: None, start_time: Instant::now(), match_count: 0, @@ -565,16 +566,17 @@ impl Standard { if !self.config.path { return self.sink(matcher); } + let interpolator = + hyperlink::Interpolator::new(&self.config.hyperlink); let stats = if self.config.stats { Some(Stats::new()) } else { None }; - let 
ppath = PrinterPath::with_separator( - path.as_ref(), - self.config.separator_path, - ); + let ppath = PrinterPath::new(path.as_ref()) + .with_separator(self.config.separator_path); let needs_match_granularity = self.needs_match_granularity(); StandardSink { matcher, standard: self, replacer: Replacer::new(), + interpolator, path: Some(ppath), start_time: Instant::now(), match_count: 0, @@ -659,6 +661,7 @@ pub struct StandardSink<'p, 's, M: Matcher, W> { matcher: M, standard: &'s mut Standard, replacer: Replacer, + interpolator: hyperlink::Interpolator, path: Option>, start_time: Instant, match_count: u64, @@ -1241,22 +1244,10 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { ) -> io::Result<()> { let mut prelude = PreludeWriter::new(self); prelude.start(line_number, column)?; - - if !self.config().heading { - prelude.write_path()?; - } - if let Some(n) = line_number { - prelude.write_line_number(n)?; - } - if let Some(n) = column { - if self.config().column { - prelude.write_column_number(n)?; - } - } - if self.config().byte_offset { - prelude.write_byte_offset(absolute_byte_offset)?; - } - + prelude.write_path()?; + prelude.write_line_number(line_number)?; + prelude.write_column_number(column)?; + prelude.write_byte_offset(absolute_byte_offset)?; prelude.end() } @@ -1507,30 +1498,30 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { } fn write_path_hyperlink(&self, path: &PrinterPath) -> io::Result<()> { - let mut hyperlink = self.start_hyperlink_span(path, None, None)?; + let status = self.start_hyperlink(path, None, None)?; self.write_path(path)?; - hyperlink.end(&mut *self.wtr().borrow_mut()) + self.end_hyperlink(status) } - fn start_hyperlink_span( + fn start_hyperlink( &self, path: &PrinterPath, line_number: Option, column: Option, - ) -> io::Result { - let mut wtr = self.wtr().borrow_mut(); - if wtr.supports_hyperlinks() { - let mut buf = self.buf().borrow_mut(); - if let Some(spec) = path.create_hyperlink_spec( - 
&self.config().hyperlink_pattern, - line_number, - column, - &mut buf, - ) { - return HyperlinkSpan::start(&mut *wtr, &spec); - } - } - Ok(HyperlinkSpan::default()) + ) -> io::Result { + let Some(hyperpath) = path.as_hyperlink() else { + return Ok(hyperlink::InterpolatorStatus::inactive()); + }; + let values = + hyperlink::Values::new(hyperpath).line(line_number).column(column); + self.sink.interpolator.begin(&values, &mut *self.wtr().borrow_mut()) + } + + fn end_hyperlink( + &self, + status: hyperlink::InterpolatorStatus, + ) -> io::Result<()> { + self.sink.interpolator.finish(status, &mut *self.wtr().borrow_mut()) } fn start_color_match(&self) -> io::Result<()> { @@ -1586,12 +1577,6 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { &self.sink.standard.wtr } - /// Return a temporary buffer, which may be used for anything. - /// It is not necessarily empty when returned. - fn buf(&self) -> &'a RefCell> { - &self.sink.standard.buf - } - /// Return the path associated with this printer, if one exists. fn path(&self) -> Option<&'a PrinterPath<'a>> { self.sink.path.as_ref() @@ -1645,7 +1630,7 @@ struct PreludeWriter<'a, M: Matcher, W> { std: &'a StandardImpl<'a, M, W>, next_separator: PreludeSeparator, field_separator: &'a [u8], - hyperlink: HyperlinkSpan, + interp_status: hyperlink::InterpolatorStatus, } /// A type of separator used in the prelude @@ -1660,45 +1645,45 @@ enum PreludeSeparator { impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { /// Creates a new prelude printer. + #[inline(always)] fn new(std: &'a StandardImpl<'a, M, W>) -> PreludeWriter<'a, M, W> { - Self { + PreludeWriter { std, next_separator: PreludeSeparator::None, field_separator: std.separator_field(), - hyperlink: HyperlinkSpan::default(), + interp_status: hyperlink::InterpolatorStatus::inactive(), } } /// Starts the prelude with a hyperlink when applicable. 
/// - /// If a heading was written, and the hyperlink pattern is invariant on + /// If a heading was written, and the hyperlink format is invariant on /// the line number, then this doesn't hyperlink each line prelude, as it /// wouldn't point to the line anyway. The hyperlink on the heading should /// be sufficient and less confusing. + #[inline(always)] fn start( &mut self, line_number: Option, column: Option, ) -> io::Result<()> { - if let Some(path) = self.std.path() { - if self.config().hyperlink_pattern.is_line_dependent() - || !self.config().heading - { - self.hyperlink = self.std.start_hyperlink_span( - path, - line_number, - column, - )?; - } + let Some(path) = self.std.path() else { return Ok(()) }; + if self.config().hyperlink.format().is_line_dependent() + || !self.config().heading + { + self.interp_status = + self.std.start_hyperlink(path, line_number, column)?; } Ok(()) } /// Ends the prelude and writes the remaining output. + #[inline(always)] fn end(&mut self) -> io::Result<()> { - if self.hyperlink.is_active() { - self.hyperlink.end(&mut *self.std.wtr().borrow_mut())?; - } + self.std.end_hyperlink(std::mem::replace( + &mut self.interp_status, + hyperlink::InterpolatorStatus::inactive(), + ))?; self.write_separator() } @@ -1706,22 +1691,30 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { /// write that path to the underlying writer followed by the given field /// separator. (If a path terminator is set, then that is used instead of /// the field separator.) + #[inline(always)] fn write_path(&mut self) -> io::Result<()> { - if let Some(path) = self.std.path() { - self.write_separator()?; - self.std.write_path(path)?; - - self.next_separator = if self.config().path_terminator.is_some() { - PreludeSeparator::PathTerminator - } else { - PreludeSeparator::FieldSeparator - }; + // The prelude doesn't handle headings, only what comes before a match + // on the same line. 
So if we are emitting paths in headings, we should + // not do it here on each line. + if self.config().heading { + return Ok(()); } + let Some(path) = self.std.path() else { return Ok(()) }; + self.write_separator()?; + self.std.write_path(path)?; + + self.next_separator = if self.config().path_terminator.is_some() { + PreludeSeparator::PathTerminator + } else { + PreludeSeparator::FieldSeparator + }; Ok(()) } - /// Writes the line number field. - fn write_line_number(&mut self, line_number: u64) -> io::Result<()> { + /// Writes the line number field if present. + #[inline(always)] + fn write_line_number(&mut self, line: Option) -> io::Result<()> { + let Some(line_number) = line else { return Ok(()) }; self.write_separator()?; let n = line_number.to_string(); self.std.write_spec(self.config().colors.line(), n.as_bytes())?; @@ -1729,8 +1722,13 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { Ok(()) } - /// Writes the column number field. - fn write_column_number(&mut self, column_number: u64) -> io::Result<()> { + /// Writes the column number field if present and configured to do so. + #[inline(always)] + fn write_column_number(&mut self, column: Option) -> io::Result<()> { + if !self.config().column { + return Ok(()); + } + let Some(column_number) = column else { return Ok(()) }; self.write_separator()?; let n = column_number.to_string(); self.std.write_spec(self.config().colors.column(), n.as_bytes())?; @@ -1738,8 +1736,12 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { Ok(()) } - /// Writes the byte offset field. + /// Writes the byte offset field if configured to do so. 
+ #[inline(always)] fn write_byte_offset(&mut self, offset: u64) -> io::Result<()> { + if !self.config().byte_offset { + return Ok(()); + } self.write_separator()?; let n = offset.to_string(); self.std.write_spec(self.config().colors.column(), n.as_bytes())?; @@ -1751,6 +1753,7 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { /// /// This is called before writing the contents of a field, and at /// the end of the prelude. + #[inline(always)] fn write_separator(&mut self) -> io::Result<()> { match self.next_separator { PreludeSeparator::None => {} @@ -1767,6 +1770,7 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { Ok(()) } + #[inline(always)] fn config(&self) -> &Config { self.std.config() } diff --git a/crates/printer/src/summary.rs b/crates/printer/src/summary.rs index 4875bb7e0..431b3a923 100644 --- a/crates/printer/src/summary.rs +++ b/crates/printer/src/summary.rs @@ -15,7 +15,7 @@ use { use crate::{ color::ColorSpecs, counter::CounterWriter, - hyperlink::{HyperlinkPattern, HyperlinkSpan}, + hyperlink::{self, HyperlinkConfig}, stats::Stats, util::{find_iter_at_in_context, PrinterPath}, }; @@ -29,7 +29,7 @@ use crate::{ struct Config { kind: SummaryKind, colors: ColorSpecs, - hyperlink_pattern: HyperlinkPattern, + hyperlink: HyperlinkConfig, stats: bool, path: bool, max_matches: Option, @@ -44,7 +44,7 @@ impl Default for Config { Config { kind: SummaryKind::Count, colors: ColorSpecs::default(), - hyperlink_pattern: HyperlinkPattern::default(), + hyperlink: HyperlinkConfig::default(), stats: false, path: true, max_matches: None, @@ -169,7 +169,6 @@ impl SummaryBuilder { Summary { config: self.config.clone(), wtr: RefCell::new(CounterWriter::new(wtr)), - buf: vec![], } } @@ -216,7 +215,7 @@ impl SummaryBuilder { self } - /// Set the hyperlink pattern to use for hyperlinks output by this printer. + /// Set the configuration to use for hyperlinks output by this printer. 
/// /// Regardless of the hyperlink format provided here, whether hyperlinks /// are actually used or not is determined by the implementation of @@ -226,12 +225,12 @@ impl SummaryBuilder { /// /// This completely overrides any previous hyperlink format. /// - /// The default pattern format results in not emitting any hyperlinks. - pub fn hyperlink_pattern( + /// The default configuration results in not emitting any hyperlinks. + pub fn hyperlink( &mut self, - pattern: HyperlinkPattern, + config: HyperlinkConfig, ) -> &mut SummaryBuilder { - self.config.hyperlink_pattern = pattern; + self.config.hyperlink = config; self } @@ -357,7 +356,6 @@ impl SummaryBuilder { pub struct Summary { config: Config, wtr: RefCell>, - buf: Vec, } impl Summary { @@ -400,6 +398,8 @@ impl Summary { &'s mut self, matcher: M, ) -> SummarySink<'static, 's, M, W> { + let interpolator = + hyperlink::Interpolator::new(&self.config.hyperlink); let stats = if self.config.stats || self.config.kind.requires_stats() { Some(Stats::new()) } else { @@ -408,6 +408,7 @@ impl Summary { SummarySink { matcher, summary: self, + interpolator, path: None, start_time: Instant::now(), match_count: 0, @@ -432,18 +433,19 @@ impl Summary { if !self.config.path && !self.config.kind.requires_path() { return self.sink(matcher); } + let interpolator = + hyperlink::Interpolator::new(&self.config.hyperlink); let stats = if self.config.stats || self.config.kind.requires_stats() { Some(Stats::new()) } else { None }; - let ppath = PrinterPath::with_separator( - path.as_ref(), - self.config.separator_path, - ); + let ppath = PrinterPath::new(path.as_ref()) + .with_separator(self.config.separator_path); SummarySink { matcher, summary: self, + interpolator, path: Some(ppath), start_time: Instant::now(), match_count: 0, @@ -490,6 +492,7 @@ impl Summary { pub struct SummarySink<'p, 's, M: Matcher, W> { matcher: M, summary: &'s mut Summary, + interpolator: hyperlink::Interpolator, path: Option>, start_time: Instant, match_count: 
u64, @@ -595,36 +598,34 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> { /// (color and hyperlink). fn write_path(&mut self) -> io::Result<()> { if self.path.is_some() { - let mut hyperlink = self.start_hyperlink_span()?; - + let status = self.start_hyperlink()?; self.write_spec( self.summary.config.colors.path(), self.path.as_ref().unwrap().as_bytes(), )?; - - if hyperlink.is_active() { - hyperlink.end(&mut *self.summary.wtr.borrow_mut())?; - } + self.end_hyperlink(status)?; } Ok(()) } /// Starts a hyperlink span when applicable. - fn start_hyperlink_span(&mut self) -> io::Result { - if let Some(ref path) = self.path { - let mut wtr = self.summary.wtr.borrow_mut(); - if wtr.supports_hyperlinks() { - if let Some(spec) = path.create_hyperlink_spec( - &self.summary.config.hyperlink_pattern, - None, - None, - &mut self.summary.buf, - ) { - return Ok(HyperlinkSpan::start(&mut *wtr, &spec)?); - } - } - } - Ok(HyperlinkSpan::default()) + fn start_hyperlink( + &mut self, + ) -> io::Result { + let Some(hyperpath) = + self.path.as_ref().and_then(|p| p.as_hyperlink()) + else { + return Ok(hyperlink::InterpolatorStatus::inactive()); + }; + let values = hyperlink::Values::new(hyperpath); + self.interpolator.begin(&values, &mut *self.summary.wtr.borrow_mut()) + } + + fn end_hyperlink( + &self, + status: hyperlink::InterpolatorStatus, + ) -> io::Result<()> { + self.interpolator.finish(status, &mut *self.summary.wtr.borrow_mut()) } /// Write the line terminator configured on the given searcher. 
diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs index a042e7542..b633ec9ae 100644 --- a/crates/printer/src/util.rs +++ b/crates/printer/src/util.rs @@ -1,21 +1,17 @@ -use std::{borrow::Cow, fmt, io, path::Path, time}; +use std::{borrow::Cow, cell::OnceCell, fmt, io, path::Path, time}; use { - bstr::{ByteSlice, ByteVec}, + bstr::ByteVec, grep_matcher::{Captures, LineTerminator, Match, Matcher}, grep_searcher::{ LineIter, Searcher, SinkContext, SinkContextKind, SinkError, SinkMatch, }, - termcolor::HyperlinkSpec, }; #[cfg(feature = "serde")] use serde::{Serialize, Serializer}; -use crate::{ - hyperlink::{HyperlinkPath, HyperlinkPattern, HyperlinkValues}, - MAX_LOOK_AHEAD, -}; +use crate::{hyperlink::HyperlinkPath, MAX_LOOK_AHEAD}; /// A type for handling replacements while amortizing allocation. pub(crate) struct Replacer { @@ -268,11 +264,12 @@ impl<'a> Sunk<'a> { /// something else. This allows us to amortize work if we are printing the /// file path for every match. /// -/// In the common case, no transformation is needed, which lets us avoid the -/// allocation. Typically, only Windows requires a transform, since we can't -/// access the raw bytes of a path directly and first need to lossily convert -/// to UTF-8. Windows is also typically where the path separator replacement -/// is used, e.g., in cygwin environments to use `/` instead of `\`. +/// In the common case, no transformation is needed, which lets us avoid +/// the allocation. Typically, only Windows requires a transform, since +/// it's fraught to access the raw bytes of a path directly and first need +/// to lossily convert to UTF-8. Windows is also typically where the path +/// separator replacement is used, e.g., in cygwin environments to use `/` +/// instead of `\`. /// /// Users of this type are expected to construct it from a normal `Path` /// found in the standard library. 
It can then be written to any `io::Write` @@ -281,54 +278,55 @@ impl<'a> Sunk<'a> { /// will not roundtrip correctly. #[derive(Clone, Debug)] pub(crate) struct PrinterPath<'a> { + // On Unix, we can re-materialize a `Path` from our `Cow<'a, [u8]>` with + // zero cost, so there's no point in storing it. At time of writing, + // OsStr::as_os_str_bytes (and its corresponding constructor) are not + // stable yet. Those would let us achieve the same end portably. (As long + // as we keep our UTF-8 requirement on Windows.) + #[cfg(not(unix))] path: &'a Path, bytes: Cow<'a, [u8]>, - hyperlink_path: std::cell::OnceCell>, + hyperlink: OnceCell>, } impl<'a> PrinterPath<'a> { /// Create a new path suitable for printing. pub(crate) fn new(path: &'a Path) -> PrinterPath<'a> { PrinterPath { + #[cfg(not(unix))] path, + // N.B. This is zero-cost on Unix and requires at least a UTF-8 + // check on Windows. This doesn't allocate on Windows unless the + // path is invalid UTF-8 (which is exceptionally rare). bytes: Vec::from_path_lossy(path), - hyperlink_path: std::cell::OnceCell::new(), + hyperlink: OnceCell::new(), } } - /// Create a new printer path from the given path which can be efficiently - /// written to a writer without allocation. + /// Set the separator on this path. /// - /// If the given separator is present, then any separators in `path` are - /// replaced with it. + /// When set, `PrinterPath::as_bytes` will return the path provided but + /// with its separator replaced with the one given. pub(crate) fn with_separator( - path: &'a Path, + mut self, sep: Option, ) -> PrinterPath<'a> { - let mut ppath = PrinterPath::new(path); - if let Some(sep) = sep { - ppath.replace_separator(sep); - } - ppath - } - - /// Replace the path separator in this path with the given separator - /// and do it in place. On Windows, both `/` and `\` are treated as - /// path separators that are both replaced by `new_sep`. 
In all other - /// environments, only `/` is treated as a path separator. - fn replace_separator(&mut self, new_sep: u8) { - let transformed_path: Vec = self - .as_bytes() - .bytes() - .map(|b| { - if b == b'/' || (cfg!(windows) && b == b'\\') { - new_sep - } else { - b + /// Replace the path separator in this path with the given separator + /// and do it in place. On Windows, both `/` and `\` are treated as + /// path separators that are both replaced by `new_sep`. In all other + /// environments, only `/` is treated as a path separator. + fn replace_separator(bytes: &[u8], sep: u8) -> Vec { + let mut bytes = bytes.to_vec(); + for b in bytes.iter_mut() { + if *b == b'/' || (cfg!(windows) && *b == b'\\') { + *b = sep; } - }) - .collect(); - self.bytes = Cow::Owned(transformed_path); + } + bytes + } + let Some(sep) = sep else { return self }; + self.bytes = Cow::Owned(replace_separator(self.as_bytes(), sep)); + self } /// Return the raw bytes for this path. @@ -336,33 +334,31 @@ impl<'a> PrinterPath<'a> { &self.bytes } - /// Creates a hyperlink for this path and the given line and column, using - /// the specified pattern. Uses the given buffer to store the hyperlink. - pub(crate) fn create_hyperlink_spec<'b>( - &self, - pattern: &HyperlinkPattern, - line_number: Option, - column: Option, - buffer: &'b mut Vec, - ) -> Option> { - if pattern.is_empty() { - return None; - } - let file_path = self.hyperlink_path()?; - let values = HyperlinkValues::new(file_path, line_number, column); - buffer.clear(); - pattern.render(&values, buffer).ok()?; - Some(HyperlinkSpec::open(buffer)) - } - - /// Returns the file path to use in hyperlinks, if any. + /// Return this path as a hyperlink. /// - /// This is what the {file} placeholder will be substituted with. - fn hyperlink_path(&self) -> Option<&HyperlinkPath> { - self.hyperlink_path - .get_or_init(|| HyperlinkPath::from_path(self.path)) + /// Note that a hyperlink may not be able to be created from a path. 
+ /// Namely, computing the hyperlink may require touching the file system + /// (e.g., for path canonicalization) and that can fail. This failure is + /// silent but is logged. + pub(crate) fn as_hyperlink(&self) -> Option<&HyperlinkPath> { + self.hyperlink + .get_or_init(|| HyperlinkPath::from_path(self.as_path())) .as_ref() } + + /// Return this path as an actual `Path` type. + fn as_path(&self) -> &Path { + #[cfg(unix)] + fn imp<'p>(p: &'p PrinterPath<'_>) -> &'p Path { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + Path::new(OsStr::from_bytes(p.as_bytes())) + } + #[cfg(not(unix))] + fn imp<'p>(p: &'p PrinterPath<'_>) -> &'p Path { + p.path + } + imp(self) + } } /// A type that provides "nicer" Display and Serialize impls for