Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(common): Make character analysis lazy #9696

Merged
merged 15 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/healthy-donuts-rule.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
swc_common: major
---

perf(common): Make `new_source_file` lazy
115 changes: 115 additions & 0 deletions crates/swc_common/src/cache.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
use std::ops::Deref;

use once_cell::sync::OnceCell;

/// Wrapper for [OnceCell] with support for [rkyv].
///
/// This is a newtype around a single [OnceCell]. The rkyv trait
/// implementations for it live in this file and are only compiled when the
/// `rkyv-impl` feature is enabled.
#[derive(Clone, Debug)]
pub struct CacheCell<T>(OnceCell<T>);

impl<T> Deref for CacheCell<T> {
type Target = OnceCell<T>;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl<T> CacheCell<T> {
    /// Creates an empty cell; no value is stored until it is initialized.
    ///
    /// This is a `const fn` (the wrapped `OnceCell::new` is `const` as well),
    /// so a `CacheCell` can also be constructed in `const` and `static`
    /// contexts.
    pub const fn new() -> Self {
        Self(OnceCell::new())
    }
}

impl<T> From<T> for CacheCell<T> {
fn from(value: T) -> Self {
Self(OnceCell::from(value))
}
}

impl<T> Default for CacheCell<T> {
fn default() -> Self {
Self::new()
}
}

// rkyv support for `CacheCell`, compiled only with the `rkyv-impl` feature.
//
// A cell is archived exactly like an `Option<T>`: `Archived` and `Resolver`
// are those of `Option<T>`, an unset cell serializes to `None`, and a cell
// holding a value serializes to `Some(value)`.
#[cfg(feature = "rkyv-impl")]
mod rkyv_impl {
use std::{hint::unreachable_unchecked, ptr};

use rkyv::{
option::ArchivedOption, out_field, Archive, Archived, Deserialize, Fallible, Resolver,
Serialize,
};

use super::*;

// NOTE(review): the three private types below appear to mirror the in-memory
// layout of rkyv's `ArchivedOption` (a `u8` tag followed by the payload) so
// that `resolve` can write the archived value field by field through a raw
// pointer. Confirm the layout against the rkyv version in use.
// `dead_code` is presumably allowed because these values are only ever
// written through raw pointers and never read back — TODO confirm.
#[allow(dead_code)]
#[repr(u8)]
enum ArchivedOptionTag {
None,
Some,
}

// Layout used when writing the `None` case: just the tag.
#[repr(C)]
struct ArchivedOptionVariantNone(ArchivedOptionTag);

// Layout used when writing the `Some` case: the tag followed by the payload.
#[repr(C)]
struct ArchivedOptionVariantSome<T>(ArchivedOptionTag, T);

impl<T> Archive for CacheCell<T>
where
T: Archive,
{
type Archived = Archived<Option<T>>;
type Resolver = Resolver<Option<T>>;

// Writes the archived form of this cell into `out`.
//
// `resolver` is the value produced by `serialize`: `None` for an unset
// cell, `Some(inner_resolver)` for a cell that held a value.
unsafe fn resolve(&self, pos: usize, resolver: Self::Resolver, out: *mut Self::Archived) {
match resolver {
None => {
// Only the tag is written for the `None` variant.
let out = out.cast::<ArchivedOptionVariantNone>();
ptr::addr_of_mut!((*out).0).write(ArchivedOptionTag::None);
}
Some(resolver) => {
let out = out.cast::<ArchivedOptionVariantSome<T::Archived>>();
ptr::addr_of_mut!((*out).0).write(ArchivedOptionTag::Some);

// SAFETY(review): `serialize` only returns a `Some` resolver when
// `self.0.get()` yielded a value, so the cell is expected to be
// initialized here. Reaching the `else` arm would be undefined
// behavior, so this relies on the cell not being emptied between
// `serialize` and `resolve`.
let v = self.0.get();
let value = if let Some(value) = v.as_ref() {
value
} else {
unreachable_unchecked();
};

// `fp` is the payload field's offset inside the archived value and
// `fo` a pointer to it; the inner value is resolved at `pos + fp`.
let (fp, fo) = out_field!(out.1);
value.resolve(pos + fp, resolver, fo);
}
}
}
}

impl<T: Serialize<S>, S: Fallible + ?Sized> Serialize<S> for CacheCell<T> {
// Serializes the stored value if there is one. Returns `Ok(None)` for an
// unset cell and propagates any error from serializing the inner value.
#[inline]
fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> {
self.0
.get()
.map(|value| value.serialize(serializer))
.transpose()
}
}

impl<T: Archive, D: Fallible + ?Sized> Deserialize<CacheCell<T>, D> for ArchivedOption<T::Archived>
where
T::Archived: Deserialize<T, D>,
{
// Rebuilds a `CacheCell` from its archived form: `Some` yields a cell that
// already holds the deserialized value, `None` yields an empty cell.
#[inline]
fn deserialize(&self, deserializer: &mut D) -> Result<CacheCell<T>, D::Error> {
match self {
ArchivedOption::Some(value) => {
let v = value.deserialize(deserializer)?;
Ok(CacheCell::from(v))
}
ArchivedOption::None => Ok(CacheCell::new()),
}
}
}
}
5 changes: 3 additions & 2 deletions crates/swc_common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,13 @@ pub use self::{
source_map::{FileLines, FileLoader, FilePathMapping, SourceMap, SpanSnippetError},
syntax_pos::LineCol,
};
#[doc(hidden)]
pub mod private;

/// A trait for AST nodes.
///
/// Implementors must also be `Debug`, `PartialEq`, `Clone` and `Spanned`.
pub trait AstNode: Debug + PartialEq + Clone + Spanned {
/// A static string associated with the node type.
/// NOTE(review): presumably the node's type name — confirm against implementors.
const TYPE: &'static str;
}

pub mod cache;
pub mod collections;
pub mod comments;
mod eq;
Expand All @@ -68,6 +67,8 @@ pub mod iter;
pub mod pass;
pub mod plugin;
mod pos;
#[doc(hidden)]
pub mod private;
mod rustc_data_structures;
pub mod serializer;
pub mod source_map;
Expand Down
26 changes: 15 additions & 11 deletions crates/swc_common/src/source_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,10 +316,11 @@ impl SourceMap {
let line_info = self.lookup_line_with(fm, pos);
match line_info {
Ok(SourceFileAndLine { sf: f, line: a }) => {
let analysis = f.analyze();
let chpos = self.bytepos_to_file_charpos_with(&f, pos);

let line = a + 1; // Line numbers start at 1
let linebpos = f.lines[a];
let linebpos = f.analyze().lines[a];
assert!(
pos >= linebpos,
"{}: bpos = {:?}; linebpos = {:?};",
Expand All @@ -332,16 +333,17 @@ impl SourceMap {
let col = chpos - linechpos;

let col_display = {
let start_width_idx = f
let start_width_idx = analysis
.non_narrow_chars
.binary_search_by_key(&linebpos, |x| x.pos())
.unwrap_or_else(|x| x);
let end_width_idx = f
let end_width_idx = analysis
.non_narrow_chars
.binary_search_by_key(&pos, |x| x.pos())
.unwrap_or_else(|x| x);
let special_chars = end_width_idx - start_width_idx;
let non_narrow: usize = f.non_narrow_chars[start_width_idx..end_width_idx]
let non_narrow: usize = analysis.non_narrow_chars
[start_width_idx..end_width_idx]
.iter()
.map(|x| x.width())
.sum();
Expand All @@ -367,14 +369,15 @@ impl SourceMap {
})
}
Err(f) => {
let analysis = f.analyze();
let chpos = self.bytepos_to_file_charpos(pos)?;

let col_display = {
let end_width_idx = f
let end_width_idx = analysis
.non_narrow_chars
.binary_search_by_key(&pos, |x| x.pos())
.unwrap_or_else(|x| x);
let non_narrow: usize = f.non_narrow_chars[0..end_width_idx]
let non_narrow: usize = analysis.non_narrow_chars[0..end_width_idx]
.iter()
.map(|x| x.width())
.sum();
Expand Down Expand Up @@ -1028,11 +1031,11 @@ impl SourceMap {
) -> u32 {
let mut total_extra_bytes = state.total_extra_bytes;
let mut index = state.mbc_index;

let analysis = file.analyze();
if bpos >= state.pos {
let range = index..file.multibyte_chars.len();
let range = index..analysis.multibyte_chars.len();
for i in range {
let mbc = &file.multibyte_chars[i];
let mbc = &analysis.multibyte_chars[i];
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
if mbc.pos >= bpos {
break;
Expand All @@ -1052,7 +1055,7 @@ impl SourceMap {
} else {
let range = 0..index;
for i in range.rev() {
let mbc = &file.multibyte_chars[i];
let mbc = &analysis.multibyte_chars[i];
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
if mbc.pos < bpos {
break;
Expand Down Expand Up @@ -1322,7 +1325,8 @@ impl SourceMap {
None => continue,
};

let linebpos = f.lines[line as usize];
let analysis = f.analyze();
let linebpos = analysis.lines[line as usize];
debug_assert!(
pos >= linebpos,
"{}: bpos = {:?}; linebpos = {:?};",
Expand Down
63 changes: 44 additions & 19 deletions crates/swc_common/src/syntax_pos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use url::Url;

use self::hygiene::MarkData;
pub use self::hygiene::{Mark, SyntaxContext};
use crate::{rustc_data_structures::stable_hasher::StableHasher, sync::Lrc};
use crate::{cache::CacheCell, rustc_data_structures::stable_hasher::StableHasher, sync::Lrc};

mod analyze_source_file;
pub mod hygiene;
Expand Down Expand Up @@ -827,14 +827,26 @@ pub struct SourceFile {
pub start_pos: BytePos,
/// The end position of this source in the `SourceMap`
pub end_pos: BytePos,
/// A hash of the filename, used for speeding up the incr. comp. hashing.
pub name_hash: u128,

lazy: CacheCell<SourceFileAnalysis>,
}

#[cfg_attr(
any(feature = "rkyv-impl"),
derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
)]
#[cfg_attr(feature = "rkyv-impl", archive(check_bytes))]
#[cfg_attr(feature = "rkyv-impl", archive_attr(repr(C)))]
#[derive(Clone)]
pub struct SourceFileAnalysis {
/// Locations of lines beginnings in the source code
pub lines: Vec<BytePos>,
/// Locations of multi-byte characters in the source code
pub multibyte_chars: Vec<MultiByteChar>,
/// Width of characters that are not narrow in the source code
pub non_narrow_chars: Vec<NonNarrowChar>,
/// A hash of the filename, used for speeding up the incr. comp. hashing.
pub name_hash: u128,
}

impl fmt::Debug for SourceFile {
Expand Down Expand Up @@ -888,9 +900,6 @@ impl SourceFile {
};
let end_pos = start_pos.to_usize() + src.len();

let (lines, multibyte_chars, non_narrow_chars) =
analyze_source_file::analyze_source_file(&src[..], start_pos);

SourceFile {
name,
name_was_remapped,
Expand All @@ -900,17 +909,16 @@ impl SourceFile {
src_hash,
start_pos,
end_pos: SmallPos::from_usize(end_pos),
lines,
multibyte_chars,
non_narrow_chars,
name_hash,
lazy: CacheCell::new(),
}
}

/// Return the BytePos of the beginning of the current line.
pub fn line_begin_pos(&self, pos: BytePos) -> BytePos {
let line_index = self.lookup_line(pos).unwrap();
self.lines[line_index]
let analysis = self.analyze();
analysis.lines[line_index]
}

/// Get a line from the list of pre-computed line-beginnings.
Expand All @@ -928,7 +936,8 @@ impl SourceFile {
}

let begin = {
let line = self.lines.get(line_number)?;
let analysis = self.analyze();
let line = analysis.lines.get(line_number)?;
let begin: BytePos = *line - self.start_pos;
begin.to_usize()
};
Expand All @@ -945,20 +954,22 @@ impl SourceFile {
}

pub fn count_lines(&self) -> usize {
self.lines.len()
let analysis = self.analyze();
analysis.lines.len()
}

/// Find the line containing the given position. The return value is the
/// index into the `lines` array of this SourceFile, not the 1-based line
/// number. If the `source_file` is empty or the position is located before
/// the first line, `None` is returned.
pub fn lookup_line(&self, pos: BytePos) -> Option<usize> {
if self.lines.is_empty() {
let analysis = self.analyze();
if analysis.lines.is_empty() {
return None;
}

let line_index = lookup_line(&self.lines[..], pos);
assert!(line_index < self.lines.len() as isize);
let line_index = lookup_line(&analysis.lines, pos);
assert!(line_index < analysis.lines.len() as isize);
if line_index >= 0 {
Some(line_index as usize)
} else {
Expand All @@ -971,18 +982,32 @@ impl SourceFile {
return (self.start_pos, self.end_pos);
}

assert!(line_index < self.lines.len());
if line_index == (self.lines.len() - 1) {
(self.lines[line_index], self.end_pos)
let analysis = self.analyze();

assert!(line_index < analysis.lines.len());
if line_index == (analysis.lines.len() - 1) {
(analysis.lines[line_index], self.end_pos)
} else {
(self.lines[line_index], self.lines[line_index + 1])
(analysis.lines[line_index], analysis.lines[line_index + 1])
}
}

/// Returns `true` when `byte_pos` lies between `start_pos` and `end_pos`,
/// with both ends counted as inside.
#[inline]
pub fn contains(&self, byte_pos: BytePos) -> bool {
    let at_or_after_start = byte_pos >= self.start_pos;
    at_or_after_start && byte_pos <= self.end_pos
}

/// Returns the character analysis of this file, computing it on first use.
///
/// The analysis is produced by running `analyze_source_file` over `src`
/// (starting at `start_pos`) and is stored in the `lazy` cell via
/// `get_or_init`, so later calls return the stored result.
pub fn analyze(&self) -> &SourceFileAnalysis {
    let compute = || {
        let (line_starts, multibyte, non_narrow) =
            analyze_source_file::analyze_source_file(&self.src[..], self.start_pos);
        SourceFileAnalysis {
            lines: line_starts,
            multibyte_chars: multibyte,
            non_narrow_chars: non_narrow,
        }
    };
    self.lazy.get_or_init(compute)
}
}

/// Remove utf-8 BOM if any.
Expand Down
2 changes: 1 addition & 1 deletion crates/swc_error_reporters/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ impl SourceCode for MietteSourceCode<'_> {
}

let loc = self.0.lookup_char_pos(span.lo());
let line_count = loc.file.lines.len();
let line_count = loc.file.analyze().lines.len();

let name = if self.1.skip_filename {
None
Expand Down
2 changes: 1 addition & 1 deletion crates/swc_estree_compat/src/swcify/ctx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ pub struct Context {

impl Context {
fn locate_line_col(&self, loc: LineCol) -> BytePos {
if let Some(&line_start) = self.fm.lines.get(loc.line) {
if let Some(&line_start) = self.fm.analyze().lines.get(loc.line) {
line_start + BytePos(loc.column as _)
} else {
BytePos(0)
Expand Down
Loading