Skip to content

Commit

Permalink
Auto merge of #125829 - petrochenkov:upctxt2, r=<try>
Browse files Browse the repository at this point in the history
rustc_span: Add conveniences for working with span formats

Also
- Centralize span ctxt updates in `Span::update_ctxt`
- Stop requiring inline and interned ctxts in partially interned format to be synchronized.

This is the refactoring part of #125017.
  • Loading branch information
bors committed May 31, 2024
2 parents ada5e2c + 0d28df7 commit 0218d5e
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 97 deletions.
10 changes: 5 additions & 5 deletions compiler/rustc_expand/src/mbe/transcribe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ impl MutVisitor for Marker {
// it's some advanced case with macro-generated macros. So if we cache the marked version
// of that context once, we'll typically have a 100% cache hit rate after that.
let Marker(expn_id, transparency, ref mut cache) = *self;
let data = span.data();
let marked_ctxt = *cache
.entry(data.ctxt)
.or_insert_with(|| data.ctxt.apply_mark(expn_id.to_expn_id(), transparency));
*span = data.with_ctxt(marked_ctxt);
span.update_ctxt(|ctxt| {
*cache
.entry(ctxt)
.or_insert_with(|| ctxt.apply_mark(expn_id.to_expn_id(), transparency))
});
}
}

Expand Down
26 changes: 14 additions & 12 deletions compiler/rustc_span/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -520,8 +520,9 @@ impl SpanData {
pub fn with_hi(&self, hi: BytePos) -> Span {
Span::new(self.lo, hi, self.ctxt, self.parent)
}
/// Avoid if possible, `Span::update_ctxt` should be preferred.
#[inline]
pub fn with_ctxt(&self, ctxt: SyntaxContext) -> Span {
fn with_ctxt(&self, ctxt: SyntaxContext) -> Span {
Span::new(self.lo, self.hi, ctxt, self.parent)
}
#[inline]
Expand Down Expand Up @@ -576,8 +577,9 @@ impl Span {
self.data().with_hi(hi)
}
#[inline]
pub fn with_ctxt(self, ctxt: SyntaxContext) -> Span {
self.data_untracked().with_ctxt(ctxt)
pub fn with_ctxt(mut self, ctxt: SyntaxContext) -> Span {
self.update_ctxt(|_| ctxt);
self
}
#[inline]
pub fn parent(self) -> Option<LocalDefId> {
Expand Down Expand Up @@ -1051,9 +1053,9 @@ impl Span {
}

#[inline]
pub fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span {
let span = self.data();
span.with_ctxt(span.ctxt.apply_mark(expn_id, transparency))
pub fn apply_mark(mut self, expn_id: ExpnId, transparency: Transparency) -> Span {
self.update_ctxt(|ctxt| ctxt.apply_mark(expn_id, transparency));
self
}

#[inline]
Expand Down Expand Up @@ -1101,15 +1103,15 @@ impl Span {
}

#[inline]
pub fn normalize_to_macros_2_0(self) -> Span {
let span = self.data();
span.with_ctxt(span.ctxt.normalize_to_macros_2_0())
pub fn normalize_to_macros_2_0(mut self) -> Span {
self.update_ctxt(|ctxt| ctxt.normalize_to_macros_2_0());
self
}

#[inline]
pub fn normalize_to_macro_rules(self) -> Span {
let span = self.data();
span.with_ctxt(span.ctxt.normalize_to_macro_rules())
pub fn normalize_to_macro_rules(mut self) -> Span {
self.update_ctxt(|ctxt| ctxt.normalize_to_macro_rules());
self
}
}

Expand Down
228 changes: 148 additions & 80 deletions compiler/rustc_span/src/span_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ use crate::SPAN_TRACK;
use crate::{BytePos, SpanData};

use rustc_data_structures::fx::FxIndexSet;

// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
// See /~https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
use rustc_serialize::int_overflow::DebugStrictAdd;
use std::mem::transmute;

/// A compressed span.
///
Expand Down Expand Up @@ -87,6 +87,99 @@ pub struct Span {
ctxt_or_parent_or_marker: u16,
}

// Convenience structures for all span formats.
#[derive(Clone, Copy)]
struct InlineCtxt {
lo: u32,
len: u16,
ctxt: u16,
}
#[derive(Clone, Copy)]
struct InlineParent {
lo: u32,
len_with_tag: u16,
parent: u16,
}
#[derive(Clone, Copy)]
struct PartiallyInterned {
index: u32,
_marker1: u16,
ctxt: u16,
}
#[derive(Clone, Copy)]
struct Interned {
index: u32,
_marker1: u16,
_marker2: u16,
}
enum Fmt<'a> {
InlineCtxt(&'a mut InlineCtxt),
InlineParent(&'a mut InlineParent),
PartiallyInterned(&'a mut PartiallyInterned),
Interned(&'a mut Interned),
}

impl InlineCtxt {
fn data(self) -> SpanData {
let len = self.len as u32;
debug_assert!(len <= MAX_LEN);
SpanData {
lo: BytePos(self.lo),
hi: BytePos(self.lo.debug_strict_add(len)),
ctxt: SyntaxContext::from_u32(self.ctxt as u32),
parent: None,
}
}
fn to_span(self) -> Span {
unsafe { transmute(self) }
}
}
impl InlineParent {
fn data(self) -> SpanData {
let len = (self.len_with_tag & !PARENT_TAG) as u32;
debug_assert!(len <= MAX_LEN);
let parent = LocalDefId { local_def_index: DefIndex::from_u32(self.parent as u32) };
SpanData {
lo: BytePos(self.lo),
hi: BytePos(self.lo.debug_strict_add(len)),
ctxt: SyntaxContext::root(),
parent: Some(parent),
}
}
fn to_span(self) -> Span {
unsafe { transmute(self) }
}
}
impl PartiallyInterned {
fn data(self) -> SpanData {
SpanData {
ctxt: SyntaxContext::from_u32(self.ctxt as u32),
..with_span_interner(|interner| interner.spans[self.index as usize])
}
}
fn to_span(self) -> Span {
unsafe { transmute(self) }
}
}
impl Interned {
fn data(self) -> SpanData {
with_span_interner(|interner| interner.spans[self.index as usize])
}
fn to_span(self) -> Span {
unsafe { transmute(self) }
}
}
impl Fmt<'_> {
fn data(self) -> SpanData {
match self {
Fmt::InlineCtxt(span) => span.data(),
Fmt::InlineParent(span) => span.data(),
Fmt::PartiallyInterned(span) => span.data(),
Fmt::Interned(span) => span.data(),
}
}
}

// `MAX_LEN` is chosen so that `PARENT_TAG | MAX_LEN` is distinct from
// `BASE_LEN_INTERNED_MARKER`. (If `MAX_LEN` was 1 higher, this wouldn't be true.)
const MAX_LEN: u32 = 0b0111_1111_1111_1110;
Expand All @@ -111,42 +204,48 @@ impl Span {
std::mem::swap(&mut lo, &mut hi);
}

let (lo2, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32());

// Small len may enable one of fully inline formats (or may not).
let (len, ctxt32) = (hi.0 - lo.0, ctxt.as_u32());
if len <= MAX_LEN {
if ctxt2 <= MAX_CTXT && parent.is_none() {
// Inline-context format.
return Span {
lo_or_index: lo2,
len_with_tag_or_marker: len as u16,
ctxt_or_parent_or_marker: ctxt2 as u16,
};
} else if ctxt2 == SyntaxContext::root().as_u32()
if ctxt32 <= MAX_CTXT && parent.is_none() {
return InlineCtxt { lo: lo.0, len: len as u16, ctxt: ctxt32 as u16 }.to_span();
} else if ctxt32 == 0
&& let Some(parent) = parent
&& let parent2 = parent.local_def_index.as_u32()
&& parent2 <= MAX_CTXT
&& let parent32 = parent.local_def_index.as_u32()
&& parent32 <= MAX_CTXT
{
// Inline-parent format.
return Span {
lo_or_index: lo2,
len_with_tag_or_marker: PARENT_TAG | len as u16,
ctxt_or_parent_or_marker: parent2 as u16,
};
let len_with_tag = PARENT_TAG | len as u16;
return InlineParent { lo: lo.0, len_with_tag, parent: parent32 as u16 }.to_span();
}
}

// Partially-interned or fully-interned format.
let index =
with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt, parent }));
let ctxt_or_parent_or_marker = if ctxt2 <= MAX_CTXT {
ctxt2 as u16 // partially-interned
} else {
CTXT_INTERNED_MARKER // fully-interned
// Otherwise small ctxt may enable the partially inline format.
let index = |ctxt| {
with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt, parent }))
};
Span {
lo_or_index: index,
len_with_tag_or_marker: BASE_LEN_INTERNED_MARKER,
ctxt_or_parent_or_marker,
if ctxt32 <= MAX_CTXT {
let index = index(SyntaxContext::from_u32(u32::MAX)); // any value, should never be read
PartiallyInterned { index, _marker1: BASE_LEN_INTERNED_MARKER, ctxt: ctxt32 as u16 }
.to_span()
} else {
let index = index(ctxt);
Interned { index, _marker1: BASE_LEN_INTERNED_MARKER, _marker2: CTXT_INTERNED_MARKER }
.to_span()
}
}

#[inline]
fn fmt(&mut self) -> Fmt<'_> {
if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
if self.len_with_tag_or_marker & PARENT_TAG == 0 {
Fmt::InlineCtxt(unsafe { transmute(self) })
} else {
Fmt::InlineParent(unsafe { transmute(self) })
}
} else if self.ctxt_or_parent_or_marker != CTXT_INTERNED_MARKER {
Fmt::PartiallyInterned(unsafe { transmute(self) })
} else {
Fmt::Interned(unsafe { transmute(self) })
}
}

Expand All @@ -162,39 +261,8 @@ impl Span {
/// Internal function to translate between an encoded span and the expanded representation.
/// This function must not be used outside the incremental engine.
#[inline]
pub fn data_untracked(self) -> SpanData {
if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
if self.len_with_tag_or_marker & PARENT_TAG == 0 {
// Inline-context format.
let len = self.len_with_tag_or_marker as u32;
debug_assert!(len <= MAX_LEN);
SpanData {
lo: BytePos(self.lo_or_index),
hi: BytePos(self.lo_or_index.debug_strict_add(len)),
ctxt: SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32),
parent: None,
}
} else {
// Inline-parent format.
let len = (self.len_with_tag_or_marker & !PARENT_TAG) as u32;
debug_assert!(len <= MAX_LEN);
let parent = LocalDefId {
local_def_index: DefIndex::from_u32(self.ctxt_or_parent_or_marker as u32),
};
SpanData {
lo: BytePos(self.lo_or_index),
hi: BytePos(self.lo_or_index.debug_strict_add(len)),
ctxt: SyntaxContext::root(),
parent: Some(parent),
}
}
} else {
// Fully-interned or partially-interned format. In either case,
// the interned value contains all the data, so we don't need to
// distinguish them.
let index = self.lo_or_index;
with_span_interner(|interner| interner.spans[index as usize])
}
pub fn data_untracked(mut self) -> SpanData {
self.fmt().data()
}

/// Returns `true` if this is a dummy span with any hygienic context.
Expand All @@ -214,26 +282,26 @@ impl Span {
}
}

// For optimization we are interested in cases in which the context is inline and the context
// update doesn't change format. All non-inline or format changing scenarios require accessing
// interner and can fall back to `Span::new`.
pub fn update_ctxt(&mut self, update: impl FnOnce(SyntaxContext) -> SyntaxContext) {
// FIXME(#125017): Update ctxt inline without touching interner when possible.
let data = self.data();
*self = data.with_ctxt(update(data.ctxt));
}

// Returns either syntactic context, if it can be retrieved without taking the interner lock,
// or an index into the interner if it cannot.
fn inline_ctxt(self) -> Result<SyntaxContext, usize> {
Ok(if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
if self.len_with_tag_or_marker & PARENT_TAG == 0 {
// Inline-context format.
SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32)
} else {
// Inline-parent format. We know that the SyntaxContext is root.
SyntaxContext::root()
fn inline_ctxt(mut self) -> Result<SyntaxContext, usize> {
match self.fmt() {
Fmt::InlineCtxt(InlineCtxt { ctxt, .. })
| Fmt::PartiallyInterned(PartiallyInterned { ctxt, .. }) => {
Ok(SyntaxContext::from_u32(*ctxt as u32))
}
} else if self.ctxt_or_parent_or_marker != CTXT_INTERNED_MARKER {
// Partially-interned format. This path avoids looking up the
// interned value, and is the whole point of the
// partially-interned format.
SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32)
} else {
// Fully-interned format.
return Err(self.lo_or_index as usize);
})
Fmt::InlineParent(_) => Ok(SyntaxContext::root()),
Fmt::Interned(span) => Err(span.index as usize),
}
}

/// This function is used as a fast path when decoding the full `SpanData` is not necessary.
Expand Down

0 comments on commit 0218d5e

Please sign in to comment.