Skip to content

Commit

Permalink
Auto merge of #56699 - nnethercote:SymbolIndex, r=<try>
Browse files Browse the repository at this point in the history
Use a `newtype_index!` within `Symbol`.

This shrinks `Option<Symbol>` from 8 bytes to 4 bytes, which shrinks
`Token` from 24 bytes to 16 bytes. This reduces instruction counts by up
to 1% across a range of benchmarks.

r? @oli-obk
  • Loading branch information
bors committed Dec 11, 2018
2 parents 4c0116e + 7f67df4 commit dbbd519
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 22 deletions.
4 changes: 4 additions & 0 deletions src/libsyntax/parse/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,10 @@ pub enum Token {
Eof,
}

// `Token` is used a lot. Make sure it doesn't unintentionally get bigger.
#[cfg(target_arch = "x86_64")]
static_assert!(MEM_SIZE_OF_STATEMENT: mem::size_of::<Token>() == 16);

impl Token {
pub fn interpolated(nt: Nonterminal) -> Token {
Token::Interpolated(Lrc::new((nt, LazyTokenStream::new())))
Expand Down
3 changes: 3 additions & 0 deletions src/libsyntax_pos/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,13 @@
#![feature(nll)]
#![feature(non_exhaustive)]
#![feature(optin_builtin_traits)]
#![feature(rustc_attrs)]
#![feature(specialization)]
#![feature(step_trait)]
#![cfg_attr(not(stage0), feature(stdsimd))]

extern crate arena;
#[macro_use]
extern crate rustc_data_structures;

#[macro_use]
Expand Down
58 changes: 36 additions & 22 deletions src/libsyntax_pos/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
use arena::DroplessArena;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::indexed_vec::Idx;
use serialize::{Decodable, Decoder, Encodable, Encoder};

use std::fmt;
Expand Down Expand Up @@ -143,9 +144,15 @@ impl Decodable for Ident {
}
}

/// A symbol is an interned or gensymed string.
/// A symbol is an interned or gensymed string. The use of newtype_index! means
/// that Option<Symbol> only takes up 4 bytes, because newtype_index! reserves
/// the last 256 values for tagging purposes.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Symbol(u32);
pub struct Symbol(SymbolIndex);

newtype_index! {
pub struct SymbolIndex { .. }
}

// The interner is pointed to by a thread local value which is only set on the main thread
// with parallelization is disabled. So we don't allow `Symbol` to transfer between threads
Expand All @@ -156,6 +163,10 @@ impl !Send for Symbol { }
impl !Sync for Symbol { }

impl Symbol {
const fn new(n: u32) -> Self {
Symbol(SymbolIndex::from_u32_const(n))
}

/// Maps a string to its interned representation.
pub fn intern(string: &str) -> Self {
with_interner(|interner| interner.intern(string))
Expand Down Expand Up @@ -189,15 +200,15 @@ impl Symbol {
}

pub fn as_u32(self) -> u32 {
self.0
self.0.as_u32()
}
}

impl fmt::Debug for Symbol {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
if is_gensymed {
write!(f, "{}({})", self, self.0)
write!(f, "{}({:?})", self, self.0)
} else {
write!(f, "{}", self)
}
Expand Down Expand Up @@ -229,6 +240,9 @@ impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
}

// The `&'static str`s in this type actually point into the arena.
//
// Note that normal symbols are indexed upward from 0, and gensyms are indexed
// downward from SymbolIndex::MAX_AS_U32.
#[derive(Default)]
pub struct Interner {
arena: DroplessArena,
Expand All @@ -243,7 +257,7 @@ impl Interner {
for &string in init {
if string == "" {
// We can't allocate empty strings in the arena, so handle this here.
let name = Symbol(this.strings.len() as u32);
let name = Symbol::new(this.strings.len() as u32);
this.names.insert("", name);
this.strings.push("");
} else {
Expand All @@ -258,7 +272,7 @@ impl Interner {
return name;
}

let name = Symbol(self.strings.len() as u32);
let name = Symbol::new(self.strings.len() as u32);

// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
// UTF-8.
Expand All @@ -276,10 +290,10 @@ impl Interner {
}

pub fn interned(&self, symbol: Symbol) -> Symbol {
if (symbol.0 as usize) < self.strings.len() {
if (symbol.0.as_usize()) < self.strings.len() {
symbol
} else {
self.interned(self.gensyms[(!0 - symbol.0) as usize])
self.interned(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize])
}
}

Expand All @@ -290,17 +304,17 @@ impl Interner {

fn gensymed(&mut self, symbol: Symbol) -> Symbol {
self.gensyms.push(symbol);
Symbol(!0 - self.gensyms.len() as u32 + 1)
Symbol::new(SymbolIndex::MAX_AS_U32 - self.gensyms.len() as u32 + 1)
}

fn is_gensymed(&mut self, symbol: Symbol) -> bool {
symbol.0 as usize >= self.strings.len()
symbol.0.as_usize() >= self.strings.len()
}

pub fn get(&self, symbol: Symbol) -> &str {
match self.strings.get(symbol.0 as usize) {
match self.strings.get(symbol.0.as_usize()) {
Some(string) => string,
None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
None => self.get(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]),
}
}
}
Expand All @@ -324,7 +338,7 @@ macro_rules! declare_keywords {(
$(
#[allow(non_upper_case_globals)]
pub const $konst: Keyword = Keyword {
ident: Ident::with_empty_ctxt(super::Symbol($index))
ident: Ident::with_empty_ctxt(super::Symbol::new($index))
};
)*

Expand Down Expand Up @@ -709,19 +723,19 @@ mod tests {
fn interner_tests() {
let mut i: Interner = Interner::default();
// first one is zero:
assert_eq!(i.intern("dog"), Symbol(0));
assert_eq!(i.intern("dog"), Symbol::new(0));
// re-use gets the same entry:
assert_eq!(i.intern("dog"), Symbol(0));
assert_eq!(i.intern("dog"), Symbol::new(0));
// different string gets a different #:
assert_eq!(i.intern("cat"), Symbol(1));
assert_eq!(i.intern("cat"), Symbol(1));
assert_eq!(i.intern("cat"), Symbol::new(1));
assert_eq!(i.intern("cat"), Symbol::new(1));
// dog is still at zero
assert_eq!(i.intern("dog"), Symbol(0));
assert_eq!(i.gensym("zebra"), Symbol(4294967295));
// gensym of same string gets new number :
assert_eq!(i.gensym("zebra"), Symbol(4294967294));
assert_eq!(i.intern("dog"), Symbol::new(0));
assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32));
// gensym of same string gets new number:
assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32 - 1));
// gensym of *existing* string gets new number:
assert_eq!(i.gensym("dog"), Symbol(4294967293));
assert_eq!(i.gensym("dog"), Symbol::new(SymbolIndex::MAX_AS_U32 - 2));
}

#[test]
Expand Down

0 comments on commit dbbd519

Please sign in to comment.