Skip to content

Commit

Permalink
Auto merge of #110083 - saethlin:encode-hashes-as-bytes, r=cjgillot
Browse files Browse the repository at this point in the history
Encode hashes as bytes, not varint

In a few places, we store hashes as `u64` or `u128` and then apply `derive(Decodable, Encodable)` to the enclosing struct/enum. It is more efficient to encode hashes directly than to try to apply some varint encoding. This PR adds two new types, `Hash64` and `Hash128`, which are produced by `StableHasher` and replace every place where a `u64` or `u128` representing a hash was stored.

Distribution of the byte lengths of leb128 encodings, from `x build --stage 2` with `incremental = true`

Before:
```
(  1) 373418203 (53.7%, 53.7%): 1
(  2) 196240113 (28.2%, 81.9%): 3
(  3) 108157958 (15.6%, 97.5%): 2
(  4)  17213120 ( 2.5%, 99.9%): 4
(  5)    223614 ( 0.0%,100.0%): 9
(  6)    216262 ( 0.0%,100.0%): 10
(  7)     15447 ( 0.0%,100.0%): 5
(  8)      3633 ( 0.0%,100.0%): 19
(  9)      3030 ( 0.0%,100.0%): 8
( 10)      1167 ( 0.0%,100.0%): 18
( 11)      1032 ( 0.0%,100.0%): 7
( 12)      1003 ( 0.0%,100.0%): 6
( 13)        10 ( 0.0%,100.0%): 16
( 14)        10 ( 0.0%,100.0%): 17
( 15)         5 ( 0.0%,100.0%): 12
( 16)         4 ( 0.0%,100.0%): 14
```

After:
```
(  1) 372939136 (53.7%, 53.7%): 1
(  2) 196240140 (28.3%, 82.0%): 3
(  3) 108014969 (15.6%, 97.5%): 2
(  4)  17192375 ( 2.5%,100.0%): 4
(  5)       435 ( 0.0%,100.0%): 5
(  6)        83 ( 0.0%,100.0%): 18
(  7)        79 ( 0.0%,100.0%): 10
(  8)        50 ( 0.0%,100.0%): 9
(  9)         6 ( 0.0%,100.0%): 19
```

The remaining 9- or 10-byte and 18- or 19-byte encodings are `u64` and `u128` values, respectively, that have their high bits set. As far as I can tell these are coming primarily from `SwitchTargets`.
  • Loading branch information
bors committed Apr 18, 2023
2 parents c609da5 + 073d99b commit b3f1379
Show file tree
Hide file tree
Showing 38 changed files with 288 additions and 137 deletions.
3 changes: 2 additions & 1 deletion compiler/rustc_abi/src/layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ pub trait LayoutCalculator {
{
// `ReprOptions.layout_seed` is a deterministic seed that we can use to
// randomize field ordering with
let mut rng = Xoshiro128StarStar::seed_from_u64(repr.field_shuffle_seed);
let mut rng =
Xoshiro128StarStar::seed_from_u64(repr.field_shuffle_seed.as_u64());

// Shuffle the ordering of the fields
optimizing.shuffle(&mut rng);
Expand Down
7 changes: 4 additions & 3 deletions compiler/rustc_abi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use std::str::FromStr;

use bitflags::bitflags;
use rustc_data_structures::intern::Interned;
use rustc_data_structures::stable_hasher::Hash64;
#[cfg(feature = "nightly")]
use rustc_data_structures::stable_hasher::StableOrd;
use rustc_index::vec::{IndexSlice, IndexVec};
Expand Down Expand Up @@ -77,12 +78,12 @@ pub struct ReprOptions {
pub flags: ReprFlags,
/// The seed to be used for randomizing a type's layout
///
/// Note: This could technically be a `[u8; 16]` (a `u128`) which would
/// Note: This could technically be a `Hash128` which would
/// be the "most accurate" hash as it'd encompass the item and crate
/// hash without loss, but it does pay the price of being larger.
/// Everything's a tradeoff, a `u64` seed should be sufficient for our
/// Everything's a tradeoff, a 64-bit seed should be sufficient for our
/// purposes (primarily `-Z randomize-layout`)
pub field_shuffle_seed: u64,
pub field_shuffle_seed: Hash64,
}

impl ReprOptions {
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_llvm/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::value::Value;
use rustc_ast::Mutability;
use rustc_codegen_ssa::mir::place::PlaceRef;
use rustc_codegen_ssa::traits::*;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher};
use rustc_hir::def_id::DefId;
use rustc_middle::bug;
use rustc_middle::mir::interpret::{ConstAllocation, GlobalAlloc, Scalar};
Expand Down Expand Up @@ -261,7 +261,7 @@ impl<'ll, 'tcx> ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> {
let hash = self.tcx.with_stable_hashing_context(|mut hcx| {
let mut hasher = StableHasher::new();
alloc.hash_stable(&mut hcx, &mut hasher);
hasher.finish::<u128>()
hasher.finish::<Hash128>()
});
llvm::set_value_name(value, format!("alloc_{hash:032x}").as_bytes());
}
Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use rustc_codegen_ssa::debuginfo::type_names;
use rustc_codegen_ssa::mir::debuginfo::{DebugScope, FunctionDebugContext, VariableKind};
use rustc_codegen_ssa::traits::*;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::stable_hasher::Hash128;
use rustc_data_structures::sync::Lrc;
use rustc_hir::def_id::{DefId, DefIdMap};
use rustc_index::vec::IndexVec;
Expand Down Expand Up @@ -61,7 +62,7 @@ pub struct CodegenUnitDebugContext<'ll, 'tcx> {
llcontext: &'ll llvm::Context,
llmod: &'ll llvm::Module,
builder: &'ll mut DIBuilder<'ll>,
created_files: RefCell<FxHashMap<Option<(u128, SourceFileHash)>, &'ll DIFile>>,
created_files: RefCell<FxHashMap<Option<(Hash128, SourceFileHash)>, &'ll DIFile>>,

type_map: metadata::TypeMap<'ll, 'tcx>,
namespace_map: RefCell<DefIdMap<&'ll DIScope>>,
Expand Down
5 changes: 2 additions & 3 deletions compiler/rustc_codegen_ssa/src/debuginfo/type_names.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// * `"` is treated as the start of a string.

use rustc_data_structures::fx::FxHashSet;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
use rustc_hir::def_id::DefId;
use rustc_hir::definitions::{DefPathData, DefPathDataName, DisambiguatedDefPathData};
use rustc_hir::{AsyncGeneratorKind, GeneratorKind, Mutability};
Expand Down Expand Up @@ -675,8 +675,7 @@ fn push_const_param<'tcx>(tcx: TyCtxt<'tcx>, ct: ty::Const<'tcx>, output: &mut S
hcx.while_hashing_spans(false, |hcx| {
ct.to_valtree().hash_stable(hcx, &mut hasher)
});
let hash: u64 = hasher.finish();
hash
hasher.finish::<Hash64>()
});

if cpp_like_debuginfo(tcx) {
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_const_eval/src/interpret/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ pub(crate) fn eval_nullary_intrinsic<'tcx>(
}
sym::type_id => {
ensure_monomorphic_enough(tcx, tp_ty)?;
ConstValue::from_u64(tcx.type_id_hash(tp_ty))
ConstValue::from_u64(tcx.type_id_hash(tp_ty).as_u64())
}
sym::variant_count => match tp_ty.kind() {
// Correctly handles non-monomorphic calls, so there is no need for ensure_monomorphic_enough.
Expand Down
43 changes: 30 additions & 13 deletions compiler/rustc_data_structures/src/fingerprint.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::stable_hasher;
use crate::stable_hasher::{Hash64, StableHasher, StableHasherResult};
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use std::hash::{Hash, Hasher};

Expand All @@ -9,32 +9,49 @@ mod tests;
#[repr(C)]
pub struct Fingerprint(u64, u64);

impl Fingerprint {
pub const ZERO: Fingerprint = Fingerprint(0, 0);
pub trait FingerprintComponent {
fn as_u64(&self) -> u64;
}

impl FingerprintComponent for Hash64 {
#[inline]
pub fn new(_0: u64, _1: u64) -> Fingerprint {
Fingerprint(_0, _1)
fn as_u64(&self) -> u64 {
Hash64::as_u64(*self)
}
}

impl FingerprintComponent for u64 {
#[inline]
fn as_u64(&self) -> u64 {
*self
}
}

impl Fingerprint {
pub const ZERO: Fingerprint = Fingerprint(0, 0);

#[inline]
pub fn from_smaller_hash(hash: u64) -> Fingerprint {
Fingerprint(hash, hash)
pub fn new<A, B>(_0: A, _1: B) -> Fingerprint
where
A: FingerprintComponent,
B: FingerprintComponent,
{
Fingerprint(_0.as_u64(), _1.as_u64())
}

#[inline]
pub fn to_smaller_hash(&self) -> u64 {
pub fn to_smaller_hash(&self) -> Hash64 {
// Even though both halves of the fingerprint are expected to be good
// quality hash values, let's still combine the two values because the
// Fingerprints in DefPathHash have the StableCrateId portion which is
// the same for all DefPathHashes from the same crate. Combining the
// two halves makes sure we get a good quality hash in such cases too.
self.0.wrapping_mul(3).wrapping_add(self.1)
Hash64::new(self.0.wrapping_mul(3).wrapping_add(self.1))
}

#[inline]
pub fn as_value(&self) -> (u64, u64) {
(self.0, self.1)
pub fn split(&self) -> (Hash64, Hash64) {
(Hash64::new(self.0), Hash64::new(self.1))
}

#[inline]
Expand Down Expand Up @@ -131,9 +148,9 @@ impl FingerprintHasher for crate::unhash::Unhasher {
}
}

impl stable_hasher::StableHasherResult for Fingerprint {
impl StableHasherResult for Fingerprint {
#[inline]
fn finish(hasher: stable_hasher::StableHasher) -> Self {
fn finish(hasher: StableHasher) -> Self {
let (_0, _1) = hasher.finalize();
Fingerprint(_0, _1)
}
Expand Down
7 changes: 4 additions & 3 deletions compiler/rustc_data_structures/src/fingerprint/tests.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use super::*;
use crate::stable_hasher::Hash64;

// Check that `combine_commutative` is order independent.
#[test]
fn combine_commutative_is_order_independent() {
let a = Fingerprint::new(0xf6622fb349898b06, 0x70be9377b2f9c610);
let b = Fingerprint::new(0xa9562bf5a2a5303c, 0x67d9b6c82034f13d);
let c = Fingerprint::new(0x0d013a27811dbbc3, 0x9a3f7b3d9142ec43);
let a = Fingerprint::new(Hash64::new(0xf6622fb349898b06), Hash64::new(0x70be9377b2f9c610));
let b = Fingerprint::new(Hash64::new(0xa9562bf5a2a5303c), Hash64::new(0x67d9b6c82034f13d));
let c = Fingerprint::new(Hash64::new(0x0d013a27811dbbc3), Hash64::new(0x9a3f7b3d9142ec43));
let permutations = [(a, b, c), (a, c, b), (b, a, c), (b, c, a), (c, a, b), (c, b, a)];
let f = a.combine_commutative(b).combine_commutative(c);
for p in &permutations {
Expand Down
132 changes: 132 additions & 0 deletions compiler/rustc_data_structures/src/hashes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
//! rustc encodes a lot of hashes. If hashes are stored as `u64` or `u128`, a `derive(Encodable)`
//! will apply varint encoding to the hashes, which is less efficient than directly encoding the 8
//! or 16 bytes of the hash.
//!
//! The types in this module represent 64-bit or 128-bit hashes produced by a `StableHasher`.
//! `Hash64` and `Hash128` expose some utility functions to encourage users to not extract the inner
//! hash value as an integer type and accidentally apply varint encoding to it.
//!
//! In contrast with `Fingerprint`, users of these types cannot and should not attempt to construct
//! and decompose these types into constituent pieces. The point of these types is only to
//! connect the fact that they can only be produced by a `StableHasher` to their
//! `Encode`/`Decode` impls.
use crate::stable_hasher::{StableHasher, StableHasherResult};
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use std::fmt;
use std::ops::BitXorAssign;

/// A 64-bit hash value wrapped in a dedicated type.
///
/// The wrapped integer is private; use [`Hash64::as_u64`] to read it.
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub struct Hash64 {
    inner: u64,
}

impl Hash64 {
    /// The hash whose wrapped value is `0`.
    pub const ZERO: Self = Self { inner: 0 };

    /// Wraps a raw `u64` in a `Hash64`. Only visible inside this crate.
    #[inline]
    pub(crate) fn new(n: u64) -> Self {
        Hash64 { inner: n }
    }

    /// Returns the wrapped value as a plain `u64`.
    #[inline]
    pub fn as_u64(self) -> u64 {
        let Hash64 { inner } = self;
        inner
    }
}

/// Allows `hash ^= bits` where `bits` is a plain `u64`.
impl BitXorAssign<u64> for Hash64 {
    /// XORs `rhs` into the wrapped value in place.
    #[inline]
    fn bitxor_assign(&mut self, rhs: u64) {
        let mixed = self.inner ^ rhs;
        self.inner = mixed;
    }
}

impl<S: Encoder> Encodable<S> for Hash64 {
    /// Emits the hash as its raw little-endian bytes via `emit_raw_bytes`,
    /// rather than going through the encoder's integer encoding.
    #[inline]
    fn encode(&self, s: &mut S) {
        let le_bytes = self.inner.to_le_bytes();
        s.emit_raw_bytes(&le_bytes);
    }
}

impl<D: Decoder> Decodable<D> for Hash64 {
#[inline]
fn decode(d: &mut D) -> Self {
Self { inner: u64::from_le_bytes(d.read_raw_bytes(8).try_into().unwrap()) }
}
}

impl StableHasherResult for Hash64 {
#[inline]
fn finish(hasher: StableHasher) -> Self {
Self { inner: hasher.finalize().0 }
}
}

impl fmt::Debug for Hash64 {
    /// Formats the hash exactly as the wrapped `u64` would be formatted with `{:?}`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&self.inner, f)
    }
}

impl fmt::LowerHex for Hash64 {
    /// Formats the hash exactly as the wrapped `u64` would be formatted with `{:x}`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let value = &self.inner;
        fmt::LowerHex::fmt(value, f)
    }
}

/// A 128-bit hash value wrapped in a dedicated type.
///
/// The wrapped integer is private; use [`Hash128::as_u128`] to read it.
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub struct Hash128 {
    inner: u128,
}

impl Hash128 {
    /// Narrows this hash to a `Hash64` by keeping only the low 64 bits.
    #[inline]
    pub fn truncate(self) -> Hash64 {
        // The `as` cast deliberately drops the upper 64 bits.
        let low_bits = self.inner as u64;
        Hash64 { inner: low_bits }
    }

    /// Adds the wrapped values of `self` and `other`, wrapping around on overflow.
    #[inline]
    pub fn wrapping_add(self, other: Self) -> Self {
        let sum = self.inner.wrapping_add(other.inner);
        Hash128 { inner: sum }
    }

    /// Returns the wrapped value as a plain `u128`.
    #[inline]
    pub fn as_u128(self) -> u128 {
        let Hash128 { inner } = self;
        inner
    }
}

impl<S: Encoder> Encodable<S> for Hash128 {
    /// Emits the hash as its raw little-endian bytes via `emit_raw_bytes`,
    /// rather than going through the encoder's integer encoding.
    #[inline]
    fn encode(&self, s: &mut S) {
        let le_bytes = self.inner.to_le_bytes();
        s.emit_raw_bytes(&le_bytes);
    }
}

impl<D: Decoder> Decodable<D> for Hash128 {
#[inline]
fn decode(d: &mut D) -> Self {
Self { inner: u128::from_le_bytes(d.read_raw_bytes(16).try_into().unwrap()) }
}
}

impl StableHasherResult for Hash128 {
#[inline]
fn finish(hasher: StableHasher) -> Self {
let (_0, _1) = hasher.finalize();
Self { inner: u128::from(_0) | (u128::from(_1) << 64) }
}
}

impl fmt::Debug for Hash128 {
    /// Formats the hash exactly as the wrapped `u128` would be formatted with `{:?}`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&self.inner, f)
    }
}

impl fmt::LowerHex for Hash128 {
    /// Formats the hash exactly as the wrapped `u128` would be formatted with `{:x}`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let value = &self.inner;
        fmt::LowerHex::fmt(value, f)
    }
}
1 change: 1 addition & 0 deletions compiler/rustc_data_structures/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ pub mod work_queue;
pub use atomic_ref::AtomicRef;
pub mod aligned;
pub mod frozen;
mod hashes;
pub mod owned_slice;
pub mod sso;
pub mod steal;
Expand Down
Loading

0 comments on commit b3f1379

Please sign in to comment.