Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement adaptive hashing using specialization #5

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 133 additions & 0 deletions benches/bench_adaptive.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

#![cfg(test)]
#![feature(test)]

extern crate hashmap2;
extern crate test;

use test::Bencher;

use hashmap2::HashMap;

/// Benchmarks creating an empty map with the default constructor and letting
/// it go out of scope again.
#[bench]
fn new_drop(b: &mut Bencher) {
    b.iter(|| {
        let empty_map: HashMap<i32, i32> = HashMap::new();
        assert_eq!(empty_map.len(), 0);
        // Keep the optimizer from discarding the otherwise unused map.
        test::black_box(&empty_map);
    })
}

/// Benchmarks creating a map, inserting exactly one entry, and letting the
/// map go out of scope again.
#[bench]
fn new_insert_drop(b: &mut Bencher) {
    b.iter(|| {
        let mut single_entry_map = HashMap::new();
        single_entry_map.insert(0, 0);
        assert_eq!(single_entry_map.len(), 1);
        // Keep the optimizer from discarding the map.
        test::black_box(&single_entry_map);
    })
}

/// Benchmarks inserting one new key per iteration into a map that already
/// holds the keys 1 through 1000.
#[bench]
fn grow_by_insertion(b: &mut Bencher) {
    let mut map = HashMap::new();

    // Pre-fill the map so the timed inserts hit a non-trivial table.
    for key in 1..1001 {
        map.insert(key, key);
    }

    test::black_box(&map);

    // First key that is not yet present; advanced after every timed insert.
    let mut next_key = 1001;

    b.iter(|| {
        map.insert(next_key, next_key);
        next_key += 1;
    });
    test::black_box(&map);
}

/// Benchmarks looking up every one of the 1000 keys stored in the map.
#[bench]
fn find_existing(b: &mut Bencher) {
    let mut map = HashMap::new();

    for key in 1..1001 {
        map.insert(key, key);
    }

    test::black_box(&map);

    b.iter(|| {
        // Every probed key was inserted above.
        for key in 1..1001 {
            let found = map.contains_key(&key);
            test::black_box(found);
        }
    });
}

/// Benchmarks 1000 lookups of keys that were never inserted into the map.
#[bench]
fn find_nonexisting(b: &mut Bencher) {
    let mut map = HashMap::new();

    for key in 1..1001 {
        map.insert(key, key);
    }

    test::black_box(&map);

    b.iter(|| {
        // The probed range starts right after the last inserted key.
        for missing_key in 1001..2001 {
            let found = map.contains_key(&missing_key);
            test::black_box(found);
        }
    });
}

/// Benchmarks using the map like a queue: each iteration removes the smallest
/// remaining key and inserts a new key 1000 above it, so the map keeps
/// holding 1000 entries.
#[bench]
fn hashmap_as_queue(b: &mut Bencher) {
    let mut map = HashMap::new();

    for key in 1..1001 {
        map.insert(key, key);
    }

    test::black_box(&map);

    // Key at the "front" of the queue.
    let mut front = 1;

    b.iter(|| {
        let back = front + 1000;
        map.remove(&front);
        map.insert(back, back);
        front += 1;
    });
    test::black_box(&map);
}

/// Benchmarks a mixed workload on a map holding 1000 entries: one lookup of a
/// present key, one lookup of an absent key, one removal and one insertion
/// per iteration.
#[bench]
fn get_remove_insert(b: &mut Bencher) {
    let mut m = HashMap::new();

    for i in 1..1001 {
        m.insert(i, i);
    }

    test::black_box(&m);

    let mut k = 1;

    b.iter(|| {
        // Route the lookup results through `black_box`, as the lookup-only
        // benchmarks do; otherwise the unused results give the optimizer
        // license to drop the lookups that are supposed to be measured.
        test::black_box(m.get(&(k + 400))); // key currently in the map
        test::black_box(m.get(&(k + 2000))); // key not in the map
        m.remove(&k);
        m.insert(k + 1000, k + 1000);
        k += 1;
    });
    test::black_box(&m);
}
133 changes: 133 additions & 0 deletions benches/bench_with_siphash.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

#![cfg(test)]
#![feature(test)]

extern crate hashmap2;
extern crate test;

use test::Bencher;

use hashmap2::{HashMap, RandomState};

/// Benchmarks creating an empty map that uses a fresh `RandomState` hasher
/// and letting it go out of scope again.
#[bench]
fn new_drop(b: &mut Bencher) {
    b.iter(|| {
        let empty_map: HashMap<i32, i32, _> = HashMap::with_hasher(RandomState::new());
        assert_eq!(empty_map.len(), 0);
        // Keep the optimizer from discarding the otherwise unused map.
        test::black_box(&empty_map);
    })
}

/// Benchmarks creating a `RandomState`-hashed map, inserting exactly one
/// entry, and letting the map go out of scope again.
#[bench]
fn new_insert_drop(b: &mut Bencher) {
    b.iter(|| {
        let mut single_entry_map = HashMap::with_hasher(RandomState::new());
        single_entry_map.insert(0, 0);
        assert_eq!(single_entry_map.len(), 1);
        // Keep the optimizer from discarding the map.
        test::black_box(&single_entry_map);
    })
}

/// Benchmarks inserting one new key per iteration into a `RandomState`-hashed
/// map that already holds the keys 1 through 1000.
#[bench]
fn grow_by_insertion(b: &mut Bencher) {
    let mut map = HashMap::with_hasher(RandomState::new());

    // Pre-fill the map so the timed inserts hit a non-trivial table.
    for key in 1..1001 {
        map.insert(key, key);
    }

    test::black_box(&map);

    // First key that is not yet present; advanced after every timed insert.
    let mut next_key = 1001;

    b.iter(|| {
        map.insert(next_key, next_key);
        next_key += 1;
    });
    test::black_box(&map);
}

/// Benchmarks looking up every one of the 1000 keys stored in a
/// `RandomState`-hashed map.
#[bench]
fn find_existing(b: &mut Bencher) {
    let mut map = HashMap::with_hasher(RandomState::new());

    for key in 1..1001 {
        map.insert(key, key);
    }

    test::black_box(&map);

    b.iter(|| {
        // Every probed key was inserted above.
        for key in 1..1001 {
            let found = map.contains_key(&key);
            test::black_box(found);
        }
    });
}

/// Benchmarks 1000 lookups of keys that were never inserted into a
/// `RandomState`-hashed map.
#[bench]
fn find_nonexisting(b: &mut Bencher) {
    let mut map = HashMap::with_hasher(RandomState::new());

    for key in 1..1001 {
        map.insert(key, key);
    }

    test::black_box(&map);

    b.iter(|| {
        // The probed range starts right after the last inserted key.
        for missing_key in 1001..2001 {
            let found = map.contains_key(&missing_key);
            test::black_box(found);
        }
    });
}

/// Benchmarks using a `RandomState`-hashed map like a queue: each iteration
/// removes the smallest remaining key and inserts a new key 1000 above it, so
/// the map keeps holding 1000 entries.
#[bench]
fn hashmap_as_queue(b: &mut Bencher) {
    let mut map = HashMap::with_hasher(RandomState::new());

    for key in 1..1001 {
        map.insert(key, key);
    }

    test::black_box(&map);

    // Key at the "front" of the queue.
    let mut front = 1;

    b.iter(|| {
        let back = front + 1000;
        map.remove(&front);
        map.insert(back, back);
        front += 1;
    });
    test::black_box(&map);
}

/// Benchmarks a mixed workload on a `RandomState`-hashed map holding 1000
/// entries: one lookup of a present key, one lookup of an absent key, one
/// removal and one insertion per iteration.
#[bench]
fn get_remove_insert(b: &mut Bencher) {
    let mut m = HashMap::with_hasher(RandomState::new());

    for i in 1..1001 {
        m.insert(i, i);
    }

    test::black_box(&m);

    let mut k = 1;

    b.iter(|| {
        // Route the lookup results through `black_box`, as the lookup-only
        // benchmarks do; otherwise the unused results give the optimizer
        // license to drop the lookups that are supposed to be measured.
        test::black_box(m.get(&(k + 400))); // key currently in the map
        test::black_box(m.get(&(k + 2000))); // key not in the map
        m.remove(&k);
        m.insert(k + 1000, k + 1000);
        k += 1;
    });
    test::black_box(&m);
}
131 changes: 131 additions & 0 deletions src/adaptive_hashing.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::hash::{BuildHasher, SipHasher13, Hasher};

use sip_hash_state::SipHashState;

/// Hasher factory that can operate in one of two modes: "safe" hashing, backed
/// by a `SipHashState`, or "fast" hashing, which needs no stored state.
#[derive(Clone)]
pub struct AdaptiveState {
// `Some(state)` selects safe hashing; `None` selects fast hashing.
inner: Option<SipHashState>
}

impl AdaptiveState {
    /// Creates a state in safe-hashing mode; shorthand for
    /// `new_for_safe_hashing`.
    #[inline]
    pub fn new() -> Self {
        Self::new_for_safe_hashing()
    }

    /// Creates a state in fast-hashing mode, i.e. without any SipHash state.
    #[inline]
    pub fn new_for_fast_hashing() -> Self {
        let no_sip_state = None;
        AdaptiveState { inner: no_sip_state }
    }

    /// Creates a state in safe-hashing mode, backed by a freshly constructed
    /// `SipHashState`.
    #[inline]
    pub fn new_for_safe_hashing() -> Self {
        let sip_state = SipHashState::new();
        AdaptiveState { inner: Some(sip_state) }
    }

    /// Puts this state into safe-hashing mode, replacing whatever it held
    /// before with a freshly constructed `SipHashState`.
    #[inline]
    pub fn switch_to_safe_hashing(&mut self) {
        // `inner` is the only field, so overwriting it replaces the whole state.
        self.inner = Some(SipHashState::new());
    }

    /// Returns `true` when this state is in safe-hashing mode.
    pub fn uses_safe_hashing(&self) -> bool {
        !self.inner.is_none()
    }
}

// `Default` is needed so that a `HashMap` can construct its hasher state.
impl Default for AdaptiveState {
    /// Returns a state in safe-hashing mode, exactly like `AdaptiveState::new`.
    #[inline]
    fn default() -> Self {
        AdaptiveState::new()
    }
}

impl BuildHasher for AdaptiveState {
    type Hasher = AdaptiveHasher;

    /// Builds a hasher that matches the current mode: it wraps a hasher built
    /// from the stored SipHash state in safe mode and carries none in fast
    /// mode. The fast-mode hash value always starts at zero.
    #[inline]
    fn build_hasher(&self) -> AdaptiveHasher {
        let safe_hasher = match self.inner {
            Some(ref sip_state) => Some(sip_state.build_hasher()),
            None => None,
        };
        AdaptiveHasher {
            safe_hasher: safe_hasher,
            hash: 0,
        }
    }
}

/// Hasher produced by `AdaptiveState`: it forwards to a `SipHasher13` when one
/// is present and otherwise keeps a plain 64-bit hash value of its own.
pub struct AdaptiveHasher {
// `Some` in safe-hashing mode, `None` in fast-hashing mode.
safe_hasher: Option<SipHasher13>,
// Fast-mode hash value; only read and written when `safe_hasher` is `None`.
hash: u64,
}

/// Loads the first `len` bytes of `buf` into a `u64`, interpreting them in
/// little-endian order. Bytes beyond `len` are left as zero, so a short input
/// yields the zero-extended value.
///
/// `copy_nonoverlapping` is used so the compiler can pick the most efficient
/// way to load from a possibly unaligned address.
///
/// # Safety
///
/// The copy is unchecked, so the caller must guarantee both:
///
/// * `len <= buf.len()` — otherwise bytes past the end of `buf` are read;
/// * `len <= 8` — otherwise bytes past the end of the destination `u64` are
///   written.
///
/// Both conditions are verified with `debug_assert!` in debug builds only.
#[inline]
unsafe fn load_u64_le(buf: &[u8], len: usize) -> u64 {
    use std::ptr;
    debug_assert!(len <= buf.len());
    // The destination is a single u64, i.e. exactly 8 bytes wide.
    debug_assert!(len <= 8);
    let mut data = 0u64;
    ptr::copy_nonoverlapping(buf.as_ptr(), &mut data as *mut _ as *mut u8, len);
    // The bytes were copied in memory order; `to_le` turns that into the
    // little-endian interpretation (a no-op on little-endian targets).
    data.to_le()
}

// Multipliers taken from the finalizer of XXH64.
const PRIME_2: u64 = 0xC2B2_AE3D_27D4_EB4F; // 14029467366897019727
const PRIME_3: u64 = 0x1656_67B1_9E37_79F9; // 1609587929392839161

/// Scrambles `data` with the finalizer of XXH64: three xor-shift steps
/// (by 33, 29 and 32 bits) interleaved with two wrapping multiplications.
fn mix(data: u64) -> u64 {
    let first_round = (data ^ (data >> 33)).wrapping_mul(PRIME_2);
    let second_round = (first_round ^ (first_round >> 29)).wrapping_mul(PRIME_3);
    second_round ^ (second_round >> 32)
}

impl Hasher for AdaptiveHasher {
    /// Feeds `msg` into the hasher.
    ///
    /// In safe mode the bytes are forwarded to the wrapped `SipHasher13`. In
    /// fast mode the bytes are loaded as one little-endian word, scrambled
    /// with `mix`, and stored as the current hash.
    ///
    /// # Panics
    ///
    /// Panics in fast mode when `msg` is longer than 8 bytes, because the fast
    /// path can only digest a single 64-bit word.
    #[inline]
    fn write(&mut self, msg: &[u8]) {
        if let Some(ref mut hasher) = self.safe_hasher {
            // Use safe hashing.
            hasher.write(msg);
        } else {
            // Use fast hashing.
            if msg.len() > 8 {
                panic!(
                    "AdaptiveHasher: fast hashing accepts at most 8 bytes per write, got {}",
                    msg.len()
                );
            }
            // SAFETY: the length passed is `msg.len()`, so the read stays
            // inside `msg`, and the check above guarantees it is at most 8,
            // so the write stays inside the destination `u64`.
            let msg_data = unsafe { load_u64_le(msg, msg.len()) };
            // NOTE(review): this replaces the previous hash rather than
            // combining with it, so only the last `write` call affects the
            // result. Presumably fast mode is only used for keys hashed with a
            // single write — confirm against the callers.
            self.hash = mix(msg_data);
        }
    }

    /// Returns the hash of the data written so far: the wrapped hasher's
    /// result in safe mode, or the stored fast-mode hash value otherwise.
    #[inline]
    fn finish(&self) -> u64 {
        match self.safe_hasher {
            // Use safe hashing.
            Some(ref hasher) => hasher.finish(),
            // Use fast hashing.
            None => self.hash,
        }
    }
}
Loading