Skip to content

Commit

Permalink
hash.c: use rb_hash_* functions for objid_hash, double and ident.
Browse files Browse the repository at this point in the history
Note that ident becomes a bit slower, because the hash value is more "random".
The new version calculates a fair hash value, and so provides a statistically
fair collision rate.
The previous version relies on the internal pattern of symbols and "usually" provides
fewer collisions, though it could be compromised easily.
  • Loading branch information
funny-falcon committed Sep 27, 2016
1 parent 163273f commit 11eaca2
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 52 deletions.
55 changes: 19 additions & 36 deletions hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,7 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
}
else if (BUILTIN_TYPE(a) == T_FLOAT) {
flt:
hval = rb_dbl_hash(rb_float_value(a));
hnum = FIX2LONG(hval);
hnum = rb_dbl_hash_long(rb_float_value(a));
}
else {
hnum = other_func(a);
Expand All @@ -199,34 +198,29 @@ rb_any_hash(VALUE a)
return any_hash(a, obj_any_hash);
}

static st_index_t
rb_num_hash_start(st_index_t n)
long
rb_dbl_hash_long(double d)
{
/*
* This hash function is lightly-tuned for Ruby. Further tuning
* should be possible. Notes:
*
* - (n >> 3) alone is great for heap objects and OK for fixnum,
* however symbols perform poorly.
* - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
* n.b.: +3 to remove most ID scope, +1 worked well initially, too
* n.b.: +1 (instead of 3) worked well initially, too
* - (n << 16) was finally added to avoid losing bits for fixnums
* - avoid expensive modulo instructions, it is currently only
* shifts and bitmask operations.
*/
return (n >> (RUBY_SPECIAL_SHIFT + 3) ^ (n << 16)) ^ (n >> 3);
long hash;
unsigned i;
#define ind_in_dbl type_roomof(double, st_index_t)
union {
st_index_t i[ind_in_dbl];
double d;
} v = { {0} };
/* normalize -0.0 to 0.0 */
v.d = d == 0.0 ? 0.0 : d;
hash = rb_hash_start(v.i[0]);
for (i = 1; i < ind_in_dbl; i++) {
hash = rb_hash_uint(hash, v.i[i]);
}
return rb_hash_end(hash);
}

long
rb_objid_hash(st_index_t index)
{
st_index_t hnum = rb_num_hash_start(index);

hnum = rb_hash_start(hnum);
hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
hnum = rb_hash_end(hnum);
return hnum;
return rb_hash_end(index);
}

static st_index_t
Expand Down Expand Up @@ -258,18 +252,7 @@ static const struct st_hash_type objhash = {
static st_index_t
rb_ident_hash(st_data_t n)
{
#ifdef USE_FLONUM /* RUBY */
/*
* - flonum (on 64-bit) is pathologically bad, mix the actual
* float value in, but do not use the float value as-is since
* many integers get interpreted as 2.0 or -2.0 [Bug #10761]
*/
if (FLONUM_P(n)) {
n ^= (st_data_t)rb_float_value(n);
}
#endif

return (st_index_t)rb_num_hash_start((st_index_t)n);
return rb_hash_end((st_index_t)n);
}

static const struct st_hash_type identhash = {
Expand Down
2 changes: 1 addition & 1 deletion internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1080,7 +1080,6 @@ struct st_table *rb_hash_tbl_raw(VALUE hash);
VALUE rb_hash_has_key(VALUE hash, VALUE key);
VALUE rb_hash_default_value(VALUE hash, VALUE key);
VALUE rb_hash_set_default_proc(VALUE hash, VALUE proc);
long rb_objid_hash(st_index_t index);
st_table *rb_init_identtable(void);
st_table *rb_init_identtable_with_size(st_index_t size);

Expand Down Expand Up @@ -1147,6 +1146,7 @@ VALUE rb_int_modulo(VALUE x, VALUE y);
VALUE rb_int_round(VALUE num, int ndigits);
VALUE rb_int2str(VALUE num, int base);
VALUE rb_dbl_hash(double d);
long rb_dbl_hash_long(double d);
VALUE rb_fix_plus(VALUE x, VALUE y);
VALUE rb_int_ge(VALUE x, VALUE y);

Expand Down
7 changes: 1 addition & 6 deletions numeric.c
Original file line number Diff line number Diff line change
Expand Up @@ -1350,12 +1350,7 @@ flo_hash(VALUE num)
VALUE
rb_dbl_hash(double d)
{
st_index_t hash;

/* normalize -0.0 to 0.0 */
if (d == 0.0) d = 0.0;
hash = rb_memhash(&d, sizeof(d));
return LONG2FIX(hash);
return LONG2FIX(rb_dbl_hash_long(d));
}

VALUE
Expand Down
10 changes: 1 addition & 9 deletions object.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,7 @@ rb_obj_equal(VALUE obj1, VALUE obj2)
VALUE
rb_obj_hash(VALUE obj)
{
VALUE oid = rb_obj_id(obj);
#if SIZEOF_LONG == SIZEOF_VOIDP
st_index_t index = NUM2LONG(oid);
#elif SIZEOF_LONG_LONG == SIZEOF_VOIDP
st_index_t index = NUM2LL(oid);
#else
# error not supported
#endif
return LONG2FIX(rb_objid_hash(index));
/* stub for documentation */
}
#else
VALUE rb_obj_hash(VALUE obj);
Expand Down

0 comments on commit 11eaca2

Please sign in to comment.