CS253: Software Development with C++

Fall 2019

Hashing

Show Lecture.Hashing as a slide show.

CS253 Hashing

Hashing in General

To hash an object:

Typical Hash Table

A hash table starts like this, an array of five (for instance) pointers, all initially null.

      ┌─────┬─────┬─────┬─────┬─────┐
      │  ●  │  ●  │  ●  │  ●  │  ●  │
      └─────┴─────┴─────┴─────┴─────┘

Typical Hash Table

After adding "animal" and "vegetable":

      ┌─────┬─────┬─────┬─────┬─────┐
      │  ●  │     │  ●  │     │  ●  │
      └─────┴──┼──┴─────┴──┼──┴─────┘
               │           │
               ∨           ∨
         ┌────────┐     ┌───────────┐
         │ animal │     │ vegetable │
         └────────┘     └───────────┘

Typical Hash Table

After adding "mineral":

      ┌─────┬─────┬─────┬─────┬─────┐
      │  ●  │     │  ●  │     │  ●  │
      └─────┴──┼──┴─────┴──┼──┴─────┘
               │           │
               ∨           ∨
         ┌────────┐     ┌─────────┐   ┌───────────┐
         │ animal │     │ mineral │──>│ vegetable │
         └────────┘     └─────────┘   └───────────┘

Typical Hash Table

      ┌─────┬─────┬─────┬─────┬─────┐
      │  ●  │     │  ●  │     │  ●  │
      └─────┴──┼──┴─────┴──┼──┴─────┘
               │           │
               ∨           ∨
         ┌────────┐     ┌─────────┐   ┌───────────┐
         │ animal │     │ mineral │──>│ vegetable │
         └────────┘     └─────────┘   └───────────┘

Expanding the Table

Hashing in C++

// Build a hash-based set holding the first eight primes.
unordered_set<int> primes = {2, 3, 5, 7, 11, 13, 17, 19};
// Visit every element; the order printed is the container's own order,
// which (as the output shows) is not the order of the initializer list.
for (auto n : primes)
    cout << n << ' ';
19 17 11 7 5 3 13 2 

I Care

OK, let’s say that we do care how the elements are spread across the buckets. We can find out:

unordered_set<int> primes = {2, 3, 5, 7, 11, 13, 17, 19};
// Report how many buckets the table currently has.
cout << "We have " << primes.bucket_count() << " buckets.\n";
// Walk the buckets by index and report only the non-empty ones;
// bucket_size(b) is the number of elements stored in bucket b.
for (size_t b = 0; b<primes.bucket_count(); b++)
    if (primes.bucket_size(b))
        cout << "Bucket " << b << " has "
             << primes.bucket_size(b) << " items\n";
// Finally, print the elements in the container's own iteration order.
for (auto n : primes)
    cout << n << ' ';
We have 11 buckets.
Bucket 0 has 1 items
Bucket 2 has 2 items
Bucket 3 has 1 items
Bucket 5 has 1 items
Bucket 6 has 1 items
Bucket 7 has 1 items
Bucket 8 has 1 items
19 17 11 7 5 3 13 2 

Variable Number of Buckets

The number of buckets (usually prime) increases as the amount of data stored in the hash table grows:

unordered_set<int> us;
// Each output line is: number of elements, then number of buckets.
cout << us.size() << ' ' << us.bucket_count() << '\n';
// Insert rand() values until the set holds 10 elements; the loop tests
// size() rather than counting insertions, because a set stores each value once.
while (us.size()<10) us.insert(rand());
cout << us.size() << ' ' << us.bucket_count() << '\n';
// Grow to 100 elements and report again.
while (us.size()<100) us.insert(rand());
cout << us.size() << ' ' << us.bucket_count() << '\n';
// Grow to one million elements and report again.
while (us.size()<1000000) us.insert(rand());
cout << us.size() << ' ' << us.bucket_count() << '\n';
0 1
10 13
100 127
1000000 1447153

What are the Hash Values?

We can find out the hash values, if we care:

// hash<T> is a function-object type: hash<T>() creates a temporary hasher,
// and the second pair of parentheses calls it on the given value.
// One hash value is printed per line, for several built-in types and strings.
cout << hash<int>()(253) << '\n'
     << hash<int>()(-253) << '\n'
     << hash<double>()(253.0) << '\n'
     << hash<char>()('a') << '\n'
     << hash<bool>()(true) << '\n'
     << hash<string>()("CS253") << '\n'
     << hash<string>()("") << '\n'
     << hash<string>()("a") << '\n'
     << hash<string>()("b") << '\n'
     << hash<string>()("c") << '\n';
253
18446744073709551363
12026514335406308073
97
1
14414501272585457977
6142509188972423790
4993892634952068459
10838281452030117757
10959529184379665549

User-defined Types

The standard library’s hash doesn’t know how to hash your own types:

struct Point { float x, y; } p = {1.2, 3.4};  // user-defined type and a global instance p

int main() {
    cout << hash<Point>()(p);  // does not compile: hash<Point> has no usable definition (error below)
}
c.cc:4: error: use of deleted function 'std::hash<Point>::hash()'

User-defined Types

User-defined Types

We can create a template specialization for std::hash<Point>:

// A 2-D point, together with one global instance used by the example.
struct Point {
    float x, y;
};
Point p = {1.2, 3.4};

// Teach the standard library to hash a Point by specializing std::hash.
// The result is the XOR of the hashes of the two coordinates.
namespace std {
    template <>
    struct hash<Point> {
        size_t operator()(const Point &p) const {
            const hash<float> coord_hash{};
            const size_t hx = coord_hash(p.x);
            const size_t hy = coord_hash(p.y);
            return hx ^ hy;
        }
    };
}

int main() {
    cout << hash<Point>()(p);
}
11708950365973905104

User-defined Types

Putting a Point into an unordered_set still fails, because the container also needs operator== for Point:

struct Point { float x, y; } p = {1.2, 3.4};  // user-defined type and a global instance p

namespace std {
    template <>
    struct hash<Point> {  // hashing for Point is provided here ...
        size_t operator()(const Point &p) const {
           return hash<float>()(p.x) ^ hash<float>()(p.y);  // XOR of the two coordinate hashes
        }
    }; 
}

int main() {
    unordered_set<Point> us;
    us.insert(p);  // ... but this does not compile: no operator== for Point (error below)
}
In file included from /usr/include/c++/8/string:48,
                 from /usr/include/c++/8/bits/locale_classes.h:40,
                 from /usr/include/c++/8/bits/ios_base.h:41,
                 from /usr/include/c++/8/ios:42,
                 from /usr/include/c++/8/istream:38,
                 from /usr/include/c++/8/sstream:38,
                 from /usr/include/c++/8/complex:45,
                 from /usr/include/c++/8/ccomplex:39,
                 from /usr/include/c++/8/x86_64-redhat-linux/bits/stdc++.h:52,
                 from c.cc:1:
/usr/include/c++/8/bits/stl_function.h: In instantiation of 'constexpr bool std::equal_to<_Tp>::operator()(const _Tp&, const _Tp&) const [with _Tp = Point]':
/usr/include/c++/8/bits/hashtable_policy.h:1460:   required from 'static bool std::__detail::_Equal_helper<_Key, _Value, _ExtractKey, _Equal, _HashCodeType, true>::_S_equals(const _Equal&, const _ExtractKey&, const _Key&, _HashCodeType, std::__detail::_Hash_node<_Value, true>*) [with _Key = Point; _Value = Point; _ExtractKey = std::__detail::_Identity; _Equal = std::equal_to<Point>; _HashCodeType = long unsigned int]'
/usr/include/c++/8/bits/hashtable_policy.h:1844:   required from 'bool std::__detail::_Hashtable_base<_Key, _Value, _ExtractKey, _Equal, _H1, _H2, _Hash, _Traits>::_M_equals(const _Key&, std::__detail::_Hashtable_base<_Key, _Value, _ExtractKey, _Equal, _H1, _H2, _Hash, _Traits>::__hash_code, std::__detail::_Hashtable_base<_Key, _Value, _ExtractKey, _Equal, _H1, _H2, _Hash, _Traits>::__node_type*) const [with _Key = Point; _Value = Point; _ExtractKey = std::__detail::_Identity; _Equal = std::equal_to<Point>; _H1 = std::hash<Point>; _H2 = std::__detail::_Mod_range_hashing; _Hash = std::__detail::_Default_ranged_hash; _Traits = std::__detail::_Hashtable_traits<true, true, true>; std::__detail::_Hashtable_base<_Key, _Value, _ExtractKey, _Equal, _H1, _H2, _Hash, _Traits>::__hash_code = long unsigned int; std::__detail::_Hashtable_base<_Key, _Value, _ExtractKey, _Equal, _H1, _H2, _Hash, _Traits>::__node_type = std::__detail::_Hash_node<Point, true>]'
/usr/include/c++/8/bits/hashtable.h:1562:   required from 'std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::__node_base* std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::_M_find_before_node(std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::size_type, const key_type&, std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::__hash_code) const [with _Key = Point; _Value = Point; _Alloc = std::allocator<Point>; _ExtractKey = std::__detail::_Identity; _Equal = std::equal_to<Point>; _H1 = std::hash<Point>; _H2 = std::__detail::_Mod_range_hashing; _Hash = std::__detail::_Default_ranged_hash; _RehashPolicy = std::__detail::_Prime_rehash_policy; _Traits = std::__detail::_Hashtable_traits<true, true, true>; std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::__node_base = std::__detail::_Hash_node_base; std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::size_type = long unsigned int; std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::key_type = Point; std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::__hash_code = long unsigned int]'
/usr/include/c++/8/bits/hashtable.h:649:   required from 'std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::__node_type* std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::_M_find_node(std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::size_type, const key_type&, std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::__hash_code) const [with _Key = Point; _Value = Point; _Alloc = std::allocator<Point>; _ExtractKey = std::__detail::_Identity; _Equal = std::equal_to<Point>; _H1 = std::hash<Point>; _H2 = std::__detail::_Mod_range_hashing; _Hash = std::__detail::_Default_ranged_hash; _RehashPolicy = std::__detail::_Prime_rehash_policy; _Traits = std::__detail::_Hashtable_traits<true, true, true>; std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::__node_type = std::__detail::_Hash_node<Point, true>; typename _Traits::__hash_cached = std::integral_constant<bool, true>; std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::size_type = long unsigned int; std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::key_type = Point; std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::__hash_code = long unsigned int]'
/usr/include/c++/8/bits/hashtable.h:1830:   required from 'std::pair<typename std::__detail::_Hashtable_base<_Key, _Value, _ExtractKey, _Equal, _H1, _H2, _Hash, _Traits>::iterator, bool> std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::_M_insert(_Arg&&, const _NodeGenerator&, std::true_type, std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::size_type) [with _Arg = const Point&; _NodeGenerator = std::__detail::_AllocNode<std::allocator<std::__detail::_Hash_node<Point, true> > >; _Key = Point; _Value = Point; _Alloc = std::allocator<Point>; _ExtractKey = std::__detail::_Identity; _Equal = std::equal_to<Point>; _H1 = std::hash<Point>; _H2 = std::__detail::_Mod_range_hashing; _Hash = std::__detail::_Default_ranged_hash; _RehashPolicy = std::__detail::_Prime_rehash_policy; _Traits = std::__detail::_Hashtable_traits<true, true, true>; typename std::__detail::_Hashtable_base<_Key, _Value, _ExtractKey, _Equal, _H1, _H2, _Hash, _Traits>::iterator = std::__detail::_Node_iterator<Point, true, true>; std::true_type = std::integral_constant<bool, true>; std::_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::size_type = long unsigned int]'
/usr/include/c++/8/bits/hashtable_policy.h:834:   required from 'std::__detail::_Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::__ireturn_type std::__detail::_Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::insert(const value_type&) [with _Key = Point; _Value = Point; _Alloc = std::allocator<Point>; _ExtractKey = std::__detail::_Identity; _Equal = std::equal_to<Point>; _H1 = std::hash<Point>; _H2 = std::__detail::_Mod_range_hashing; _Hash = std::__detail::_Default_ranged_hash; _RehashPolicy = std::__detail::_Prime_rehash_policy; _Traits = std::__detail::_Hashtable_traits<true, true, true>; std::__detail::_Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::__ireturn_type = std::pair<std::__detail::_Node_iterator<Point, true, true>, bool>; std::__detail::_Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash, _RehashPolicy, _Traits>::value_type = Point]'
/usr/include/c++/8/bits/unordered_set.h:421:   required from 'std::pair<typename std::_Hashtable<_Value, _Value, _Alloc, std::__detail::_Identity, _Pred, _Hash, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<std::__not_<std::__and_<std::__is_fast_hash<_Hash>, std::__is_nothrow_invocable<const _Hash&, const _Tp&> > >::value, true, true> >::iterator, bool> std::unordered_set<_Value, _Hash, _Pred, _Alloc>::insert(const value_type&) [with _Value = Point; _Hash = std::hash<Point>; _Pred = std::equal_to<Point>; _Alloc = std::allocator<Point>; typename std::_Hashtable<_Value, _Value, _Alloc, std::__detail::_Identity, _Pred, _Hash, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<std::__not_<std::__and_<std::__is_fast_hash<_Hash>, std::__is_nothrow_invocable<const _Hash&, const _Tp&> > >::value, true, true> >::iterator = std::__detail::_Node_iterator<Point, true, true>; std::unordered_set<_Value, _Hash, _Pred, _Alloc>::value_type = Point]'
c.cc:14:   required from here
/usr/include/c++/8/bits/stl_function.h:356: error: no match for 'operator==' in 
   '__x == __y' (operand types are 'const Point' and 'const Point')

User-defined Types

Now, unordered_set works with a Point:

// A 2-D point, together with one global instance used by main() below.
struct Point { float x, y; } p = {1.2, 3.4};

namespace std {
    // Specialization of std::hash so that Point can be a key in the
    // unordered containers.
    template <>
    struct hash<Point> {
        // Returns a hash that depends on both coordinates and on their order.
        // A plain XOR of the two coordinate hashes would send every point
        // with x == y to 0 and give (a,b) and (b,a) the same value, so the
        // hash of y is mixed into the hash of x asymmetrically instead.
        size_t operator()(const Point &p) const {
            size_t seed = hash<float>()(p.x);
            seed ^= hash<float>()(p.y) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
            return seed;
        }
    };
}

// Equality for Point: two points are equal when both coordinates are equal.
// The unordered containers use it to tell apart keys that share a bucket.
bool operator==(const Point &a, const Point &b) {
    return a.x==b.x && a.y==b.y;
}

int main() {
    unordered_set<Point> us;
    us.insert(p);
}

The Rules