Description

The implementation of unordered containers rely on hashing techniques and utilize buckets for storing elements. Each bucket is essentially a vector containing a (singly) linked list. The following steps outline how elements are located, whether for finding, inserting, or erasing:

  1. Compute the hash value of the key.
  2. Determine the bucket index by taking the remainder of the hash value divided by the bucket size, e.g., index = {hash value} % {bucket size}.
  3. Bucket[index] represents the first element of the linked list.
  4. Traverse through the linked list to locate the element by key.

While these operations have an average constant-time complexity, they are not necessarily lightweight. They involve computationally expensive overhead, including hash computation and linked list traversal.

Hashtable

The underlying hash table mechanisms for std::unordered_set, std::unordered_map, std::unordered_multiset, and std::unordered_multimap are fundamentally the same. Their differences lie in the specific features of the hash table they employ. In this post, I only focus on the simplest implementation, which is unordered_set.

Featureunordered_setunordered_mapunordered_multisetunordered_multimap
Element TypeSingle ElementKey-Value PairSingle ElementKey-Value Pair
Allows DuplicatesNoNo (Unique Keys)YesYes
Hash Table Bucket0 or 1 Element0 or 1 Key-Value PairMultiple ElementsMultiple Key-Value Pairs

Code

#include <forward_list>
#include <functional>
#include <iostream>
#include <vector>

template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>>
class HashTable
{
  public:
  HashTable(size_t bucket_count = 10)
      : bucket_count_(bucket_count), element_count_(0), buckets_(bucket_count)
  {
  }

  void insert(const Key& key)
  {
    check_load_factor();

    size_t bucket_index = Hash{}(key) % bucket_count_;
    for(const auto& element : buckets_[bucket_index])
    {
      if(KeyEqual{}(element, key))
      {
        return;
      }
    }
    buckets_[bucket_index].push_front(key);
    ++element_count_;
  }

  bool find(const Key& key)    // stl'find returns an iterator, and stl'count returns 1 or 0
  {
    size_t bucket_index = Hash{}(key) % bucket_count_;

    for(const auto& element : buckets_[bucket_index])
    {
      if(KeyEqual{}(element, key))
      {
        return true;
      }
    }
    return false;
  }

  void erase(const Key& key)
  {
    size_t bucket_index = Hash{}(key) % bucket_count_;
    // take advantage of forward_list's remove_if
    buckets_[bucket_index].remove_if([&key](const auto& element) {
      return KeyEqual{}(element, key);
    });
    --element_count_;
  }

  size_t size() const
  {
    return element_count_;
  }

  private:
  void check_load_factor()
  {
    double load_factor = static_cast<double>(element_count_) / static_cast<double>(bucket_count_);
    if(load_factor > 0.7)
    {
      rehash(bucket_count_ * 2);
    }
  }

  void rehash(size_t new_bucket_count)
  {
    std::vector<std::forward_list<Key>> new_buckets(new_bucket_count);
    for(const auto& bucket : buckets_)
    {
      for(const auto& element : bucket)
      {
        size_t new_bucket_index = Hash{}(element) % new_bucket_count;
        new_buckets[new_bucket_index].push_front(element);
      }
    }
    std::swap(new_buckets, buckets_);
    bucket_count_ = new_bucket_count;
  }

  size_t                              bucket_count_;
  size_t                              element_count_;
  std::vector<std::forward_list<Key>> buckets_;
};

template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>>
class UnorderedSet
{
  private:
  HashTable<Key, Hash, KeyEqual> table;

  public:
  void insert(const Key& key)
  {
    table.insert(key);
  }

  bool find(const Key& key)
  {
    return table.find(key);
  }

  void erase(const Key& key)
  {
    table.erase(key);
  }

  size_t size() const
  {
    return table.size();
  }
};

template <typename Key, typename Value>
struct Pair
{
  using Key = Key;
  Key   key;
  Value value;

  Pair(const Key& k, const Value& v) : key(k), value(v) {}
};

template <
    typename Pair,
    typename Hash     = std::hash<Pair::Key>,
    typename KeyEqual = std::equal_to<Pair::Key>>
class UnorderedMap
{
  private:
  HashTable<Pair, Hash, KeyEqual> table;
};

int main()
{
  auto set = HashTable<int>();
  set.insert(19);
  set.insert(5);
  set.insert(3);
  set.erase(5);
  std::cout << set.find(5) << " " << set.size() << std::endl;
}