sparse-map
sparse_growth_policy.h
Go to the documentation of this file.
1 /**
2  * MIT License
3  *
4  * Copyright (c) 2017 Tessil
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef TSL_SPARSE_GROWTH_POLICY_H
25 #define TSL_SPARSE_GROWTH_POLICY_H
26 
27 
28 #include <algorithm>
29 #include <array>
30 #include <climits>
31 #include <cmath>
32 #include <cstddef>
33 #include <iterator>
34 #include <limits>
35 #include <ratio>
36 #include <stdexcept>
37 
38 
39 namespace tsl {
40 namespace sh {
41 
42 /**
43  * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows
44  * the table to use a mask operation instead of a modulo operation to map a hash to a bucket.
45  *
46  * GrowthFactor must be a power of two >= 2.
47  */
48 template<std::size_t GrowthFactor>
50 public:
51  /**
52  * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter.
53  * This number is a minimum, the policy may update this value with a higher value if needed (but not lower).
54  *
55  * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and
56  * bucket_for_hash must always return 0 in this case.
57  */
58  explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
59  if(min_bucket_count_in_out > max_bucket_count()) {
60  throw std::length_error("The hash table exceeds its maxmimum size.");
61  }
62 
63  if(min_bucket_count_in_out > 0) {
64  min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
65  m_mask = min_bucket_count_in_out - 1;
66  }
67  else {
68  m_mask = 0;
69  }
70  }
71 
72  /**
73  * Return the bucket [0, bucket_count()) to which the hash belongs.
74  * If bucket_count() is 0, it must always return 0.
75  */
76  std::size_t bucket_for_hash(std::size_t hash) const noexcept {
77  return hash & m_mask;
78  }
79 
80  /**
81  * Return the number of buckets that should be used on next growth.
82  */
83  std::size_t next_bucket_count() const {
84  if((m_mask + 1) > max_bucket_count() / GrowthFactor) {
85  throw std::length_error("The hash table exceeds its maxmimum size.");
86  }
87 
88  return (m_mask + 1) * GrowthFactor;
89  }
90 
91  /**
92  * Return the maximum number of buckets supported by the policy.
93  */
94  std::size_t max_bucket_count() const {
95  // Largest power of two.
96  return (std::numeric_limits<std::size_t>::max() / 2) + 1;
97  }
98 
99  /**
100  * Reset the growth policy as if it was created with a bucket count of 0.
101  * After a clear, the policy must always return 0 when bucket_for_hash is called.
102  */
103  void clear() noexcept {
104  m_mask = 0;
105  }
106 
107 private:
108  static std::size_t round_up_to_power_of_two(std::size_t value) {
109  if(is_power_of_two(value)) {
110  return value;
111  }
112 
113  if(value == 0) {
114  return 1;
115  }
116 
117  --value;
118  for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
119  value |= value >> i;
120  }
121 
122  return value + 1;
123  }
124 
125  static constexpr bool is_power_of_two(std::size_t value) {
126  return value != 0 && (value & (value - 1)) == 0;
127  }
128 
129 protected:
130  static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2.");
131 
132  std::size_t m_mask;
133 };
134 
135 
136 /**
137  * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash
138  * to a bucket. Slower but it can be useful if you want a slower growth.
139  */
140 template<class GrowthFactor = std::ratio<3, 2>>
142 public:
143  explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) {
144  if(min_bucket_count_in_out > max_bucket_count()) {
145  throw std::length_error("The hash table exceeds its maxmimum size.");
146  }
147 
148  if(min_bucket_count_in_out > 0) {
149  m_mod = min_bucket_count_in_out;
150  }
151  else {
152  m_mod = 1;
153  }
154  }
155 
156  std::size_t bucket_for_hash(std::size_t hash) const noexcept {
157  return hash % m_mod;
158  }
159 
160  std::size_t next_bucket_count() const {
161  if(m_mod == max_bucket_count()) {
162  throw std::length_error("The hash table exceeds its maxmimum size.");
163  }
164 
165  const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
166  if(!std::isnormal(next_bucket_count)) {
167  throw std::length_error("The hash table exceeds its maxmimum size.");
168  }
169 
170  if(next_bucket_count > double(max_bucket_count())) {
171  return max_bucket_count();
172  }
173  else {
174  return std::size_t(next_bucket_count);
175  }
176  }
177 
178  std::size_t max_bucket_count() const {
179  return MAX_BUCKET_COUNT;
180  }
181 
182  void clear() noexcept {
183  m_mod = 1;
184  }
185 
186 private:
187  static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den;
188  static const std::size_t MAX_BUCKET_COUNT =
189  std::size_t(double(
190  std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR
191  ));
192 
193  static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
194 
195  std::size_t m_mod;
196 };
197 
198 
199 
200 namespace detail {
201 
202 static constexpr const std::array<std::size_t, 40> PRIMES = {{
203  1ul, 5ul, 17ul, 29ul, 37ul, 53ul, 67ul, 79ul, 97ul, 131ul, 193ul, 257ul, 389ul, 521ul, 769ul, 1031ul,
204  1543ul, 2053ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
205  1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul,
206  402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul
207 }};
208 
209 template<unsigned int IPrime>
210 static constexpr std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; }
211 
212 // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the
213 // compiler can optimize the modulo code better with a constant known at the compilation.
214 static constexpr const std::array<std::size_t(*)(std::size_t), 40> MOD_PRIME = {{
215  &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>,
216  &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>,
217  &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>,
218  &mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37> , &mod<38>, &mod<39>
219 }};
220 
221 }
222 
223 /**
224  * Grow the hash table by using prime numbers as bucket count. Slower than tsl::sh::power_of_two_growth_policy in
225  * general but will probably distribute the values around better in the buckets with a poor hash function.
226  *
227  * To allow the compiler to optimize the modulo operation, a lookup table is used with constant primes numbers.
228  *
229  * With a switch the code would look like:
230  * \code
231  * switch(iprime) { // iprime is the current prime of the hash table
232  * case 0: hash % 5ul;
233  * break;
234  * case 1: hash % 17ul;
235  * break;
236  * case 2: hash % 29ul;
237  * break;
238  * ...
239  * }
240  * \endcode
241  *
242  * Due to the constant variable in the modulo the compiler is able to optimize the operation
243  * by a series of multiplications, substractions and shifts.
244  *
245  * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environement.
246  */
248 public:
249  explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) {
250  auto it_prime = std::lower_bound(detail::PRIMES.begin(),
251  detail::PRIMES.end(), min_bucket_count_in_out);
252  if(it_prime == detail::PRIMES.end()) {
253  throw std::length_error("The hash table exceeds its maxmimum size.");
254  }
255 
256  m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
257  if(min_bucket_count_in_out > 0) {
258  min_bucket_count_in_out = *it_prime;
259  }
260  else {
261  min_bucket_count_in_out = 0;
262  }
263  }
264 
265  std::size_t bucket_for_hash(std::size_t hash) const noexcept {
266  return detail::MOD_PRIME[m_iprime](hash);
267  }
268 
269  std::size_t next_bucket_count() const {
270  if(m_iprime + 1 >= detail::PRIMES.size()) {
271  throw std::length_error("The hash table exceeds its maxmimum size.");
272  }
273 
274  return detail::PRIMES[m_iprime + 1];
275  }
276 
277  std::size_t max_bucket_count() const {
278  return detail::PRIMES.back();
279  }
280 
281  void clear() noexcept {
282  m_iprime = 0;
283  }
284 
285 private:
286  unsigned int m_iprime;
287 
288  static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
289  "The type of m_iprime is not big enough.");
290 };
291 
292 }
293 }
294 
295 #endif
power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: sparse_growth_policy.h:58
Definition: sparse_growth_policy.h:39
void clear() noexcept
Definition: sparse_growth_policy.h:103
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: sparse_growth_policy.h:156
Definition: sparse_growth_policy.h:200
std::size_t max_bucket_count() const
Definition: sparse_growth_policy.h:178
std::size_t next_bucket_count() const
Definition: sparse_growth_policy.h:83
std::size_t max_bucket_count() const
Definition: sparse_growth_policy.h:94
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: sparse_growth_policy.h:76
std::size_t max_bucket_count() const
Definition: sparse_growth_policy.h:277
Definition: sparse_growth_policy.h:40
void clear() noexcept
Definition: sparse_growth_policy.h:182
std::size_t m_mask
Definition: sparse_growth_policy.h:130
Definition: sparse_growth_policy.h:141
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: sparse_growth_policy.h:265
Definition: sparse_growth_policy.h:247
std::size_t next_bucket_count() const
Definition: sparse_growth_policy.h:160
prime_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: sparse_growth_policy.h:249
void clear() noexcept
Definition: sparse_growth_policy.h:281
std::size_t next_bucket_count() const
Definition: sparse_growth_policy.h:269
Definition: sparse_growth_policy.h:49
mod_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: sparse_growth_policy.h:143