robin-map
robin_growth_policy.h
Go to the documentation of this file.
1 /**
2  * MIT License
3  *
4  * Copyright (c) 2017 Tessil
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef TSL_ROBIN_GROWTH_POLICY_H
25 #define TSL_ROBIN_GROWTH_POLICY_H
26 
27 
28 #include <algorithm>
29 #include <array>
30 #include <climits>
31 #include <cmath>
32 #include <cstddef>
33 #include <iterator>
34 #include <limits>
35 #include <ratio>
36 #include <stdexcept>
37 
38 
39 #ifdef TSL_DEBUG
40 # define tsl_rh_assert(expr) assert(expr)
41 #else
42 # define tsl_rh_assert(expr) (static_cast<void>(0))
43 #endif
44 
45 
46 /**
47  * If exceptions are enabled, throw the exception passed in parameter, otherwise call std::terminate.
48  */
49 #if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (defined (_MSC_VER) && defined (_CPPUNWIND))) && !defined(TSL_NO_EXCEPTIONS)
50 # define TSL_RH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
51 #else
52 # ifdef NDEBUG
53 # define TSL_RH_THROW_OR_TERMINATE(ex, msg) std::terminate()
54 # else
55 # include <cstdio>
56 # define TSL_RH_THROW_OR_TERMINATE(ex, msg) do { std::fprintf(stderr, msg); std::terminate(); } while(0)
57 # endif
58 #endif
59 
60 
61 #if defined(__GNUC__) || defined(__clang__)
62 # define TSL_RH_LIKELY(exp) (__builtin_expect(!!(exp), true))
63 #else
64 # define TSL_RH_LIKELY(exp) (exp)
65 #endif
66 
67 
68 namespace tsl {
69 namespace rh {
70 
71 /**
72  * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows
73  * the table to use a mask operation instead of a modulo operation to map a hash to a bucket.
74  *
75  * GrowthFactor must be a power of two >= 2.
76  */
77 template<std::size_t GrowthFactor>
79 public:
80  /**
81  * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter.
82  * This number is a minimum, the policy may update this value with a higher value if needed (but not lower).
83  *
84  * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and
85  * bucket_for_hash must always return 0 in this case.
86  */
87  explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
88  if(min_bucket_count_in_out > max_bucket_count()) {
89  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
90  }
91 
92  if(min_bucket_count_in_out > 0) {
93  min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
94  m_mask = min_bucket_count_in_out - 1;
95  }
96  else {
97  m_mask = 0;
98  }
99  }
100 
101  /**
102  * Return the bucket [0, bucket_count()) to which the hash belongs.
103  * If bucket_count() is 0, it must always return 0.
104  */
105  std::size_t bucket_for_hash(std::size_t hash) const noexcept {
106  return hash & m_mask;
107  }
108 
109  /**
110  * Return the number of buckets that should be used on next growth.
111  */
112  std::size_t next_bucket_count() const {
113  if((m_mask + 1) > max_bucket_count() / GrowthFactor) {
114  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
115  }
116 
117  return (m_mask + 1) * GrowthFactor;
118  }
119 
120  /**
121  * Return the maximum number of buckets supported by the policy.
122  */
123  std::size_t max_bucket_count() const {
124  // Largest power of two.
125  return (std::numeric_limits<std::size_t>::max() / 2) + 1;
126  }
127 
128  /**
129  * Reset the growth policy as if it was created with a bucket count of 0.
130  * After a clear, the policy must always return 0 when bucket_for_hash is called.
131  */
132  void clear() noexcept {
133  m_mask = 0;
134  }
135 
136 private:
137  static std::size_t round_up_to_power_of_two(std::size_t value) {
138  if(is_power_of_two(value)) {
139  return value;
140  }
141 
142  if(value == 0) {
143  return 1;
144  }
145 
146  --value;
147  for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
148  value |= value >> i;
149  }
150 
151  return value + 1;
152  }
153 
154  static constexpr bool is_power_of_two(std::size_t value) {
155  return value != 0 && (value & (value - 1)) == 0;
156  }
157 
158 protected:
159  static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2.");
160 
161  std::size_t m_mask;
162 };
163 
164 
165 /**
166  * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash
167  * to a bucket. Slower but it can be useful if you want a slower growth.
168  */
169 template<class GrowthFactor = std::ratio<3, 2>>
171 public:
172  explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) {
173  if(min_bucket_count_in_out > max_bucket_count()) {
174  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
175  }
176 
177  if(min_bucket_count_in_out > 0) {
178  m_mod = min_bucket_count_in_out;
179  }
180  else {
181  m_mod = 1;
182  }
183  }
184 
185  std::size_t bucket_for_hash(std::size_t hash) const noexcept {
186  return hash % m_mod;
187  }
188 
189  std::size_t next_bucket_count() const {
190  if(m_mod == max_bucket_count()) {
191  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
192  }
193 
194  const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
195  if(!std::isnormal(next_bucket_count)) {
196  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
197  }
198 
199  if(next_bucket_count > double(max_bucket_count())) {
200  return max_bucket_count();
201  }
202  else {
203  return std::size_t(next_bucket_count);
204  }
205  }
206 
207  std::size_t max_bucket_count() const {
208  return MAX_BUCKET_COUNT;
209  }
210 
211  void clear() noexcept {
212  m_mod = 1;
213  }
214 
215 private:
216  static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den;
217  static const std::size_t MAX_BUCKET_COUNT =
218  std::size_t(double(
219  std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR
220  ));
221 
222  static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
223 
224  std::size_t m_mod;
225 };
226 
227 
228 
229 namespace detail {
230 
231 static constexpr const std::array<std::size_t, 40> PRIMES = {{
232  1ul, 5ul, 17ul, 29ul, 37ul, 53ul, 67ul, 79ul, 97ul, 131ul, 193ul, 257ul, 389ul, 521ul, 769ul, 1031ul,
233  1543ul, 2053ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
234  1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul,
235  402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul
236 }};
237 
238 template<unsigned int IPrime>
239 static constexpr std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; }
240 
241 // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the
242 // compiler can optimize the modulo code better with a constant known at the compilation.
243 static constexpr const std::array<std::size_t(*)(std::size_t), 40> MOD_PRIME = {{
244  &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>,
245  &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>,
246  &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>,
247  &mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37> , &mod<38>, &mod<39>
248 }};
249 
250 }
251 
252 /**
253  * Grow the hash table by using prime numbers as bucket count. Slower than tsl::rh::power_of_two_growth_policy in
254  * general but will probably distribute the values around better in the buckets with a poor hash function.
255  *
256  * To allow the compiler to optimize the modulo operation, a lookup table is used with constant primes numbers.
257  *
258  * With a switch the code would look like:
259  * \code
260  * switch(iprime) { // iprime is the current prime of the hash table
261  * case 0: hash % 5ul;
262  * break;
263  * case 1: hash % 17ul;
264  * break;
265  * case 2: hash % 29ul;
266  * break;
267  * ...
268  * }
269  * \endcode
270  *
271  * Due to the constant variable in the modulo the compiler is able to optimize the operation
272  * by a series of multiplications, substractions and shifts.
273  *
274  * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environement.
275  */
277 public:
278  explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) {
279  auto it_prime = std::lower_bound(detail::PRIMES.begin(),
280  detail::PRIMES.end(), min_bucket_count_in_out);
281  if(it_prime == detail::PRIMES.end()) {
282  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
283  }
284 
285  m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
286  if(min_bucket_count_in_out > 0) {
287  min_bucket_count_in_out = *it_prime;
288  }
289  else {
290  min_bucket_count_in_out = 0;
291  }
292  }
293 
294  std::size_t bucket_for_hash(std::size_t hash) const noexcept {
295  return detail::MOD_PRIME[m_iprime](hash);
296  }
297 
298  std::size_t next_bucket_count() const {
299  if(m_iprime + 1 >= detail::PRIMES.size()) {
300  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
301  }
302 
303  return detail::PRIMES[m_iprime + 1];
304  }
305 
306  std::size_t max_bucket_count() const {
307  return detail::PRIMES.back();
308  }
309 
310  void clear() noexcept {
311  m_iprime = 0;
312  }
313 
314 private:
315  unsigned int m_iprime;
316 
317  static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
318  "The type of m_iprime is not big enough.");
319 };
320 
321 }
322 }
323 
324 #endif
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: robin_growth_policy.h:105
Definition: robin_growth_policy.h:69
std::size_t next_bucket_count() const
Definition: robin_growth_policy.h:189
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: robin_growth_policy.h:294
void clear() noexcept
Definition: robin_growth_policy.h:211
Definition: robin_growth_policy.h:68
std::size_t max_bucket_count() const
Definition: robin_growth_policy.h:306
mod_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: robin_growth_policy.h:172
void clear() noexcept
Definition: robin_growth_policy.h:132
std::size_t max_bucket_count() const
Definition: robin_growth_policy.h:123
std::size_t next_bucket_count() const
Definition: robin_growth_policy.h:298
Definition: robin_growth_policy.h:170
std::size_t m_mask
Definition: robin_growth_policy.h:159
void clear() noexcept
Definition: robin_growth_policy.h:310
prime_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: robin_growth_policy.h:278
#define TSL_RH_THROW_OR_TERMINATE(ex, msg)
Definition: robin_growth_policy.h:56
power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: robin_growth_policy.h:87
std::size_t max_bucket_count() const
Definition: robin_growth_policy.h:207
Definition: robin_growth_policy.h:276
std::size_t next_bucket_count() const
Definition: robin_growth_policy.h:112
Definition: robin_growth_policy.h:229
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: robin_growth_policy.h:185
Definition: robin_growth_policy.h:78