array-hash
array_hash.h
Go to the documentation of this file.
1 /**
2  * MIT License
3  *
4  * Copyright (c) 2017 Tessil
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef TSL_ARRAY_HASH_H
25 #define TSL_ARRAY_HASH_H
26 
27 #include <algorithm>
28 #include <cassert>
29 #include <cmath>
30 #include <cstddef>
31 #include <cstdint>
32 #include <cstdlib>
33 #include <cstring>
34 #include <iterator>
35 #include <limits>
36 #include <memory>
37 #include <stdexcept>
38 #include <type_traits>
39 #include <utility>
40 #include <vector>
42 
43 
44 /*
45  * __has_include is a bit useless (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79433),
46  * check also __cplusplus version.
47  */
48 #ifdef __has_include
49 # if __has_include(<string_view>) && __cplusplus >= 201703L
50 # define TSL_AH_HAS_STRING_VIEW
51 # endif
52 #endif
53 
54 
55 #ifdef TSL_AH_HAS_STRING_VIEW
56 # include <string_view>
57 #endif
58 
59 
60 #ifdef TSL_DEBUG
61 # define tsl_ah_assert(expr) assert(expr)
62 #else
63 # define tsl_ah_assert(expr) (static_cast<void>(0))
64 #endif
65 
66 
67 
68 /**
69  * Implementation of the array hash structure described in the
70  * "Cache-conscious collision resolution in string hash tables." (Askitis Nikolas and Justin Zobel, 2005) paper.
71  */
72 namespace tsl {
73 
74 namespace ah {
75 
76 template<class CharT>
77 struct str_hash {
78 #ifdef TSL_AH_HAS_STRING_VIEW
79  std::size_t operator()(const CharT* key, std::size_t key_size) const {
81  }
82 #else
83  /**
84  * FNV-1a hash
85  */
86  std::size_t operator()(const CharT* key, std::size_t key_size) const {
87  static const std::size_t init = std::size_t((sizeof(std::size_t) == 8)?0xcbf29ce484222325:0x811c9dc5);
88  static const std::size_t multiplier = std::size_t((sizeof(std::size_t) == 8)?0x100000001b3:0x1000193);
89 
90  std::size_t hash = init;
91  for (std::size_t i = 0; i < key_size; ++i) {
92  hash ^= key[i];
93  hash *= multiplier;
94  }
95 
96  return hash;
97  }
98 #endif
99 };
100 
101 template<class CharT>
102 struct str_equal {
103  bool operator()(const CharT* key_lhs, std::size_t key_size_lhs,
104  const CharT* key_rhs, std::size_t key_size_rhs) const
105  {
106  if(key_size_lhs != key_size_rhs) {
107  return false;
108  }
109  else {
110  return std::memcmp(key_lhs, key_rhs, key_size_lhs * sizeof(CharT)) == 0;
111  }
112  }
113 };
114 }
115 
116 
117 namespace detail_array_hash {
118 
119 template<typename T, typename = void>
120 struct is_iterator: std::false_type {
121 };
122 
123 template<typename T>
124 struct is_iterator<T, typename std::enable_if<!std::is_same<typename std::iterator_traits<T>::iterator_category, void>::value>::type>: std::true_type {
125 };
126 
127 static constexpr bool is_power_of_two(std::size_t value) {
128  return value != 0 && (value & (value - 1)) == 0;
129 }
130 
131 
132 
133 /**
134  * Fixed size type used to represent size_type values on serialization. Need to be big enough
135  * to represent a std::size_t on 32 and 64 bits platforms, and must be the same size on both platforms.
136  */
137 using slz_size_type = std::uint64_t;
138 
139 template<class T, class Deserializer>
140 static T deserialize_value(Deserializer& deserializer) {
141  // MSVC < 2017 is not conformant, circumvent the problem by removing the template keyword
142 #if defined (_MSC_VER) && _MSC_VER < 1910
143  return deserializer.Deserializer::operator()<T>();
144 #else
145  return deserializer.Deserializer::template operator()<T>();
146 #endif
147 }
148 
149 /**
150  * For each string in the bucket, store the size of the string, the chars of the string
151  * and T, if it's not void. T should be either void or an unsigned type.
152  *
153  * End the buffer with END_OF_BUCKET flag. END_OF_BUCKET has the same type as the string size variable.
154  *
155  * m_buffer (CharT*):
156  * | size of str1 (KeySizeT) | str1 (const CharT*) | value (T if T != void) | ... |
157  * | size of strN (KeySizeT) | strN (const CharT*) | value (T if T != void) | END_OF_BUCKET (KeySizeT) |
158  *
159  * m_buffer is null if there is no string in the bucket.
160  *
161  * KeySizeT and T are extended to be a multiple of CharT when stored in the buffer.
162  *
163  * Use std::malloc and std::free instead of new and delete so we can have access to std::realloc.
164  */
165 template<class CharT,
166  class T,
167  class KeyEqual,
168  class KeySizeT,
169  bool StoreNullTerminator>
170 class array_bucket {
171  template<typename U>
172  using has_mapped_type = typename std::integral_constant<bool, !std::is_same<U, void>::value>;
173 
174  static_assert(!has_mapped_type<T>::value || std::is_unsigned<T>::value,
175  "T should be either void or an unsigned type.");
176 
177  static_assert(std::is_unsigned<KeySizeT>::value, "KeySizeT should be an unsigned type.");
178 
179 public:
180  template<bool IsConst>
181  class array_bucket_iterator;
182 
183  using char_type = CharT;
184  using key_size_type = KeySizeT;
185  using mapped_type = T;
186  using size_type = std::size_t;
187  using key_equal = KeyEqual;
188  using iterator = array_bucket_iterator<false>;
189  using const_iterator = array_bucket_iterator<true>;
190 
191  static_assert(sizeof(KeySizeT) <= sizeof(size_type), "sizeof(KeySizeT) should be <= sizeof(std::size_t;)");
192  static_assert(std::is_unsigned<size_type>::value, "");
193 
194 private:
195  /**
196  * Return how much space in bytes the type U will take when stored in the buffer.
197  * As the buffer is of type CharT, U may take more space than sizeof(U).
198  *
199  * Example: sizeof(CharT) = 4, sizeof(U) = 2 => U will take 4 bytes in the buffer instead of 2.
200  */
201  template<typename U>
202  static constexpr size_type sizeof_in_buff() noexcept {
203  static_assert(is_power_of_two(sizeof(U)), "sizeof(U) should be a power of two.");
204  static_assert(is_power_of_two(sizeof(CharT)), "sizeof(CharT) should be a power of two.");
205 
206  return std::max(sizeof(U), sizeof(CharT));
207  }
208 
209  /**
210  * Same as sizeof_in_buff<U>, but instead of returning the size in bytes return it in term of sizeof(CharT).
211  */
212  template<typename U>
213  static constexpr size_type size_as_char_t() noexcept {
214  return sizeof_in_buff<U>() / sizeof(CharT);
215  }
216 
217  static key_size_type read_key_size(const CharT* buffer) noexcept {
218  key_size_type key_size;
219  std::memcpy(&key_size, buffer, sizeof(key_size));
220 
221  return key_size;
222  }
223 
224  static mapped_type read_value(const CharT* buffer) noexcept {
225  mapped_type value;
226  std::memcpy(&value, buffer, sizeof(value));
227 
228  return value;
229  }
230 
231  static bool is_end_of_bucket(const CharT* buffer) noexcept {
232  return read_key_size(buffer) == END_OF_BUCKET;
233  }
234 
235 public:
236  /**
237  * Return the size required for an entry with a key of size 'key_size'.
238  */
239  template<class U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
240  static size_type entry_required_bytes(size_type key_size) noexcept {
241  return sizeof_in_buff<key_size_type>() + (key_size + KEY_EXTRA_SIZE) * sizeof(CharT);
242  }
243 
244  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
245  static size_type entry_required_bytes(size_type key_size) noexcept {
246  return sizeof_in_buff<key_size_type>() + (key_size + KEY_EXTRA_SIZE) * sizeof(CharT) +
247  sizeof_in_buff<mapped_type>();
248  }
249 
250 private:
251  /**
252  * Return the size of the current entry in buffer.
253  */
254  static size_type entry_size_bytes(const CharT* buffer) noexcept {
255  return entry_required_bytes(read_key_size(buffer));
256  }
257 
258 public:
259  template<bool IsConst>
260  class array_bucket_iterator {
261  friend class array_bucket;
262 
263  using buffer_type = typename std::conditional<IsConst, const CharT, CharT>::type;
264 
265  explicit array_bucket_iterator(buffer_type* position) noexcept: m_position(position) {
266  }
267 
268  public:
269  using iterator_category = std::forward_iterator_tag;
270  using value_type = void;
271  using difference_type = std::ptrdiff_t;
272  using reference = void;
273  using pointer = void;
274 
275  public:
276  array_bucket_iterator() noexcept: m_position(nullptr) {
277  }
278 
279  const CharT* key() const {
280  return m_position + size_as_char_t<key_size_type>();
281  }
282 
283  size_type key_size() const {
284  return read_key_size(m_position);
285  }
286 
287  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
288  U value() const {
289  return read_value(m_position + size_as_char_t<key_size_type>() + key_size() + KEY_EXTRA_SIZE);
290  }
291 
292 
293  template<class U = T, typename std::enable_if<has_mapped_type<U>::value && !IsConst && std::is_same<U, T>::value>::type* = nullptr>
294  void set_value(U value) noexcept {
295  std::memcpy(m_position + size_as_char_t<key_size_type>() + key_size() + KEY_EXTRA_SIZE,
296  &value, sizeof(value));
297  }
298 
299  array_bucket_iterator& operator++() {
300  m_position += entry_size_bytes(m_position)/sizeof(CharT);
301  if(is_end_of_bucket(m_position)) {
302  m_position = nullptr;
303  }
304 
305  return *this;
306  }
307 
308  array_bucket_iterator operator++(int) {
309  array_bucket_iterator tmp(*this);
310  ++*this;
311 
312  return tmp;
313  }
314 
315  friend bool operator==(const array_bucket_iterator& lhs, const array_bucket_iterator& rhs) {
316  return lhs.m_position == rhs.m_position;
317  }
318 
319  friend bool operator!=(const array_bucket_iterator& lhs, const array_bucket_iterator& rhs) {
320  return !(lhs == rhs);
321  }
322 
323  private:
324  buffer_type* m_position;
325  };
326 
327 
328 
329  static iterator end_it() noexcept {
330  return iterator(nullptr);
331  }
332 
333  static const_iterator cend_it() noexcept {
334  return const_iterator(nullptr);
335  }
336 
337 public:
338  array_bucket(): m_buffer(nullptr) {
339  }
340 
341  /**
342  * Reserve 'size' in the buffer of the bucket. The created bucket is empty.
343  */
344  array_bucket(std::size_t size): m_buffer(nullptr) {
345  if(size == 0) {
346  return;
347  }
348 
349  m_buffer = static_cast<CharT*>(std::malloc(size*sizeof(CharT) + sizeof_in_buff<decltype(END_OF_BUCKET)>()));
350  if(m_buffer == nullptr) {
351  throw std::bad_alloc();
352  }
353 
354  const auto end_of_bucket = END_OF_BUCKET;
355  std::memcpy(m_buffer, &end_of_bucket, sizeof(end_of_bucket));
356  }
357 
358  ~array_bucket() {
359  clear();
360  }
361 
362  array_bucket(const array_bucket& other) {
363  if(other.m_buffer == nullptr) {
364  m_buffer = nullptr;
365  return;
366  }
367 
368  const size_type other_buffer_size = other.size();
369  m_buffer = static_cast<CharT*>(std::malloc(other_buffer_size*sizeof(CharT) + sizeof_in_buff<decltype(END_OF_BUCKET)>()));
370  if(m_buffer == nullptr) {
371  throw std::bad_alloc();
372  }
373 
374  std::memcpy(m_buffer, other.m_buffer, other_buffer_size*sizeof(CharT));
375 
376  const auto end_of_bucket = END_OF_BUCKET;
377  std::memcpy(m_buffer + other_buffer_size, &end_of_bucket, sizeof(end_of_bucket));
378  }
379 
380  array_bucket(array_bucket&& other) noexcept: m_buffer(other.m_buffer) {
381  other.m_buffer = nullptr;
382  }
383 
384  array_bucket& operator=(array_bucket other) noexcept {
385  other.swap(*this);
386 
387  return *this;
388  }
389 
390  void swap(array_bucket& other) noexcept {
391  std::swap(m_buffer, other.m_buffer);
392  }
393 
394  iterator begin() noexcept { return iterator(m_buffer); }
395  iterator end() noexcept { return iterator(nullptr); }
396  const_iterator begin() const noexcept { return cbegin(); }
397  const_iterator end() const noexcept { return cend(); }
398  const_iterator cbegin() const noexcept { return const_iterator(m_buffer); }
399  const_iterator cend() const noexcept { return const_iterator(nullptr); }
400 
401  /**
402  * Return an iterator pointing to the key entry if presents or, if not there, to the position
403  * past the last element of the bucket. Return end() if the bucket has not be initialized yet.
404  *
405  * The boolean of the pair is set to true if the key is there, false otherwise.
406  */
407  std::pair<const_iterator, bool> find_or_end_of_bucket(const CharT* key, size_type key_size) const noexcept {
408  if(m_buffer == nullptr) {
409  return std::make_pair(cend(), false);
410  }
411 
412  const CharT* buffer_ptr_in_out = m_buffer;
413  const bool found = find_or_end_of_bucket_impl(key, key_size, buffer_ptr_in_out);
414 
415  return std::make_pair(const_iterator(buffer_ptr_in_out), found);
416  }
417 
418  /**
419  * Append the element 'key' with its potential value at the end of the bucket.
420  * 'end_of_bucket' should point past the end of the last element in the bucket, end() if the bucket
421  * was not intiailized yet. You usually get this value from find_or_end_of_bucket.
422  *
423  * Return the position where the element was actually inserted.
424  */
425  template<class... ValueArgs>
426  const_iterator append(const_iterator end_of_bucket, const CharT* key, size_type key_size,
427  ValueArgs&&... value)
428  {
429  const key_size_type key_sz = as_key_size_type(key_size);
430 
431  if(end_of_bucket == cend()) {
432  tsl_ah_assert(m_buffer == nullptr);
433 
434  const size_type buffer_size = entry_required_bytes(key_sz) + sizeof_in_buff<decltype(END_OF_BUCKET)>();
435 
436  m_buffer = static_cast<CharT*>(std::malloc(buffer_size));
437  if(m_buffer == nullptr) {
438  throw std::bad_alloc();
439  }
440 
441  append_impl(key, key_sz, m_buffer, std::forward<ValueArgs>(value)...);
442 
443  return const_iterator(m_buffer);
444  }
445  else {
446  tsl_ah_assert(is_end_of_bucket(end_of_bucket.m_position));
447 
448  const size_type current_size = ((end_of_bucket.m_position + size_as_char_t<decltype(END_OF_BUCKET)>()) -
449  m_buffer) * sizeof(CharT);
450  const size_type new_size = current_size + entry_required_bytes(key_sz);
451 
452 
453  CharT* new_buffer = static_cast<CharT*>(std::realloc(m_buffer, new_size));
454  if(new_buffer == nullptr) {
455  throw std::bad_alloc();
456  }
457  m_buffer = new_buffer;
458 
459 
460  CharT* buffer_append_pos = m_buffer + current_size / sizeof(CharT) -
461  size_as_char_t<decltype(END_OF_BUCKET)>();
462  append_impl(key, key_sz, buffer_append_pos, std::forward<ValueArgs>(value)...);
463 
464  return const_iterator(buffer_append_pos);
465  }
466 
467  }
468 
469  const_iterator erase(const_iterator position) noexcept {
470  tsl_ah_assert(position.m_position != nullptr && !is_end_of_bucket(position.m_position));
471 
472  // get mutable pointers
473  CharT* start_entry = m_buffer + (position.m_position - m_buffer);
474  CharT* start_next_entry = start_entry + entry_size_bytes(start_entry) / sizeof(CharT);
475 
476 
477  CharT* end_buffer_ptr = start_next_entry;
478  while(!is_end_of_bucket(end_buffer_ptr)) {
479  end_buffer_ptr += entry_size_bytes(end_buffer_ptr) / sizeof(CharT);
480  }
481  end_buffer_ptr += size_as_char_t<decltype(END_OF_BUCKET)>();
482 
483 
484  const size_type size_to_move = (end_buffer_ptr - start_next_entry) * sizeof(CharT);
485  std::memmove(start_entry, start_next_entry, size_to_move);
486 
487 
488  if(is_end_of_bucket(m_buffer)) {
489  clear();
490  return cend();
491  }
492  else if(is_end_of_bucket(start_entry)) {
493  return cend();
494  }
495  else {
496  return const_iterator(start_entry);
497  }
498  }
499 
500  /**
501  * Return true if the element has been erased
502  */
503  bool erase(const CharT* key, size_type key_size) noexcept {
504  if(m_buffer == nullptr) {
505  return false;
506  }
507 
508  const CharT* entry_buffer_ptr_in_out = m_buffer;
509  bool found = find_or_end_of_bucket_impl(key, key_size, entry_buffer_ptr_in_out);
510  if(found) {
511  erase(const_iterator(entry_buffer_ptr_in_out));
512 
513  return true;
514  }
515  else {
516  return false;
517  }
518  }
519 
520  /**
521  * Bucket should be big enough and there is no check to see if the key already exists.
522  * No check on key_size.
523  */
524  template<class... ValueArgs>
525  void append_in_reserved_bucket_no_check(const CharT* key, size_type key_size, ValueArgs&&... value) noexcept {
526  CharT* buffer_ptr = m_buffer;
527  while(!is_end_of_bucket(buffer_ptr)) {
528  buffer_ptr += entry_size_bytes(buffer_ptr)/sizeof(CharT);
529  }
530 
531  append_impl(key, key_size_type(key_size), buffer_ptr, std::forward<ValueArgs>(value)...);
532  }
533 
534  bool empty() const noexcept {
535  return m_buffer == nullptr || is_end_of_bucket(m_buffer);
536  }
537 
538  void clear() noexcept {
539  std::free(m_buffer);
540  m_buffer = nullptr;
541  }
542 
543  iterator mutable_iterator(const_iterator pos) noexcept {
544  return iterator(m_buffer + (pos.m_position - m_buffer));
545  }
546 
547  template<class Serializer>
548  void serialize(Serializer& serializer) const {
549  const slz_size_type bucket_size = size();
550  tsl_ah_assert(m_buffer != nullptr || bucket_size == 0);
551 
552  serializer(bucket_size);
553  serializer(m_buffer, bucket_size);
554  }
555 
556  template<class Deserializer>
557  static array_bucket deserialize(Deserializer& deserializer) {
558  array_bucket bucket;
559  const slz_size_type bucket_size_ds = deserialize_value<slz_size_type>(deserializer);
560 
561  if(bucket_size_ds == 0) {
562  return bucket;
563  }
564 
565 
566  if(bucket_size_ds > std::numeric_limits<std::size_t>::max()) {
567  throw std::runtime_error("Deserialized bucket_size is bigger than the max value of std::size_t on the current platform.");
568  }
569 
570  const std::size_t bucket_size = static_cast<std::size_t>(bucket_size_ds);
571  bucket.m_buffer = static_cast<CharT*>(std::malloc(bucket_size*sizeof(CharT) + sizeof_in_buff<decltype(END_OF_BUCKET)>()));
572  if(bucket.m_buffer == nullptr) {
573  throw std::bad_alloc();
574  }
575 
576 
577  deserializer(bucket.m_buffer, bucket_size);
578 
579  const auto end_of_bucket = END_OF_BUCKET;
580  std::memcpy(bucket.m_buffer + bucket_size, &end_of_bucket, sizeof(end_of_bucket));
581 
582 
583  tsl_ah_assert(bucket.size() == bucket_size);
584  return bucket;
585  }
586 
587 private:
588  key_size_type as_key_size_type(size_type key_size) const {
589  if(key_size > MAX_KEY_SIZE) {
590  throw std::length_error("Key is too long.");
591  }
592 
593  return key_size_type(key_size);
594  }
595 
596  /*
597  * Return true if found, false otherwise.
598  * If true, buffer_ptr_in_out points to the start of the entry matching 'key'.
599  * If false, buffer_ptr_in_out points to where the 'key' should be inserted.
600  *
601  * Start search from buffer_ptr_in_out.
602  */
603  bool find_or_end_of_bucket_impl(const CharT* key, size_type key_size,
604  const CharT* & buffer_ptr_in_out) const noexcept
605  {
606  while(!is_end_of_bucket(buffer_ptr_in_out)) {
607  const key_size_type buffer_key_size = read_key_size(buffer_ptr_in_out);
608  const CharT* buffer_str = buffer_ptr_in_out + size_as_char_t<key_size_type>();
609  if(KeyEqual()(buffer_str, buffer_key_size, key, key_size)) {
610  return true;
611  }
612 
613  buffer_ptr_in_out += entry_size_bytes(buffer_ptr_in_out)/sizeof(CharT);
614  }
615 
616  return false;
617  }
618 
619  template<typename U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
620  void append_impl(const CharT* key, key_size_type key_size, CharT* buffer_append_pos) noexcept {
621  std::memcpy(buffer_append_pos, &key_size, sizeof(key_size));
622  buffer_append_pos += size_as_char_t<key_size_type>();
623 
624  std::memcpy(buffer_append_pos, key, key_size * sizeof(CharT));
625  buffer_append_pos += key_size;
626 
627  const CharT zero = 0;
628  std::memcpy(buffer_append_pos, &zero, KEY_EXTRA_SIZE * sizeof(CharT));
629  buffer_append_pos += KEY_EXTRA_SIZE;
630 
631  const auto end_of_bucket = END_OF_BUCKET;
632  std::memcpy(buffer_append_pos, &end_of_bucket, sizeof(end_of_bucket));
633  }
634 
635  template<typename U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
636  void append_impl(const CharT* key, key_size_type key_size, CharT* buffer_append_pos,
637  typename array_bucket<CharT, U, KeyEqual, KeySizeT, StoreNullTerminator>::mapped_type value) noexcept
638  {
639  std::memcpy(buffer_append_pos, &key_size, sizeof(key_size));
640  buffer_append_pos += size_as_char_t<key_size_type>();
641 
642  std::memcpy(buffer_append_pos, key, key_size * sizeof(CharT));
643  buffer_append_pos += key_size;
644 
645  const CharT zero = 0;
646  std::memcpy(buffer_append_pos, &zero, KEY_EXTRA_SIZE * sizeof(CharT));
647  buffer_append_pos += KEY_EXTRA_SIZE;
648 
649  std::memcpy(buffer_append_pos, &value, sizeof(value));
650  buffer_append_pos += size_as_char_t<mapped_type>();
651 
652  const auto end_of_bucket = END_OF_BUCKET;
653  std::memcpy(buffer_append_pos, &end_of_bucket, sizeof(end_of_bucket));
654  }
655 
656  /**
657  * Return the number of CharT in m_buffer. As the size of the buffer is not stored to gain some space,
658  * the method need to find the EOF marker and is thus in O(n).
659  */
660  size_type size() const noexcept {
661  if(m_buffer == nullptr) {
662  return 0;
663  }
664 
665  CharT* buffer_ptr = m_buffer;
666  while(!is_end_of_bucket(buffer_ptr)) {
667  buffer_ptr += entry_size_bytes(buffer_ptr)/sizeof(CharT);
668  }
669 
670  return buffer_ptr - m_buffer;
671  }
672 
673 private:
674  static const key_size_type END_OF_BUCKET = std::numeric_limits<key_size_type>::max();
675  static const key_size_type KEY_EXTRA_SIZE = StoreNullTerminator?1:0;
676 
677  CharT* m_buffer;
678 
679 public:
680  static const key_size_type MAX_KEY_SIZE =
681  // -1 for END_OF_BUCKET
682  key_size_type(std::numeric_limits<key_size_type>::max() - KEY_EXTRA_SIZE - 1);
683 };
684 
685 
686 template<class T>
687 class value_container {
688 public:
689  void clear() noexcept {
690  m_values.clear();
691  }
692 
693  void reserve(std::size_t new_cap) {
694  m_values.reserve(new_cap);
695  }
696 
697  void shrink_to_fit() {
698  m_values.shrink_to_fit();
699  }
700 
701  friend void swap(value_container& lhs, value_container& rhs) {
702  lhs.m_values.swap(rhs.m_values);
703  }
704 
705 protected:
706  static constexpr float VECTOR_GROWTH_RATE = 1.5f;
707 
708  // TODO use a sparse array? or a std::dequeu
709  std::vector<T> m_values;
710 };
711 
712 template<>
713 class value_container<void> {
714 public:
715  void clear() noexcept {
716  }
717 
718  void shrink_to_fit() {
719  }
720 
721  void reserve(std::size_t /*new_cap*/) {
722  }
723 };
724 
725 
726 
727 /**
728  * If there is no value in the array_hash (in the case of a set for example), T should be void.
729  *
730  * The size of a key string is limited to std::numeric_limits<KeySizeT>::max() - 1.
731  *
732  * The number of elements in the map is limited to std::numeric_limits<IndexSizeT>::max().
733  */
734 template<class CharT,
735  class T,
736  class Hash,
737  class KeyEqual,
738  bool StoreNullTerminator,
739  class KeySizeT,
740  class IndexSizeT,
741  class GrowthPolicy>
742 class array_hash: private value_container<T>, private Hash, private GrowthPolicy {
743 private:
744  template<typename U>
745  using has_mapped_type = typename std::integral_constant<bool, !std::is_same<U, void>::value>;
746 
747  /**
748  * If there is a mapped type in array_hash, we store the values in m_values of value_container class
749  * and we store an index to m_values in the bucket. The index is of type IndexSizeT.
750  */
751  using array_bucket = tsl::detail_array_hash::array_bucket<CharT,
752  typename std::conditional<has_mapped_type<T>::value,
753  IndexSizeT,
754  void>::type,
755  KeyEqual, KeySizeT, StoreNullTerminator>;
756 
757 public:
758  template<bool IsConst>
760 
761  using char_type = CharT;
762  using key_size_type = KeySizeT;
763  using index_size_type = IndexSizeT;
764  using size_type = std::size_t;
765  using hasher = Hash;
766  using key_equal = KeyEqual;
767  using iterator = array_hash_iterator<false>;
768  using const_iterator = array_hash_iterator<true>;
769 
770 
771 /*
772  * Iterator classes
773  */
774 public:
775  template<bool IsConst>
776  class array_hash_iterator {
777  friend class array_hash;
778 
779  private:
780  using iterator_array_bucket = typename array_bucket::const_iterator;
781 
782  using iterator_buckets = typename std::conditional<IsConst,
783  typename std::vector<array_bucket>::const_iterator,
784  typename std::vector<array_bucket>::iterator>::type;
785 
786  using array_hash_ptr = typename std::conditional<IsConst,
787  const array_hash*,
788  array_hash*>::type;
789 
790  public:
791  using iterator_category = std::forward_iterator_tag;
792  using value_type = typename std::conditional<has_mapped_type<T>::value, T, void>::type;
793  using difference_type = std::ptrdiff_t;
794  using reference = typename std::conditional<has_mapped_type<T>::value,
795  typename std::conditional<
796  IsConst,
797  typename std::add_lvalue_reference<const T>::type,
798  typename std::add_lvalue_reference<T>::type>::type,
799  void>::type;
800  using pointer = typename std::conditional<has_mapped_type<T>::value,
801  typename std::conditional<IsConst, const T*, T*>::type,
802  void>::type;
803 
804 
805  private:
806  array_hash_iterator(iterator_buckets buckets_iterator, iterator_array_bucket array_bucket_iterator,
807  array_hash_ptr array_hash_p) noexcept:
808  m_buckets_iterator(buckets_iterator),
809  m_array_bucket_iterator(array_bucket_iterator),
810  m_array_hash(array_hash_p)
811  {
812  tsl_ah_assert(m_array_hash != nullptr);
813  }
814 
815  public:
816  array_hash_iterator() noexcept: m_array_hash(nullptr) {
817  }
818 
819  array_hash_iterator(const array_hash_iterator<false>& other) noexcept :
820  m_buckets_iterator(other.m_buckets_iterator),
821  m_array_bucket_iterator(other.m_array_bucket_iterator),
822  m_array_hash(other.m_array_hash)
823  {
824  }
825 
826  const CharT* key() const {
827  return m_array_bucket_iterator.key();
828  }
829 
830  size_type key_size() const {
831  return m_array_bucket_iterator.key_size();
832  }
833 
834 #ifdef TSL_AH_HAS_STRING_VIEW
836  return std::basic_string_view<CharT>(key(), key_size());
837  }
838 #endif
839 
840  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
841  reference value() const {
842  return this->m_array_hash->m_values[value_position()];
843  }
844 
845  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
846  reference operator*() const {
847  return value();
848  }
849 
850  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
851  pointer operator->() const {
852  return std::addressof(value());
853  }
854 
856  tsl_ah_assert(m_buckets_iterator != m_array_hash->m_buckets.end());
857  tsl_ah_assert(m_array_bucket_iterator != m_buckets_iterator->cend());
858 
859  ++m_array_bucket_iterator;
860  if(m_array_bucket_iterator == m_buckets_iterator->cend()) {
861  do {
862  ++m_buckets_iterator;
863  } while(m_buckets_iterator != m_array_hash->m_buckets.end() &&
864  m_buckets_iterator->empty());
865 
866  if(m_buckets_iterator != m_array_hash->m_buckets.end()) {
867  m_array_bucket_iterator = m_buckets_iterator->cbegin();
868  }
869  }
870 
871  return *this;
872  }
873 
875  array_hash_iterator tmp(*this);
876  ++*this;
877 
878  return tmp;
879  }
880 
881  friend bool operator==(const array_hash_iterator& lhs, const array_hash_iterator& rhs) {
882  return lhs.m_buckets_iterator == rhs.m_buckets_iterator &&
883  lhs.m_array_bucket_iterator == rhs.m_array_bucket_iterator &&
884  lhs.m_array_hash == rhs.m_array_hash;
885  }
886 
887  friend bool operator!=(const array_hash_iterator& lhs, const array_hash_iterator& rhs) {
888  return !(lhs == rhs);
889  }
890 
891  private:
892  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
893  IndexSizeT value_position() const {
894  return this->m_array_bucket_iterator.value();
895  }
896 
897  private:
898  iterator_buckets m_buckets_iterator;
899  iterator_array_bucket m_array_bucket_iterator;
900 
901  array_hash_ptr m_array_hash;
902  };
903 
904 
905 
906 public:
907  array_hash(size_type bucket_count,
908  const Hash& hash,
909  float max_load_factor): value_container<T>(),
910  Hash(hash),
911  GrowthPolicy(bucket_count),
912  m_buckets(bucket_count > max_bucket_count()?
913  throw std::length_error("The map exceeds its maxmimum bucket count."):
914  bucket_count),
915  m_first_or_empty_bucket(m_buckets.empty()?static_empty_bucket_ptr():m_buckets.data()),
916  m_nb_elements(0)
917  {
918  this->max_load_factor(max_load_factor);
919  }
920 
921  array_hash(const array_hash& other): value_container<T>(other),
922  Hash(other),
923  GrowthPolicy(other),
924  m_buckets(other.m_buckets),
925  m_first_or_empty_bucket(m_buckets.empty()?static_empty_bucket_ptr():m_buckets.data()),
926  m_nb_elements(other.m_nb_elements),
927  m_max_load_factor(other.m_max_load_factor),
928  m_load_threshold(other.m_load_threshold)
929  {
930  }
931 
936  : value_container<T>(std::move(other)),
937  Hash(std::move(other)),
938  GrowthPolicy(std::move(other)),
939  m_buckets(std::move(other.m_buckets)),
940  m_first_or_empty_bucket(m_buckets.empty()?static_empty_bucket_ptr():m_buckets.data()),
941  m_nb_elements(other.m_nb_elements),
942  m_max_load_factor(other.m_max_load_factor),
943  m_load_threshold(other.m_load_threshold)
944  {
945  other.value_container<T>::clear();
946  other.GrowthPolicy::clear();
947  other.m_buckets.clear();
948  other.m_first_or_empty_bucket = static_empty_bucket_ptr();
949  other.m_nb_elements = 0;
950  other.m_load_threshold = 0;
951  }
952 
953  array_hash& operator=(const array_hash& other) {
954  if(&other != this) {
955  value_container<T>::operator=(other);
956  Hash::operator=(other);
957  GrowthPolicy::operator=(other);
958 
959  m_buckets = other.m_buckets;
960  m_first_or_empty_bucket = m_buckets.empty()?static_empty_bucket_ptr():
961  m_buckets.data();
962  m_nb_elements = other.m_nb_elements;
963  m_max_load_factor = other.m_max_load_factor;
964  m_load_threshold = other.m_load_threshold;
965  }
966 
967  return *this;
968  }
969 
971  other.swap(*this);
972  other.clear();
973 
974  return *this;
975  }
976 
977 
978  /*
979  * Iterators
980  */
981  iterator begin() noexcept {
982  auto begin = m_buckets.begin();
983  while(begin != m_buckets.end() && begin->empty()) {
984  ++begin;
985  }
986 
987  return (begin != m_buckets.end())?iterator(begin, begin->cbegin(), this):end();
988  }
989 
990  const_iterator begin() const noexcept {
991  return cbegin();
992  }
993 
994  const_iterator cbegin() const noexcept {
995  auto begin = m_buckets.cbegin();
996  while(begin != m_buckets.cend() && begin->empty()) {
997  ++begin;
998  }
999 
1000  return (begin != m_buckets.cend())?const_iterator(begin, begin->cbegin(), this):cend();
1001  }
1002 
1003  iterator end() noexcept {
1004  return iterator(m_buckets.end(), array_bucket::cend_it(), this);
1005  }
1006 
1007  const_iterator end() const noexcept {
1008  return cend();
1009  }
1010 
1011  const_iterator cend() const noexcept {
1012  return const_iterator(m_buckets.end(), array_bucket::cend_it(), this);
1013  }
1014 
1015 
1016  /*
1017  * Capacity
1018  */
1019  bool empty() const noexcept {
1020  return m_nb_elements == 0;
1021  }
1022 
1023  size_type size() const noexcept {
1024  return m_nb_elements;
1025  }
1026 
1027  size_type max_size() const noexcept {
1028  return std::numeric_limits<IndexSizeT>::max();
1029  }
1030 
1031  size_type max_key_size() const noexcept {
1032  return MAX_KEY_SIZE;
1033  }
1034 
1035  void shrink_to_fit() {
1036  clear_old_erased_values();
1037  value_container<T>::shrink_to_fit();
1038 
1039  rehash_impl(size_type(std::ceil(float(size())/max_load_factor())));
1040  }
1041 
1042  /*
1043  * Modifiers
1044  */
1045  void clear() noexcept {
1046  value_container<T>::clear();
1047 
1048  for(auto& bucket : m_buckets) {
1049  bucket.clear();
1050  }
1051 
1052  m_nb_elements = 0;
1053  }
1054 
1055 
1056 
1057  template<class... ValueArgs>
1058  std::pair<iterator, bool> emplace(const CharT* key, size_type key_size, ValueArgs&&... value_args) {
1059  const std::size_t hash = hash_key(key, key_size);
1060  std::size_t ibucket = bucket_for_hash(hash);
1061 
1062  auto it_find = m_first_or_empty_bucket[ibucket].find_or_end_of_bucket(key, key_size);
1063  if(it_find.second) {
1064  return std::make_pair(iterator(m_buckets.begin() + ibucket, it_find.first, this), false);
1065  }
1066 
1067  if(grow_on_high_load()) {
1068  ibucket = bucket_for_hash(hash);
1069  it_find = m_first_or_empty_bucket[ibucket].find_or_end_of_bucket(key, key_size);
1070  }
1071 
1072  return emplace_impl(ibucket, it_find.first, key, key_size, std::forward<ValueArgs>(value_args)...);
1073  }
1074 
1075  template<class M>
1076  std::pair<iterator, bool> insert_or_assign(const CharT* key, size_type key_size, M&& obj) {
1077  auto it = emplace(key, key_size, std::forward<M>(obj));
1078  if(!it.second) {
1079  it.first.value() = std::forward<M>(obj);
1080  }
1081 
1082  return it;
1083  }
1084 
1085 
1086 
1087  iterator erase(const_iterator pos) {
1088  if(shoud_clear_old_erased_values()) {
1089  clear_old_erased_values();
1090  }
1091 
1092  return erase_from_bucket(mutable_iterator(pos));
1093  }
1094 
1095  iterator erase(const_iterator first, const_iterator last) {
1096  if(first == last) {
1097  return mutable_iterator(first);
1098  }
1099 
1100  /**
1101  * When erasing an element from a bucket with erase_from_bucket, it invalidates all the iterators
1102  * in the array bucket of the element (m_array_bucket_iterator) but not the iterators of the buckets
1103  * itself (m_buckets_iterator).
1104  *
1105  * So first erase all the values between first and last which are not part of the bucket of last,
1106  * and then erase carefully the values in last's bucket.
1107  */
1108  auto to_delete = mutable_iterator(first);
1109  while(to_delete.m_buckets_iterator != last.m_buckets_iterator) {
1110  to_delete = erase_from_bucket(to_delete);
1111  }
1112 
1113  std::size_t nb_elements_until_last = std::distance(to_delete.m_array_bucket_iterator,
1114  last.m_array_bucket_iterator);
1115  while(nb_elements_until_last > 0) {
1116  to_delete = erase_from_bucket(to_delete);
1117  nb_elements_until_last--;
1118  }
1119 
1120  if(shoud_clear_old_erased_values()) {
1121  clear_old_erased_values();
1122  }
1123 
1124  return to_delete;
1125  }
1126 
1127 
1128 
1129  size_type erase(const CharT* key, size_type key_size) {
1130  return erase(key, key_size, hash_key(key, key_size));
1131  }
1132 
1133  size_type erase(const CharT* key, size_type key_size, std::size_t hash) {
1134  if(shoud_clear_old_erased_values()) {
1135  clear_old_erased_values();
1136  }
1137 
1138  const std::size_t ibucket = bucket_for_hash(hash);
1139  if(m_first_or_empty_bucket[ibucket].erase(key, key_size)) {
1140  m_nb_elements--;
1141  return 1;
1142  }
1143  else {
1144  return 0;
1145  }
1146  }
1147 
1148 
1149 
1150  void swap(array_hash& other) {
1151  using std::swap;
1152 
1153  swap(static_cast<value_container<T>&>(*this), static_cast<value_container<T>&>(other));
1154  swap(static_cast<Hash&>(*this), static_cast<Hash&>(other));
1155  swap(static_cast<GrowthPolicy&>(*this), static_cast<GrowthPolicy&>(other));
1156  swap(m_buckets, other.m_buckets);
1157  swap(m_first_or_empty_bucket, other.m_first_or_empty_bucket);
1158  swap(m_nb_elements, other.m_nb_elements);
1159  swap(m_max_load_factor, other.m_max_load_factor);
1160  swap(m_load_threshold, other.m_load_threshold);
1161  }
1162 
1163  /*
1164  * Lookup
1165  */
1166  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
1167  U& at(const CharT* key, size_type key_size) {
1168  return at(key, key_size, hash_key(key, key_size));
1169  }
1170 
1171  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
1172  const U& at(const CharT* key, size_type key_size) const {
1173  return at(key, key_size, hash_key(key, key_size));
1174  }
1175 
1176  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
1177  U& at(const CharT* key, size_type key_size, std::size_t hash) {
1178  return const_cast<U&>(static_cast<const array_hash*>(this)->at(key, key_size, hash));
1179  }
1180 
1181  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
1182  const U& at(const CharT* key, size_type key_size, std::size_t hash) const {
1183  const std::size_t ibucket = bucket_for_hash(hash);
1184 
1185  auto it_find = m_first_or_empty_bucket[ibucket].find_or_end_of_bucket(key, key_size);
1186  if(it_find.second) {
1187  return this->m_values[it_find.first.value()];
1188  }
1189  else {
1190  throw std::out_of_range("Couldn't find key.");
1191  }
1192  }
1193 
1194 
1195 
1196  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
1197  U& access_operator(const CharT* key, size_type key_size) {
1198  const std::size_t hash = hash_key(key, key_size);
1199  std::size_t ibucket = bucket_for_hash(hash);
1200 
1201  auto it_find = m_first_or_empty_bucket[ibucket].find_or_end_of_bucket(key, key_size);
1202  if(it_find.second) {
1203  return this->m_values[it_find.first.value()];
1204  }
1205  else {
1206  if(grow_on_high_load()) {
1207  ibucket = bucket_for_hash(hash);
1208  it_find = m_buckets[ibucket].find_or_end_of_bucket(key, key_size);
1209  }
1210 
1211  return emplace_impl(ibucket, it_find.first, key, key_size, U{}).first.value();
1212  }
1213  }
1214 
1215 
1216 
1217  size_type count(const CharT* key, size_type key_size) const {
1218  return count(key, key_size, hash_key(key, key_size));
1219  }
1220 
1221  size_type count(const CharT* key, size_type key_size, std::size_t hash) const {
1222  const std::size_t ibucket = bucket_for_hash(hash);
1223 
1224  auto it_find = m_first_or_empty_bucket[ibucket].find_or_end_of_bucket(key, key_size);
1225  if(it_find.second) {
1226  return 1;
1227  }
1228  else {
1229  return 0;
1230  }
1231  }
1232 
1233 
1234 
1235  iterator find(const CharT* key, size_type key_size) {
1236  return find(key, key_size, hash_key(key, key_size));
1237  }
1238 
1239  const_iterator find(const CharT* key, size_type key_size) const {
1240  return find(key, key_size, hash_key(key, key_size));
1241  }
1242 
1243  iterator find(const CharT* key, size_type key_size, std::size_t hash) {
1244  const std::size_t ibucket = bucket_for_hash(hash);
1245 
1246  auto it_find = m_first_or_empty_bucket[ibucket].find_or_end_of_bucket(key, key_size);
1247  if(it_find.second) {
1248  return iterator(m_buckets.begin() + ibucket, it_find.first, this);
1249  }
1250  else {
1251  return end();
1252  }
1253  }
1254 
1255  const_iterator find(const CharT* key, size_type key_size, std::size_t hash) const {
1256  const std::size_t ibucket = bucket_for_hash(hash);
1257 
1258  auto it_find = m_first_or_empty_bucket[ibucket].find_or_end_of_bucket(key, key_size);
1259  if(it_find.second) {
1260  return const_iterator(m_buckets.cbegin() + ibucket, it_find.first, this);
1261  }
1262  else {
1263  return cend();
1264  }
1265  }
1266 
1267 
1268 
1269  std::pair<iterator, iterator> equal_range(const CharT* key, size_type key_size) {
1270  return equal_range(key, key_size, hash_key(key, key_size));
1271  }
1272 
1273  std::pair<const_iterator, const_iterator> equal_range(const CharT* key, size_type key_size) const {
1274  return equal_range(key, key_size, hash_key(key, key_size));
1275  }
1276 
1277  std::pair<iterator, iterator> equal_range(const CharT* key, size_type key_size, std::size_t hash) {
1278  iterator it = find(key, key_size, hash);
1279  return std::make_pair(it, (it == end())?it:std::next(it));
1280  }
1281 
1282  std::pair<const_iterator, const_iterator> equal_range(const CharT* key, size_type key_size,
1283  std::size_t hash) const
1284  {
1285  const_iterator it = find(key, key_size, hash);
1286  return std::make_pair(it, (it == cend())?it:std::next(it));
1287  }
1288 
1289  /*
1290  * Bucket interface
1291  */
1292  size_type bucket_count() const {
1293  return m_buckets.size();
1294  }
1295 
1296  size_type max_bucket_count() const {
1297  return std::min(GrowthPolicy::max_bucket_count(), m_buckets.max_size());
1298  }
1299 
1300 
1301  /*
1302  * Hash policy
1303  */
1304  float load_factor() const {
1305  return float(m_nb_elements) / float(bucket_count());
1306  }
1307 
1308  float max_load_factor() const {
1309  return m_max_load_factor;
1310  }
1311 
1312  void max_load_factor(float ml) {
1313  m_max_load_factor = std::max(0.1f, ml);
1314  m_load_threshold = size_type(float(bucket_count())*m_max_load_factor);
1315  }
1316 
1317  void rehash(size_type count) {
1318  count = std::max(count, size_type(std::ceil(float(size())/max_load_factor())));
1319  rehash_impl(count);
1320  }
1321 
1322  void reserve(size_type count) {
1323  rehash(size_type(std::ceil(float(count)/max_load_factor())));
1324  }
1325 
1326  /*
1327  * Observers
1328  */
1329  hasher hash_function() const {
1330  return static_cast<const hasher&>(*this);
1331  }
1332 
1333  // TODO add support for statefull KeyEqual
1334  key_equal key_eq() const {
1335  return KeyEqual();
1336  }
1337 
1338  /*
1339  * Other
1340  */
1341  iterator mutable_iterator(const_iterator it) noexcept {
1342  auto it_bucket = m_buckets.begin() + std::distance(m_buckets.cbegin(), it.m_buckets_iterator);
1343  return iterator(it_bucket, it.m_array_bucket_iterator, this);
1344  }
1345 
1346  template<class Serializer>
1347  void serialize(Serializer& serializer) const {
1348  serialize_impl(serializer);
1349  }
1350 
1351  template<class Deserializer>
1352  void deserialize(Deserializer& deserializer, bool hash_compatible) {
1353  deserialize_impl(deserializer, hash_compatible);
1354  }
1355 
1356 private:
1357  std::size_t hash_key(const CharT* key, size_type key_size) const {
1358  return Hash::operator()(key, key_size);
1359  }
1360 
1361  std::size_t bucket_for_hash(std::size_t hash) const {
1362  return GrowthPolicy::bucket_for_hash(hash);
1363  }
1364 
1365  /**
1366  * If there is a mapped_type, the mapped value in m_values is not erased now.
1367  * It will be erased when the ratio between the size of the map and
1368  * the size of the map + the number of deleted values still stored is low enough (see clear_old_erased_values).
1369  */
1370  iterator erase_from_bucket(iterator pos) noexcept {
1371  auto array_bucket_next_it = pos.m_buckets_iterator->erase(pos.m_array_bucket_iterator);
1372  m_nb_elements--;
1373 
1374  if(array_bucket_next_it != pos.m_buckets_iterator->cend()) {
1375  return iterator(pos.m_buckets_iterator, array_bucket_next_it, this);
1376  }
1377  else {
1378  do {
1379  ++pos.m_buckets_iterator;
1380  } while(pos.m_buckets_iterator != m_buckets.end() && pos.m_buckets_iterator->empty());
1381 
1382  if(pos.m_buckets_iterator != m_buckets.end()) {
1383  return iterator(pos.m_buckets_iterator, pos.m_buckets_iterator->cbegin(), this);
1384  }
1385  else {
1386  return end();
1387  }
1388  }
1389  }
1390 
1391 
1392  template<class U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
1393  bool shoud_clear_old_erased_values(float /*threshold*/ = DEFAULT_CLEAR_OLD_ERASED_VALUE_THRESHOLD) const {
1394  return false;
1395  }
1396 
1397  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
1398  bool shoud_clear_old_erased_values(float threshold = DEFAULT_CLEAR_OLD_ERASED_VALUE_THRESHOLD) const {
1399  if(this->m_values.size() == 0) {
1400  return false;
1401  }
1402 
1403  return float(m_nb_elements)/float(this->m_values.size()) < threshold;
1404  }
1405 
1406  template<class U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
1407  void clear_old_erased_values() {
1408  }
1409 
1410  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
1411  void clear_old_erased_values() {
1412  static_assert(std::is_nothrow_move_constructible<U>::value ||
1413  std::is_copy_constructible<U>::value,
1414  "mapped_value must be either copy constructible or nothrow move constructible.");
1415 
1416  if(m_nb_elements == this->m_values.size()) {
1417  return;
1418  }
1419 
1420  std::vector<T> new_values;
1421  new_values.reserve(size());
1422 
1423  for(auto it = begin(); it != end(); ++it) {
1424  new_values.push_back(std::move_if_noexcept(it.value()));
1425  }
1426 
1427 
1428  IndexSizeT ivalue = 0;
1429  for(auto it = begin(); it != end(); ++it) {
1430  auto it_array_bucket = it.m_buckets_iterator->mutable_iterator(it.m_array_bucket_iterator);
1431  it_array_bucket.set_value(ivalue);
1432  ivalue++;
1433  }
1434 
1435  new_values.swap(this->m_values);
1436  tsl_ah_assert(m_nb_elements == this->m_values.size());
1437  }
1438 
1439  /**
1440  * Return true if a rehash occured.
1441  */
1442  bool grow_on_high_load() {
1443  if(size() >= m_load_threshold) {
1444  rehash_impl(GrowthPolicy::next_bucket_count());
1445  return true;
1446  }
1447 
1448  return false;
1449  }
1450 
1451  template<class... ValueArgs, class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
1452  std::pair<iterator, bool> emplace_impl(std::size_t ibucket, typename array_bucket::const_iterator end_of_bucket,
1453  const CharT* key, size_type key_size, ValueArgs&&... value_args)
1454  {
1455  if(this->m_values.size() >= max_size()) {
1456  // Try to clear old erased values lingering in m_values. Throw if it doesn't change anything.
1457  clear_old_erased_values();
1458  if(this->m_values.size() >= max_size()) {
1459  throw std::length_error("Can't insert value, too much values in the map.");
1460  }
1461  }
1462 
1463  if(this->m_values.size() == this->m_values.capacity()) {
1464  this->m_values.reserve(std::size_t(float(this->m_values.size()) * value_container<T>::VECTOR_GROWTH_RATE));
1465  }
1466 
1467 
1468  this->m_values.emplace_back(std::forward<ValueArgs>(value_args)...);
1469 
1470  try {
1471  auto it = m_first_or_empty_bucket[ibucket].append(end_of_bucket, key, key_size, IndexSizeT(this->m_values.size() - 1));
1472  m_nb_elements++;
1473 
1474  return std::make_pair(iterator(m_buckets.begin() + ibucket, it, this), true);
1475  } catch(...) {
1476  // Rollback
1477  this->m_values.pop_back();
1478  throw;
1479  }
1480  }
1481 
1482  template<class U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
1483  std::pair<iterator, bool> emplace_impl(std::size_t ibucket, typename array_bucket::const_iterator end_of_bucket,
1484  const CharT* key, size_type key_size)
1485  {
1486  if(m_nb_elements >= max_size()) {
1487  throw std::length_error("Can't insert value, too much values in the map.");
1488  }
1489 
1490  auto it = m_first_or_empty_bucket[ibucket].append(end_of_bucket, key, key_size);
1491  m_nb_elements++;
1492 
1493  return std::make_pair(iterator(m_buckets.begin() + ibucket, it, this), true);
1494  }
1495 
1496  void rehash_impl(size_type bucket_count) {
1497  GrowthPolicy new_growth_policy(bucket_count);
1498  if(bucket_count == this->bucket_count()) {
1499  return;
1500  }
1501 
1502 
1503  if(shoud_clear_old_erased_values(0.9f)) {
1504  clear_old_erased_values();
1505  }
1506 
1507 
1508  std::vector<std::size_t> required_size_for_bucket(bucket_count, 0);
1509  std::vector<std::size_t> bucket_for_ivalue(size(), 0);
1510 
1511  std::size_t ivalue = 0;
1512  for(auto it = begin(); it != end(); ++it) {
1513  const std::size_t hash = hash_key(it.key(), it.key_size());
1514  const std::size_t ibucket = new_growth_policy.bucket_for_hash(hash);
1515 
1516  bucket_for_ivalue[ivalue] = ibucket;
1517  required_size_for_bucket[ibucket] += array_bucket::entry_required_bytes(it.key_size());
1518  ivalue++;
1519  }
1520 
1521 
1522 
1523 
1524  std::vector<array_bucket> new_buckets;
1525  new_buckets.reserve(bucket_count);
1526  for(std::size_t ibucket = 0; ibucket < bucket_count; ibucket++) {
1527  new_buckets.emplace_back(required_size_for_bucket[ibucket]);
1528  }
1529 
1530 
1531  ivalue = 0;
1532  for(auto it = begin(); it != end(); ++it) {
1533  const std::size_t ibucket = bucket_for_ivalue[ivalue];
1534  append_iterator_in_reserved_bucket_no_check(new_buckets[ibucket], it);
1535 
1536  ivalue++;
1537  }
1538 
1539 
1540  using std::swap;
1541  swap(static_cast<GrowthPolicy&>(*this), new_growth_policy);
1542 
1543  m_buckets.swap(new_buckets);
1544  m_first_or_empty_bucket = m_buckets.data();
1545 
1546  // Call max_load_factor to change m_load_threshold
1547  max_load_factor(m_max_load_factor);
1548  }
1549 
1550  template<class U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
1551  void append_iterator_in_reserved_bucket_no_check(array_bucket& bucket, iterator it) {
1552  bucket.append_in_reserved_bucket_no_check(it.key(), it.key_size());
1553  }
1554 
1555  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
1556  void append_iterator_in_reserved_bucket_no_check(array_bucket& bucket, iterator it) {
1557  bucket.append_in_reserved_bucket_no_check(it.key(), it.key_size(), it.value_position());
1558  }
1559 
1560 
1561 
1562  /**
1563  * On serialization the values of each bucket (if has_mapped_type is true) are serialized
1564  * next to the bucket. The potential old erased values in value_container are thus not serialized.
1565  *
1566  * On deserialization, when hash_compatible is true, we reaffect the value index (IndexSizeT) of each
1567  * bucket with set_value as the position of each value is no more the same in value_container compared
1568  * to when they were serialized.
1569  *
1570  * It's done this way as we can't call clear_old_erased_values() because we want the serialize
1571  * method to remain const and we don't want to serialize/deserialize old erased values. As we may
1572  * not serialize all the values in value_container, the values we keep can change of index.
1573  * We thus have to modify the value indexes in the buckets.
1574  */
1575  template<class Serializer>
1576  void serialize_impl(Serializer& serializer) const {
1577  const slz_size_type version = SERIALIZATION_PROTOCOL_VERSION;
1578  serializer(version);
1579 
1580  const slz_size_type bucket_count = m_buckets.size();
1581  serializer(bucket_count);
1582 
1583  const slz_size_type nb_elements = m_nb_elements;
1584  serializer(nb_elements);
1585 
1586  const float max_load_factor = m_max_load_factor;
1587  serializer(max_load_factor);
1588 
1589  for(const array_bucket& bucket: m_buckets) {
1590  bucket.serialize(serializer);
1591  serialize_bucket_values(serializer, bucket);
1592  }
1593  }
1594 
1595  template<class Serializer, class U = T,
1596  typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
1597  void serialize_bucket_values(Serializer& /*serializer*/, const array_bucket& /*bucket*/) const {
1598  }
1599 
1600  template<class Serializer, class U = T,
1601  typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
1602  void serialize_bucket_values(Serializer& serializer, const array_bucket& bucket) const {
1603  for(auto it = bucket.begin(); it != bucket.end(); ++it) {
1604  serializer(this->m_values[it.value()]);
1605  }
1606  }
1607 
1608  template<class Deserializer>
1609  void deserialize_impl(Deserializer& deserializer, bool hash_compatible) {
1610  tsl_ah_assert(m_buckets.empty()); // Current hash table must be empty
1611 
1612  const slz_size_type version = deserialize_value<slz_size_type>(deserializer);
1613  // For now we only have one version of the serialization protocol.
1614  // If it doesn't match there is a problem with the file.
1615  if(version != SERIALIZATION_PROTOCOL_VERSION) {
1616  throw std::runtime_error("Can't deserialize the array_map/set. The protocol version header is invalid.");
1617  }
1618 
1619  const slz_size_type bucket_count_ds = deserialize_value<slz_size_type>(deserializer);
1620  const slz_size_type nb_elements = deserialize_value<slz_size_type>(deserializer);
1621  const float max_load_factor = deserialize_value<float>(deserializer);
1622 
1623 
1624 
1625  if(nb_elements > std::numeric_limits<IndexSizeT>::max()) {
1626  throw std::runtime_error("Deserialized nb_elements is bigger than the max value of IndexSizeT.");
1627  }
1628  m_nb_elements = static_cast<IndexSizeT>(nb_elements);
1629 
1630 
1631  if(bucket_count_ds > std::numeric_limits<std::size_t>::max()) {
1632  throw std::runtime_error("Deserialized bucket_count is bigger than the max value of std::size_t on the current platform.");
1633  }
1634  std::size_t bucket_count = static_cast<std::size_t>(bucket_count_ds);
1635  GrowthPolicy::operator=(GrowthPolicy(bucket_count));
1636 
1637 
1638  this->max_load_factor(max_load_factor);
1639  value_container<T>::reserve(m_nb_elements);
1640 
1641 
1642  if(hash_compatible) {
1643  if(bucket_count != bucket_count_ds) {
1644  throw std::runtime_error("The GrowthPolicy is not the same even though hash_compatible is true.");
1645  }
1646 
1647  m_buckets.reserve(bucket_count);
1648  for(std::size_t i = 0; i < bucket_count; i++) {
1649  m_buckets.push_back(array_bucket::deserialize(deserializer));
1650  deserialize_bucket_values(deserializer, m_buckets.back());
1651  }
1652  }
1653  else {
1654  m_buckets.resize(bucket_count);
1655  for(std::size_t i = 0; i < bucket_count; i++) {
1656  // TODO use buffer to avoid reallocation on each deserialization.
1657  array_bucket bucket = array_bucket::deserialize(deserializer);
1658  deserialize_bucket_values(deserializer, bucket);
1659 
1660  for(auto it_val = bucket.cbegin(); it_val != bucket.cend(); ++it_val) {
1661  const std::size_t ibucket = bucket_for_hash(hash_key(it_val.key(), it_val.key_size()));
1662 
1663  auto it_find = m_buckets[ibucket].find_or_end_of_bucket(it_val.key(), it_val.key_size());
1664  if(it_find.second) {
1665  throw std::runtime_error("Error on deserialization, the same key is presents multiple times.");
1666  }
1667 
1668  append_array_bucket_iterator_in_bucket(m_buckets[ibucket], it_find.first, it_val);
1669  }
1670  }
1671  }
1672 
1673  m_first_or_empty_bucket = m_buckets.data();
1674 
1675 
1676  if(load_factor() > this->max_load_factor()) {
1677  throw std::runtime_error("");
1678  }
1679  }
1680 
1681  template<class Deserializer, class U = T,
1682  typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
1683  void deserialize_bucket_values(Deserializer& /*deserializer*/, array_bucket& /*bucket*/) {
1684  }
1685 
1686  template<class Deserializer, class U = T,
1687  typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
1688  void deserialize_bucket_values(Deserializer& deserializer, array_bucket& bucket) {
1689  for(auto it = bucket.begin(); it != bucket.end(); ++it) {
1690  this->m_values.emplace_back(deserialize_value<U>(deserializer));
1691 
1692  tsl_ah_assert(this->m_values.size() - 1 <= std::numeric_limits<IndexSizeT>::max());
1693  it.set_value(static_cast<IndexSizeT>(this->m_values.size() - 1));
1694  }
1695  }
1696 
1697  template<class U = T, typename std::enable_if<!has_mapped_type<U>::value>::type* = nullptr>
1698  void append_array_bucket_iterator_in_bucket(array_bucket& bucket,
1699  typename array_bucket::const_iterator end_of_bucket,
1700  typename array_bucket::const_iterator it_val)
1701  {
1702  bucket.append(end_of_bucket, it_val.key(), it_val.key_size());
1703  }
1704 
1705  template<class U = T, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
1706  void append_array_bucket_iterator_in_bucket(array_bucket& bucket,
1707  typename array_bucket::const_iterator end_of_bucket,
1708  typename array_bucket::const_iterator it_val)
1709  {
1710  bucket.append(end_of_bucket, it_val.key(), it_val.key_size(), it_val.value());
1711  }
1712 
1713 public:
1714  static const size_type DEFAULT_INIT_BUCKET_COUNT = 16;
1715  static constexpr float DEFAULT_MAX_LOAD_FACTOR = 2.0f;
1716  static const size_type MAX_KEY_SIZE = array_bucket::MAX_KEY_SIZE;
1717 
1718 private:
1719  /**
1720  * Protocol version currenlty used for serialization.
1721  */
1722  static const slz_size_type SERIALIZATION_PROTOCOL_VERSION = 1;
1723 
1724 
1725  static constexpr float DEFAULT_CLEAR_OLD_ERASED_VALUE_THRESHOLD = 0.6f;
1726 
1727 
1728  /**
1729  * Return an always valid pointer to a static empty array_bucket.
1730  */
1731  array_bucket* static_empty_bucket_ptr() {
1732  static array_bucket empty_bucket;
1733  return &empty_bucket;
1734  }
1735 
1736 private:
1737  std::vector<array_bucket> m_buckets;
1738 
1739  /**
1740  * Points to m_buckets.data() if !m_buckets.empty() otherwise points to static_empty_bucket_ptr.
1741  * This variable is useful to avoid the cost of checking if m_buckets is empty when trying
1742  * to find an element.
1743  *
1744  * TODO Remove m_buckets and only use a pointer+size instead of a pointer+vector to save some space in the array_hash object.
1745  */
1746  array_bucket* m_first_or_empty_bucket;
1747 
1748  IndexSizeT m_nb_elements;
1749  float m_max_load_factor;
1750  size_type m_load_threshold;
1751 };
1752 
1753 } // end namespace detail_array_hash
1754 } //end namespace tsl
1755 
1756 #endif
iterator begin() noexcept
Definition: array_hash.h:981
array_hash_iterator(const array_hash_iterator< false > &other) noexcept
Definition: array_hash.h:819
std::pair< iterator, bool > insert_or_assign(const CharT *key, size_type key_size, M &&obj)
Definition: array_hash.h:1076
iterator end() noexcept
Definition: array_hash.h:1003
array_hash_iterator & operator++()
Definition: array_hash.h:855
const_iterator cend() const noexcept
Definition: array_hash.h:1011
void rehash(size_type count)
Definition: array_hash.h:1317
void shrink_to_fit()
Definition: array_hash.h:1035
Definition: array_growth_policy.h:40
size_type max_key_size() const noexcept
Definition: array_hash.h:1031
float max_load_factor() const
Definition: array_hash.h:1308
U & at(const CharT *key, size_type key_size, std::size_t hash)
Definition: array_hash.h:1177
Definition: array_growth_policy.h:39
size_type max_bucket_count() const
Definition: array_hash.h:1296
float load_factor() const
Definition: array_hash.h:1304
size_type count(const CharT *key, size_type key_size) const
Definition: array_hash.h:1217
const U & at(const CharT *key, size_type key_size) const
Definition: array_hash.h:1172
Definition: array_hash.h:117
std::pair< const_iterator, const_iterator > equal_range(const CharT *key, size_type key_size) const
Definition: array_hash.h:1273
bool empty() const noexcept
Definition: array_hash.h:1019
static constexpr float DEFAULT_MAX_LOAD_FACTOR
Definition: array_hash.h:1715
std::basic_string_view< CharT > key_sv() const
Definition: array_hash.h:835
iterator erase(const_iterator pos)
Definition: array_hash.h:1087
U & access_operator(const CharT *key, size_type key_size)
Definition: array_hash.h:1197
Definition: array_hash.h:102
hasher hash_function() const
Definition: array_hash.h:1329
void deserialize(Deserializer &deserializer, bool hash_compatible)
Definition: array_hash.h:1352
size_type erase(const CharT *key, size_type key_size, std::size_t hash)
Definition: array_hash.h:1133
const_iterator begin() const noexcept
Definition: array_hash.h:990
reference value() const
Definition: array_hash.h:841
iterator find(const CharT *key, size_type key_size)
Definition: array_hash.h:1235
void clear() noexcept
Definition: array_hash.h:1045
#define tsl_ah_assert(expr)
Definition: array_hash.h:63
bool operator()(const CharT *key_lhs, std::size_t key_size_lhs, const CharT *key_rhs, std::size_t key_size_rhs) const
Definition: array_hash.h:103
size_type size() const noexcept
Definition: array_hash.h:1023
array_hash(const array_hash &other)
Definition: array_hash.h:921
array_hash(array_hash &&other) noexcept(std::is_nothrow_move_constructible< value_container< T >>::value &&std::is_nothrow_move_constructible< Hash >::value &&std::is_nothrow_move_constructible< GrowthPolicy >::value &&std::is_nothrow_move_constructible< std::vector< array_bucket >>::value)
Definition: array_hash.h:932
std::pair< iterator, bool > emplace(const CharT *key, size_type key_size, ValueArgs &&... value_args)
Definition: array_hash.h:1058
size_type erase(const CharT *key, size_type key_size)
Definition: array_hash.h:1129
void max_load_factor(float ml)
Definition: array_hash.h:1312
size_type count(const CharT *key, size_type key_size, std::size_t hash) const
Definition: array_hash.h:1221
key_equal key_eq() const
Definition: array_hash.h:1334
void serialize(Serializer &serializer) const
Definition: array_hash.h:1347
array_hash & operator=(array_hash &&other)
Definition: array_hash.h:970
array_hash & operator=(const array_hash &other)
Definition: array_hash.h:953
const_iterator find(const CharT *key, size_type key_size) const
Definition: array_hash.h:1239
const CharT * key() const
Definition: array_hash.h:826
const U & at(const CharT *key, size_type key_size, std::size_t hash) const
Definition: array_hash.h:1182
friend bool operator!=(const array_hash_iterator &lhs, const array_hash_iterator &rhs)
Definition: array_hash.h:887
std::pair< iterator, iterator > equal_range(const CharT *key, size_type key_size)
Definition: array_hash.h:1269
std::size_t operator()(const CharT *key, std::size_t key_size) const
Definition: array_hash.h:79
pointer operator->() const
Definition: array_hash.h:851
U & at(const CharT *key, size_type key_size)
Definition: array_hash.h:1167
array_hash_iterator() noexcept
Definition: array_hash.h:816
size_type bucket_count() const
Definition: array_hash.h:1292
const_iterator find(const CharT *key, size_type key_size, std::size_t hash) const
Definition: array_hash.h:1255
void reserve(size_type count)
Definition: array_hash.h:1322
static const size_type DEFAULT_INIT_BUCKET_COUNT
Definition: array_hash.h:1714
static const size_type MAX_KEY_SIZE
Definition: array_hash.h:1716
iterator find(const CharT *key, size_type key_size, std::size_t hash)
Definition: array_hash.h:1243
void swap(array_hash &other)
Definition: array_hash.h:1150
const_iterator cbegin() const noexcept
Definition: array_hash.h:994
const_iterator end() const noexcept
Definition: array_hash.h:1007
size_type key_size() const
Definition: array_hash.h:830
reference operator*() const
Definition: array_hash.h:846
Definition: array_hash.h:742
array_hash_iterator operator++(int)
Definition: array_hash.h:874
array_hash(size_type bucket_count, const Hash &hash, float max_load_factor)
Definition: array_hash.h:907
friend bool operator==(const array_hash_iterator &lhs, const array_hash_iterator &rhs)
Definition: array_hash.h:881
std::pair< const_iterator, const_iterator > equal_range(const CharT *key, size_type key_size, std::size_t hash) const
Definition: array_hash.h:1282
iterator mutable_iterator(const_iterator it) noexcept
Definition: array_hash.h:1341
std::pair< iterator, iterator > equal_range(const CharT *key, size_type key_size, std::size_t hash)
Definition: array_hash.h:1277
size_type max_size() const noexcept
Definition: array_hash.h:1027
iterator erase(const_iterator first, const_iterator last)
Definition: array_hash.h:1095