hat-trie
htrie_set.h
Go to the documentation of this file.
1 /**
2  * MIT License
3  *
4  * Copyright (c) 2017 Tessil
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef TSL_HTRIE_SET_H
25 #define TSL_HTRIE_SET_H
26 
27 #include <cstddef>
28 #include <cstring>
29 #include <initializer_list>
30 #include <string>
31 #include <utility>
32 #include "htrie_hash.h"
33 
34 namespace tsl {
35 
36 /**
37  * Implementation of a hat-trie set.
38  *
39  * The size of a key string is limited to std::numeric_limits<KeySizeT>::max() - 1.
40  * That is 65 535 characters by default, but can be raised with the KeySizeT template parameter.
41  * See max_key_size() for an easy access to this limit.
42  *
43  * Iterators invalidation:
44  * - clear, operator=: always invalidate the iterators.
45  * - insert: always invalidate the iterators.
46  * - erase: always invalidate the iterators.
47  */
48 template<class CharT,
49  class Hash = tsl::ah::str_hash<CharT>,
50  class KeySizeT = std::uint16_t>
51 class htrie_set {
52 private:
53  template<typename U>
54  using is_iterator = tsl::detail_array_hash::is_iterator<U>;
55 
56  using ht = tsl::detail_htrie_hash::htrie_hash<CharT, void, Hash, KeySizeT>;
57 
58 public:
59  using char_type = typename ht::char_type;
60  using key_size_type = typename ht::key_size_type;
61  using size_type = typename ht::size_type;
62  using hasher = typename ht::hasher;
63  using iterator = typename ht::iterator;
64  using const_iterator = typename ht::const_iterator;
65  using prefix_iterator = typename ht::prefix_iterator;
66  using const_prefix_iterator = typename ht::const_prefix_iterator;
67 
68 public:
69  explicit htrie_set(const Hash& hash = Hash()): m_ht(hash, ht::HASH_NODE_DEFAULT_MAX_LOAD_FACTOR,
70  ht::DEFAULT_BURST_THRESHOLD)
71  {
72  }
73 
74  explicit htrie_set(size_type burst_threshold,
75  const Hash& hash = Hash()): m_ht(hash, ht::HASH_NODE_DEFAULT_MAX_LOAD_FACTOR,
76  burst_threshold)
77  {
78  }
79 
80  template<class InputIt, typename std::enable_if<is_iterator<InputIt>::value>::type* = nullptr>
81  htrie_set(InputIt first, InputIt last,
82  const Hash& hash = Hash()): htrie_set(hash)
83  {
84  insert(first, last);
85  }
86 
87 
88 
89 #ifdef TSL_HT_HAS_STRING_VIEW
91  const Hash& hash = Hash()): htrie_set(hash)
92  {
93  insert(init);
94  }
95 #else
96  htrie_set(std::initializer_list<const CharT*> init,
97  const Hash& hash = Hash()): htrie_set(hash)
98  {
99  insert(init);
100  }
101 #endif
102 
103 
104 
105 #ifdef TSL_HT_HAS_STRING_VIEW
107  clear();
108  insert(ilist);
109 
110  return *this;
111  }
112 #else
113  htrie_set& operator=(std::initializer_list<const CharT*> ilist) {
114  clear();
115  insert(ilist);
116 
117  return *this;
118  }
119 #endif
120 
121 
122 
123  /*
124  * Iterators
125  */
126  iterator begin() noexcept { return m_ht.begin(); }
127  const_iterator begin() const noexcept { return m_ht.begin(); }
128  const_iterator cbegin() const noexcept { return m_ht.cbegin(); }
129 
130  iterator end() noexcept { return m_ht.end(); }
131  const_iterator end() const noexcept { return m_ht.end(); }
132  const_iterator cend() const noexcept { return m_ht.cend(); }
133 
134 
135  /*
136  * Capacity
137  */
138  bool empty() const noexcept { return m_ht.empty(); }
139  size_type size() const noexcept { return m_ht.size(); }
140  size_type max_size() const noexcept { return m_ht.max_size(); }
141  size_type max_key_size() const noexcept { return m_ht.max_key_size(); }
142 
143  /**
144  * Call shrink_to_fit() on each hash node of the hat-trie to reduce its size.
145  */
146  void shrink_to_fit() { m_ht.shrink_to_fit(); }
147 
148 
149  /*
150  * Modifiers
151  */
152  void clear() noexcept { m_ht.clear(); }
153 
154 
155 
156  std::pair<iterator, bool> insert_ks(const CharT* key, size_type key_size) {
157  return m_ht.insert(key, key_size);
158  }
159 #ifdef TSL_HT_HAS_STRING_VIEW
161  return m_ht.insert(key.data(), key.size());
162  }
163 #else
164  std::pair<iterator, bool> insert(const CharT* key) {
165  return m_ht.insert(key, std::strlen(key));
166  }
167 
168  std::pair<iterator, bool> insert(const std::basic_string<CharT>& key) {
169  return m_ht.insert(key.data(), key.size());
170  }
171 #endif
172 
173 
174 
175  template<class InputIt, typename std::enable_if<is_iterator<InputIt>::value>::type* = nullptr>
176  void insert(InputIt first, InputIt last) {
177  for(auto it = first; it != last; ++it) {
178  insert(*it);
179  }
180  }
181 
182 
183 
184 #ifdef TSL_HT_HAS_STRING_VIEW
186  insert(ilist.begin(), ilist.end());
187  }
188 #else
189  void insert(std::initializer_list<const CharT*> ilist) {
190  insert(ilist.begin(), ilist.end());
191  }
192 #endif
193 
194 
195 
196  std::pair<iterator, bool> emplace_ks(const CharT* key, size_type key_size) {
197  return m_ht.insert(key, key_size);
198  }
199 #ifdef TSL_HT_HAS_STRING_VIEW
201  return m_ht.insert(key.data(), key.size());
202  }
203 #else
204  std::pair<iterator, bool> emplace(const CharT* key) {
205  return m_ht.insert(key, std::strlen(key));
206  }
207 
208  std::pair<iterator, bool> emplace(const std::basic_string<CharT>& key) {
209  return m_ht.insert(key.data(), key.size());
210  }
211 #endif
212 
213 
214 
215  iterator erase(const_iterator pos) { return m_ht.erase(pos); }
216  iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); }
217 
218 
219 
220  size_type erase_ks(const CharT* key, size_type key_size) {
221  return m_ht.erase(key, key_size);
222  }
223 #ifdef TSL_HT_HAS_STRING_VIEW
225  return m_ht.erase(key.data(), key.size());
226  }
227 #else
228  size_type erase(const CharT* key) {
229  return m_ht.erase(key, std::strlen(key));
230  }
231 
232  size_type erase(const std::basic_string<CharT>& key) {
233  return m_ht.erase(key.data(), key.size());
234  }
235 #endif
236 
237 
238 
239  /**
240  * Erase all the elements which have 'prefix' as prefix. Return the number of erase elements.
241  */
242  size_type erase_prefix_ks(const CharT* prefix, size_type prefix_size) {
243  return m_ht.erase_prefix(prefix, prefix_size);
244  }
245 #ifdef TSL_HT_HAS_STRING_VIEW
246  /**
247  * @copydoc erase_prefix_ks(const CharT* prefix, size_type prefix_size)
248  */
250  return m_ht.erase_prefix(prefix.data(), prefix.size());
251  }
252 #else
253  /**
254  * @copydoc erase_prefix_ks(const CharT* prefix, size_type prefix_size)
255  */
256  size_type erase_prefix(const CharT* prefix) {
257  return m_ht.erase_prefix(prefix, std::strlen(prefix));
258  }
259 
260  /**
261  * @copydoc erase_prefix_ks(const CharT* prefix, size_type prefix_size)
262  */
263  size_type erase_prefix(const std::basic_string<CharT>& prefix) {
264  return m_ht.erase_prefix(prefix.data(), prefix.size());
265  }
266 #endif
267 
268 
269 
270  void swap(htrie_set& other) { other.m_ht.swap(m_ht); }
271 
272 
273  /*
274  * Lookup
275  */
276  size_type count_ks(const CharT* key, size_type key_size) const { return m_ht.count(key, key_size); }
277 #ifdef TSL_HT_HAS_STRING_VIEW
278  size_type count(const std::basic_string_view<CharT>& key) const { return m_ht.count(key.data(), key.size()); }
279 #else
280  size_type count(const CharT* key) const { return m_ht.count(key, std::strlen(key)); }
281  size_type count(const std::basic_string<CharT>& key) const { return m_ht.count(key.data(), key.size()); }
282 #endif
283 
284 
285 
286  iterator find_ks(const CharT* key, size_type key_size) {
287  return m_ht.find(key, key_size);
288  }
289 
290  const_iterator find_ks(const CharT* key, size_type key_size) const {
291  return m_ht.find(key, key_size);
292  }
293 #ifdef TSL_HT_HAS_STRING_VIEW
295  return m_ht.find(key.data(), key.size());
296  }
297 
299  return m_ht.find(key.data(), key.size());
300  }
301 #else
302  iterator find(const CharT* key) {
303  return m_ht.find(key, std::strlen(key));
304  }
305 
306  const_iterator find(const CharT* key) const {
307  return m_ht.find(key, std::strlen(key));
308  }
309 
310  iterator find(const std::basic_string<CharT>& key) {
311  return m_ht.find(key.data(), key.size());
312  }
313 
314  const_iterator find(const std::basic_string<CharT>& key) const {
315  return m_ht.find(key.data(), key.size());
316  }
317 #endif
318 
319 
320 
321  std::pair<iterator, iterator> equal_range_ks(const CharT* key, size_type key_size) {
322  return m_ht.equal_range(key, key_size);
323  }
324 
325  std::pair<const_iterator, const_iterator> equal_range_ks(const CharT* key, size_type key_size) const {
326  return m_ht.equal_range(key, key_size);
327  }
328 #ifdef TSL_HT_HAS_STRING_VIEW
330  return m_ht.equal_range(key.data(), key.size());
331  }
332 
334  return m_ht.equal_range(key.data(), key.size());
335  }
336 #else
337  std::pair<iterator, iterator> equal_range(const CharT* key) {
338  return m_ht.equal_range(key, std::strlen(key));
339  }
340 
341  std::pair<const_iterator, const_iterator> equal_range(const CharT* key) const {
342  return m_ht.equal_range(key, std::strlen(key));
343  }
344 
345  std::pair<iterator, iterator> equal_range(const std::basic_string<CharT>& key) {
346  return m_ht.equal_range(key.data(), key.size());
347  }
348 
349  std::pair<const_iterator, const_iterator> equal_range(const std::basic_string<CharT>& key) const {
350  return m_ht.equal_range(key.data(), key.size());
351  }
352 #endif
353 
354 
355 
356  /**
357  * Return a range containing all the elements which have 'prefix' as prefix. The range is defined by a pair
358  * of iterator, the first being the begin iterator and the second being the end iterator.
359  */
360  std::pair<prefix_iterator, prefix_iterator> equal_prefix_range_ks(const CharT* prefix, size_type prefix_size) {
361  return m_ht.equal_prefix_range(prefix, prefix_size);
362  }
363 
364  /**
365  * @copydoc equal_prefix_range_ks(const CharT* prefix, size_type prefix_size)
366  */
367  std::pair<const_prefix_iterator, const_prefix_iterator> equal_prefix_range_ks(const CharT* prefix, size_type prefix_size) const {
368  return m_ht.equal_prefix_range(prefix, prefix_size);
369  }
370 #ifdef TSL_HT_HAS_STRING_VIEW
371  /**
372  * @copydoc equal_prefix_range_ks(const CharT* prefix, size_type prefix_size)
373  */
376  }
377 
378  /**
379  * @copydoc equal_prefix_range_ks(const CharT* prefix, size_type prefix_size)
380  */
383  }
384 #else
385  /**
386  * @copydoc equal_prefix_range_ks(const CharT* prefix, size_type prefix_size)
387  */
388  std::pair<prefix_iterator, prefix_iterator> equal_prefix_range(const CharT* prefix) {
389  return m_ht.equal_prefix_range(prefix, std::strlen(prefix));
390  }
391 
392  /**
393  * @copydoc equal_prefix_range_ks(const CharT* prefix, size_type prefix_size)
394  */
395  std::pair<const_prefix_iterator, const_prefix_iterator> equal_prefix_range(const CharT* prefix) const {
396  return m_ht.equal_prefix_range(prefix, std::strlen(prefix));
397  }
398 
399  /**
400  * @copydoc equal_prefix_range_ks(const CharT* prefix, size_type prefix_size)
401  */
402  std::pair<prefix_iterator, prefix_iterator> equal_prefix_range(const std::basic_string<CharT>& prefix) {
403  return m_ht.equal_prefix_range(prefix.data(), prefix.size());
404  }
405 
406  /**
407  * @copydoc equal_prefix_range_ks(const CharT* prefix, size_type prefix_size)
408  */
409  std::pair<const_prefix_iterator, const_prefix_iterator> equal_prefix_range(const std::basic_string<CharT>& prefix) const {
410  return m_ht.equal_prefix_range(prefix.data(), prefix.size());
411  }
412 #endif
413 
414 
415 
416  /**
417  * Return the element in the trie which is the longest prefix of `key`. If no
418  * element in the trie is a prefix of `key`, the end iterator is returned.
419  *
420  * Example:
421  *
422  * tsl::htrie_set<char> set = {"/foo", "/foo/bar"};
423  *
424  * set.longest_prefix("/foo"); // returns "/foo"
425  * set.longest_prefix("/foo/baz"); // returns "/foo"
426  * set.longest_prefix("/foo/bar/baz"); // returns "/foo/bar"
427  * set.longest_prefix("/foo/bar/"); // returns "/foo/bar"
428  * set.longest_prefix("/bar"); // returns end()
429  * set.longest_prefix(""); // returns end()
430  */
431  iterator longest_prefix_ks(const CharT* key, size_type key_size) {
432  return m_ht.longest_prefix(key, key_size);
433  }
434 
435  /**
436  * @copydoc longest_prefix_ks(const CharT* key, size_type key_size)
437  */
438  const_iterator longest_prefix_ks(const CharT* key, size_type key_size) const {
439  return m_ht.longest_prefix(key, key_size);
440  }
441 #ifdef TSL_HT_HAS_STRING_VIEW
442  /**
443  * @copydoc longest_prefix_ks(const CharT* key, size_type key_size)
444  */
446  return m_ht.longest_prefix(key.data(), key.size());
447  }
448 
449  /**
450  * @copydoc longest_prefix_ks(const CharT* key, size_type key_size)
451  */
453  return m_ht.longest_prefix(key.data(), key.size());
454  }
455 #else
456  /**
457  * @copydoc longest_prefix_ks(const CharT* key, size_type key_size)
458  */
459  iterator longest_prefix(const CharT* key) {
460  return m_ht.longest_prefix(key, std::strlen(key));
461  }
462 
463  /**
464  * @copydoc longest_prefix_ks(const CharT* key, size_type key_size)
465  */
466  const_iterator longest_prefix(const CharT* key) const {
467  return m_ht.longest_prefix(key, std::strlen(key));
468  }
469 
470  /**
471  * @copydoc longest_prefix_ks(const CharT* key, size_type key_size)
472  */
473  iterator longest_prefix(const std::basic_string<CharT>& key) {
474  return m_ht.longest_prefix(key.data(), key.size());
475  }
476 
477  /**
478  * @copydoc longest_prefix_ks(const CharT* key, size_type key_size)
479  */
480  const_iterator longest_prefix(const std::basic_string<CharT>& key) const {
481  return m_ht.longest_prefix(key.data(), key.size());
482  }
483 #endif
484 
485 
486 
487  /*
488  * Hash policy
489  */
490  float max_load_factor() const { return m_ht.max_load_factor(); }
491  void max_load_factor(float ml) { m_ht.max_load_factor(ml); }
492 
493 
494  /*
495  * Burst policy
496  */
497  size_type burst_threshold() const { return m_ht.burst_threshold(); }
498  void burst_threshold(size_type threshold) { m_ht.burst_threshold(threshold); }
499 
500 
501  /*
502  * Observers
503  */
504  hasher hash_function() const { return m_ht.hash_function(); }
505 
506 
507 
508  /*
509  * Other
510  */
511 
512  /**
513  * Serialize the set through the `serializer` parameter.
514  *
515  * The `serializer` parameter must be a function object that supports the following calls:
516  * - `void operator()(const U& value);` where the types `std::uint64_t` and `float` must be supported for U.
517  * - `void operator()(const CharT* value, std::size_t value_size);`
518  *
519  * The implementation leaves binary compatibilty (endianness, IEEE 754 for floats, ...) of the types it serializes
520  * in the hands of the `Serializer` function object if compatibilty is required.
521  */
522  template<class Serializer>
523  void serialize(Serializer& serializer) const {
524  m_ht.serialize(serializer);
525  }
526 
527 
528  /**
529  * Deserialize a previouly serialized set through the `deserializer` parameter.
530  *
531  * The `deserializer` parameter must be a function object that supports the following calls:
532  * - `template<typename U> U operator()();` where the types `std::uint64_t` and `float` must be supported for U.
533  * - `void operator()(CharT* value_out, std::size_t value_size);`
534  *
535  * If the deserialized hash set part of the hat-trie is hash compatible with the serialized set, the deserialization process
536  * can be sped up by setting `hash_compatible` to true. To be hash compatible, the Hash (take care of the 32-bits vs 64 bits),
537  * and KeySizeT must behave the same than the ones used in the serialized set. Otherwise the behaviour is undefined
538  * with `hash_compatible` sets to true.
539  *
540  * The behaviour is undefined if the type `CharT` of the `htrie_set` is not the same as the
541  * type used during serialization.
542  *
543  * The implementation leaves binary compatibilty (endianness, IEEE 754 for floats, size of int, ...) of the types it
544  * deserializes in the hands of the `Deserializer` function object if compatibilty is required.
545  */
546  template<class Deserializer>
547  static htrie_set deserialize(Deserializer& deserializer, bool hash_compatible = false) {
548  htrie_set set;
549  set.m_ht.deserialize(deserializer, hash_compatible);
550 
551  return set;
552  }
553 
554  friend bool operator==(const htrie_set& lhs, const htrie_set& rhs) {
555  if(lhs.size() != rhs.size()) {
556  return false;
557  }
558 
559  std::string key_buffer;
560  for(auto it = lhs.cbegin(); it != lhs.cend(); ++it) {
561  it.key(key_buffer);
562 
563  const auto it_element_rhs = rhs.find(key_buffer);
564  if(it_element_rhs == rhs.cend()) {
565  return false;
566  }
567  }
568 
569  return true;
570  }
571 
572  friend bool operator!=(const htrie_set& lhs, const htrie_set& rhs) {
573  return !operator==(lhs, rhs);
574  }
575 
576  friend void swap(htrie_set& lhs, htrie_set& rhs) {
577  lhs.swap(rhs);
578  }
579 
580 private:
581  ht m_ht;
582 };
583 
584 } // end namespace tsl
585 
586 #endif
const_iterator cend() const noexcept
Definition: htrie_set.h:132
std::pair< prefix_iterator, prefix_iterator > equal_prefix_range_ks(const CharT *prefix, size_type prefix_size)
Definition: htrie_set.h:360
static htrie_set deserialize(Deserializer &deserializer, bool hash_compatible=false)
Definition: htrie_set.h:547
size_type max_key_size() const noexcept
Definition: htrie_set.h:141
Definition: htrie_hash.h:68
const_iterator begin() const noexcept
Definition: htrie_set.h:127
void shrink_to_fit()
Definition: htrie_set.h:146
htrie_set(size_type burst_threshold, const Hash &hash=Hash())
Definition: htrie_set.h:74
void max_load_factor(float ml)
Definition: htrie_set.h:491
void burst_threshold(size_type threshold)
Definition: htrie_set.h:498
htrie_set(const Hash &hash=Hash())
Definition: htrie_set.h:69
size_type erase_ks(const CharT *key, size_type key_size)
Definition: htrie_set.h:220
std::pair< const_prefix_iterator, const_prefix_iterator > equal_prefix_range_ks(const CharT *prefix, size_type prefix_size) const
Definition: htrie_set.h:367
std::pair< iterator, iterator > equal_range_ks(const CharT *key, size_type key_size)
Definition: htrie_set.h:321
void serialize(Serializer &serializer) const
Definition: htrie_set.h:523
iterator erase(const_iterator pos)
Definition: htrie_set.h:215
htrie_set(InputIt first, InputIt last, const Hash &hash=Hash())
Definition: htrie_set.h:81
size_type size() const noexcept
Definition: htrie_set.h:139
const_iterator longest_prefix(const std::basic_string_view< CharT > &key) const
Definition: htrie_set.h:452
std::pair< iterator, bool > insert_ks(const CharT *key, size_type key_size)
Definition: htrie_set.h:156
void insert(InputIt first, InputIt last)
Definition: htrie_set.h:176
const_iterator end() const noexcept
Definition: htrie_set.h:131
void clear() noexcept
Definition: htrie_set.h:152
iterator end() noexcept
Definition: htrie_set.h:130
std::pair< const_iterator, const_iterator > equal_range_ks(const CharT *key, size_type key_size) const
Definition: htrie_set.h:325
std::pair< iterator, bool > emplace_ks(const CharT *key, size_type key_size)
Definition: htrie_set.h:196
size_type burst_threshold() const
Definition: htrie_set.h:497
size_type erase_prefix_ks(const CharT *prefix, size_type prefix_size)
Definition: htrie_set.h:242
friend void swap(htrie_set &lhs, htrie_set &rhs)
Definition: htrie_set.h:576
friend bool operator==(const htrie_set &lhs, const htrie_set &rhs)
Definition: htrie_set.h:554
iterator find_ks(const CharT *key, size_type key_size)
Definition: htrie_set.h:286
hasher hash_function() const
Definition: htrie_set.h:504
const_iterator longest_prefix_ks(const CharT *key, size_type key_size) const
Definition: htrie_set.h:438
void swap(htrie_set &other)
Definition: htrie_set.h:270
size_type count_ks(const CharT *key, size_type key_size) const
Definition: htrie_set.h:276
float max_load_factor() const
Definition: htrie_set.h:490
iterator longest_prefix_ks(const CharT *key, size_type key_size)
Definition: htrie_set.h:431
Definition: htrie_hash.h:130
iterator begin() noexcept
Definition: htrie_set.h:126
size_type max_size() const noexcept
Definition: htrie_set.h:140
Definition: htrie_hash.h:70
const_iterator find_ks(const CharT *key, size_type key_size) const
Definition: htrie_set.h:290
bool empty() const noexcept
Definition: htrie_set.h:138
friend bool operator!=(const htrie_set &lhs, const htrie_set &rhs)
Definition: htrie_set.h:572
const_iterator cbegin() const noexcept
Definition: htrie_set.h:128
iterator erase(const_iterator first, const_iterator last)
Definition: htrie_set.h:216