HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_ARTMap.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_ARTMap.h
7  *
8  * COMMENTS: Implementation of an Adaptive Radix Tree
9  *
10  * UT_ARTMap<entry> map;
11  * entry item;
12  *
13  * map[id] = item; // insert or replace
14  *
15  * item = map[id]; // find or create new
16  *
17  * UT_ARTMap<entry>::iterator map_it = map.find(id); // find only
18  * if (map_it != map.end())
19  * item = map_it->second;
20  *
21  * map.erase(id); // remove entry
22  *
23  * for (auto&& [id, item] : map) // traverse
24  * {
25  * }
26  *
27  * map.contains(id); // check existence
28  *
29  * map.clear(); // clear all entries
30  *
31  * RELATION TO THE STL:
32  *
33  * Use UT_ARTMap over all maps WHEN your strings are fairly unique to each other.
34  *
35  * Reasoning to not use other maps:
36  * - Performance for queries is far greater then all other map types.
37  * - Does a good job on saving space as it uses the smaller node sizes where possible
38  *
39  * Reasong to no use UT_ARTMap:
40  * - If youre doing more inserting over querying then you'll want to profile first
41  * before using as its performance will be far less then if youre doing mostly queries
42  * - If the keys are mostly the same strings (ie the same except the last character)
43  * then theres a performance cost as it needs to check the entire string length
44  * instead of the ideal case where it only needs to check a couple characters
45  * at the beginning.
46  *
47  * NOTES:
48  * - Keys need to be binary-comparible keys and as such UNICODE strings need to
49  * encoded using a third party library like ICU to produce appropriate
50  * unicode sort keys. Unicode decoding is not well defined and as such
51  * normally you would store the unicode string alongside the items data for
52  * proper retrieval of the original unicode string.
53  */
54 
55 #ifndef __UT_ARTMAP_H__
56 #define __UT_ARTMAP_H__
57 
58 #include "UT_API.h"
59 
60 #include "UT_Array.h"
61 #include "UT_Assert.h"
62 #include "UT_Debug.h"
63 #include "UT_Optional.h"
64 #include "UT_StringHolder.h"
65 #include "UT_UniquePtr.h"
66 #include "UT_WorkBuffer.h"
67 
68 #include <SYS/SYS_Compiler.h>
69 #include <SYS/SYS_Platform.h>
71 
72 #include <cctype>
73 #include <variant>
74 
75 #if defined(__i386__) || defined(__amd64__)
76 #include <emmintrin.h>
77 #include <immintrin.h>
78 #elif defined (_WIN32)
79 #include <intrin.h>
80 #endif
81 
82 
83 #if defined(USE_MEMORY_RESOURCE)
84 #include <memory_resource>
85 namespace UT_PMR = std::pmr;
86 #endif
87 
88 #if defined(LINUX) || defined(MBSD)
89  #if defined(__i386) || defined(__amd64)
90  #define SUPPORT_FAST_STR
91  #endif
92 #elif !defined(__clang__) && defined(_M_AMD64)
93  #define SUPPORT_FAST_STR
94 #endif
95 
96 #define UT_VERIFY_ROOT UT_ASSERT(myRoot && !myRoot->hasValue())
97 
98 template <typename T>
99 class UT_ARTMap;
100 template <typename T>
102 
103 /// Deletor for node types which is mostly used for the memory resource
104 /// version of this tree.
106 {
107 public:
108  template <typename T>
109  void operator()(T* ptr);
110 
111 private:
112 };
113 template <typename T>
115 
116 /// Based node type used to store common functionality across node types and
117 /// to store the item value.
118 template <typename T>
119 class UT_ARTNode
120 {
121 public:
122  using value_type = T;
123 
124  explicit UT_ARTNode(
125  const UT_StringHolder& key,
126  const UT_StringView& prefix
127 #if defined(USE_MEMORY_RESOURCE)
128  , UT_PMR::polymorphic_allocator<UT_ARTNode<T>> alloc
129 #endif
130  )
131  : myKey(key)
132  , myPrefix(prefix)
133  , myValue()
134  , myAllowsPartial(false)
135 #if defined(USE_MEMORY_RESOURCE)
136  , myAllocator(alloc)
137 #endif
138  {
139  }
140  virtual ~UT_ARTNode() = default;
142 
143  UT_StringHolder key() const { return myKey; }
145  {
146  if (myValue.has_value())
147  return &myValue.value();
148  return nullptr;
149  }
150  template <typename... Args>
151  void emplace(Args&&... args)
152  {
153  myValue.emplace(std::forward<Args>(args)...);
154  }
156  {
157  if (myValue.has_value())
158  return &myValue.value();
159  return nullptr;
160  }
161  SYS_FORCE_INLINE bool hasValue() const { return myValue.has_value(); }
162  SYS_FORCE_INLINE const UT_StringView& prefix() const { return myPrefix; }
164  {
165  return myNumChildren == 0;
166  }
167  virtual bool isFull() const = 0;
169  void setAllowsPartial(bool allow) { myAllowsPartial = allow; }
170 
171  virtual const UT_StringHolder& type() const = 0;
172 
173  void debug(UT_WorkBuffer& wbuf, unsigned indent = 0) const
174  {
175  wbuf.append(indent, ' ');
176  if (myPrefix)
177  wbuf.append(myPrefix);
178  else
179  wbuf.append("<empty>");
180  wbuf.append(' ');
181  if (hasValue())
182  wbuf.appendFormat("{}", *value());
183  else
184  wbuf.append("<None>");
185  wbuf.append('\n');
186 
187  int idx = 0;
188  const UT_ARTNode<T>* child = nullptr;
189  do
190  {
191  child = nextChild(idx);
192  idx++;
193  if (child != nullptr)
194  {
195  child->debug(wbuf, indent + 1);
196  }
197  } while (child != nullptr);
198  }
199 
200  /// Compress the current node and its child. Compression happens when
201  /// this node has no value (edge) and its only child has a value. When
202  /// this occurs we can move the child up to this node.
204  {
205  if (myNumChildren == 1)
206  {
207  // If this node is not a root and has no value move the child
208  // up.
209  if (!hasValue())
210  {
212  = stealChild(ref, 0);
213  if (stolen)
214  {
215  exint prefix_length = myPrefix.length();
216  myKey = stolen->myKey;
218  stolen->myPrefix.begin() - prefix_length,
219  stolen->myPrefix.end());
220  myAllowsPartial = stolen->myAllowsPartial;
221  myValue = std::move(stolen->myValue);
222 
223  stolen->moveChildren(ref);
224  }
225  }
226  }
227  }
228 
229  virtual UT_ARTNode<T>** insertChild(
230  UT_ARTNode** ref,
232  = 0;
234  UT_ARTNode** ref,
235  UT_ARTNode* child)
236  = 0;
238  {
239  exint node_prefix_length = this->myPrefix.length();
240 
241  for (int i = this->myNumChildren; i-- > 0;)
242  {
243  UT_ARTNodePtr<UT_ARTNode<T>> child = stealChild(ref, i);
244  const char* start = child->myPrefix.begin() - node_prefix_length;
245  UT_ASSERT(start >= child->myKey.buffer());
246  child->myPrefix = UT_StringView(start, child->myPrefix.end());
247 
248  (*ref)->insertChild(ref, std::move(child));
249  }
250  }
251  virtual UT_ARTNode<T>* nextChild(int& idx) = 0;
252  virtual const UT_ARTNode<T>* nextChild(int& idx) const = 0;
254  UT_ARTNode<T>** ref,
255  int idx)
256  = 0;
257 
258  // Find the max length that both key and node prefix share
260  const UT_StringView& key,
261  const UT_StringView& node_prefix)
262  {
263  if (key.isEmpty() || node_prefix.isEmpty())
264  return 0;
265 
266  exint length = 0;
267  const char* k = key.data();
268  const char* p = node_prefix.data();
269  exint n = std::min(key.length(), node_prefix.length());
270 
271 #if defined(SUPPORT_FAST_STR) && !defined(ASAN_ENABLED)
272  static constexpr exint theMaxSIMDLen = 16;
273  const int mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH
274  | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY;
275  while (n > theMaxSIMDLen)
276  {
277  const __m128i data = _mm_loadu_si128(
278  reinterpret_cast<const __m128i*>(k));
279  const __m128i prefix = _mm_loadu_si128(
280  reinterpret_cast<const __m128i*>(p));
281 
282  int check_len = std::min(n, theMaxSIMDLen);
283  int res = _mm_cmpestri(data, check_len, prefix, check_len, mode);
284 
285  // For some reason if the length of the string is less then the
286  // max and the entire string matches it returns 16 instead of
287  // the length of the string so we cover that case here
288  if (n < 16 && res == 16)
289  res = n;
290 
291  length += res;
292 
293  n -= check_len;
294 
295  k += check_len;
296  p += check_len;
297  }
298 #endif
299 
300  while (n > 0)
301  {
302  if (*k != *p)
303  break;
304 
305  length++;
306  k++;
307  p++;
308  n--;
309  }
310  return length;
311  }
312 
313  // Find a child that is a partial match to the prefix.
315  const UT_StringView& prefix)
316  = 0;
317 
318  void moveHeader(UT_ARTNode<T>& move_to)
319  {
320  move_to.myKey = myKey;
321  move_to.myPrefix = myPrefix;
322  move_to.myValue = std::move(myValue);
324  move_to.myNumChildren = myNumChildren;
325  }
326 
327 #if defined(USE_MEMORY_RESOURCE)
328  UT_PMR::polymorphic_allocator<UT_ARTNode<T>> getAllocator()
329  {
330  return myAllocator;
331  }
332 #endif
333 
334  template <typename NodeT>
336  {
337 #if defined(USE_MEMORY_RESOURCE)
338  UT_PMR::polymorphic_allocator<NodeT> alloc(myAllocator);
339  NodeT* node = alloc.allocate(1);
340  alloc.construct(node, myKey, myPrefix, alloc);
342  return ptr;
343 #else
344  UT_ARTNodePtr<NodeT> ptr(new NodeT(myKey, myPrefix));
345  return ptr;
346 #endif
347  }
348  static void destroy(UT_ARTNode<T>* node)
349  {
350  if (node)
351  {
352  UT_ARTNodeDelete del;
353  del(node);
354  }
355  }
356 
357 #if defined(USE_MEMORY_RESOURCE)
358  UT_PMR::polymorphic_allocator<UT_ARTNode<T>> myAllocator;
359 #endif
363 
364  // If the node allows for partial matches
365  bool myAllowsPartial = false;
366 
367  uint8_t myNumChildren = 0;
368 };
369 
370 template <typename T>
371 inline void
373 {
374 #if defined(USE_MEMORY_RESOURCE)
375  auto alloc = ptr->getAllocator();
376  alloc.destroy(ptr);
377  alloc.deallocate(ptr, 1);
378 #else
379  delete ptr;
380 #endif
381 }
382 
383 /// Iterator for traversing the adaptive radix tree.
384 template <typename T>
386 {
387 public:
388  using difference_type = std::ptrdiff_t;
390  using pointer = value_type*;
391  using const_pointer = const value_type*;
394  using iterator_category = std::forward_iterator_tag;
395 
396  UT_ARTIterator() = default;
397 
398  bool operator==(const UT_ARTIterator& it) const
399  {
400  return myCurrent == it.myCurrent;
401  }
402  bool operator!=(const UT_ARTIterator& it) const
403  {
404  return !(*this == it);
405  }
406  const_reference operator*() const { return *this; }
407  reference operator*() { return *this; }
408  const_pointer operator->() const { return this; }
409  pointer operator->() { return this; }
410 
412  {
413  if (!myCurrent)
414  return *this;
415 
416  findNextValue_();
417 
418  return *this;
419  }
420 
422  {
423  UT_ARTIterator tmp = *this;
424  ++(*this);
425  return tmp;
426  }
427 
428  bool hasValue() const { return myCurrent && myCurrent->hasValue(); }
429 
430  const T& value() const { return *myCurrent->value(); }
431  T& value() { return *myCurrent->value(); }
432 
433  UT_StringHolder key() const { return myCurrent->key(); }
434 
435 private:
436  friend class UT_ARTMap<T>;
437 
438  explicit UT_ARTIterator(UT_ARTNode<T>* node) : myCurrent(node)
439  {
440  // If the given value is not a value node skip to the next value node.
441  // Note that this should only be the case when the node is a root node.
442  if (myCurrent && !myCurrent->hasValue())
443  findNextValue_();
444  }
445 
446  void findNextValue_()
447  {
448  if (!myCurrent)
449  return;
450 
451  bool has_seen = false;
452  do
453  {
454  has_seen = false;
455  UT_ARTNode<T>* child = myCurrent->nextChild(myIndex);
456  if (child)
457  {
458  myIndex++;
459  myNodeStack.emplace_back(std::make_pair(myCurrent, myIndex));
460  myCurrent = child;
461  myIndex = 0;
462  }
463  else if (!myNodeStack.isEmpty())
464  {
465  auto&& [node, index] = myNodeStack.last();
466  myNodeStack.removeLast();
467 
468  // we have already seen this node so we need another iteration
469  // to find the next unseen value.
470  has_seen = true;
471  myCurrent = node;
472  myIndex = index;
473  }
474  else
475  {
476  myCurrent = nullptr;
477  myIndex = 0;
478  }
479  } while (myCurrent != nullptr && (has_seen || !myCurrent->hasValue()));
480 
481  return;
482  }
483 
484  UT_Array<std::pair<UT_ARTNode<T>*, int>> myNodeStack;
485  UT_ARTNode<T>* myCurrent = nullptr;
486  int myIndex = 0;
487 };
488 
489 /// The smallest (also known as base node) node with 4 max children.
490 template <typename T>
491 class UT_ARTNode4 final : public UT_ARTNode<T>
492 {
493 public:
496 
497 #if defined(USE_MEMORY_RESOURCE)
498  UT_ARTNode4(
499  const UT_StringHolder& key,
501  , UT_PMR::polymorphic_allocator<UT_ARTNode4<T>> alloc)
502  : UT_ARTNode<T>(key, prefix, alloc)
503  {
504  }
505 #else
508  UT_ARTNode<T>(key, prefix)
509  {}
510 #endif
512  ~UT_ARTNode4() override
513  {
518  }
519 
520  const UT_StringHolder& type() const override
521  {
522  static constexpr UT_StringLit theType("node4");
523  return theType.asHolder();
524  }
525  bool isFull() const override;
527  parent_t** ref,
528  parent_ptr_t child) override;
530  parent_t** ref,
531  parent_t* child) override;
533  const UT_StringView& prefix) override;
534  parent_t* nextChild(int& index) override;
535  const parent_t* nextChild(int& index) const override;
536  parent_ptr_t stealChild(parent_t** ref, int idx) override;
537 
538  unsigned char myChildKeys[4] = {0,0,0,0};
539  parent_t* myChildren[4] = {nullptr, nullptr, nullptr, nullptr};
540 };
541 
542 /// The middle sized node with 16 max children.
543 template <typename T>
544 class UT_ARTNode16 final : public UT_ARTNode<T>
545 {
546 public:
549 
550 #if defined(USE_MEMORY_RESOURCE)
551  UT_ARTNode16(
552  const UT_StringHolder& key,
554  UT_PMR::polymorphic_allocator<UT_ARTNode4<T>> alloc)
555  : UT_ARTNode<T>(key, prefix, alloc)
556 #else
558  : UT_ARTNode<T>(key, prefix)
559 #endif
560  {
561  memset(myChildKeys, 0, 16);
562  }
564  ~UT_ARTNode16() override
565  {
566  for (int i = 16; i-->0; )
568  }
569 
570  const UT_StringHolder& type() const override
571  {
572  static constexpr UT_StringLit theType("node16");
573  return theType.asHolder();
574  }
575  bool isFull() const override;
577  parent_t** ref,
578  parent_ptr_t child) override;
580  parent_t** ref,
581  parent_t* child) override;
583  const UT_StringView& prefix) override;
584  parent_t* nextChild(int& index) override;
585  const parent_t* nextChild(int& index) const override;
586  parent_ptr_t stealChild(parent_t** ref, int idx)
587  override;
588 
589  int prefixToIndex_(char c) const
590  {
591  unsigned mask = (1 << this->myNumChildren) - 1;
592  unsigned bitfield = 0;
593 #if defined(__i386) || defined(__amd64)
594  // SSE2 instructions so shouldnt need to check processor...
595  __m128i key_vec = _mm_set1_epi8(c);
596  __m128i results = _mm_cmpeq_epi8(
597  key_vec, _mm_loadu_si128((__m128i*)myChildKeys));
598  bitfield = _mm_movemask_epi8(results) & mask;
599 #else
600  for (int i = 0; i < 16; ++i)
601  {
602  if (myChildKeys[i] == c)
603  bitfield |= (1 << i);
604  }
605 
606  bitfield &= mask;
607 #endif
608 
609  if (bitfield)
610  {
611 #if !defined(_WIN32)
612  return __builtin_ctz(bitfield);
613 #else
614  return _tzcnt_u32(bitfield);
615 #endif
616  }
617  return -1;
618  }
619 
620  unsigned char myChildKeys[16];
621  parent_t* myChildren[16] = {};
622 };
623 
624 /// The second largest node size with 48 max children.
625 template <typename T>
626 class UT_ARTNode48 final : public UT_ARTNode<T>
627 {
628 public:
631 
632 #if defined(USE_MEMORY_RESOURCE)
633  UT_ARTNode48(
634  const UT_StringHolder& key,
636  UT_PMR::polymorphic_allocator<UT_ARTNode4<T>> alloc)
637  : UT_ARTNode<T>(key, prefix, alloc)
638 #else
640  : UT_ARTNode<T>(key, prefix)
641 #endif
642  {
643  memset(myChildKeys, 0, 256);
644  }
646  ~UT_ARTNode48() override
647  {
648  for (int i = 48; i-->0; )
650  }
651  const UT_StringHolder& type() const override
652  {
653  static constexpr UT_StringLit theType("node48");
654  return theType.asHolder();
655  }
656  bool isFull() const override;
658  parent_t** ref,
659  parent_ptr_t child) override;
661  parent_t** ref,
662  parent_t* child) override;
664  const UT_StringView& prefix) override;
665  parent_t* nextChild(int& index) override;
666  const parent_t* nextChild(int& index) const override;
667  parent_ptr_t stealChild(parent_t** ref, int idx)
668  override;
669 
670  unsigned char myChildKeys[256];
671  parent_t* myChildren[48] = {};
672 };
673 
674 /// The largest node size in the art map. The 256 node has at most 256 children
675 template <typename T>
676 class UT_ARTNode256 final : public UT_ARTNode<T>
677 {
678 public:
681 
682 #if defined(USE_MEMORY_RESOURCE)
684  const UT_StringHolder& key,
686  UT_PMR::polymorphic_allocator<UT_ARTNode4<T>> alloc)
687  : UT_ARTNode<T>(key, prefix, alloc)
688 #else
690  : UT_ARTNode<T>(key, prefix)
691 #endif
692  {
693  }
695  ~UT_ARTNode256() override
696  {
697  for (int i = 256; i-->0; )
699  }
700 
701  const UT_StringHolder& type() const override
702  {
703  static constexpr UT_StringLit theType("node256");
704  return theType.asHolder();
705  }
706  bool isFull() const override;
708  parent_t** ref,
709  parent_ptr_t child) override;
711  parent_t** ref,
712  parent_t* child) override;
714  const UT_StringView& prefix) override;
715  parent_t* nextChild(int& index) override;
716  const parent_t* nextChild(int& index) const override;
717  parent_ptr_t stealChild(parent_t** ref, int idx)
718  override;
719 
720  parent_t* myChildren[256] = {};
721 };
722 
723 /// A basic adaptive radix tree implementation using node4, node16, node48,
724 /// and node256 for space and performance.
725 template <typename T>
726 class UT_ARTMap
727 {
728 private:
729  using Node = UT_ARTNode<T>;
730  using StartNode = UT_ARTNode4<T>;
731 
732 public:
735  using mapped_type = T;
736 
737  /// Initialize an empty map.
738 #if defined(USE_MEMORY_RESOURCE)
739  UT_ARTMap() :
740  UT_ARTMap(UT_PMR::get_default_resource())
741  {
742  }
743  explicit UT_ARTMap(UT_PMR::memory_resource* resource) :
744  myAllocator(resource)
745 #else
747 #endif
748  {
749  myRoot = createBaseNode(
751  .release();
752  }
755  {
756 #if defined(USE_MEMORY_RESOURCE)
757  UT_PMR::polymorphic_allocator<Node> alloc(myAllocator);
758  alloc.destroy(myRoot);
759  alloc.deallocate(myRoot, 1);
760 #else
761  delete myRoot;
762 #endif
763  }
764 
765  /// Insert a node with the additional option to specify if partial matches
766  /// are allowed for this item. If you're using UT_ARTMap like a traditional
767  /// map then partial matches arent something that needs to be considered.
768  std::pair<iterator, bool> insert(
769  const UT_StringHolder& name,
770  const T& data,
771  bool allow_partial = false)
772  {
773  return insert_(name, allow_partial, data);
774  }
775 
776  /// Insert a node by moving its values instead of copying it. This function
777  /// additionally supports specifying if partial matches are allowed for this
778  /// item. If you're using UT_ARTMap like a traditional map then partial
779  /// matches arent something that needs to be considered.
780  std::pair<iterator, bool> insert(
781  const UT_StringHolder& name,
782  T&& value,
783  bool allow_partial = false)
784  {
785  return insert_(name, allow_partial, value);
786  }
787 
788  /// Insert a node into the tree by emplacing its value. This function does
789  /// not allow specifying partial matches as its meant to be equivalent to
790  /// STL maps.
791  template <typename... Args>
792  std::pair<iterator, bool> emplace(
793  const UT_StringHolder& name,
794  Args&&... args)
795  {
796  return insert_(
797  name, /*allows_partial=*/false, std::forward<Args>(args)...);
798  }
799 
800  /// Lookup an existing item based on the key. If no item is found a
801  /// default constructible item is placed into the tree. This function
802  /// does not support allowing partial matches.
803  typename std::enable_if_t<std::is_default_constructible_v<T>, T&>
805  {
806  auto&& [it, inserted] = insert_(key, /*allows_partial=*/false);
807 
808  UT_ASSERT(it.myCurrent != nullptr);
809  if (it.myCurrent && inserted)
810  {
811  it.myCurrent->emplace();
812  }
813 
814  return it.value();
815  }
816 
817  /// Lookup an existing item based on the key. If no item is found a
818  /// default constructible item is placed into the tree. This function
819  /// does not support allowing partial matches.
820  typename std::enable_if_t<std::is_default_constructible_v<T>, T&>
822  {
823  auto&& [it, inserted] = insert_(key, /*allows_partial=*/false);
824 
825  UT_ASSERT(it.myCurrent != nullptr);
826  if (it.myCurrent && inserted)
827  {
828  it.myCurrent->emplace();
829  }
830 
831  return it.value();
832  }
833 
834  /// Find the node based on the provided prefix.
836  find(UT_StringView prefix) const
837  {
839  Node* child = SYSconst_cast(this)->myRoot;
840  Node* last_partial = nullptr;
841 
842  while (!prefix.isEmpty() && child != nullptr)
843  {
844  Node** found = child->findPartialPrefixChild(prefix);
845  if (found && *found)
846  {
847  UT_StringView found_prefix((*found)->prefix());
848  exint common_length = child->getCommonPrefixLength(
849  prefix, found_prefix);
850  if (common_length == found_prefix.length())
851  {
852  prefix.removePrefix(common_length);
853 
854  // If we have found the last node but the common length is
855  // not an exact match then we have not found a node.
856  if (prefix.isEmpty()
857  && common_length != found_prefix.length())
858  {
859  child = nullptr;
860  break;
861  }
862 
863  child = *found;
864  // If this child allows for partial matches then update the
865  // last partial node we found.
866  if (child->allowsPartial())
867  last_partial = child;
868 
869  continue;
870  }
871  else
872  {
873  child = nullptr;
874  break;
875  }
876  }
877  else
878  {
879  child = nullptr;
880  break;
881  }
882  }
883 
884  if (child && child->hasValue())
885  return iterator(child);
886  // If we didnt find a child that matches check if our traversal came
887  // across a node that allows for partial matches. If we found one
888  // then return this node instead of reporting nothing found.
889  else if (last_partial != nullptr && last_partial->hasValue())
890  return iterator(last_partial);
891  return end();
892  }
893 
894  /// Check if the tree contains the provided string
896  {
897  return find(prefix) != end();
898  }
899 
900  /// Write out the tree into the work buffer. This is pretty printed.
901  void debug(UT_WorkBuffer& wbuf) const
902  {
903  wbuf.clear();
904 
906  myRoot->debug(wbuf, 0);
907  }
908  /// Erase a given entry based on the prefix. Returns true if the item was
909  /// removed from the tree.
910  bool erase(const UT_StringRef& prefix)
911  {
912  return doErase_(prefix);
913  }
914 
915  /// Clear out the tree
916  void clear()
917  {
918 #if defined(USE_MEMORY_RESOURCE)
919  UT_PMR::polymorphic_allocator<Node> alloc(myAllocator);
920  alloc.destroy(myRoot);
921  alloc.deallocate(myRoot, 1);
922 
923 #else
924  delete myRoot;
925 #endif
926  myRoot = createBaseNode(
928  }
929 
930  /// Returns true if the map has no items currently inserted.
932  {
933  return myRoot->childCount();
934  }
935 
936  /// Returns the beginning of an iterator used to traverse the full tree.
937  using const_iterator = const iterator;
939  {
940  auto me = SYSconst_cast(this);
941  return iterator(me->myRoot);
942  }
943  /// Returns the beginning of an iterator used to traverse the full tree.
945  {
946  return iterator(myRoot);
947  }
948  /// Returns the end of an iterator used with begin() to traverse the full
949  /// tree.
951  /// Returns the end of an iterator used with begin() to traverse the full
952  /// tree.
954  /// Returns the end of an iterator used with begin() to traverse the full
955  /// tree.
957  /// Returns the end of an iterator used with begin() to traverse the full
958  /// tree.
960 
961 private:
962  // Internal function to initiate inserting an item into the tree.
963  template <typename... Args>
964  std::pair<iterator, bool> insert_(
965  const UT_StringHolder& name,
966  bool allows_partial,
967  Args&&... args)
968  {
969  if (name.isEmpty())
970  return std::make_pair(end(), false);
971 
972  bool inserted = false;
973  Node* node = doInsert_(name, inserted);
974 
975  UT_ASSERT((inserted && node != nullptr) || !inserted);
976  if (node && inserted)
977  {
978  node->emplace(std::forward<Args>(args)...);
979  node->setAllowsPartial(allows_partial);
980  }
981  return std::make_pair(iterator(node), inserted);
982  }
983 
984  /// Start the process of erasing a found item in the tree.
985  bool doErase_(const UT_StringRef& str)
986  {
987  if (str.isEmpty())
988  return false;
989 
991  Node** current = &myRoot;
992 
993  bool removed = false;
994  UT_StringView prefix(str);
995  while (!removed && current && prefix)
996  {
997  Node** node = (*current)->findPartialPrefixChild(prefix);
998 
999  // If no child matches the prefix then theres nothing to do.
1000  if (node == nullptr)
1001  {
1002  return false;
1003  }
1004 
1005  exint common_length = (*current)->getCommonPrefixLength(
1006  prefix, (*node)->myPrefix);
1007 
1008  if (common_length == prefix.length())
1009  {
1010  // If we found the node but the found node does not have a value
1011  // it means its an edge so we dont delete it.
1012  if (!(*node)->hasValue())
1013  return false;
1014 
1015  removed = true;
1016 
1017  if ((*node)->isLeaf())
1018  {
1019  auto stolen = (*current)->stealChild(current, *node);
1020  }
1021  else
1022  {
1023  UT_ARTNodePtr<Node> removed_node = (*current)->stealChild(
1024  current, *node);
1025  removed_node->moveChildren(current);
1026  }
1027 
1028  if ((*current) != myRoot)
1029  (*current)->compress(current);
1030  }
1031  else
1032  {
1033  prefix.removePrefix(common_length);
1034  current = node;
1035  }
1036  }
1037 
1038  return removed;
1039  }
1040 
1041  /// Create a node4 also known as a based node.
1042  UT_ARTNodePtr<Node> createBaseNode(
1043  const UT_StringHolder& key,
1044  const UT_StringView& prefix)
1045  {
1046 #if defined(USE_MEMORY_RESOURCE)
1047  UT_PMR::polymorphic_allocator<UT_ARTNode4<T>> alloc(myAllocator);
1048  UT_ARTNode4<T>* node = alloc.allocate(1);
1049  alloc.construct(node, key, prefix, alloc);
1051  return ptr;
1052 #else
1053  UT_ARTNodePtr<UT_ARTNode4<T>> ptr(new UT_ARTNode4<T>(key, prefix));
1054  return ptr;
1055 #endif
1056  }
1057 
1058  /// Start the process of finding the insertion node and inserting the new
1059  /// node. If necessary the tree rebalences itself and updates the node
1060  /// size accordingly.
1061  Node* doInsert_(const UT_StringHolder& key, bool& inserted)
1062  {
1063  UT_StringView prefix(key);
1064  UT_ASSERT(key.length() > 0);
1065  UT_ASSERT(myRoot != nullptr);
1067 
1068  Node** current = &myRoot;
1069  inserted = false;
1070 
1071  while (current != nullptr && prefix.length() > 0)
1072  {
1073  Node** node = (*current)->findPartialPrefixChild(prefix);
1074  if (node == nullptr)
1075  {
1076  // No child partially matched the prefix so we will create
1077  // one
1078  Node** inserted_node = (*current)->insertChild(
1079  current, createBaseNode(key, prefix));
1080  inserted = true;
1081  return (*inserted_node);
1082  }
1083  else
1084  {
1085  const exint prefix_length = (*current)->getCommonPrefixLength(
1086  prefix, (*node)->myPrefix);
1087 
1088  // Check if this is a duplicate key
1089  if (prefix.length() == (*node)->myPrefix.length()
1090  && prefix.length() == prefix_length)
1091  {
1092  inserted = !(*node)->hasValue();
1093  return *node;
1094  }
1095  // The childs key is the prefix of the insert item
1096  // abc -> abc - d [ after inserting "abcd" ]
1097  else if (prefix_length == (*node)->myPrefix.length())
1098  {
1099  prefix.removePrefix(prefix_length);
1100  current = node;
1101  }
1102  // The insert item is the prefix of the childs prefix
1103  // abc -> ab - c [after inserting "abc"]
1104  else if (prefix_length == prefix.length())
1105  {
1106  auto new_child = createBaseNode(key, prefix);
1107 
1108  // Remove the current child
1109  UT_ARTNodePtr<Node> stolen = (*current)->stealChild(
1110  current, *node);
1111  // Update the child prefix
1112  stolen->myPrefix.removePrefix(prefix_length);
1113  // Add old child to the new child
1114  Node* ptr = new_child.get();
1115  new_child->insertChild(&ptr, std::move(stolen));
1116 
1117  // Attach the new child to the current children
1118  Node** inserted_node = (*current)->insertChild(
1119  current, std::move(new_child));
1120 
1121  inserted = true;
1122  return (*inserted_node);
1123  }
1124  // Inserting item and childs key have the same common prefix
1125  else
1126  {
1127  // We need to split the current child and the suffix
1128  // into
1129  // two children whos parent is the prefix of the two
1130  // children
1131 
1132  // remove the existing child as this will be move to the
1133  // parent of the two children.
1134  UT_ARTNodePtr<Node> erased_child = (*current)
1135  ->stealChild(current, *node);
1136 
1137  UT_StringView item_prefix(prefix.begin(), prefix_length);
1138  auto parent = createBaseNode(key, item_prefix);
1139  // update the new prefix
1140  erased_child->myPrefix.removePrefix(prefix_length);
1141  // Add the suffix of the old key into the new branch
1142  Node* ptr = parent.get();
1143  parent->insertChild(
1144  &ptr, std::move(erased_child));
1145 
1146  // Add the suffix of the key into the new split branch
1147  prefix.removePrefix(prefix_length);
1148  Node** inserted_node = parent->insertChild(
1149  &ptr, createBaseNode(key, prefix));
1150 
1151  // Add the prefix of the old child and the new key into
1152  // this children set.
1153  (*current)->insertChild(current, std::move(parent));
1154 
1155  inserted = true;
1156  return *inserted_node;
1157  }
1158  }
1159  }
1160 
1161  return nullptr;
1162  }
1163 
1164  Node* myRoot = nullptr;
1165 #if defined(USE_MEMORY_RESOURCE)
1166  UT_PMR::polymorphic_allocator<std::byte> myAllocator;
1167 #endif
1168 };
1169 
1170 // Structured binding support
1171 template <std::size_t N, class T, std::enable_if_t<N == 0, int> = 0>
1172 auto
1173 get(const UT_ARTIterator<T>& it) -> decltype(it.key())
1174 {
1175  return it.key();
1176 }
1177 
1178 template <std::size_t N, class T, std::enable_if_t<N == 1, int> = 0>
1179 auto
1180 get(const UT_ARTIterator<T>& it) -> decltype(it.value())
1181 {
1182  return it.value();
1183 }
1184 
1185 namespace std
1186 {
1187 template <typename T>
1188 struct tuple_size<UT_ARTIterator<T>>
1189  : public std::integral_constant<std::size_t, 2>
1190 {
1191 };
1192 
1193 template <std::size_t N, typename T>
1194 struct tuple_element<N, UT_ARTIterator<T>>
1195 {
1196  using type = decltype(get<N>(
1197  std::declval<UT_ARTIterator<T>>()));
1198 };
1199 } // namespace std
1200 
1201 #include "UT_ARTMapImpl.h"
1202 
1203 #endif // __UT_ARTMAP_H__
bool erase(const UT_StringRef &prefix)
Definition: UT_ARTMap.h:910
bool operator!=(const UT_ARTIterator &it) const
Definition: UT_ARTMap.h:402
T & last()
Definition: UT_Array.h:823
UT_StringView myPrefix
Definition: UT_ARTMap.h:361
const T & value() const
Definition: UT_ARTMap.h:430
The middle sized node with 16 max children.
Definition: UT_ARTMap.h:544
SYS_NO_DISCARD_RESULT bool isEmpty() const
Returns true if the map has no items currently inserted.
Definition: UT_ARTMap.h:931
UT_StringHolder key() const
Definition: UT_ARTMap.h:433
The largest node size in the art map. The 256 node has at most 256 children.
Definition: UT_ARTMap.h:676
UT_ARTNodePtr< parent_t > parent_ptr_t
Definition: UT_ARTMap.h:680
parent_t * nextChild(int &index) override
SYS_NO_DISCARD_RESULT iterator begin()
Returns the beginning of an iterator used to traverse the full tree.
Definition: UT_ARTMap.h:944
UT_ARTNode< T > parent_t
Definition: UT_ARTMap.h:494
Definition: Node.h:52
UT_ARTNode< T > parent_t
Definition: UT_ARTMap.h:547
parent_t * nextChild(int &index) override
SYS_FORCE_INLINE void removeLast()
Definition: UT_Array.h:379
virtual ~UT_ARTNode()=default
GLuint start
Definition: glcorearb.h:475
parent_ptr_t stealChild(parent_t **ref, parent_t *child) override
GLsizei const GLfloat * value
Definition: glcorearb.h:824
UT_ARTNodePtr< parent_t > parent_ptr_t
Definition: UT_ARTMap.h:495
std::ptrdiff_t difference_type
Definition: UT_ARTMap.h:388
SYS_NO_DISCARD_RESULT iterator end()
Definition: UT_ARTMap.h:953
UT_ARTIterator()=default
SYS_FORCE_INLINE T * SYSconst_cast(const T *foo)
Definition: SYS_Types.h:136
bool isEmpty() const
Same as !isstring()
void emplace(Args &&...args)
Definition: UT_ARTMap.h:151
void moveHeader(UT_ARTNode< T > &move_to)
Definition: UT_ARTMap.h:318
void debug(UT_WorkBuffer &wbuf, unsigned indent=0) const
Definition: UT_ARTMap.h:173
int64 exint
Definition: SYS_Types.h:125
UT_ARTNode16(const UT_StringHolder &key, UT_StringView prefix)
Definition: UT_ARTMap.h:557
bool hasValue() const
Definition: UT_ARTMap.h:428
SYS_NO_DISCARD_RESULT bool contains(const UT_StringHolder &prefix) const
Check if the tree contains the provided string.
Definition: UT_ARTMap.h:895
parent_t * myChildren[256]
Definition: UT_ARTMap.h:720
UT_StringHolder myKey
Definition: UT_ARTMap.h:360
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:795
UT_NON_COPYABLE(UT_ARTNode)
UT_ARTNode4(const UT_StringHolder &key, UT_StringView prefix)
Definition: UT_ARTMap.h:506
parent_ptr_t stealChild(parent_t **ref, parent_t *child) override
ImageBuf OIIO_API min(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
bool operator==(const UT_ARTIterator &it) const
Definition: UT_ARTMap.h:398
const UT_StringHolder & type() const override
Definition: UT_ARTMap.h:701
The second largest node size with 48 max children.
Definition: UT_ARTMap.h:626
const UT_StringHolder & type() const override
Definition: UT_ARTMap.h:520
void moveChildren(UT_ARTNode< T > **ref)
Definition: UT_ARTMap.h:237
~UT_ARTNode4() override
Definition: UT_ARTMap.h:512
SYS_FORCE_INLINE const value_type * value() const
Definition: UT_ARTMap.h:144
int prefixToIndex_(char c) const
Definition: UT_ARTMap.h:589
parent_t ** findPartialPrefixChild(const UT_StringView &prefix) override
void removePrefix(exint n)
exint getCommonPrefixLength(const UT_StringView &key, const UT_StringView &node_prefix)
Definition: UT_ARTMap.h:259
~UT_ARTNode16() override
Definition: UT_ARTMap.h:564
decltype(get< N >(std::declval< UT_ARTIterator< T >>())) type
Definition: UT_ARTMap.h:1197
std::optional< T > UT_Optional
Definition: UT_Optional.h:26
virtual UT_ARTNode< T > * nextChild(int &idx)=0
std::unique_ptr< T, Deleter > UT_UniquePtr
A smart pointer for unique ownership of dynamically allocated objects.
Definition: UT_UniquePtr.h:39
parent_t * myChildren[16]
Definition: UT_ARTMap.h:621
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const char * data() const noexcept
Returns a pointer to the first character of a view.
void debug(UT_WorkBuffer &wbuf) const
Write out the tree into the work buffer. This is pretty printed.
Definition: UT_ARTMap.h:901
uint8_t myNumChildren
Definition: UT_ARTMap.h:367
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:40
size_t appendFormat(const char *fmt, const Args &...args)
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool isEmpty() const
Returns true if the string is empty.
GLdouble n
Definition: glcorearb.h:2008
UT_ARTNodePtr< parent_t > parent_ptr_t
Definition: UT_ARTMap.h:548
void operator()(T *ptr)
Definition: UT_ARTMap.h:372
bool myAllowsPartial
Definition: UT_ARTMap.h:365
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE exint length() const
Returns the length of the string in bytes.
std::forward_iterator_tag iterator_category
Definition: UT_ARTMap.h:394
UT_ARTNode< T > parent_t
Definition: UT_ARTMap.h:679
exint emplace_back(S &&...s)
Definition: UT_ArrayImpl.h:769
std::pair< iterator, bool > insert(const UT_StringHolder &name, const T &data, bool allow_partial=false)
Definition: UT_ARTMap.h:768
unsigned char myChildKeys[16]
Definition: UT_ARTMap.h:620
exint length() const
bool isFull() const override
GLint ref
Definition: glcorearb.h:124
SYS_NO_DISCARD_RESULT const_iterator end() const
Definition: UT_ARTMap.h:950
virtual const UT_StringHolder & type() const =0
static const UT_StringHolder theEmptyString
SYS_NO_DISCARD_RESULT const_iterator begin() const
Definition: UT_ARTMap.h:938
const UT_StringHolder & type() const override
Definition: UT_ARTMap.h:570
void clear()
Clear out the tree.
Definition: UT_ARTMap.h:916
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
bool isFull() const override
Iterator for traversing the adaptive radix tree.
Definition: UT_ARTMap.h:385
UT_ARTNode(const UT_StringHolder &key, const UT_StringView &prefix)
Definition: UT_ARTMap.h:124
UT_ARTIterator< T > iterator
Definition: UT_ARTMap.h:733
SYS_NO_DISCARD_RESULT iterator find(UT_StringView prefix) const
Find the node based on the provided prefix.
Definition: UT_ARTMap.h:836
UT_ARTIterator & operator++()
Definition: UT_ARTMap.h:411
pointer operator->()
Definition: UT_ARTMap.h:409
GLint GLuint mask
Definition: glcorearb.h:124
The smallest (also known as base node) node with 4 max children.
Definition: UT_ARTMap.h:491
UT_NON_COPYABLE(UT_ARTNode48)
SYS_FORCE_INLINE bool isLeaf() const
Definition: UT_ARTMap.h:163
#define SYS_NO_DISCARD_RESULT
Definition: SYS_Compiler.h:93
UT_Optional< value_type > myValue
Definition: UT_ARTMap.h:362
parent_t ** insertChild(parent_t **ref, parent_ptr_t child) override
SYS_NO_DISCARD_RESULT const_iterator cend() const
Definition: UT_ARTMap.h:959
UT_ARTNode< T > parent_t
Definition: UT_ARTMap.h:629
GLuint const GLchar * name
Definition: glcorearb.h:786
#define UT_VERIFY_ROOT
Definition: UT_ARTMap.h:96
bool isFull() const override
std::enable_if_t< std::is_default_constructible_v< T >, T & > operator[](UT_StringHolder &&key)
Definition: UT_ARTMap.h:821
~UT_ARTNode48() override
Definition: UT_ARTMap.h:646
const UT_StringHolder & type() const override
Definition: UT_ARTMap.h:651
parent_t ** findPartialPrefixChild(const UT_StringView &prefix) override
UT_ARTNodePtr< parent_t > parent_ptr_t
Definition: UT_ARTMap.h:630
~UT_ARTMap()
Definition: UT_ARTMap.h:754
parent_t ** insertChild(parent_t **ref, parent_ptr_t child) override
parent_t ** findPartialPrefixChild(const UT_StringView &prefix) override
Definition: UT_ARTMapImpl.h:87
GLenum mode
Definition: glcorearb.h:99
parent_t ** insertChild(parent_t **ref, parent_ptr_t child) override
parent_ptr_t stealChild(parent_t **ref, parent_t *child) override
Definition: UT_ARTMapImpl.h:67
~UT_ARTNode256() override
Definition: UT_ARTMap.h:695
virtual UT_ARTNodePtr< UT_ARTNode< T > > stealChild(UT_ARTNode **ref, UT_ARTNode *child)=0
const_reference operator*() const
Definition: UT_ARTMap.h:406
virtual UT_ARTNode< T > ** findPartialPrefixChild(const UT_StringView &prefix)=0
parent_t ** insertChild(parent_t **ref, parent_ptr_t child) override
Definition: UT_ARTMapImpl.h:29
UT_ARTIterator operator++(int)
Definition: UT_ARTMap.h:421
unsigned char myChildKeys[256]
Definition: UT_ARTMap.h:670
std::pair< iterator, bool > emplace(const UT_StringHolder &name, Args &&...args)
Definition: UT_ARTMap.h:792
std::pair< iterator, bool > insert(const UT_StringHolder &name, T &&value, bool allow_partial=false)
Definition: UT_ARTMap.h:780
unsigned char myChildKeys[4]
Definition: UT_ARTMap.h:538
SYS_FORCE_INLINE bool hasValue() const
Definition: UT_ARTMap.h:161
UT_ARTNodePtr< NodeT > newNode()
Definition: UT_ARTMap.h:335
reference operator*()
Definition: UT_ARTMap.h:407
GLuint index
Definition: glcorearb.h:786
T mapped_type
Definition: UT_ARTMap.h:735
virtual bool isFull() const =0
parent_t * nextChild(int &index) override
SYS_FORCE_INLINE void append(char character)
SYS_NO_DISCARD_RESULT const_iterator cbegin() const
Definition: UT_ARTMap.h:956
auto ptr(T p) -> const void *
Definition: format.h:4331
GA_API const UT_StringHolder N
if(num_boxed_items<=0)
Definition: UT_RTreeImpl.h:697
parent_t * nextChild(int &index) override
**If you just want to fire and args
Definition: thread.h:618
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator begin() const
Returns a constant iterator pointing to the beginning of the string.
UT_ARTNode256(const UT_StringHolder &key, UT_StringView prefix)
Definition: UT_ARTMap.h:689
SYS_FORCE_INLINE value_type * value()
Definition: UT_ARTMap.h:155
static void destroy(UT_ARTNode< T > *node)
Definition: UT_ARTMap.h:348
UT_NON_COPYABLE(UT_ARTMap)
#define UT_ASSERT(ZZ)
Definition: UT_Assert.h:156
SYS_FORCE_INLINE void clear()
parent_t ** findPartialPrefixChild(const UT_StringView &prefix) override
UT_NON_COPYABLE(UT_ARTNode16)
UT_ARTMap()
Initialize an empty map.
Definition: UT_ARTMap.h:746
void setAllowsPartial(bool allow)
Definition: UT_ARTMap.h:169
std::enable_if_t< std::is_default_constructible_v< T >, T & > operator[](const UT_StringHolder &key)
Definition: UT_ARTMap.h:804
parent_ptr_t stealChild(parent_t **ref, parent_t *child) override
virtual UT_ARTNode< T > ** insertChild(UT_ARTNode **ref, UT_ARTNodePtr< UT_ARTNode< T >> node)=0
void compress(UT_ARTNode< T > **ref)
Definition: UT_ARTMap.h:203
parent_t * myChildren[48]
Definition: UT_ARTMap.h:671
UT_UniquePtr< T, UT_ARTNodeDelete > UT_ARTNodePtr
Definition: UT_ARTMap.h:114
SYS_FORCE_INLINE const UT_StringView & prefix() const
Definition: UT_ARTMap.h:162
UT_NON_COPYABLE(UT_ARTNode256)
UT_NON_COPYABLE(UT_ARTNode4)
Definition: format.h:1821
bool isFull() const override
Definition: UT_ARTMapImpl.h:22
UT_StringHolder key() const
Definition: UT_ARTMap.h:143
const_pointer operator->() const
Definition: UT_ARTMap.h:408
UT_ARTNode48(const UT_StringHolder &key, UT_StringView prefix)
Definition: UT_ARTMap.h:639
parent_t * myChildren[4]
Definition: UT_ARTMap.h:539
bool isEmpty() const
Returns true iff there are no occupied elements in the array.
Definition: UT_Array.h:657
SYS_FORCE_INLINE bool allowsPartial() const
Definition: UT_ARTMap.h:168