HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_ArraySet.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_ArraySet.h (UT Library, C++)
7  *
8  * COMMENTS: Flat-array-based hash set implementation.
9  */
10 
11 #pragma once
12 
13 #ifndef __UT_ArraySet__
14 #define __UT_ArraySet__
15 
16 #include "UT_ArrayHelp.h"
17 #include "UT_Assert.h"
18 #include <SYS/SYS_Inline.h>
19 #include <SYS/SYS_Math.h>
20 #include <SYS/SYS_StaticAssert.h>
21 #include <SYS/SYS_Types.h>
22 
23 #include <hboost/functional/hash.hpp>
24 
25 #include <algorithm>
26 #include <limits>
27 #include <stdint.h>
28 #include <utility>
29 
30 namespace UT {
31 
32 template<typename T>
33 struct DefaultClearer;
34 
35 template<typename S>
36 struct DefaultClearer<S*>
37 {
38  static void clear(S*&v) { v = nullptr; }
39  static bool isClear(S *v) { return v == nullptr; }
40  static void clearConstruct(S**p) { clear(*p); }
41 
42  static const bool clearNeedsDestruction = false;
43 };
44 
45 template<typename T>
47 {
48  static void clear(T &v)
49  {
50  if (std::numeric_limits<T>::has_quiet_NaN) {
51  // Use QNaN as unused floating-point marker.
52  // NOTE: All NaNs get treated as unused, even if they're not
53  // the representative QNaN.
54  v = std::numeric_limits<T>::quiet_NaN();
55  }
57  // Use min as unused signed integer marker.
59  }
60  else {
61  // Use max as unused unsigned integer marker.
63  }
64  }
65  static bool isClear(T v)
66  {
67  if (std::numeric_limits<T>::has_quiet_NaN) {
68  // NOTE: All NaNs get treated as unused, even if they're not
69  // the representative QNaN.
70  return SYSisNan(v);
71  }
73  return (v == std::numeric_limits<T>::min());
74  }
75  return (v == std::numeric_limits<T>::max());
76  }
77  static void clearConstruct(T *p) { clear(*p); }
78 
79  static const bool clearNeedsDestruction = false;
80 };
81 
82 template<> struct DefaultClearer< int8_t > : public NumericClearer< int8_t > {};
83 template<> struct DefaultClearer<uint8_t > : public NumericClearer<uint8_t > {};
84 template<> struct DefaultClearer< int16_t> : public NumericClearer< int16_t> {};
85 template<> struct DefaultClearer<uint16_t> : public NumericClearer<uint16_t> {};
86 template<> struct DefaultClearer< int32_t> : public NumericClearer< int32_t> {};
87 template<> struct DefaultClearer<uint32_t> : public NumericClearer<uint32_t> {};
88 template<> struct DefaultClearer< int64_t> : public NumericClearer< int64_t> {};
89 template<> struct DefaultClearer<uint64_t> : public NumericClearer<uint64_t> {};
90 template<> struct DefaultClearer<float> : public NumericClearer<float> {};
91 template<> struct DefaultClearer<double> : public NumericClearer<double> {};
92 template<> struct DefaultClearer<long double> : public NumericClearer<long double> {};
93 #ifdef MBSD
94 // On macOS, size_t is "unsigned long" which matches none of the above
95 template<> struct DefaultClearer<size_t> : public NumericClearer<size_t> {};
96 #endif
97 
98 /// NOTE: It may seem silly to have bool here, but things like
99 /// std::pair<T,bool> should work automatically, so they need some
100 /// default behaviour for bool.
101 template<>
103 {
104  static void clear(bool&v) { v = false; }
105  static bool isClear(bool v) { return !v; }
106  static void clearConstruct(bool *p) { clear(*p); }
107 
108  static const bool clearNeedsDestruction = false;
109 };
110 
111 template<typename S0,typename S1>
112 struct DefaultClearer<std::pair<S0,S1> >
113 {
114  static void clear(std::pair<S0,S1> &v)
115  {
116  DefaultClearer<S0>::clear(v.first);
117  DefaultClearer<S1>::clear(v.second);
118  }
119  static bool isClear(const std::pair<S0,S1> &v)
120  {
121  return DefaultClearer<S0>::isClear(v.first) && DefaultClearer<S1>::isClear(v.second);
122  }
123  static void clearConstruct(std::pair<S0,S1>*p) {
126  }
128 };
129 
130 namespace detail
131 {
132  template <class T>
134 
135  template <class C, class T>
136  struct member_type_helper<T C::*> { using type = T; };
137 
138  template <class T>
139  struct member_type
141  typename std::remove_cv_t<std::remove_reference_t<T>>>
142  {
143  };
144 }
145 
146 template <class T>
148 
149 /// Sometimes objects are very simple and dont require anything special. To aid
150 /// in making UT_ArraySet use more streamlined this macro defines a very basic
151 /// clearer that can be used for simple objects. Anything further requires the
152 /// implementor to define a custom clearer.
153 #define UT_DEFINE_CLEARER_ON_MEMBER(_cls_, _mem_ptr_, _clear_flag_) \
154  namespace UT \
155  { \
156  template <> \
157  struct DefaultClearer<_cls_> \
158  { \
159  using member_t = member_type_t<decltype(_mem_ptr_)>; \
160  using C = _cls_; \
161  \
162  static void clear(C &v) \
163  { \
164  member_t &mem = member(v); \
165  UT::DefaultClearer<member_t>::clear(mem); \
166  } \
167  \
168  static bool isClear(const C &v) \
169  { \
170  member_t &mem = member(SYSconst_cast(v)); \
171  return UT::DefaultClearer<member_t>::isClear(mem); \
172  } \
173  \
174  static void clearConstruct(C *p) \
175  { \
176  new ((void *)p) C(_clear_flag_); \
177  } \
178  \
179  static const bool clearNeedsDestruction = true; \
180  \
181  static member_t &member(C &v) \
182  { \
183  member_t C::*mem_ptr = _mem_ptr_; \
184  C *ptr_v = &v; \
185  return (ptr_v->*mem_ptr); \
186  } \
187  }; \
188  }
189 
190 /// This is close to a drop-in replacement for std::unordered_set,
191 /// except that it uses a single array of items and empty spaces
192 /// marked with a dedicated "clear" value.
193 /// It also has a fixed maximum load factor, and doesn't store a
194 /// hasher, comparator, or allocator object as member data,
195 /// avoiding unnecessary overhead, but these differences introduce
196 /// some interface incompatibilities.
197 ///
198 /// The incompatibility that will probably be encountered most often
199 /// is the lack of time guarantees. This structure is more efficient in
200 /// many common cases, due to better memory coherency and fewer
201 /// memory allocations, but cannot guarantee as good time scaling in
202 /// worst or even average case as std::unordered_set guarantees for most
203 /// functions.
204 ///
205 /// Because there is only space for one item in each bucket, if a collision
206 /// is hit on insertion, the structure will scan forward until a "clear"
207 /// bucket is found. However, items in a contiguous block will always
208 /// be sorted in the order of their ideal bucket numbers.
209 /// If the end of the array is reached without finding a clear bucket,
210 /// instead of the obvious approach of wrapping to the beginning of the
211 /// array, the block will be shifted backward, so some items in this
212 /// block at the end may be earlier than their ideal bucket number.
213 /// This is about as complicated as wrapping, or not sorting, but avoids
214 /// an awkward issue when erasing while iterating where an item might get
215 /// visited multiple times due to the wrapping, or items might be missed
216 /// due to the lack of order. Even still, unlike
217 /// std::unordered_set, erase may invalidate other iterators and references.
218 /// Also, insert may invalidate other iterators and references.
219 template<
220  typename Key,
221  bool MULTI=false,
222  std::size_t MAX_LOAD_FACTOR_256=128,
223  typename Clearer=DefaultClearer<Key>,
224  class Hash=hboost::hash<Key>,
225  class KeyEqual=std::equal_to<Key>
226 >
227 class ArraySet
228 {
229 public:
231  typedef Key key_type;
232  typedef Key value_type;
233  typedef std::size_t size_type;
234  typedef std::ptrdiff_t difference_type;
235  typedef Hash hasher;
236  typedef KeyEqual key_equal;
238  typedef const value_type& const_reference;
239  typedef value_type *pointer;
240  typedef const value_type *const_pointer;
241  typedef Clearer clearer_type;
242 
244  : myBuckets(nullptr)
245  , myNumBuckets(0)
246  {
247  UT_ASSERT_COMPILETIME(MAX_LOAD_FACTOR_256 <= 256);
248  }
249  explicit ArraySet(size_type init_bucket_count)
250  : myBuckets(nullptr)
251  , myNumBuckets(0)
252  {
253  if (init_bucket_count != 0)
254  setNumBuckets(init_bucket_count);
255  }
256 
257  /// Move constructor, destroying the source.
259  : myBuckets(that.myBuckets)
260  , myNumBuckets(that.myNumBuckets)
261  {
262  that.myBuckets = nullptr;
263  that.myNumBuckets = 0;
264  }
265 
266  /// Copy constructor.
267  /// Avoid accidental copying by making the constructor explicit.
268  explicit ArraySet(const set_type &that)
269  : myBuckets(nullptr)
270  , myNumBuckets(0)
271  {
272  *this = that;
273  }
274 
275  /// Inserts all of the items in the range [start_input,end_input).
276  template<typename INPUT_ITERATOR>
277  ArraySet(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input, size_type init_bucket_count = 0)
278  : myBuckets(nullptr)
279  , myNumBuckets(0)
280  {
281  // Preallocate enough buckets, instead of waiting for
282  // insert to potentially hit collisions.
283  size_type ninput_items = std::distance(start_input, end_input);
284  if (ninput_items == 0)
285  return;
286  setNumBuckets(SYSmax(init_bucket_count, minBuckets(ninput_items)));
287 
288  // Now, just insert them all, (no need to realloc)
289  for (; start_input != end_input; ++start_input)
290  {
291  pointer pdest;
292  bool success = insertHelper<!MULTI,false>(myBuckets,myNumBuckets,*start_input,pdest);
293  if (success)
294  *pdest = *start_input;
295  else
296  --ninput_items;
297  }
298  *((size_type*)(myBuckets+myNumBuckets)) = ninput_items;
299  }
300  /// Constructs a set from an initializer list, with an optional
301  /// bucket count, e.g.:
302  /// UT::ArraySet<int> some_set({5,123,500}, 20);
303  ArraySet(std::initializer_list<value_type> init, size_type init_bucket_count = 0)
304  : myBuckets(nullptr)
305  , myNumBuckets(0)
306  {
308  if (init_bucket_count == 0 || init_bucket_count < init.size()) {
309  bucket_count = minBuckets(init.size());
310  }
311  else {
312  bucket_count = init_bucket_count;
313  }
314  setNumBuckets(bucket_count);
315  for (auto &&p = init.begin(); p != init.end(); ++p) {
316  insert(*p);
317  }
318  }
319 
321  {
322  if (this == &that)
323  return *this;
324 
325  clear();
327 
328  if (!myBuckets)
329  return *this;
330 
331  size_type size = that.size();
332  *((size_type*)(myBuckets+myNumBuckets)) = size;
333 
334  if (size)
335  {
336  // Now, just copy them all, (no need to realloc)
337  const_pointer srcp = that.myBuckets;
339  for (pointer destp = myBuckets; destp != pend; ++destp, ++srcp)
340  {
341  *destp = *srcp;
342  }
343  }
344  return *this;
345  }
346 
347  set_type &operator=(std::initializer_list<value_type> init)
348  {
349  clear();
350  insert(init.begin(), init.end());
351  return *this;
352  }
353 
355  {
356  destroy();
357  myBuckets = that.myBuckets;
358  myNumBuckets = that.myNumBuckets;
359  that.myBuckets = nullptr;
360  that.myNumBuckets = 0;
361  return *this;
362  }
363 
364  bool operator==(const set_type &that) const
365  {
366  if (size() != that.size())
367  return false;
368 
369  // Check if one has no allocation (both empty)
370  if (!myBuckets || !that.myBuckets)
371  return true;
372 
373  const const_pointer pend = myBuckets + myNumBuckets;
374  for (const_pointer p = myBuckets; p != pend; ++p)
375  {
376  if (Clearer::isClear(*p))
377  continue;
378  // NOTE: Don't just use that.contains(*p), in case it doesn't check the whole item contents!
379  const_iterator it = that.find(*p);
380  if (it.atEnd())
381  return false;
382  if (*p != *it)
383  return false;
384  }
385  return true;
386  }
387  bool operator!=(const set_type &that) const
388  {
389  return !(*this == that);
390  }
391 
392  /// Swaps another set with this one
393  void swap(set_type &that)
394  {
395  pointer temp_buckets = that.myBuckets;
396  size_type temp_numbuckets = that.myNumBuckets;
397  that.myBuckets = myBuckets;
398  that.myNumBuckets = myNumBuckets;
399  myBuckets = temp_buckets;
400  myNumBuckets = temp_numbuckets;
401  }
402 
404  {
405  destroy();
406  }
407 
408  /// Returns true iff there are no items in the set
409  bool empty() const
410  {
411  return !myBuckets || (*((const size_type*)(myBuckets+myNumBuckets)) == 0);
412  }
413 
414  /// Returns the number of items in the set
415  size_type size() const
416  {
417  return myBuckets ? *(const size_type*)(myBuckets+myNumBuckets) : 0;
418  }
419 
420  /// This only exists because the standard requires it.
421  /// The only hard limit is what memory will allow.
423  {
425  }
426 
427  /// Removes all items from the set, but does not free the memory.
428  /// Call destroy() to free all the memory.
429  void clear()
430  {
431  if (!myBuckets)
432  return;
433  pointer p = myBuckets;
434  pointer pend = p+myNumBuckets;
435  if (*((const size_type*)pend) == 0)
436  return;
437  while (p != pend)
438  {
439  Clearer::clear(*p);
440  ++p;
441  }
442  *((size_type*)pend) = 0;
443  }
444 
445  /// Removes all items from the set and frees all the memory.
446  void destroy()
447  {
448  if (!myBuckets)
449  return;
450  if (!std::is_trivially_destructible<value_type>::value && (Clearer::clearNeedsDestruction || !empty()))
451  {
452  pointer p = myBuckets;
453  const_pointer pend = p+myNumBuckets;
454  while (p != pend)
455  {
456  // I'm assuming that in some cases, it's faster to check isClear than to call the destructor,
457  // even if it's only because of a better chance of inlining.
458  if (Clearer::clearNeedsDestruction || !Clearer::isClear(*p))
459  p->~value_type();
460  ++p;
461  }
462  }
463  free(myBuckets);
464  myBuckets = nullptr;
465  myNumBuckets = 0;
466  }
467 
468  /// Returns the current number of buckets.
470  {
471  return myNumBuckets;
472  }
473 
474  /// This only exists because the standard requires it.
475  /// The only hard limit is what memory will allow.
477  {
479  }
480 
481  /// This only exists because the standard requires it.
482  /// This function doesn't really make sense for this implementation,
483  /// but it returns 1 if the bucket is occupied, and 0 if it is not.
485  {
486  if (i >= myNumBuckets) {
487  return 0;
488  }
489  if (Clearer::isClear(myBuckets[i])) {
490  return 0;
491  }
492  return 1;
493  }
494 
495  /// Returns the portion of buckets that need to be occupied before
496  /// reallocation occurs.
497  /// Unlike in the standard, the maximum load factor is a constant
498  /// in this implementation, so max_load_factor() is a static function.
499  static float max_load_factor()
500  {
501  return float(MAX_LOAD_FACTOR_256)/256;
502  }
503 
504  /// Returns the current portion of buckets that are occupied.
505  float load_factor() const
506  {
507  return float(size())/float(myNumBuckets);
508  }
509 
510  /// Sets the number of buckets to larger of new_num_buckets and the
511  /// required minimum number of buckets for size() items, unless
512  /// that number is the same as the current number of buckets.
513  void rehash(size_type new_num_buckets)
514  {
515  size_type min_buckets = minBuckets(size());
516  if (new_num_buckets < min_buckets) {
517  new_num_buckets = min_buckets;
518  }
519  if (myNumBuckets == new_num_buckets) {
520  return;
521  }
522  setNumBuckets(min_buckets);
523  }
524 
525  /// Sets the number of buckets to the required minimum number of buckets
526  /// for the larger of num_items and size().
528  {
529  size_type min_buckets = minBuckets(SYSmax(size(),num_items));
530  setNumBuckets(min_buckets);
531  }
532 
533  /// Sets the number of buckets to new_num_buckets, regardless of the
534  /// required minimum number of buckets for size() items, though
535  /// you probably shouldn't be calling it with num_new_buckets < size(),
536  /// except that if num_new_buclets==0, this calls destroy().
537  void setNumBuckets(size_type new_num_buckets)
538  {
539  if (new_num_buckets == myNumBuckets)
540  return;
541  if (new_num_buckets == 0)
542  {
543  // Assume that the caller wants to delete everything
544  destroy();
545  return;
546  }
547 
548  size_type old_size = size();
549  if (new_num_buckets < old_size)
550  {
551  UT_ASSERT_MSG(0,"What does the caller expect to happen when setting the number of buckets lower than the size, but not to zero?\nThis class doesn't support multiple items per bucket.");
552  new_num_buckets = old_size;
553  }
554 
555  // NOTE: I'm assuming that the caller, in explicitly calling
556  // setNumBuckets, doesn't want to be limited by MAX_LOAD_FACTOR_256,
557  // so don't check against it.
558 
559  pointer new_buckets = (pointer)malloc(new_num_buckets*sizeof(value_type) + sizeof(size_type));
560  pointer new_end = new_buckets + new_num_buckets;
561  for (pointer p = new_buckets; p < new_end; ++p)
562  {
563  Clearer::clearConstruct(p);
564  }
565 
566  if (old_size)
567  {
568  // Move items over and clear them from myBuckets
570  for (pointer srcp = myBuckets; srcp < pend; ++srcp)
571  {
572  if (!Clearer::isClear(*srcp))
573  {
574  value_type v;
575  Clearer::clear(v);
576  // Need to make sure that *p is set to clear,
577  // as opposed to whatever std::move leaves it as,
578  // so swap with clear.
579  std::swap(*srcp, v);
580  pointer destp;
581  insertHelper<false,false>(new_buckets, new_num_buckets, v, destp);
582  *destp = std::move(v);
583  }
584  }
585 
586  // Temporarily set the size to 0, so that destroy() won't re-destruct them.
587  *((size_type*)pend) = 0;
588  }
589  // Don't worry; this will just destruct the current buckets
590  destroy();
591 
592  myBuckets = new_buckets;
593  myNumBuckets = new_num_buckets;
594  *((size_type*)new_end) = old_size;
595  }
596 
597  /// This increases the number of buckets, if needed, for
598  /// the specified number of items
599  void bumpNumBuckets(size_type new_num_items)
600  {
601  size_type num_buckets = minBuckets(new_num_items);
602  if (num_buckets <= myNumBuckets)
603  return;
604  // Although UT_Array normally bumps up by about 12.5% each time,
605  // and so does UTbumpAllocToPrime, to reduce significant rehashing
606  // overhead, we bump to at least twice the previous size.
607  num_buckets = SYSmax(num_buckets, 2*myNumBuckets);
608  num_buckets = UTbumpAllocToPrime(num_buckets);
609  setNumBuckets(num_buckets);
610  }
611 
612  /// Set iterator class
613  template<bool constant_type>
615  {
616  public:
617  typedef std::forward_iterator_tag iterator_category;
618  typedef std::ptrdiff_t difference_type;
622 
623  iterator_t() = default;
624  iterator_t(pointer current, const_pointer end, bool scan=true)
625  : myCurrent(current)
626  , myEnd(end)
627  {
628  UT_ASSERT_MSG_P(scan || current == end || !Clearer::isClear(*current), "An iterator can't point to a clear item!");
629  if (scan)
630  {
631  while (myCurrent != myEnd && Clearer::isClear(*myCurrent))
632  {
633  ++myCurrent;
634  }
635  }
636  }
637  iterator_t(const iterator_t &) = default;
638  iterator_t(iterator_t &&) = default;
639  iterator_t &operator=(const iterator_t &) = default;
640  iterator_t &operator=(iterator_t &&) = default;
641  bool operator==(const iterator_t &that) const
642  {
643  UT_ASSERT_MSG_P(myEnd == that.myEnd, "Probably unintentional: Comparing against iterator from different set or iterator invalidated by reallocation!");
644  return (myCurrent == that.myCurrent);
645  }
646  bool operator!=(const iterator_t &that) const
647  {
648  UT_ASSERT_MSG_P(myEnd == that.myEnd, "Probably unintentional: Comparing against iterator from different set or iterator invalidated by reallocation!");
649  return (myCurrent != that.myCurrent);
650  }
651  bool atEnd() const
652  {
653  return myCurrent == myEnd;
654  }
656  {
657  UT_ASSERT_MSG_P(myCurrent < myEnd, "Incrementing end iterator!");
658  if (myCurrent == myEnd) {
659  return *this;
660  }
661  do {
662  ++myCurrent;
663  } while (myCurrent != myEnd && Clearer::isClear(*myCurrent));
664  return *this;
665  }
666 
667  // This is intentionally not implemented to force its non-use but we
668  // keep it here just in case there's weird C++ rules on iterator
669  // requirements
671 
673  {
674  UT_ASSERT_MSG_P(myCurrent < myEnd, "Dereferencing end iterator!");
675  return *myCurrent;
676  }
678  {
679  UT_ASSERT_MSG_P(myCurrent < myEnd, "Dereferencing end iterator!");
680  return myCurrent;
681  }
682  operator pointer() const
683  {
684  UT_ASSERT_MSG_P(myCurrent <= myEnd, "Dereferencing iterator past end!");
685  return myCurrent;
686  }
688  {
689  UT_ASSERT_MSG_P(myCurrent <= myEnd, "Dereferencing iterator past end!");
690  return myCurrent;
691  }
693  {
694  return myEnd;
695  }
696  private:
697  friend class iterator_t<!constant_type>;
698 
699  pointer myCurrent;
700  const_pointer myEnd;
701  };
702 
703  /// Iterator type for iterating over non-constant elements.
704  typedef iterator_t<false> iterator;
705 
706  /// Iterator type for iterating over constant elements.
707  /// It may be useful to convert an iterator to a const_iterator,
708  /// so making this a subclass lets us add a constructor for that.
709  class const_iterator : public iterator_t<true>
710  {
711  public:
713 
714  const_iterator() = default;
715  const_iterator(const iterator &non_const_it)
716  : iterator_t<true>(non_const_it.getCurrent(), non_const_it.getEnd(), false)
717  {}
718  };
719 
720  /// Returns a non-const iterator for the beginning of the set.
722  {
724  }
725  /// Returns a const iterator for the beginning of the set.
726  const_iterator cbegin() const
727  {
728  return const_iterator(myBuckets,myBuckets+myNumBuckets,true);
729  }
730  /// Returns a const iterator for the beginning of the set.
731  const_iterator begin() const
732  {
733  return const_iterator(myBuckets,myBuckets+myNumBuckets,true);
734  }
735  /// Returns a non-const end iterator for the set.
737  {
739  return iterator(pend,pend,false);
740  }
741  /// Returns a const end iterator for the set.
742  const_iterator cend() const
743  {
745  return const_iterator(pend,pend,false);
746  }
747  /// Returns a const end iterator for the set.
748  const_iterator end() const
749  {
751  return const_iterator(pend,pend,false);
752  }
753 
754  /// Returns an iterator to the first item matching key,
755  /// or an end iterator if no items match key.
756  /// @{
757  iterator find(const Key &key)
758  {
759  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
760 
761  const pointer pstart = myBuckets;
762  if (!pstart)
763  return iterator(nullptr,nullptr,false);
764 
765  const pointer pend = pstart + myNumBuckets;
766 
767  const pointer search_start = searchStart(key);
768  for (pointer p = search_start; p != pend; ++p)
769  {
770  if (key_equal()(key,*p))
771  return iterator(p,pend,false);
772  if (Clearer::isClear(*p))
773  return iterator(pend,pend,false);
774  }
775  // Hit the end, so search back from before the start of the block.
776  for (pointer p = search_start-1; p >= pstart; --p)
777  {
778  if (key_equal()(key,*p))
779  return iterator(p,pend,false);
780  if (Clearer::isClear(*p))
781  return iterator(pend,pend,false);
782  }
783  return iterator(pend,pend,false);
784  }
785  const_iterator find(const Key &key) const
786  {
787  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
788 
789  const const_pointer pstart = myBuckets;
790  if (!pstart)
791  return const_iterator(nullptr,nullptr,false);
792 
793  const const_pointer pend = pstart + myNumBuckets;
794 
795  const_pointer search_start = searchStart(key);
796  for (const_pointer p = search_start; p != pend; ++p)
797  {
798  if (key_equal()(key,*p))
799  return const_iterator(p,pend,false);
800  if (Clearer::isClear(*p))
801  return const_iterator(pend,pend,false);
802  }
803  // Hit the end, so search back from before the start of the block.
804  for (const_pointer p = search_start-1; p >= pstart; --p)
805  {
806  if (key_equal()(key,*p))
807  return const_iterator(p,pend,false);
808  if (Clearer::isClear(*p))
809  return const_iterator(pend,pend,false);
810  }
811  return const_iterator(pend,pend,false);
812  }
813  /// @}
814 
815  /// Returns the number of entries matching key.
816  /// If MULTI is false, this will only return either 0 or 1.
817  size_type count(const Key &key) const
818  {
819  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
820 
821  if (!myBuckets)
822  return 0;
823 
824  const const_pointer pstart = myBuckets;
825  const const_pointer pend = pstart + myNumBuckets;
826  const_pointer search_start = searchStart(key);
827  size_type count = 0;
828  for (const_pointer p = search_start; p != pend; ++p)
829  {
830  if (key_equal()(key,*p))
831  {
832  if (!MULTI)
833  return 1;
834  ++count;
835  }
836  else if (Clearer::isClear(*p))
837  return MULTI ? count : 0;
838  }
839  // Hit the end, so search back from before the start of the block.
840  for (const_pointer p = search_start-1; p >= pstart; --p)
841  {
842  if (key_equal()(key,*p))
843  {
844  if (!MULTI)
845  return 1;
846  ++count;
847  }
848  else if (Clearer::isClear(*p))
849  return MULTI ? count : 0;
850  }
851  return MULTI ? count : 0;
852  }
853 
854  /// Returns true iff the set contains the given key.
855  /// This should be faster than count() if MULTI is true.
856  bool contains(const Key &key) const
857  {
858  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
859 
860  if (!myBuckets)
861  return false;
862 
863  const const_pointer pstart = myBuckets;
864  const const_pointer pend = pstart + myNumBuckets;
865  const_pointer search_start = searchStart(key);
866  for (const_pointer p = search_start; p != pend; ++p)
867  {
868  if (key_equal()(key,*p))
869  return true;
870  if (Clearer::isClear(*p))
871  return false;
872  }
873  // Hit the end, so search back from before the start of the block.
874  for (const_pointer p = search_start-1; p >= pstart; --p)
875  {
876  if (key_equal()(key,*p))
877  return true;
878  if (Clearer::isClear(*p))
879  return false;
880  }
881  return false;
882  }
883 
884  template <typename S>
885  bool containsAll(const S &src) const
886  {
887  return std::all_of(std::begin(src), std::end(src),
888  [this](const auto v) { return contains(v); });
889  }
890 
891  /// Returns a pair of iterators representing the range of values matching
892  /// key, as [first,second).
893  /// @{
894  std::pair<const_iterator,const_iterator> equal_range(const Key &key) const
895  {
896  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
897 
898  const const_pointer pstart = myBuckets;
899  const const_pointer pend = pstart + myNumBuckets;
900  if (!pstart)
901  return std::make_pair(const_iterator(nullptr,nullptr,false), const_iterator(nullptr,nullptr,false));
902 
903  const const_pointer search_start = searchStart(key);
904  if (!MULTI)
905  {
906  for (const_pointer p = search_start; p != pend; ++p)
907  {
908  if (key_equal()(key,*p))
909  return std::make_pair(const_iterator(p,pend,false), const_iterator(p+1,pend,true));
910  if (Clearer::isClear(*p))
911  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
912  }
913  // Hit the end, so search back from before the start of the block.
914  for (const_pointer p = search_start-1; p >= pstart; --p)
915  {
916  if (key_equal()(key,*p))
917  return std::make_pair(const_iterator(p,pend,false), const_iterator(p+1,pend,true));
918  if (Clearer::isClear(*p))
919  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
920  }
921  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
922  }
923 
924  // MULTI is true, so we might have multiple matches
925  const_pointer first_found = nullptr;
926  const_pointer end_found = pend;
927  for (const_pointer p = search_start; p != pend; ++p)
928  {
929  if (key_equal()(key,*p))
930  {
931  if (!first_found)
932  first_found = p;
933  }
934  else if (first_found)
935  {
936  if (first_found != search_start)
937  return std::make_pair(const_iterator(first_found,pend,false), const_iterator(p,pend,true));
938 
939  end_found = p;
940  break;
941  }
942  else if (Clearer::isClear(*p))
943  {
944  // NOTE: first_found must be false
945  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
946  }
947  }
948 
949  // Hit the end, so search back from before the start of the block.
950  for (const_pointer p = search_start-1; p >= pstart; --p)
951  {
952  if (!key_equal()(key,*p))
953  {
954  return std::make_pair(const_iterator(p+1,pend,false), const_iterator(end_found,pend,true));
955  }
956  }
957  return std::make_pair(const_iterator(pstart,pend,false), const_iterator(end_found,pend,true));
958  }
959  std::pair<iterator,iterator> equal_range(const Key &key)
960  {
961  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
962 
963  const pointer pstart = myBuckets;
964  const pointer pend = pstart + myNumBuckets;
965  if (!pstart)
966  return std::make_pair(iterator(nullptr,nullptr,false), iterator(nullptr,nullptr,false));
967 
968  const pointer search_start = searchStart(key);
969  if (!MULTI)
970  {
971  for (pointer p = search_start; p != pend; ++p)
972  {
973  if (key_equal()(key,*p))
974  return std::make_pair(iterator(p,pend,false), iterator(p+1,pend,true));
975  if (Clearer::isClear(*p))
976  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
977  }
978  // Hit the end, so search back from before the start of the block.
979  for (pointer p = search_start-1; p >= pstart; --p)
980  {
981  if (key_equal()(key,*p))
982  return std::make_pair(iterator(p,pend,false), iterator(p+1,pend,true));
983  if (Clearer::isClear(*p))
984  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
985  }
986  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
987  }
988 
989  // MULTI is true, so we might have multiple matches
990  pointer first_found = nullptr;
991  pointer end_found = pend;
992  for (pointer p = search_start; p != pend; ++p)
993  {
994  if (key_equal()(key,*p))
995  {
996  if (!first_found)
997  first_found = p;
998  }
999  else if (first_found)
1000  {
1001  if (first_found != search_start)
1002  return std::make_pair(iterator(first_found,pend,false), iterator(p,pend,true));
1003 
1004  end_found = p;
1005  break;
1006  }
1007  else if (Clearer::isClear(*p))
1008  {
1009  // NOTE: first_found must be false
1010  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
1011  }
1012  }
1013 
1014  // Hit the end, so search back from before the start of the block.
1015  for (pointer p = search_start-1; p >= pstart; --p)
1016  {
1017  if (!key_equal()(key,*p))
1018  {
1019  return std::make_pair(iterator(p+1,pend,false), iterator(end_found,pend,true));
1020  }
1021  }
1022  return std::make_pair(iterator(pstart,pend,false), iterator(end_found,pend,true));
1023  }
1024  /// @}
1025 
1026  /// Inserts the given value into the set, unless MULTI is false and
1027  /// there's already an item for which key_equal()(value,other)
1028  /// returns true. Returns a pair of iterator to the inserted value
1029  /// (or the existing item if not inserted), and a bool that's true
1030  /// iff the item was inserted.
1031  /// @{
1032  std::pair<iterator, bool> insert(const value_type &value)
1033  {
1034  pointer p;
1035  bool success = insertHelper(myBuckets, myNumBuckets, value, p);
1036  if (success)
1037  {
1038  *p = value;
1039  ++*((size_type*)(myBuckets+myNumBuckets));
1040  }
1041  return std::make_pair(iterator(p,myBuckets+myNumBuckets,false),success);
1042  }
1043  std::pair<iterator, bool> insert(value_type &&value)
1044  {
1045  pointer p;
1046  bool success = insertHelper(myBuckets, myNumBuckets, value, p);
1047  if (success)
1048  {
1049  *p = std::move(value);
1050  ++*((size_type*)(myBuckets+myNumBuckets));
1051  }
1052  return std::make_pair(iterator(p,myBuckets+myNumBuckets,false),success);
1053  }
1054  /// @}
1055 
1056  /// Inserts all of the items in the range [start_input,end_input).
1057  template<typename INPUT_ITERATOR>
1058  void insert(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input)
1059  {
1060  // Preallocate enough buckets, instead of waiting for
1061  // insert to potentially hit collisions.
1062  size_type ninput_items = std::distance(start_input, end_input);
1063  if (!ninput_items)
1064  return;
1065 
1066  size_type new_size = size() + ninput_items;
1067  if (myNumBuckets < minBuckets(new_size))
1068  bumpNumBuckets(new_size);
1069 
1070  // Now, just insert them all, (no need to realloc)
1071  for (; start_input != end_input; ++start_input)
1072  {
1073  pointer pdest;
1074  bool success = insertHelper<!MULTI,false>(myBuckets,myNumBuckets,*start_input,pdest);
1075  if (success)
1076  *pdest = *start_input;
1077  else
1078  --new_size;
1079 
1080  }
1081  *((size_type*)(myBuckets+myNumBuckets)) = new_size;
1082  }
1083  /// Inserts all of the items from the initializer_list.
1084  void insert(std::initializer_list<value_type> list)
1085  {
1086  // Just delegate to the iterator range insert function.
1087  insert(list.begin(),list.end());
1088  }
1089 
1090  /// Inserts an element constructed with the given arguments.
1091  /// Maybe the standard implementation can somehow get a speedup
1092  /// from this, but we need the item's hash before inserting,
1093  /// so it needs to be constructed first.
1094  template <typename... Args>
1095  std::pair<iterator,bool> emplace(Args&&... args)
1096  {
1097  return insert(value_type(std::forward<Args>(args)...));
1098  }
1099 
1100  /// Removes the item at the specified iterator location,
1101  /// returning an iterator to the next item.
1102  ///
1103  /// Unlike std::unordered_set::erase(const_iterator), this may
1104  /// invalidate iterators and references to other items in the
1105  /// map, if they are in a contiguous block of non-empty items
1106  /// with the item that iter points to. Also, this only
1107  /// accepts an iterator, instead of a const_iterator.
1108  /// (Why would std::unordered_set::erase accept a const_iterator?)
1110  {
1111  const pointer pstart = myBuckets;
1112  const pointer pend = pstart + myNumBuckets;
1113 
1114  const pointer p = (pointer)iter;
1115 
1116  UT_ASSERT_MSG_P(p >= pstart && p < pend && !Clearer::isClear(*p), "An iterator to erase can't point to a clear item!");
1117 
1118  // Actually erase the item
1119  Clearer::clear(*p);
1120  size_type new_size = --*((size_type*)(myBuckets+myNumBuckets));
1121 
1122  if (new_size == 0)
1123  return iterator(pend,pend,false);
1124 
1125  // Check for the common, simple case, where there's nothing after p in
1126  // the block of possible conflicts, and p isn't at the end.
1127  pointer pnext = p+1;
1128  bool end_case = (pnext == pend);
1129  if (!end_case)
1130  {
1131  if (Clearer::isClear(*pnext))
1132  return iterator(pnext,pend,true);
1133 
1134  // Now, for the hard case... -_-
1135 
1136  // If we're not right at the end of the array already,
1137  // find the end of the block, and check if it's the end of the array.
1138  // pnext is already clear, else we'd have returned above.
1139  do
1140  {
1141  ++pnext;
1142  end_case = (pnext == pend);
1143  } while (!end_case && !Clearer::isClear(*pnext));
1144  }
1145 
1146  if (end_case && (p != pstart) && !Clearer::isClear(*(p-1)))
1147  {
1148  // If this is a block right at the end of the array, there may be earlier
1149  // items that are too early, in which case, we need to shift
1150  // everything after that down one, into the empty spot.
1151 
1152  // Find the start of the block.
1153  pointer pprev = p-1;
1154  while (pprev != pstart && !Clearer::isClear(*(pprev-1)))
1155  {
1156  --pprev;
1157  }
1158 
1159  // Find the earliest item that should go at or after
1160  // its current bucket.
1161  do
1162  {
1163  // Check where this item ideally belongs.
1164  size_type hash = hasher()(*pprev);
1165  size_type ideal_bucket_num = hash%myNumBuckets;
1166  if (ideal_bucket_num > (pprev-pstart))
1167  {
1168  // Shift everything later by one and return.
1169  for (pointer pdest = p; pdest != pprev; --pdest)
1170  {
1171  *pdest = std::move(*(pdest-1));
1172  }
1173  Clearer::clear(*pprev);
1174  return iterator(p+1,pend,true);
1175  }
1176 
1177  ++pprev;
1178  } while (pprev != p);
1179  }
1180 
1181  if (p+1 != pend)
1182  {
1183  // We're not at the end of the array, or there were no items before
1184  // this block to move into the empty space, but there is at least
1185  // one item after p, so shift items backward until there's one
1186  // that doesn't belong earlier than it is now.
1187 
1188  size_type empty_bucket_num = p-pstart;
1189 
1190  // NOTE: pnext points to the end of this block.
1191  for (pointer pdest = p; pdest+1 != pnext; ++pdest)
1192  {
1193  // Check where this item ideally belongs.
1194  size_type hash = hasher()(*(pdest+1));
1195  size_type ideal_bucket_num = hash%myNumBuckets;
1196  if (ideal_bucket_num > empty_bucket_num)
1197  {
1198  Clearer::clear(*pdest);
1199  return iterator(p+(pdest==p),pend,false);
1200  }
1201 
1202  *pdest = std::move(*(pdest+1));
1203  ++empty_bucket_num;
1204  }
1205  Clearer::clear(*(pnext-1));
1206  // p now contains an item that was previously after it.
1207  return iterator(p,pend,false);
1208  }
1209 
1210  // p is still empty, but there's an item (or the end pointer)
1211  // right after it.
1212  return iterator(p+1,pend,false);
1213  }
1214 
1215 #if 0 // This isn't fully implemented, so it's excluded to avoid anyone accidentally using it.
1216  /// Removes all items in the range [start_iter,end_iter),
1217  /// and returns an iterator immediately following that range.
1218  ///
1219  /// Unlike std::unordered_set::erase(const_iterator,const_iterator),
1220  /// this may invalidate iterators and references to other items in the
1221  /// map, if they are in a contiguous block of non-empty items
1222  /// with the items in the erased range. Also, this only
1223  /// accepts iterator's, instead of const_iterator's.
1224  /// (Why would std::unordered_set::erase accept a const_iterator?)
1225  iterator erase(iterator start_iter, iterator end_iter) {
1226  if (start_iter == end_iter)
1227  return end_iter;
1228 
1229  const pointer pstart = myBuckets;
1230  const const_pointer pend = pstart + myNumBuckets;
1231 
1232  const pointer prange_start = (pointer)start_iter;
1233  const pointer prange_end = (pointer)end_iter;
1234 
1235  UT_ASSERT_MSG_P(prange_start >= pstart && prange_start < prange_end && prange_end <= pend && !Clearer::isClear(*prange_start), "An iterator to erase can't point to a clear item!");
1236 
1237  // Actually erase the items
1238  size_type nremoved_items = 1;
1239  size_type nitems_in_last_block = 1;
1240  Clearer::clear(*prange_start);
1241  for (pointer p = prange_start+1; p != prange_end; ++p)
1242  {
1243  if (!Clearer::isClear(*p))
1244  {
1245  Clearer::clear(*p);
1246  ++nitems_in_last_block;
1247  ++nremoved_items;
1248  }
1249  else
1250  {
1251  nitems_in_last_block = 0;
1252  }
1253  }
1254 
1255  size_type new_size = (*((size_type*)(myBuckets+myNumBuckets)) -= nremoved_items);
1256 
1257  if (new_size == 0)
1258  {
1259  // This must be pointing to the end of the array.
1260  return end_iter;
1261  }
1262 
1263  pointer block_end = prange_end;
1264 
1265  if (prange_end != pend)
1266  {
1267  // NOTE: If prange_end is not pend, it is always occupied, because
1268  // it came from an iterator, which should be dereferenceable.
1269  // However, if there was a gap between the last removed item
1270  // and prange_end, we can still exit early.
1271  if (nitems_in_last_block == 0)
1272  return end_iter;
1273 
1274  // Find the end of the block
1275  do
1276  {
1277  ++block_end;
1278  } while (block_end != pend && !Clearer::isClear(*block_end));
1279  }
1280  else {
1281  // If end_iter is at the array end, and there's at least one empty
1282  // spot in the given range, we're done, because the last block
1283  // was completely eliminated.
1284  if (nitems_in_last_block != prange_end-prange_start)
1285  return end_iter;
1286  }
1287 
1288  UT_ASSERT_P(nitems_in_last_block > 0);
1289 
1290  // If the end of the block is the array end and
1291  // nitems_in_last_block == prange_end-prange_start (no gaps),
1292  // find the beginning of the block, and if there are any items that
1293  // belong in later buckets:
1294  // 1) shift everything after to the end of the clear block.
1295  // 2) move them backward to where they belong, as available.
1296  // 3) if there is still an item in the last cleared spot, return.
1297  if (block_end == pend && nitems_in_last_block == prange_end-prange_start && prange_start != pstart && !Clearer::isClear(*(prange_start-1)))
1298  {
1299  // Find the start of the block.
1300  pointer block_start = prange_start;
1301  do
1302  {
1303  --block_start;
1304  } while (block_start != pstart && !Clearer::isClear(*(block_start-1)));
1305 
1306  UT_ASSERT_MSG(0,"FIXME: Implement this!!!");
1307  }
1308 
1309  // Move items backward until there's one that doesn't
1310  // need to be moved backward.
1311  UT_ASSERT_MSG(0,"FIXME: Implement this!!!");
1312  return end_iter; // This is probably incorrect, but just to get it to compile
1313  }
1314 #endif
1315 
1316  /// Removes all items matching key and returns
1317  /// the number of items removed.
1318  size_type erase(const Key &key)
1319  {
1320  if (!MULTI)
1321  {
1322  iterator it = find(key);
1323  if (it.atEnd())
1324  return 0;
1325  erase(it);
1326  return 1;
1327  }
1328 
1329  // NOTE: This commented out code won't work, because the 2-argument erase isn't implemented.
1330  //std::pair<iterator,iterator> its = equal_range(key);
1331  //size_type count = std::distance(its.first, its.second);
1332  //erase(its.first, its.second);
1333 
1334  size_type count = 0;
1335  for (iterator it = find(key); !it.atEnd(); it = find(key))
1336  {
1337  erase(it);
1338  }
1339  return count;
1340  }
1341 
1342  /// NOTE: std::unordered_set::hash_function() isn't static,
1343  /// but here, we're not storing a hasher as
1344  /// a data member, so it's independent of the instance.
1346  {
1347  return hasher();
1348  }
1349 
1350  /// NOTE: std::unordered_set::key_eq() isn't static,
1351  /// but here, we're not storing a key_equal as
1352  /// a data member, so it's independent of the instance.
1354  {
1355  return key_equal();
1356  }
1357 
1358  /// Returns the amount of memory used by this, excluding any memory that might be
1359  /// allocated by any of the items, themselves.
1360  int64 getMemoryUsage(bool inclusive) const
1361  {
1362  int64 mem = inclusive ? sizeof(*this) : 0;
1363  mem += sizeof(value_type)*myNumBuckets;
1364  return mem;
1365  }
1366 
1367  template<typename FUNCTOR>
1369  void forEach(FUNCTOR &&functor) const
1370  {
1372  const const_pointer pend = p + myNumBuckets;
1373  for (; p != pend; ++p)
1374  {
1375  if (!Clearer::isClear(*p))
1376  functor(*p);
1377  }
1378  }
1379 
1380 protected:
1382  {
1383  // If there are only 0-2 elements, there might as well only be
1384  // that many buckets.
1385  if (size <= 2)
1386  return size;
1387 
1388  // Simpler case if MAX_LOAD_FACTOR_256 evenly divides 256
1389  if ((256%MAX_LOAD_FACTOR_256) == 0)
1390  // +1 because otherwise it's guaranteed to be composite.
1391  return size * (256/MAX_LOAD_FACTOR_256) + 1;
1392 
1393  return (size*256 + MAX_LOAD_FACTOR_256-1)/MAX_LOAD_FACTOR_256;
1394  }
1395  pointer searchStart(const Key &key)
1396  {
1397  const size_type hash = hasher()(key);
1398  return myBuckets + (hash%myNumBuckets);
1399  }
1400  const_pointer searchStart(const Key &key) const
1401  {
1402  const size_type hash = hasher()(key);
1403  return myBuckets + (hash%myNumBuckets);
1404  }
1405 
1406  template<bool fail_on_equal=!MULTI,bool check_realloc=true>
1407  bool insertHelper(pointer pstart, size_type nbuckets, const value_type &value,pointer &destp)
1408  {
1409  UT_ASSERT_P(!Clearer::isClear(value));
1410 
1411  if (check_realloc && nbuckets == 0)
1412  {
1413  setNumBuckets(1);
1414  destp = myBuckets;
1415  return true;
1416  }
1417 
1418  const pointer pend = pstart + nbuckets;
1419  const size_type hash = hasher()(value);
1420  const size_type ideal_bucket = (hash%nbuckets);
1421  pointer init_searchp = pstart + ideal_bucket;
1422  pointer searchp = init_searchp;
1423 
1424  if (Clearer::isClear(*searchp))
1425  {
1426  // Don't bother checking MAX_LOAD_FACTOR_256 if exact match is clear,
1427  // since there's no harm in perfect hashes... unless it fills up and
1428  // people do a lot of querying keys that aren't in the set.
1429  // If that ends up being a bottleneck, feel free to change this.
1430  destp = searchp;
1431  return true;
1432  }
1433 
1434  if (fail_on_equal && key_equal()(*searchp,value))
1435  {
1436  destp = searchp;
1437  return false;
1438  }
1439 
1440  if (!fail_on_equal && check_realloc)
1441  {
1442  // We're always adding one.
1443  size_type new_size = size() + 1;
1444  if (nbuckets < minBuckets(new_size)) {
1445  bumpNumBuckets(new_size);
1446  // Delegate to version that doesn't check for realloc.
1447  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1448  }
1449  // We're not reallocating in this case.
1450  }
1451 
1452  // Find the place where our ideal bucket is less than the next
1453  // item's ideal bucket, i.e. the end of where collisions on ideal_bucket go,
1454  // in case there are multiple collision spans that have written past ideal_bucket.
1455  if ((hasher()(*searchp)%nbuckets) <= ideal_bucket)
1456  {
1457  while (true)
1458  {
1459  ++searchp;
1460  if (searchp == pend || Clearer::isClear(*searchp))
1461  break;
1462 
1463  const size_type other_hash = hasher()(*searchp);
1464  if (fail_on_equal && other_hash==hash && key_equal()(*searchp,value))
1465  {
1466  destp = searchp;
1467  return false;
1468  }
1469  if (ideal_bucket < (other_hash%nbuckets))
1470  break;
1471  }
1472  }
1473  // Known:
1474  // [init_searchp, searchp] does not contain value.
1475  // searchp is one of:
1476  // Clear
1477  // pend
1478  // idealbucket(searchp) > idealbucket(value)
1479 
1480  pointer insertp = searchp;
1481 
1482  // Find the end of the block of present items, all of
1483  // which should have idealbuckets greater than us.
1484  while (searchp != pend && !Clearer::isClear(*searchp))
1485  {
1486  ++searchp;
1487  }
1488 
1489  if (searchp != pend)
1490  {
1491  // Common case: block end is not the array end.
1492  // Implies searchp is Clear.
1493  // Since we didn't hit the end, this occupied block
1494  // was never bumped back, so we don't have to look
1495  // back for matches or proper ideal bucket span.
1496  // We can easily make room by moving everything in
1497  // [insertp, searchp) -> [insertp+1, searchp+1)
1498  // so we can write to [insertp].
1499 
1500  if (fail_on_equal && check_realloc)
1501  {
1502  // We're always adding one.
1503  size_type new_size = *((const size_type*)pend) + 1;
1504  if (nbuckets < minBuckets(new_size))
1505  {
1506  bumpNumBuckets(new_size);
1507  // Delegate to version that doesn't check for realloc.
1508  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1509  }
1510  // We're not reallocating in this case.
1511  }
1512 
1513  // Move items later by one.
1514  // std::move [insertp, searchp) to [insertp+1, searchp+1)
1515  while (searchp != insertp)
1516  {
1517  *searchp = std::move(*(searchp-1));
1518  --searchp;
1519  }
1520  destp = insertp;
1521  return true;
1522  }
1523 
1524  // Less common case: block end is the array end.
1525  // insertp may actually need to go earlier because we may
1526  // not be pointing to the end of our ideal block span.
1527  // We want insertp to refer to a location that we can update
1528  // with value and preserve the ordering of idealbuckets.
1529  if (insertp == init_searchp)
1530  {
1531  // There should always be a space somewhere if !fail_on_equal,
1532  // because reallocation was handled above.
1533  UT_ASSERT_P(insertp-1 >= pstart || (fail_on_equal && insertp == pstart));
1534  while ((!fail_on_equal || insertp != pstart) && !Clearer::isClear(*(insertp-1)))
1535  {
1536  const size_type other_hash = hasher()(*(insertp-1));
1537  if (fail_on_equal && other_hash == hash && key_equal()(*(insertp-1),value))
1538  {
1539  destp = insertp-1;
1540  return false;
1541  }
1542 
1543  // Exiting at <= rather than < means that we may be in
1544  // middle of an ideal block span.
1545  if ((other_hash%nbuckets) <= ideal_bucket)
1546  break;
1547  --insertp;
1548  UT_ASSERT_P(insertp-1 >= pstart || (fail_on_equal && insertp == pstart));
1549  }
1550  }
1551 
1552  // There are occupied buckets from init_searchp to the end,
1553  // but insertp is after init_searchp, so we don't need to look backward
1554  // ^^^^^^^^^ It isn't???
1555  // to find where to insert; we just need to shift items that
1556  // are in the range from the beginning of the occupied block to insertp
1557  // backward by 1.
1558  //
1559  // Because of the previous loop, insertp can be less than init_searchp.
1560  // In that case we want to start the blockstart at insertp. This
1561  // is both more performant, but more importantly the
1562  // in_ideal_bucket_span condition will actually make sense then.
1563  // Without this, the init_searchp might be *greater* than the
1564  // ideal bucket span and cause us to immediately stop checking.
1565 
1566  pointer blockstart = (init_searchp < insertp) ? init_searchp : insertp;
1567  if (fail_on_equal && check_realloc)
1568  {
1569  // It can happen that blockstart is pstart iff the entire
1570  // array is full, and since we've checked all items with equal hash
1571  // in that case, we know that we'll be inserting, so we need to
1572  // reallocate.
1573  if (blockstart == pstart)
1574  {
1575  size_type new_size = *((const size_type*)pend) + 1;
1576  bumpNumBuckets(new_size);
1577  // Delegate to version that doesn't check for realloc.
1578  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1579  }
1580  }
1581 
1582  // NOTE: blockstart shouldn't hit pstart, because myNumBuckets > size()
1583  UT_ASSERT_P(blockstart-1 >= pstart);
1584  bool in_ideal_bucket_span = true;
1585  bool checked_realloc = false;
1586  while (!Clearer::isClear(*(blockstart-1)))
1587  {
1588  --blockstart;
1589  // Note that we might be in the middle of a span of equal initial_bucket
1590  // values, so an equal item might be earlier than init_searchp.
1591  if (fail_on_equal && in_ideal_bucket_span)
1592  {
1593  const size_type other_hash = hasher()(*blockstart);
1594  if ((other_hash == hash) && key_equal()(*blockstart,value))
1595  {
1596  destp = blockstart;
1597  return false;
1598  }
1599  in_ideal_bucket_span = ((other_hash%nbuckets) == ideal_bucket);
1600  }
1601  if (fail_on_equal && check_realloc && !checked_realloc)
1602  {
1603  if (!in_ideal_bucket_span)
1604  {
1605  // We're always adding one, since items can't be equal outside
1606  // the equal ideal_bucket span. We only need to check for
1607  // reallocation once, since if there's at least one space free,
1608  // blockstart can't end up equalling pstart.
1609  size_type new_size = *((const size_type*)pend) + 1;
1610  if (nbuckets < minBuckets(new_size))
1611  {
1612  bumpNumBuckets(new_size);
1613  // Delegate to version that doesn't check for realloc.
1614  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1615  }
1616  // We're not reallocating in this case.
1617  checked_realloc = true;
1618  }
1619  // It can happen that blockstart is pstart iff the entire
1620  // array is full, and since we've checked all items with equal hash
1621  // in that case, we know that we'll be inserting, so we need to reallocate.
1622  else if (blockstart == pstart)
1623  {
1624  size_type new_size = *((const size_type*)pend) + 1;
1625  bumpNumBuckets(new_size);
1626  // Delegate to version that doesn't check for realloc.
1627  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1628  }
1629  }
1630  UT_ASSERT_P(blockstart-1 >= pstart);
1631  }
1632 
1633  if (fail_on_equal && check_realloc && !checked_realloc)
1634  {
1635  // We're always adding one.
1636  size_type new_size = *((const size_type*)pend) + 1;
1637  if (nbuckets < minBuckets(new_size))
1638  {
1639  bumpNumBuckets(new_size);
1640  // Delegate to version that doesn't check for realloc.
1641  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1642  }
1643  // We're not reallocating in this case.
1644  }
1645 
1646  // Move items earlier by one.
1647  while (blockstart != insertp)
1648  {
1649  *(blockstart-1) = std::move(*blockstart);
1650  ++blockstart;
1651  }
1652  destp = insertp-1;
1653  return true;
1654  }
1655 
1656 protected:
1659 };
1660 
1661 template<
1662  typename Key,
1663  bool MULTI,
1664  std::size_t MAX_LOAD_FACTOR_256,
1665  typename Clearer,
1666  class Hash,
1667  class KeyEqual
1668 >
1669 struct DefaultClearer<ArraySet<Key,MULTI,MAX_LOAD_FACTOR_256,Clearer,Hash,KeyEqual> >
1670 {
1673  static void clear(TheType&v) { v.destroy(); }
1675  static bool isClear(TheType &v) { return v.bucket_count() == 0; }
1677  static void clearConstruct(TheType *p) { new((void *)p) TheType(); }
1678 
1679  static const bool clearNeedsDestruction = false;
1680 };
1681 
1682 } // namespace UT
1683 
1684 template<typename Key,
1685  bool MULTI = false,
1686  int MAX_LOAD_FACTOR_256 = 128,
1687  typename Clearer = UT::DefaultClearer<Key>,
1688  class Hash = hboost::hash<Key>,
1689  class KeyEqual = std::equal_to<Key> >
1691 
1692 namespace std {
1693  template<typename Key,
1694  bool MULTI,
1695  int MAX_LOAD_FACTOR_256,
1696  typename Clearer,
1697  class Hash,
1698  class KeyEqual>
1699  void swap(
1702  {
1703  a.swap(b);
1704  }
1705 }
1706 
1707 #endif
float load_factor() const
Returns the current portion of buckets that are occupied.
Definition: UT_ArraySet.h:505
const_iterator find(const Key &key) const
Definition: UT_ArraySet.h:785
type
Definition: core.h:556
#define SYSmax(a, b)
Definition: SYS_Math.h:1582
const_pointer searchStart(const Key &key) const
Definition: UT_ArraySet.h:1400
pointer myBuckets
Definition: UT_ArraySet.h:1657
static void clearConstruct(bool *p)
Definition: UT_ArraySet.h:106
ArraySet(const set_type &that)
Definition: UT_ArraySet.h:268
ArraySet(size_type init_bucket_count)
Definition: UT_ArraySet.h:249
UT_ASSERT_COMPILETIME(BRAY_EVENT_MAXFLAGS<=32)
std::ptrdiff_t difference_type
Definition: UT_ArraySet.h:618
void clear()
Definition: UT_ArraySet.h:429
std::pair< iterator, bool > insert(const value_type &value)
Definition: UT_ArraySet.h:1032
size_type size() const
Returns the number of items in the set.
Definition: UT_ArraySet.h:415
UT_API exint UTbumpAllocToPrime(exint current_size)
void setNumBuckets(size_type new_num_buckets)
Definition: UT_ArraySet.h:537
static const bool clearNeedsDestruction
Definition: UT_ArraySet.h:79
STATIC_INLINE size_t Hash(const char *s, size_t len)
Definition: farmhash.h:2099
void swap(UT::ArraySet< Key, MULTI, MAX_LOAD_FACTOR_256, Clearer, Hash, KeyEqual > &a, UT::ArraySet< Key, MULTI, MAX_LOAD_FACTOR_256, Clearer, Hash, KeyEqual > &b)
Definition: UT_ArraySet.h:1699
const GLdouble * v
Definition: glcorearb.h:837
pointer getCurrent() const
Definition: UT_ArraySet.h:687
GLsizei const GLfloat * value
Definition: glcorearb.h:824
static void clearConstruct(T *p)
Definition: UT_ArraySet.h:77
static size_type max_bucket_count()
Definition: UT_ArraySet.h:476
std::pair< iterator, bool > insert(value_type &&value)
Definition: UT_ArraySet.h:1043
static void clear(S *&v)
Definition: UT_ArraySet.h:38
static void clear(bool &v)
Definition: UT_ArraySet.h:104
constexpr bool SYSisNan(const F f)
Definition: SYS_Math.h:184
set_type & operator=(set_type &&that)
Definition: UT_ArraySet.h:354
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1222
bool empty() const
Returns true iff there are no items in the set.
Definition: UT_ArraySet.h:409
ImageBuf OIIO_API min(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
iterator end()
Returns a non-const end iterator for the set.
Definition: UT_ArraySet.h:736
iterator_t & operator=(const iterator_t &)=default
#define UT_ASSERT_MSG_P(ZZ,...)
Definition: UT_Assert.h:158
static bool isClear(bool v)
Definition: UT_ArraySet.h:105
OutGridT const XformOp bool bool
std::ptrdiff_t difference_type
Definition: UT_ArraySet.h:234
bool insertHelper(pointer pstart, size_type nbuckets, const value_type &value, pointer &destp)
Definition: UT_ArraySet.h:1407
static bool isClear(const std::pair< S0, S1 > &v)
Definition: UT_ArraySet.h:119
ArraySet< Key, MULTI, MAX_LOAD_FACTOR_256, Clearer, Hash, KeyEqual > set_type
Definition: UT_ArraySet.h:230
#define UT_ASSERT_MSG(ZZ,...)
Definition: UT_Assert.h:159
iterator erase(iterator iter)
Definition: UT_ArraySet.h:1109
void insert(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input)
Inserts all of the items in the range [start_input,end_input).
Definition: UT_ArraySet.h:1058
iterator_t(pointer current, const_pointer end, bool scan=true)
Definition: UT_ArraySet.h:624
std::size_t size_type
Definition: UT_ArraySet.h:233
static size_type minBuckets(size_type size)
Definition: UT_ArraySet.h:1381
reference operator*() const
Definition: UT_ArraySet.h:672
static void clearConstruct(std::pair< S0, S1 > *p)
Definition: UT_ArraySet.h:123
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:155
void destroy()
Removes all items from the set and frees all the memory.
Definition: UT_ArraySet.h:446
static bool isClear(S *v)
Definition: UT_ArraySet.h:39
ArraySet(std::initializer_list< value_type > init, size_type init_bucket_count=0)
Definition: UT_ArraySet.h:303
GLuint GLuint end
Definition: glcorearb.h:475
iterator begin()
Returns a non-const iterator for the beginning of the set.
Definition: UT_ArraySet.h:721
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
Set iterator class.
Definition: UT_ArraySet.h:614
size_type myNumBuckets
Definition: UT_ArraySet.h:1658
long long int64
Definition: SYS_Types.h:116
set_type & operator=(const set_type &that)
Definition: UT_ArraySet.h:320
bool containsAll(const S &src) const
Definition: UT_ArraySet.h:885
set_type & operator=(std::initializer_list< value_type > init)
Definition: UT_ArraySet.h:347
static key_equal key_eq()
Definition: UT_ArraySet.h:1353
static void clearConstruct(S **p)
Definition: UT_ArraySet.h:40
void bumpNumBuckets(size_type new_num_items)
Definition: UT_ArraySet.h:599
const_iterator cbegin() const
Returns a const iterator for the beginning of the set.
Definition: UT_ArraySet.h:726
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1222
const_iterator cend() const
Returns a const end iterator for the set.
Definition: UT_ArraySet.h:742
iterator find(const Key &key)
Definition: UT_ArraySet.h:757
num_items
Definition: UT_RTreeImpl.h:672
std::conditional< constant_type, const value_type, value_type >::type & reference
Definition: UT_ArraySet.h:620
std::forward_iterator_tag iterator_category
Definition: UT_ArraySet.h:617
int64 getMemoryUsage(bool inclusive) const
Definition: UT_ArraySet.h:1360
ArraySet(set_type &&that)
Move constructor, destroying the source.
Definition: UT_ArraySet.h:258
size_type count(const Key &key) const
Definition: UT_ArraySet.h:817
GLsizeiptr size
Definition: glcorearb.h:664
std::integral_constant< bool, std::numeric_limits< T >::is_signed||std::is_same< T, int128_opt >::value > is_signed
Definition: format.h:818
bool operator!=(const set_type &that) const
Definition: UT_ArraySet.h:387
value_type & reference
Definition: UT_ArraySet.h:237
IMATH_NAMESPACE::V2f IMATH_NAMESPACE::Box2i std::string this attribute is obsolete as of OpenEXR v3 float
size_type bucket_size(size_type i) const
Definition: UT_ArraySet.h:484
GLenum void ** pointer
Definition: glcorearb.h:810
typename detail::member_type< T >::type member_type_t
Definition: UT_ArraySet.h:147
std::pair< iterator, bool > emplace(Args &&...args)
Definition: UT_ArraySet.h:1095
std::conditional< constant_type, const typename ArraySet::value_type, typename ArraySet::value_type >::type value_type
Definition: UT_ArraySet.h:619
SYS_FORCE_INLINE void forEach(FUNCTOR &&functor) const
Definition: UT_ArraySet.h:1369
void insert(std::initializer_list< value_type > list)
Inserts all of the items from the initializer_list.
Definition: UT_ArraySet.h:1084
const value_type * const_pointer
Definition: UT_ArraySet.h:240
const_pointer getEnd() const
Definition: UT_ArraySet.h:692
std::pair< const_iterator, const_iterator > equal_range(const Key &key) const
Definition: UT_ArraySet.h:894
static bool isClear(T v)
Definition: UT_ArraySet.h:65
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
iterator_t< constant_type > & operator++()
Definition: UT_ArraySet.h:655
void swap(set_type &that)
Swaps another set with this one.
Definition: UT_ArraySet.h:393
const value_type & const_reference
Definition: UT_ArraySet.h:238
pointer operator->() const
Definition: UT_ArraySet.h:677
**If you just want to fire and args
Definition: thread.h:618
static void clear(std::pair< S0, S1 > &v)
Definition: UT_ArraySet.h:114
static size_type max_size()
Definition: UT_ArraySet.h:422
void rehash(size_type new_num_buckets)
Definition: UT_ArraySet.h:513
std::pair< iterator, iterator > equal_range(const Key &key)
Definition: UT_ArraySet.h:959
static float max_load_factor()
Definition: UT_ArraySet.h:499
std::conditional< constant_type, typename set_type::const_pointer, typename set_type::pointer >::type pointer
Definition: UT_ArraySet.h:621
static hasher hash_function()
Definition: UT_ArraySet.h:1345
static void clear(T &v)
Definition: UT_ArraySet.h:48
const_iterator begin() const
Returns a const iterator for the beginning of the set.
Definition: UT_ArraySet.h:731
iterator_t< false > iterator
Iterator type for iterating over non-constant elements.
Definition: UT_ArraySet.h:704
SIM_API const UT_StringHolder distance
bool operator!=(const iterator_t &that) const
Definition: UT_ArraySet.h:646
bool contains(const Key &key) const
Definition: UT_ArraySet.h:856
size_type bucket_count() const
Returns the current number of buckets.
Definition: UT_ArraySet.h:469
KeyEqual key_equal
Definition: UT_ArraySet.h:236
pointer searchStart(const Key &key)
Definition: UT_ArraySet.h:1395
const_iterator(const iterator &non_const_it)
Definition: UT_ArraySet.h:715
bool operator==(const iterator_t &that) const
Definition: UT_ArraySet.h:641
size_type erase(const Key &key)
Definition: UT_ArraySet.h:1318
ArraySet(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input, size_type init_bucket_count=0)
Inserts all of the items in the range [start_input,end_input).
Definition: UT_ArraySet.h:277
GLint GLsizei count
Definition: glcorearb.h:405
Clearer clearer_type
Definition: UT_ArraySet.h:241
const_iterator end() const
Returns a const end iterator for the set.
Definition: UT_ArraySet.h:748
bool operator==(const set_type &that) const
Definition: UT_ArraySet.h:364
GLenum src
Definition: glcorearb.h:1793
void reserve(size_type num_items)
Definition: UT_ArraySet.h:527
value_type * pointer
Definition: UT_ArraySet.h:239
PcpNodeRef_ChildrenIterator begin(const PcpNodeRef::child_const_range &r)
Support for range-based for loops for PcpNodeRef children ranges.
Definition: node.h:566