HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_ArraySet.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_ArraySet.h (UT Library, C++)
7  *
8  * COMMENTS: Flat-array-based hash set implementation.
9  */
10 
11 #pragma once
12 
13 #ifndef __UT_ArraySet__
14 #define __UT_ArraySet__
15 
16 #include "UT_ArrayHelp.h"
17 #include "UT_Assert.h"
18 #include <SYS/SYS_Inline.h>
19 #include <SYS/SYS_Math.h>
20 #include <SYS/SYS_StaticAssert.h>
21 #include <SYS/SYS_Types.h>
22 
23 #include <hboost/functional/hash.hpp>
24 
25 #include <limits>
26 #include <stdint.h>
27 #include <utility>
28 
29 namespace UT {
30 
31 template<typename T>
32 struct DefaultClearer;
33 
34 template<typename S>
35 struct DefaultClearer<S*>
36 {
37  static void clear(S*&v) { v = nullptr; }
38  static bool isClear(S *v) { return v == nullptr; }
39  static void clearConstruct(S**p) { clear(*p); }
40 
41  static const bool clearNeedsDestruction = false;
42 };
43 
44 template<typename T>
46 {
47  static void clear(T &v)
48  {
49  if (std::numeric_limits<T>::has_quiet_NaN) {
50  // Use QNaN as unused floating-point marker.
51  // NOTE: All NaNs get treated as unused, even if they're not
52  // the representative QNaN.
53  v = std::numeric_limits<T>::quiet_NaN();
54  }
56  // Use min as unused signed integer marker.
58  }
59  else {
60  // Use max as unused unsigned integer marker.
62  }
63  }
64  static bool isClear(T v)
65  {
66  if (std::numeric_limits<T>::has_quiet_NaN) {
67  // NOTE: All NaNs get treated as unused, even if they're not
68  // the representative QNaN.
69  return SYSisNan(v);
70  }
72  return (v == std::numeric_limits<T>::min());
73  }
74  return (v == std::numeric_limits<T>::max());
75  }
76  static void clearConstruct(T *p) { clear(*p); }
77 
78  static const bool clearNeedsDestruction = false;
79 };
80 
81 template<> struct DefaultClearer< int8_t > : public NumericClearer< int8_t > {};
82 template<> struct DefaultClearer<uint8_t > : public NumericClearer<uint8_t > {};
83 template<> struct DefaultClearer< int16_t> : public NumericClearer< int16_t> {};
84 template<> struct DefaultClearer<uint16_t> : public NumericClearer<uint16_t> {};
85 template<> struct DefaultClearer< int32_t> : public NumericClearer< int32_t> {};
86 template<> struct DefaultClearer<uint32_t> : public NumericClearer<uint32_t> {};
87 template<> struct DefaultClearer< int64_t> : public NumericClearer< int64_t> {};
88 template<> struct DefaultClearer<uint64_t> : public NumericClearer<uint64_t> {};
89 template<> struct DefaultClearer<float> : public NumericClearer<float> {};
90 template<> struct DefaultClearer<double> : public NumericClearer<double> {};
91 template<> struct DefaultClearer<long double> : public NumericClearer<long double> {};
92 #ifdef MBSD
93 // On macOS, size_t is "unsigned long" which matches none of the above
94 template<> struct DefaultClearer<size_t> : public NumericClearer<size_t> {};
95 #endif
96 
97 /// NOTE: It may seem silly to have bool here, but things like
98 /// std::pair<T,bool> should work automatically, so they need some
99 /// default behaviour for bool.
100 template<>
101 struct DefaultClearer<bool>
102 {
103  static void clear(bool&v) { v = false; }
104  static bool isClear(bool v) { return !v; }
105  static void clearConstruct(bool *p) { clear(*p); }
106 
107  static const bool clearNeedsDestruction = false;
108 };
109 
110 template<typename S0,typename S1>
111 struct DefaultClearer<std::pair<S0,S1> >
112 {
113  static void clear(std::pair<S0,S1> &v)
114  {
115  DefaultClearer<S0>::clear(v.first);
116  DefaultClearer<S1>::clear(v.second);
117  }
118  static bool isClear(const std::pair<S0,S1> &v)
119  {
120  return DefaultClearer<S0>::isClear(v.first) && DefaultClearer<S1>::isClear(v.second);
121  }
122  static void clearConstruct(std::pair<S0,S1>*p) {
125  }
127 };
128 
129 /// This is close to a drop-in replacement for std::unordered_set,
130 /// except that it uses a single array of items and empty spaces
131 /// marked with a dedicated "clear" value.
132 /// It also has a fixed maximum load factor, and doesn't store a
133 /// hasher, comparator, or allocator object as member data,
134 /// avoiding unnecessary overhead, but these differences introduce
135 /// some interface incompatibilities.
136 ///
137 /// The incompatibility that will probably be encountered most often
138 /// is the lack of time guarantees. This structure is more efficient in
139 /// many common cases, due to better memory coherency and fewer
140 /// memory allocations, but cannot guarantee as good time scaling in
141 /// worst or even average case as std::unordered_set guarantees for most
142 /// functions.
143 ///
144 /// Because there is only space for one item in each bucket, if a collision
145 /// is hit on insertion, the structure will scan forward until a "clear"
146 /// bucket is found. However, items in a contiguous block will always
147 /// be sorted in the order of their ideal bucket numbers.
148 /// If the end of the array is reached without finding a clear bucket,
149 /// instead of the obvious approach of wrapping to the beginning of the
150 /// array, the block will be shifted backward, so some items in this
151 /// block at the end may be earlier than their ideal bucket number.
152 /// This is about as complicated as wrapping, or not sorting, but avoids
153 /// an awkward issue when erasing while iterating where an item might get
154 /// visited multiple times due to the wrapping, or items might be missed
155 /// due to the lack of order. Even still, unlike
156 /// std::unordered_set, erase may invalidate other iterators and references.
157 /// Also, insert may invalidate other iterators and references.
158 template<
159  typename Key,
160  bool MULTI=false,
161  std::size_t MAX_LOAD_FACTOR_256=128,
162  typename Clearer=DefaultClearer<Key>,
163  class Hash=hboost::hash<Key>,
164  class KeyEqual=std::equal_to<Key>
165 >
166 class ArraySet
167 {
168 public:
170  typedef Key key_type;
171  typedef Key value_type;
172  typedef std::size_t size_type;
173  typedef std::ptrdiff_t difference_type;
174  typedef Hash hasher;
175  typedef KeyEqual key_equal;
177  typedef const value_type& const_reference;
178  typedef value_type *pointer;
179  typedef const value_type *const_pointer;
180  typedef Clearer clearer_type;
181 
183  : myBuckets(nullptr)
184  , myNumBuckets(0)
185  {
186  UT_ASSERT_COMPILETIME(MAX_LOAD_FACTOR_256 <= 256);
187  }
188  explicit ArraySet(size_type init_bucket_count)
189  : myBuckets(nullptr)
190  , myNumBuckets(0)
191  {
192  if (init_bucket_count != 0)
193  setNumBuckets(init_bucket_count);
194  }
195 
196  /// Move constructor, destroying the source.
198  : myBuckets(that.myBuckets)
199  , myNumBuckets(that.myNumBuckets)
200  {
201  that.myBuckets = nullptr;
202  that.myNumBuckets = 0;
203  }
204 
205  /// Copy constructor.
206  /// Avoid accidental copying by making the constructor explicit.
207  explicit ArraySet(const set_type &that)
208  : myBuckets(nullptr)
209  , myNumBuckets(0)
210  {
211  *this = that;
212  }
213 
214  /// Inserts all of the items in the range [start_input,end_input).
215  template<typename INPUT_ITERATOR>
216  ArraySet(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input, size_type init_bucket_count = 0)
217  : myBuckets(nullptr)
218  , myNumBuckets(0)
219  {
220  // Preallocate enough buckets, instead of waiting for
221  // insert to potentially hit collisions.
222  size_type ninput_items = std::distance(start_input, end_input);
223  setNumBuckets(SYSmax(init_bucket_count, minBuckets(ninput_items)));
224 
225  // Now, just insert them all, (no need to realloc)
226  for (; start_input != end_input; ++start_input)
227  {
228  pointer pdest;
229  bool success = insertHelper<!MULTI,false>(myBuckets,myNumBuckets,*start_input,pdest);
230  if (success)
231  *pdest = *start_input;
232  else
233  --ninput_items;
234  }
235  *((size_type*)(myBuckets+myNumBuckets)) = ninput_items;
236  }
237  /// Constructs a set from an initializer list, with an optional
238  /// bucket count, e.g.:
239  /// UT::ArraySet<int> some_set({5,123,500}, 20);
240  ArraySet(std::initializer_list<value_type> init, size_type init_bucket_count = 0)
241  : myBuckets(nullptr)
242  , myNumBuckets(0)
243  {
245  if (init_bucket_count == 0 || init_bucket_count < init.size()) {
246  bucket_count = minBuckets(init.size());
247  }
248  else {
249  bucket_count = init_bucket_count;
250  }
251  setNumBuckets(bucket_count);
252  for (auto &&p = init.begin(); p != init.end(); ++p) {
253  insert(*p);
254  }
255  }
256 
258  {
259  if (this == &that)
260  return *this;
261 
262  clear();
264 
265  if (!myBuckets)
266  return *this;
267 
268  size_type size = that.size();
269  *((size_type*)(myBuckets+myNumBuckets)) = size;
270 
271  if (size)
272  {
273  // Now, just copy them all, (no need to realloc)
274  const_pointer srcp = that.myBuckets;
276  for (pointer destp = myBuckets; destp != pend; ++destp, ++srcp)
277  {
278  *destp = *srcp;
279  }
280  }
281  return *this;
282  }
283 
284  set_type &operator=(std::initializer_list<value_type> init)
285  {
286  clear();
287  insert(init.begin(), init.end());
288  return *this;
289  }
290 
292  {
293  destroy();
294  myBuckets = that.myBuckets;
295  myNumBuckets = that.myNumBuckets;
296  that.myBuckets = nullptr;
297  that.myNumBuckets = 0;
298  return *this;
299  }
300 
301  bool operator==(const set_type &that) const
302  {
303  if (size() != that.size())
304  return false;
305 
306  // Check if one has no allocation (both empty)
307  if (!myBuckets || !that.myBuckets)
308  return true;
309 
310  const const_pointer pend = myBuckets + myNumBuckets;
311  for (const_pointer p = myBuckets; p != pend; ++p)
312  {
313  if (Clearer::isClear(*p))
314  continue;
315  // NOTE: Don't just use that.contains(*p), in case it doesn't check the whole item contents!
316  const_iterator it = that.find(*p);
317  if (it.atEnd())
318  return false;
319  if (*p != *it)
320  return false;
321  }
322  return true;
323  }
324  bool operator!=(const set_type &that) const
325  {
326  return !(*this == that);
327  }
328 
329  /// Swaps another set with this one
330  void swap(set_type &that)
331  {
332  pointer temp_buckets = that.myBuckets;
333  size_type temp_numbuckets = that.myNumBuckets;
334  that.myBuckets = myBuckets;
335  that.myNumBuckets = myNumBuckets;
336  myBuckets = temp_buckets;
337  myNumBuckets = temp_numbuckets;
338  }
339 
341  {
342  destroy();
343  }
344 
345  /// Returns true iff there are no items in the set
346  bool empty() const
347  {
348  return !myBuckets || (*((const size_type*)(myBuckets+myNumBuckets)) == 0);
349  }
350 
351  /// Returns the number of items in the set
352  size_type size() const
353  {
354  return myBuckets ? *(const size_type*)(myBuckets+myNumBuckets) : 0;
355  }
356 
357  /// This only exists because the standard requires it.
358  /// The only hard limit is what memory will allow.
360  {
362  }
363 
364  /// Removes all items from the set, but does not free the memory.
365  /// Call destroy() to free all the memory.
366  void clear()
367  {
368  if (!myBuckets)
369  return;
370  pointer p = myBuckets;
371  pointer pend = p+myNumBuckets;
372  if (*((const size_type*)pend) == 0)
373  return;
374  while (p != pend)
375  {
376  Clearer::clear(*p);
377  ++p;
378  }
379  *((size_type*)pend) = 0;
380  }
381 
382  /// Removes all items from the set and frees all the memory.
383  void destroy()
384  {
385  if (!myBuckets)
386  return;
387  if (!std::is_trivially_destructible<value_type>::value && (Clearer::clearNeedsDestruction || !empty()))
388  {
389  pointer p = myBuckets;
390  const_pointer pend = p+myNumBuckets;
391  while (p != pend)
392  {
393  // I'm assuming that in some cases, it's faster to check isClear than to call the destructor,
394  // even if it's only because of a better chance of inlining.
395  if (Clearer::clearNeedsDestruction || !Clearer::isClear(*p))
396  p->~value_type();
397  ++p;
398  }
399  }
400  free(myBuckets);
401  myBuckets = nullptr;
402  myNumBuckets = 0;
403  }
404 
405  /// Returns the current number of buckets.
407  {
408  return myNumBuckets;
409  }
410 
411  /// This only exists because the standard requires it.
412  /// The only hard limit is what memory will allow.
414  {
416  }
417 
418  /// This only exists because the standard requires it.
419  /// This function doesn't really make sense for this implementation,
420  /// but it returns 1 if the bucket is occupied, and 0 if it is not.
422  {
423  if (i >= myNumBuckets) {
424  return 0;
425  }
426  if (Clearer::isClear(myBuckets[i])) {
427  return 0;
428  }
429  return 1;
430  }
431 
432  /// Returns the portion of buckets that need to be occupied before
433  /// reallocation occurs.
434  /// Unlike in the standard, the maximum load factor is a constant
435  /// in this implementation, so max_load_factor() is a static function.
436  static float max_load_factor()
437  {
438  return float(MAX_LOAD_FACTOR_256)/256;
439  }
440 
441  /// Returns the current portion of buckets that are occupied.
442  float load_factor() const
443  {
444  return float(size())/float(myNumBuckets);
445  }
446 
447  /// Sets the number of buckets to larger of new_num_buckets and the
448  /// required minimum number of buckets for size() items, unless
449  /// that number is the same as the current number of buckets.
450  void rehash(size_type new_num_buckets)
451  {
452  size_type min_buckets = minBuckets(size());
453  if (new_num_buckets < min_buckets) {
454  new_num_buckets = min_buckets;
455  }
456  if (myNumBuckets == new_num_buckets) {
457  return;
458  }
459  setNumBuckets(min_buckets);
460  }
461 
462  /// Sets the number of buckets to the required minimum number of buckets
463  /// for the larger of num_items and size().
465  {
466  size_type min_buckets = minBuckets(SYSmax(size(),num_items));
467  setNumBuckets(min_buckets);
468  }
469 
470  /// Sets the number of buckets to new_num_buckets, regardless of the
471  /// required minimum number of buckets for size() items, though
472  /// you probably shouldn't be calling it with num_new_buckets < size(),
473  /// except that if num_new_buclets==0, this calls destroy().
474  void setNumBuckets(size_type new_num_buckets)
475  {
476  if (new_num_buckets == myNumBuckets)
477  return;
478  if (new_num_buckets == 0)
479  {
480  // Assume that the caller wants to delete everything
481  destroy();
482  return;
483  }
484 
485  size_type old_size = size();
486  if (new_num_buckets < old_size)
487  {
488  UT_ASSERT_MSG(0,"What does the caller expect to happen when setting the number of buckets lower than the size, but not to zero?\nThis class doesn't support multiple items per bucket.");
489  new_num_buckets = old_size;
490  }
491 
492  // NOTE: I'm assuming that the caller, in explicitly calling
493  // setNumBuckets, doesn't want to be limited by MAX_LOAD_FACTOR_256,
494  // so don't check against it.
495 
496  pointer new_buckets = (pointer)malloc(new_num_buckets*sizeof(value_type) + sizeof(size_type));
497  pointer new_end = new_buckets + new_num_buckets;
498  for (pointer p = new_buckets; p < new_end; ++p)
499  {
500  Clearer::clearConstruct(p);
501  }
502 
503  if (old_size)
504  {
505  // Move items over and clear them from myBuckets
507  for (pointer srcp = myBuckets; srcp < pend; ++srcp)
508  {
509  if (!Clearer::isClear(*srcp))
510  {
511  value_type v;
512  Clearer::clear(v);
513  // Need to make sure that *p is set to clear,
514  // as opposed to whatever std::move leaves it as,
515  // so swap with clear.
516  std::swap(*srcp, v);
517  pointer destp;
518  insertHelper<false,false>(new_buckets, new_num_buckets, v, destp);
519  *destp = std::move(v);
520  }
521  }
522 
523  // Temporarily set the size to 0, so that destroy() won't re-destruct them.
524  *((size_type*)pend) = 0;
525  }
526  // Don't worry; this will just destruct the current buckets
527  destroy();
528 
529  myBuckets = new_buckets;
530  myNumBuckets = new_num_buckets;
531  *((size_type*)new_end) = old_size;
532  }
533 
534  /// This increases the number of buckets, if needed, for
535  /// the specified number of items
536  void bumpNumBuckets(size_type new_num_items)
537  {
538  size_type num_buckets = minBuckets(new_num_items);
539  if (num_buckets <= myNumBuckets)
540  return;
541  // Although UT_Array normally bumps up by about 12.5% each time,
542  // and so does UTbumpAllocToPrime, to reduce significant rehashing
543  // overhead, we bump to at least twice the previous size.
544  num_buckets = SYSmax(num_buckets, 2*myNumBuckets);
545  num_buckets = UTbumpAllocToPrime(num_buckets);
546  setNumBuckets(num_buckets);
547  }
548 
549  /// Set iterator class
550  template<bool constant_type>
552  {
553  public:
554  typedef std::forward_iterator_tag iterator_category;
555  typedef std::ptrdiff_t difference_type;
559 
560  iterator_t() = default;
561  iterator_t(pointer current, const_pointer end, bool scan=true)
562  : myCurrent(current)
563  , myEnd(end)
564  {
565  UT_ASSERT_MSG_P(scan || current == end || !Clearer::isClear(*current), "An iterator can't point to a clear item!");
566  if (scan)
567  {
568  while (myCurrent != myEnd && Clearer::isClear(*myCurrent))
569  {
570  ++myCurrent;
571  }
572  }
573  }
574  iterator_t(const iterator_t &) = default;
575  iterator_t(iterator_t &&) = default;
576  iterator_t &operator=(const iterator_t &) = default;
577  iterator_t &operator=(iterator_t &&) = default;
578  bool operator==(const iterator_t &that) const
579  {
580  UT_ASSERT_MSG_P(myEnd == that.myEnd, "Probably unintentional: Comparing against iterator from different set or iterator invalidated by reallocation!");
581  return (myCurrent == that.myCurrent);
582  }
583  bool operator!=(const iterator_t &that) const
584  {
585  UT_ASSERT_MSG_P(myEnd == that.myEnd, "Probably unintentional: Comparing against iterator from different set or iterator invalidated by reallocation!");
586  return (myCurrent != that.myCurrent);
587  }
588  bool atEnd() const
589  {
590  return myCurrent == myEnd;
591  }
593  {
594  UT_ASSERT_MSG_P(myCurrent < myEnd, "Incrementing end iterator!");
595  if (myCurrent == myEnd) {
596  return *this;
597  }
598  do {
599  ++myCurrent;
600  } while (myCurrent != myEnd && Clearer::isClear(*myCurrent));
601  return *this;
602  }
603 
604  // This is intentionally not implemented to force its non-use but we
605  // keep it here just in case there's weird C++ rules on iterator
606  // requirements
608 
610  {
611  UT_ASSERT_MSG_P(myCurrent < myEnd, "Dereferencing end iterator!");
612  return *myCurrent;
613  }
615  {
616  UT_ASSERT_MSG_P(myCurrent < myEnd, "Dereferencing end iterator!");
617  return myCurrent;
618  }
619  operator pointer() const
620  {
621  UT_ASSERT_MSG_P(myCurrent <= myEnd, "Dereferencing iterator past end!");
622  return myCurrent;
623  }
625  {
626  UT_ASSERT_MSG_P(myCurrent <= myEnd, "Dereferencing iterator past end!");
627  return myCurrent;
628  }
630  {
631  return myEnd;
632  }
633  private:
634  friend class iterator_t<!constant_type>;
635 
636  pointer myCurrent;
637  const_pointer myEnd;
638  };
639 
640  /// Iterator type for iterating over non-constant elements.
641  typedef iterator_t<false> iterator;
642 
643  /// Iterator type for iterating over constant elements.
644  /// It may be useful to convert an iterator to a const_iterator,
645  /// so making this a subclass lets us add a constructor for that.
646  class const_iterator : public iterator_t<true>
647  {
648  public:
650 
651  const_iterator() = default;
652  const_iterator(const iterator &non_const_it)
653  : iterator_t<true>(non_const_it.getCurrent(), non_const_it.getEnd(), false)
654  {}
655  };
656 
657  /// Returns a non-const iterator for the beginning of the set.
659  {
661  }
662  /// Returns a const iterator for the beginning of the set.
663  const_iterator cbegin() const
664  {
665  return const_iterator(myBuckets,myBuckets+myNumBuckets,true);
666  }
667  /// Returns a const iterator for the beginning of the set.
668  const_iterator begin() const
669  {
670  return const_iterator(myBuckets,myBuckets+myNumBuckets,true);
671  }
672  /// Returns a non-const end iterator for the set.
674  {
676  return iterator(pend,pend,false);
677  }
678  /// Returns a const end iterator for the set.
679  const_iterator cend() const
680  {
682  return const_iterator(pend,pend,false);
683  }
684  /// Returns a const end iterator for the set.
685  const_iterator end() const
686  {
688  return const_iterator(pend,pend,false);
689  }
690 
691  /// Returns an iterator to the first item matching key,
692  /// or an end iterator if no items match key.
693  /// @{
694  iterator find(const Key &key)
695  {
696  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
697 
698  const pointer pstart = myBuckets;
699  if (!pstart)
700  return iterator(nullptr,nullptr,false);
701 
702  const pointer pend = pstart + myNumBuckets;
703 
704  const pointer search_start = searchStart(key);
705  for (pointer p = search_start; p != pend; ++p)
706  {
707  if (key_equal()(key,*p))
708  return iterator(p,pend,false);
709  if (Clearer::isClear(*p))
710  return iterator(pend,pend,false);
711  }
712  // Hit the end, so search back from before the start of the block.
713  for (pointer p = search_start-1; p >= pstart; --p)
714  {
715  if (key_equal()(key,*p))
716  return iterator(p,pend,false);
717  if (Clearer::isClear(*p))
718  return iterator(pend,pend,false);
719  }
720  return iterator(pend,pend,false);
721  }
722  const_iterator find(const Key &key) const
723  {
724  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
725 
726  const const_pointer pstart = myBuckets;
727  if (!pstart)
728  return const_iterator(nullptr,nullptr,false);
729 
730  const const_pointer pend = pstart + myNumBuckets;
731 
732  const_pointer search_start = searchStart(key);
733  for (const_pointer p = search_start; p != pend; ++p)
734  {
735  if (key_equal()(key,*p))
736  return const_iterator(p,pend,false);
737  if (Clearer::isClear(*p))
738  return const_iterator(pend,pend,false);
739  }
740  // Hit the end, so search back from before the start of the block.
741  for (const_pointer p = search_start-1; p >= pstart; --p)
742  {
743  if (key_equal()(key,*p))
744  return const_iterator(p,pend,false);
745  if (Clearer::isClear(*p))
746  return const_iterator(pend,pend,false);
747  }
748  return const_iterator(pend,pend,false);
749  }
750  /// @}
751 
752  /// Returns the number of entries matching key.
753  /// If MULTI is false, this will only return either 0 or 1.
754  size_type count(const Key &key) const
755  {
756  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
757 
758  if (!myBuckets)
759  return 0;
760 
761  const const_pointer pstart = myBuckets;
762  const const_pointer pend = pstart + myNumBuckets;
763  const_pointer search_start = searchStart(key);
764  size_type count = 0;
765  for (const_pointer p = search_start; p != pend; ++p)
766  {
767  if (key_equal()(key,*p))
768  {
769  if (!MULTI)
770  return 1;
771  ++count;
772  }
773  else if (Clearer::isClear(*p))
774  return MULTI ? count : 0;
775  }
776  // Hit the end, so search back from before the start of the block.
777  for (const_pointer p = search_start-1; p >= pstart; --p)
778  {
779  if (key_equal()(key,*p))
780  {
781  if (!MULTI)
782  return 1;
783  ++count;
784  }
785  else if (Clearer::isClear(*p))
786  return MULTI ? count : 0;
787  }
788  return MULTI ? count : 0;
789  }
790 
791  /// Returns true iff the set contains the given key.
792  /// This should be faster than count() if MULTI is true.
793  bool contains(const Key &key) const
794  {
795  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
796 
797  if (!myBuckets)
798  return false;
799 
800  const const_pointer pstart = myBuckets;
801  const const_pointer pend = pstart + myNumBuckets;
802  const_pointer search_start = searchStart(key);
803  for (const_pointer p = search_start; p != pend; ++p)
804  {
805  if (key_equal()(key,*p))
806  return true;
807  if (Clearer::isClear(*p))
808  return false;
809  }
810  // Hit the end, so search back from before the start of the block.
811  for (const_pointer p = search_start-1; p >= pstart; --p)
812  {
813  if (key_equal()(key,*p))
814  return true;
815  if (Clearer::isClear(*p))
816  return false;
817  }
818  return false;
819  }
820 
821  /// Returns a pair of iterators representing the range of values matching
822  /// key, as [first,second).
823  /// @{
824  std::pair<const_iterator,const_iterator> equal_range(const Key &key) const
825  {
826  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
827 
828  const const_pointer pstart = myBuckets;
829  const const_pointer pend = pstart + myNumBuckets;
830  if (!pstart)
831  return std::make_pair(const_iterator(nullptr,nullptr,false), const_iterator(nullptr,nullptr,false));
832 
833  const const_pointer search_start = searchStart(key);
834  if (!MULTI)
835  {
836  for (const_pointer p = search_start; p != pend; ++p)
837  {
838  if (key_equal()(key,*p))
839  return std::make_pair(const_iterator(p,pend,false), const_iterator(p+1,pend,true));
840  if (Clearer::isClear(*p))
841  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
842  }
843  // Hit the end, so search back from before the start of the block.
844  for (const_pointer p = search_start-1; p >= pstart; --p)
845  {
846  if (key_equal()(key,*p))
847  return std::make_pair(const_iterator(p,pend,false), const_iterator(p+1,pend,true));
848  if (Clearer::isClear(*p))
849  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
850  }
851  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
852  }
853 
854  // MULTI is true, so we might have multiple matches
855  const_pointer first_found = nullptr;
856  const_pointer end_found = pend;
857  for (const_pointer p = search_start; p != pend; ++p)
858  {
859  if (key_equal()(key,*p))
860  {
861  if (!first_found)
862  first_found = p;
863  }
864  else if (first_found)
865  {
866  if (first_found != search_start)
867  return std::make_pair(const_iterator(first_found,pend,false), const_iterator(p,pend,true));
868 
869  end_found = p;
870  break;
871  }
872  else if (Clearer::isClear(*p))
873  {
874  // NOTE: first_found must be false
875  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
876  }
877  }
878 
879  // Hit the end, so search back from before the start of the block.
880  for (const_pointer p = search_start-1; p >= pstart; --p)
881  {
882  if (!key_equal()(key,*p))
883  {
884  return std::make_pair(const_iterator(p+1,pend,false), const_iterator(end_found,pend,true));
885  }
886  }
887  return std::make_pair(const_iterator(pstart,pend,false), const_iterator(end_found,pend,true));
888  }
889  std::pair<iterator,iterator> equal_range(const Key &key)
890  {
891  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
892 
893  const pointer pstart = myBuckets;
894  const pointer pend = pstart + myNumBuckets;
895  if (!pstart)
896  return std::make_pair(iterator(nullptr,nullptr,false), iterator(nullptr,nullptr,false));
897 
898  const pointer search_start = searchStart(key);
899  if (!MULTI)
900  {
901  for (pointer p = search_start; p != pend; ++p)
902  {
903  if (key_equal()(key,*p))
904  return std::make_pair(iterator(p,pend,false), iterator(p+1,pend,true));
905  if (Clearer::isClear(*p))
906  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
907  }
908  // Hit the end, so search back from before the start of the block.
909  for (pointer p = search_start-1; p >= pstart; --p)
910  {
911  if (key_equal()(key,*p))
912  return std::make_pair(iterator(p,pend,false), iterator(p+1,pend,true));
913  if (Clearer::isClear(*p))
914  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
915  }
916  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
917  }
918 
919  // MULTI is true, so we might have multiple matches
920  pointer first_found = nullptr;
921  pointer end_found = pend;
922  for (pointer p = search_start; p != pend; ++p)
923  {
924  if (key_equal()(key,*p))
925  {
926  if (!first_found)
927  first_found = p;
928  }
929  else if (first_found)
930  {
931  if (first_found != search_start)
932  return std::make_pair(iterator(first_found,pend,false), iterator(p,pend,true));
933 
934  end_found = p;
935  break;
936  }
937  else if (Clearer::isClear(*p))
938  {
939  // NOTE: first_found must be false
940  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
941  }
942  }
943 
944  // Hit the end, so search back from before the start of the block.
945  for (pointer p = search_start-1; p >= pstart; --p)
946  {
947  if (!key_equal()(key,*p))
948  {
949  return std::make_pair(iterator(p+1,pend,false), iterator(end_found,pend,true));
950  }
951  }
952  return std::make_pair(iterator(pstart,pend,false), iterator(end_found,pend,true));
953  }
954  /// @}
955 
956  /// Inserts the given value into the set, unless MULTI is false and
957  /// there's already an item for which key_equal()(value,other)
958  /// returns true. Returns a pair of iterator to the inserted value
959  /// (or the existing item if not inserted), and a bool that's true
960  /// iff the item was inserted.
961  /// @{
962  std::pair<iterator, bool> insert(const value_type &value)
963  {
964  pointer p;
965  bool success = insertHelper(myBuckets, myNumBuckets, value, p);
966  if (success)
967  {
968  *p = value;
970  }
971  return std::make_pair(iterator(p,myBuckets+myNumBuckets,false),success);
972  }
973  std::pair<iterator, bool> insert(value_type &&value)
974  {
975  pointer p;
976  bool success = insertHelper(myBuckets, myNumBuckets, value, p);
977  if (success)
978  {
979  *p = std::move(value);
981  }
982  return std::make_pair(iterator(p,myBuckets+myNumBuckets,false),success);
983  }
984  /// @}
985 
986  /// Inserts all of the items in the range [start_input,end_input).
987  template<typename INPUT_ITERATOR>
988  void insert(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input)
989  {
990  // Preallocate enough buckets, instead of waiting for
991  // insert to potentially hit collisions.
992  size_type ninput_items = std::distance(start_input, end_input);
993  if (!ninput_items)
994  return;
995 
996  size_type new_size = size() + ninput_items;
997  if (myNumBuckets < minBuckets(new_size))
998  bumpNumBuckets(new_size);
999 
1000  // Now, just insert them all, (no need to realloc)
1001  for (; start_input != end_input; ++start_input)
1002  {
1003  pointer pdest;
1004  bool success = insertHelper<!MULTI,false>(myBuckets,myNumBuckets,*start_input,pdest);
1005  if (success)
1006  *pdest = *start_input;
1007  else
1008  --new_size;
1009 
1010  }
1011  *((size_type*)(myBuckets+myNumBuckets)) = new_size;
1012  }
1013  /// Inserts all of the items from the initializer_list.
1014  void insert(std::initializer_list<value_type> list)
1015  {
1016  // Just delegate to the iterator range insert function.
1017  insert(list.begin(),list.end());
1018  }
1019 
1020  /// Inserts an element constructed with the given arguments.
1021  /// Maybe the standard implementation can somehow get a speedup
1022  /// from this, but we need the item's hash before inserting,
1023  /// so it needs to be constructed first.
1024  template <typename... Args>
1025  std::pair<iterator,bool> emplace(Args&&... args)
1026  {
1027  return insert(value_type(std::forward<Args>(args)...));
1028  }
1029 
1030  /// Removes the item at the specified iterator location,
1031  /// returning an iterator to the next item.
1032  ///
1033  /// Unlike std::unordered_set::erase(const_iterator), this may
1034  /// invalidate iterators and references to other items in the
1035  /// map, if they are in a contiguous block of non-empty items
1036  /// with the item that iter points to. Also, this only
1037  /// accepts an iterator, instead of a const_iterator.
1038  /// (Why would std::unordered_set::erase accept a const_iterator?)
1040  {
1041  const pointer pstart = myBuckets;
1042  const pointer pend = pstart + myNumBuckets;
1043 
1044  const pointer p = (pointer)iter;
1045 
1046  UT_ASSERT_MSG_P(p >= pstart && p < pend && !Clearer::isClear(*p), "An iterator to erase can't point to a clear item!");
1047 
1048  // Actually erase the item
1049  Clearer::clear(*p);
1050  size_type new_size = --*((size_type*)(myBuckets+myNumBuckets));
1051 
1052  if (new_size == 0)
1053  return iterator(pend,pend,false);
1054 
1055  // Check for the common, simple case, where there's nothing after p in
1056  // the block of possible conflicts, and p isn't at the end.
1057  pointer pnext = p+1;
1058  bool end_case = (pnext == pend);
1059  if (!end_case)
1060  {
1061  if (Clearer::isClear(*pnext))
1062  return iterator(pnext,pend,true);
1063 
1064  // Now, for the hard case... -_-
1065 
1066  // If we're not right at the end of the array already,
1067  // find the end of the block, and check if it's the end of the array.
1068  // pnext is already clear, else we'd have returned above.
1069  do
1070  {
1071  ++pnext;
1072  end_case = (pnext == pend);
1073  } while (!end_case && !Clearer::isClear(*pnext));
1074  }
1075 
1076  if (end_case && (p != pstart) && !Clearer::isClear(*(p-1)))
1077  {
1078  // If this is a block right at the end of the array, there may be earlier
1079  // items that are too early, in which case, we need to shift
1080  // everything after that down one, into the empty spot.
1081 
1082  // Find the start of the block.
1083  pointer pprev = p-1;
1084  while (pprev != pstart && !Clearer::isClear(*(pprev-1)))
1085  {
1086  --pprev;
1087  }
1088 
1089  // Find the earliest item that should go at or after
1090  // its current bucket.
1091  do
1092  {
1093  // Check where this item ideally belongs.
1094  size_type hash = hasher()(*pprev);
1095  size_type ideal_bucket_num = hash%myNumBuckets;
1096  if (ideal_bucket_num > (pprev-pstart))
1097  {
1098  // Shift everything later by one and return.
1099  for (pointer pdest = p; pdest != pprev; --pdest)
1100  {
1101  *pdest = std::move(*(pdest-1));
1102  }
1103  Clearer::clear(*pprev);
1104  return iterator(p+1,pend,true);
1105  }
1106 
1107  ++pprev;
1108  } while (pprev != p);
1109  }
1110 
1111  if (p+1 != pend)
1112  {
1113  // We're not at the end of the array, or there were no items before
1114  // this block to move into the empty space, but there is at least
1115  // one item after p, so shift items backward until there's one
1116  // that doesn't belong earlier than it is now.
1117 
1118  size_type empty_bucket_num = p-pstart;
1119 
1120  // NOTE: pnext points to the end of this block.
1121  for (pointer pdest = p; pdest+1 != pnext; ++pdest)
1122  {
1123  // Check where this item ideally belongs.
1124  size_type hash = hasher()(*(pdest+1));
1125  size_type ideal_bucket_num = hash%myNumBuckets;
1126  if (ideal_bucket_num > empty_bucket_num)
1127  {
1128  Clearer::clear(*pdest);
1129  return iterator(p+(pdest==p),pend,false);
1130  }
1131 
1132  *pdest = std::move(*(pdest+1));
1133  ++empty_bucket_num;
1134  }
1135  Clearer::clear(*(pnext-1));
1136  // p now contains an item that was previously after it.
1137  return iterator(p,pend,false);
1138  }
1139 
1140  // p is still empty, but there's an item (or the end pointer)
1141  // right after it.
1142  return iterator(p+1,pend,false);
1143  }
1144 
1145 #if 0 // This isn't fully implemented, so it's excluded to avoid anyone accidentally using it.
1146  /// Removes all items in the range [start_iter,end_iter),
1147  /// and returns an iterator immediately following that range.
1148  ///
1149  /// Unlike std::unordered_set::erase(const_iterator,const_iterator),
1150  /// this may invalidate iterators and references to other items in the
1151  /// map, if they are in a contiguous block of non-empty items
1152  /// with the items in the erased range. Also, this only
1153  /// accepts iterator's, instead of const_iterator's.
1154  /// (Why would std::unordered_set::erase accept a const_iterator?)
1155  iterator erase(iterator start_iter, iterator end_iter) {
1156  if (start_iter == end_iter)
1157  return end_iter;
1158 
1159  const pointer pstart = myBuckets;
1160  const const_pointer pend = pstart + myNumBuckets;
1161 
1162  const pointer prange_start = (pointer)start_iter;
1163  const pointer prange_end = (pointer)end_iter;
1164 
1165  UT_ASSERT_MSG_P(prange_start >= pstart && prange_start < prange_end && prange_end <= pend && !Clearer::isClear(*prange_start), "An iterator to erase can't point to a clear item!");
1166 
1167  // Actually erase the items
1168  size_type nremoved_items = 1;
1169  size_type nitems_in_last_block = 1;
1170  Clearer::clear(*prange_start);
1171  for (pointer p = prange_start+1; p != prange_end; ++p)
1172  {
1173  if (!Clearer::isClear(*p))
1174  {
1175  Clearer::clear(*p);
1176  ++nitems_in_last_block;
1177  ++nremoved_items;
1178  }
1179  else
1180  {
1181  nitems_in_last_block = 0;
1182  }
1183  }
1184 
1185  size_type new_size = (*((size_type*)(myBuckets+myNumBuckets)) -= nremoved_items);
1186 
1187  if (new_size == 0)
1188  {
1189  // This must be pointing to the end of the array.
1190  return end_iter;
1191  }
1192 
1193  pointer block_end = prange_end;
1194 
1195  if (prange_end != pend)
1196  {
1197  // NOTE: If prange_end is not pend, it is always occupied, because
1198  // it came from an iterator, which should be dereferenceable.
1199  // However, if there was a gap between the last removed item
1200  // and prange_end, we can still exit early.
1201  if (nitems_in_last_block == 0)
1202  return end_iter;
1203 
1204  // Find the end of the block
1205  do
1206  {
1207  ++block_end;
1208  } while (block_end != pend && !Clearer::isClear(*block_end));
1209  }
1210  else {
1211  // If end_iter is at the array end, and there's at least one empty
1212  // spot in the given range, we're done, because the last block
1213  // was completely eliminated.
1214  if (nitems_in_last_block != prange_end-prange_start)
1215  return end_iter;
1216  }
1217 
1218  UT_ASSERT_P(nitems_in_last_block > 0);
1219 
1220  // If the end of the block is the array end and
1221  // nitems_in_last_block == prange_end-prange_start (no gaps),
1222  // find the beginning of the block, and if there are any items that
1223  // belong in later buckets:
1224  // 1) shift everything after to the end of the clear block.
1225  // 2) move them backward to where they belong, as available.
1226  // 3) if there is still an item in the last cleared spot, return.
1227  if (block_end == pend && nitems_in_last_block == prange_end-prange_start && prange_start != pstart && !Clearer::isClear(*(prange_start-1)))
1228  {
1229  // Find the start of the block.
1230  pointer block_start = prange_start;
1231  do
1232  {
1233  --block_start;
1234  } while (block_start != pstart && !Clearer::isClear(*(block_start-1)));
1235 
1236  UT_ASSERT_MSG(0,"FIXME: Implement this!!!");
1237  }
1238 
1239  // Move items backward until there's one that doesn't
1240  // need to be moved backward.
1241  UT_ASSERT_MSG(0,"FIXME: Implement this!!!");
1242  return end_iter; // This is probably incorrect, but just to get it to compile
1243  }
1244 #endif
1245 
1246  /// Removes all items matching key and returns
1247  /// the number of items removed.
1248  size_type erase(const Key &key)
1249  {
1250  if (!MULTI)
1251  {
1252  iterator it = find(key);
1253  if (it.atEnd())
1254  return 0;
1255  erase(it);
1256  return 1;
1257  }
1258 
1259  // NOTE: This commented out code won't work, because the 2-argument erase isn't implemented.
1260  //std::pair<iterator,iterator> its = equal_range(key);
1261  //size_type count = std::distance(its.first, its.second);
1262  //erase(its.first, its.second);
1263 
1264  size_type count = 0;
1265  for (iterator it = find(key); !it.atEnd(); it = find(key))
1266  {
1267  erase(it);
1268  }
1269  return count;
1270  }
1271 
1272  /// NOTE: std::unordered_set::hash_function() isn't static,
1273  /// but here, we're not storing a hasher as
1274  /// a data member, so it's independent of the instance.
1276  {
1277  return hasher();
1278  }
1279 
1280  /// NOTE: std::unordered_set::key_eq() isn't static,
1281  /// but here, we're not storing a key_equal as
1282  /// a data member, so it's independent of the instance.
1284  {
1285  return key_equal();
1286  }
1287 
1288  /// Returns the amount of memory used by this, excluding any memory that might be
1289  /// allocated by any of the items, themselves.
1290  int64 getMemoryUsage(bool inclusive) const
1291  {
1292  int64 mem = inclusive ? sizeof(*this) : 0;
1293  mem += sizeof(value_type)*myNumBuckets;
1294  return mem;
1295  }
1296 
1297  template<typename FUNCTOR>
1299  void forEach(FUNCTOR &&functor) const
1300  {
1302  const const_pointer pend = p + myNumBuckets;
1303  for (; p != pend; ++p)
1304  {
1305  if (!Clearer::isClear(*p))
1306  functor(*p);
1307  }
1308  }
1309 
1310 protected:
1312  {
1313  // If there are only 0-2 elements, there might as well only be
1314  // that many buckets.
1315  if (size <= 2)
1316  return size;
1317 
1318  // Simpler case if MAX_LOAD_FACTOR_256 evenly divides 256
1319  if ((256%MAX_LOAD_FACTOR_256) == 0)
1320  // +1 because otherwise it's guaranteed to be composite.
1321  return size * (256/MAX_LOAD_FACTOR_256) + 1;
1322 
1323  return (size*256 + MAX_LOAD_FACTOR_256-1)/MAX_LOAD_FACTOR_256;
1324  }
1325  pointer searchStart(const Key &key)
1326  {
1327  const size_type hash = hasher()(key);
1328  return myBuckets + (hash%myNumBuckets);
1329  }
1330  const_pointer searchStart(const Key &key) const
1331  {
1332  const size_type hash = hasher()(key);
1333  return myBuckets + (hash%myNumBuckets);
1334  }
1335 
1336  template<bool fail_on_equal=!MULTI,bool check_realloc=true>
1337  bool insertHelper(pointer pstart, size_type nbuckets, const value_type &value,pointer &destp)
1338  {
1339  UT_ASSERT_P(!Clearer::isClear(value));
1340 
1341  if (check_realloc && nbuckets == 0)
1342  {
1343  setNumBuckets(1);
1344  destp = myBuckets;
1345  return true;
1346  }
1347 
1348  const pointer pend = pstart + nbuckets;
1349  const size_type hash = hasher()(value);
1350  const size_type ideal_bucket = (hash%nbuckets);
1351  pointer init_searchp = pstart + ideal_bucket;
1352  pointer searchp = init_searchp;
1353 
1354  if (Clearer::isClear(*searchp))
1355  {
1356  // Don't bother checking MAX_LOAD_FACTOR_256 if exact match is clear,
1357  // since there's no harm in perfect hashes... unless it fills up and
1358  // people do a lot of querying keys that aren't in the set.
1359  // If that ends up being a bottleneck, feel free to change this.
1360  destp = searchp;
1361  return true;
1362  }
1363 
1364  if (fail_on_equal && key_equal()(*searchp,value))
1365  {
1366  destp = searchp;
1367  return false;
1368  }
1369 
1370  if (!fail_on_equal && check_realloc)
1371  {
1372  // We're always adding one.
1373  size_type new_size = size() + 1;
1374  if (nbuckets < minBuckets(new_size)) {
1375  bumpNumBuckets(new_size);
1376  // Delegate to version that doesn't check for realloc.
1377  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1378  }
1379  // We're not reallocating in this case.
1380  }
1381 
1382  // Find the place where our ideal bucket is less than the next
1383  // item's ideal bucket, i.e. the end of where collisions on ideal_bucket go,
1384  // in case there are multiple collision spans that have written past ideal_bucket.
1385  if ((hasher()(*searchp)%nbuckets) <= ideal_bucket)
1386  {
1387  while (true)
1388  {
1389  ++searchp;
1390  if (searchp == pend || Clearer::isClear(*searchp))
1391  break;
1392 
1393  const size_type other_hash = hasher()(*searchp);
1394  if (fail_on_equal && other_hash==hash && key_equal()(*searchp,value))
1395  {
1396  destp = searchp;
1397  return false;
1398  }
1399  if (ideal_bucket < (other_hash%nbuckets))
1400  break;
1401  }
1402  }
1403  // Known:
1404  // [init_searchp, searchp] does not contain value.
1405  // searchp is one of:
1406  // Clear
1407  // pend
1408  // idealbucket(searchp) > idealbucket(value)
1409 
1410  pointer insertp = searchp;
1411 
1412  // Find the end of the block of present items, all of
1413  // which should have idealbuckets greater than us.
1414  while (searchp != pend && !Clearer::isClear(*searchp))
1415  {
1416  ++searchp;
1417  }
1418 
1419  if (searchp != pend)
1420  {
1421  // Common case: block end is not the array end.
1422  // Implies searchp is Clear.
1423  // Since we didn't hit the end, this occupied block
1424  // was never bumped back, so we don't have to look
1425  // back for matches or proper ideal bucket span.
1426  // We can easily make room by moving everything in
1427  // [insertp, searchp) -> [insertp+1, searchp+1)
1428  // so we can write to [insertp].
1429 
1430  if (fail_on_equal && check_realloc)
1431  {
1432  // We're always adding one.
1433  size_type new_size = *((const size_type*)pend) + 1;
1434  if (nbuckets < minBuckets(new_size))
1435  {
1436  bumpNumBuckets(new_size);
1437  // Delegate to version that doesn't check for realloc.
1438  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1439  }
1440  // We're not reallocating in this case.
1441  }
1442 
1443  // Move items later by one.
1444  // std::move [insertp, searchp) to [insertp+1, searchp+1)
1445  while (searchp != insertp)
1446  {
1447  *searchp = std::move(*(searchp-1));
1448  --searchp;
1449  }
1450  destp = insertp;
1451  return true;
1452  }
1453 
1454  // Less common case: block end is the array end.
1455  // insertp may actually need to go earlier because we may
1456  // not be pointing to the end of our ideal block span.
1457  // We want insertp to refer to a location that we can update
1458  // with value and preserve the ordering of idealbuckets.
1459  if (insertp == init_searchp)
1460  {
1461  // There should always be a space somewhere if !fail_on_equal,
1462  // because reallocation was handled above.
1463  UT_ASSERT_P(insertp-1 >= pstart || (fail_on_equal && insertp == pstart));
1464  while ((!fail_on_equal || insertp != pstart) && !Clearer::isClear(*(insertp-1)))
1465  {
1466  const size_type other_hash = hasher()(*(insertp-1));
1467  if (fail_on_equal && other_hash == hash && key_equal()(*(insertp-1),value))
1468  {
1469  destp = insertp-1;
1470  return false;
1471  }
1472 
1473  // Exiting at <= rather than < means that we may be in
1474  // middle of an ideal block span.
1475  if ((other_hash%nbuckets) <= ideal_bucket)
1476  break;
1477  --insertp;
1478  UT_ASSERT_P(insertp-1 >= pstart || (fail_on_equal && insertp == pstart));
1479  }
1480  }
1481 
1482  // There are occupied buckets from init_searchp to the end,
1483  // but insertp is after init_searchp, so we don't need to look backward
1484  // ^^^^^^^^^ It isn't???
1485  // to find where to insert; we just need to shift items that
1486  // are in the range from the beginning of the occupied block to insertp
1487  // backward by 1.
1488  //
1489  // Because of the previous loop, insertp can be less than init_searchp.
1490  // In that case we want to start the blockstart at insertp. This
1491  // is both more performant, but more importantly the
1492  // in_ideal_bucket_span condition will actually make sense then.
1493  // Without this, the init_searchp might be *greater* than the
1494  // ideal bucket span and cause us to immediately stop checking.
1495 
1496  pointer blockstart = (init_searchp < insertp) ? init_searchp : insertp;
1497  if (fail_on_equal && check_realloc)
1498  {
1499  // It can happen that blockstart is pstart iff the entire
1500  // array is full, and since we've checked all items with equal hash
1501  // in that case, we know that we'll be inserting, so we need to
1502  // reallocate.
1503  if (blockstart == pstart)
1504  {
1505  size_type new_size = *((const size_type*)pend) + 1;
1506  bumpNumBuckets(new_size);
1507  // Delegate to version that doesn't check for realloc.
1508  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1509  }
1510  }
1511 
1512  // NOTE: blockstart shouldn't hit pstart, because myNumBuckets > size()
1513  UT_ASSERT_P(blockstart-1 >= pstart);
1514  bool in_ideal_bucket_span = true;
1515  bool checked_realloc = false;
1516  while (!Clearer::isClear(*(blockstart-1)))
1517  {
1518  --blockstart;
1519  // Note that we might be in the middle of a span of equal initial_bucket
1520  // values, so an equal item might be earlier than init_searchp.
1521  if (fail_on_equal && in_ideal_bucket_span)
1522  {
1523  const size_type other_hash = hasher()(*blockstart);
1524  if ((other_hash == hash) && key_equal()(*blockstart,value))
1525  {
1526  destp = blockstart;
1527  return false;
1528  }
1529  in_ideal_bucket_span = ((other_hash%nbuckets) == ideal_bucket);
1530  }
1531  if (fail_on_equal && check_realloc && !checked_realloc)
1532  {
1533  if (!in_ideal_bucket_span)
1534  {
1535  // We're always adding one, since items can't be equal outside
1536  // the equal ideal_bucket span. We only need to check for
1537  // reallocation once, since if there's at least one space free,
1538  // blockstart can't end up equalling pstart.
1539  size_type new_size = *((const size_type*)pend) + 1;
1540  if (nbuckets < minBuckets(new_size))
1541  {
1542  bumpNumBuckets(new_size);
1543  // Delegate to version that doesn't check for realloc.
1544  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1545  }
1546  // We're not reallocating in this case.
1547  checked_realloc = true;
1548  }
1549  // It can happen that blockstart is pstart iff the entire
1550  // array is full, and since we've checked all items with equal hash
1551  // in that case, we know that we'll be inserting, so we need to reallocate.
1552  else if (blockstart == pstart)
1553  {
1554  size_type new_size = *((const size_type*)pend) + 1;
1555  bumpNumBuckets(new_size);
1556  // Delegate to version that doesn't check for realloc.
1557  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1558  }
1559  }
1560  UT_ASSERT_P(blockstart-1 >= pstart);
1561  }
1562 
1563  if (fail_on_equal && check_realloc && !checked_realloc)
1564  {
1565  // We're always adding one.
1566  size_type new_size = *((const size_type*)pend) + 1;
1567  if (nbuckets < minBuckets(new_size))
1568  {
1569  bumpNumBuckets(new_size);
1570  // Delegate to version that doesn't check for realloc.
1571  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1572  }
1573  // We're not reallocating in this case.
1574  }
1575 
1576  // Move items earlier by one.
1577  while (blockstart != insertp)
1578  {
1579  *(blockstart-1) = std::move(*blockstart);
1580  ++blockstart;
1581  }
1582  destp = insertp-1;
1583  return true;
1584  }
1585 
1586 protected:
1589 };
1590 
1591 template<
1592  typename Key,
1593  bool MULTI,
1594  std::size_t MAX_LOAD_FACTOR_256,
1595  typename Clearer,
1596  class Hash,
1597  class KeyEqual
1598 >
1599 struct DefaultClearer<ArraySet<Key,MULTI,MAX_LOAD_FACTOR_256,Clearer,Hash,KeyEqual> >
1600 {
1603  static void clear(TheType&v) { v.destroy(); }
1605  static bool isClear(TheType &v) { return v.bucket_count() == 0; }
1607  static void clearConstruct(TheType *p) { new((void *)p) TheType(); }
1608 
1609  static const bool clearNeedsDestruction = false;
1610 };
1611 
1612 } // namespace UT
1613 
1614 template<typename Key,
1615  bool MULTI = false,
1616  int MAX_LOAD_FACTOR_256 = 128,
1617  typename Clearer = UT::DefaultClearer<Key>,
1618  class Hash = hboost::hash<Key>,
1619  class KeyEqual = std::equal_to<Key> >
1621 
1622 namespace std {
1623  template<typename Key,
1624  bool MULTI,
1625  int MAX_LOAD_FACTOR_256,
1626  typename Clearer,
1627  class Hash,
1628  class KeyEqual>
1629  void swap(
1632  {
1633  a.swap(b);
1634  }
1635 }
1636 
1637 #endif
float load_factor() const
Returns the current portion of buckets that are occupied.
Definition: UT_ArraySet.h:442
const_iterator find(const Key &key) const
Definition: UT_ArraySet.h:722
#define SYSmax(a, b)
Definition: SYS_Math.h:1538
const_pointer searchStart(const Key &key) const
Definition: UT_ArraySet.h:1330
pointer myBuckets
Definition: UT_ArraySet.h:1587
static void clearConstruct(bool *p)
Definition: UT_ArraySet.h:105
ArraySet(const set_type &that)
Definition: UT_ArraySet.h:207
ArraySet(size_type init_bucket_count)
Definition: UT_ArraySet.h:188
UT_ASSERT_COMPILETIME(BRAY_EVENT_MAXFLAGS<=32)
std::ptrdiff_t difference_type
Definition: UT_ArraySet.h:555
void clear()
Definition: UT_ArraySet.h:366
std::pair< iterator, bool > insert(const value_type &value)
Definition: UT_ArraySet.h:962
size_type size() const
Returns the number of items in the set.
Definition: UT_ArraySet.h:352
UT_API exint UTbumpAllocToPrime(exint current_size)
void setNumBuckets(size_type new_num_buckets)
Definition: UT_ArraySet.h:474
static const bool clearNeedsDestruction
Definition: UT_ArraySet.h:78
STATIC_INLINE size_t Hash(const char *s, size_t len)
Definition: farmhash.h:2038
void swap(UT::ArraySet< Key, MULTI, MAX_LOAD_FACTOR_256, Clearer, Hash, KeyEqual > &a, UT::ArraySet< Key, MULTI, MAX_LOAD_FACTOR_256, Clearer, Hash, KeyEqual > &b)
Definition: UT_ArraySet.h:1629
const GLdouble * v
Definition: glcorearb.h:837
pointer getCurrent() const
Definition: UT_ArraySet.h:624
GLsizei const GLfloat * value
Definition: glcorearb.h:824
static void clearConstruct(T *p)
Definition: UT_ArraySet.h:76
static size_type max_bucket_count()
Definition: UT_ArraySet.h:413
std::pair< iterator, bool > insert(value_type &&value)
Definition: UT_ArraySet.h:973
static void clear(S *&v)
Definition: UT_ArraySet.h:37
static void clear(bool &v)
Definition: UT_ArraySet.h:103
constexpr bool SYSisNan(const F f)
Definition: SYS_Math.h:181
set_type & operator=(set_type &&that)
Definition: UT_ArraySet.h:291
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1222
bool empty() const
Returns true iff there are no items in the set.
Definition: UT_ArraySet.h:346
ImageBuf OIIO_API min(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
iterator end()
Returns a non-const end iterator for the set.
Definition: UT_ArraySet.h:673
iterator_t & operator=(const iterator_t &)=default
#define UT_ASSERT_MSG_P(ZZ,...)
Definition: UT_Assert.h:158
std::integral_constant< bool, std::numeric_limits< T >::is_signed||std::is_same< T, int128_t >::value > is_signed
Definition: format.h:821
static bool isClear(bool v)
Definition: UT_ArraySet.h:104
std::ptrdiff_t difference_type
Definition: UT_ArraySet.h:173
bool insertHelper(pointer pstart, size_type nbuckets, const value_type &value, pointer &destp)
Definition: UT_ArraySet.h:1337
static bool isClear(const std::pair< S0, S1 > &v)
Definition: UT_ArraySet.h:118
ArraySet< Key, MULTI, MAX_LOAD_FACTOR_256, Clearer, Hash, KeyEqual > set_type
Definition: UT_ArraySet.h:169
#define UT_ASSERT_MSG(ZZ,...)
Definition: UT_Assert.h:159
iterator erase(iterator iter)
Definition: UT_ArraySet.h:1039
void insert(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input)
Inserts all of the items in the range [start_input,end_input).
Definition: UT_ArraySet.h:988
iterator_t(pointer current, const_pointer end, bool scan=true)
Definition: UT_ArraySet.h:561
std::size_t size_type
Definition: UT_ArraySet.h:172
static size_type minBuckets(size_type size)
Definition: UT_ArraySet.h:1311
reference operator*() const
Definition: UT_ArraySet.h:609
static void clearConstruct(std::pair< S0, S1 > *p)
Definition: UT_ArraySet.h:122
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:155
void destroy()
Removes all items from the set and frees all the memory.
Definition: UT_ArraySet.h:383
static bool isClear(S *v)
Definition: UT_ArraySet.h:38
ArraySet(std::initializer_list< value_type > init, size_type init_bucket_count=0)
Definition: UT_ArraySet.h:240
GLuint GLuint end
Definition: glcorearb.h:475
iterator begin()
Returns a non-const iterator for the beginning of the set.
Definition: UT_ArraySet.h:658
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
Set iterator class.
Definition: UT_ArraySet.h:551
size_type myNumBuckets
Definition: UT_ArraySet.h:1588
long long int64
Definition: SYS_Types.h:116
set_type & operator=(const set_type &that)
Definition: UT_ArraySet.h:257
set_type & operator=(std::initializer_list< value_type > init)
Definition: UT_ArraySet.h:284
static key_equal key_eq()
Definition: UT_ArraySet.h:1283
static void clearConstruct(S **p)
Definition: UT_ArraySet.h:39
void bumpNumBuckets(size_type new_num_items)
Definition: UT_ArraySet.h:536
const_iterator cbegin() const
Returns a const iterator for the beginning of the set.
Definition: UT_ArraySet.h:663
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1222
const_iterator cend() const
Returns a const end iterator for the set.
Definition: UT_ArraySet.h:679
iterator find(const Key &key)
Definition: UT_ArraySet.h:694
num_items
Definition: UT_RTreeImpl.h:672
std::conditional< constant_type, const value_type, value_type >::type & reference
Definition: UT_ArraySet.h:557
std::forward_iterator_tag iterator_category
Definition: UT_ArraySet.h:554
int64 getMemoryUsage(bool inclusive) const
Definition: UT_ArraySet.h:1290
ArraySet(set_type &&that)
Move constructor, destroying the source.
Definition: UT_ArraySet.h:197
size_type count(const Key &key) const
Definition: UT_ArraySet.h:754
GLsizeiptr size
Definition: glcorearb.h:664
bool operator!=(const set_type &that) const
Definition: UT_ArraySet.h:324
value_type & reference
Definition: UT_ArraySet.h:176
size_type bucket_size(size_type i) const
Definition: UT_ArraySet.h:421
GLenum void ** pointer
Definition: glcorearb.h:810
std::pair< iterator, bool > emplace(Args &&...args)
Definition: UT_ArraySet.h:1025
std::conditional< constant_type, const typename ArraySet::value_type, typename ArraySet::value_type >::type value_type
Definition: UT_ArraySet.h:556
SYS_FORCE_INLINE void forEach(FUNCTOR &&functor) const
Definition: UT_ArraySet.h:1299
void insert(std::initializer_list< value_type > list)
Inserts all of the items from the initializer_list.
Definition: UT_ArraySet.h:1014
const value_type * const_pointer
Definition: UT_ArraySet.h:179
const_pointer getEnd() const
Definition: UT_ArraySet.h:629
std::pair< const_iterator, const_iterator > equal_range(const Key &key) const
Definition: UT_ArraySet.h:824
static bool isClear(T v)
Definition: UT_ArraySet.h:64
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
iterator_t< constant_type > & operator++()
Definition: UT_ArraySet.h:592
void swap(set_type &that)
Swaps another set with this one.
Definition: UT_ArraySet.h:330
const value_type & const_reference
Definition: UT_ArraySet.h:177
pointer operator->() const
Definition: UT_ArraySet.h:614
**If you just want to fire and args
Definition: thread.h:609
static void clear(std::pair< S0, S1 > &v)
Definition: UT_ArraySet.h:113
static size_type max_size()
Definition: UT_ArraySet.h:359
void rehash(size_type new_num_buckets)
Definition: UT_ArraySet.h:450
std::pair< iterator, iterator > equal_range(const Key &key)
Definition: UT_ArraySet.h:889
static float max_load_factor()
Definition: UT_ArraySet.h:436
std::conditional< constant_type, typename set_type::const_pointer, typename set_type::pointer >::type pointer
Definition: UT_ArraySet.h:558
Definition: core.h:1131
static hasher hash_function()
Definition: UT_ArraySet.h:1275
static void clear(T &v)
Definition: UT_ArraySet.h:47
const_iterator begin() const
Returns a const iterator for the beginning of the set.
Definition: UT_ArraySet.h:668
iterator_t< false > iterator
Iterator type for iterating over non-constant elements.
Definition: UT_ArraySet.h:641
SIM_API const UT_StringHolder distance
bool operator!=(const iterator_t &that) const
Definition: UT_ArraySet.h:583
bool contains(const Key &key) const
Definition: UT_ArraySet.h:793
size_type bucket_count() const
Returns the current number of buckets.
Definition: UT_ArraySet.h:406
type
Definition: core.h:1059
KeyEqual key_equal
Definition: UT_ArraySet.h:175
pointer searchStart(const Key &key)
Definition: UT_ArraySet.h:1325
const_iterator(const iterator &non_const_it)
Definition: UT_ArraySet.h:652
bool operator==(const iterator_t &that) const
Definition: UT_ArraySet.h:578
size_type erase(const Key &key)
Definition: UT_ArraySet.h:1248
ArraySet(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input, size_type init_bucket_count=0)
Inserts all of the items in the range [start_input,end_input).
Definition: UT_ArraySet.h:216
GLint GLsizei count
Definition: glcorearb.h:405
Clearer clearer_type
Definition: UT_ArraySet.h:180
const_iterator end() const
Returns a const end iterator for the set.
Definition: UT_ArraySet.h:685
bool operator==(const set_type &that) const
Definition: UT_ArraySet.h:301
void reserve(size_type num_items)
Definition: UT_ArraySet.h:464
value_type * pointer
Definition: UT_ArraySet.h:178