HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_ArraySet.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_ArraySet.h (UT Library, C++)
7  *
8  * COMMENTS: Flat-array-based hash set implementation.
9  */
10 
11 #pragma once
12 
13 #ifndef __UT_ArraySet__
14 #define __UT_ArraySet__
15 
16 #include "UT_ArrayHelp.h"
17 #include "UT_Assert.h"
18 #include <SYS/SYS_Inline.h>
19 #include <SYS/SYS_Math.h>
20 #include <SYS/SYS_StaticAssert.h>
21 #include <SYS/SYS_Types.h>
22 
23 #include <hboost/functional/hash.hpp>
24 
25 #include <limits>
26 #include <stdint.h>
27 #include <utility>
28 
29 namespace UT {
30 
31 template<typename T>
32 struct DefaultClearer;
33 
34 template<typename S>
35 struct DefaultClearer<S*>
36 {
37  static void clear(S*&v) { v = nullptr; }
38  static bool isClear(S *v) { return v == nullptr; }
39  static void clearConstruct(S**p) { clear(*p); }
40 
41  static const bool clearNeedsDestruction = false;
42 };
43 
44 template<typename T>
46 {
47  static void clear(T &v)
48  {
49  if (std::numeric_limits<T>::has_quiet_NaN) {
50  // Use QNaN as unused floating-point marker.
51  // NOTE: All NaNs get treated as unused, even if they're not
52  // the representative QNaN.
53  v = std::numeric_limits<T>::quiet_NaN();
54  }
56  // Use min as unused signed integer marker.
58  }
59  else {
60  // Use max as unused unsigned integer marker.
62  }
63  }
64  static bool isClear(T v)
65  {
66  if (std::numeric_limits<T>::has_quiet_NaN) {
67  // NOTE: All NaNs get treated as unused, even if they're not
68  // the representative QNaN.
69  return SYSisNan(v);
70  }
72  return (v == std::numeric_limits<T>::min());
73  }
74  return (v == std::numeric_limits<T>::max());
75  }
76  static void clearConstruct(T *p) { clear(*p); }
77 
78  static const bool clearNeedsDestruction = false;
79 };
80 
81 template<> struct DefaultClearer< int8_t > : public NumericClearer< int8_t > {};
82 template<> struct DefaultClearer<uint8_t > : public NumericClearer<uint8_t > {};
83 template<> struct DefaultClearer< int16_t> : public NumericClearer< int16_t> {};
84 template<> struct DefaultClearer<uint16_t> : public NumericClearer<uint16_t> {};
85 template<> struct DefaultClearer< int32_t> : public NumericClearer< int32_t> {};
86 template<> struct DefaultClearer<uint32_t> : public NumericClearer<uint32_t> {};
87 template<> struct DefaultClearer< int64_t> : public NumericClearer< int64_t> {};
88 template<> struct DefaultClearer<uint64_t> : public NumericClearer<uint64_t> {};
89 template<> struct DefaultClearer<float> : public NumericClearer<float> {};
90 template<> struct DefaultClearer<double> : public NumericClearer<double> {};
91 template<> struct DefaultClearer<long double> : public NumericClearer<long double> {};
92 #ifdef MBSD
93 // On macOS, size_t is "unsigned long" which matches none of the above
94 template<> struct DefaultClearer<size_t> : public NumericClearer<size_t> {};
95 #endif
96 
97 /// NOTE: It may seem silly to have bool here, but things like
98 /// std::pair<T,bool> should work automatically, so they need some
99 /// default behaviour for bool.
100 template<>
101 struct DefaultClearer<bool>
102 {
103  static void clear(bool&v) { v = false; }
104  static bool isClear(bool v) { return !v; }
105  static void clearConstruct(bool *p) { clear(*p); }
106 
107  static const bool clearNeedsDestruction = false;
108 };
109 
110 template<typename S0,typename S1>
111 struct DefaultClearer<std::pair<S0,S1> >
112 {
113  static void clear(std::pair<S0,S1> &v)
114  {
115  DefaultClearer<S0>::clear(v.first);
116  DefaultClearer<S1>::clear(v.second);
117  }
118  static bool isClear(const std::pair<S0,S1> &v)
119  {
120  return DefaultClearer<S0>::isClear(v.first) && DefaultClearer<S1>::isClear(v.second);
121  }
122  static void clearConstruct(std::pair<S0,S1>*p) {
125  }
127 };
128 
129 /// This is close to a drop-in replacement for std::unordered_set,
130 /// except that it uses a single array of items and empty spaces
131 /// marked with a dedicated "clear" value.
132 /// It also has a fixed maximum load factor, and doesn't store a
133 /// hasher, comparator, or allocator object as member data,
134 /// avoiding unnecessary overhead, but these differences introduce
135 /// some interface incompatibilities.
136 ///
137 /// The incompatibility that will probably be encountered most often
138 /// is the lack of time guarantees. This structure is more efficient in
139 /// many common cases, due to better memory coherency and fewer
140 /// memory allocations, but cannot guarantee as good time scaling in
141 /// worst or even average case as std::unordered_set guarantees for most
142 /// functions.
143 ///
144 /// Because there is only space for one item in each bucket, if a collision
145 /// is hit on insertion, the structure will scan forward until a "clear"
146 /// bucket is found. However, items in a contiguous block will always
147 /// be sorted in the order of their ideal bucket numbers.
148 /// If the end of the array is reached without finding a clear bucket,
149 /// instead of the obvious approach of wrapping to the beginning of the
150 /// array, the block will be shifted backward, so some items in this
151 /// block at the end may be earlier than their ideal bucket number.
152 /// This is about as complicated as wrapping, or not sorting, but avoids
153 /// an awkward issue when erasing while iterating where an item might get
154 /// visited multiple times due to the wrapping, or items might be missed
155 /// due to the lack of order. Even still, unlike
156 /// std::unordered_set, erase may invalidate other iterators and references.
157 /// Also, insert may invalidate other iterators and references.
158 template<
159  typename Key,
160  bool MULTI=false,
161  std::size_t MAX_LOAD_FACTOR_256=128,
162  typename Clearer=DefaultClearer<Key>,
163  class Hash=hboost::hash<Key>,
164  class KeyEqual=std::equal_to<Key>
165 >
166 class ArraySet
167 {
168 public:
170  typedef Key key_type;
171  typedef Key value_type;
172  typedef std::size_t size_type;
173  typedef std::ptrdiff_t difference_type;
174  typedef Hash hasher;
175  typedef KeyEqual key_equal;
177  typedef const value_type& const_reference;
178  typedef value_type *pointer;
179  typedef const value_type *const_pointer;
180  typedef Clearer clearer_type;
181 
183  : myBuckets(nullptr)
184  , myNumBuckets(0)
185  {
186  UT_ASSERT_COMPILETIME(MAX_LOAD_FACTOR_256 <= 256);
187  }
188  explicit ArraySet(size_type init_bucket_count)
189  : myBuckets(nullptr)
190  , myNumBuckets(0)
191  {
192  if (init_bucket_count != 0)
193  setNumBuckets(init_bucket_count);
194  }
195 
196  /// Move constructor, destroying the source.
198  : myBuckets(that.myBuckets)
199  , myNumBuckets(that.myNumBuckets)
200  {
201  that.myBuckets = nullptr;
202  that.myNumBuckets = 0;
203  }
204 
205  /// Copy constructor.
206  /// Avoid accidental copying by making the constructor explicit.
207  explicit ArraySet(const set_type &that)
208  : myBuckets(nullptr)
209  , myNumBuckets(0)
210  {
211  *this = that;
212  }
213 
214  /// Inserts all of the items in the range [start_input,end_input).
215  template<typename INPUT_ITERATOR>
216  ArraySet(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input, size_type init_bucket_count = 0)
217  : myBuckets(nullptr)
218  , myNumBuckets(0)
219  {
220  // Preallocate enough buckets, instead of waiting for
221  // insert to potentially hit collisions.
222  size_type ninput_items = std::distance(start_input, end_input);
223  if (ninput_items == 0)
224  return;
225  setNumBuckets(SYSmax(init_bucket_count, minBuckets(ninput_items)));
226 
227  // Now, just insert them all, (no need to realloc)
228  for (; start_input != end_input; ++start_input)
229  {
230  pointer pdest;
231  bool success = insertHelper<!MULTI,false>(myBuckets,myNumBuckets,*start_input,pdest);
232  if (success)
233  *pdest = *start_input;
234  else
235  --ninput_items;
236  }
237  *((size_type*)(myBuckets+myNumBuckets)) = ninput_items;
238  }
239  /// Constructs a set from an initializer list, with an optional
240  /// bucket count, e.g.:
241  /// UT::ArraySet<int> some_set({5,123,500}, 20);
242  ArraySet(std::initializer_list<value_type> init, size_type init_bucket_count = 0)
243  : myBuckets(nullptr)
244  , myNumBuckets(0)
245  {
247  if (init_bucket_count == 0 || init_bucket_count < init.size()) {
248  bucket_count = minBuckets(init.size());
249  }
250  else {
251  bucket_count = init_bucket_count;
252  }
253  setNumBuckets(bucket_count);
254  for (auto &&p = init.begin(); p != init.end(); ++p) {
255  insert(*p);
256  }
257  }
258 
260  {
261  if (this == &that)
262  return *this;
263 
264  clear();
266 
267  if (!myBuckets)
268  return *this;
269 
270  size_type size = that.size();
271  *((size_type*)(myBuckets+myNumBuckets)) = size;
272 
273  if (size)
274  {
275  // Now, just copy them all, (no need to realloc)
276  const_pointer srcp = that.myBuckets;
278  for (pointer destp = myBuckets; destp != pend; ++destp, ++srcp)
279  {
280  *destp = *srcp;
281  }
282  }
283  return *this;
284  }
285 
286  set_type &operator=(std::initializer_list<value_type> init)
287  {
288  clear();
289  insert(init.begin(), init.end());
290  return *this;
291  }
292 
294  {
295  destroy();
296  myBuckets = that.myBuckets;
297  myNumBuckets = that.myNumBuckets;
298  that.myBuckets = nullptr;
299  that.myNumBuckets = 0;
300  return *this;
301  }
302 
303  bool operator==(const set_type &that) const
304  {
305  if (size() != that.size())
306  return false;
307 
308  // Check if one has no allocation (both empty)
309  if (!myBuckets || !that.myBuckets)
310  return true;
311 
312  const const_pointer pend = myBuckets + myNumBuckets;
313  for (const_pointer p = myBuckets; p != pend; ++p)
314  {
315  if (Clearer::isClear(*p))
316  continue;
317  // NOTE: Don't just use that.contains(*p), in case it doesn't check the whole item contents!
318  const_iterator it = that.find(*p);
319  if (it.atEnd())
320  return false;
321  if (*p != *it)
322  return false;
323  }
324  return true;
325  }
326  bool operator!=(const set_type &that) const
327  {
328  return !(*this == that);
329  }
330 
331  /// Swaps another set with this one
332  void swap(set_type &that)
333  {
334  pointer temp_buckets = that.myBuckets;
335  size_type temp_numbuckets = that.myNumBuckets;
336  that.myBuckets = myBuckets;
337  that.myNumBuckets = myNumBuckets;
338  myBuckets = temp_buckets;
339  myNumBuckets = temp_numbuckets;
340  }
341 
343  {
344  destroy();
345  }
346 
347  /// Returns true iff there are no items in the set
348  bool empty() const
349  {
350  return !myBuckets || (*((const size_type*)(myBuckets+myNumBuckets)) == 0);
351  }
352 
353  /// Returns the number of items in the set
354  size_type size() const
355  {
356  return myBuckets ? *(const size_type*)(myBuckets+myNumBuckets) : 0;
357  }
358 
359  /// This only exists because the standard requires it.
360  /// The only hard limit is what memory will allow.
362  {
364  }
365 
366  /// Removes all items from the set, but does not free the memory.
367  /// Call destroy() to free all the memory.
368  void clear()
369  {
370  if (!myBuckets)
371  return;
372  pointer p = myBuckets;
373  pointer pend = p+myNumBuckets;
374  if (*((const size_type*)pend) == 0)
375  return;
376  while (p != pend)
377  {
378  Clearer::clear(*p);
379  ++p;
380  }
381  *((size_type*)pend) = 0;
382  }
383 
384  /// Removes all items from the set and frees all the memory.
385  void destroy()
386  {
387  if (!myBuckets)
388  return;
389  if (!std::is_trivially_destructible<value_type>::value && (Clearer::clearNeedsDestruction || !empty()))
390  {
391  pointer p = myBuckets;
392  const_pointer pend = p+myNumBuckets;
393  while (p != pend)
394  {
395  // I'm assuming that in some cases, it's faster to check isClear than to call the destructor,
396  // even if it's only because of a better chance of inlining.
397  if (Clearer::clearNeedsDestruction || !Clearer::isClear(*p))
398  p->~value_type();
399  ++p;
400  }
401  }
402  free(myBuckets);
403  myBuckets = nullptr;
404  myNumBuckets = 0;
405  }
406 
407  /// Returns the current number of buckets.
409  {
410  return myNumBuckets;
411  }
412 
413  /// This only exists because the standard requires it.
414  /// The only hard limit is what memory will allow.
416  {
418  }
419 
420  /// This only exists because the standard requires it.
421  /// This function doesn't really make sense for this implementation,
422  /// but it returns 1 if the bucket is occupied, and 0 if it is not.
424  {
425  if (i >= myNumBuckets) {
426  return 0;
427  }
428  if (Clearer::isClear(myBuckets[i])) {
429  return 0;
430  }
431  return 1;
432  }
433 
434  /// Returns the portion of buckets that need to be occupied before
435  /// reallocation occurs.
436  /// Unlike in the standard, the maximum load factor is a constant
437  /// in this implementation, so max_load_factor() is a static function.
438  static float max_load_factor()
439  {
440  return float(MAX_LOAD_FACTOR_256)/256;
441  }
442 
443  /// Returns the current portion of buckets that are occupied.
444  float load_factor() const
445  {
446  return float(size())/float(myNumBuckets);
447  }
448 
449  /// Sets the number of buckets to larger of new_num_buckets and the
450  /// required minimum number of buckets for size() items, unless
451  /// that number is the same as the current number of buckets.
452  void rehash(size_type new_num_buckets)
453  {
454  size_type min_buckets = minBuckets(size());
455  if (new_num_buckets < min_buckets) {
456  new_num_buckets = min_buckets;
457  }
458  if (myNumBuckets == new_num_buckets) {
459  return;
460  }
461  setNumBuckets(min_buckets);
462  }
463 
464  /// Sets the number of buckets to the required minimum number of buckets
465  /// for the larger of num_items and size().
467  {
468  size_type min_buckets = minBuckets(SYSmax(size(),num_items));
469  setNumBuckets(min_buckets);
470  }
471 
472  /// Sets the number of buckets to new_num_buckets, regardless of the
473  /// required minimum number of buckets for size() items, though
474  /// you probably shouldn't be calling it with num_new_buckets < size(),
475  /// except that if num_new_buclets==0, this calls destroy().
476  void setNumBuckets(size_type new_num_buckets)
477  {
478  if (new_num_buckets == myNumBuckets)
479  return;
480  if (new_num_buckets == 0)
481  {
482  // Assume that the caller wants to delete everything
483  destroy();
484  return;
485  }
486 
487  size_type old_size = size();
488  if (new_num_buckets < old_size)
489  {
490  UT_ASSERT_MSG(0,"What does the caller expect to happen when setting the number of buckets lower than the size, but not to zero?\nThis class doesn't support multiple items per bucket.");
491  new_num_buckets = old_size;
492  }
493 
494  // NOTE: I'm assuming that the caller, in explicitly calling
495  // setNumBuckets, doesn't want to be limited by MAX_LOAD_FACTOR_256,
496  // so don't check against it.
497 
498  pointer new_buckets = (pointer)malloc(new_num_buckets*sizeof(value_type) + sizeof(size_type));
499  pointer new_end = new_buckets + new_num_buckets;
500  for (pointer p = new_buckets; p < new_end; ++p)
501  {
502  Clearer::clearConstruct(p);
503  }
504 
505  if (old_size)
506  {
507  // Move items over and clear them from myBuckets
509  for (pointer srcp = myBuckets; srcp < pend; ++srcp)
510  {
511  if (!Clearer::isClear(*srcp))
512  {
513  value_type v;
514  Clearer::clear(v);
515  // Need to make sure that *p is set to clear,
516  // as opposed to whatever std::move leaves it as,
517  // so swap with clear.
518  std::swap(*srcp, v);
519  pointer destp;
520  insertHelper<false,false>(new_buckets, new_num_buckets, v, destp);
521  *destp = std::move(v);
522  }
523  }
524 
525  // Temporarily set the size to 0, so that destroy() won't re-destruct them.
526  *((size_type*)pend) = 0;
527  }
528  // Don't worry; this will just destruct the current buckets
529  destroy();
530 
531  myBuckets = new_buckets;
532  myNumBuckets = new_num_buckets;
533  *((size_type*)new_end) = old_size;
534  }
535 
536  /// This increases the number of buckets, if needed, for
537  /// the specified number of items
538  void bumpNumBuckets(size_type new_num_items)
539  {
540  size_type num_buckets = minBuckets(new_num_items);
541  if (num_buckets <= myNumBuckets)
542  return;
543  // Although UT_Array normally bumps up by about 12.5% each time,
544  // and so does UTbumpAllocToPrime, to reduce significant rehashing
545  // overhead, we bump to at least twice the previous size.
546  num_buckets = SYSmax(num_buckets, 2*myNumBuckets);
547  num_buckets = UTbumpAllocToPrime(num_buckets);
548  setNumBuckets(num_buckets);
549  }
550 
551  /// Set iterator class
552  template<bool constant_type>
554  {
555  public:
556  typedef std::forward_iterator_tag iterator_category;
557  typedef std::ptrdiff_t difference_type;
561 
562  iterator_t() = default;
563  iterator_t(pointer current, const_pointer end, bool scan=true)
564  : myCurrent(current)
565  , myEnd(end)
566  {
567  UT_ASSERT_MSG_P(scan || current == end || !Clearer::isClear(*current), "An iterator can't point to a clear item!");
568  if (scan)
569  {
570  while (myCurrent != myEnd && Clearer::isClear(*myCurrent))
571  {
572  ++myCurrent;
573  }
574  }
575  }
576  iterator_t(const iterator_t &) = default;
577  iterator_t(iterator_t &&) = default;
578  iterator_t &operator=(const iterator_t &) = default;
579  iterator_t &operator=(iterator_t &&) = default;
580  bool operator==(const iterator_t &that) const
581  {
582  UT_ASSERT_MSG_P(myEnd == that.myEnd, "Probably unintentional: Comparing against iterator from different set or iterator invalidated by reallocation!");
583  return (myCurrent == that.myCurrent);
584  }
585  bool operator!=(const iterator_t &that) const
586  {
587  UT_ASSERT_MSG_P(myEnd == that.myEnd, "Probably unintentional: Comparing against iterator from different set or iterator invalidated by reallocation!");
588  return (myCurrent != that.myCurrent);
589  }
590  bool atEnd() const
591  {
592  return myCurrent == myEnd;
593  }
595  {
596  UT_ASSERT_MSG_P(myCurrent < myEnd, "Incrementing end iterator!");
597  if (myCurrent == myEnd) {
598  return *this;
599  }
600  do {
601  ++myCurrent;
602  } while (myCurrent != myEnd && Clearer::isClear(*myCurrent));
603  return *this;
604  }
605 
606  // This is intentionally not implemented to force its non-use but we
607  // keep it here just in case there's weird C++ rules on iterator
608  // requirements
610 
612  {
613  UT_ASSERT_MSG_P(myCurrent < myEnd, "Dereferencing end iterator!");
614  return *myCurrent;
615  }
617  {
618  UT_ASSERT_MSG_P(myCurrent < myEnd, "Dereferencing end iterator!");
619  return myCurrent;
620  }
621  operator pointer() const
622  {
623  UT_ASSERT_MSG_P(myCurrent <= myEnd, "Dereferencing iterator past end!");
624  return myCurrent;
625  }
627  {
628  UT_ASSERT_MSG_P(myCurrent <= myEnd, "Dereferencing iterator past end!");
629  return myCurrent;
630  }
632  {
633  return myEnd;
634  }
635  private:
636  friend class iterator_t<!constant_type>;
637 
638  pointer myCurrent;
639  const_pointer myEnd;
640  };
641 
642  /// Iterator type for iterating over non-constant elements.
643  typedef iterator_t<false> iterator;
644 
645  /// Iterator type for iterating over constant elements.
646  /// It may be useful to convert an iterator to a const_iterator,
647  /// so making this a subclass lets us add a constructor for that.
648  class const_iterator : public iterator_t<true>
649  {
650  public:
652 
653  const_iterator() = default;
654  const_iterator(const iterator &non_const_it)
655  : iterator_t<true>(non_const_it.getCurrent(), non_const_it.getEnd(), false)
656  {}
657  };
658 
659  /// Returns a non-const iterator for the beginning of the set.
661  {
663  }
664  /// Returns a const iterator for the beginning of the set.
665  const_iterator cbegin() const
666  {
667  return const_iterator(myBuckets,myBuckets+myNumBuckets,true);
668  }
669  /// Returns a const iterator for the beginning of the set.
670  const_iterator begin() const
671  {
672  return const_iterator(myBuckets,myBuckets+myNumBuckets,true);
673  }
674  /// Returns a non-const end iterator for the set.
676  {
678  return iterator(pend,pend,false);
679  }
680  /// Returns a const end iterator for the set.
681  const_iterator cend() const
682  {
684  return const_iterator(pend,pend,false);
685  }
686  /// Returns a const end iterator for the set.
687  const_iterator end() const
688  {
690  return const_iterator(pend,pend,false);
691  }
692 
693  /// Returns an iterator to the first item matching key,
694  /// or an end iterator if no items match key.
695  /// @{
696  iterator find(const Key &key)
697  {
698  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
699 
700  const pointer pstart = myBuckets;
701  if (!pstart)
702  return iterator(nullptr,nullptr,false);
703 
704  const pointer pend = pstart + myNumBuckets;
705 
706  const pointer search_start = searchStart(key);
707  for (pointer p = search_start; p != pend; ++p)
708  {
709  if (key_equal()(key,*p))
710  return iterator(p,pend,false);
711  if (Clearer::isClear(*p))
712  return iterator(pend,pend,false);
713  }
714  // Hit the end, so search back from before the start of the block.
715  for (pointer p = search_start-1; p >= pstart; --p)
716  {
717  if (key_equal()(key,*p))
718  return iterator(p,pend,false);
719  if (Clearer::isClear(*p))
720  return iterator(pend,pend,false);
721  }
722  return iterator(pend,pend,false);
723  }
724  const_iterator find(const Key &key) const
725  {
726  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
727 
728  const const_pointer pstart = myBuckets;
729  if (!pstart)
730  return const_iterator(nullptr,nullptr,false);
731 
732  const const_pointer pend = pstart + myNumBuckets;
733 
734  const_pointer search_start = searchStart(key);
735  for (const_pointer p = search_start; p != pend; ++p)
736  {
737  if (key_equal()(key,*p))
738  return const_iterator(p,pend,false);
739  if (Clearer::isClear(*p))
740  return const_iterator(pend,pend,false);
741  }
742  // Hit the end, so search back from before the start of the block.
743  for (const_pointer p = search_start-1; p >= pstart; --p)
744  {
745  if (key_equal()(key,*p))
746  return const_iterator(p,pend,false);
747  if (Clearer::isClear(*p))
748  return const_iterator(pend,pend,false);
749  }
750  return const_iterator(pend,pend,false);
751  }
752  /// @}
753 
754  /// Returns the number of entries matching key.
755  /// If MULTI is false, this will only return either 0 or 1.
756  size_type count(const Key &key) const
757  {
758  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
759 
760  if (!myBuckets)
761  return 0;
762 
763  const const_pointer pstart = myBuckets;
764  const const_pointer pend = pstart + myNumBuckets;
765  const_pointer search_start = searchStart(key);
766  size_type count = 0;
767  for (const_pointer p = search_start; p != pend; ++p)
768  {
769  if (key_equal()(key,*p))
770  {
771  if (!MULTI)
772  return 1;
773  ++count;
774  }
775  else if (Clearer::isClear(*p))
776  return MULTI ? count : 0;
777  }
778  // Hit the end, so search back from before the start of the block.
779  for (const_pointer p = search_start-1; p >= pstart; --p)
780  {
781  if (key_equal()(key,*p))
782  {
783  if (!MULTI)
784  return 1;
785  ++count;
786  }
787  else if (Clearer::isClear(*p))
788  return MULTI ? count : 0;
789  }
790  return MULTI ? count : 0;
791  }
792 
793  /// Returns true iff the set contains the given key.
794  /// This should be faster than count() if MULTI is true.
795  bool contains(const Key &key) const
796  {
797  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
798 
799  if (!myBuckets)
800  return false;
801 
802  const const_pointer pstart = myBuckets;
803  const const_pointer pend = pstart + myNumBuckets;
804  const_pointer search_start = searchStart(key);
805  for (const_pointer p = search_start; p != pend; ++p)
806  {
807  if (key_equal()(key,*p))
808  return true;
809  if (Clearer::isClear(*p))
810  return false;
811  }
812  // Hit the end, so search back from before the start of the block.
813  for (const_pointer p = search_start-1; p >= pstart; --p)
814  {
815  if (key_equal()(key,*p))
816  return true;
817  if (Clearer::isClear(*p))
818  return false;
819  }
820  return false;
821  }
822 
823  /// Returns a pair of iterators representing the range of values matching
824  /// key, as [first,second).
825  /// @{
826  std::pair<const_iterator,const_iterator> equal_range(const Key &key) const
827  {
828  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
829 
830  const const_pointer pstart = myBuckets;
831  const const_pointer pend = pstart + myNumBuckets;
832  if (!pstart)
833  return std::make_pair(const_iterator(nullptr,nullptr,false), const_iterator(nullptr,nullptr,false));
834 
835  const const_pointer search_start = searchStart(key);
836  if (!MULTI)
837  {
838  for (const_pointer p = search_start; p != pend; ++p)
839  {
840  if (key_equal()(key,*p))
841  return std::make_pair(const_iterator(p,pend,false), const_iterator(p+1,pend,true));
842  if (Clearer::isClear(*p))
843  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
844  }
845  // Hit the end, so search back from before the start of the block.
846  for (const_pointer p = search_start-1; p >= pstart; --p)
847  {
848  if (key_equal()(key,*p))
849  return std::make_pair(const_iterator(p,pend,false), const_iterator(p+1,pend,true));
850  if (Clearer::isClear(*p))
851  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
852  }
853  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
854  }
855 
856  // MULTI is true, so we might have multiple matches
857  const_pointer first_found = nullptr;
858  const_pointer end_found = pend;
859  for (const_pointer p = search_start; p != pend; ++p)
860  {
861  if (key_equal()(key,*p))
862  {
863  if (!first_found)
864  first_found = p;
865  }
866  else if (first_found)
867  {
868  if (first_found != search_start)
869  return std::make_pair(const_iterator(first_found,pend,false), const_iterator(p,pend,true));
870 
871  end_found = p;
872  break;
873  }
874  else if (Clearer::isClear(*p))
875  {
876  // NOTE: first_found must be false
877  return std::make_pair(const_iterator(pend,pend,false), const_iterator(pend,pend,false));
878  }
879  }
880 
881  // Hit the end, so search back from before the start of the block.
882  for (const_pointer p = search_start-1; p >= pstart; --p)
883  {
884  if (!key_equal()(key,*p))
885  {
886  return std::make_pair(const_iterator(p+1,pend,false), const_iterator(end_found,pend,true));
887  }
888  }
889  return std::make_pair(const_iterator(pstart,pend,false), const_iterator(end_found,pend,true));
890  }
891  std::pair<iterator,iterator> equal_range(const Key &key)
892  {
893  UT_ASSERT_MSG_P(!Clearer::isClear(key),"Don't query a clear key!");
894 
895  const pointer pstart = myBuckets;
896  const pointer pend = pstart + myNumBuckets;
897  if (!pstart)
898  return std::make_pair(iterator(nullptr,nullptr,false), iterator(nullptr,nullptr,false));
899 
900  const pointer search_start = searchStart(key);
901  if (!MULTI)
902  {
903  for (pointer p = search_start; p != pend; ++p)
904  {
905  if (key_equal()(key,*p))
906  return std::make_pair(iterator(p,pend,false), iterator(p+1,pend,true));
907  if (Clearer::isClear(*p))
908  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
909  }
910  // Hit the end, so search back from before the start of the block.
911  for (pointer p = search_start-1; p >= pstart; --p)
912  {
913  if (key_equal()(key,*p))
914  return std::make_pair(iterator(p,pend,false), iterator(p+1,pend,true));
915  if (Clearer::isClear(*p))
916  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
917  }
918  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
919  }
920 
921  // MULTI is true, so we might have multiple matches
922  pointer first_found = nullptr;
923  pointer end_found = pend;
924  for (pointer p = search_start; p != pend; ++p)
925  {
926  if (key_equal()(key,*p))
927  {
928  if (!first_found)
929  first_found = p;
930  }
931  else if (first_found)
932  {
933  if (first_found != search_start)
934  return std::make_pair(iterator(first_found,pend,false), iterator(p,pend,true));
935 
936  end_found = p;
937  break;
938  }
939  else if (Clearer::isClear(*p))
940  {
941  // NOTE: first_found must be false
942  return std::make_pair(iterator(pend,pend,false), iterator(pend,pend,false));
943  }
944  }
945 
946  // Hit the end, so search back from before the start of the block.
947  for (pointer p = search_start-1; p >= pstart; --p)
948  {
949  if (!key_equal()(key,*p))
950  {
951  return std::make_pair(iterator(p+1,pend,false), iterator(end_found,pend,true));
952  }
953  }
954  return std::make_pair(iterator(pstart,pend,false), iterator(end_found,pend,true));
955  }
956  /// @}
957 
958  /// Inserts the given value into the set, unless MULTI is false and
959  /// there's already an item for which key_equal()(value,other)
960  /// returns true. Returns a pair of iterator to the inserted value
961  /// (or the existing item if not inserted), and a bool that's true
962  /// iff the item was inserted.
963  /// @{
964  std::pair<iterator, bool> insert(const value_type &value)
965  {
966  pointer p;
967  bool success = insertHelper(myBuckets, myNumBuckets, value, p);
968  if (success)
969  {
970  *p = value;
972  }
973  return std::make_pair(iterator(p,myBuckets+myNumBuckets,false),success);
974  }
975  std::pair<iterator, bool> insert(value_type &&value)
976  {
977  pointer p;
978  bool success = insertHelper(myBuckets, myNumBuckets, value, p);
979  if (success)
980  {
981  *p = std::move(value);
983  }
984  return std::make_pair(iterator(p,myBuckets+myNumBuckets,false),success);
985  }
986  /// @}
987 
988  /// Inserts all of the items in the range [start_input,end_input).
989  template<typename INPUT_ITERATOR>
990  void insert(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input)
991  {
992  // Preallocate enough buckets, instead of waiting for
993  // insert to potentially hit collisions.
994  size_type ninput_items = std::distance(start_input, end_input);
995  if (!ninput_items)
996  return;
997 
998  size_type new_size = size() + ninput_items;
999  if (myNumBuckets < minBuckets(new_size))
1000  bumpNumBuckets(new_size);
1001 
1002  // Now, just insert them all, (no need to realloc)
1003  for (; start_input != end_input; ++start_input)
1004  {
1005  pointer pdest;
1006  bool success = insertHelper<!MULTI,false>(myBuckets,myNumBuckets,*start_input,pdest);
1007  if (success)
1008  *pdest = *start_input;
1009  else
1010  --new_size;
1011 
1012  }
1013  *((size_type*)(myBuckets+myNumBuckets)) = new_size;
1014  }
1015  /// Inserts all of the items from the initializer_list.
1016  void insert(std::initializer_list<value_type> list)
1017  {
1018  // Just delegate to the iterator range insert function.
1019  insert(list.begin(),list.end());
1020  }
1021 
1022  /// Inserts an element constructed with the given arguments.
1023  /// Maybe the standard implementation can somehow get a speedup
1024  /// from this, but we need the item's hash before inserting,
1025  /// so it needs to be constructed first.
1026  template <typename... Args>
1027  std::pair<iterator,bool> emplace(Args&&... args)
1028  {
1029  return insert(value_type(std::forward<Args>(args)...));
1030  }
1031 
1032  /// Removes the item at the specified iterator location,
1033  /// returning an iterator to the next item.
1034  ///
1035  /// Unlike std::unordered_set::erase(const_iterator), this may
1036  /// invalidate iterators and references to other items in the
1037  /// map, if they are in a contiguous block of non-empty items
1038  /// with the item that iter points to. Also, this only
1039  /// accepts an iterator, instead of a const_iterator.
1040  /// (Why would std::unordered_set::erase accept a const_iterator?)
1042  {
1043  const pointer pstart = myBuckets;
1044  const pointer pend = pstart + myNumBuckets;
1045 
1046  const pointer p = (pointer)iter;
1047 
1048  UT_ASSERT_MSG_P(p >= pstart && p < pend && !Clearer::isClear(*p), "An iterator to erase can't point to a clear item!");
1049 
1050  // Actually erase the item
1051  Clearer::clear(*p);
1052  size_type new_size = --*((size_type*)(myBuckets+myNumBuckets));
1053 
1054  if (new_size == 0)
1055  return iterator(pend,pend,false);
1056 
1057  // Check for the common, simple case, where there's nothing after p in
1058  // the block of possible conflicts, and p isn't at the end.
1059  pointer pnext = p+1;
1060  bool end_case = (pnext == pend);
1061  if (!end_case)
1062  {
1063  if (Clearer::isClear(*pnext))
1064  return iterator(pnext,pend,true);
1065 
1066  // Now, for the hard case... -_-
1067 
1068  // If we're not right at the end of the array already,
1069  // find the end of the block, and check if it's the end of the array.
1070  // pnext is already clear, else we'd have returned above.
1071  do
1072  {
1073  ++pnext;
1074  end_case = (pnext == pend);
1075  } while (!end_case && !Clearer::isClear(*pnext));
1076  }
1077 
1078  if (end_case && (p != pstart) && !Clearer::isClear(*(p-1)))
1079  {
1080  // If this is a block right at the end of the array, there may be earlier
1081  // items that are too early, in which case, we need to shift
1082  // everything after that down one, into the empty spot.
1083 
1084  // Find the start of the block.
1085  pointer pprev = p-1;
1086  while (pprev != pstart && !Clearer::isClear(*(pprev-1)))
1087  {
1088  --pprev;
1089  }
1090 
1091  // Find the earliest item that should go at or after
1092  // its current bucket.
1093  do
1094  {
1095  // Check where this item ideally belongs.
1096  size_type hash = hasher()(*pprev);
1097  size_type ideal_bucket_num = hash%myNumBuckets;
1098  if (ideal_bucket_num > (pprev-pstart))
1099  {
1100  // Shift everything later by one and return.
1101  for (pointer pdest = p; pdest != pprev; --pdest)
1102  {
1103  *pdest = std::move(*(pdest-1));
1104  }
1105  Clearer::clear(*pprev);
1106  return iterator(p+1,pend,true);
1107  }
1108 
1109  ++pprev;
1110  } while (pprev != p);
1111  }
1112 
1113  if (p+1 != pend)
1114  {
1115  // We're not at the end of the array, or there were no items before
1116  // this block to move into the empty space, but there is at least
1117  // one item after p, so shift items backward until there's one
1118  // that doesn't belong earlier than it is now.
1119 
1120  size_type empty_bucket_num = p-pstart;
1121 
1122  // NOTE: pnext points to the end of this block.
1123  for (pointer pdest = p; pdest+1 != pnext; ++pdest)
1124  {
1125  // Check where this item ideally belongs.
1126  size_type hash = hasher()(*(pdest+1));
1127  size_type ideal_bucket_num = hash%myNumBuckets;
1128  if (ideal_bucket_num > empty_bucket_num)
1129  {
1130  Clearer::clear(*pdest);
1131  return iterator(p+(pdest==p),pend,false);
1132  }
1133 
1134  *pdest = std::move(*(pdest+1));
1135  ++empty_bucket_num;
1136  }
1137  Clearer::clear(*(pnext-1));
1138  // p now contains an item that was previously after it.
1139  return iterator(p,pend,false);
1140  }
1141 
1142  // p is still empty, but there's an item (or the end pointer)
1143  // right after it.
1144  return iterator(p+1,pend,false);
1145  }
1146 
1147 #if 0 // This isn't fully implemented, so it's excluded to avoid anyone accidentally using it.
1148  /// Removes all items in the range [start_iter,end_iter),
1149  /// and returns an iterator immediately following that range.
1150  ///
1151  /// Unlike std::unordered_set::erase(const_iterator,const_iterator),
1152  /// this may invalidate iterators and references to other items in the
1153  /// map, if they are in a contiguous block of non-empty items
1154  /// with the items in the erased range. Also, this only
1155  /// accepts iterator's, instead of const_iterator's.
1156  /// (Why would std::unordered_set::erase accept a const_iterator?)
1157  iterator erase(iterator start_iter, iterator end_iter) {
1158  if (start_iter == end_iter)
1159  return end_iter;
1160 
1161  const pointer pstart = myBuckets;
1162  const const_pointer pend = pstart + myNumBuckets;
1163 
1164  const pointer prange_start = (pointer)start_iter;
1165  const pointer prange_end = (pointer)end_iter;
1166 
1167  UT_ASSERT_MSG_P(prange_start >= pstart && prange_start < prange_end && prange_end <= pend && !Clearer::isClear(*prange_start), "An iterator to erase can't point to a clear item!");
1168 
1169  // Actually erase the items
1170  size_type nremoved_items = 1;
1171  size_type nitems_in_last_block = 1;
1172  Clearer::clear(*prange_start);
1173  for (pointer p = prange_start+1; p != prange_end; ++p)
1174  {
1175  if (!Clearer::isClear(*p))
1176  {
1177  Clearer::clear(*p);
1178  ++nitems_in_last_block;
1179  ++nremoved_items;
1180  }
1181  else
1182  {
1183  nitems_in_last_block = 0;
1184  }
1185  }
1186 
1187  size_type new_size = (*((size_type*)(myBuckets+myNumBuckets)) -= nremoved_items);
1188 
1189  if (new_size == 0)
1190  {
1191  // This must be pointing to the end of the array.
1192  return end_iter;
1193  }
1194 
1195  pointer block_end = prange_end;
1196 
1197  if (prange_end != pend)
1198  {
1199  // NOTE: If prange_end is not pend, it is always occupied, because
1200  // it came from an iterator, which should be dereferenceable.
1201  // However, if there was a gap between the last removed item
1202  // and prange_end, we can still exit early.
1203  if (nitems_in_last_block == 0)
1204  return end_iter;
1205 
1206  // Find the end of the block
1207  do
1208  {
1209  ++block_end;
1210  } while (block_end != pend && !Clearer::isClear(*block_end));
1211  }
1212  else {
1213  // If end_iter is at the array end, and there's at least one empty
1214  // spot in the given range, we're done, because the last block
1215  // was completely eliminated.
1216  if (nitems_in_last_block != prange_end-prange_start)
1217  return end_iter;
1218  }
1219 
1220  UT_ASSERT_P(nitems_in_last_block > 0);
1221 
1222  // If the end of the block is the array end and
1223  // nitems_in_last_block == prange_end-prange_start (no gaps),
1224  // find the beginning of the block, and if there are any items that
1225  // belong in later buckets:
1226  // 1) shift everything after to the end of the clear block.
1227  // 2) move them backward to where they belong, as available.
1228  // 3) if there is still an item in the last cleared spot, return.
1229  if (block_end == pend && nitems_in_last_block == prange_end-prange_start && prange_start != pstart && !Clearer::isClear(*(prange_start-1)))
1230  {
1231  // Find the start of the block.
1232  pointer block_start = prange_start;
1233  do
1234  {
1235  --block_start;
1236  } while (block_start != pstart && !Clearer::isClear(*(block_start-1)));
1237 
1238  UT_ASSERT_MSG(0,"FIXME: Implement this!!!");
1239  }
1240 
1241  // Move items backward until there's one that doesn't
1242  // need to be moved backward.
1243  UT_ASSERT_MSG(0,"FIXME: Implement this!!!");
1244  return end_iter; // This is probably incorrect, but just to get it to compile
1245  }
1246 #endif
1247 
1248  /// Removes all items matching key and returns
1249  /// the number of items removed.
1250  size_type erase(const Key &key)
1251  {
1252  if (!MULTI)
1253  {
1254  iterator it = find(key);
1255  if (it.atEnd())
1256  return 0;
1257  erase(it);
1258  return 1;
1259  }
1260 
1261  // NOTE: This commented out code won't work, because the 2-argument erase isn't implemented.
1262  //std::pair<iterator,iterator> its = equal_range(key);
1263  //size_type count = std::distance(its.first, its.second);
1264  //erase(its.first, its.second);
1265 
1266  size_type count = 0;
1267  for (iterator it = find(key); !it.atEnd(); it = find(key))
1268  {
1269  erase(it);
1270  }
1271  return count;
1272  }
1273 
1274  /// NOTE: std::unordered_set::hash_function() isn't static,
1275  /// but here, we're not storing a hasher as
1276  /// a data member, so it's independent of the instance.
1278  {
1279  return hasher();
1280  }
1281 
1282  /// NOTE: std::unordered_set::key_eq() isn't static,
1283  /// but here, we're not storing a key_equal as
1284  /// a data member, so it's independent of the instance.
1286  {
1287  return key_equal();
1288  }
1289 
1290  /// Returns the amount of memory used by this, excluding any memory that might be
1291  /// allocated by any of the items, themselves.
1292  int64 getMemoryUsage(bool inclusive) const
1293  {
1294  int64 mem = inclusive ? sizeof(*this) : 0;
1295  mem += sizeof(value_type)*myNumBuckets;
1296  return mem;
1297  }
1298 
1299  template<typename FUNCTOR>
1301  void forEach(FUNCTOR &&functor) const
1302  {
1304  const const_pointer pend = p + myNumBuckets;
1305  for (; p != pend; ++p)
1306  {
1307  if (!Clearer::isClear(*p))
1308  functor(*p);
1309  }
1310  }
1311 
1312 protected:
1314  {
1315  // If there are only 0-2 elements, there might as well only be
1316  // that many buckets.
1317  if (size <= 2)
1318  return size;
1319 
1320  // Simpler case if MAX_LOAD_FACTOR_256 evenly divides 256
1321  if ((256%MAX_LOAD_FACTOR_256) == 0)
1322  // +1 because otherwise it's guaranteed to be composite.
1323  return size * (256/MAX_LOAD_FACTOR_256) + 1;
1324 
1325  return (size*256 + MAX_LOAD_FACTOR_256-1)/MAX_LOAD_FACTOR_256;
1326  }
1327  pointer searchStart(const Key &key)
1328  {
1329  const size_type hash = hasher()(key);
1330  return myBuckets + (hash%myNumBuckets);
1331  }
1332  const_pointer searchStart(const Key &key) const
1333  {
1334  const size_type hash = hasher()(key);
1335  return myBuckets + (hash%myNumBuckets);
1336  }
1337 
1338  template<bool fail_on_equal=!MULTI,bool check_realloc=true>
1339  bool insertHelper(pointer pstart, size_type nbuckets, const value_type &value,pointer &destp)
1340  {
1341  UT_ASSERT_P(!Clearer::isClear(value));
1342 
1343  if (check_realloc && nbuckets == 0)
1344  {
1345  setNumBuckets(1);
1346  destp = myBuckets;
1347  return true;
1348  }
1349 
1350  const pointer pend = pstart + nbuckets;
1351  const size_type hash = hasher()(value);
1352  const size_type ideal_bucket = (hash%nbuckets);
1353  pointer init_searchp = pstart + ideal_bucket;
1354  pointer searchp = init_searchp;
1355 
1356  if (Clearer::isClear(*searchp))
1357  {
1358  // Don't bother checking MAX_LOAD_FACTOR_256 if exact match is clear,
1359  // since there's no harm in perfect hashes... unless it fills up and
1360  // people do a lot of querying keys that aren't in the set.
1361  // If that ends up being a bottleneck, feel free to change this.
1362  destp = searchp;
1363  return true;
1364  }
1365 
1366  if (fail_on_equal && key_equal()(*searchp,value))
1367  {
1368  destp = searchp;
1369  return false;
1370  }
1371 
1372  if (!fail_on_equal && check_realloc)
1373  {
1374  // We're always adding one.
1375  size_type new_size = size() + 1;
1376  if (nbuckets < minBuckets(new_size)) {
1377  bumpNumBuckets(new_size);
1378  // Delegate to version that doesn't check for realloc.
1379  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1380  }
1381  // We're not reallocating in this case.
1382  }
1383 
1384  // Find the place where our ideal bucket is less than the next
1385  // item's ideal bucket, i.e. the end of where collisions on ideal_bucket go,
1386  // in case there are multiple collision spans that have written past ideal_bucket.
1387  if ((hasher()(*searchp)%nbuckets) <= ideal_bucket)
1388  {
1389  while (true)
1390  {
1391  ++searchp;
1392  if (searchp == pend || Clearer::isClear(*searchp))
1393  break;
1394 
1395  const size_type other_hash = hasher()(*searchp);
1396  if (fail_on_equal && other_hash==hash && key_equal()(*searchp,value))
1397  {
1398  destp = searchp;
1399  return false;
1400  }
1401  if (ideal_bucket < (other_hash%nbuckets))
1402  break;
1403  }
1404  }
1405  // Known:
1406  // [init_searchp, searchp] does not contain value.
1407  // searchp is one of:
1408  // Clear
1409  // pend
1410  // idealbucket(searchp) > idealbucket(value)
1411 
1412  pointer insertp = searchp;
1413 
1414  // Find the end of the block of present items, all of
1415  // which should have idealbuckets greater than us.
1416  while (searchp != pend && !Clearer::isClear(*searchp))
1417  {
1418  ++searchp;
1419  }
1420 
1421  if (searchp != pend)
1422  {
1423  // Common case: block end is not the array end.
1424  // Implies searchp is Clear.
1425  // Since we didn't hit the end, this occupied block
1426  // was never bumped back, so we don't have to look
1427  // back for matches or proper ideal bucket span.
1428  // We can easily make room by moving everything in
1429  // [insertp, searchp) -> [insertp+1, searchp+1)
1430  // so we can write to [insertp].
1431 
1432  if (fail_on_equal && check_realloc)
1433  {
1434  // We're always adding one.
1435  size_type new_size = *((const size_type*)pend) + 1;
1436  if (nbuckets < minBuckets(new_size))
1437  {
1438  bumpNumBuckets(new_size);
1439  // Delegate to version that doesn't check for realloc.
1440  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1441  }
1442  // We're not reallocating in this case.
1443  }
1444 
1445  // Move items later by one.
1446  // std::move [insertp, searchp) to [insertp+1, searchp+1)
1447  while (searchp != insertp)
1448  {
1449  *searchp = std::move(*(searchp-1));
1450  --searchp;
1451  }
1452  destp = insertp;
1453  return true;
1454  }
1455 
1456  // Less common case: block end is the array end.
1457  // insertp may actually need to go earlier because we may
1458  // not be pointing to the end of our ideal block span.
1459  // We want insertp to refer to a location that we can update
1460  // with value and preserve the ordering of idealbuckets.
1461  if (insertp == init_searchp)
1462  {
1463  // There should always be a space somewhere if !fail_on_equal,
1464  // because reallocation was handled above.
1465  UT_ASSERT_P(insertp-1 >= pstart || (fail_on_equal && insertp == pstart));
1466  while ((!fail_on_equal || insertp != pstart) && !Clearer::isClear(*(insertp-1)))
1467  {
1468  const size_type other_hash = hasher()(*(insertp-1));
1469  if (fail_on_equal && other_hash == hash && key_equal()(*(insertp-1),value))
1470  {
1471  destp = insertp-1;
1472  return false;
1473  }
1474 
1475  // Exiting at <= rather than < means that we may be in
1476  // middle of an ideal block span.
1477  if ((other_hash%nbuckets) <= ideal_bucket)
1478  break;
1479  --insertp;
1480  UT_ASSERT_P(insertp-1 >= pstart || (fail_on_equal && insertp == pstart));
1481  }
1482  }
1483 
1484  // There are occupied buckets from init_searchp to the end,
1485  // but insertp is after init_searchp, so we don't need to look backward
1486  // ^^^^^^^^^ It isn't???
1487  // to find where to insert; we just need to shift items that
1488  // are in the range from the beginning of the occupied block to insertp
1489  // backward by 1.
1490  //
1491  // Because of the previous loop, insertp can be less than init_searchp.
1492  // In that case we want to start the blockstart at insertp. This
1493  // is both more performant, but more importantly the
1494  // in_ideal_bucket_span condition will actually make sense then.
1495  // Without this, the init_searchp might be *greater* than the
1496  // ideal bucket span and cause us to immediately stop checking.
1497 
1498  pointer blockstart = (init_searchp < insertp) ? init_searchp : insertp;
1499  if (fail_on_equal && check_realloc)
1500  {
1501  // It can happen that blockstart is pstart iff the entire
1502  // array is full, and since we've checked all items with equal hash
1503  // in that case, we know that we'll be inserting, so we need to
1504  // reallocate.
1505  if (blockstart == pstart)
1506  {
1507  size_type new_size = *((const size_type*)pend) + 1;
1508  bumpNumBuckets(new_size);
1509  // Delegate to version that doesn't check for realloc.
1510  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1511  }
1512  }
1513 
1514  // NOTE: blockstart shouldn't hit pstart, because myNumBuckets > size()
1515  UT_ASSERT_P(blockstart-1 >= pstart);
1516  bool in_ideal_bucket_span = true;
1517  bool checked_realloc = false;
1518  while (!Clearer::isClear(*(blockstart-1)))
1519  {
1520  --blockstart;
1521  // Note that we might be in the middle of a span of equal initial_bucket
1522  // values, so an equal item might be earlier than init_searchp.
1523  if (fail_on_equal && in_ideal_bucket_span)
1524  {
1525  const size_type other_hash = hasher()(*blockstart);
1526  if ((other_hash == hash) && key_equal()(*blockstart,value))
1527  {
1528  destp = blockstart;
1529  return false;
1530  }
1531  in_ideal_bucket_span = ((other_hash%nbuckets) == ideal_bucket);
1532  }
1533  if (fail_on_equal && check_realloc && !checked_realloc)
1534  {
1535  if (!in_ideal_bucket_span)
1536  {
1537  // We're always adding one, since items can't be equal outside
1538  // the equal ideal_bucket span. We only need to check for
1539  // reallocation once, since if there's at least one space free,
1540  // blockstart can't end up equalling pstart.
1541  size_type new_size = *((const size_type*)pend) + 1;
1542  if (nbuckets < minBuckets(new_size))
1543  {
1544  bumpNumBuckets(new_size);
1545  // Delegate to version that doesn't check for realloc.
1546  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1547  }
1548  // We're not reallocating in this case.
1549  checked_realloc = true;
1550  }
1551  // It can happen that blockstart is pstart iff the entire
1552  // array is full, and since we've checked all items with equal hash
1553  // in that case, we know that we'll be inserting, so we need to reallocate.
1554  else if (blockstart == pstart)
1555  {
1556  size_type new_size = *((const size_type*)pend) + 1;
1557  bumpNumBuckets(new_size);
1558  // Delegate to version that doesn't check for realloc.
1559  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1560  }
1561  }
1562  UT_ASSERT_P(blockstart-1 >= pstart);
1563  }
1564 
1565  if (fail_on_equal && check_realloc && !checked_realloc)
1566  {
1567  // We're always adding one.
1568  size_type new_size = *((const size_type*)pend) + 1;
1569  if (nbuckets < minBuckets(new_size))
1570  {
1571  bumpNumBuckets(new_size);
1572  // Delegate to version that doesn't check for realloc.
1573  return insertHelper<fail_on_equal,false>(myBuckets,myNumBuckets,value,destp);
1574  }
1575  // We're not reallocating in this case.
1576  }
1577 
1578  // Move items earlier by one.
1579  while (blockstart != insertp)
1580  {
1581  *(blockstart-1) = std::move(*blockstart);
1582  ++blockstart;
1583  }
1584  destp = insertp-1;
1585  return true;
1586  }
1587 
1588 protected:
1591 };
1592 
1593 template<
1594  typename Key,
1595  bool MULTI,
1596  std::size_t MAX_LOAD_FACTOR_256,
1597  typename Clearer,
1598  class Hash,
1599  class KeyEqual
1600 >
1601 struct DefaultClearer<ArraySet<Key,MULTI,MAX_LOAD_FACTOR_256,Clearer,Hash,KeyEqual> >
1602 {
1605  static void clear(TheType&v) { v.destroy(); }
1607  static bool isClear(TheType &v) { return v.bucket_count() == 0; }
1609  static void clearConstruct(TheType *p) { new((void *)p) TheType(); }
1610 
1611  static const bool clearNeedsDestruction = false;
1612 };
1613 
1614 } // namespace UT
1615 
1616 template<typename Key,
1617  bool MULTI = false,
1618  int MAX_LOAD_FACTOR_256 = 128,
1619  typename Clearer = UT::DefaultClearer<Key>,
1620  class Hash = hboost::hash<Key>,
1621  class KeyEqual = std::equal_to<Key> >
1623 
1624 namespace std {
1625  template<typename Key,
1626  bool MULTI,
1627  int MAX_LOAD_FACTOR_256,
1628  typename Clearer,
1629  class Hash,
1630  class KeyEqual>
1631  void swap(
1634  {
1635  a.swap(b);
1636  }
1637 }
1638 
1639 #endif
float load_factor() const
Returns the current portion of buckets that are occupied.
Definition: UT_ArraySet.h:444
const_iterator find(const Key &key) const
Definition: UT_ArraySet.h:724
#define SYSmax(a, b)
Definition: SYS_Math.h:1538
const_pointer searchStart(const Key &key) const
Definition: UT_ArraySet.h:1332
pointer myBuckets
Definition: UT_ArraySet.h:1589
static void clearConstruct(bool *p)
Definition: UT_ArraySet.h:105
ArraySet(const set_type &that)
Definition: UT_ArraySet.h:207
ArraySet(size_type init_bucket_count)
Definition: UT_ArraySet.h:188
UT_ASSERT_COMPILETIME(BRAY_EVENT_MAXFLAGS<=32)
std::ptrdiff_t difference_type
Definition: UT_ArraySet.h:557
void clear()
Definition: UT_ArraySet.h:368
std::pair< iterator, bool > insert(const value_type &value)
Definition: UT_ArraySet.h:964
size_type size() const
Returns the number of items in the set.
Definition: UT_ArraySet.h:354
UT_API exint UTbumpAllocToPrime(exint current_size)
void setNumBuckets(size_type new_num_buckets)
Definition: UT_ArraySet.h:476
static const bool clearNeedsDestruction
Definition: UT_ArraySet.h:78
STATIC_INLINE size_t Hash(const char *s, size_t len)
Definition: farmhash.h:2038
void swap(UT::ArraySet< Key, MULTI, MAX_LOAD_FACTOR_256, Clearer, Hash, KeyEqual > &a, UT::ArraySet< Key, MULTI, MAX_LOAD_FACTOR_256, Clearer, Hash, KeyEqual > &b)
Definition: UT_ArraySet.h:1631
const GLdouble * v
Definition: glcorearb.h:837
pointer getCurrent() const
Definition: UT_ArraySet.h:626
GLsizei const GLfloat * value
Definition: glcorearb.h:824
static void clearConstruct(T *p)
Definition: UT_ArraySet.h:76
static size_type max_bucket_count()
Definition: UT_ArraySet.h:415
std::pair< iterator, bool > insert(value_type &&value)
Definition: UT_ArraySet.h:975
static void clear(S *&v)
Definition: UT_ArraySet.h:37
static void clear(bool &v)
Definition: UT_ArraySet.h:103
constexpr bool SYSisNan(const F f)
Definition: SYS_Math.h:181
set_type & operator=(set_type &&that)
Definition: UT_ArraySet.h:293
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1222
bool empty() const
Returns true iff there are no items in the set.
Definition: UT_ArraySet.h:348
ImageBuf OIIO_API min(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
iterator end()
Returns a non-const end iterator for the set.
Definition: UT_ArraySet.h:675
iterator_t & operator=(const iterator_t &)=default
#define UT_ASSERT_MSG_P(ZZ,...)
Definition: UT_Assert.h:158
std::integral_constant< bool, std::numeric_limits< T >::is_signed||std::is_same< T, int128_t >::value > is_signed
Definition: format.h:821
static bool isClear(bool v)
Definition: UT_ArraySet.h:104
std::ptrdiff_t difference_type
Definition: UT_ArraySet.h:173
bool insertHelper(pointer pstart, size_type nbuckets, const value_type &value, pointer &destp)
Definition: UT_ArraySet.h:1339
static bool isClear(const std::pair< S0, S1 > &v)
Definition: UT_ArraySet.h:118
ArraySet< Key, MULTI, MAX_LOAD_FACTOR_256, Clearer, Hash, KeyEqual > set_type
Definition: UT_ArraySet.h:169
#define UT_ASSERT_MSG(ZZ,...)
Definition: UT_Assert.h:159
iterator erase(iterator iter)
Definition: UT_ArraySet.h:1041
void insert(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input)
Inserts all of the items in the range [start_input,end_input).
Definition: UT_ArraySet.h:990
iterator_t(pointer current, const_pointer end, bool scan=true)
Definition: UT_ArraySet.h:563
std::size_t size_type
Definition: UT_ArraySet.h:172
static size_type minBuckets(size_type size)
Definition: UT_ArraySet.h:1313
reference operator*() const
Definition: UT_ArraySet.h:611
static void clearConstruct(std::pair< S0, S1 > *p)
Definition: UT_ArraySet.h:122
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:155
void destroy()
Removes all items from the set and frees all the memory.
Definition: UT_ArraySet.h:385
static bool isClear(S *v)
Definition: UT_ArraySet.h:38
ArraySet(std::initializer_list< value_type > init, size_type init_bucket_count=0)
Definition: UT_ArraySet.h:242
GLuint GLuint end
Definition: glcorearb.h:475
iterator begin()
Returns a non-const iterator for the beginning of the set.
Definition: UT_ArraySet.h:660
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
Set iterator class.
Definition: UT_ArraySet.h:553
size_type myNumBuckets
Definition: UT_ArraySet.h:1590
long long int64
Definition: SYS_Types.h:116
set_type & operator=(const set_type &that)
Definition: UT_ArraySet.h:259
set_type & operator=(std::initializer_list< value_type > init)
Definition: UT_ArraySet.h:286
static key_equal key_eq()
Definition: UT_ArraySet.h:1285
static void clearConstruct(S **p)
Definition: UT_ArraySet.h:39
void bumpNumBuckets(size_type new_num_items)
Definition: UT_ArraySet.h:538
const_iterator cbegin() const
Returns a const iterator for the beginning of the set.
Definition: UT_ArraySet.h:665
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1222
const_iterator cend() const
Returns a const end iterator for the set.
Definition: UT_ArraySet.h:681
iterator find(const Key &key)
Definition: UT_ArraySet.h:696
num_items
Definition: UT_RTreeImpl.h:672
std::conditional< constant_type, const value_type, value_type >::type & reference
Definition: UT_ArraySet.h:559
std::forward_iterator_tag iterator_category
Definition: UT_ArraySet.h:556
int64 getMemoryUsage(bool inclusive) const
Definition: UT_ArraySet.h:1292
ArraySet(set_type &&that)
Move constructor, destroying the source.
Definition: UT_ArraySet.h:197
size_type count(const Key &key) const
Definition: UT_ArraySet.h:756
GLsizeiptr size
Definition: glcorearb.h:664
bool operator!=(const set_type &that) const
Definition: UT_ArraySet.h:326
value_type & reference
Definition: UT_ArraySet.h:176
size_type bucket_size(size_type i) const
Definition: UT_ArraySet.h:423
GLenum void ** pointer
Definition: glcorearb.h:810
std::pair< iterator, bool > emplace(Args &&...args)
Definition: UT_ArraySet.h:1027
std::conditional< constant_type, const typename ArraySet::value_type, typename ArraySet::value_type >::type value_type
Definition: UT_ArraySet.h:558
SYS_FORCE_INLINE void forEach(FUNCTOR &&functor) const
Definition: UT_ArraySet.h:1301
void insert(std::initializer_list< value_type > list)
Inserts all of the items from the initializer_list.
Definition: UT_ArraySet.h:1016
const value_type * const_pointer
Definition: UT_ArraySet.h:179
const_pointer getEnd() const
Definition: UT_ArraySet.h:631
std::pair< const_iterator, const_iterator > equal_range(const Key &key) const
Definition: UT_ArraySet.h:826
static bool isClear(T v)
Definition: UT_ArraySet.h:64
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
iterator_t< constant_type > & operator++()
Definition: UT_ArraySet.h:594
void swap(set_type &that)
Swaps another set with this one.
Definition: UT_ArraySet.h:332
const value_type & const_reference
Definition: UT_ArraySet.h:177
pointer operator->() const
Definition: UT_ArraySet.h:616
**If you just want to fire and args
Definition: thread.h:609
static void clear(std::pair< S0, S1 > &v)
Definition: UT_ArraySet.h:113
static size_type max_size()
Definition: UT_ArraySet.h:361
void rehash(size_type new_num_buckets)
Definition: UT_ArraySet.h:452
std::pair< iterator, iterator > equal_range(const Key &key)
Definition: UT_ArraySet.h:891
static float max_load_factor()
Definition: UT_ArraySet.h:438
std::conditional< constant_type, typename set_type::const_pointer, typename set_type::pointer >::type pointer
Definition: UT_ArraySet.h:560
Definition: core.h:1131
static hasher hash_function()
Definition: UT_ArraySet.h:1277
static void clear(T &v)
Definition: UT_ArraySet.h:47
const_iterator begin() const
Returns a const iterator for the beginning of the set.
Definition: UT_ArraySet.h:670
iterator_t< false > iterator
Iterator type for iterating over non-constant elements.
Definition: UT_ArraySet.h:643
SIM_API const UT_StringHolder distance
bool operator!=(const iterator_t &that) const
Definition: UT_ArraySet.h:585
bool contains(const Key &key) const
Definition: UT_ArraySet.h:795
size_type bucket_count() const
Returns the current number of buckets.
Definition: UT_ArraySet.h:408
type
Definition: core.h:1059
KeyEqual key_equal
Definition: UT_ArraySet.h:175
pointer searchStart(const Key &key)
Definition: UT_ArraySet.h:1327
const_iterator(const iterator &non_const_it)
Definition: UT_ArraySet.h:654
bool operator==(const iterator_t &that) const
Definition: UT_ArraySet.h:580
size_type erase(const Key &key)
Definition: UT_ArraySet.h:1250
ArraySet(INPUT_ITERATOR start_input, INPUT_ITERATOR end_input, size_type init_bucket_count=0)
Inserts all of the items in the range [start_input,end_input).
Definition: UT_ArraySet.h:216
GLint GLsizei count
Definition: glcorearb.h:405
Clearer clearer_type
Definition: UT_ArraySet.h:180
const_iterator end() const
Returns a const end iterator for the set.
Definition: UT_ArraySet.h:687
bool operator==(const set_type &that) const
Definition: UT_ArraySet.h:303
void reserve(size_type num_items)
Definition: UT_ArraySet.h:466
value_type * pointer
Definition: UT_ArraySet.h:178