HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hash.h
Go to the documentation of this file.
1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the terms set forth in the LICENSE.txt file available at
5 // https://openusd.org/license.
6 //
7 #ifndef PXR_BASE_TF_HASH_H
8 #define PXR_BASE_TF_HASH_H
9 
10 /// \file tf/hash.h
11 /// \ingroup group_tf_String
12 
13 #include "pxr/pxr.h"
14 #include "pxr/base/tf/tf.h"
15 #include "pxr/base/tf/api.h"
16 
17 #include <cstring>
18 #include <string>
19 #include <map>
20 #include <memory>
21 #include <set>
22 #include <tuple>
23 #include <typeindex>
24 #include <type_traits>
25 #include <utility>
26 #include <vector>
27 
29 
30 // Support integers.
31 template <class HashState, class T>
33 TfHashAppend(HashState &h, T integral)
34 {
35  h.Append(integral);
36 }
37 
38 // Simple metafunction that returns an unsigned integral type given a size in
39 // bytes.
40 template <size_t Size> struct Tf_SizedUnsignedInt;
41 template <> struct Tf_SizedUnsignedInt<1> { using type = uint8_t; };
42 template <> struct Tf_SizedUnsignedInt<2> { using type = uint16_t; };
43 template <> struct Tf_SizedUnsignedInt<4> { using type = uint32_t; };
44 template <> struct Tf_SizedUnsignedInt<8> { using type = uint64_t; };
45 
46 // Support enums.
47 template <class HashState, class Enum>
49 TfHashAppend(HashState &h, Enum e)
50 {
51  h.Append(static_cast<std::underlying_type_t<Enum>>(e));
52 }
53 
54 // Support floating point.
55 template <class HashState, class T>
57 TfHashAppend(HashState &h, T fp)
58 {
59  // We want both positive and negative zero to hash the same, so we have to
60  // check against zero here.
61  typename Tf_SizedUnsignedInt<sizeof(T)>::type intbuf = 0;
62  if (fp != static_cast<T>(0)) {
63  memcpy(&intbuf, &fp, sizeof(T));
64  }
65  h.Append(intbuf);
66 }
67 
68 // Support std::pair.
69 template <class HashState, class T, class U>
70 inline void
71 TfHashAppend(HashState &h, std::pair<T, U> const &p)
72 {
73  h.Append(p.first);
74  h.Append(p.second);
75 }
76 
77 // Support std::tuple.
78 template <class HashState, class... T>
79 inline void
80 TfHashAppend(HashState &h, std::tuple<T...> const &t)
81 {
82  // XXX:
83  // This gives the same hash for a std::pair<T, U> and std::tuple<T, U>
84  // which may not be ideal in some cases. See USD-10578.
85  std::apply([&h](auto const&... v) { h.Append(v...); }, t);
86 }
87 
88 // Support std::vector. std::vector<bool> specialized below.
89 template <class HashState, class T>
90 inline void
91 TfHashAppend(HashState &h, std::vector<T> const &vec)
92 {
93  static_assert(!std::is_same_v<std::remove_cv_t<T>, bool>,
94  "Unexpected usage of vector of 'bool'."
95  "Expected explicit overload.");
96  h.AppendContiguous(vec.data(), vec.size());
97 }
98 
99 // Support std::vector<bool>.
100 template <class HashState>
101 inline void
102 TfHashAppend(HashState &h, std::vector<bool> const &vec)
103 {
104  h.Append(std::hash<std::vector<bool>>{}(vec));
105 }
106 
107 // Support std::set.
108 // NOTE: Supporting std::unordered_set is complicated because the traversal
109 // order is not guaranteed
110 template <class HashState, class T, class Compare>
111 inline void
112 TfHashAppend(HashState& h, std::set<T, Compare> const &elements)
113 {
114  h.AppendRange(std::begin(elements), std::end(elements));
115 }
116 
117 // Support std::map.
118 // NOTE: Supporting std::unordered_map is complicated because the traversal
119 // order is not guaranteed
120 template <class HashState, class Key, class Value, class Compare>
121 inline void
122 TfHashAppend(HashState& h, std::map<Key, Value, Compare> const &elements)
123 {
124  h.AppendRange(std::begin(elements), std::end(elements));
125 }
126 
127 // Support for hashing std::string.
128 template <class HashState>
129 inline void
130 TfHashAppend(HashState &h, const std::string& s)
131 {
132  return h.AppendContiguous(s.c_str(), s.length());
133 }
134 
135 // Support for hashing pointers, but we explicitly delete the version for
136 // [const] char pointers. See more below.
137 template <class HashState, class T>
138 inline void
139 TfHashAppend(HashState &h, const T* ptr) {
140  return h.Append(reinterpret_cast<uintptr_t>(ptr));
141 }
142 
143 // We refuse to hash [const] char *. You're almost certainly trying to hash the
144 // pointed-to string and this will not do that (it will hash the pointer
145 // itself). To hash a c-style null terminated string, you can use
146 // TfHashAsCStr() to indicate the intent, or use TfHashCString. If you
147 // really want to hash the pointer then use static_cast<const void*>(ptr) or
148 // TfHashCharPtr.
149 template <class HashState>
150 inline void TfHashAppend(HashState &h, char const *ptr) = delete;
151 template <class HashState>
152 inline void TfHashAppend(HashState &h, char *ptr) = delete;
153 
154 /// A structure that wraps a char pointer, indicating intent that it should be
155 /// hashed as a c-style null terminated string. See TfhashAsCStr().
157 {
158  explicit TfCStrHashWrapper(char const *cstr) : cstr(cstr) {}
159  char const *cstr;
160 };
161 
162 /// Indicate that a char pointer is intended to be hashed as a C-style null
163 /// terminated string. Use this to wrap a char pointer in a HashState::Append()
164 /// call when implementing a TfHashAppend overload.
165 ///
166 /// This structure provides a lightweight view on the char pointer passed to its
167 /// constructor. It does not copy the data or participate in its lifetime. The
168 /// passed char pointer must remain valid as long as this struct is used.
169 inline TfCStrHashWrapper
170 TfHashAsCStr(char const *cstr)
171 {
172  return TfCStrHashWrapper(cstr);
173 }
174 
175 template <class HashState>
176 inline void TfHashAppend(HashState &h, TfCStrHashWrapper hcstr)
177 {
178  return h.AppendContiguous(hcstr.cstr, std::strlen(hcstr.cstr));
179 }
180 
181 // Implementation detail: dispatch based on hash capability: Try TfHashAppend
182 // first, otherwise try std::hash, followed by hash_value. We rely on a
183 // combination of expression SFINAE and establishing preferred order by passing
184 // a 0 constant and having the overloads take int (highest priority), long
185 // (next priority) and '...' (lowest priority).
186 
187 // std::hash version, attempted second.
188 template <class HashState, class T>
189 inline auto Tf_HashImpl(HashState &h, T &&obj, long)
190  -> decltype(std::hash<typename std::decay<T>::type>()(
191  std::forward<T>(obj)), void())
192 {
193  TfHashAppend(
194  h, std::hash<typename std::decay<T>::type>()(std::forward<T>(obj)));
195 }
196 
197 // hash_value, attempted last.
198 template <class HashState, class T>
199 inline auto Tf_HashImpl(HashState &h, T &&obj, ...)
200  -> decltype(hash_value(std::forward<T>(obj)), void())
201 {
202  TfHashAppend(h, hash_value(std::forward<T>(obj)));
203 }
204 
205 // TfHashAppend, attempted first.
206 template <class HashState, class T>
207 inline auto Tf_HashImpl(HashState &h, T &&obj, int)
208  -> decltype(TfHashAppend(h, std::forward<T>(obj)), void())
209 {
210  TfHashAppend(h, std::forward<T>(obj));
211 }
212 
213 // Implementation detail, CRTP base that provides the public interface for hash
214 // state implementations.
215 template <class Derived>
217 {
218 public:
219  // Append several objects to the hash state.
220  template <class... Args>
221  void Append(Args &&... args) {
222  _AppendImpl(std::forward<Args>(args)...);
223  }
224 
225  // Append contiguous objects to the hash state.
226  template <class T>
227  void AppendContiguous(T const *elems, size_t numElems) {
228  this->_AsDerived()._AppendContiguous(elems, numElems);
229  }
230 
231  // Append a range of objects to the hash state.
232  template <class Iter>
233  void AppendRange(Iter f, Iter l) {
234  this->_AsDerived()._AppendRange(f, l);
235  }
236 
237  // Return the hash code for the current state.
238  size_t GetCode() const {
239  return this->_AsDerived()._GetCode();
240  }
241 
242 private:
243  template <class T, class... Args>
244  void _AppendImpl(T &&obj, Args &&... rest) {
245  this->_AsDerived()._Append(std::forward<T>(obj));
246  _AppendImpl(std::forward<Args>(rest)...);
247  }
248  void _AppendImpl() const {
249  // base case intentionally empty.
250  }
251 
252  Derived &_AsDerived() {
253  return *static_cast<Derived *>(this);
254  }
255 
256  Derived const &_AsDerived() const {
257  return *static_cast<Derived const *>(this);
258  }
259 };
260 
261 // Implementation detail, accumulates hashes.
262 class Tf_HashState : public Tf_HashStateAPI<Tf_HashState>
263 {
264 private:
266 
267  // Go thru Tf_HashImpl for non-integers.
268  template <class T>
269  std::enable_if_t<!std::is_integral<std::decay_t<T>>::value>
270  _Append(T &&obj) {
271  Tf_HashImpl(*this, std::forward<T>(obj), 0);
272  }
273 
274  // Integers bottom out here.
275  template <class T>
277  _Append(T i) {
278  if (!_didOne) {
279  _state = i;
280  _didOne = true;
281  }
282  else {
283  _state = _Combine(_state, i);
284  }
285  }
286 
287  // Append contiguous objects.
288  template <class T>
290  _AppendContiguous(T const *elems, size_t numElems) {
291  _AppendBytes(reinterpret_cast<char const *>(elems),
292  numElems * sizeof(T));
293  }
294 
295  // Append contiguous objects.
296  template <class T>
298  _AppendContiguous(T const *elems, size_t numElems) {
299  while (numElems--) {
300  Append(*elems++);
301  }
302  }
303 
304  // Append a range.
305  template <class Iter>
306  void _AppendRange(Iter f, Iter l) {
307  while (f != l) {
308  _Append(*f++);
309  }
310  }
311 
312  /// Append a number of bytes to the hash state.
313  TF_API void _AppendBytes(char const *bytes, size_t numBytes);
314 
315  // Return the hash code for the accumulated hash state.
316  size_t _GetCode() const {
317  // This is based on Knuth's multiplicative hash for integers. The
318  // constant is the closest prime to the binary expansion of the inverse
319  // golden ratio. The best way to produce a hash table bucket index from
320  // the result is to shift the result right, since the higher order bits
321  // have the most entropy. But since we can't know the number of buckets
322  // in a table that's using this, we just reverse the byte order instead,
323  // to get the highest entropy bits into the low-order bytes.
324  return _SwapByteOrder(_state * 11400714819323198549ULL);
325  }
326 
327  // This turns into a single bswap/pshufb type instruction on most compilers.
328  inline uint64_t
329  _SwapByteOrder(uint64_t val) const {
330  val =
331  ((val & 0xFF00000000000000u) >> 56u) |
332  ((val & 0x00FF000000000000u) >> 40u) |
333  ((val & 0x0000FF0000000000u) >> 24u) |
334  ((val & 0x000000FF00000000u) >> 8u) |
335  ((val & 0x00000000FF000000u) << 8u) |
336  ((val & 0x0000000000FF0000u) << 24u) |
337  ((val & 0x000000000000FF00u) << 40u) |
338  ((val & 0x00000000000000FFu) << 56u);
339  return val;
340  }
341 
342  size_t _Combine(size_t x, size_t y) const {
343  // This is our hash combiner. The task is, given two hash codes x and
344  // y, compute a single hash code. One way to do this is to exclusive-or
345  // the two codes together, but this can produce bad results if they
346  // differ by some fixed amount, For example if the input codes are
347  // multiples of 32, and the two codes we're given are N and N + 32 (this
348  // happens commonly when the hashed values are memory addresses) then
349  // the resulting hash codes for successive pairs of these produces many
350  // repeating values (32, 96, 32, XXX, 32, 96, 32, YYY...). That's a lot
351  // of collisions.
352  //
353  // Instead we combine hash values by assigning numbers to the lattice
354  // points in the plane, and then treating the inputs x and y as
355  // coordinates identifying a lattice point. Then the combined hash
356  // value is just the number assigned to the lattice point. This way
357  // each unique input pair (x, y) gets a unique output hash code.
358  //
359  // We number lattice points by triangular numbers like this:
360  //
361  // X 0 1 2 3 4 5
362  // Y
363  // 0 0 2 5 9 14 20
364  // 1 1 4 8 13 19 26
365  // 2 3 7 12 18 25 33
366  // 3 6 11 17 24 32 41
367  // 4 10 16 23 31 40 50
368  // 5 15 22 30 39 49 60
369  //
370  // This takes a couple of additions and a multiplication, which is a bit
371  // more expensive than something like an exclusive or, but the quality
372  // improvement outweighs the added expense.
373  x += y;
374  return y + x * (x + 1) / 2;
375  }
376 
377  size_t _state = 0;
378  bool _didOne = false;
379 };
380 
381 /// \class TfHash
382 /// \ingroup group_tf_String
383 ///
384 /// A user-extensible hashing mechanism for use with runtime hash tables.
385 ///
386 /// The hash functions here are appropriate for storing objects in runtime hash
387 /// tables. They are not appropriate for document signatures / fingerprinting
388 /// or for storage and offline use. No specific guarantee is made about hash
389 /// function quality, and the resulting hash codes are only 64-bits wide.
390 /// Callers must assume that collisions will occur and be prepared to deal with
391 /// them.
392 ///
393 /// Additionally, no guarantee is made about repeatability from run-to-run.
394 /// That is, within a process lifetime an object's hash code will not change
395 /// (provided its internal state does not change). But an object with
396 /// equivalent state in a future run of the same process may hash differently.
397 ///
398 /// At the time of this writing we observe good performance combined with the
399 /// "avalanche" quality (~50% output bit flip probability for a single input bit
400 /// flip) in the low-order 40 output bits. Higher order bits do not achieve
401 /// avalanche, and the highest order 8 bits are particularly poor. But for our
402 /// purposes we deem this performance/quality tradeoff acceptable.
403 ///
404 /// This mechanism has builtin support for integral and floating point types,
405 /// some STL types and types in Tf. TfHash uses three methods to attempt to
406 /// hash a passed object. First, TfHash tries to call TfHashAppend() on its
407 /// argument. This is the primary customization point for TfHash. If that is
408 /// not viable, TfHash tries to call std::hash<T>{}(). Lastly, TfHash makes an
409 /// unqualified call to hash_value.
410 ///
411 /// The best way to add TfHash support for user-defined types is to provide a
412 /// function overload like the following.
413 ///
414 /// \code
415 /// template <class HashState>
416 /// void TfHashAppend(HashState &h, MyType const &myObj)
417 /// h.Append(myObject._member1,
418 /// myObject._member2,
419 /// myObject._member3);
420 /// h.AppendContiguous(myObject._memberArray, myObject._numArrayElems);
421 /// }
422 /// \endcode
423 ///
424 /// The HashState object is left deliberately unspecified, so that different
425 /// hash state objects may be used in different circumstances without modifying
426 /// this support code, and without excess abstraction penalty. The methods
427 /// available for use in TfHashAppend overloads are:
428 ///
429 /// \code
430 /// // Append several objects to the hash state. This invokes the TfHash
431 /// // mechanism so it works for any types supported by TfHash.
432 /// template <class... Args>
433 /// void HashState::Append(Args &&... args);
434 ///
435 /// // Append contiguous objects to the hash state. Note that this is
436 /// // explicitly *not* guaranteed to produce the same result as calling
437 /// // Append() with each object in order.
438 /// template <class T>
439 /// void HashState::AppendContiguous(T const *objects, size_t numObjects);
440 ///
441 /// // Append a general range of objects to the hash state. Note that this is
442 /// // explicitly *not* guaranteed to produce the same result as calling
443 /// // Append() with each object in order.
444 /// template <class Iter>
445 /// void AppendRange(Iter f, Iter l);
446 /// \endcode
447 ///
448 /// The \c TfHash class function object supports:
449 /// \li integral types (including bool)
450 /// \li floating point types
451 /// \li std::string
452 /// \li TfRefPtr
453 /// \li TfWeakPtr
454 /// \li TfEnum
455 /// \li const void*
456 /// \li types that provide overloads for TfHashAppend
457 /// \li types that provide overloads for std::hash
458 /// \li types that provide overloads for hash_value
459 ///
460 /// The \c TfHash class can be used to instantiate a \c TfHashMap with \c string
461 /// keys as follows:
462 /// \code
463 /// TfHashMap<string, int, TfHash> m;
464 /// m["abc"] = 1;
465 /// \endcode
466 ///
467 /// \c TfHash()(const char*) is disallowed to avoid confusion of whether
468 /// the pointer or the string is being hashed. If you want to hash a
469 /// C-string use \c TfHashCString and if you want to hash a \c char* use
470 /// \c TfHashCharPtr.
471 ///
472 class TfHash {
473 public:
474  /// Produce a hash code for \p obj. See the class documentation for
475  /// details.
476  template <class T>
477  auto operator()(T &&obj) const ->
478  decltype(Tf_HashImpl(std::declval<Tf_HashState &>(),
479  std::forward<T>(obj), 0), size_t()) {
480  Tf_HashState h;
481  Tf_HashImpl(h, std::forward<T>(obj), 0);
482  return h.GetCode();
483  }
484 
485  /// Produce a hash code by combining the hash codes of several objects.
486  template <class... Args>
487  static size_t Combine(Args &&... args) {
488  Tf_HashState h;
489  _CombineImpl(h, std::forward<Args>(args)...);
490  return h.GetCode();
491  }
492 
493 private:
494  template <class HashState, class T, class... Args>
495  static void _CombineImpl(HashState &h, T &&obj, Args &&... rest) {
496  Tf_HashImpl(h, std::forward<T>(obj), 0);
497  _CombineImpl(h, std::forward<Args>(rest)...);
498  }
499 
500  template <class HashState>
501  static void _CombineImpl(HashState &h) {
502  // base case does nothing
503  }
504 };
505 
506 /// A hash function object that hashes the address of a char pointer.
508  size_t operator()(const char* ptr) const;
509 };
510 
511 /// A hash function object that hashes null-terminated c-string content.
513  size_t operator()(const char* ptr) const;
514 };
515 
516 /// A function object that compares two c-strings for equality.
518  bool operator()(const char* lhs, const char* rhs) const;
519 };
520 
522 
523 #endif
type
Definition: core.h:556
A hash function object that hashes the address of a char pointer.
Definition: hash.h:507
TfCStrHashWrapper TfHashAsCStr(char const *cstr)
Definition: hash.h:170
#define TF_API
Definition: api.h:23
const GLdouble * v
Definition: glcorearb.h:837
GLsizei const GLfloat * value
Definition: glcorearb.h:824
GLdouble s
Definition: glad.h:3009
GLint y
Definition: glcorearb.h:103
char const * cstr
Definition: hash.h:159
void Append(Args &&...args)
Definition: hash.h:221
Definition: hash.h:472
GLfloat f
Definition: glcorearb.h:1926
size_t GetCode() const
Definition: hash.h:238
size_t operator()(const char *ptr) const
TfCStrHashWrapper(char const *cstr)
Definition: hash.h:158
GLuint GLuint end
Definition: glcorearb.h:475
void AppendRange(Iter f, Iter l)
Definition: hash.h:233
size_t operator()(const char *ptr) const
GLint GLenum GLint x
Definition: glcorearb.h:409
GLdouble t
Definition: glad.h:2397
A hash function object that hashes null-terminated c-string content.
Definition: hash.h:512
bool operator()(const char *lhs, const char *rhs) const
GLfloat GLfloat GLfloat GLfloat h
Definition: glcorearb.h:2002
A function object that compares two c-strings for equality.
Definition: hash.h:517
auto Tf_HashImpl(HashState &h, T &&obj, long) -> decltype(std::hash< typename std::decay< T >::type >()(std::forward< T >(obj)), void())
Definition: hash.h:189
void AppendContiguous(T const *elems, size_t numElems)
Definition: hash.h:227
static size_t Combine(Args &&...args)
Produce a hash code by combining the hash codes of several objects.
Definition: hash.h:487
PXR_NAMESPACE_CLOSE_SCOPE PXR_NAMESPACE_OPEN_SCOPE
Definition: path.h:1425
auto ptr(T p) -> const void *
Definition: format.h:4331
GLuint GLfloat * val
Definition: glcorearb.h:1608
#define PXR_NAMESPACE_CLOSE_SCOPE
Definition: pxr.h:74
**If you just want to fire and args
Definition: thread.h:618
PXR_NAMESPACE_OPEN_SCOPE std::enable_if_t< std::is_integral< T >::value > TfHashAppend(HashState &h, T integral)
Definition: hash.h:33
size_t hash_value(const CH_ChannelRef &ref)
GA_API const UT_StringHolder rest
Definition: format.h:4365
auto operator()(T &&obj) const -> decltype(Tf_HashImpl(std::declval< Tf_HashState & >(), std::forward< T >(obj), 0), size_t())
Definition: hash.h:477
PcpNodeRef_ChildrenIterator begin(const PcpNodeRef::child_const_range &r)
Support for range-based for loops for PcpNodeRef children ranges.
Definition: node.h:566