HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hash.h
Go to the documentation of this file.
1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 // names, trademarks, service marks, or product names of the Licensor
11 // and its affiliates, except as required to comply with Section 4(c) of
12 // the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 // http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_TF_HASH_H
25 #define PXR_BASE_TF_HASH_H
26 
27 /// \file tf/hash.h
28 /// \ingroup group_tf_String
29 
30 #include "pxr/pxr.h"
31 #include "pxr/base/tf/tf.h"
32 #include "pxr/base/tf/api.h"
33 
34 #include <cstring>
35 #include <string>
36 #include <map>
37 #include <memory>
38 #include <set>
39 #include <typeindex>
40 #include <type_traits>
41 #include <utility>
42 #include <vector>
43 
45 
46 // Support integers.
47 template <class HashState, class T>
49 TfHashAppend(HashState &h, T integral)
50 {
51  h.Append(integral);
52 }
53 
54 // Simple metafunction that returns an unsigned integral type given a size in
55 // bytes.
56 template <size_t Size> struct Tf_SizedUnsignedInt;
57 template <> struct Tf_SizedUnsignedInt<1> { using type = uint8_t; };
58 template <> struct Tf_SizedUnsignedInt<2> { using type = uint16_t; };
59 template <> struct Tf_SizedUnsignedInt<4> { using type = uint32_t; };
60 template <> struct Tf_SizedUnsignedInt<8> { using type = uint64_t; };
61 
62 // Support enums.
63 template <class HashState, class Enum>
65 TfHashAppend(HashState &h, Enum e)
66 {
67  h.Append(static_cast<std::underlying_type_t<Enum>>(e));
68 }
69 
70 // Support floating point.
71 template <class HashState, class T>
73 TfHashAppend(HashState &h, T fp)
74 {
75  // We want both positive and negative zero to hash the same, so we have to
76  // check against zero here.
77  typename Tf_SizedUnsignedInt<sizeof(T)>::type intbuf = 0;
78  if (fp != static_cast<T>(0)) {
79  memcpy(&intbuf, &fp, sizeof(T));
80  }
81  h.Append(intbuf);
82 }
83 
84 // Support std::pair.
85 template <class HashState, class T, class U>
86 inline void
87 TfHashAppend(HashState &h, std::pair<T, U> const &p)
88 {
89  h.Append(p.first);
90  h.Append(p.second);
91 }
92 
93 // Support std::vector. std::vector<bool> specialized below.
94 template <class HashState, class T>
95 inline void
96 TfHashAppend(HashState &h, std::vector<T> const &vec)
97 {
98  static_assert(!std::is_same_v<std::remove_cv_t<T>, bool>,
99  "Unexpected usage of vector of 'bool'."
100  "Expected explicit overload.");
101  h.AppendContiguous(vec.data(), vec.size());
102 }
103 
104 // Support std::vector<bool>.
105 template <class HashState>
106 inline void
107 TfHashAppend(HashState &h, std::vector<bool> const &vec)
108 {
109  h.Append(std::hash<std::vector<bool>>{}(vec));
110 }
111 
112 // Support std::set.
113 // NOTE: Supporting std::unordered_set is complicated because the traversal
114 // order is not guaranteed
115 template <class HashState, class T, class Compare>
116 inline void
117 TfHashAppend(HashState& h, std::set<T, Compare> const &elements)
118 {
119  h.AppendRange(std::begin(elements), std::end(elements));
120 }
121 
122 // Support std::map.
123 // NOTE: Supporting std::unordered_map is complicated because the traversal
124 // order is not guaranteed
125 template <class HashState, class Key, class Value, class Compare>
126 inline void
127 TfHashAppend(HashState& h, std::map<Key, Value, Compare> const &elements)
128 {
129  h.AppendRange(std::begin(elements), std::end(elements));
130 }
131 
132 // Support for hashing std::string.
133 template <class HashState>
134 inline void
135 TfHashAppend(HashState &h, const std::string& s)
136 {
137  return h.AppendContiguous(s.c_str(), s.length());
138 }
139 
140 // Support for hashing pointers, but we explicitly delete the version for
141 // [const] char pointers. See more below.
142 template <class HashState, class T>
143 inline void
144 TfHashAppend(HashState &h, const T* ptr) {
145  return h.Append(reinterpret_cast<uintptr_t>(ptr));
146 }
147 
148 // We refuse to hash [const] char *. You're almost certainly trying to hash the
149 // pointed-to string and this will not do that (it will hash the pointer
150 // itself). To hash a c-style null terminated string, you can use
151 // TfHashAsCStr() to indicate the intent, or use TfHashCString. If you
152 // really want to hash the pointer then use static_cast<const void*>(ptr) or
153 // TfHashCharPtr.
154 template <class HashState>
155 inline void TfHashAppend(HashState &h, char const *ptr) = delete;
156 template <class HashState>
157 inline void TfHashAppend(HashState &h, char *ptr) = delete;
158 
159 /// A structure that wraps a char pointer, indicating intent that it should be
160 /// hashed as a c-style null terminated string. See TfhashAsCStr().
162 {
163  explicit TfCStrHashWrapper(char const *cstr) : cstr(cstr) {}
164  char const *cstr;
165 };
166 
167 /// Indicate that a char pointer is intended to be hashed as a C-style null
168 /// terminated string. Use this to wrap a char pointer in a HashState::Append()
169 /// call when implementing a TfHashAppend overload.
170 ///
171 /// This structure provides a lightweight view on the char pointer passed to its
172 /// constructor. It does not copy the data or participate in its lifetime. The
173 /// passed char pointer must remain valid as long as this struct is used.
174 inline TfCStrHashWrapper
175 TfHashAsCStr(char const *cstr)
176 {
177  return TfCStrHashWrapper(cstr);
178 }
179 
180 template <class HashState>
181 inline void TfHashAppend(HashState &h, TfCStrHashWrapper hcstr)
182 {
183  return h.AppendContiguous(hcstr.cstr, std::strlen(hcstr.cstr));
184 }
185 
186 // Implementation detail: dispatch based on hash capability: Try TfHashAppend
187 // first, otherwise try std::hash, followed by hash_value. We rely on a
188 // combination of expression SFINAE and establishing preferred order by passing
189 // a 0 constant and having the overloads take int (highest priority), long
190 // (next priority) and '...' (lowest priority).
191 
192 // std::hash version, attempted second.
193 template <class HashState, class T>
194 inline auto Tf_HashImpl(HashState &h, T &&obj, long)
195  -> decltype(std::hash<typename std::decay<T>::type>()(
196  std::forward<T>(obj)), void())
197 {
198  TfHashAppend(
199  h, std::hash<typename std::decay<T>::type>()(std::forward<T>(obj)));
200 }
201 
202 // hash_value, attempted last.
203 template <class HashState, class T>
204 inline auto Tf_HashImpl(HashState &h, T &&obj, ...)
205  -> decltype(hash_value(std::forward<T>(obj)), void())
206 {
207  TfHashAppend(h, hash_value(std::forward<T>(obj)));
208 }
209 
210 // TfHashAppend, attempted first.
211 template <class HashState, class T>
212 inline auto Tf_HashImpl(HashState &h, T &&obj, int)
213  -> decltype(TfHashAppend(h, std::forward<T>(obj)), void())
214 {
215  TfHashAppend(h, std::forward<T>(obj));
216 }
217 
218 // Implementation detail, CRTP base that provides the public interface for hash
219 // state implementations.
220 template <class Derived>
222 {
223 public:
224  // Append several objects to the hash state.
225  template <class... Args>
226  void Append(Args &&... args) {
227  _AppendImpl(std::forward<Args>(args)...);
228  }
229 
230  // Append contiguous objects to the hash state.
231  template <class T>
232  void AppendContiguous(T const *elems, size_t numElems) {
233  this->_AsDerived()._AppendContiguous(elems, numElems);
234  }
235 
236  // Append a range of objects to the hash state.
237  template <class Iter>
238  void AppendRange(Iter f, Iter l) {
239  this->_AsDerived()._AppendRange(f, l);
240  }
241 
242  // Return the hash code for the current state.
243  size_t GetCode() const {
244  return this->_AsDerived()._GetCode();
245  }
246 
247 private:
248  template <class T, class... Args>
249  void _AppendImpl(T &&obj, Args &&... rest) {
250  this->_AsDerived()._Append(std::forward<T>(obj));
251  _AppendImpl(std::forward<Args>(rest)...);
252  }
253  void _AppendImpl() const {
254  // base case intentionally empty.
255  }
256 
257  Derived &_AsDerived() {
258  return *static_cast<Derived *>(this);
259  }
260 
261  Derived const &_AsDerived() const {
262  return *static_cast<Derived const *>(this);
263  }
264 };
265 
266 // Implementation detail, accumulates hashes.
267 class Tf_HashState : public Tf_HashStateAPI<Tf_HashState>
268 {
269 private:
271 
272  // Go thru Tf_HashImpl for non-integers.
273  template <class T>
274  std::enable_if_t<!std::is_integral<std::decay_t<T>>::value>
275  _Append(T &&obj) {
276  Tf_HashImpl(*this, std::forward<T>(obj), 0);
277  }
278 
279  // Integers bottom out here.
280  template <class T>
282  _Append(T i) {
283  if (!_didOne) {
284  _state = i;
285  _didOne = true;
286  }
287  else {
288  _state = _Combine(_state, i);
289  }
290  }
291 
292  // Append contiguous objects.
293  template <class T>
295  _AppendContiguous(T const *elems, size_t numElems) {
296  _AppendBytes(reinterpret_cast<char const *>(elems),
297  numElems * sizeof(T));
298  }
299 
300  // Append contiguous objects.
301  template <class T>
303  _AppendContiguous(T const *elems, size_t numElems) {
304  while (numElems--) {
305  Append(*elems++);
306  }
307  }
308 
309  // Append a range.
310  template <class Iter>
311  void _AppendRange(Iter f, Iter l) {
312  while (f != l) {
313  _Append(*f++);
314  }
315  }
316 
317  /// Append a number of bytes to the hash state.
318  TF_API void _AppendBytes(char const *bytes, size_t numBytes);
319 
320  // Return the hash code for the accumulated hash state.
321  size_t _GetCode() const {
322  // This is based on Knuth's multiplicative hash for integers. The
323  // constant is the closest prime to the binary expansion of the inverse
324  // golden ratio. The best way to produce a hash table bucket index from
325  // the result is to shift the result right, since the higher order bits
326  // have the most entropy. But since we can't know the number of buckets
327  // in a table that's using this, we just reverse the byte order instead,
328  // to get the highest entropy bits into the low-order bytes.
329  return _SwapByteOrder(_state * 11400714819323198549ULL);
330  }
331 
332  // This turns into a single bswap/pshufb type instruction on most compilers.
333  inline uint64_t
334  _SwapByteOrder(uint64_t val) const {
335  val =
336  ((val & 0xFF00000000000000u) >> 56u) |
337  ((val & 0x00FF000000000000u) >> 40u) |
338  ((val & 0x0000FF0000000000u) >> 24u) |
339  ((val & 0x000000FF00000000u) >> 8u) |
340  ((val & 0x00000000FF000000u) << 8u) |
341  ((val & 0x0000000000FF0000u) << 24u) |
342  ((val & 0x000000000000FF00u) << 40u) |
343  ((val & 0x00000000000000FFu) << 56u);
344  return val;
345  }
346 
347  size_t _Combine(size_t x, size_t y) const {
348  // This is our hash combiner. The task is, given two hash codes x and
349  // y, compute a single hash code. One way to do this is to exclusive-or
350  // the two codes together, but this can produce bad results if they
351  // differ by some fixed amount, For example if the input codes are
352  // multiples of 32, and the two codes we're given are N and N + 32 (this
353  // happens commonly when the hashed values are memory addresses) then
354  // the resulting hash codes for successive pairs of these produces many
355  // repeating values (32, 96, 32, XXX, 32, 96, 32, YYY...). That's a lot
356  // of collisions.
357  //
358  // Instead we combine hash values by assigning numbers to the lattice
359  // points in the plane, and then treating the inputs x and y as
360  // coordinates identifying a lattice point. Then the combined hash
361  // value is just the number assigned to the lattice point. This way
362  // each unique input pair (x, y) gets a unique output hash code.
363  //
364  // We number lattice points by triangular numbers like this:
365  //
366  // X 0 1 2 3 4 5
367  // Y
368  // 0 0 2 5 9 14 20
369  // 1 1 4 8 13 19 26
370  // 2 3 7 12 18 25 33
371  // 3 6 11 17 24 32 41
372  // 4 10 16 23 31 40 50
373  // 5 15 22 30 39 49 60
374  //
375  // This takes a couple of additions and a multiplication, which is a bit
376  // more expensive than something like an exclusive or, but the quality
377  // improvement outweighs the added expense.
378  x += y;
379  return y + x * (x + 1) / 2;
380  }
381 
382  size_t _state = 0;
383  bool _didOne = false;
384 };
385 
386 /// \class TfHash
387 /// \ingroup group_tf_String
388 ///
389 /// A user-extensible hashing mechanism for use with runtime hash tables.
390 ///
391 /// The hash functions here are appropriate for storing objects in runtime hash
392 /// tables. They are not appropriate for document signatures / fingerprinting
393 /// or for storage and offline use. No specific guarantee is made about hash
394 /// function quality, and the resulting hash codes are only 64-bits wide.
395 /// Callers must assume that collisions will occur and be prepared to deal with
396 /// them.
397 ///
398 /// Additionally, no guarantee is made about repeatability from run-to-run.
399 /// That is, within a process lifetime an object's hash code will not change
400 /// (provided its internal state does not change). But an object with
401 /// equivalent state in a future run of the same process may hash differently.
402 ///
403 /// At the time of this writing we observe good performance combined with the
404 /// "avalanche" quality (~50% output bit flip probability for a single input bit
405 /// flip) in the low-order 40 output bits. Higher order bits do not achieve
406 /// avalanche, and the highest order 8 bits are particularly poor. But for our
407 /// purposes we deem this performance/quality tradeoff acceptable.
408 ///
409 /// This mechanism has builtin support for integral and floating point types,
410 /// some STL types and types in Tf. TfHash uses three methods to attempt to
411 /// hash a passed object. First, TfHash tries to call TfHashAppend() on its
412 /// argument. This is the primary customization point for TfHash. If that is
413 /// not viable, TfHash tries to call std::hash<T>{}(). Lastly, TfHash makes an
414 /// unqualified call to hash_value.
415 ///
416 /// The best way to add TfHash support for user-defined types is to provide a
417 /// function overload like the following.
418 ///
419 /// \code
420 /// template <class HashState>
421 /// void TfHashAppend(HashState &h, MyType const &myObj)
422 /// h.Append(myObject._member1,
423 /// myObject._member2,
424 /// myObject._member3);
425 /// h.AppendContiguous(myObject._memberArray, myObject._numArrayElems);
426 /// }
427 /// \endcode
428 ///
429 /// The HashState object is left deliberately unspecified, so that different
430 /// hash state objects may be used in different circumstances without modifying
431 /// this support code, and without excess abstraction penalty. The methods
432 /// available for use in TfHashAppend overloads are:
433 ///
434 /// \code
435 /// // Append several objects to the hash state. This invokes the TfHash
436 /// // mechanism so it works for any types supported by TfHash.
437 /// template <class... Args>
438 /// void HashState::Append(Args &&... args);
439 ///
440 /// // Append contiguous objects to the hash state. Note that this is
441 /// // explicitly *not* guaranteed to produce the same result as calling
442 /// // Append() with each object in order.
443 /// template <class T>
444 /// void HashState::AppendContiguous(T const *objects, size_t numObjects);
445 ///
446 /// // Append a general range of objects to the hash state. Note that this is
447 /// // explicitly *not* guaranteed to produce the same result as calling
448 /// // Append() with each object in order.
449 /// template <class Iter>
450 /// void AppendRange(Iter f, Iter l);
451 /// \endcode
452 ///
453 /// The \c TfHash class function object supports:
454 /// \li integral types (including bool)
455 /// \li floating point types
456 /// \li std::string
457 /// \li TfRefPtr
458 /// \li TfWeakPtr
459 /// \li TfEnum
460 /// \li const void*
461 /// \li types that provide overloads for TfHashAppend
462 /// \li types that provide overloads for std::hash
463 /// \li types that provide overloads for hash_value
464 ///
465 /// The \c TfHash class can be used to instantiate a \c TfHashMap with \c string
466 /// keys as follows:
467 /// \code
468 /// TfHashMap<string, int, TfHash> m;
469 /// m["abc"] = 1;
470 /// \endcode
471 ///
472 /// \c TfHash()(const char*) is disallowed to avoid confusion of whether
473 /// the pointer or the string is being hashed. If you want to hash a
474 /// C-string use \c TfHashCString and if you want to hash a \c char* use
475 /// \c TfHashCharPtr.
476 ///
477 class TfHash {
478 public:
479  /// Produce a hash code for \p obj. See the class documentation for
480  /// details.
481  template <class T>
482  auto operator()(T &&obj) const ->
483  decltype(Tf_HashImpl(std::declval<Tf_HashState &>(),
484  std::forward<T>(obj), 0), size_t()) {
485  Tf_HashState h;
486  Tf_HashImpl(h, std::forward<T>(obj), 0);
487  return h.GetCode();
488  }
489 
490  /// Produce a hash code by combining the hash codes of several objects.
491  template <class... Args>
492  static size_t Combine(Args &&... args) {
493  Tf_HashState h;
494  _CombineImpl(h, std::forward<Args>(args)...);
495  return h.GetCode();
496  }
497 
498 private:
499  template <class HashState, class T, class... Args>
500  static void _CombineImpl(HashState &h, T &&obj, Args &&... rest) {
501  Tf_HashImpl(h, std::forward<T>(obj), 0);
502  _CombineImpl(h, std::forward<Args>(rest)...);
503  }
504 
505  template <class HashState>
506  static void _CombineImpl(HashState &h) {
507  // base case does nothing
508  }
509 };
510 
511 /// A hash function object that hashes the address of a char pointer.
513  size_t operator()(const char* ptr) const;
514 };
515 
516 /// A hash function object that hashes null-terminated c-string content.
518  size_t operator()(const char* ptr) const;
519 };
520 
521 /// A function object that compares two c-strings for equality.
523  bool operator()(const char* lhs, const char* rhs) const;
524 };
525 
527 
528 #endif
A hash function object that hashes the address of a char pointer.
Definition: hash.h:512
TfCStrHashWrapper TfHashAsCStr(char const *cstr)
Definition: hash.h:175
#define TF_API
Definition: api.h:40
GLsizei const GLchar *const * string
Definition: glcorearb.h:814
GLsizei const GLfloat * value
Definition: glcorearb.h:824
GLdouble s
Definition: glad.h:3009
GLint y
Definition: glcorearb.h:103
char const * cstr
Definition: hash.h:164
void Append(Args &&...args)
Definition: hash.h:226
Definition: hash.h:477
GLfloat f
Definition: glcorearb.h:1926
size_t GetCode() const
Definition: hash.h:243
size_t operator()(const char *ptr) const
TfCStrHashWrapper(char const *cstr)
Definition: hash.h:163
GLuint GLuint end
Definition: glcorearb.h:475
void AppendRange(Iter f, Iter l)
Definition: hash.h:238
size_t operator()(const char *ptr) const
GLint GLenum GLint x
Definition: glcorearb.h:409
A hash function object that hashes null-terminated c-string content.
Definition: hash.h:517
bool operator()(const char *lhs, const char *rhs) const
GLfloat GLfloat GLfloat GLfloat h
Definition: glcorearb.h:2002
A function object that compares two c-strings for equality.
Definition: hash.h:522
auto Tf_HashImpl(HashState &h, T &&obj, long) -> decltype(std::hash< typename std::decay< T >::type >()(std::forward< T >(obj)), void())
Definition: hash.h:194
void AppendContiguous(T const *elems, size_t numElems)
Definition: hash.h:232
static size_t Combine(Args &&...args)
Produce a hash code by combining the hash codes of several objects.
Definition: hash.h:492
PXR_NAMESPACE_CLOSE_SCOPE PXR_NAMESPACE_OPEN_SCOPE
Definition: path.h:1432
auto ptr(T p) -> const void *
Definition: format.h:2448
GLuint GLfloat * val
Definition: glcorearb.h:1608
#define PXR_NAMESPACE_CLOSE_SCOPE
Definition: pxr.h:91
**If you just want to fire and args
Definition: thread.h:609
PXR_NAMESPACE_OPEN_SCOPE std::enable_if_t< std::is_integral< T >::value > TfHashAppend(HashState &h, T integral)
Definition: hash.h:49
Definition: core.h:1131
size_t hash_value(const CH_ChannelRef &ref)
type
Definition: core.h:1059
GA_API const UT_StringHolder rest
Definition: format.h:2459
auto operator()(T &&obj) const -> decltype(Tf_HashImpl(std::declval< Tf_HashState & >(), std::forward< T >(obj), 0), size_t())
Definition: hash.h:482
PcpNodeRef_ChildrenIterator begin(const PcpNodeRef::child_const_range &r)
Support for range-based for loops for PcpNodeRef children ranges.
Definition: node.h:558