HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hash.h
Go to the documentation of this file.
1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 // names, trademarks, service marks, or product names of the Licensor
11 // and its affiliates, except as required to comply with Section 4(c) of
12 // the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 // http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_TF_HASH_H
25 #define PXR_BASE_TF_HASH_H
26 
27 /// \file tf/hash.h
28 /// \ingroup group_tf_String
29 
30 #include "pxr/pxr.h"
31 #include "pxr/base/tf/tf.h"
32 #include "pxr/base/tf/api.h"
33 
34 #include <cstring>
35 #include <string>
36 #include <map>
37 #include <memory>
38 #include <set>
39 #include <typeindex>
40 #include <type_traits>
41 #include <utility>
42 #include <vector>
43 
45 
46 // Support integers.
47 template <class HashState, class T>
49 TfHashAppend(HashState &h, T integral)
50 {
51  h.Append(integral);
52 }
53 
54 // Simple metafunction that returns an unsigned integral type given a size in
55 // bytes.
56 template <size_t Size> struct Tf_SizedUnsignedInt;
57 template <> struct Tf_SizedUnsignedInt<1> { using type = uint8_t; };
58 template <> struct Tf_SizedUnsignedInt<2> { using type = uint16_t; };
59 template <> struct Tf_SizedUnsignedInt<4> { using type = uint32_t; };
60 template <> struct Tf_SizedUnsignedInt<8> { using type = uint64_t; };
61 
62 // Support enums.
63 template <class HashState, class Enum>
65 TfHashAppend(HashState &h, Enum e)
66 {
67  h.Append(static_cast<std::underlying_type_t<Enum>>(e));
68 }
69 
70 // Support floating point.
71 template <class HashState, class T>
73 TfHashAppend(HashState &h, T fp)
74 {
75  // We want both positive and negative zero to hash the same, so we have to
76  // check against zero here.
77  typename Tf_SizedUnsignedInt<sizeof(T)>::type intbuf = 0;
78  if (fp != static_cast<T>(0)) {
79  memcpy(&intbuf, &fp, sizeof(T));
80  }
81  h.Append(intbuf);
82 }
83 
84 // Support std::pair.
85 template <class HashState, class T, class U>
86 inline void
87 TfHashAppend(HashState &h, std::pair<T, U> const &p)
88 {
89  h.Append(p.first);
90  h.Append(p.second);
91 }
92 
93 // Support std::vector. std::vector<bool> specialized below.
94 template <class HashState, class T>
95 inline std::enable_if_t<!std::is_same<std::remove_const_t<T>, bool>::value>
96 TfHashAppend(HashState &h, std::vector<T> const &vec)
97 {
98  h.AppendContiguous(vec.data(), vec.size());
99 }
100 
101 // Support std::vector<bool>.
102 template <class HashState>
103 inline void
104 TfHashAppend(HashState &h, std::vector<bool> const &vec)
105 {
106  h.Append(std::hash<std::vector<bool>>{}(vec));
107 }
108 
109 // Support std::set.
110 // NOTE: Supporting std::unordered_set is complicated because the traversal
111 // order is not guaranteed
112 template <class HashState, class T, class Compare>
113 inline void
114 TfHashAppend(HashState& h, std::set<T, Compare> const &elements)
115 {
116  h.AppendRange(std::begin(elements), std::end(elements));
117 }
118 
119 // Support std::map.
120 // NOTE: Supporting std::unordered_map is complicated because the traversal
121 // order is not guaranteed
122 template <class HashState, class Key, class Value, class Compare>
123 inline void
124 TfHashAppend(HashState& h, std::map<Key, Value, Compare> const &elements)
125 {
126  h.AppendRange(std::begin(elements), std::end(elements));
127 }
128 
129 // Support std::type_index. When TfHash support for std::hash is enabled,
130 // this explicit specialization will no longer be necessary.
131 template <class HashState>
132 inline void
133 TfHashAppend(HashState& h, std::type_index const &type_index)
134 {
135  return h.Append(type_index.hash_code());
136 }
137 
138 // Support for hashing std::string.
139 template <class HashState>
140 inline void
141 TfHashAppend(HashState &h, const std::string& s)
142 {
143  return h.AppendContiguous(s.c_str(), s.length());
144 }
145 
146 // Support for hashing pointers, but we explicitly delete the version for
147 // [const] char pointers. See more below.
148 template <class HashState, class T>
149 inline void
150 TfHashAppend(HashState &h, const T* ptr) {
151  return h.Append(reinterpret_cast<uintptr_t>(ptr));
152 }
153 
154 // Support for hashing std::shared_ptr. When TfHash support for std::hash is
155 // enabled, this explicit specialization will no longer be necessary.
156 template <class HashState, class T>
157 inline void
158 TfHashAppend(HashState &h, const std::shared_ptr<T>& ptr) {
159  h.Append(std::hash<std::shared_ptr<T>>{}(ptr));
160 }
161 
162 // Support for hashing std::unique_ptr. When TfHash support for std::hash is
163 // enabled, this explicit specialization will no longer be necessary.
164 template <class HashState, class T>
165 inline void
166 TfHashAppend(HashState &h, const std::unique_ptr<T>& ptr) {
167  h.Append(std::hash<std::unique_ptr<T>>{}(ptr));
168 }
169 
170 // We refuse to hash [const] char *. You're almost certainly trying to hash the
171 // pointed-to string and this will not do that (it will hash the pointer
172 // itself). To hash a c-style null terminated string, you can use
173 // TfHashAsCStr() to indicate the intent, or use TfHashCString. If you
174 // really want to hash the pointer then use static_cast<const void*>(ptr) or
175 // TfHashCharPtr.
176 template <class HashState>
177 inline void TfHashAppend(HashState &h, char const *ptr) = delete;
178 template <class HashState>
179 inline void TfHashAppend(HashState &h, char *ptr) = delete;
180 
181 /// A structure that wraps a char pointer, indicating intent that it should be
182 /// hashed as a c-style null terminated string. See TfhashAsCStr().
184 {
185  explicit TfCStrHashWrapper(char const *cstr) : cstr(cstr) {}
186  char const *cstr;
187 };
188 
189 /// Indicate that a char pointer is intended to be hashed as a C-style null
190 /// terminated string. Use this to wrap a char pointer in a HashState::Append()
191 /// call when implementing a TfHashAppend overload.
192 ///
193 /// This structure provides a lightweight view on the char pointer passed to its
194 /// constructor. It does not copy the data or participate in its lifetime. The
195 /// passed char pointer must remain valid as long as this struct is used.
196 inline TfCStrHashWrapper
197 TfHashAsCStr(char const *cstr)
198 {
199  return TfCStrHashWrapper(cstr);
200 }
201 
202 template <class HashState>
203 inline void TfHashAppend(HashState &h, TfCStrHashWrapper hcstr)
204 {
205  return h.AppendContiguous(hcstr.cstr, std::strlen(hcstr.cstr));
206 }
207 
208 // Implementation detail: dispatch based on hash capability: Try TfHashAppend
209 // first, otherwise try hash_value. We'd like to otherwise try std::hash<T>,
210 // but std::hash<> is not SFINAE-friendly until c++17 and this code needs to
211 // support c++14 currently. We rely on a combination of expression SFINAE and
212 // establishing preferred order by passing a 0 constant and having the overloads
213 // take int (highest priority), long (next priority) and '...' (lowest
214 // priority).
215 
216 // std::hash version, attempted last. Consider adding when we move to
217 // C++17 or newer.
218 /*
219 template <class HashState, class T>
220 inline auto Tf_HashImpl(HashState &h, T &&obj, ...)
221  -> decltype(std::hash<typename std::decay<T>::type>()(
222  std::forward<T>(obj)), void())
223 {
224  TfHashAppend(
225  h, std::hash<typename std::decay<T>::type>()(std::forward<T>(obj)));
226 }
227 */
228 
229 // hash_value, attempted second.
230 template <class HashState, class T>
231 inline auto Tf_HashImpl(HashState &h, T &&obj, long)
232  -> decltype(hash_value(std::forward<T>(obj)), void())
233 {
234  TfHashAppend(h, hash_value(std::forward<T>(obj)));
235 }
236 
237 // TfHashAppend, attempted first.
238 template <class HashState, class T>
239 inline auto Tf_HashImpl(HashState &h, T &&obj, int)
240  -> decltype(TfHashAppend(h, std::forward<T>(obj)), void())
241 {
242  TfHashAppend(h, std::forward<T>(obj));
243 }
244 
245 // Implementation detail, CRTP base that provides the public interface for hash
246 // state implementations.
247 template <class Derived>
249 {
250 public:
251  // Append several objects to the hash state.
252  template <class... Args>
253  void Append(Args &&... args) {
254  _AppendImpl(std::forward<Args>(args)...);
255  }
256 
257  // Append contiguous objects to the hash state.
258  template <class T>
259  void AppendContiguous(T const *elems, size_t numElems) {
260  this->_AsDerived()._AppendContiguous(elems, numElems);
261  }
262 
263  // Append a range of objects to the hash state.
264  template <class Iter>
265  void AppendRange(Iter f, Iter l) {
266  this->_AsDerived()._AppendRange(f, l);
267  }
268 
269  // Return the hash code for the current state.
270  size_t GetCode() const {
271  return this->_AsDerived()._GetCode();
272  }
273 
274 private:
275  template <class T, class... Args>
276  void _AppendImpl(T &&obj, Args &&... rest) {
277  this->_AsDerived()._Append(std::forward<T>(obj));
278  _AppendImpl(std::forward<Args>(rest)...);
279  }
280  void _AppendImpl() const {
281  // base case intentionally empty.
282  }
283 
284  Derived &_AsDerived() {
285  return *static_cast<Derived *>(this);
286  }
287 
288  Derived const &_AsDerived() const {
289  return *static_cast<Derived const *>(this);
290  }
291 };
292 
293 // Implementation detail, accumulates hashes.
294 class Tf_HashState : public Tf_HashStateAPI<Tf_HashState>
295 {
296 private:
298 
299  // Go thru Tf_HashImpl for non-integers.
300  template <class T>
301  std::enable_if_t<!std::is_integral<std::decay_t<T>>::value>
302  _Append(T &&obj) {
303  Tf_HashImpl(*this, std::forward<T>(obj), 0);
304  }
305 
306  // Integers bottom out here.
307  template <class T>
309  _Append(T i) {
310  if (!_didOne) {
311  _state = i;
312  _didOne = true;
313  }
314  else {
315  _state = _Combine(_state, i);
316  }
317  }
318 
319  // Append contiguous objects.
320  template <class T>
322  _AppendContiguous(T const *elems, size_t numElems) {
323  _AppendBytes(reinterpret_cast<char const *>(elems),
324  numElems * sizeof(T));
325  }
326 
327  // Append contiguous objects.
328  template <class T>
330  _AppendContiguous(T const *elems, size_t numElems) {
331  while (numElems--) {
332  Append(*elems++);
333  }
334  }
335 
336  // Append a range.
337  template <class Iter>
338  void _AppendRange(Iter f, Iter l) {
339  while (f != l) {
340  _Append(*f++);
341  }
342  }
343 
344  /// Append a number of bytes to the hash state.
345  TF_API void _AppendBytes(char const *bytes, size_t numBytes);
346 
347  // Return the hash code for the accumulated hash state.
348  size_t _GetCode() const {
349  // This is based on Knuth's multiplicative hash for integers. The
350  // constant is the closest prime to the binary expansion of the inverse
351  // golden ratio. The best way to produce a hash table bucket index from
352  // the result is to shift the result right, since the higher order bits
353  // have the most entropy. But since we can't know the number of buckets
354  // in a table that's using this, we just reverse the byte order instead,
355  // to get the highest entropy bits into the low-order bytes.
356  return _SwapByteOrder(_state * 11400714819323198549ULL);
357  }
358 
359  // This turns into a single bswap/pshufb type instruction on most compilers.
360  inline uint64_t
361  _SwapByteOrder(uint64_t val) const {
362  val =
363  ((val & 0xFF00000000000000u) >> 56u) |
364  ((val & 0x00FF000000000000u) >> 40u) |
365  ((val & 0x0000FF0000000000u) >> 24u) |
366  ((val & 0x000000FF00000000u) >> 8u) |
367  ((val & 0x00000000FF000000u) << 8u) |
368  ((val & 0x0000000000FF0000u) << 24u) |
369  ((val & 0x000000000000FF00u) << 40u) |
370  ((val & 0x00000000000000FFu) << 56u);
371  return val;
372  }
373 
374  size_t _Combine(size_t x, size_t y) const {
375  // This is our hash combiner. The task is, given two hash codes x and
376  // y, compute a single hash code. One way to do this is to exclusive-or
377  // the two codes together, but this can produce bad results if they
378  // differ by some fixed amount, For example if the input codes are
379  // multiples of 32, and the two codes we're given are N and N + 32 (this
380  // happens commonly when the hashed values are memory addresses) then
381  // the resulting hash codes for successive pairs of these produces many
382  // repeating values (32, 96, 32, XXX, 32, 96, 32, YYY...). That's a lot
383  // of collisions.
384  //
385  // Instead we combine hash values by assigning numbers to the lattice
386  // points in the plane, and then treating the inputs x and y as
387  // coordinates identifying a lattice point. Then the combined hash
388  // value is just the number assigned to the lattice point. This way
389  // each unique input pair (x, y) gets a unique output hash code.
390  //
391  // We number lattice points by triangular numbers like this:
392  //
393  // X 0 1 2 3 4 5
394  // Y
395  // 0 0 2 5 9 14 20
396  // 1 1 4 8 13 19 26
397  // 2 3 7 12 18 25 33
398  // 3 6 11 17 24 32 41
399  // 4 10 16 23 31 40 50
400  // 5 15 22 30 39 49 60
401  //
402  // This takes a couple of additions and a multiplication, which is a bit
403  // more expensive than something like an exclusive or, but the quality
404  // improvement outweighs the added expense.
405  x += y;
406  return y + x * (x + 1) / 2;
407  }
408 
409  size_t _state = 0;
410  bool _didOne = false;
411 };
412 
413 /// \class TfHash
414 /// \ingroup group_tf_String
415 ///
416 /// A user-extensible hashing mechanism for use with runtime hash tables.
417 ///
418 /// The hash functions here are appropriate for storing objects in runtime hash
419 /// tables. They are not appropriate for document signatures / fingerprinting
420 /// or for storage and offline use. No specific guarantee is made about hash
421 /// function quality, and the resulting hash codes are only 64-bits wide.
422 /// Callers must assume that collisions will occur and be prepared to deal with
423 /// them.
424 ///
425 /// Additionally, no guarantee is made about repeatability from run-to-run.
426 /// That is, within a process lifetime an object's hash code will not change
427 /// (provided its internal state does not change). But an object with
428 /// equivalent state in a future run of the same process may hash differently.
429 ///
430 /// At the time of this writing we observe good performance combined with the
431 /// "avalanche" quality (~50% output bit flip probability for a single input bit
432 /// flip) in the low-order 40 output bits. Higher order bits do not achieve
433 /// avalanche, and the highest order 8 bits are particularly poor. But for our
434 /// purposes we deem this performance/quality tradeoff acceptable.
435 ///
436 /// This mechanism has builtin support for integral and floating point types,
437 /// some STL types and types in Tf. TfHash uses three methods to attempt to
438 /// hash a passed object. First, TfHash tries to call TfHashAppend() on its
439 /// argument. This is the primary customization point for TfHash. If that is
440 /// not viable, TfHash makes an unqualified call to hash_value(). We would like
441 /// TfHash to try to use std::hash<T> next, but std::hash<T> is not
442 /// SFINAE-friendly until c++17, and this code needs to support c++14.
443 ///
444 /// The best way to add TfHash support for user-defined types is to provide a
445 /// function overload like the following.
446 ///
447 /// \code
448 /// template <class HashState>
449 /// void TfHashAppend(HashState &h, MyType const &myObj)
450 /// h.Append(myObject._member1,
451 /// myObject._member2,
452 /// myObject._member3);
453 /// h.AppendContiguous(myObject._memberArray, myObject._numArrayElems);
454 /// }
455 /// \endcode
456 ///
457 /// The HashState object is left deliberately unspecified, so that different
458 /// hash state objects may be used in different circumstances without modifying
459 /// this support code, and without excess abstraction penalty. The methods
460 /// available for use in TfHashAppend overloads are:
461 ///
462 /// \code
463 /// // Append several objects to the hash state. This invokes the TfHash
464 /// // mechanism so it works for any types supported by TfHash.
465 /// template <class... Args>
466 /// void HashState::Append(Args &&... args);
467 ///
468 /// // Append contiguous objects to the hash state. Note that this is
469 /// // explicitly *not* guaranteed to produce the same result as calling
470 /// // Append() with each object in order.
471 /// template <class T>
472 /// void HashState::AppendContiguous(T const *objects, size_t numObjects);
473 ///
474 /// // Append a general range of objects to the hash state. Note that this is
475 /// // explicitly *not* guaranteed to produce the same result as calling
476 /// // Append() with each object in order.
477 /// template <class Iter>
478 /// void AppendRange(Iter f, Iter l);
479 /// \endcode
480 ///
481 /// The \c TfHash class function object supports:
482 /// \li integral types (including bool)
483 /// \li floating point types
484 /// \li std::string
485 /// \li TfRefPtr
486 /// \li TfWeakPtr
487 /// \li TfEnum
488 /// \li const void*
489 /// \li types that provide overloads for TfHashAppend
490 /// \li types that provide overloads for hash_value
491 ///
492 /// The \c TfHash class can be used to instantiate a \c TfHashMap with \c string
493 /// keys as follows:
494 /// \code
495 /// TfHashMap<string, int, TfHash> m;
496 /// m["abc"] = 1;
497 /// \endcode
498 ///
499 /// \c TfHash()(const char*) is disallowed to avoid confusion of whether
500 /// the pointer or the string is being hashed. If you want to hash a
501 /// C-string use \c TfHashCString and if you want to hash a \c char* use
502 /// \c TfHashCharPtr.
503 ///
504 class TfHash {
505 public:
506  /// Produce a hash code for \p obj. See the class documentation for
507  /// details.
508  template <class T>
509  auto operator()(T &&obj) const ->
510  decltype(Tf_HashImpl(std::declval<Tf_HashState &>(),
511  std::forward<T>(obj), 0), size_t()) {
512  Tf_HashState h;
513  Tf_HashImpl(h, std::forward<T>(obj), 0);
514  return h.GetCode();
515  }
516 
517  /// Produce a hash code by combining the hash codes of several objects.
518  template <class... Args>
519  static size_t Combine(Args &&... args) {
520  Tf_HashState h;
521  _CombineImpl(h, std::forward<Args>(args)...);
522  return h.GetCode();
523  }
524 
525 private:
526  template <class HashState, class T, class... Args>
527  static void _CombineImpl(HashState &h, T &&obj, Args &&... rest) {
528  Tf_HashImpl(h, std::forward<T>(obj), 0);
529  _CombineImpl(h, std::forward<Args>(rest)...);
530  }
531 
532  template <class HashState>
533  static void _CombineImpl(HashState &h) {
534  // base case does nothing
535  }
536 };
537 
538 /// A hash function object that hashes the address of a char pointer.
540  size_t operator()(const char* ptr) const;
541 };
542 
543 /// A hash function object that hashes null-terminated c-string content.
545  size_t operator()(const char* ptr) const;
546 };
547 
548 /// A function object that compares two c-strings for equality.
550  bool operator()(const char* lhs, const char* rhs) const;
551 };
552 
554 
555 #endif
A hash function object that hashes the address of a char pointer.
Definition: hash.h:539
TfCStrHashWrapper TfHashAsCStr(char const *cstr)
Definition: hash.h:197
#define TF_API
Definition: api.h:40
GLsizei const GLchar *const * string
Definition: glcorearb.h:814
GLsizei const GLfloat * value
Definition: glcorearb.h:824
GLdouble s
Definition: glad.h:3009
GLint y
Definition: glcorearb.h:103
char const * cstr
Definition: hash.h:186
void Append(Args &&...args)
Definition: hash.h:253
Definition: hash.h:504
GLfloat f
Definition: glcorearb.h:1926
size_t GetCode() const
Definition: hash.h:270
size_t operator()(const char *ptr) const
TfCStrHashWrapper(char const *cstr)
Definition: hash.h:185
GLuint GLuint end
Definition: glcorearb.h:475
void AppendRange(Iter f, Iter l)
Definition: hash.h:265
size_t operator()(const char *ptr) const
GLint GLenum GLint x
Definition: glcorearb.h:409
auto Tf_HashImpl(HashState &h, T &&obj, long) -> decltype(hash_value(std::forward< T >(obj)), void())
Definition: hash.h:231
A hash function object that hashes null-terminated c-string content.
Definition: hash.h:544
bool operator()(const char *lhs, const char *rhs) const
GLfloat GLfloat GLfloat GLfloat h
Definition: glcorearb.h:2002
A function object that compares two c-strings for equality.
Definition: hash.h:549
void AppendContiguous(T const *elems, size_t numElems)
Definition: hash.h:259
static size_t Combine(Args &&...args)
Produce a hash code by combining the hash codes of several objects.
Definition: hash.h:519
PXR_NAMESPACE_CLOSE_SCOPE PXR_NAMESPACE_OPEN_SCOPE
Definition: path.h:1441
auto ptr(T p) -> const void *
Definition: format.h:2448
GLuint GLfloat * val
Definition: glcorearb.h:1608
#define PXR_NAMESPACE_CLOSE_SCOPE
Definition: pxr.h:91
**If you just want to fire and args
Definition: thread.h:609
PXR_NAMESPACE_OPEN_SCOPE std::enable_if_t< std::is_integral< T >::value > TfHashAppend(HashState &h, T integral)
Definition: hash.h:49
Definition: core.h:1131
#define const
Definition: zconf.h:214
size_t hash_value(const CH_ChannelRef &ref)
GA_API const UT_StringHolder rest
Definition: format.h:2459
auto operator()(T &&obj) const -> decltype(Tf_HashImpl(std::declval< Tf_HashState & >(), std::forward< T >(obj), 0), size_t())
Definition: hash.h:509
PcpNodeRef_ChildrenIterator begin(const PcpNodeRef::child_const_range &r)
Support for range-based for loops for PcpNodeRef children ranges.
Definition: node.h:483