HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hash.h
Go to the documentation of this file.
1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 // names, trademarks, service marks, or product names of the Licensor
11 // and its affiliates, except as required to comply with Section 4(c) of
12 // the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 // http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_TF_HASH_H
25 #define PXR_BASE_TF_HASH_H
26 
27 /// \file tf/hash.h
28 /// \ingroup group_tf_String
29 
30 #include "pxr/pxr.h"
31 #include "pxr/base/tf/tf.h"
32 #include "pxr/base/tf/api.h"
33 
34 #include <cstring>
35 #include <string>
36 #include <type_traits>
37 #include <utility>
38 #include <vector>
39 
41 
42 // Support integers.
43 template <class HashState, class T>
45 TfHashAppend(HashState &h, T integral)
46 {
47  h.Append(integral);
48 }
49 
50 // Simple metafunction that returns an unsigned integral type given a size in
51 // bytes.
52 template <size_t Size> struct Tf_SizedUnsignedInt;
53 template <> struct Tf_SizedUnsignedInt<1> { using type = uint8_t; };
54 template <> struct Tf_SizedUnsignedInt<2> { using type = uint16_t; };
55 template <> struct Tf_SizedUnsignedInt<4> { using type = uint32_t; };
56 template <> struct Tf_SizedUnsignedInt<8> { using type = uint64_t; };
57 
58 // Support enums.
59 template <class HashState, class Enum>
61 TfHashAppend(HashState &h, Enum e)
62 {
63  h.Append(static_cast<std::underlying_type_t<Enum>>(e));
64 }
65 
66 // Support floating point.
67 template <class HashState, class T>
69 TfHashAppend(HashState &h, T fp)
70 {
71  // We want both positive and negative zero to hash the same, so we have to
72  // check against zero here.
73  typename Tf_SizedUnsignedInt<sizeof(T)>::type intbuf = 0;
74  if (fp != static_cast<T>(0)) {
75  memcpy(&intbuf, &fp, sizeof(T));
76  }
77  h.Append(intbuf);
78 }
79 
80 // Support std::pair.
81 template <class HashState, class T, class U>
82 inline void
83 TfHashAppend(HashState &h, std::pair<T, U> const &p)
84 {
85  h.Append(p.first);
86  h.Append(p.second);
87 }
88 
89 // Support std::vector.
90 template <class HashState, class T>
91 inline void
92 TfHashAppend(HashState &h, std::vector<T> const &vec)
93 {
94  h.AppendContiguous(vec.data(), vec.size());
95 }
96 
97 // Support for hashing std::string.
98 template <class HashState>
99 inline void
100 TfHashAppend(HashState &h, const std::string& s)
101 {
102  return h.AppendContiguous(s.c_str(), s.length());
103 }
104 
105 // Support for hashing pointers, but we explicitly delete the version for
106 // [const] char pointers. See more below.
107 template <class HashState, class T>
108 inline void
109 TfHashAppend(HashState &h, const T* ptr) {
110  return h.Append(reinterpret_cast<uintptr_t>(ptr));
111 }
112 
113 // We refuse to hash [const] char *. You're almost certainly trying to hash the
114 // pointed-to string and this will not do that (it will hash the pointer
115 // itself). To hash a c-style null terminated string, you can use
116 // TfHashAsCStr() to indicate the intent, or use TfHashCString. If you
117 // really want to hash the pointer then use static_cast<const void*>(ptr) or
118 // TfHashCharPtr.
119 template <class HashState>
120 inline void TfHashAppend(HashState &h, char const *ptr) = delete;
121 template <class HashState>
122 inline void TfHashAppend(HashState &h, char *ptr) = delete;
123 
124 /// A structure that wraps a char pointer, indicating intent that it should be
125 /// hashed as a c-style null terminated string. See TfhashAsCStr().
127 {
128  explicit TfCStrHashWrapper(char const *cstr) : cstr(cstr) {}
129  char const *cstr;
130 };
131 
132 /// Indicate that a char pointer is intended to be hashed as a C-style null
133 /// terminated string. Use this to wrap a char pointer in a HashState::Append()
134 /// call when implementing a TfHashAppend overload.
135 ///
136 /// This structure provides a lightweight view on the char pointer passed to its
137 /// constructor. It does not copy the data or participate in its lifetime. The
138 /// passed char pointer must remain valid as long as this struct is used.
139 inline TfCStrHashWrapper
140 TfHashAsCStr(char const *cstr)
141 {
142  return TfCStrHashWrapper(cstr);
143 }
144 
145 template <class HashState>
146 inline void TfHashAppend(HashState &h, TfCStrHashWrapper hcstr)
147 {
148  return h.AppendContiguous(hcstr.cstr, std::strlen(hcstr.cstr));
149 }
150 
151 // Implementation detail: dispatch based on hash capability: Try TfHashAppend
152 // first, otherwise try hash_value. We'd like to otherwise try std::hash<T>,
153 // but std::hash<> is not SFINAE-friendly until c++17 and this code needs to
154 // support c++14 currently. We rely on a combination of expression SFINAE and
155 // establishing preferred order by passing a 0 constant and having the overloads
156 // take int (highest priority), long (next priority) and '...' (lowest
157 // priority).
158 
159 // std::hash version, attempted last. Consider adding when we move to
160 // C++17 or newer.
161 /*
162 template <class HashState, class T>
163 inline auto Tf_HashImpl(HashState &h, T &&obj, ...)
164  -> decltype(std::hash<typename std::decay<T>::type>()(
165  std::forward<T>(obj)), void())
166 {
167  TfHashAppend(
168  h, std::hash<typename std::decay<T>::type>()(std::forward<T>(obj)));
169 }
170 */
171 
172 // hash_value, attempted second.
173 template <class HashState, class T>
174 inline auto Tf_HashImpl(HashState &h, T &&obj, long)
175  -> decltype(hash_value(std::forward<T>(obj)), void())
176 {
177  TfHashAppend(h, hash_value(std::forward<T>(obj)));
178 }
179 
180 // TfHashAppend, attempted first.
181 template <class HashState, class T>
182 inline auto Tf_HashImpl(HashState &h, T &&obj, int)
183  -> decltype(TfHashAppend(h, std::forward<T>(obj)), void())
184 {
185  TfHashAppend(h, std::forward<T>(obj));
186 }
187 
188 // Implementation detail, CRTP base that provides the public interface for hash
189 // state implementations.
190 template <class Derived>
192 {
193 public:
194  // Append several objects to the hash state.
195  template <class... Args>
196  void Append(Args &&... args) {
197  _AppendImpl(args...);
198  }
199 
200  // Append contiguous objects to the hash state.
201  template <class T>
202  void AppendContiguous(T const *elems, size_t numElems) {
203  this->_AsDerived()._AppendContiguous(elems, numElems);
204  }
205 
206  // Append a range of objects to the hash state.
207  template <class Iter>
208  void AppendRange(Iter f, Iter l) {
209  this->_AsDerived()._AppendRange(f, l);
210  }
211 
212  // Return the hash code for the current state.
213  size_t GetCode() const {
214  return this->_AsDerived()._GetCode();
215  }
216 
217 private:
218  template <class T, class... Args>
219  void _AppendImpl(T &&obj, Args &&... rest) {
220  this->_AsDerived()._Append(std::forward<T>(obj));
221  _AppendImpl(rest...);
222  }
223  void _AppendImpl() const {
224  // base case intentionally empty.
225  }
226 
227  Derived &_AsDerived() {
228  return *static_cast<Derived *>(this);
229  }
230 
231  Derived const &_AsDerived() const {
232  return *static_cast<Derived const *>(this);
233  }
234 };
235 
236 // Implementation detail, accumulates hashes.
237 class Tf_HashState : public Tf_HashStateAPI<Tf_HashState>
238 {
239 private:
241 
242  // Go thru Tf_HashImpl for non-integers.
243  template <class T>
244  std::enable_if_t<!std::is_integral<std::decay_t<T>>::value>
245  _Append(T &&obj) {
246  Tf_HashImpl(*this, std::forward<T>(obj), 0);
247  }
248 
249  // Integers bottom out here.
250  template <class T>
252  _Append(T i) {
253  if (!_didOne) {
254  _state = i;
255  _didOne = true;
256  }
257  else {
258  _state = _Combine(_state, i);
259  }
260  }
261 
262  // Append contiguous objects.
263  template <class T>
265  _AppendContiguous(T const *elems, size_t numElems) {
266  _AppendBytes(reinterpret_cast<char const *>(elems),
267  numElems * sizeof(T));
268  }
269 
270  // Append contiguous objects.
271  template <class T>
273  _AppendContiguous(T const *elems, size_t numElems) {
274  while (numElems--) {
275  Append(*elems++);
276  }
277  }
278 
279  // Append a range.
280  template <class Iter>
281  void _AppendRange(Iter f, Iter l) {
282  while (f != l) {
283  _Append(*f++);
284  }
285  }
286 
287  /// Append a number of bytes to the hash state.
288  TF_API void _AppendBytes(char const *bytes, size_t numBytes);
289 
290  // Return the hash code for the accumulated hash state.
291  size_t _GetCode() const {
292  // This is based on Knuth's multiplicative hash for integers. The
293  // constant is the closest prime to the binary expansion of the inverse
294  // golden ratio. The best way to produce a hash table bucket index from
295  // the result is to shift the result right, since the higher order bits
296  // have the most entropy. But since we can't know the number of buckets
297  // in a table that's using this, we just reverse the byte order instead,
298  // to get the highest entropy bits into the low-order bytes.
299  return _SwapByteOrder(_state * 11400714819323198549ULL);
300  }
301 
302  // This turns into a single bswap/pshufb type instruction on most compilers.
303  inline uint64_t
304  _SwapByteOrder(uint64_t val) const {
305  val =
306  ((val & 0xFF00000000000000u) >> 56u) |
307  ((val & 0x00FF000000000000u) >> 40u) |
308  ((val & 0x0000FF0000000000u) >> 24u) |
309  ((val & 0x000000FF00000000u) >> 8u) |
310  ((val & 0x00000000FF000000u) << 8u) |
311  ((val & 0x0000000000FF0000u) << 24u) |
312  ((val & 0x000000000000FF00u) << 40u) |
313  ((val & 0x00000000000000FFu) << 56u);
314  return val;
315  }
316 
317  size_t _Combine(size_t x, size_t y) const {
318  // This is our hash combiner. The task is, given two hash codes x and
319  // y, compute a single hash code. One way to do this is to exclusive-or
320  // the two codes together, but this can produce bad results if they
321  // differ by some fixed amount, For example if the input codes are
322  // multiples of 32, and the two codes we're given are N and N + 32 (this
323  // happens commonly when the hashed values are memory addresses) then
324  // the resulting hash codes for successive pairs of these produces many
325  // repeating values (32, 96, 32, XXX, 32, 96, 32, YYY...). That's a lot
326  // of collisions.
327  //
328  // Instead we combine hash values by assigning numbers to the lattice
329  // points in the plane, and then treating the inputs x and y as
330  // coordinates identifying a lattice point. Then the combined hash
331  // value is just the number assigned to the lattice point. This way
332  // each unique input pair (x, y) gets a unique output hash code.
333  //
334  // We number lattice points by triangular numbers like this:
335  //
336  // X 0 1 2 3 4 5
337  // Y
338  // 0 0 2 5 9 14 20
339  // 1 1 4 8 13 19 26
340  // 2 3 7 12 18 25 33
341  // 3 6 11 17 24 32 41
342  // 4 10 16 23 31 40 50
343  // 5 15 22 30 39 49 60
344  //
345  // This takes a couple of additions and a multiplication, which is a bit
346  // more expensive than something like an exclusive or, but the quality
347  // improvement outweighs the added expense.
348  x += y;
349  return y + x * (x + 1) / 2;
350  }
351 
352  size_t _state = 0;
353  bool _didOne = false;
354 };
355 
356 /// \class TfHash
357 /// \ingroup group_tf_String
358 ///
359 /// A user-extensible hashing mechanism for use with runtime hash tables.
360 ///
361 /// The hash functions here are appropriate for storing objects in runtime hash
362 /// tables. They are not appropriate for document signatures / fingerprinting
363 /// or for storage and offline use. No specific guarantee is made about hash
364 /// function quality, and the resulting hash codes are only 64-bits wide.
365 /// Callers must assume that collisions will occur and be prepared to deal with
366 /// them.
367 ///
368 /// Additionally, no guarantee is made about repeatability from run-to-run.
369 /// That is, within a process lifetime an object's hash code will not change
370 /// (provided its internal state does not change). But an object with
371 /// equivalent state in a future run of the same process may hash differently.
372 ///
373 /// At the time of this writing we observe good performance combined with the
374 /// "avalanche" quality (~50% output bit flip probability for a single input bit
375 /// flip) in the low-order 40 output bits. Higher order bits do not achieve
376 /// avalanche, and the highest order 8 bits are particularly poor. But for our
377 /// purposes we deem this performance/quality tradeoff acceptable.
378 ///
379 /// This mechanism has builtin support for integral and floating point types,
380 /// some STL types and types in Tf. TfHash uses three methods to attempt to
381 /// hash a passed object. First, TfHash tries to call TfHashAppend() on its
382 /// argument. This is the primary customization point for TfHash. If that is
383 /// not viable, TfHash makes an unqualified call to hash_value(). We would like
384 /// TfHash to try to use std::hash<T> next, but std::hash<T> is not
385 /// SFINAE-friendly until c++17, and this code needs to support c++14.
386 ///
387 /// The best way to add TfHash support for user-defined types is to provide a
388 /// function overload like the following.
389 ///
390 /// \code
391 /// template <class HashState>
392 /// void TfHashAppend(HashState &h, MyType const &myObj)
393 /// h.Append(myObject._member1,
394 /// myObject._member2,
395 /// myObject._member3);
396 /// h.AppendContiguous(myObject._memberArray, myObject._numArrayElems);
397 /// }
398 /// \endcode
399 ///
400 /// The HashState object is left deliberately unspecified, so that different
401 /// hash state objects may be used in different circumstances without modifying
402 /// this support code, and without excess abstraction penalty. The methods
403 /// available for use in TfHashAppend overloads are:
404 ///
405 /// \code
406 /// // Append several objects to the hash state. This invokes the TfHash
407 /// // mechanism so it works for any types supported by TfHash.
408 /// template <class... Args>
409 /// void HashState::Append(Args &&... args);
410 ///
411 /// // Append contiguous objects to the hash state. Note that this is
412 /// // explicitly *not* guaranteed to produce the same result as calling
413 /// // Append() with each object in order.
414 /// template <class T>
415 /// void HashState::AppendContiguous(T const *objects, size_t numObjects);
416 ///
417 /// // Append a general range of objects to the hash state. Note that this is
418 /// // explicitly *not* guaranteed to produce the same result as calling
419 /// // Append() with each object in order.
420 /// template <class Iter>
421 /// void AppendRange(Iter f, Iter l);
422 /// \endcode
423 ///
424 /// The \c TfHash class function object supports:
425 /// \li integral types (including bool)
426 /// \li floating point types
427 /// \li std::string
428 /// \li TfRefPtr
429 /// \li TfWeakPtr
430 /// \li TfEnum
431 /// \li const void*
432 /// \li types that provide overloads for TfHashAppend
433 /// \li types that provide overloads for hash_value
434 ///
435 /// The \c TfHash class can be used to instantiate a \c TfHashMap with \c string
436 /// keys as follows:
437 /// \code
438 /// TfHashMap<string, int, TfHash> m;
439 /// m["abc"] = 1;
440 /// \endcode
441 ///
442 /// \c TfHash()(const char*) is disallowed to avoid confusion of whether
443 /// the pointer or the string is being hashed. If you want to hash a
444 /// C-string use \c TfHashCString and if you want to hash a \c char* use
445 /// \c TfHashCharPtr.
446 ///
447 class TfHash {
448 public:
449  /// Produce a hash code for \p obj. See the class documentation for
450  /// details.
451  template <class T>
452  auto operator()(T &&obj) const ->
453  decltype(Tf_HashImpl(std::declval<Tf_HashState &>(),
454  std::forward<T>(obj), 0), size_t()) {
455  Tf_HashState h;
456  Tf_HashImpl(h, std::forward<T>(obj), 0);
457  return h.GetCode();
458  }
459 
460  /// Produce a hash code by combining the hash codes of several objects.
461  template <class... Args>
462  static size_t Combine(Args &&... args) {
463  Tf_HashState h;
464  _CombineImpl(h, args...);
465  return h.GetCode();
466  }
467 
468 private:
469  template <class HashState, class T, class... Args>
470  static void _CombineImpl(HashState &h, T &&obj, Args &&... rest) {
471  Tf_HashImpl(h, std::forward<T>(obj), 0);
472  _CombineImpl(h, rest...);
473  }
474 
475  template <class HashState>
476  static void _CombineImpl(HashState &h) {
477  // base case does nothing
478  }
479 };
480 
481 /// A hash function object that hashes the address of a char pointer.
483  size_t operator()(const char* ptr) const;
484 };
485 
486 /// A hash function object that hashes null-terminated c-string content.
488  size_t operator()(const char* ptr) const;
489 };
490 
491 /// A function object that compares two c-strings for equality.
493  bool operator()(const char* lhs, const char* rhs) const;
494 };
495 
497 
498 #endif
A hash function object that hashes the address of a char pointer.
Definition: hash.h:482
TfCStrHashWrapper TfHashAsCStr(char const *cstr)
Definition: hash.h:140
#define TF_API
Definition: api.h:40
char const * cstr
Definition: hash.h:129
void Append(Args &&...args)
Definition: hash.h:196
GLdouble l
Definition: glew.h:9164
GLint GLenum GLint x
Definition: glcorearb.h:408
Definition: hash.h:447
size_t GetCode() const
Definition: hash.h:213
size_t operator()(const char *ptr) const
TfCStrHashWrapper(char const *cstr)
Definition: hash.h:128
GLsizei const GLchar *const * string
Definition: glcorearb.h:813
GLhandleARB obj
Definition: glew.h:6266
void AppendRange(Iter f, Iter l)
Definition: hash.h:208
size_t operator()(const char *ptr) const
GLfloat GLfloat p
Definition: glew.h:16656
auto Tf_HashImpl(HashState &h, T &&obj, long) -> decltype(hash_value(std::forward< T >(obj)), void())
Definition: hash.h:174
A hash function object that hashes null-terminated c-string content.
Definition: hash.h:487
bool operator()(const char *lhs, const char *rhs) const
GLuint GLfloat * val
Definition: glcorearb.h:1607
GLfloat GLfloat GLfloat GLfloat h
Definition: glcorearb.h:2001
A function object that compares two c-strings for equality.
Definition: hash.h:492
void AppendContiguous(T const *elems, size_t numElems)
Definition: hash.h:202
static size_t Combine(Args &&...args)
Produce a hash code by combining the hash codes of several objects.
Definition: hash.h:462
PXR_NAMESPACE_CLOSE_SCOPE PXR_NAMESPACE_OPEN_SCOPE
Definition: path.h:1375
const void * ptr(const T *p)
Definition: format.h:3603
#define PXR_NAMESPACE_CLOSE_SCOPE
Definition: pxr.h:91
GLsizei const GLfloat * value
Definition: glcorearb.h:823
GLfloat f
Definition: glcorearb.h:1925
**If you just want to fire and args
Definition: thread.h:615
PXR_NAMESPACE_OPEN_SCOPE std::enable_if_t< std::is_integral< T >::value > TfHashAppend(HashState &h, T integral)
Definition: hash.h:45
#define const
Definition: zconf.h:214
size_t hash_value(const CH_ChannelRef &ref)
GA_API const UT_StringHolder rest
GLdouble s
Definition: glew.h:1395
GLint y
Definition: glcorearb.h:102
Definition: format.h:3611
auto operator()(T &&obj) const -> decltype(Tf_HashImpl(std::declval< Tf_HashState & >(), std::forward< T >(obj), 0), size_t())
Definition: hash.h:452