HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hash.h
Go to the documentation of this file.
1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 // names, trademarks, service marks, or product names of the Licensor
11 // and its affiliates, except as required to comply with Section 4(c) of
12 // the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 // http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_TF_HASH_H
25 #define PXR_BASE_TF_HASH_H
26 
27 /// \file tf/hash.h
28 /// \ingroup group_tf_String
29 
30 #include "pxr/pxr.h"
31 #include "pxr/base/tf/tf.h"
32 #include "pxr/base/tf/api.h"
33 
34 #include <cstring>
35 #include <string>
36 #include <type_traits>
37 
39 
40 // Support integers.
41 template <class HashState, class T>
43 TfHashAppend(HashState &h, T integral)
44 {
45  h.Append(integral);
46 }
47 
48 // Simple metafunction that returns an unsigned integral type given a size in
49 // bytes.
50 template <size_t Size> struct Tf_SizedUnsignedInt;
51 template <> struct Tf_SizedUnsignedInt<1> { using type = uint8_t; };
52 template <> struct Tf_SizedUnsignedInt<2> { using type = uint16_t; };
53 template <> struct Tf_SizedUnsignedInt<4> { using type = uint32_t; };
54 template <> struct Tf_SizedUnsignedInt<8> { using type = uint64_t; };
55 
56 // Support enums.
57 template <class HashState, class Enum>
59 TfHashAppend(HashState &h, Enum e)
60 {
61  h.Append(static_cast<std::underlying_type_t<Enum>>(e));
62 }
63 
64 // Support floating point.
65 template <class HashState, class T>
67 TfHashAppend(HashState &h, T fp)
68 {
69  // We want both positive and negative zero to hash the same, so we have to
70  // check against zero here.
71  typename Tf_SizedUnsignedInt<sizeof(T)>::type intbuf = 0;
72  if (fp != static_cast<T>(0)) {
73  memcpy(&intbuf, &fp, sizeof(T));
74  }
75  h.Append(intbuf);
76 }
77 
78 // Support for hashing std::string.
79 template <class HashState>
80 inline void
81 TfHashAppend(HashState &h, const std::string& s)
82 {
83  return h.AppendContiguous(s.c_str(), s.length());
84 }
85 
86 // Support for hashing pointers, but we explicitly delete the version for
87 // [const] char pointers. See more below.
88 template <class HashState, class T>
89 inline void
90 TfHashAppend(HashState &h, const T* ptr) {
91  return h.Append(reinterpret_cast<size_t>(ptr));
92 }
93 
94 // We refuse to hash [const] char *. You're almost certainly trying to hash the
95 // pointed-to string and this will not do that (it will hash the pointer
96 // itself). To hash a c-style null terminated string, you can use
97 // TfHashAsCStr() to indicate the intent, or use TfHashCString. If you
98 // really want to hash the pointer then use static_cast<const void*>(ptr) or
99 // TfHashCharPtr.
100 template <class HashState>
101 inline void TfHashAppend(HashState &h, char const *ptr) = delete;
102 template <class HashState>
103 inline void TfHashAppend(HashState &h, char *ptr) = delete;
104 
105 /// A structure that wraps a char pointer, indicating intent that it should be
106 /// hashed as a c-style null terminated string. See TfhashAsCStr().
108 {
109  explicit TfCStrHashWrapper(char const *cstr) : cstr(cstr) {}
110  char const *cstr;
111 };
112 
113 /// Indicate that a char pointer is intended to be hashed as a C-style null
114 /// terminated string. Use this to wrap a char pointer in a HashState::Append()
115 /// call when implementing a TfHashAppend overload.
116 ///
117 /// This structure provides a lightweight view on the char pointer passed to its
118 /// constructor. It does not copy the data or participate in its lifetime. The
119 /// passed char pointer must remain valid as long as this struct is used.
120 inline TfCStrHashWrapper
121 TfHashAsCStr(char const *cstr)
122 {
123  return TfCStrHashWrapper(cstr);
124 }
125 
126 template <class HashState>
127 inline void TfHashAppend(HashState &h, TfCStrHashWrapper hcstr)
128 {
129  return h.AppendContiguous(hcstr.cstr, std::strlen(hcstr.cstr));
130 }
131 
132 // Implementation detail: dispatch based on hash capability: Try TfHashAppend
133 // first, otherwise try hash_value. We'd like to otherwise try std::hash<T>,
134 // but std::hash<> is not SFINAE-friendly until c++17 and this code needs to
135 // support c++14 currently. We rely on a combination of expression SFINAE and
136 // establishing preferred order by passing a 0 constant and having the overloads
137 // take int (highest priority), long (next priority) and '...' (lowest
138 // priority).
139 
140 // std::hash version, attempted last. Consider adding when we move to
141 // C++17 or newer.
142 /*
143 template <class HashState, class T>
144 inline auto Tf_HashImpl(HashState &h, T &&obj, ...)
145  -> decltype(std::hash<typename std::decay<T>::type>()(
146  std::forward<T>(obj)), void())
147 {
148  TfHashAppend(
149  h, std::hash<typename std::decay<T>::type>()(std::forward<T>(obj)));
150 }
151 */
152 
153 // hash_value, attempted second.
154 template <class HashState, class T>
155 inline auto Tf_HashImpl(HashState &h, T &&obj, long)
156  -> decltype(hash_value(std::forward<T>(obj)), void())
157 {
158  TfHashAppend(h, hash_value(std::forward<T>(obj)));
159 }
160 
161 // TfHashAppend, attempted first.
162 template <class HashState, class T>
163 inline auto Tf_HashImpl(HashState &h, T &&obj, int)
164  -> decltype(TfHashAppend(h, std::forward<T>(obj)), void())
165 {
166  TfHashAppend(h, std::forward<T>(obj));
167 }
168 
169 // Implementation detail, accumulates hashes.
171 {
172  // Go thru Tf_HashImpl for non-integers.
173  template <class T>
174  std::enable_if_t<!std::is_integral<std::decay_t<T>>::value>
175  Append(T &&obj) {
176  Tf_HashImpl(*this, std::forward<T>(obj), 0);
177  }
178 
179  // Integers bottom out here.
180  template <class T>
182  Append(T i) {
183  if (!_didOne) {
184  _state = i;
185  _didOne = true;
186  }
187  else {
188  _state = _Combine(_state, i);
189  }
190  }
191 
192  // Append contiguous objects.
193  template <class T>
195  AppendContiguous(T const *elems, size_t numElems) {
196  _AppendBytes(reinterpret_cast<char const *>(elems),
197  numElems * sizeof(T));
198  }
199 
200  // Append contiguous objects.
201  template <class T>
203  AppendContiguous(T const *elems, size_t numElems) {
204  while (numElems--) {
205  Append(*elems++);
206  }
207  }
208 
209 private:
210  friend class TfHash;
211 
212  /// Append a number of bytes to the hash state.
213  TF_API void _AppendBytes(char const *bytes, size_t numBytes);
214 
215  // Return the hash code for the accumulated hash state.
216  size_t _GetCode() const {
217  // This is based on Knuth's multiplicative hash for integers. The
218  // constant is the closest prime to the binary expansion of the inverse
219  // golden ratio. The best way to produce a hash table bucket index from
220  // the result is to shift the result right, since the higher order bits
221  // have the most entropy. But since we can't know the number of buckets
222  // in a table that's using this, we just reverse the byte order instead,
223  // to get the highest entropy bits into the low-order bytes.
224  return _SwapByteOrder(_state * 11400714819323198549ULL);
225  }
226 
227  // This turns into a single bswap/pshufb type instruction on most compilers.
228  inline uint64_t
229  _SwapByteOrder(uint64_t val) const {
230  val =
231  ((val & 0xFF00000000000000u) >> 56u) |
232  ((val & 0x00FF000000000000u) >> 40u) |
233  ((val & 0x0000FF0000000000u) >> 24u) |
234  ((val & 0x000000FF00000000u) >> 8u) |
235  ((val & 0x00000000FF000000u) << 8u) |
236  ((val & 0x0000000000FF0000u) << 24u) |
237  ((val & 0x000000000000FF00u) << 40u) |
238  ((val & 0x00000000000000FFu) << 56u);
239  return val;
240  }
241 
242  size_t _Combine(size_t x, size_t y) const {
243  // This is our hash combiner. The task is, given two hash codes x and
244  // y, compute a single hash code. One way to do this is to exclusive-or
245  // the two codes together, but this can produce bad results if they
246  // differ by some fixed amount, For example if the input codes are
247  // multiples of 32, and the two codes we're given are N and N + 32 (this
248  // happens commonly when the hashed values are memory addresses) then
249  // the resulting hash codes for successive pairs of these produces many
250  // repeating values (32, 96, 32, XXX, 32, 96, 32, YYY...). That's a lot
251  // of collisions.
252  //
253  // Instead we combine hash values by assigning numbers to the lattice
254  // points in the plane, and then treating the inputs x and y as
255  // coordinates identifying a lattice point. Then the combined hash
256  // value is just the number assigned to the lattice point. This way
257  // each unique input pair (x, y) gets a unique output hash code.
258  //
259  // We number lattice points by triangular numbers like this:
260  //
261  // X 0 1 2 3 4 5
262  // Y
263  // 0 0 2 5 9 14 20
264  // 1 1 4 8 13 19 26
265  // 2 3 7 12 18 25 33
266  // 3 6 11 17 24 32 41
267  // 4 10 16 23 31 40 50
268  // 5 15 22 30 39 49 60
269  //
270  // This takes a couple of additions and a multiplication, which is a bit
271  // more expensive than something like an exclusive or, but the quality
272  // improvement outweighs the added expense.
273  x += y;
274  return y + x * (x + 1) / 2;
275  }
276 
277  size_t _state = 0;
278  bool _didOne = false;
279 };
280 
281 /// \class TfHash
282 /// \ingroup group_tf_String
283 ///
284 /// A user-extensible hashing mechanism for use with runtime hash tables.
285 ///
286 /// The hash functions here are appropriate for storing objects in runtime hash
287 /// tables. They are not appropriate for document signatures / fingerprinting
288 /// or for storage and offline use. No specific guarantee is made about hash
289 /// function quality, and the resulting hash codes are only 64-bits wide.
290 /// Callers must assume that collisions will occur and be prepared to deal with
291 /// them.
292 ///
293 /// Additionally, no guarantee is made about repeatability from run-to-run.
294 /// That is, within a process lifetime an object's hash code will not change
295 /// (provided its internal state does not change). But an object with
296 /// equivalent state in a future run of the same process may hash differently.
297 ///
298 /// At the time of this writing we observe good performance combined with the
299 /// "avalanche" quality (~50% output bit flip probability for a single input bit
300 /// flip) in the low-order 40 output bits. Higher order bits do not achieve
301 /// avalanche, and the highest order 8 bits are particularly poor. But for our
302 /// purposes we deem this performance/quality tradeoff acceptable.
303 ///
304 /// This mechanism has builtin support for integral and floating point types,
305 /// some STL types and types in Tf. TfHash uses three methods to attempt to
306 /// hash a passed object. First, TfHash tries to call TfHashAppend() on its
307 /// argument. This is the primary customization point for TfHash. If that is
308 /// not viable, TfHash makes an unqualified call to hash_value(). We would like
309 /// TfHash to try to use std::hash<T> next, but std::hash<T> is not
310 /// SFINAE-friendly until c++17, and this code needs to support c++14.
311 ///
312 /// The best way to add TfHash support for user-defined types is to provide a
313 /// function overload like the following.
314 ///
315 /// \code
316 /// template <class HashState>
317 /// void TfHashAppend(HashState &h, MyType const &myObj)
318 /// h.Append(myObject._member1);
319 /// h.Append(myObject._member2);
320 /// h.Append(myObject._member3);
321 /// h.AppendContiguous(myObject._memberArray, myObject._numArrayElems);
322 /// }
323 /// \endcode
324 ///
325 /// The HashState object is left deliberately unspecified, so that different
326 /// hash state objects may be used in different circumstances without modifying
327 /// this support code, and without excess abstraction penalty. The methods
328 /// available for use in TfHashAppend overloads are:
329 ///
330 /// \code
331 /// // Append one object to the hash state. This invokes the TfHash mechanism
332 /// // so it works for any type that is supported by TfHash.
333 /// template <class T>
334 /// void HashState::Append(T &&obj);
335 ///
336 /// // Append contiguous objects to the hash state. Note that this is
337 /// // explicitly *not* guaranteed to produce the same result as calling
338 /// // Append() with each object in order.
339 /// template <class T>
340 /// void HashState::AppendContiguous(T const *objects, size_t numObjects);
341 /// \endcode
342 ///
343 /// The \c TfHash class function object supports:
344 /// \li integral types (including bool)
345 /// \li floating point types
346 /// \li std::string
347 /// \li TfRefPtr
348 /// \li TfWeakPtr
349 /// \li TfEnum
350 /// \li const void*
351 /// \li types that provide overloads for TfHashAppend
352 /// \li types that provide overloads for hash_value
353 ///
354 /// The \c TfHash class can be used to instantiate a \c TfHashMap with \c string
355 /// keys as follows:
356 /// \code
357 /// TfHashMap<string, int, TfHash> m;
358 /// m["abc"] = 1;
359 /// \endcode
360 ///
361 /// \c TfHash()(const char*) is disallowed to avoid confusion of whether
362 /// the pointer or the string is being hashed. If you want to hash a
363 /// C-string use \c TfHashCString and if you want to hash a \c char* use
364 /// \c TfHashCharPtr.
365 ///
366 class TfHash {
367 public:
368  /// Produce a hash code for \p obj. See the class documentation for
369  /// details.
370  template <class T>
371  auto operator()(T &&obj) const ->
372  decltype(Tf_HashImpl(std::declval<Tf_HashState &>(),
373  std::forward<T>(obj), 0), size_t()) {
374  Tf_HashState h;
375  Tf_HashImpl(h, std::forward<T>(obj), 0);
376  return h._GetCode();
377  }
378 };
379 
380 /// A hash function object that hashes the address of a char pointer.
382  size_t operator()(const char* ptr) const;
383 };
384 
385 /// A hash function object that hashes null-terminated c-string content.
387  size_t operator()(const char* ptr) const;
388 };
389 
390 /// A function object that compares two c-strings for equality.
392  bool operator()(const char* lhs, const char* rhs) const;
393 };
394 
396 
397 #endif
GLdouble s
Definition: glew.h:1390
A hash function object that hashes the address of a char pointer.
Definition: hash.h:381
TfCStrHashWrapper TfHashAsCStr(char const *cstr)
Definition: hash.h:121
#define TF_API
Definition: api.h:40
GLuint const GLfloat * val
Definition: glew.h:2794
std::add_rvalue_reference< T >::type declval() FMT_NOEXCEPT
char const * cstr
Definition: hash.h:110
GLhandleARB obj
Definition: glew.h:6236
std::enable_if_t<!std::is_integral< std::decay_t< T > >::value > Append(T &&obj)
Definition: hash.h:175
Definition: hash.h:366
GLint GLint GLint GLint GLint x
Definition: glew.h:1252
size_t operator()(const char *ptr) const
GLint GLint GLint GLint GLint GLint y
Definition: glew.h:1252
TfCStrHashWrapper(char const *cstr)
Definition: hash.h:109
std::enable_if_t< std::is_integral< T >::value > AppendContiguous(T const *elems, size_t numElems)
Definition: hash.h:195
size_t operator()(const char *ptr) const
GLfloat GLfloat GLfloat GLfloat h
Definition: glew.h:8011
auto Tf_HashImpl(HashState &h, T &&obj, long) -> decltype(hash_value(std::forward< T >(obj)), void())
Definition: hash.h:155
std::enable_if_t< std::is_integral< T >::value > Append(T i)
Definition: hash.h:182
A hash function object that hashes null-terminated c-string content.
Definition: hash.h:386
std::enable_if_t<!std::is_integral< T >::value > AppendContiguous(T const *elems, size_t numElems)
Definition: hash.h:203
bool operator()(const char *lhs, const char *rhs) const
GLsizei const GLchar *const * string
Definition: glew.h:1844
A function object that compares two c-strings for equality.
Definition: hash.h:391
PXR_NAMESPACE_CLOSE_SCOPE PXR_NAMESPACE_OPEN_SCOPE
Definition: path.h:1346
const void * ptr(const T *p)
Definition: format.h:3292
#define PXR_NAMESPACE_CLOSE_SCOPE
Definition: pxr.h:83
PXR_NAMESPACE_OPEN_SCOPE std::enable_if_t< std::is_integral< T >::value > TfHashAppend(HashState &h, T integral)
Definition: hash.h:43
#define const
Definition: zconf.h:214
size_t hash_value(const CH_ChannelRef &ref)
GLsizei const GLfloat * value
Definition: glew.h:1849
auto operator()(T &&obj) const -> decltype(Tf_HashImpl(std::declval< Tf_HashState & >(), std::forward< T >(obj), 0), size_t())
Definition: hash.h:371