SYS_AtomicImpl.h
/*
 * PROPRIETARY INFORMATION.  This software is proprietary to
 * Side Effects Software Inc., and is not to be reproduced,
 * transmitted, or disclosed in any way without written permission.
 *
 * NAME:     SYS_AtomicImpl.h (SYS Library, C++)
 *
 * COMMENTS: Platform-specific atomic operations implementation.
 *
 * RELATION TO THE STL:
 *
 * Use SYS_AtomicImpl.h instead of std::atomic.
 *
 * Reasoning:
 *
 * This is a very platform-specific construct that is still being standardized.
 * C++ finally has a memory model, so maybe SYS_Atomic will become
 * a wrapper sooner rather than later.
 */

#ifndef __SYS_ATOMICIMPL_H_INCLUDED__
#define __SYS_ATOMICIMPL_H_INCLUDED__

#include "SYS_Deprecated.h"
#include "SYS_Inline.h"
#include "SYS_MemoryOrder.h"
#include "SYS_StaticAssert.h"
#include "SYS_Types.h"

#ifdef WIN32
    #include <intrin.h>
    #define SYS_ATOMIC_INLINE SYS_FORCE_INLINE
#else
    #define SYS_ATOMIC_INLINE inline
#endif
#ifdef MBSD
#include <libkern/OSAtomic.h>
#endif

namespace SYS_AtomicImpl
{

template <typename T> T test_and_set(T *addr, T val)
{
    SYS_STATIC_ASSERT_MSG(sizeof(T) == 0,
        "Cannot instantiate test_and_set for unsupported type.");
}
template <typename T> T test_and_add(T *addr, T val)
{
    SYS_STATIC_ASSERT_MSG(sizeof(T) == 0,
        "Cannot instantiate test_and_add for unsupported type.");
}
template <typename T> T compare_and_swap(volatile T *addr, T oldval, T newval)
{
    SYS_STATIC_ASSERT_MSG(sizeof(T) == 0,
        "Cannot instantiate compare_and_swap for unsupported type.");
}
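
// A minimal illustrative sketch (not part of the original header), showing
// what the unspecialized templates above are for: calling any of them on a
// type without a platform specialization trips SYS_STATIC_ASSERT_MSG at
// compile time instead of silently doing the wrong thing.  The int16 counter
// is a hypothetical example; the block is disabled so it never affects real
// builds.
#if 0
static inline void
example_unsupported_type()
{
    int16 count = 0;
    test_and_add(&count, int16(1)); // error: no int16 specialization below
}
#endif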

// Group these together because the load/store implementation is the same
#if defined(LINUX) || defined(MBSD)

#if defined(LINUX)

// GCC 4.0 doesn't support __sync_lock_test_and_set,
// but GCC 4.1 and onwards do
#if !SYS_IS_GCC_GE(4, 1)
#error "Unsupported gcc version"
#endif

template <>
SYS_ATOMIC_INLINE int32
test_and_set<int32>(int32 *addr, int32 val)
{
    return __sync_lock_test_and_set(addr, val);
}

template <>
SYS_ATOMIC_INLINE int32
test_and_add<int32>(int32 *addr, int32 val)
{
    return __sync_fetch_and_add(addr, val);
}

// if (*addr == oldval) *addr = newval
template <>
SYS_ATOMIC_INLINE int32
compare_and_swap<int32>(volatile int32 *addr, int32 oldval, int32 newval)
{
    return __sync_val_compare_and_swap(const_cast<int32 *>(addr), oldval, newval);
}

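// A minimal illustrative sketch (not part of the original header): the usual
// compare_and_swap() retry loop.  The example_atomic_max() helper is
// hypothetical; it raises a shared int32 to at least 'candidate' and is
// disabled so it never affects real builds.
#if 0
static inline void
example_atomic_max(volatile int32 *addr, int32 candidate)
{
    int32 cur = *addr;
    while (cur < candidate)
    {
        // Swap in 'candidate' only if *addr still holds 'cur'; the return
        // value is whatever *addr contained just before the attempt.
        int32 prev = compare_and_swap(addr, cur, candidate);
        if (prev == cur)
            break;      // our value was installed
        cur = prev;     // lost the race; retry against the newer value
    }
}
#endif
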
// NOTE: The int64 GCC built-ins are implemented for 32-bit platforms,
// using CMPXCHG8B and, if necessary, looping.

template <>
SYS_ATOMIC_INLINE int64
test_and_set<int64>(int64 *addr, int64 val)
{
    return __sync_lock_test_and_set(addr, val);
}

template <>
SYS_ATOMIC_INLINE int64
test_and_add<int64>(int64 *addr, int64 val)
{
    return __sync_fetch_and_add(addr, val);
}

template <>
SYS_ATOMIC_INLINE int64
compare_and_swap<int64>(volatile int64 *addr, int64 oldval, int64 newval)
{
    return __sync_val_compare_and_swap(const_cast<int64 *>(addr), oldval, newval);
}

#else // LINUX

//
// Code for MBSD
//

template <>
SYS_ATOMIC_INLINE int32
test_and_set<int32>(int32 *addr, int32 val)
{
    int32 oldval;
#if defined(MBSD_ARM)
    return __sync_lock_test_and_set(addr, val);
#else
    __asm__ __volatile__("lock xchgl %0, %1"
                         : "=r"(oldval), "=m"(*(addr))
                         : "0"(val), "m"(*(addr)));
#endif
    return oldval;
}

template <>
SYS_ATOMIC_INLINE int32
test_and_add<int32>(int32 *addr, int32 val)
{
    return __sync_fetch_and_add(addr, val);
}

template <>
SYS_ATOMIC_INLINE int32
compare_and_swap<int32>(volatile int32 *addr, int32 oldval, int32 newval)
{
    return __sync_val_compare_and_swap(const_cast<int32 *>(addr), oldval, newval);
}

template <>
SYS_ATOMIC_INLINE int64
test_and_set<int64>(int64 *addr, int64 val)
{
    return __sync_lock_test_and_set(addr, val);
}

template <>
SYS_ATOMIC_INLINE int64
test_and_add<int64>(int64 *addr, int64 val)
{
    return __sync_fetch_and_add(addr, val);
}

template <>
SYS_ATOMIC_INLINE int64
compare_and_swap<int64>(volatile int64 *addr, int64 oldval, int64 newval)
{
    return __sync_val_compare_and_swap(const_cast<int64 *>(addr), oldval, newval);
}

template <>
SYS_ATOMIC_INLINE time_t
test_and_set<time_t>(time_t *addr, time_t val)
{
    return __sync_lock_test_and_set(addr, val);
}

template <>
SYS_ATOMIC_INLINE time_t
test_and_add<time_t>(time_t *addr, time_t val)
{
    return __sync_fetch_and_add(addr, val);
}

template <>
SYS_ATOMIC_INLINE time_t
compare_and_swap<time_t>(volatile time_t *addr, time_t oldval, time_t newval)
{
    return __sync_val_compare_and_swap(
        const_cast<time_t *>(addr), oldval, newval);
}

#endif // defined(LINUX)

template <typename T>
SYS_ATOMIC_INLINE void
store(T *addr, T val, SYS_MemoryOrder order)
{
    if (order == SYS_MEMORY_ORDER_STORE)
    {
        SYSstoreFence();
        *static_cast<volatile T *>(addr) = val;
    }
    else if (order == SYS_MEMORY_ORDER_SEQ_CST)
    {
        T dummy = 1;

        // __sync_lock_release() is a release barrier, ensuring all previous
        // memory stores are globally visible, and all previous memory loads
        // have been satisfied.
        __sync_lock_release(&dummy); // release barrier

        // __sync_lock_test_and_set is an acquire barrier, preventing any
        // subsequent memory references from moving before this operation.
        // Consequently, this store will be globally visible before any
        // subsequent stores or loads are processed.
        (void)__sync_lock_test_and_set(addr, val); // acquire barrier
    }
    else
    {
        // NOTE: order MUST be SYS_MEMORY_ORDER_RELAXED, but we can't use
        // UT_ASSERT_P in SYS.
        // Use volatile to force the compiler to issue a store instruction, but
        // we don't care what the CPU does.
        *static_cast<volatile T *>(addr) = val;
    }
}

template <typename T>
SYS_ATOMIC_INLINE T
load(const T *addr, SYS_MemoryOrder order)
{
    if (order == SYS_MEMORY_ORDER_LOAD)
    {
        T val = *static_cast<const volatile T *>(addr);
        SYSloadFence();
        return val;
    }
    else if (order == SYS_MEMORY_ORDER_SEQ_CST)
    {
        T tmp = 0;
        return __sync_val_compare_and_swap(const_cast<T *>(addr), tmp, tmp);
    }
    else
    {
        // NOTE: order MUST be SYS_MEMORY_ORDER_RELAXED, but we can't use
        // UT_ASSERT_P in SYS.
        return *static_cast<const volatile T *>(addr);
    }
}
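
// A minimal illustrative sketch (not part of the original header): the
// producer/consumer "publish" pattern these orderings are intended for.
// examplePublish()/exampleConsume() and the two shared int32 locations are
// hypothetical; the block is disabled so it never affects real builds.
#if 0
static inline void
examplePublish(int32 *data, int32 *ready)
{
    store(data, 42, SYS_MEMORY_ORDER_RELAXED);  // payload; no ordering needed yet
    store(ready, 1, SYS_MEMORY_ORDER_STORE);    // fence + store: payload visible first
}

static inline int32
exampleConsume(const int32 *data, const int32 *ready)
{
    if (load(ready, SYS_MEMORY_ORDER_LOAD))     // load + fence: flag read before payload
        return load(data, SYS_MEMORY_ORDER_RELAXED);
    return -1;                                  // not published yet
}
#endif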

#elif defined(WIN32)

#pragma intrinsic (_InterlockedExchange)
#pragma intrinsic (_InterlockedExchangeAdd)
#pragma intrinsic (_InterlockedCompareExchange)

template <>
SYS_ATOMIC_INLINE int32
test_and_set<int32>(int32 *addr, int32 val)
{
    return (int32)_InterlockedExchange((long *)addr, (long)val);
}

template <>
SYS_ATOMIC_INLINE int32
test_and_add<int32>(int32 *addr, int32 val)
{
    return (int32)_InterlockedExchangeAdd((long *)addr, (long)val);
}

template <>
SYS_ATOMIC_INLINE int32
compare_and_swap<int32>(volatile int32 *addr, int32 oldval, int32 newval)
{
    return _InterlockedCompareExchange((volatile long *)addr, newval, oldval);
}

// NOTE: _InterlockedCompareExchange64 is available on 32-bit platforms
// from the Pentium onward, using the CMPXCHG8B instruction.
#pragma intrinsic (_InterlockedCompareExchange64)

template <>
SYS_ATOMIC_INLINE int64
compare_and_swap<int64>(volatile int64 *addr, int64 oldval, int64 newval)
{
    return _InterlockedCompareExchange64(addr, newval, oldval);
}

#if defined(AMD64) || defined(ARM64)

#pragma intrinsic (_InterlockedExchange64)
#pragma intrinsic (_InterlockedExchangeAdd64)

template <>
SYS_ATOMIC_INLINE int64
test_and_set<int64>(int64 *addr, int64 val)
{
    return _InterlockedExchange64(addr, val);
}

template <>
SYS_ATOMIC_INLINE int64
test_and_add<int64>(int64 *addr, int64 val)
{
    return _InterlockedExchangeAdd64(addr, val);
}

#else // AMD64

// On 32-bit platforms, we have to implement our own Exchange64 and
// ExchangeAdd64, using CompareExchange64.

template <>
SYS_ATOMIC_INLINE int64
test_and_set<int64>(int64 *addr, int64 val)
{
    int64 retval = *addr;
    do
    {
        int64 newretval = _InterlockedCompareExchange64(addr, val, retval);
        if (retval == newretval)
            return retval;
        retval = newretval;
    } while (true);

    // Unreachable
}

template <>
SYS_ATOMIC_INLINE int64
test_and_add<int64>(int64 *addr, int64 val)
{
    int64 retval = *addr;
    do
    {
        int64 newretval = _InterlockedCompareExchange64(addr, retval+val, retval);
        if (retval == newretval)
            return retval;
        retval = newretval;
    } while (true);

    // Unreachable
}
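
// A minimal illustrative sketch (not part of the original header): the same
// CompareExchange64 retry loop generalizes to any 64-bit read-modify-write
// operation on 32-bit Windows.  The fetch-and-or helper below is hypothetical
// (no such function exists in this header) and is disabled.
#if 0
SYS_ATOMIC_INLINE int64
example_test_and_or(int64 *addr, int64 mask)
{
    int64 retval = *addr;
    do
    {
        // Try to replace the value we last saw with (value | mask); the
        // intrinsic returns what *addr actually held at that moment.
        int64 newretval =
            _InterlockedCompareExchange64(addr, retval | mask, retval);
        if (retval == newretval)
            return retval;      // swap succeeded; return the previous value
        retval = newretval;     // another writer got in first; retry
    } while (true);
}
#endif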

#endif // AMD64

// The following implementations of store() and load() are valid only for
// MS Visual C++ 2005 and higher.
#pragma intrinsic (_ReadBarrier)
#pragma intrinsic (_WriteBarrier)
#pragma intrinsic (_InterlockedCompareExchange)

template <typename T>
SYS_ATOMIC_INLINE void
store(T *addr, T val, SYS_MemoryOrder order)
{
    // In Visual C++ 2005 and up, reads from volatile variables are defined to
    // have read-acquire semantics, and writes to volatile variables are
    // defined to have write-release semantics. The compiler will not move
    // any reads and writes past them, and on Windows will ensure that the CPU
    // does not do so either. Thus there is no need for explicit calls to
    // _ReadWriteBarrier().
    //
    // NOTE:
    // Visual Studio 2005 had a bug (subsequently fixed) on IA64 where the
    // compiler did not respect the acquire/release semantics for volatile
    // floats.
    // http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=288218
    if (order == SYS_MEMORY_ORDER_STORE)
    {
        *static_cast<volatile T *>(addr) = val;
    }
    else if (order == SYS_MEMORY_ORDER_SEQ_CST)
    {
        (void)test_and_set(addr, val);
    }
    else
    {
        // NOTE: order MUST be SYS_MEMORY_ORDER_RELAXED, but we can't use
        // UT_ASSERT_P in SYS.
        // We want to force the compiler to respect this write. One way would
        // be to treat addr as volatile, but, as already explained, that would
        // enforce release semantics on both the compiler and CPU. It should
        // theoretically be more efficient to simply prevent the compiler from
        // allowing optimization using any following writes.
        *addr = val;
        SYS_DEPRECATED_PUSH_DISABLE()
        _WriteBarrier();
        SYS_DEPRECATED_POP_DISABLE()
    }
}

template <typename T>
SYS_ATOMIC_INLINE T
load(const T *addr, SYS_MemoryOrder order)
{
    // In Visual C++ 2005 and up, reads from volatile variables are defined to
    // have read-acquire semantics, and writes to volatile variables are
    // defined to have write-release semantics. The compiler will not move
    // any reads and writes past them, and on Windows will ensure that the CPU
    // does not do so either. Thus there is no need for explicit calls to
    // _ReadWriteBarrier().
    //
    // NOTE:
    // Visual Studio 2005 had a bug (subsequently fixed) on IA64 where the
    // compiler did not respect the acquire/release semantics for volatile
    // floats.
    // http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=288218
    if (order == SYS_MEMORY_ORDER_LOAD)
    {
        return *static_cast<const volatile T *>(addr);
    }
    else if (order == SYS_MEMORY_ORDER_SEQ_CST)
    {
        T tmp = 0;
        return compare_and_swap(const_cast<T *>(addr), tmp, tmp);
    }
    else
    {
        // NOTE: order MUST be SYS_MEMORY_ORDER_RELAXED, but we can't use
        // UT_ASSERT_P in SYS.
        // We want to force the compiler to respect this read. One way would
        // be to treat addr as volatile, but, as already explained, that would
        // enforce acquire semantics on both the compiler and CPU. It should
        // theoretically be more efficient to simply prevent the compiler from
        // equating earlier reads with this one.
        SYS_DEPRECATED_PUSH_DISABLE()
        _ReadBarrier();
        SYS_DEPRECATED_POP_DISABLE()
        return *addr;
    }
}

#else

#error "Unsupported platform"
#endif

} // namespace SYS_AtomicImpl

#endif // __SYS_ATOMICIMPL_H_INCLUDED__