HDK
SYS_AtomicImpl.h
/*
 * PROPRIETARY INFORMATION. This software is proprietary to
 * Side Effects Software Inc., and is not to be reproduced,
 * transmitted, or disclosed in any way without written permission.
 *
 * NAME: SYS_AtomicImpl.h (SYS Library, C++)
 *
 * COMMENTS: Platform-specific atomic operations implementation.
 */

#ifndef __SYS_ATOMICIMPL_H_INCLUDED__
#define __SYS_ATOMICIMPL_H_INCLUDED__

#include "SYS_Inline.h"
#include "SYS_Types.h"
#include "SYS_MemoryOrder.h"
#include "SYS_StaticAssert.h"

#ifdef WIN32
    #include <intrin.h>
    #define SYS_ATOMIC_INLINE SYS_FORCE_INLINE
#else
    #define SYS_ATOMIC_INLINE inline
#endif
#ifdef MBSD
#include <libkern/OSAtomic.h>
#endif


namespace SYS_AtomicImpl
{

template <typename T> T test_and_set(T *addr, T val)
{
    SYS_STATIC_ASSERT_MSG(sizeof(T) == 0,
        "Cannot instantiate test_and_set for unsupported type.");
}
template <typename T> T test_and_add(T *addr, T val)
{
    SYS_STATIC_ASSERT_MSG(sizeof(T) == 0,
        "Cannot instantiate test_and_add for unsupported type.");
}
template <typename T> T compare_and_swap(volatile T *addr, T oldval, T newval)
{
    SYS_STATIC_ASSERT_MSG(sizeof(T) == 0,
        "Cannot instantiate compare_and_swap for unsupported type.");
}

// Group these together because the load/store implementation is the same
#if defined(LINUX) || defined(MBSD_INTEL)

#if defined(LINUX)

// GCC 4.0 doesn't support __sync_lock_test_and_set,
// but GCC 4.1 and onwards do
#if !SYS_IS_GCC_GE(4, 1)
#error "Unsupported gcc version"
#endif

template <>
SYS_ATOMIC_INLINE int32
test_and_set<int32>(int32 *addr, int32 val)
{
    return __sync_lock_test_and_set(addr, val);
}

template <>
SYS_ATOMIC_INLINE int32
test_and_add<int32>(int32 *addr, int32 val)
{
    return __sync_fetch_and_add(addr, val);
}

// if (*addr == oldval) *addr = newval
template <>
SYS_ATOMIC_INLINE int32
compare_and_swap<int32>(volatile int32 *addr, int32 oldval, int32 newval)
{
    return __sync_val_compare_and_swap(const_cast<int32 *>(addr),oldval,newval);
}

// NOTE: The int64 GCC built-ins are implemented for 32-bit platforms,
// using CMPXCHG8B and, if necessary, looping.

template <>
SYS_ATOMIC_INLINE int64
test_and_set<int64>(int64 *addr, int64 val)
{
    return __sync_lock_test_and_set(addr, val);
}

template <>
SYS_ATOMIC_INLINE int64
test_and_add<int64>(int64 *addr, int64 val)
{
    return __sync_fetch_and_add(addr, val);
}

template <>
SYS_ATOMIC_INLINE int64
compare_and_swap<int64>(volatile int64 *addr, int64 oldval, int64 newval)
{
    return __sync_val_compare_and_swap(const_cast<int64 *>(addr),oldval,newval);
}

#else // LINUX

//
// Code for MBSD_INTEL
//

template <>
SYS_ATOMIC_INLINE int32
test_and_set<int32>(int32 *addr, int32 val)
{
    int32 oldval;
    __asm__ __volatile__("lock xchgl %0, %1"
                         : "=r"(oldval), "=m"(*(addr))
                         : "0"(val), "m"(*(addr)));
    return oldval;
}

template <>
SYS_ATOMIC_INLINE int32
test_and_add<int32>(int32 *addr, int32 val)
{
    return __sync_fetch_and_add(addr, val);
}

template <>
SYS_ATOMIC_INLINE int32
compare_and_swap<int32>(volatile int32 *addr, int32 oldval, int32 newval)
{
    return __sync_val_compare_and_swap(const_cast<int32 *>(addr),oldval,newval);
}

// NOTE: MBSD_INTEL implies AMD64
template <>
SYS_ATOMIC_INLINE int64
test_and_set<int64>(int64 *addr, int64 val)
{
    return __sync_lock_test_and_set(addr, val);
}

// NOTE: MBSD_INTEL implies AMD64
template <>
SYS_ATOMIC_INLINE int64
test_and_add<int64>(int64 *addr, int64 val)
{
    return __sync_fetch_and_add(addr, val);
}

// NOTE: MBSD_INTEL implies AMD64
template <>
SYS_ATOMIC_INLINE int64
compare_and_swap<int64>(volatile int64 *addr, int64 oldval, int64 newval)
{
    return __sync_val_compare_and_swap(const_cast<int64 *>(addr),oldval,newval);
}

template <>
SYS_ATOMIC_INLINE time_t
test_and_set<time_t>(time_t *addr, time_t val)
{
    return __sync_lock_test_and_set(addr, val);
}

template <>
SYS_ATOMIC_INLINE time_t
test_and_add<time_t>(time_t *addr, time_t val)
{
    return __sync_fetch_and_add(addr, val);
}

template <>
SYS_ATOMIC_INLINE time_t
compare_and_swap<time_t>(volatile time_t *addr, time_t oldval, time_t newval)
{
    return __sync_val_compare_and_swap(
            const_cast<time_t *>(addr),oldval,newval);
}

#endif // defined(LINUX)

template <typename T>
SYS_ATOMIC_INLINE void
store(T *addr, T val, SYS_MemoryOrder order)
{
    if (order == SYS_MEMORY_ORDER_STORE)
    {
        SYSstoreFence();
        *static_cast<volatile T *>(addr) = val;
    }
    else if (order == SYS_MEMORY_ORDER_SEQ_CST)
    {
        T dummy = 1;

        // __sync_lock_release() is a release barrier, ensuring all previous
        // memory stores are globally visible, and all previous memory loads
        // have been satisfied.
        __sync_lock_release(&dummy); // release barrier

        // __sync_lock_test_and_set is an acquire barrier, preventing any
        // subsequent memory references from moving before this operation.
        // Consequently, this store will be globally visible before any
        // subsequent stores or loads are processed.
        (void)__sync_lock_test_and_set(addr, val); // acquire barrier
    }
    else
    {
        // NOTE: order MUST be SYS_MEMORY_ORDER_RELAXED, but we can't use
        // UT_ASSERT_P in SYS.
        // Use volatile to force the compiler to issue a store instruction, but
        // we don't care what the CPU does.
        *static_cast<volatile T *>(addr) = val;
    }
}

template <typename T>
SYS_ATOMIC_INLINE T
load(const T *addr, SYS_MemoryOrder order)
{
    if (order == SYS_MEMORY_ORDER_LOAD)
    {
        T val = *static_cast<const volatile T *>(addr);
        SYSloadFence();
        return val;
    }
    else if (order == SYS_MEMORY_ORDER_SEQ_CST)
    {
        T tmp = 0;
        return __sync_val_compare_and_swap(const_cast<T *>(addr), tmp, tmp);
    }
    else
    {
        // NOTE: order MUST be SYS_MEMORY_ORDER_RELAXED, but we can't use
        // UT_ASSERT_P in SYS.
        return *static_cast<const volatile T *>(addr);
    }
}

#elif defined(WIN32)

#pragma intrinsic (_InterlockedExchange)
#pragma intrinsic (_InterlockedExchangeAdd)
#pragma intrinsic (_InterlockedCompareExchange)

template <>
SYS_ATOMIC_INLINE int32
test_and_set<int32>(int32 *addr, int32 val)
{
    return (int32)_InterlockedExchange((long *)addr, (long)val);
}

template <>
SYS_ATOMIC_INLINE int32
test_and_add<int32>(int32 *addr, int32 val)
{
    return (int32)_InterlockedExchangeAdd((long *)addr, (long)val);
}

template <>
SYS_ATOMIC_INLINE int32
compare_and_swap<int32>(volatile int32 *addr, int32 oldval, int32 newval)
{
    return _InterlockedCompareExchange((volatile long *)addr, newval, oldval);
}

// NOTE: _InterlockedCompareExchange64 is available on 32-bit platforms
// from the Pentium onward, using the CMPXCHG8B instruction.
#pragma intrinsic (_InterlockedCompareExchange64)

template <>
SYS_ATOMIC_INLINE int64
compare_and_swap<int64>(volatile int64 *addr, int64 oldval, int64 newval)
{
    return _InterlockedCompareExchange64(addr, newval, oldval);
}

#if defined(AMD64)

#pragma intrinsic (_InterlockedExchange64)
#pragma intrinsic (_InterlockedExchangeAdd64)

template <>
SYS_ATOMIC_INLINE int64
test_and_set<int64>(int64 *addr, int64 val)
{
    return _InterlockedExchange64(addr, val);
}

template <>
SYS_ATOMIC_INLINE int64
test_and_add<int64>(int64 *addr, int64 val)
{
    return _InterlockedExchangeAdd64(addr, val);
}

#else // AMD64

// On 32-bit platforms, we have to implement our own Exchange64 and
// ExchangeAdd64, using CompareExchange64

template <>
SYS_ATOMIC_INLINE int64
test_and_set<int64>(int64 *addr, int64 val)
{
    int64 retval = *addr;
    do
    {
        int64 newretval = _InterlockedCompareExchange64(addr, val, retval);
        if (retval == newretval)
            return retval;
        retval = newretval;
    } while (true);

    // Unreachable
}

template <>
SYS_ATOMIC_INLINE int64
test_and_add<int64>(int64 *addr, int64 val)
{
    int64 retval = *addr;
    do
    {
        int64 newretval = _InterlockedCompareExchange64(addr, retval+val, retval);
        if (retval == newretval)
            return retval;
        retval = newretval;
    } while (true);

    // Unreachable
}

#endif // AMD64

// The following implementations of store() and load() are valid only for
// MS Visual C++ 2005 and higher.
#pragma intrinsic (_ReadBarrier)
#pragma intrinsic (_WriteBarrier)
#pragma intrinsic (_InterlockedCompareExchange)

template <typename T>
SYS_ATOMIC_INLINE void
store(T *addr, T val, SYS_MemoryOrder order)
{
    // In Visual C++ 2005 and up, reads from volatile variables are defined to
    // have read-acquire semantics, and writes to volatile variables are
    // defined to have write-release semantics. The compiler will not move
    // any reads and writes past them, and on Windows will ensure that the CPU
    // does not do so either. Thus there is no need for explicit calls to
    // _ReadWriteBarrier().
    //
    // NOTE:
    // Visual Studio 2005 had a bug (subsequently fixed) on IA64 where the
    // compiler did not respect the acquire/release semantics for volatile
    // floats.
    // http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=288218
    if (order == SYS_MEMORY_ORDER_STORE)
    {
        *static_cast<volatile T *>(addr) = val;
    }
    else if (order == SYS_MEMORY_ORDER_SEQ_CST)
    {
        (void)test_and_set(addr, val);
    }
    else
    {
        // NOTE: order MUST be SYS_MEMORY_ORDER_RELAXED, but we can't use
        // UT_ASSERT_P in SYS.
        // We want to force the compiler to respect this write. One way would
        // be to treat addr as volatile, but, as already explained, that would
        // enforce release semantics on both the compiler and CPU. It should
        // theoretically be more efficient to simply prevent the compiler from
        // allowing optimization using any following writes.
        *addr = val;
        _WriteBarrier();
    }
}

template <typename T>
SYS_ATOMIC_INLINE T
load(const T *addr, SYS_MemoryOrder order)
{
    // In Visual C++ 2005 and up, reads from volatile variables are defined to
    // have read-acquire semantics, and writes to volatile variables are
    // defined to have write-release semantics. The compiler will not move
    // any reads and writes past them, and on Windows will ensure that the CPU
    // does not do so either. Thus there is no need for explicit calls to
    // _ReadWriteBarrier().
    //
    // NOTE:
    // Visual Studio 2005 had a bug (subsequently fixed) on IA64 where the
    // compiler did not respect the acquire/release semantics for volatile
    // floats.
    // http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=288218
    if (order == SYS_MEMORY_ORDER_LOAD)
    {
        return *static_cast<const volatile T *>(addr);
    }
    else if (order == SYS_MEMORY_ORDER_SEQ_CST)
    {
        T tmp = 0;
        return compare_and_swap(const_cast<T *>(addr), tmp, tmp);
    }
    else
    {
        // NOTE: order MUST be SYS_MEMORY_ORDER_RELAXED, but we can't use
        // UT_ASSERT_P in SYS.
        // We want to force the compiler to respect this read. One way would
        // be to treat addr as volatile, but, as already explained, that would
        // enforce acquire semantics on both the compiler and CPU. It should
        // theoretically be more efficient to simply prevent the compiler from
        // equating earlier reads with this one.
        _ReadBarrier();
        return *addr;
    }
}

#else

#error "Unsupported platform"
#endif

} // namespace SYS_AtomicImpl

#endif // __SYS_ATOMICIMPL_H_INCLUDED__
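
The header only supplies these raw, per-platform primitives. The sketch below is an editor's illustration, not part of SYS_AtomicImpl.h or the HDK: it uses only the function templates declared above and the SYS_MEMORY_ORDER_STORE / SYS_MEMORY_ORDER_LOAD tokens this file already references, and every other name (theCounter, theReady, thePayload, the illustrative* helpers) is hypothetical.

// Illustrative sketch only -- not part of SYS_AtomicImpl.h.
#include "SYS_AtomicImpl.h"
#include "SYS_MemoryOrder.h"
#include "SYS_Types.h"

static int32 theCounter = 0;
static int32 theReady = 0;
static int32 thePayload = 0;

// Atomically bump a shared counter. test_and_add() returns the value held
// *before* the addition, so the new value is the result plus one.
static inline int32
illustrativeIncrement()
{
    return SYS_AtomicImpl::test_and_add(&theCounter, int32(1)) + 1;
}

// Producer: write the payload, then publish it by storing the flag with
// SYS_MEMORY_ORDER_STORE, which orders the earlier payload store ahead of
// the flag store.
static inline void
illustrativePublish(int32 value)
{
    thePayload = value;
    SYS_AtomicImpl::store(&theReady, int32(1), SYS_MEMORY_ORDER_STORE);
}

// Consumer: read the flag with SYS_MEMORY_ORDER_LOAD so the payload read
// below is not satisfied before the flag read.
static inline bool
illustrativeConsume(int32 &value)
{
    if (SYS_AtomicImpl::load(&theReady, SYS_MEMORY_ORDER_LOAD) == 0)
        return false;
    value = thePayload;
    return true;
}

// Classic compare-and-swap retry loop: raise *addr to at least candidate.
// compare_and_swap() returns the previous contents of *addr.
static inline void
illustrativeStoreMax(volatile int32 *addr, int32 candidate)
{
    int32 cur = *addr;
    while (cur < candidate)
    {
        int32 prev = SYS_AtomicImpl::compare_and_swap(addr, cur, candidate);
        if (prev == cur)
            break;      // we installed candidate
        cur = prev;     // another thread changed *addr; retry with its value
    }
}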