SYS_AtomicImpl.h
/*
 * PROPRIETARY INFORMATION.  This software is proprietary to
 * Side Effects Software Inc., and is not to be reproduced,
 * transmitted, or disclosed in any way without written permission.
 *
 * NAME:     SYS_AtomicImpl.h (SYS Library, C++)
 *
 * COMMENTS: Platform-specific atomic operations implementation.
 */

#ifndef __SYS_ATOMICIMPL_H_INCLUDED__
#define __SYS_ATOMICIMPL_H_INCLUDED__

#include "SYS_Inline.h"
#include "SYS_Types.h"
#include "SYS_MemoryOrder.h"
#include "SYS_StaticAssert.h"

#ifdef WIN32
    #if defined(_MSC_VER) && _MSC_VER >= 1400
        #include <intrin.h>
    #else
        #error "This file will not build with MSVC2003 and older"
    #endif
    #define SYS_ATOMIC_INLINE           SYS_FORCE_INLINE
    #define SYS_ATOMIC_STATIC_INLINE    SYS_STATIC_FORCE_INLINE
#else
    #define SYS_ATOMIC_INLINE           inline
    #define SYS_ATOMIC_STATIC_INLINE    SYS_STATIC_INLINE
#endif
#ifdef SOLARIS
#include <bits/atomicity.h>     // for __test_and_set
#endif
#ifdef MBSD
#include <libkern/OSAtomic.h>
#endif


namespace SYS_AtomicImpl
{

template <typename T> T test_and_set(T *addr, T val)
{
    SYS_STATIC_ASSERT_MSG(sizeof(T) == 0,
        "Cannot instantiate test_and_set for unsupported type.");
}
template <typename T> T test_and_add(T *addr, T val)
{
    SYS_STATIC_ASSERT_MSG(sizeof(T) == 0,
        "Cannot instantiate test_and_add for unsupported type.");
}
template <typename T> T compare_and_swap(volatile T *addr, T oldval, T newval)
{
    SYS_STATIC_ASSERT_MSG(sizeof(T) == 0,
        "Cannot instantiate compare_and_swap for unsupported type.");
}

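// Illustrative note (not part of the original header): these primary
// templates exist only to reject unsupported types at compile time.  A
// caller using a supported type resolves to one of the platform-specific
// specializations below, while an unsupported type instantiates the
// primary template and trips the static assertion.  A hypothetical use:
//
//     int32 counter = 0;
//     int32 prev = SYS_AtomicImpl::test_and_add<int32>(&counter, 1); // OK
//
//     short s = 0;
//     SYS_AtomicImpl::test_and_set(&s, short(1));  // compile-time error:
//         // "Cannot instantiate test_and_set for unsupported type."
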
// Group these together because the load/store implementation is the same
#if defined(LINUX) || defined(MBSD_INTEL)

#if defined(LINUX)

// GCC 4.0 doesn't support __sync_lock_test_and_set,
// but GCC 4.1 and onwards do
#if !SYS_IS_GCC_GE(4, 1)
#error "Unsupported gcc version"
#endif

template <>
SYS_ATOMIC_STATIC_INLINE int32
test_and_set<int32>(int32 *addr, int32 val)
{
    return __sync_lock_test_and_set(addr, val);
}

template <>
SYS_ATOMIC_STATIC_INLINE int32
test_and_add<int32>(int32 *addr, int32 val)
{
    return __sync_fetch_and_add(addr, val);
}

// if (*addr == oldval) *addr = newval
template <>
SYS_ATOMIC_STATIC_INLINE int32
compare_and_swap<int32>(volatile int32 *addr, int32 oldval, int32 newval)
{
    return __sync_val_compare_and_swap(const_cast<int32 *>(addr), oldval, newval);
}

// NOTE: The int64 GCC built-ins are implemented for 32-bit platforms,
// using CMPXCHG8B and, if necessary, looping.

template <>
SYS_ATOMIC_STATIC_INLINE int64
test_and_set<int64>(int64 *addr, int64 val)
{
    return __sync_lock_test_and_set(addr, val);
}

template <>
SYS_ATOMIC_STATIC_INLINE int64
test_and_add<int64>(int64 *addr, int64 val)
{
    return __sync_fetch_and_add(addr, val);
}

template <>
SYS_ATOMIC_STATIC_INLINE int64
compare_and_swap<int64>(volatile int64 *addr, int64 oldval, int64 newval)
{
    return __sync_val_compare_and_swap(const_cast<int64 *>(addr), oldval, newval);
}

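// --- Illustrative sketch (not part of the original header) ---
// compare_and_swap() returns the value previously stored at *addr, so a
// caller can build other read-modify-write operations on top of it with a
// retry loop.  A hypothetical atomic maximum for int32 might look like
// this (the function name is ours, purely for illustration):
static inline int32
exampleAtomicMax(volatile int32 *addr, int32 val)
{
    int32 cur = *addr;
    // Retry until either the stored value is already >= val, or our swap
    // succeeds without interference from another thread.
    while (val > cur)
    {
        int32 prev = compare_and_swap<int32>(addr, cur, val);
        if (prev == cur)
            break;          // swap succeeded
        cur = prev;         // lost the race; re-evaluate against new value
    }
    return cur;
}
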
#else // LINUX

//
// Code for MBSD_INTEL
//

template <>
SYS_ATOMIC_STATIC_INLINE int32
test_and_set<int32>(int32 *addr, int32 val)
{
    int32 oldval;
    __asm__ __volatile__("lock xchgl %0, %1"
                         : "=r"(oldval), "=m"(*(addr))
                         : "0"(val), "m"(*(addr)));
    return oldval;
}

template <>
SYS_ATOMIC_STATIC_INLINE int32
test_and_add<int32>(int32 *addr, int32 val)
{
    return __sync_fetch_and_add(addr, val);
}

template <>
SYS_ATOMIC_STATIC_INLINE int32
compare_and_swap<int32>(volatile int32 *addr, int32 oldval, int32 newval)
{
    return __sync_val_compare_and_swap(const_cast<int32 *>(addr), oldval, newval);
}

// NOTE: MBSD_INTEL implies AMD64
template <>
SYS_ATOMIC_STATIC_INLINE int64
test_and_set<int64>(int64 *addr, int64 val)
{
    return __sync_lock_test_and_set(addr, val);
}

// NOTE: MBSD_INTEL implies AMD64
template <>
SYS_ATOMIC_STATIC_INLINE int64
test_and_add<int64>(int64 *addr, int64 val)
{
    return __sync_fetch_and_add(addr, val);
}

// NOTE: MBSD_INTEL implies AMD64
template <>
SYS_ATOMIC_STATIC_INLINE int64
compare_and_swap<int64>(volatile int64 *addr, int64 oldval, int64 newval)
{
    return __sync_val_compare_and_swap(const_cast<int64 *>(addr), oldval, newval);
}

template <>
SYS_ATOMIC_STATIC_INLINE time_t
test_and_set<time_t>(time_t *addr, time_t val)
{
    return __sync_lock_test_and_set(addr, val);
}

template <>
SYS_ATOMIC_STATIC_INLINE time_t
test_and_add<time_t>(time_t *addr, time_t val)
{
    return __sync_fetch_and_add(addr, val);
}

template <>
SYS_ATOMIC_STATIC_INLINE time_t
compare_and_swap<time_t>(volatile time_t *addr, time_t oldval, time_t newval)
{
    return __sync_val_compare_and_swap(
                const_cast<time_t *>(addr), oldval, newval);
}

#endif // defined(LINUX)

template <typename T>
SYS_ATOMIC_STATIC_INLINE void
store(T *addr, T val, SYS_MemoryOrder order)
{
    if (order == SYS_MEMORY_ORDER_STORE)
    {
        SYSstoreFence();
        *static_cast<volatile T *>(addr) = val;
    }
    else if (order == SYS_MEMORY_ORDER_SEQ_CST)
    {
        T dummy = 1;

        // __sync_lock_release() is a release barrier, ensuring all previous
        // memory stores are globally visible, and all previous memory loads
        // have been satisfied.
        __sync_lock_release(&dummy);                // release barrier

        // __sync_lock_test_and_set is an acquire barrier, preventing any
        // subsequent memory references from moving before this operation.
        // Consequently, this store will be globally visible before any
        // subsequent stores or loads are processed.
        (void)__sync_lock_test_and_set(addr, val);  // acquire barrier
    }
    else
    {
        // NOTE: order MUST be SYS_MEMORY_ORDER_RELAXED, but we can't use
        //       UT_ASSERT_P in SYS.
        // Use volatile to force the compiler to issue a store instruction,
        // but we don't care what the CPU does.
        *static_cast<volatile T *>(addr) = val;
    }
}

template <typename T>
SYS_ATOMIC_STATIC_INLINE T
load(const T *addr, SYS_MemoryOrder order)
{
    if (order == SYS_MEMORY_ORDER_LOAD)
    {
        T val = *static_cast<const volatile T *>(addr);
        SYSloadFence();
        return val;
    }
    else if (order == SYS_MEMORY_ORDER_SEQ_CST)
    {
        T tmp = 0;
        return __sync_val_compare_and_swap(const_cast<T *>(addr), tmp, tmp);
    }
    else
    {
        // NOTE: order MUST be SYS_MEMORY_ORDER_RELAXED, but we can't use
        //       UT_ASSERT_P in SYS.
        return *static_cast<const volatile T *>(addr);
    }
}

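// --- Illustrative sketch (not part of the original header) ---
// One way the orders above are intended to pair up: a writer publishes a
// payload and then sets a flag with SYS_MEMORY_ORDER_STORE (the store
// fence keeps the payload write from being reordered after the flag
// write), and a reader checks the flag with SYS_MEMORY_ORDER_LOAD before
// touching the payload.  The helper names below are ours, purely for
// illustration.
static inline void
examplePublish(int32 *payload, int32 *ready, int32 val)
{
    *payload = val;                                 // plain payload write
    store(ready, (int32)1, SYS_MEMORY_ORDER_STORE); // flag write, fenced
}

static inline bool
exampleConsume(const int32 *payload, const int32 *ready, int32 &val)
{
    if (load(ready, SYS_MEMORY_ORDER_LOAD) == 0)    // flag read, fenced
        return false;
    val = *payload;                                 // safe to read payload
    return true;
}
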
#elif defined(WIN32)

#pragma intrinsic (_InterlockedExchange)
#pragma intrinsic (_InterlockedExchangeAdd)
#pragma intrinsic (_InterlockedCompareExchange)

template <>
SYS_ATOMIC_STATIC_INLINE int32
test_and_set<int32>(int32 *addr, int32 val)
{
    return (int32)_InterlockedExchange((long *)addr, (long)val);
}

template <>
SYS_ATOMIC_STATIC_INLINE int32
test_and_add<int32>(int32 *addr, int32 val)
{
    return (int32)_InterlockedExchangeAdd((long *)addr, (long)val);
}

template <>
SYS_ATOMIC_STATIC_INLINE int32
compare_and_swap<int32>(volatile int32 *addr, int32 oldval, int32 newval)
{
    return _InterlockedCompareExchange((volatile long *)addr, newval, oldval);
}

// NOTE: _InterlockedCompareExchange64 is available on 32-bit platforms
//       from the Pentium onward, using the CMPXCHG8B instruction.
#pragma intrinsic (_InterlockedCompareExchange64)

template <>
SYS_ATOMIC_STATIC_INLINE int64
compare_and_swap<int64>(volatile int64 *addr, int64 oldval, int64 newval)
{
    return _InterlockedCompareExchange64(addr, newval, oldval);
}

#if defined(AMD64)

#pragma intrinsic (_InterlockedExchange64)
#pragma intrinsic (_InterlockedExchangeAdd64)

template <>
SYS_ATOMIC_STATIC_INLINE int64
test_and_set<int64>(int64 *addr, int64 val)
{
    return _InterlockedExchange64(addr, val);
}

template <>
SYS_ATOMIC_STATIC_INLINE int64
test_and_add<int64>(int64 *addr, int64 val)
{
    return _InterlockedExchangeAdd64(addr, val);
}

#else // AMD64

// On 32-bit platforms, we have to implement our own Exchange64 and
// ExchangeAdd64, using CompareExchange64.

template <>
SYS_ATOMIC_STATIC_INLINE int64
test_and_set<int64>(int64 *addr, int64 val)
{
    int64 retval = *addr;
    do
    {
        int64 newretval = _InterlockedCompareExchange64(addr, val, retval);
        if (retval == newretval)
            return retval;
        retval = newretval;
    } while (true);

    // Unreachable
}

template <>
SYS_ATOMIC_STATIC_INLINE int64
test_and_add<int64>(int64 *addr, int64 val)
{
    int64 retval = *addr;
    do
    {
        int64 newretval =
            _InterlockedCompareExchange64(addr, retval + val, retval);
        if (retval == newretval)
            return retval;
        retval = newretval;
    } while (true);

    // Unreachable
}

#endif // AMD64

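// --- Illustrative sketch (not part of the original header) ---
// The compare-and-swap loop used above to emulate the 64-bit exchange
// intrinsics generalizes to any 64-bit read-modify-write operation.  A
// hypothetical atomic bitwise OR (the name is ours, for illustration only)
// could be built the same way on top of compare_and_swap<int64>():
static inline int64
exampleTestAndOr64(volatile int64 *addr, int64 bits)
{
    int64 retval = *addr;
    do
    {
        int64 newretval = compare_and_swap<int64>(addr, retval, retval | bits);
        if (retval == newretval)
            return retval;      // swap succeeded; return the prior value
        retval = newretval;     // another thread intervened; retry
    } while (true);
}
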
// The following implementations of store() and load() are valid only for
// MS Visual C++ 2005 and higher.
#if defined(_MSC_VER) && _MSC_VER >= 1400
#pragma intrinsic (_ReadBarrier)
#pragma intrinsic (_WriteBarrier)
#pragma intrinsic (_InterlockedCompareExchange)

template <typename T>
SYS_ATOMIC_STATIC_INLINE void
store(T *addr, T val, SYS_MemoryOrder order)
{
    // In Visual C++ 2005 and up, reads from volatile variables are defined
    // to have read-acquire semantics, and writes to volatile variables are
    // defined to have write-release semantics.  The compiler will not move
    // any reads and writes past them, and on Windows will ensure that the
    // CPU does not do so either.  Thus there is no need for explicit calls
    // to _ReadWriteBarrier().
    //
    // NOTE:
    //   Visual Studio 2005 had a bug (subsequently fixed) on IA64 where the
    //   compiler did not respect the acquire/release semantics for volatile
    //   floats.
    //   http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=288218
    if (order == SYS_MEMORY_ORDER_STORE)
    {
        *static_cast<volatile T *>(addr) = val;
    }
    else if (order == SYS_MEMORY_ORDER_SEQ_CST)
    {
        (void)test_and_set(addr, val);
    }
    else
    {
        // NOTE: order MUST be SYS_MEMORY_ORDER_RELAXED, but we can't use
        //       UT_ASSERT_P in SYS.
        // We want to force the compiler to respect this write.  One way
        // would be to treat addr as volatile but, as already explained,
        // that would enforce release semantics on both the compiler and
        // CPU.  It should theoretically be more efficient to simply prevent
        // the compiler from optimizing using any following writes.
        *addr = val;
        _WriteBarrier();
    }
}

template <typename T>
SYS_ATOMIC_STATIC_INLINE T
load(const T *addr, SYS_MemoryOrder order)
{
    // In Visual C++ 2005 and up, reads from volatile variables are defined
    // to have read-acquire semantics, and writes to volatile variables are
    // defined to have write-release semantics.  The compiler will not move
    // any reads and writes past them, and on Windows will ensure that the
    // CPU does not do so either.  Thus there is no need for explicit calls
    // to _ReadWriteBarrier().
    //
    // NOTE:
    //   Visual Studio 2005 had a bug (subsequently fixed) on IA64 where the
    //   compiler did not respect the acquire/release semantics for volatile
    //   floats.
    //   http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=288218
    if (order == SYS_MEMORY_ORDER_LOAD)
    {
        return *static_cast<const volatile T *>(addr);
    }
    else if (order == SYS_MEMORY_ORDER_SEQ_CST)
    {
        T tmp = 0;
        return compare_and_swap(const_cast<T *>(addr), tmp, tmp);
    }
    else
    {
        // NOTE: order MUST be SYS_MEMORY_ORDER_RELAXED, but we can't use
        //       UT_ASSERT_P in SYS.
        // We want to force the compiler to respect this read.  One way
        // would be to treat addr as volatile but, as already explained,
        // that would enforce acquire semantics on both the compiler and
        // CPU.  It should theoretically be more efficient to simply prevent
        // the compiler from equating earlier reads with this one.
        _ReadBarrier();
        return *addr;
    }
}
#else
#error store() only implemented under WIN32/WIN64 for MSVC 2005 and up
#error load() only implemented under WIN32/WIN64 for MSVC 2005 and up
#endif // _MSC_VER

#else

#error "Unsupported platform"
#endif

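// --- Illustrative sketch (not part of the original header) ---
// A minimal spin lock built from the primitives above, assuming that
// test_and_set() has at least acquire semantics on every supported
// platform (true of the GCC __sync builtin and of _InterlockedExchange)
// and that a SYS_MEMORY_ORDER_STORE store is an acceptable release on the
// strongly-ordered targets this header appears to assume.  The names are
// ours, purely for illustration.
static inline void
exampleSpinAcquire(int32 *lockword)
{
    // Spin until we are the thread that flips the word from 0 to 1.
    while (test_and_set<int32>(lockword, 1) != 0)
    {
        // Busy-wait; a real implementation would back off or yield here.
    }
}

static inline void
exampleSpinRelease(int32 *lockword)
{
    // Publish all writes made inside the critical section, then clear.
    store<int32>(lockword, 0, SYS_MEMORY_ORDER_STORE);
}
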
} // namespace SYS_AtomicImpl

#endif // __SYS_ATOMICIMPL_H_INCLUDED__