HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
VM_AltivecFunc.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: VM_AltivecFunc.h ( VM Library, C++)
7  *
8  * COMMENTS:
9  */
10 
11 #ifndef __VM_AltivecFunc__
12 #define __VM_AltivecFunc__
13 
14 #include "VM_API.h"
15 #include <SYS/SYS_Types.h>
16 #include <SYS/SYS_Math.h>
17 #include <altivec.h>
18 
19 #define CPU_HAS_SIMD_INSTR 1
20 #define VM_ALTIVEC_STYLE 1
21 #define VM_ALTIVEC_VECTOR 1
22 
23 typedef vector float v4sf;
24 typedef vector int v4si;
25 
26 #define V4SF(A) (v4sf)A
27 #define V4SI(A) (v4si)A
28 
29 #define V4SF_CONST(val) (vector float){val,val,val,val}
30 #define V4SI_CONST(val) (vector int){val,val,val,val}
31 
32 static inline v4sf
33 vm_vec_reciprocal(const v4sf &a)
34 {
35  // Get the estimate
36  vector float est = vec_re(a);
37 
38  // Now, one round of Newton-Raphson refinement
39  return vec_madd(vec_nmsub(est, a, V4SF_CONST(1)), est, est);
40 }
41 
42 static inline v4sf
43 vm_vec_fdiv(v4sf a, v4sf b) // Fast division
44 {
45  return vec_madd(a, vec_re(b), V4SF_CONST(0));
46 }
47 
48 static inline v4sf
49 vm_vec_qdiv(v4sf a, v4sf b) // Higher Quality division
50 {
51  return vec_madd(a, vm_vec_reciprocal(b), V4SF_CONST(0));
52 }
53 
54 static inline v4sf
55 vm_vec_fsqrt(v4sf a) // Fast sqrt()
56 {
57  v4sf mask = (v4sf)vec_cmpgt(a, V4SF_CONST(0));
58  return vec_and(mask, vec_re(vec_rsqrte(a)));
59 }
60 
61 static inline v4sf
62 vm_vec_qisqrt(v4sf a) // Higher quality rsqrte()
63 {
64  // Get the root reciprocal estimate
65  vector float zero = V4SF_CONST(0);
66  vector float p5 = V4SF_CONST(0.5);
67  vector float one = V4SF_CONST(1);
68  vector float est = vec_rsqrte(a);
69 
70  // Now perform one round of Newton-Raphson refinement
71  vector float est2 = vec_madd(est, est, zero);
72  vector float half = vec_madd(est, p5, zero);
73  return vec_madd(vec_nmsub(a, est2, one), half, est);
74 }
75 
76 static inline v4sf
77 vm_vec_qsqrt(v4sf a) // Higher Quality sqrt
78 {
79  v4sf mask = (v4sf)vec_cmpgt(a, V4SF_CONST(0));
80  return vec_and(mask, vm_vec_reciprocal(vm_vec_qisqrt(a)));
81 }
82 
83 static inline v4sf
84 vm_vec_mul(v4sf a, v4sf b)
85 {
86  return vec_madd(a, b, V4SF_CONST(0));
87 }
88 
static inline v4si
vm_vec_andnot(v4si a, v4si b)
{
    // Returns b & ~a.  Note the operand swap: vec_andc(x, y)
    // computes x & ~y, while this wrapper follows the SSE-style
    // "andnot" convention of masking the SECOND argument by the
    // complement of the first.
    return vec_andc(b, a);
}
94 
95 static inline v4sf
96 vm_vec_negate(v4sf a)
97 {
98  return vec_sub(V4SF_CONST(0), a);
99 }
100 
101 static inline v4si
102 vm_vec_cmpneq(v4sf a, v4sf b)
103 {
104  return (v4si)vec_xor(vec_cmpeq(a, b), V4SI_CONST(0xFFFFFFFF));
105 }
106 
107 static inline v4si
108 vm_vec_cmpneq(v4si a, v4si b)
109 {
110  return (v4si)vec_xor(vec_cmpeq(a, b), V4SI_CONST(0xFFFFFFFF));
111 }
112 
static inline v4si
vm_vec_cmple(v4sf a, v4sf b)
{
    // Lane-wise a <= b: all-ones in lanes where true, zero elsewhere.
    return (v4si)vec_cmple(a, b);
}
118 
119 static inline v4si
120 vm_vec_cmple(v4si a, v4si b)
121 {
122  return (v4si)vec_xor(vec_cmpgt(a, b), V4SI_CONST(0xFFFFFFFF));
123 }
124 
static inline v4si
vm_vec_cmpge(v4sf a, v4sf b)
{
    // Lane-wise a >= b: all-ones in lanes where true, zero elsewhere.
    return (v4si)vec_cmpge(a, b);
}
130 
131 static inline v4si
132 vm_vec_cmpge(v4si a, v4si b)
133 {
134  return (v4si)vec_xor(vec_cmplt(a, b), V4SI_CONST(0xFFFFFFFF));
135 }
136 
137 static inline bool
138 vm_allbits(const v4si &a)
139 {
140  return vec_all_eq(a, V4SI_CONST(0xFFFFFFFF));
141 }
142 
143 static inline void
144 vm_splats(v4si &v, uint32 a)
145 {
146  uint32 *vi = (uint32 *)&v;
147  vi[0] = vi[1] = vi[2] = vi[3] = a;
148 }
149 
150 static inline void
151 vm_splats(v4sf &v, float a)
152 {
153  float *vf = (float *)&v;
154  vf[0] = vf[1] = vf[2] = vf[3] = a;
155 }
156 
157 static inline void
158 vm_splats(v4si &v, uint32 a, uint32 b, uint32 c, uint32 d)
159 {
160  uint32 *vi = (uint32 *)&v;
161  vi[0] = a;
162  vi[1] = b;
163  vi[2] = c;
164  vi[3] = d;
165 }
166 
167 static inline void
168 vm_splats(v4sf &v, float a, float b, float c, float d)
169 {
170  float *vf = (float *)&v;
171  vf[0] = a;
172  vf[1] = b;
173  vf[2] = c;
174  vf[3] = d;
175 }
176 
177 template <int A, int B, int C, int D>
178 static inline v4sf
179 vm_shuffle(const v4sf &v)
180 {
181  vector unsigned char permute;
182  unsigned char *p = (unsigned char *)&permute;
183  int a = A * 4;
184  int b = B * 4;
185  int c = C * 4;
186  int d = D * 4;
187  p[0] = (unsigned char)a;
188  p[1] = (unsigned char)a+1;
189  p[2] = (unsigned char)a+2;
190  p[3] = (unsigned char)a+3;
191  p[4] = (unsigned char)b;
192  p[5] = (unsigned char)b+1;
193  p[6] = (unsigned char)b+2;
194  p[7] = (unsigned char)b+3;
195  p[8] = (unsigned char)c;
196  p[9] = (unsigned char)c+1;
197  p[10] = (unsigned char)c+2;
198  p[11] = (unsigned char)c+3;
199  p[12] = (unsigned char)d;
200  p[13] = (unsigned char)d+1;
201  p[14] = (unsigned char)d+2;
202  p[15] = (unsigned char)d+3;
203 
204  return vec_perm(v, v, permute);
205 }
206 
//
// Fall back to regular floating-point for trig functions
//
// Expands to a scalar-fallback wrapper vm_<FUN>(v4sf) that applies
// the SYS<FUN> scalar function to each of the four lanes.
// NOTE(review): the VMBASIC_DEFINE_UNARY(...) invocations (which
// generate the vm_sin/vm_cos/vm_tan referenced by VM_SIN/VM_COS/
// VM_TAN below) are elided from this listing — confirm against the
// full header.
#define VMBASIC_DEFINE_UNARY(FUN) \
 static inline v4sf vm_##FUN(const v4sf& v) \
 { \
 v4sf r; \
 float *rf = (float *)&r; \
 const float *vf = (const float *)&v; \
 rf[0] = SYS##FUN(vf[0]); \
 rf[1] = SYS##FUN(vf[1]); \
 rf[2] = SYS##FUN(vf[2]); \
 rf[3] = SYS##FUN(vf[3]); \
 return r; \
 }
// Generator macro is local to this header.
#undef VMBASIC_DEFINE_UNARY
226 
227 static inline
228 void vm_sincos(const v4sf &x, v4sf *s, v4sf *c)
229 {
230  const float *xf = (const float *)&x;
231  float *sf = (float *)s;
232  float *cf = (float *)c;
233  SYSsincos(xf[0], sf + 0, cf + 0);
234  SYSsincos(xf[1], sf + 1, cf + 1);
235  SYSsincos(xf[2], sf + 2, cf + 2);
236  SYSsincos(xf[3], sf + 3, cf + 3);
237 }
238 
239 static inline v4si
240 vm_shiftleft(const v4si &a, int c)
241 {
242  // TODO: use vec_slo
243  v4si r;
244  uint32 *rui = (uint32 *)&r;
245  const uint32 *aui = (const uint32 *)&a;
246  rui[0] = aui[0] << c;
247  rui[1] = aui[1] << c;
248  rui[2] = aui[2] << c;
249  rui[3] = aui[3] << c;
250  return r;
251 }
252 
253 static inline v4si
254 vm_shiftright(const v4si &a, int c)
255 {
256  // TODO: use vec_sro
257  v4si r;
258  uint32 *rui = (uint32 *)&r;
259  const uint32 *aui = (const uint32 *)&a;
260  rui[0] = aui[0] >> c;
261  rui[1] = aui[1] >> c;
262  rui[2] = aui[2] >> c;
263  rui[3] = aui[3] >> c;
264  return r;
265 }
266 
267 #define VM_SPLATS vm_splats
268 #define VM_CMPLT (v4si)vec_cmplt
269 #define VM_CMPLE (v4si)vm_vec_cmple
270 #define VM_CMPGT (v4si)vec_cmpgt
271 #define VM_CMPGE (v4si)vm_vec_cmpge
272 #define VM_CMPEQ (v4si)vec_cmpeq
273 #define VM_CMPNE (v4si)vm_vec_cmpneq
274 
275 #define VM_ICMPLT (v4si)vec_cmplt
276 #define VM_ICMPGT (v4si)vec_cmpgt
277 #define VM_ICMPEQ (v4si)vec_cmpeq
278 
279 #define VM_IADD vec_add
280 #define VM_ISUB vec_sub
281 #error VM_IMUL is not implemented!
282 
283 #define VM_ADD vec_add
284 #define VM_SUB vec_sub
285 #define VM_MUL vm_vec_mul
286 #define VM_DIV vm_vec_qdiv
287 #define VM_SQRT vm_vec_qsqrt
288 #define VM_ISQRT vec_rsqrte
289 #define VM_INVERT vm_vec_reciprocal
290 #define VM_ABS vec_abs
291 
292 #define VM_FDIV vm_vec_fdiv
293 #define VM_FSQRT vm_vec_fsqrt
294 #define VM_NEG vm_vec_negate
295 #define VM_MADD vec_madd
296 
297 #define VM_MIN vec_min
298 #define VM_MAX vec_max
299 
300 #define VM_AND vec_and
301 #define VM_ANDNOT vm_vec_andnot
302 #define VM_OR vec_or
303 #define VM_XOR vec_xor
304 
305 #define VM_ALLBITS vm_allbits
306 
307 #define VM_SHUFFLE vm_shuffle
308 
static inline v4sf
vm_vec_floatcast(const v4si i)
{
    // Convert four signed 32-bit ints to floats (scale exponent 0,
    // i.e. no fixed-point scaling).
    return vec_ctf(i, 0);
}
314 
static inline v4si
vm_vec_intcast(const v4sf f)
{
    // Convert four floats to signed 32-bit ints, truncating toward
    // zero (scale exponent 0, i.e. no fixed-point scaling).
    return vec_cts(f, 0);
}
320 
static inline v4si
vm_vec_floor(const v4sf &f)
{
    // Round each lane toward -infinity, then convert to int; the
    // conversion's truncation is exact since vec_floor has already
    // produced integral values.
    return vec_cts(vec_floor(f), 0);
}
326 
327 #define VM_P_FLOOR()
328 #define VM_FLOOR vm_vec_floor
329 #define VM_E_FLOOR()
330 
331 #define VM_INT vm_vec_intcast
332 
333 // Float to integer conversion
334 #define VM_IFLOAT vm_vec_floatcast
335 
336 #define VM_SIN vm_sin
337 #define VM_COS vm_cos
338 #define VM_TAN vm_tan
339 #define VM_SINCOS vm_sincos
340 
341 // bitshifing A=v4si C=int
342 #define VM_SHIFTLEFT(A,C) vm_shiftleft(A,C)
343 #define VM_SHIFTRIGHT(A,C) vm_shiftright(A,C)
344 
345 #endif
GLdouble s
Definition: glew.h:1390
SYS_API double cos(double x)
Definition: SYS_FPUMath.h:69
vector int v4si
GLboolean GLboolean GLboolean GLboolean a
Definition: glew.h:9477
#define V4SI_CONST(val)
const GLdouble * v
Definition: glew.h:1391
GLenum GLint GLuint mask
Definition: glew.h:1845
#define V4SF_CONST(val)
GLclampf f
Definition: glew.h:3499
GLint GLint GLint GLint GLint x
Definition: glew.h:1252
const GLfloat * c
Definition: glew.h:16296
GLdouble GLdouble GLdouble b
Definition: glew.h:9122
GLfloat GLfloat p
Definition: glew.h:16321
SYS_API double tan(double x)
Definition: SYS_FPUMath.h:75
GLdouble GLdouble GLdouble r
Definition: glew.h:1406
ImageBuf OIIO_API zero(ROI roi, int nthreads=0)
#define VMBASIC_DEFINE_UNARY(FUN)
vector float v4sf
Definition: half.h:91
SYS_API double sin(double x)
Definition: SYS_FPUMath.h:71
unsigned int uint32
Definition: SYS_Types.h:40