VM_AltivecFunc.h
/*
 * PROPRIETARY INFORMATION. This software is proprietary to
 * Side Effects Software Inc., and is not to be reproduced,
 * transmitted, or disclosed in any way without written permission.
 *
 * NAME: VM_AltivecFunc.h ( VM Library, C++)
 *
 * COMMENTS:
 */

#ifndef __VM_AltivecFunc__
#define __VM_AltivecFunc__

#include "VM_API.h"
#include <SYS/SYS_Types.h>
#include <altivec.h>

#define CPU_HAS_SIMD_INSTR 1
#define VM_ALTIVEC_STYLE 1
#define VM_ALTIVEC_VECTOR 1

typedef vector float v4sf;
typedef vector int v4si;

#define V4SF(A) (v4sf)A
#define V4SI(A) (v4si)A

#define V4SF_CONST(val) (vector float){val,val,val,val}
#define V4SI_CONST(val) (vector int){val,val,val,val}

static inline v4sf
vm_vec_reciprocal(const v4sf &a)
{
    // Get the estimate
    vector float est = vec_re(a);

    // Now, one round of Newton-Raphson refinement
    return vec_madd(vec_nmsub(est, a, V4SF_CONST(1)), est, est);
}
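
// The refinement above is the standard Newton-Raphson step for 1/a:
//
//     est' = est + est*(1 - a*est)
//
// vec_nmsub(est, a, 1) yields (1 - est*a), and vec_madd folds the
// multiply by est and the final add into one fused operation, roughly
// doubling the precision of the hardware estimate.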

static inline v4sf
vm_vec_fdiv(v4sf a, v4sf b) // Fast division
{
    return vec_madd(a, vec_re(b), V4SF_CONST(0));
}

static inline v4sf
vm_vec_qdiv(v4sf a, v4sf b) // Higher-quality division
{
    return vec_madd(a, vm_vec_reciprocal(b), V4SF_CONST(0));
}

static inline v4sf
vm_vec_fsqrt(v4sf a) // Fast sqrt()
{
    // re(rsqrte(a)) approximates 1/(1/sqrt(a)) = sqrt(a).  The compare
    // mask zeroes lanes where a <= 0, so the NaN/Inf that vec_rsqrte
    // produces for non-positive inputs never reaches the caller.
    v4sf mask = (v4sf)vec_cmpgt(a, V4SF_CONST(0));
    return vec_and(mask, vec_re(vec_rsqrte(a)));
}

static inline v4sf
vm_vec_qisqrt(v4sf a) // Higher-quality rsqrte()
{
    // Get the root reciprocal estimate
    vector float zero = V4SF_CONST(0);
    vector float p5 = V4SF_CONST(0.5);
    vector float one = V4SF_CONST(1);
    vector float est = vec_rsqrte(a);

    // Now perform one round of Newton-Raphson refinement
    vector float est2 = vec_madd(est, est, zero);
    vector float half = vec_madd(est, p5, zero);
    return vec_madd(vec_nmsub(a, est2, one), half, est);
}
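
// The refinement above is Newton-Raphson for 1/sqrt(a):
//
//     est' = est + (est/2)*(1 - a*est*est)
//
// est2 = est*est and half = est/2 feed the vec_nmsub/vec_madd pair, so
// the whole step costs four fused operations beyond the estimate.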

static inline v4sf
vm_vec_qsqrt(v4sf a) // Higher-quality sqrt
{
    v4sf mask = (v4sf)vec_cmpgt(a, V4SF_CONST(0));
    return vec_and(mask, vm_vec_reciprocal(vm_vec_qisqrt(a)));
}
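
// sqrt(a) is evaluated as 1/(1/sqrt(a)), with both stages refined by a
// Newton-Raphson step.  As in vm_vec_fsqrt, the compare mask forces
// lanes with a <= 0 to zero rather than propagating NaN/Inf.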

static inline v4sf
vm_vec_mul(v4sf a, v4sf b)
{
    return vec_madd(a, b, V4SF_CONST(0));
}
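
// Classic AltiVec has no standalone single-precision multiply; a fused
// multiply-add with a zero addend is the conventional idiom for a*b.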

static inline v4si
vm_vec_andnot(v4si a, v4si b)
{
    // vec_andc(b, a) computes b & ~a, matching the usual andnot(a, b)
    // convention of "clear the bits of b that are set in a".
    return vec_andc(b, a);
}

static inline v4sf
vm_vec_negate(v4sf a)
{
    return vec_sub(V4SF_CONST(0), a);
}

static inline v4si
vm_vec_cmpneq(v4sf a, v4sf b)
{
    return (v4si)vec_xor(vec_cmpeq(a, b), V4SI_CONST(0xFFFFFFFF));
}

static inline v4si
vm_vec_cmpneq(v4si a, v4si b)
{
    return (v4si)vec_xor(vec_cmpeq(a, b), V4SI_CONST(0xFFFFFFFF));
}
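
// AltiVec has no not-equal predicate, and no less-equal or
// greater-equal predicates for integers.  Here and below, the missing
// comparisons are synthesized by XOR-ing the complementary predicate
// with all ones: (a != b) == ~(a == b), (a <= b) == ~(a > b), and
// (a >= b) == ~(a < b).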

static inline v4si
vm_vec_cmple(v4sf a, v4sf b)
{
    return (v4si)vec_cmple(a, b);
}

static inline v4si
vm_vec_cmple(v4si a, v4si b)
{
    return (v4si)vec_xor(vec_cmpgt(a, b), V4SI_CONST(0xFFFFFFFF));
}

static inline v4si
vm_vec_cmpge(v4sf a, v4sf b)
{
    return (v4si)vec_cmpge(a, b);
}

static inline v4si
vm_vec_cmpge(v4si a, v4si b)
{
    return (v4si)vec_xor(vec_cmplt(a, b), V4SI_CONST(0xFFFFFFFF));
}

static inline bool
vm_allbits(const v4si &a)
{
    // True only when every bit in every lane is set, i.e. all four
    // lanes of a comparison result are true.
    return vec_all_eq(a, V4SI_CONST(0xFFFFFFFF));
}

static inline void
vm_splats(v4si &v, uint32 a)
{
    uint32 *vi = (uint32 *)&v;
    vi[0] = vi[1] = vi[2] = vi[3] = a;
}

static inline void
vm_splats(v4sf &v, float a)
{
    float *vf = (float *)&v;
    vf[0] = vf[1] = vf[2] = vf[3] = a;
}

static inline void
vm_splats(v4si &v, uint32 a, uint32 b, uint32 c, uint32 d)
{
    uint32 *vi = (uint32 *)&v;
    vi[0] = a;
    vi[1] = b;
    vi[2] = c;
    vi[3] = d;
}

static inline void
vm_splats(v4sf &v, float a, float b, float c, float d)
{
    float *vf = (float *)&v;
    vf[0] = a;
    vf[1] = b;
    vf[2] = c;
    vf[3] = d;
}
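
// Hypothetical usage (not part of this header): the one-argument forms
// broadcast a scalar, the four-argument forms fill individual lanes.
//
//     v4sf half, ramp;
//     vm_splats(half, 0.5f);                   // {0.5, 0.5, 0.5, 0.5}
//     vm_splats(ramp, 0.0f, 1.0f, 2.0f, 3.0f); // {0.0, 1.0, 2.0, 3.0}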

template <int A, int B, int C, int D>
static inline v4sf
vm_shuffle(const v4sf &v)
{
    vector unsigned char permute;
    unsigned char *p = (unsigned char *)&permute;
    const int word[4] = { A * 4, B * 4, C * 4, D * 4 };

    // Each selected word contributes its four consecutive byte indices
    // to the vec_perm control vector.
    for (int i = 0; i < 4; i++)
        for (int j = 0; j < 4; j++)
            p[i * 4 + j] = (unsigned char)(word[i] + j);

    return vec_perm(v, v, permute);
}
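
// The template arguments pick source lanes by index.  For example
// (hypothetical usage):
//
//     v4sf rev = vm_shuffle<3,2,1,0>(v);  // reverse the four lanes
//     v4sf x4  = vm_shuffle<0,0,0,0>(v);  // broadcast lane 0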

#define VM_SPLATS vm_splats
#define VM_CMPLT (v4si)vec_cmplt
#define VM_CMPLE (v4si)vm_vec_cmple
#define VM_CMPGT (v4si)vec_cmpgt
#define VM_CMPGE (v4si)vm_vec_cmpge
#define VM_CMPEQ (v4si)vec_cmpeq
#define VM_CMPNE (v4si)vm_vec_cmpneq

#define VM_ICMPLT (v4si)vec_cmplt
#define VM_ICMPGT (v4si)vec_cmpgt
#define VM_ICMPEQ (v4si)vec_cmpeq

#define VM_IADD vec_add
#define VM_ISUB vec_sub
// Classic AltiVec provides no 32-bit integer multiply intrinsic, so
// VM_IMUL is deliberately left unimplemented.
#error VM_IMUL is not implemented!

#define VM_ADD vec_add
#define VM_SUB vec_sub
#define VM_MUL vm_vec_mul
#define VM_DIV vm_vec_qdiv
#define VM_SQRT vm_vec_qsqrt
#define VM_ISQRT vec_rsqrte
#define VM_INVERT vm_vec_reciprocal
#define VM_ABS vec_abs

#define VM_FDIV vm_vec_fdiv
#define VM_FSQRT vm_vec_fsqrt
#define VM_NEG vm_vec_negate
#define VM_MADD vec_madd

#define VM_MIN vec_min
#define VM_MAX vec_max

#define VM_AND vec_and
#define VM_ANDNOT vm_vec_andnot
#define VM_OR vec_or
#define VM_XOR vec_xor

#define VM_ALLBITS vm_allbits

#define VM_SHUFFLE vm_shuffle
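
// Hypothetical usage (not part of this header): generic VM code written
// against these macros stays platform-independent, e.g.
//
//     v4sf r = VM_MADD(a, b, c);           // r = a*b + c, fused
//     v4si m = VM_CMPGE(r, V4SF_CONST(0)); // per-lane mask of r >= 0
//     bool all = VM_ALLBITS(m);            // true if every lane passes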

static inline v4sf
vm_vec_floatcast(const v4si i)
{
    return vec_ctf(i, 0);
}

static inline v4si
vm_vec_intcast(const v4sf f)
{
    return vec_cts(f, 0);
}
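
// The second argument of vec_ctf/vec_cts is a power-of-two scale applied
// during the conversion; passing 0 requests a plain, unscaled convert.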

static inline v4si
vm_vec_floor(const v4sf &f)
{
    return vec_cts(vec_floor(f), 0);
}
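
// vec_cts truncates toward zero, which matches floor() only for
// non-negative values; rounding down with vec_floor first makes the
// result correct for negative inputs too.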

// Prologue/epilogue hooks around VM_FLOOR; AltiVec needs no special
// rounding-mode setup, so they expand to nothing.
#define VM_P_FLOOR()
#define VM_FLOOR vm_vec_floor
#define VM_E_FLOOR()

#define VM_INT vm_vec_intcast

// Integer to float conversion
#define VM_IFLOAT vm_vec_floatcast

#endif