00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef __VM_AltivecFunc__
00019 #define __VM_AltivecFunc__
00020
00021 #include "VM_API.h"
00022 #include <SYS/SYS_Types.h>
00023 #include <altivec.h>
00024
00025 #define CPU_HAS_SIMD_INSTR 1
00026 #define VM_ALTIVEC_STYLE 1
00027 #define VM_ALTIVEC_VECTOR 1
00028
00029 typedef vector float v4sf;
00030 typedef vector int v4si;
00031
00032 #define V4SF(A) (v4sf)A
00033 #define V4SI(A) (v4si)A
00034
00035 #define V4SF_CONST(val) (vector float){val,val,val,val}
00036 #define V4SI_CONST(val) (vector int){val,val,val,val}
00037
00038 static inline v4sf
00039 vm_vec_reciprocal(const v4sf &a)
00040 {
00041
00042 vector float est = vec_re(a);
00043
00044
00045 return vec_madd(vec_nmsub(est, a, V4SF_CONST(1)), est, est);
00046 }
00047
00048 static inline v4sf
00049 vm_vec_fdiv(v4sf a, v4sf b)
00050 {
00051 return vec_madd(a, vec_re(b), V4SF_CONST(0));
00052 }
00053
00054 static inline v4sf
00055 vm_vec_qdiv(v4sf a, v4sf b)
00056 {
00057 return vec_madd(a, vm_vec_reciprocal(b), V4SF_CONST(0));
00058 }
00059
00060 static inline v4sf
00061 vm_vec_fsqrt(v4sf a)
00062 {
00063 v4sf mask = (v4sf)vec_cmpgt(a, V4SF_CONST(0));
00064 return vec_and(mask, vec_re(vec_rsqrte(a)));
00065 }
00066
00067 static inline v4sf
00068 vm_vec_qisqrt(v4sf a)
00069 {
00070
00071 vector float zero = V4SF_CONST(0);
00072 vector float p5 = V4SF_CONST(0.5);
00073 vector float one = V4SF_CONST(1);
00074 vector float est = vec_rsqrte(a);
00075
00076
00077 vector float est2 = vec_madd(est, est, zero);
00078 vector float half = vec_madd(est, p5, zero);
00079 return vec_madd(vec_nmsub(a, est2, one), half, est);
00080 }
00081
00082 static inline v4sf
00083 vm_vec_qsqrt(v4sf a)
00084 {
00085 v4sf mask = (v4sf)vec_cmpgt(a, V4SF_CONST(0));
00086 return vec_and(mask, vm_vec_reciprocal(vm_vec_qisqrt(a)));
00087 }
00088
00089 static inline v4sf
00090 vm_vec_mul(v4sf a, v4sf b)
00091 {
00092 return vec_madd(a, b, V4SF_CONST(0));
00093 }
00094
00095 static inline v4si
00096 vm_vec_andnot(v4si a, v4si b)
00097 {
00098 return vec_andc(b, a);
00099 }
00100
00101 static inline v4sf
00102 vm_vec_negate(v4sf a)
00103 {
00104 return vec_sub(V4SF_CONST(0), a);
00105 }
00106
00107 static inline v4si
00108 vm_vec_cmpneq(v4sf a, v4sf b)
00109 {
00110 return (v4si)vec_xor(vec_cmpeq(a, b), V4SI_CONST(0xFFFFFFFF));
00111 }
00112
00113 static inline v4si
00114 vm_vec_cmpneq(v4si a, v4si b)
00115 {
00116 return (v4si)vec_xor(vec_cmpeq(a, b), V4SI_CONST(0xFFFFFFFF));
00117 }
00118
00119 static inline v4si
00120 vm_vec_cmple(v4sf a, v4sf b)
00121 {
00122 return (v4si)vec_cmple(a, b);
00123 }
00124
00125 static inline v4si
00126 vm_vec_cmple(v4si a, v4si b)
00127 {
00128 return (v4si)vec_xor(vec_cmpgt(a, b), V4SI_CONST(0xFFFFFFFF));
00129 }
00130
00131 static inline v4si
00132 vm_vec_cmpge(v4sf a, v4sf b)
00133 {
00134 return (v4si)vec_cmpge(a, b);
00135 }
00136
00137 static inline v4si
00138 vm_vec_cmpge(v4si a, v4si b)
00139 {
00140 return (v4si)vec_xor(vec_cmplt(a, b), V4SI_CONST(0xFFFFFFFF));
00141 }
00142
00143 static inline bool
00144 vm_allbits(const v4si &a)
00145 {
00146 return vec_all_eq(a, V4SI_CONST(0xFFFFFFFF));
00147 }
00148
00149 static inline void
00150 vm_splats(v4si &v, uint32 a)
00151 {
00152 uint32 *vi = (uint32 *)&v;
00153 vi[0] = vi[1] = vi[2] = vi[3] = a;
00154 }
00155
00156 static inline void
00157 vm_splats(v4sf &v, float a)
00158 {
00159 float *vf = (float *)&v;
00160 vf[0] = vf[1] = vf[2] = vf[3] = a;
00161 }
00162
00163 static inline void
00164 vm_splats(v4si &v, uint32 a, uint32 b, uint32 c, uint32 d)
00165 {
00166 uint32 *vi = (uint32 *)&v;
00167 vi[0] = a;
00168 vi[1] = b;
00169 vi[2] = c;
00170 vi[3] = d;
00171 }
00172
00173 static inline void
00174 vm_splats(v4sf &v, float a, float b, float c, float d)
00175 {
00176 float *vf = (float *)&v;
00177 vf[0] = a;
00178 vf[1] = b;
00179 vf[2] = c;
00180 vf[3] = d;
00181 }
00182
00183 template <int A, int B, int C, int D>
00184 static inline v4sf
00185 vm_shuffle(const v4sf &v)
00186 {
00187 vector unsigned char permute;
00188 unsigned char *p = (unsigned char *)&permute;
00189 int a = A * 4;
00190 int b = B * 4;
00191 int c = C * 4;
00192 int d = D * 4;
00193 p[0] = (unsigned char)a;
00194 p[1] = (unsigned char)a+1;
00195 p[2] = (unsigned char)a+2;
00196 p[3] = (unsigned char)a+3;
00197 p[4] = (unsigned char)b;
00198 p[5] = (unsigned char)b+1;
00199 p[6] = (unsigned char)b+2;
00200 p[7] = (unsigned char)b+3;
00201 p[8] = (unsigned char)c;
00202 p[9] = (unsigned char)c+1;
00203 p[10] = (unsigned char)c+2;
00204 p[11] = (unsigned char)c+3;
00205 p[12] = (unsigned char)d;
00206 p[13] = (unsigned char)d+1;
00207 p[14] = (unsigned char)d+2;
00208 p[15] = (unsigned char)d+3;
00209
00210 return vec_perm(v, v, permute);
00211 }
00212
// ---------------------------------------------------------------------------
// VM_* operation names mapped onto AltiVec intrinsics or the helpers above.
// ---------------------------------------------------------------------------

// Lane initialization.
#define VM_SPLATS       vm_splats

// Comparisons; each result is cast to / returned as a v4si mask.
#define VM_CMPLT        (v4si)vec_cmplt
#define VM_CMPLE        (v4si)vm_vec_cmple
#define VM_CMPGT        (v4si)vec_cmpgt
#define VM_CMPGE        (v4si)vm_vec_cmpge
#define VM_CMPEQ        (v4si)vec_cmpeq
#define VM_CMPNE        (v4si)vm_vec_cmpneq

// I-prefixed comparisons; these expand to the same intrinsics as above.
// NOTE(review): presumably the integer-operand variants -- confirm with
// callers.
#define VM_ICMPLT       (v4si)vec_cmplt
#define VM_ICMPGT       (v4si)vec_cmpgt
#define VM_ICMPEQ       (v4si)vec_cmpeq

// I-prefixed add / subtract.
#define VM_IADD         vec_add
#define VM_ISUB         vec_sub

// Arithmetic. VM_DIV, VM_SQRT and VM_INVERT use the Newton-Raphson refined
// helpers; VM_ISQRT maps directly to the vec_rsqrte estimate.
#define VM_ADD          vec_add
#define VM_SUB          vec_sub
#define VM_MUL          vm_vec_mul
#define VM_DIV          vm_vec_qdiv
#define VM_SQRT         vm_vec_qsqrt
#define VM_ISQRT        vec_rsqrte
#define VM_INVERT       vm_vec_reciprocal
#define VM_ABS          vec_abs

// Estimate-only division / square root, negation and multiply-add.
#define VM_FDIV         vm_vec_fdiv
#define VM_FSQRT        vm_vec_fsqrt
#define VM_NEG          vm_vec_negate
#define VM_MADD         vec_madd

// Per-lane minimum / maximum.
#define VM_MIN          vec_min
#define VM_MAX          vec_max

// Bitwise logic.
#define VM_AND          vec_and
#define VM_ANDNOT       vm_vec_andnot
#define VM_OR           vec_or
#define VM_XOR          vec_xor

// Mask test: true when every lane is 0xFFFFFFFF.
#define VM_ALLBITS      vm_allbits

// Lane permutation.
#define VM_SHUFFLE      vm_shuffle
00253
00254 static inline v4sf
00255 vm_vec_floatcast(const v4si i)
00256 {
00257 return vec_ctf(i, 0);
00258 }
00259
00260 static inline v4si
00261 vm_vec_intcast(const v4sf f)
00262 {
00263 return vec_cts(f, 0);
00264 }
00265
00266 static inline v4si
00267 vm_vec_floor(const v4sf &f)
00268 {
00269 return vec_cts(vec_floor(f), 0);
00270 }
00271
// Float -> int floor. VM_P_FLOOR() and VM_E_FLOOR() expand to nothing here.
// NOTE(review): presumably prologue/epilogue hooks that other back ends need
// around VM_FLOOR -- confirm against those headers.
#define VM_P_FLOOR()
#define VM_FLOOR        vm_vec_floor
#define VM_E_FLOOR()

// Float -> int conversion. VM_P_INT() and VM_E_INT() likewise expand to
// nothing here.
#define VM_P_INT()
#define VM_INT          vm_vec_intcast
#define VM_E_INT()

// Int -> float conversion.
#define VM_IFLOAT       vm_vec_floatcast
00282
00283 #endif