00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef __VM_SIMD__
00019 #define __VM_SIMD__
00020
00021
00022
00023
00024
00025
00026
00027
00028
/*
 * VM_CHECK_GCC(MAJOR, MINOR)
 *
 * Evaluates to non-zero when the compiler defines __GNUC__ and its
 * (__GNUC__, __GNUC_MINOR__) version is at least MAJOR.MINOR; evaluates to 0
 * when __GNUC__ is not defined.  Usable in #if conditions.
 *
 * Both parameters are parenthesized so that expression arguments keep their
 * meaning after expansion.
 */
#if defined(__GNUC__)
#define VM_CHECK_GCC(MAJOR, MINOR) \
    (__GNUC__ > (MAJOR) || \
     (__GNUC__ == (MAJOR) && __GNUC_MINOR__ >= (MINOR)))
#else
#define VM_CHECK_GCC(MAJOR, MINOR) 0
#endif
00035
00036 #if defined(FORCE_NON_SIMD)
00037 #include "VM_BasicFunc.h"
00038 #else
00039 #if defined(CELLRSX) || defined(PPC)
00040 #include "VM_AltivecFunc.h"
00041 #elif defined(LINUX) && VM_CHECK_GCC(3, 4) && defined(__SSE__)
00042 #include "VM_SSEFunc.h"
00043 #elif defined(WIN32)
00044 #include "VM_SSEFunc.h"
00045 #elif defined(MBSD_INTEL)
00046 #include "VM_SSEFunc.h"
00047 #else
00048 #include "VM_BasicFunc.h"
00049 #endif
00050 #endif
00051
00052
00053
00054 class v4uf;
00055
00056 class v4uu {
00057 public:
00058 v4uu() {}
00059 v4uu(const v4si &v) : vector(v) {}
00060 v4uu(const v4uu &v) : vector(v.vector) {}
00061 explicit v4uu(int32 v) { VM_SPLATS(vector, v); }
00062 explicit v4uu(const int32 v[4])
00063 { VM_SPLATS(vector, v[0], v[1], v[2], v[3]); }
00064 v4uu(int32 a, int32 b, int32 c, int32 d)
00065 { VM_SPLATS(vector, a, b, c, d); }
00066
00067
00068 inline v4uu operator=(int32 v)
00069 { vector = v4uu(v).vector; return *this; }
00070 inline v4uu operator=(v4si v)
00071 { vector = v; return *this; }
00072 inline v4uu operator=(const v4uu &v)
00073 { vector = v.vector; return *this; }
00074
00075 inline void condAssign(const v4uu &val, const v4uu &c)
00076 { *this = (c & val) | (!c & *this); }
00077
00078
00079 inline v4uu operator == (const v4uu &v) const
00080 { return v4uu(VM_ICMPEQ(vector, v.vector)); }
00081 inline v4uu operator != (const v4uu &v) const
00082 { return ~(*this == v); }
00083 inline v4uu operator > (const v4uu &v) const
00084 { return v4uu(VM_ICMPGT(vector, v.vector)); }
00085 inline v4uu operator < (const v4uu &v) const
00086 { return v4uu(VM_ICMPLT(vector, v.vector)); }
00087 inline v4uu operator >= (const v4uu &v) const
00088 { return ~(*this < v); }
00089 inline v4uu operator <= (const v4uu &v) const
00090 { return ~(*this > v); }
00091
00092 inline v4uu operator == (int32 v) const { return *this == v4uu(v); }
00093 inline v4uu operator != (int32 v) const { return *this != v4uu(v); }
00094 inline v4uu operator > (int32 v) const { return *this > v4uu(v); }
00095 inline v4uu operator < (int32 v) const { return *this < v4uu(v); }
00096 inline v4uu operator >= (int32 v) const { return *this >= v4uu(v); }
00097 inline v4uu operator <= (int32 v) const { return *this <= v4uu(v); }
00098
00099
00100 inline v4uu operator+(const v4uu &r) const
00101 { return v4uu(VM_IADD(vector, r.vector)); }
00102 inline v4uu operator-(const v4uu &r) const
00103 { return v4uu(VM_ISUB(vector, r.vector)); }
00104 inline v4uu operator+=(const v4uu &r) { return (*this = *this + r); }
00105 inline v4uu operator-=(const v4uu &r) { return (*this = *this - r); }
00106 inline v4uu operator+(int32 r) const { return *this + v4uu(r); }
00107 inline v4uu operator-(int32 r) const { return *this - v4uu(r); }
00108 inline v4uu operator+=(int32 r) { return (*this = *this + r); }
00109 inline v4uu operator-=(int32 r) { return (*this = *this - r); }
00110
00111
00112
00113 inline v4uu operator||(const v4uu &r) const
00114 { return v4uu(VM_OR(vector, r.vector)); }
00115 inline v4uu operator&&(const v4uu &r) const
00116 { return v4uu(VM_AND(vector, r.vector)); }
00117 inline v4uu operator^(const v4uu &r) const
00118 { return v4uu(VM_XOR(vector, r.vector)); }
00119 inline v4uu operator!() const
00120 { return *this == v4uu(0); }
00121
00122 inline v4uu operator|(const v4uu &r) const { return *this || r; }
00123 inline v4uu operator&(const v4uu &r) const { return *this && r; }
00124 inline v4uu operator~() const
00125 { return *this ^ v4uu(0xFFFFFFFF); }
00126
00127
00128 inline int32 operator[](int idx) const { return comp[idx]; }
00129 inline void setComp(int idx, int32 v) { comp[idx] = v; }
00130
00131 v4uf toFloat() const;
00132
00133 public:
00134 union {
00135 v4si vector;
00136 int32 comp[4];
00137 };
00138 };
00139
00140 class v4uf {
00141 public:
00142 v4uf() {}
00143 v4uf(const v4sf &v) : vector(v) {}
00144 v4uf(const v4uf &v) : vector(v.vector) {}
00145 explicit v4uf(float v) { VM_SPLATS(vector, v); }
00146 explicit v4uf(const float v[4])
00147 { VM_SPLATS(vector, v[0], v[1], v[2], v[3]); }
00148 v4uf(float a, float b, float c, float d)
00149 { VM_SPLATS(vector, a, b, c, d); }
00150
00151
00152 inline v4uf operator=(float v)
00153 { vector = v4uf(v).vector; return *this; }
00154 inline v4uf operator=(v4sf v)
00155 { vector = v; return *this; }
00156 inline v4uf operator=(const v4uf &v)
00157 { vector = v.vector; return *this; }
00158
00159 inline void condAssign(const v4uf &val, const v4uu &c)
00160 { *this = (val & c) | (*this & ~c); }
00161
00162
00163 inline v4uu operator == (const v4uf &v) const
00164 { return v4uu(VM_CMPEQ(vector, v.vector)); }
00165 inline v4uu operator != (const v4uf &v) const
00166 { return v4uu(VM_CMPNE(vector, v.vector)); }
00167 inline v4uu operator > (const v4uf &v) const
00168 { return v4uu(VM_CMPGT(vector, v.vector)); }
00169 inline v4uu operator < (const v4uf &v) const
00170 { return v4uu(VM_CMPLT(vector, v.vector)); }
00171 inline v4uu operator >= (const v4uf &v) const
00172 { return v4uu(VM_CMPGE(vector, v.vector)); }
00173 inline v4uu operator <= (const v4uf &v) const
00174 { return v4uu(VM_CMPLE(vector, v.vector)); }
00175
00176 inline v4uu operator == (float v) const { return *this == v4uf(v); }
00177 inline v4uu operator != (float v) const { return *this != v4uf(v); }
00178 inline v4uu operator > (float v) const { return *this > v4uf(v); }
00179 inline v4uu operator < (float v) const { return *this < v4uf(v); }
00180 inline v4uu operator >= (float v) const { return *this >= v4uf(v); }
00181 inline v4uu operator <= (float v) const { return *this <= v4uf(v); }
00182
00183
00184
00185 inline v4uf operator+(const v4uf &r) const
00186 { return v4uf(VM_ADD(vector, r.vector)); }
00187 inline v4uf operator-(const v4uf &r) const
00188 { return v4uf(VM_SUB(vector, r.vector)); }
00189 inline v4uf operator-() const
00190 { return v4uf(VM_NEG(vector)); }
00191 v4uf operator*(const v4uf &r) const
00192 { return v4uf(VM_MUL(vector, r.vector)); }
00193 v4uf operator/(const v4uf &r) const
00194 { return v4uf(VM_DIV(vector, r.vector)); }
00195
00196 inline v4uf operator+=(const v4uf &r) { return (*this = *this + r); }
00197 inline v4uf operator-=(const v4uf &r) { return (*this = *this - r); }
00198 inline v4uf operator*=(const v4uf &r) { return (*this = *this * r); }
00199 inline v4uf operator/=(const v4uf &r) { return (*this = *this / r); }
00200
00201 inline v4uf operator+(float r) const { return *this + v4uf(r); }
00202 inline v4uf operator-(float r) const { return *this - v4uf(r); }
00203 inline v4uf operator*(float r) const { return *this * v4uf(r); }
00204 inline v4uf operator/(float r) const { return *this / v4uf(r); }
00205 inline v4uf operator+=(float r) { return (*this = *this + r); }
00206 inline v4uf operator-=(float r) { return (*this = *this - r); }
00207 inline v4uf operator*=(float r) { return (*this = *this * r); }
00208 inline v4uf operator/=(float r) { return (*this = *this / r); }
00209
00210
00211
00212 inline v4uf operator||(const v4uu &r) const
00213 { return v4uf(V4SF(VM_OR(V4SI(vector), r.vector))); }
00214 inline v4uf operator&&(const v4uu &r) const
00215 { return v4uf(V4SF(VM_AND(V4SI(vector), r.vector))); }
00216 inline v4uf operator^(const v4uu &r) const
00217 { return v4uf(V4SF(VM_XOR(V4SI(vector), r.vector))); }
00218 inline v4uf operator!() const
00219 { return v4uf(V4SF((*this == v4uf(0.0F)).vector)); }
00220
00221 inline v4uf operator||(const v4uf &r) const
00222 { return v4uf(V4SF(VM_OR(V4SI(vector), V4SI(r.vector)))); }
00223 inline v4uf operator&&(const v4uf &r) const
00224 { return v4uf(V4SF(VM_AND(V4SI(vector), V4SI(r.vector)))); }
00225 inline v4uf operator^(const v4uf &r) const
00226 { return v4uf(V4SF(VM_XOR(V4SI(vector), V4SI(r.vector)))); }
00227
00228 inline v4uf operator|(const v4uu &r) const { return *this || r; }
00229 inline v4uf operator&(const v4uu &r) const { return *this && r; }
00230 inline v4uf operator~() const
00231 { return *this ^ v4uu(0xFFFFFFFF); }
00232
00233 inline v4uf operator|(const v4uf &r) const { return *this || r; }
00234 inline v4uf operator&(const v4uf &r) const { return *this && r; }
00235
00236
00237 inline float operator[](int idx) const { return comp[idx]; }
00238 inline void setComp(int idx, float v) { comp[idx] = v; }
00239
00240
00241 v4uf abs() const { return v4uf(VM_ABS(vector)); }
00242 v4uf clamp(const v4uf &low, const v4uf &high) const
00243 { return v4uf(
00244 VM_MIN(VM_MAX(vector, low.vector), high.vector)); }
00245 v4uf clamp(float low, float high) const
00246 { return v4uf(VM_MIN(VM_MAX(vector,
00247 v4uf(low).vector), v4uf(high).vector)); }
00248 v4uf recip() const { return v4uf(VM_INVERT(vector)); }
00249
00250
00251
00252
00253
00254 v4uu toUnsignedInt() const { return VM_INT(vector); }
00255 v4uu toSignedInt() const { return VM_INT(vector); }
00256
00257 v4uu floor() const
00258 {
00259 v4uu result;
00260 VM_P_FLOOR();
00261 result = VM_FLOOR(vector);
00262 VM_E_FLOOR();
00263 return result;
00264 }
00265
00266
00267
00268 v4uu splitFloat()
00269 {
00270 v4uu base;
00271 base = toSignedInt();
00272 *this -= base.toFloat();
00273 return base;
00274 }
00275
00276 template <int A, int B, int C, int D>
00277 v4uf swizzle() const
00278 {
00279 return VM_SHUFFLE<A,B,C,D>(vector);
00280 }
00281
00282 public:
00283 union {
00284 v4sf vector;
00285 fpreal32 comp[4];
00286 };
00287 };
00288
00289 inline v4uf
00290 v4uu::toFloat() const
00291 {
00292 return v4uf(VM_IFLOAT(vector));
00293 }
00294
00295
00296
00297
00298
00299 static inline v4uf
00300 sqrt(const v4uf &a)
00301 {
00302 return v4uf(VM_SQRT(a.vector));
00303 }
00304
00305 static inline v4uf
00306 fabs(const v4uf &a)
00307 {
00308 return a.abs();
00309 }
00310
00311
00312
00313
00314 static inline v4uf
00315 andn(const v4uu &a, const v4uf &b)
00316 {
00317 return v4uf(V4SF(VM_ANDNOT(a.vector, V4SI(b.vector))));
00318 }
00319
00320 static inline v4uu
00321 andn(const v4uu &a, const v4uu &b)
00322 {
00323 return v4uu(VM_ANDNOT(a.vector, b.vector));
00324 }
00325
00326
00327 static inline v4uf
00328 ternary(const v4uu &a, const v4uf &b, const v4uf &c)
00329 {
00330 return (b & a) | andn(a, c);
00331 }
00332
00333 static inline v4uu
00334 ternary(const v4uu &a, const v4uu &b, const v4uu &c)
00335 {
00336 return (b & a) | andn(a, c);
00337 }
00338
00339
00340 static inline v4uu
00341 nand(const v4uu &a, const v4uu &b)
00342 {
00343 return !v4uu(VM_AND(a.vector, b.vector));
00344 }
00345
00346 static inline v4uf
00347 vmin(const v4uf &a, const v4uf &b)
00348 {
00349 return v4uf(VM_MIN(a.vector, b.vector));
00350 }
00351
00352 static inline v4uf
00353 vmax(const v4uf &a, const v4uf &b)
00354 {
00355 return v4uf(VM_MAX(a.vector, b.vector));
00356 }
00357
00358 static inline v4uf
00359 clamp(const v4uf &a, const v4uf &b, const v4uf &c)
00360 {
00361 return vmax(vmin(a, c), b);
00362 }
00363
00364 static inline v4uf
00365 clamp(const v4uf &a, float b, float c)
00366 {
00367 return vmax(vmin(a, v4uf(c)), v4uf(b));
00368 }
00369
00370 static inline bool
00371 allbits(const v4uu &a)
00372 {
00373 return vm_allbits(a.vector);
00374 }
00375
00376 static inline bool
00377 anybits(const v4uu &a)
00378 {
00379 return !allbits(~a);
00380 }
00381
00382 static inline v4uf
00383 madd(const v4uf &v, const v4uf &f, const v4uf &a)
00384 {
00385 return v4uf(VM_MADD(v.vector, f.vector, a.vector));
00386 }
00387
00388 static inline v4uf
00389 madd(const v4uf &v, float f, float a)
00390 {
00391 return v4uf(VM_MADD(v.vector, v4uf(f).vector, v4uf(a).vector));
00392 }
00393
00394 static inline v4uf
00395 msub(const v4uf &v, const v4uf &f, const v4uf &s)
00396 {
00397 return madd(v, f, -s);
00398 }
00399
00400 static inline v4uf
00401 msub(const v4uf &v, float f, float s)
00402 {
00403 return madd(v, f, -s);
00404 }
00405
00406 static inline v4uf
00407 lerp(const v4uf &a, const v4uf &b, const v4uf &w)
00408 {
00409 v4uf w1 = v4uf(1.0F) - w;
00410 return madd(a, w1, b*w);
00411 }
00412
00413 static inline v4uf
00414 luminance(const v4uf &r, const v4uf &g, const v4uf &b,
00415 float rw, float gw, float bw)
00416 {
00417 return v4uf(madd(r, v4uf(rw), madd(g, v4uf(gw), b * bw)));
00418 }
00419
00420 static inline float
00421 dot3(const v4uf &a, const v4uf &b)
00422 {
00423 v4uf res = a*b;
00424 return res[0] + res[1] + res[2];
00425 }
00426
00427 static inline float
00428 dot4(const v4uf &a, const v4uf &b)
00429 {
00430 v4uf res = a*b;
00431 return res[0] + res[1] + res[2] + res[3];
00432 }
00433
00434 static inline float
00435 length(const v4uf &a)
00436 {
00437 return sqrt(dot3(a, a));
00438 }
00439
00440 static inline v4uf
00441 normalize(const v4uf &a)
00442 {
00443 return a / length(a);
00444 }
00445
00446 static inline v4uf
00447 cross(const v4uf &a, const v4uf &b)
00448 {
00449 return v4uf(a[1]*b[2] - a[2]*b[1],
00450 a[2]*b[0] - a[0]*b[2],
00451 a[0]*b[1] - a[1]*b[0], 0);
00452 }
00453
00454
// v4ui is an alternative name for v4uu; both refer to the same class.
typedef v4uu v4ui;
00456
00457
00458
00459
/*
 * VM_ALIGN(ptr, ASIZE, STYPE)
 *
 * Byte distance from ptr up to the next multiple of (1 << ASIZE), divided by
 * sizeof(STYPE).  The result is 0 when ptr is already on such a boundary,
 * and the division truncates when the byte distance is not a multiple of
 * sizeof(STYPE).
 *
 * Every parameter is parenthesized.  In particular the cast now applies to
 * the whole ptr argument, so VM_ALIGN(p + 1, ...) measures from p + 1 rather
 * than casting only p.  The shifts are done in long to match the cast.
 *
 * Only the low ASIZE bits of the address take part in the result.  ASIZE is
 * expanded twice, so it should not have side effects.
 */
#define VM_ALIGN(ptr, ASIZE, STYPE)	\
    ((((1L << (ASIZE)) - (long)(ptr)) & ((1L << (ASIZE)) - 1)) / sizeof(STYPE))
00462
00463
00464
00465 #endif