00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef __VM_Math__
00019 #define __VM_Math__
00020
00021 #include "VM_API.h"
00022 #include <SYS/SYS_Math.h>
00023 #include <SYS/SYS_Floor.h>
00024 #include <string.h>
00025 #include "VM_SIMD.h"
00026
00027 class VM_SPUMath;
00028
// Shorthand for the storage/return specifiers shared by the inline wrappers.
#define VM_SIV	static inline void

// VM_SIMDFUNC / VM_SIMDFUNCR wrap the declaration of a *SIMD routine.
//  - Without CPU_HAS_SIMD_INSTR the routine is defined right here as a stub
//    (empty body, or `return 0` for the value-returning flavour) so that the
//    dispatchers still compile and link.
//  - With CPU_HAS_SIMD_INSTR only the declaration is emitted; the definition
//    has to be supplied elsewhere.
#if !defined(CPU_HAS_SIMD_INSTR)
    #define VM_SIMDFUNC(signature)	signature {}
    #define VM_SIMDFUNCR(signature)	signature { return 0; }
#else
    #define VM_SIMDFUNC(signature)	signature;
    #define VM_SIMDFUNCR(signature)	signature;
#endif
00041
// float[] = f(float[], float[])
// Emits the dispatcher `name(d, a, b, num)` and its `name##SIMD` companion.
// When theSIMD is set the call is forwarded to name##SIMD; otherwise `expr`
// is evaluated for n = 0 .. num-1.  `expr` may refer to d, a, b, num and n,
// so those identifiers must keep these exact names.
#define VM_DECL_vFvFvF(name, expr) \
    VM_SIV name(fpreal32 *d, const fpreal32 *a, const fpreal32 *b, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, b, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const fpreal32 *a, \
                                       const fpreal32 *b, int num))
00049
// float[] = f(float[], float)
// Emits the dispatcher `name(d, a, b, num)` and its `name##SIMD` companion,
// where b is a single value applied to every element.  When theSIMD is set
// the call is forwarded to name##SIMD; otherwise `expr` is evaluated for
// n = 0 .. num-1.  `expr` may refer to d, a, b, num and n.
#define VM_DECL_vFvFuF(name, expr) \
    VM_SIV name(fpreal32 *d, const fpreal32 *a, fpreal32 b, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, b, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const fpreal32 *a, \
                                       fpreal32 b, int num))
00057
// float[] = f(float, float[])
// Emits the dispatcher `name(d, a, b, num)` and its `name##SIMD` companion,
// where a is a single value and b is an array.  When theSIMD is set the call
// is forwarded to name##SIMD; otherwise `expr` is evaluated for
// n = 0 .. num-1.  `expr` may refer to d, a, b, num and n.
#define VM_DECL_vFuFvF(name, expr) \
    VM_SIV name(fpreal32 *d, fpreal32 a, const fpreal32 *b, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, b, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, fpreal32 a, \
                                       const fpreal32 *b, int num))
00065
// float[] = f(float, float)
// Emits the dispatcher `name(d, a, b, num)` and its `name##SIMD` companion,
// where both a and b are single values.  When theSIMD is set the call is
// forwarded to name##SIMD; otherwise `expr` is evaluated for n = 0 .. num-1.
// `expr` may refer to d, a, b, num and n.
#define VM_DECL_vFuFuF(name, expr) \
    VM_SIV name(fpreal32 *d, fpreal32 a, fpreal32 b, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, b, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, fpreal32 a, \
                                       fpreal32 b, int num))
00073
// float[] = f(float[])
// Emits the dispatcher `name(d, a, num)` and its `name##SIMD` companion.
// When theSIMD is set the call is forwarded to name##SIMD; otherwise `expr`
// is evaluated for n = 0 .. num-1.  `expr` may refer to d, a, num and n.
#define VM_DECL_vFvF(name, expr) \
    VM_SIV name(fpreal32 *d, const fpreal32 *a, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const fpreal32 *a, \
                                       int num))
00080
// float[] = f(float)
// Emits the dispatcher `name(d, a, num)` and its `name##SIMD` companion,
// where a is a single value.  When theSIMD is set the call is forwarded to
// name##SIMD; otherwise `expr` is evaluated for n = 0 .. num-1.  `expr` may
// refer to d, a, num and n.
#define VM_DECL_vFuF(name, expr) \
    VM_SIV name(fpreal32 *d, fpreal32 a, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, fpreal32 a, int num))
00087
// int32[] = f(float[])
// Emits the dispatcher `name(d, a, num)` and its `name##SIMD` companion.
// When theSIMD is set the call is forwarded to name##SIMD; otherwise `expr`
// is evaluated for n = 0 .. num-1.  `expr` may refer to d, a, num and n.
#define VM_DECL_vIvF(name, expr) \
    VM_SIV name(int32 *d, const fpreal32 *a, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(int32 *d, const fpreal32 *a, int num))
00094
// int32[] = f(float[]), where the float array is writable.
// Same shape as VM_DECL_vIvF except that `a` is not const, so `expr` (and the
// SIMD routine) may modify the input array as well as d.  When theSIMD is set
// the call is forwarded to name##SIMD; otherwise `expr` is evaluated for
// n = 0 .. num-1.  `expr` may refer to d, a, num and n.
#define VM_DECL_vIvF_nonconst(name, expr) \
    VM_SIV name(int32 *d, fpreal32 *a, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(int32 *d, fpreal32 *a, int num))
00101
// int32[] = f(float[], float[])
// Emits the dispatcher `name(d, a, b, num)` and its `name##SIMD` companion.
// When theSIMD is set the call is forwarded to name##SIMD; otherwise `expr`
// is evaluated for n = 0 .. num-1.  `expr` may refer to d, a, b, num and n.
#define VM_DECL_vIvFvF(name, expr) \
    VM_SIV name(int32 *d, const fpreal32 *a, const fpreal32 *b, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, b, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(int32 *d, const fpreal32 *a, \
                                       const fpreal32 *b, int num))
00108
// int32[] (and writable float[]) = f(float[])
// Emits the dispatcher `name(d, a, b, num)` and its `name##SIMD` companion,
// where d and a are both writable and b is the read-only input.  When theSIMD
// is set the call is forwarded to name##SIMD; otherwise `expr` is evaluated
// for n = 0 .. num-1.  `expr` may refer to d, a, b, num and n.
#define VM_DECL_vIvVFvF(name, expr) \
    VM_SIV name(int32 *d, fpreal32 *a, const fpreal32 *b, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, b, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(int32 *d, fpreal32 *a, \
                                       const fpreal32 *b, int num))
00115
// int32[] = f(float[], float)
// Emits the dispatcher `name(d, a, b, num)` and its `name##SIMD` companion,
// where b is a single value.  When theSIMD is set the call is forwarded to
// name##SIMD; otherwise `expr` is evaluated for n = 0 .. num-1.  `expr` may
// refer to d, a, b, num and n.
#define VM_DECL_vIvFuF(name, expr) \
    VM_SIV name(int32 *d, const fpreal32 *a, fpreal32 b, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, b, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(int32 *d, const fpreal32 *a, \
                                       fpreal32 b, int num))
00122
// type[] = f(float[], float b, float c, float e)
// Emits the dispatcher `name(d, a, b, c, e, num)` writing elements of `type`,
// plus its `name##SIMD` companion.  When theSIMD is set the call is forwarded
// to name##SIMD; otherwise `expr` is evaluated for n = 0 .. num-1.  `expr`
// may refer to d, a, b, c, e, num and n.
#define VM_DECL_WP(type, name, expr) \
    VM_SIV name(type *d, const fpreal32 *a, fpreal32 b, \
                fpreal32 c, fpreal32 e, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, b, c, e, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(type *d, const fpreal32 *a, \
                                       fpreal32 b, fpreal32 c, fpreal32 e, \
                                       int num))
00130
// float[] = f(type[], float b, type e)
// Emits the dispatcher `name(d, a, b, e, num)` reading elements of `type`,
// plus its `name##SIMD` companion.  When theSIMD is set the call is forwarded
// to name##SIMD; otherwise `expr` is evaluated for n = 0 .. num-1.  `expr`
// may refer to d, a, b, e, num and n.
#define VM_DECL_IWP(type, name, expr) \
    VM_SIV name(fpreal32 *d, const type *a, fpreal32 b, type e, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, b, e, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const type *a, \
                                       fpreal32 b, type e, int num))
00138
// int32[] = f(int32[], int32[])
// Emits the dispatcher `name(d, a, b, num)` and its `name##SIMD` companion.
// When theSIMD is set the call is forwarded to name##SIMD; otherwise `expr`
// is evaluated for n = 0 .. num-1.  `expr` may refer to d, a, b, num and n.
#define VM_DECL_vIvIvI(name, expr) \
    VM_SIV name(int32 *d, const int32 *a, const int32 *b, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, b, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(int32 *d, const int32 *a, \
                                       const int32 *b, int num))
00145
// int32[] = f(int32[], int32)
// Emits the dispatcher `name(d, a, b, num)` and its `name##SIMD` companion,
// where b is a single value.  When theSIMD is set the call is forwarded to
// name##SIMD; otherwise `expr` is evaluated for n = 0 .. num-1.  `expr` may
// refer to d, a, b, num and n.
#define VM_DECL_vIvIuI(name, expr) \
    VM_SIV name(int32 *d, const int32 *a, int32 b, int num) \
    { \
        if (theSIMD) \
        { \
            name##SIMD(d, a, b, num); \
            return; \
        } \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
    } \
    VM_SIMDFUNC(static void name##SIMD(int32 *d, const int32 *a, \
                                       int32 b, int num))
00152
// int32 = reduce(int32[])
// Emits three functions:
//   name##SISD(a, num)  - scalar reduction: d starts at 0, `expr` is run for
//                         n = 0 .. num-1 and is expected to accumulate into d,
//                         which is then returned.
//   name##SIMD(a, num)  - SIMD counterpart, declared through VM_SIMDFUNCR (so
//                         it is a stub returning 0 when SIMD is unavailable).
//   name(a, num)        - dispatcher: picks the SIMD routine when theSIMD is
//                         set and the scalar one otherwise.
// `expr` may refer to d, a, num and n.
#define VM_DECL_uIvI(name, expr) \
    static inline int32 name##SISD(const int32 *a, int num) \
    { \
        int32 d = 0; \
        for (int n = 0; n < num; ++n) \
        { \
            expr; \
        } \
        return d; \
    } \
    VM_SIMDFUNCR(static int32 name##SIMD(const int32 *a, int num)) \
    static inline int32 name(const int32 *a, int num) \
    { \
        if (theSIMD) \
            return name##SIMD(a, num); \
        return name##SISD(a, num); \
    }

00165
00166
00167
00168
// Declares the full family of comparison routines for one operator `op`.
// For both `name` and `fast##name` it produces four overloads writing an
// int32 result array:
//   float[] op float[],  float[] op float,
//   int32[] op int32[],  int32[] op int32
// In the scalar path `name` and `fast##name` evaluate the same expression,
// d[n] = a[n] op b[n] (or `op b`).
#define VM_DECL_CMP(name, op) \
    VM_DECL_vIvFvF(name,       d[n] = a[n] op b[n]) \
    VM_DECL_vIvFuF(name,       d[n] = a[n] op b) \
    VM_DECL_vIvFvF(fast##name, d[n] = a[n] op b[n]) \
    VM_DECL_vIvFuF(fast##name, d[n] = a[n] op b) \
    VM_DECL_vIvIvI(name,       d[n] = a[n] op b[n]) \
    VM_DECL_vIvIuI(name,       d[n] = a[n] op b) \
    VM_DECL_vIvIvI(fast##name, d[n] = a[n] op b[n]) \
    VM_DECL_vIvIuI(fast##name, d[n] = a[n] op b)
00178
00179 template <class T>
00180 static inline void
00181 wpoint(T &d, fpreal32 v, fpreal32 wpoint, fpreal32 max, fpreal32 offset)
00182 {
00183 d = (T)SYSclamp(v*wpoint + offset, 0.0F, max);
00184 }
00185
00186 template <class T>
00187 static inline void
00188 iwpoint(fpreal32 &d, T v, fpreal32 iwpoint, T offset)
00189 {
00190 d = (fpreal32)(v - offset) * iwpoint;
00191 }
00192
00193 class VM_API VM_Math {
00194 public:
00195 static bool isSIMD() { return theSIMD; }
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205 VM_DECL_CMP(lt, <)
00206 VM_DECL_CMP(le, <=)
00207 VM_DECL_CMP(gt, >)
00208 VM_DECL_CMP(ge, >=)
00209 VM_DECL_CMP(eq, ==)
00210 VM_DECL_CMP(ne, !=)
00211
00212
00213 VM_DECL_vIvIvI(bor, d[n] = a[n] | b[n])
00214 VM_DECL_vIvIuI(bor, d[n] = a[n] | b)
00215 VM_DECL_vIvIvI(band, d[n] = a[n] & b[n])
00216 VM_DECL_vIvIuI(band, d[n] = a[n] & b)
00217 VM_DECL_vIvIvI(bxor, d[n] = a[n] ^ b[n])
00218 VM_DECL_vIvIuI(bxor, d[n] = a[n] ^ b)
00219
00220
00221 VM_DECL_vFvFvF(add, d[n] = a[n] + b[n])
00222 VM_DECL_vFvFuF(add, d[n] = a[n] + b)
00223
00224
00225 VM_DECL_vFvFvF(sub, d[n] = a[n] - b[n])
00226 VM_DECL_vFvFuF(sub, d[n] = a[n] - b)
00227 VM_DECL_vFuFvF(sub, d[n] = a - b[n])
00228
00229
00230 VM_DECL_vFvFvF(mul, d[n] = a[n] * b[n])
00231 VM_DECL_vFvFuF(mul, d[n] = a[n] * b)
00232
00233
00234 VM_DECL_vFvFvF(div, d[n] = a[n] / b[n])
00235 VM_DECL_vFuFvF(div, d[n] = a / b[n])
00236
00237
00238 VM_DECL_vFvFvF(safediv, d[n] = SYSsafediv(a[n], b[n]))
00239 VM_DECL_vFuFvF(safediv, d[n] = SYSsafediv(a, b[n]))
00240
00241
00242 VM_SIV div(fpreal32 *d, const fpreal32 *a, fpreal32 b, int num)
00243 { mul(d, a, 1/b, num); }
00244 VM_SIV safediv(fpreal32 *d, const fpreal32 *a, fpreal32 b, int num)
00245 {
00246 if (b == 0) set(d, 0.0F, num);
00247 else mul(d, a, 1/b, num);
00248 }
00249
00250
00251
00252
00253 VM_DECL_vFvFvF(fdiv, d[n] = a[n] / b[n])
00254 VM_DECL_vFuFvF(fdiv, d[n] = a / b[n])
00255 VM_SIV fdiv(fpreal32 *d, const fpreal32 *a, fpreal32 b, int num)
00256 { b = 1/b; mul(d, a, b, num); }
00257
00258
00259 VM_DECL_vFvFvF(vmmax, d[n] = SYSmax(a[n], b[n]) )
00260 VM_DECL_vFvFuF(vmmax, d[n] = SYSmax(a[n], b) )
00261
00262
00263 VM_DECL_vFvFvF(vmmin, d[n] = SYSmin(a[n], b[n]) )
00264 VM_DECL_vFvFuF(vmmin, d[n] = SYSmin(a[n], b) )
00265
00266
00267 VM_SIV clamp(fpreal32 *d, const fpreal32 *a,
00268 fpreal32 min, fpreal32 max, int num)
00269 {
00270 if (theSIMD) clampSIMD(d, a, min, max, num);
00271 else for (int n=0; n<num; n++) d[n] = SYSclamp(a[n], min, max);
00272 }
00273 VM_SIMDFUNC(static void clampSIMD(fpreal32 *d, const fpreal32 *a, fpreal32 min, fpreal32 max, int num))
00274
00275
00276 static inline float dot(const fpreal32 *a, const fpreal32 *b, int n)
00277 { return (theSIMD) ? dotSIMD(a, b, n) : dotSISD(a, b, n); }
00278 static inline float dotSISD(const fpreal32 *a, const fpreal32 *b, int n)
00279 {
00280 int i;
00281 float sum = 0;
00282 for (i = 0; i < n; i++)
00283 sum += a[i]*b[i];
00284 return sum;
00285 }
00286 VM_SIMDFUNCR(static float dotSIMD(const fpreal32 *a, const fpreal32 *b, int n))
00287
00288 static inline float maddAndNorm(fpreal32 *d, const fpreal32 *a, fpreal s, int n)
00289 { return (theSIMD) ? maddAndNormSIMD(d, a, s, n) : maddAndNormSISD(d, a, s, n); }
00290 static inline float maddAndNormSISD(fpreal32 *d, const fpreal32 *a, fpreal s, int n)
00291 {
00292 int i;
00293 float sum = 0;
00294 for (i = 0; i < n; i++)
00295 {
00296 d[i] += a[i] * s;
00297 sum += d[i] * d[i];
00298 }
00299 return sum;
00300 }
00301 VM_SIMDFUNCR(static float maddAndNormSIMD(fpreal32 *d, const fpreal32 *a, fpreal32 s, int n))
00302
00303
00304 VM_DECL_uIvI(zerocount, d += a[n] == 0)
00305
00306
00307 VM_DECL_vFvFvF(scaleoffset, d[n] = d[n]*a[n] + b[n])
00308 VM_DECL_vFvFuF(scaleoffset, d[n] = d[n]*a[n] + b)
00309 VM_DECL_vFuFvF(scaleoffset, d[n] = d[n]*a + b[n])
00310 VM_DECL_vFuFuF(scaleoffset, d[n] = d[n]*a + b)
00311
00312
00313 VM_DECL_vFvFvF(madd, d[n] += a[n]*b[n])
00314 VM_DECL_vFvFuF(madd, d[n] += a[n]*b)
00315
00316
00317 VM_DECL_vFvF(sqrt, d[n] = SYSsqrt(a[n]) )
00318
00319
00320
00321
00322 VM_DECL_vFvF(fsqrt, d[n] = SYSsqrt(a[n]) )
00323
00324
00325 VM_DECL_vFvF(isqrt, d[n] = 1/SYSsqrt(a[n]) )
00326
00327
00328 VM_DECL_vIvF(floor, d[n] = (int)SYSfloorIL(a[n]) )
00329
00330 VM_DECL_vIvF_nonconst(splitFloat, SYSfastSplitFloat(a[n], d[n]) )
00331 VM_DECL_vIvVFvF(splitFloat, { a[n] = b[n]; SYSfastSplitFloat(a[n], d[n]); } )
00332
00333
00334 VM_DECL_vIvF(cast, d[n] = (int)a[n] )
00335
00336
00337 VM_DECL_vFvF(negate, d[n] = -a[n] )
00338
00339
00340 VM_DECL_vFvF(invert, d[n] = 1.0 / a[n] )
00341
00342
00343 VM_DECL_vFvF(abs, d[n] = SYSabs(a[n]) )
00344
00345
00346 VM_DECL_WP(fpreal32, wpoint, ::wpoint<fpreal32>(d[n], a[n], b, c, e+0.5F));
00347 VM_DECL_WP(uint8, wpoint, ::wpoint<uint8>(d[n], a[n], b, c, e+0.5F));
00348 VM_DECL_WP(uint16, wpoint, ::wpoint<uint16>(d[n], a[n], b, c, e+0.5F));
00349
00350
00351 VM_DECL_IWP(fpreal32, iwpoint, ::iwpoint<fpreal32>(d[n], a[n], 1.0F/b, e));
00352 VM_DECL_IWP(uint8, iwpoint, ::iwpoint<uint8>(d[n], a[n], 1.0F/b, e));
00353 VM_DECL_IWP(uint16, iwpoint, ::iwpoint<uint16>(d[n], a[n], 1.0F/b, e));
00354
00355
00356 VM_DECL_vFuF(set, d[n] = a )
00357 VM_SIV set(fpreal32 *d, const fpreal32 *a, int num)
00358 { if (a != d) ::memmove(d, a, num*sizeof(fpreal32)); }
00359 VM_SIV set(int32 *d, int a, int num)
00360 { for (int n=0; n<num; n++) d[n] = a; }
00361 VM_SIV set(int32 *d, const int32 *a, int num)
00362 { if (a != d) ::memmove(d, a, num*sizeof(int32)); }
00363
00364
00365 VM_SIV set(fpreal32 *d, const fpreal32 *a, int num,
00366 const uint32 *disabled)
00367 { set((int32 *)d, (const int32 *)a, num, disabled); }
00368 VM_SIV set(int32 *d, const int32 *a, int num,
00369 const uint32 *disabled)
00370 {
00371 if (theSIMD)
00372 setSIMD(d, a, num, disabled);
00373 else
00374 {
00375 int i;
00376 for (i = 0; i < num; i++)
00377 d[i] = disabled[i] ? d[i] : a[i];
00378 }
00379 }
00380 VM_SIMDFUNC(static void setSIMD(int32 *d, const int32 *a, int num, const uint32 *disabled))
00381 VM_SIV set(fpreal32 *d, fpreal32 a, int num,
00382 const uint32 *disabled)
00383 {
00384 SYS_FPReal32Union fu;
00385 fu.fval = a;
00386 set((int32 *)d, fu.ival, num, disabled);
00387 }
00388 VM_SIV set(int32 *d, int32 a, int num,
00389 const uint32 *disabled)
00390 {
00391 if (theSIMD)
00392 setSIMD(d, a, num, disabled);
00393 else
00394 {
00395 int i;
00396 for (i = 0; i < num; i++)
00397 d[i] = disabled[i] ? d[i] : a;
00398 }
00399 }
00400 VM_SIMDFUNC(static void setSIMD(int32 *d, int32 a, int num,
00401 const uint32 *disabled))
00402
00403
00404 VM_SIV swap(fpreal32 *a, fpreal32 *b, int num)
00405 {
00406 if (theSIMD) swapSIMD(a, b, num);
00407 else swapSISD(a, b, num);
00408 }
00409 VM_SIV swapSISD(fpreal32 *a, fpreal32 *b, int num)
00410 {
00411 int i;
00412 fpreal32 t;
00413 for (i = 0; i < num; i++)
00414 {
00415 t = a[i];
00416 a[i] = b[i];
00417 b[i] = t;
00418 }
00419 }
00420 VM_SIMDFUNC(static void swapSIMD(fpreal32 *a, fpreal32 *b, int num))
00421
00422
00423
00424 VM_SIV lerp(fpreal32 *d, const fpreal32 *a, const fpreal32 *b,
00425 const fpreal32 *t, int num)
00426 {
00427 if (theSIMD)
00428 {
00429 subSIMD(d, b, a, num);
00430 scaleoffsetSIMD(d, t, a, num);
00431 }
00432 else
00433 {
00434 for (int n=0; n<num; n++)
00435 d[n] = a[n] + (b[n]-a[n])*t[n];
00436 }
00437 }
00438 VM_SIV lerp(fpreal32 *d, const fpreal32 *a, const fpreal32 *b,
00439 fpreal32 t, int num)
00440 {
00441 if (theSIMD)
00442 {
00443 mulSIMD (d, a, 1-t, num);
00444 maddSIMD(d, b, t, num);
00445 }
00446 else
00447 {
00448 for (int n=0; n<num; n++)
00449 d[n] = SYSlerp(a[n], b[n], t);
00450 }
00451 }
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463 static void mulRowVec44(fpreal32 *v4, const fpreal32 m1[4][4], int nv,
00464 const uint32 *enable_flags=0);
00465 static void mulRowVec34(fpreal32 *v3, const fpreal32 m1[4][4], int nv,
00466 const uint32 *enable_flags=0);
00467 static void mulColVec44(fpreal32 *v4, const fpreal32 m1[4][4], int nv,
00468 const uint32 *enable_flags=0);
00469 static void mulColVec34(fpreal32 *v3, const fpreal32 m1[4][4], int nv,
00470 const uint32 *enable_flags=0);
00471
00472 static void mulRowVec44_3(fpreal32 *v4, const fpreal32 m1[4][4], int nv,
00473 const uint32 *enable_flags=0);
00474 static void mulRowVec34_3(fpreal32 *v3, const fpreal32 m1[4][4], int nv,
00475 const uint32 *enable_flags=0);
00476 static void mulColVec44_3(fpreal32 *v4, const fpreal32 m1[4][4], int nv,
00477 const uint32 *enable_flags=0);
00478 static void mulColVec34_3(fpreal32 *v3, const fpreal32 m1[4][4], int nv,
00479 const uint32 *enable_flags=0);
00480
00481
00482 static void vadd4u4(fpreal32 *v4, const fpreal32 a[4], int nv,
00483 const uint32 *enable_flags=0);
00484 static void vsub4u4(fpreal32 *v4, const fpreal32 a[4], int nv,
00485 const uint32 *enable_flags=0);
00486 static void vmul4u4(fpreal32 *v4, const fpreal32 a[4], int nv,
00487 const uint32 *enable_flags=0);
00488
00489 static void forceSIMD(bool onoff) { theSIMD = onoff; }
00490 static void setSPUMath(VM_SPUMath *math) { theSPUMath = math; }
00491 static VM_SPUMath *getSPUMath() { return theSPUMath; }
00492
00493 private:
00494 static bool theSIMD;
00495 static VM_SPUMath *theSPUMath;
00496 };
00497
00498 #endif