HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
VM_Math.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: VM_Math.h ( VM Library, C++)
7  *
8  * COMMENTS: Vector Math.
9  */
10 
11 #ifndef __VM_Math__
12 #define __VM_Math__
13 
14 #include "VM_API.h"
15 #include <SYS/SYS_Math.h>
16 #include <SYS/SYS_Floor.h>
17 #include <string.h>
18 #include "VM_SIMD.h"
19 
20 class VM_SPUMath;
21 
// Shorthand used throughout this header to declare static inline void methods.
22 #define VM_SIV static inline void
23 
24 // On platforms that don't support vector instructions, we define empty
25 // function signatures for SIMD operations. The runtime check on theSIMD
26 // should prevent them from being called.
27 #if defined(CPU_HAS_SIMD_INSTR)
// With SIMD support: expand to a plain declaration (body defined elsewhere).
28 #define VM_SIMDFUNC(signature) signature;
29 #define VM_SIMDFUNCR(signature) signature;
30 #else
// Without SIMD support: expand to an empty stub ("R" = value-returning, returns 0).
31 #define VM_SIMDFUNC(signature) signature {}
32 #define VM_SIMDFUNCR(signature) signature { return 0; }
33 #endif
34 
// Naming convention for the VM_DECL_* helper macros:
//   v = vector (array) argument, u = uniform (scalar) argument,
//   F = floating point (fpreal32/fpreal64), I = int32.
// Each macro declares: an fpreal32 overload that dispatches to name##SIMD
// when theSIMD is set (scalar loop otherwise), an fpreal64 overload that
// always runs the scalar loop, and the name##SIMD declaration itself.
// `expr` is the per-element statement, indexed by loop variable `n`.
35 #define VM_DECL_vFvFvF(name, expr) \
36  VM_SIV name(fpreal32 *d, const fpreal32 *a, \
37  const fpreal32 *b, exint num) { \
38  if (theSIMD) name##SIMD(d, a, b, num); \
39  else for (exint n=0; n<num; n++) expr; \
40  } \
41  VM_SIV name(fpreal64 *d, const fpreal64 *a, \
42  const fpreal64 *b, exint num) { \
43  for (exint n=0; n<num; n++) expr; \
44  } \
45  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const fpreal32 *a, const fpreal32 *b, exint num))
46 
47 #define VM_DECL_vFvFuF(name, expr) \
48  VM_SIV name(fpreal32 *d, const fpreal32 *a, \
49  fpreal32 b, exint num) { \
50  if (theSIMD) name##SIMD(d, a, b, num); \
51  else for (exint n=0; n<num; n++) expr; \
52  } \
53  VM_SIV name(fpreal64 *d, const fpreal64 *a, \
54  fpreal64 b, exint num) { \
55  for (exint n=0; n<num; n++) expr; \
56  } \
57  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const fpreal32 *a, fpreal32 b, exint num))
58 
59 #define VM_DECL_vFuFvF(name, expr) \
60  VM_SIV name(fpreal32 *d, fpreal32 a, \
61  const fpreal32 *b, exint num) { \
62  if (theSIMD) name##SIMD(d, a, b, num); \
63  else for (exint n=0; n<num; n++) expr; \
64  } \
65  VM_SIV name(fpreal64 *d, fpreal64 a, const fpreal64 *b, exint num) { \
66  for (exint n=0; n<num; n++) expr; \
67  } \
68  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, fpreal32 a, const fpreal32 *b, exint num))
69 
70 #define VM_DECL_vFuFuF(name, expr) \
71  VM_SIV name(fpreal32 *d, fpreal32 a, \
72  fpreal32 b, exint num) { \
73  if (theSIMD) name##SIMD(d, a, b, num); \
74  else for (exint n=0; n<num; n++) expr; \
75  } \
76  VM_SIV name(fpreal64 *d, fpreal64 a, \
77  fpreal64 b, exint num) { \
78  for (exint n=0; n<num; n++) expr; \
79  } \
80  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, fpreal32 a, fpreal32 b, exint num))
81 
// One-source-array variants: d[n] computed from a[n] (vFvF) or a scalar (vFuF).
82 #define VM_DECL_vFvF(name, expr) \
83  VM_SIV name(fpreal32 *d, const fpreal32 *a, exint num) { \
84  if (theSIMD) name##SIMD(d, a, num); \
85  else for (exint n=0; n<num; n++) expr; \
86  } \
87  VM_SIV name(fpreal64 *d, const fpreal64 *a, exint num) { \
88  for (exint n=0; n<num; n++) expr; \
89  } \
90  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const fpreal32 *a, exint num))
91 
92 #define VM_DECL_vFuF(name, expr) \
93  VM_SIV name(fpreal32 *d, fpreal32 a, exint num) { \
94  if (theSIMD) name##SIMD(d, a, num); \
95  else for (exint n=0; n<num; n++) expr; \
96  } \
97  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, fpreal32 a, exint num))
98 
// Variants producing an int32 destination from float sources.
// The `_nonconst` / vIvVF forms take a writable `a` because `expr` may
// modify it (see the splitFloat uses below).
99 #define VM_DECL_vIvF(name, expr) \
100  VM_SIV name(int32 *d, const fpreal32 *a, exint num) { \
101  if (theSIMD) name##SIMD(d, a, num); \
102  else for (exint n=0; n<num; n++) expr; \
103  } \
104  VM_SIMDFUNC(static void name##SIMD(int32 *d, const fpreal32 *a, exint num))
105 
106 #define VM_DECL_vIvF_nonconst(name, expr) \
107  VM_SIV name(int32 *d, fpreal32 *a, exint num) { \
108  if (theSIMD) name##SIMD(d, a, num); \
109  else for (exint n=0; n<num; n++) expr; \
110  } \
111  VM_SIMDFUNC(static void name##SIMD(int32 *d, fpreal32 *a, exint num))
112 
113 #define VM_DECL_vIvFvF(name, expr) \
114  VM_SIV name(int32 *d, const fpreal32 *a, const fpreal32 *b, exint num) { \
115  if (theSIMD) name##SIMD(d, a, b, num); \
116  else for (exint n=0; n<num; n++) expr; \
117  } \
118  VM_SIV name(int32 *d, const fpreal64 *a, const fpreal64 *b, exint num) { \
119  for (exint n=0; n<num; n++) expr; \
120  } \
121  VM_SIMDFUNC(static void name##SIMD(int32 *d, const fpreal32 *a, const fpreal32 *b, exint num))
122 
123 #define VM_DECL_vIvVFvF(name, expr) \
124  VM_SIV name(int32 *d, fpreal32 *a, const fpreal32 *b, exint num) { \
125  if (theSIMD) name##SIMD(d, a, b, num); \
126  else for (exint n=0; n<num; n++) expr; \
127  } \
128  VM_SIMDFUNC(static void name##SIMD(int32 *d, fpreal32 *a, const fpreal32 *b, exint num))
129 
130 #define VM_DECL_vIvFuF(name, expr) \
131  VM_SIV name(int32 *d, const fpreal32 *a, fpreal32 b, exint num) { \
132  if (theSIMD) name##SIMD(d, a, b, num); \
133  else for (exint n=0; n<num; n++) expr; \
134  } \
135  VM_SIV name(int32 *d, const fpreal64 *a, fpreal64 b, exint num) { \
136  for (exint n=0; n<num; n++) expr; \
137  } \
138  VM_SIMDFUNC(static void name##SIMD(int32 *d, const fpreal32 *a, fpreal32 b, exint num))
139 
// WP ("white point") quantization helpers used by wpoint()/iwpoint() below:
// WP takes a float source plus scale/max/offset scalars and writes a
// destination of arbitrary `type`; IWP is the inverse mapping back to float.
140 #define VM_DECL_WP(type, name, expr) \
141  VM_SIV name(type *d, const fpreal32 *a, fpreal32 b, \
142  fpreal32 c, fpreal32 e, exint num) { \
143  if (theSIMD) name##SIMD(d, a, b, c, e, num); \
144  else for (exint n=0; n<num; n++) expr; \
145  } \
146  VM_SIMDFUNC(static void name##SIMD(type *d, const fpreal32 *a, fpreal32 b, fpreal32 c, fpreal32 e, exint num))
147 
148 #define VM_DECL_IWP(type, name, expr) \
149  VM_SIV name(fpreal32 *d, const type *a, fpreal32 b, \
150  type e, exint num) { \
151  if (theSIMD) name##SIMD(d, a, b, e, num); \
152  else for (exint n=0; n<num; n++) expr; \
153  } \
154  VM_SIMDFUNC(static void name##SIMD(fpreal32 *d, const type *a, fpreal32 b, type e, exint num))
155 
// Pure int32 element-wise variants (vector/vector and vector/scalar).
156 #define VM_DECL_vIvIvI(name, expr) \
157  VM_SIV name(int32 *d, const int32 *a, const int32 *b, exint num) { \
158  if (theSIMD) name##SIMD(d, a, b, num); \
159  else for (exint n=0; n<num; n++) expr; \
160  } \
161  VM_SIMDFUNC(static void name##SIMD(int32 *d, const int32 *a, const int32 *b, exint num))
162 
163 #define VM_DECL_vIvIuI(name, expr) \
164  VM_SIV name(int32 *d, const int32 *a, int32 b, exint num) { \
165  if (theSIMD) name##SIMD(d, a, b, num); \
166  else for (exint n=0; n<num; n++) expr; \
167  } \
168  VM_SIMDFUNC(static void name##SIMD(int32 *d, const int32 *a, int32 b, exint num))
169 
// Reduction over an int32 array: `expr` accumulates into local `d`,
// which is returned (e.g. zerocount below).
170 #define VM_DECL_uIvI(name, expr) \
171  static inline int32 name##SISD(const int32 *a, exint num) { \
172  int32 d = 0; \
173  for (exint n=0; n < num; n++) expr; \
174  return d; \
175  } \
176  VM_SIMDFUNCR(static int32 name##SIMD(const int32 *a, exint num)) \
177  static inline int32 name(const int32 *a, exint num) { \
178  return theSIMD ? name##SIMD(a, num) : \
179  name##SISD(a, num); \
180  } \
181 
182 // Declare the 8 variations of a single comparison operator
183 // - vector and scalar b
184 // - fast and standard
185 // - int32 and fpreal32
186 #define VM_DECL_CMP(name, op) \
187  VM_DECL_vIvFvF(name, d[n] = a[n] op b[n]) \
188  VM_DECL_vIvFuF(name, d[n] = a[n] op b) \
189  VM_DECL_vIvFvF(fast##name, d[n] = a[n] op b[n]) \
190  VM_DECL_vIvFuF(fast##name, d[n] = a[n] op b) \
191  VM_DECL_vIvIvI(name, d[n] = a[n] op b[n]) \
192  VM_DECL_vIvIuI(name, d[n] = a[n] op b) \
193  VM_DECL_vIvIvI(fast##name, d[n] = a[n] op b[n]) \
194  VM_DECL_vIvIuI(fast##name, d[n] = a[n] op b)
195 
196 template <class T>
197 static inline void
198 wpoint(T &d, fpreal32 v, fpreal32 wpoint, fpreal32 max, fpreal32 offset)
199 {
200  d = (T)SYSclamp(v*wpoint + offset, 0.0F, max);
201 }
202 
203 template <class T>
204 static inline void
205 iwpoint(fpreal32 &d, T v, fpreal32 iwpoint, T offset)
206 {
207  d = (fpreal32)(v - offset) * iwpoint;
208 }
209 
211 public:
// Returns the runtime flag that selects the SIMD code paths.
212  static bool isSIMD() { return theSIMD; }
213 
214  /// The fast operations assume that any non-zero return value is
215  /// interpreted as true. Standard operations always set the result to
216  /// either 0 or 1.
217  ///
218  /// VM_Math::lt(d, a, b) := d[i] = a[i] < b[i]
219  /// VM_Math::lt(d, a, b) := d[i] = a[i] < b
220  /// VM_Math::fastlt(d, a, b) := d[i] = a[i] < b[i]
221  /// VM_Math::fastlt(d, a, b) := d[i] = a[i] < b
 /// Note: the scalar fallbacks of the fast* variants are identical to the
 /// standard ones (see VM_DECL_CMP); only the SIMD paths can differ.
222  VM_DECL_CMP(lt, <)
223  VM_DECL_CMP(le, <=)
224  VM_DECL_CMP(gt, >)
225  VM_DECL_CMP(ge, >=)
226  VM_DECL_CMP(eq, ==)
227  VM_DECL_CMP(ne, !=)
228 
229  /// Bitwise operations
230  VM_DECL_vIvIvI(bor, d[n] = a[n] | b[n])
231  VM_DECL_vIvIuI(bor, d[n] = a[n] | b)
232  VM_DECL_vIvIvI(band, d[n] = a[n] & b[n])
233  VM_DECL_vIvIuI(band, d[n] = a[n] & b)
234  VM_DECL_vIvIvI(bxor, d[n] = a[n] ^ b[n])
235  VM_DECL_vIvIuI(bxor, d[n] = a[n] ^ b)
236 
237  /// VM_Math::add(d, a, b) := d[i] = a[i] + b[i]
238  VM_DECL_vFvFvF(add, d[n] = a[n] + b[n])
239  VM_DECL_vFvFuF(add, d[n] = a[n] + b)
240  VM_DECL_vIvIvI(add, d[n] = a[n] + b[n])
241  VM_DECL_vIvIuI(add, d[n] = a[n] + b)
242 
243  /// VM_Math::sub(d, a, b) := d[i] = a[i] - b[i]
244  VM_DECL_vFvFvF(sub, d[n] = a[n] - b[n])
245  VM_DECL_vFvFuF(sub, d[n] = a[n] - b)
246  VM_DECL_vFuFvF(sub, d[n] = a - b[n])
247 
248  /// VM_Math::mul(d, a, b) := d[i] = a[i] * b[i]
249  VM_DECL_vFvFvF(mul, d[n] = a[n] * b[n])
250  VM_DECL_vFvFuF(mul, d[n] = a[n] * b)
251 
252  /// VM_Math::div(d, a, b) := d[i] = a[i] / b[i]
253  VM_DECL_vFvFvF(div, d[n] = a[n] / b[n])
254  VM_DECL_vFuFvF(div, d[n] = a / b[n])
255 
256  /// VM_Math::safediv(d, a, b) := d[i] = b[i] != 0 ? a[i] / b[i] : a[i]
257  VM_DECL_vFvFvF(safediv, d[n] = SYSsafediv(a[n], b[n]))
258  VM_DECL_vFuFvF(safediv, d[n] = SYSsafediv(a, b[n]))
259 
260 #if 0 // Turn this off to use reciprocal multiplication
261  VM_DECL_vFvFuF(div, d[n] = a[n] / b);
262  VM_SIV safediv(fpreal32 *d, const fpreal32 *a, fpreal32 b, exint num)
263  {
264  if (b == 0) set(d, 0.f, num);
265  else div(d, a, b, num);
266  }
267 #else
268  // Multiply by reciprocal rather than dividing by a constant
 // (faster, but results can differ slightly from true division).
269  VM_SIV div(fpreal32 *d, const fpreal32 *a, fpreal32 b, exint num)
270  { mul(d, a, 1/b, num); }
271  VM_SIV safediv(fpreal32 *d, const fpreal32 *a, fpreal32 b, exint num)
272  {
273  if (b == 0) set(d, 0.0F, num);
274  else mul(d, a, 1/b, num);
275  }
276 #endif
277 
278  /// VM_Math::fdiv(d, a, b) := d[i] = a[i] * (1.0 / b[i])
279  /// A faster version than div(), but less accurate since it uses the
280  /// reciprocal.
281  VM_DECL_vFvFvF(fdiv, d[n] = a[n] / b[n])
282  VM_DECL_vFuFvF(fdiv, d[n] = a / b[n])
283  VM_SIV fdiv(fpreal32 *d, const fpreal32 *a, fpreal32 b, exint num)
284  { b = 1/b; mul(d, a, b, num); }
285 
286  /// VM_Math::max(d, a, b) := d[i] = SYSmax(a[i], b[i])
287  VM_DECL_vFvFvF(vmmax, d[n] = SYSmax(a[n], b[n]) )
288  VM_DECL_vFvFuF(vmmax, d[n] = SYSmax(a[n], b) )
289 
290  /// VM_Math::min(d, a, b) := d[i] = SYSmin(a[i], b[i])
291  VM_DECL_vFvFvF(vmmin, d[n] = SYSmin(a[n], b[n]) )
292  VM_DECL_vFvFuF(vmmin, d[n] = SYSmin(a[n], b) )
293 
294  /// VM_Math::clamp(d, a, b) := d[i] = SYSclamp(a[i], min, max)
 /// NOTE(review): the clamp() signature line (original line 295) is missing
 /// from this extraction; the continuation below lists its trailing params.
296  fpreal32 min, fpreal32 max, exint num)
297  {
298  if (theSIMD) clampSIMD(d, a, min, max, num);
299  else for (exint n=0; n<num; n++) d[n] = SYSclamp(a[n], min, max);
300  }
301  VM_SIMDFUNC(static void clampSIMD(fpreal32 *d, const fpreal32 *a, fpreal32 min, fpreal32 max, exint num))
302 
303  /// VM_Math::dot(a,b,n) := return sum(a[i]*b[i], i=0,n)
 /// Accumulation is performed in double precision in the scalar path.
304  static inline fpreal64 dot(const fpreal32 *a, const fpreal32 *b, exint n)
305  { return (theSIMD) ? dotSIMD(a, b, n) : dotSISD(a, b, n); }
306  static inline double dotSISD(const fpreal32 *a, const fpreal32 *b, exint n)
307  {
308  exint i;
309  double sum = 0;
310  for (i = 0; i < n; i++)
311  sum += a[i]*b[i];
312  return sum;
313  }
314  VM_SIMDFUNCR(static double dotSIMD(const fpreal32 *a, const fpreal32 *b, exint n))
315 
 /// maddAndNorm: d[i] += a[i]*s, returning the squared norm sum(d[i]^2)
 /// of the updated d.
316  static inline double maddAndNorm(fpreal32 *d, const fpreal32 *a, fpreal s, exint n)
317  { return (theSIMD) ? maddAndNormSIMD(d, a, s, n) : maddAndNormSISD(d, a, s, n); }
318  static inline double maddAndNormSISD(fpreal32 *d, const fpreal32 *a, fpreal s, exint n)
319  {
320  exint i;
321  double sum = 0;
322  for (i = 0; i < n; i++)
323  {
324  d[i] += a[i] * s;
325  sum += d[i] * d[i];
326  }
327  return sum;
328  }
 // NOTE(review): the SISD signature takes `fpreal s` while the SIMD
 // declaration takes `fpreal32 s` — confirm intentional narrowing.
329  VM_SIMDFUNCR(static double maddAndNormSIMD(fpreal32 *d, const fpreal32 *a, fpreal32 s, exint n))
330 
 /// mulAndDotDA: d[i] = a[i]*b[i], returning sum(d[i]*a[i]).
331  static inline double mulAndDotDA(fpreal32 *d, const fpreal32 *a, const fpreal32 *b, exint n)
332  { return (theSIMD) ? mulAndDotDASIMD(d, a, b, n) : mulAndDotDASISD(d, a, b, n); }
333  static inline double mulAndDotDASISD(fpreal32 *d, const fpreal32 *a, const fpreal32 *b, exint n)
334  {
335  exint i;
336  double sum = 0;
337  for (i = 0; i < n; i++)
338  {
339  d[i] = a[i] * b[i];
340  sum += d[i] * a[i];
341  }
342  return sum;
343  }
344  VM_SIMDFUNCR(static double mulAndDotDASIMD(fpreal32 *d, const fpreal32 *a, const fpreal32 *s, exint n))
345 
346  /// VM_Math::zerocount(a,n) := return sum(a[i]==0, i=0,n)
347  VM_DECL_uIvI(zerocount, d += a[n] == 0)
348 
349  /// VM_Math::scaleoffset(d, a, b) := d[i] = d[i]*a[i] + b[i]
350  VM_DECL_vFvFvF(scaleoffset, d[n] = d[n]*a[n] + b[n])
351  VM_DECL_vFvFuF(scaleoffset, d[n] = d[n]*a[n] + b)
352  VM_DECL_vFuFvF(scaleoffset, d[n] = d[n]*a + b[n])
353  VM_DECL_vFuFuF(scaleoffset, d[n] = d[n]*a + b)
354 
355  /// VM_Math::madd(d, a, b) := d[i] = d[i] + a[i]*b[i]
356  VM_DECL_vFvFvF(madd, d[n] += a[n]*b[n])
357  VM_DECL_vFvFuF(madd, d[n] += a[n]*b)
358 
359  /// VM_Math::sqrt(d, a) := d[i] = sqrt(a[i]);
360  VM_DECL_vFvF(sqrt, d[n] = SYSsqrt(a[n]) )
361 
362  /// VM_Math::fsqrt(d, a) := d[i] = 1.0 / isqrt(a[i]);
363  /// This is a faster, but far less accurate version of sqrt() since it uses
364  /// the reciprocal sqrt().
 /// (The scalar fallback below is exact; only the SIMD path approximates.)
365  VM_DECL_vFvF(fsqrt, d[n] = SYSsqrt(a[n]) )
366 
367  /// VM_Math::isqrt(d, a) := d[i] = 1.0 / sqrt(a[i])
368  VM_DECL_vFvF(isqrt, d[n] = 1/SYSsqrt(a[n]) )
369 
370  /// VM_Math::floor(a) := SYSfloorIL(a)
371  VM_DECL_vIvF(floor, d[n] = (int)SYSfloorIL(a[n]) )
372 
 /// splitFloat: presumably splits a[n] into integer (d[n]) and remaining
 /// fractional (a[n]) parts via SYSfastSplitFloat — confirm in SYS_Math.h.
 /// Note a[] is modified in place, hence the nonconst variants.
373  VM_DECL_vIvF_nonconst(splitFloat, SYSfastSplitFloat(a[n], d[n]) )
374  VM_DECL_vIvVFvF(splitFloat, { a[n] = b[n]; SYSfastSplitFloat(a[n], d[n]); } )
375 
376  /// VM_Math::cast(a) := (int)a
377  VM_DECL_vIvF(cast, d[n] = (int)a[n] )
378 
379  /// VM_Math::negate(d, a) := d[i] = -a[i]
380  VM_DECL_vFvF(negate, d[n] = -a[n] )
381 
382  /// VM_Math::invert(d, a) := d[i] = 1.0 / a[i]
383  VM_DECL_vFvF(invert, d[n] = 1.0 / a[n] )
384 
385  /// VM_Math::abs(d, a) := d[i] = abs(a[i])
386  VM_DECL_vFvF(abs, d[n] = SYSabs(a[n]) )
387 
388  /// VM_Math::wpoint(d,a,b,c,e) := d[i] = SYSclamp(a[i]*b+e+0.5F, 0, c)
389  VM_DECL_WP(fpreal32, wpoint, ::wpoint<fpreal32>(d[n], a[n], b, c, e+0.5F));
390  VM_DECL_WP(uint8, wpoint, ::wpoint<uint8>(d[n], a[n], b, c, e+0.5F));
391  VM_DECL_WP(uint16, wpoint, ::wpoint<uint16>(d[n], a[n], b, c, e+0.5F));
392 
393  /// VM_Math::iwpoint(d,a,b,e) := d[i] = (fpreal32)(a[i]-e)/b;
394  VM_DECL_IWP(fpreal32, iwpoint, ::iwpoint<fpreal32>(d[n], a[n], 1.0F/b, e));
395  VM_DECL_IWP(uint8, iwpoint, ::iwpoint<uint8>(d[n], a[n], 1.0F/b, e));
396  VM_DECL_IWP(uint16, iwpoint, ::iwpoint<uint16>(d[n], a[n], 1.0F/b, e));
397 
398  VM_DECL_vFuF(set, d[n] = a )
399  /// VM_Math::set(d, a) := d[i] = a
400  VM_SIV set(int32 *d, int a, exint num)
401  { for (exint n=0; n<num; n++) d[n] = a; }
402 
403  /// VM_Math::set(d, a, disabled) := d[i] = disabled[i] ? d[i] : a[i]
 /// The float overload forwards to the int32 overload by reinterpreting the
 /// arrays as int32 — a raw bit copy, so values (including NaNs) transfer
 /// exactly. NOTE(review): relies on fpreal32/int32 pointer punning;
 /// presumably sanctioned by the platforms this targets — verify.
404  VM_SIV set(fpreal32 *d, const fpreal32 *a, exint num,
405  const uint32 *disabled)
406  { set((int32 *)d, (const int32 *)a, num, disabled); }
407  VM_SIV set(int32 *d, const int32 *a, exint num,
408  const uint32 *disabled)
409  {
410  if (theSIMD)
411  setSIMD(d, a, num, disabled);
412  else
413  {
414  exint i;
415  for (i = 0; i < num; i++)
416  d[i] = disabled[i] ? d[i] : a[i];
417  }
418  }
419  VM_SIMDFUNC(static void setSIMD(int32 *d, const int32 *a, exint num, const uint32 *disabled))
 /// Scalar fill with mask: the float value is converted to its bit pattern
 /// through SYS_FPRealUnionF before delegating to the int32 overload.
420  VM_SIV set(fpreal32 *d, fpreal32 a, exint num,
421  const uint32 *disabled)
422  {
423  SYS_FPRealUnionF fu;
424  fu.fval = a;
425  set((int32 *)d, fu.ival, num, disabled);
426  }
427  VM_SIV set(int32 *d, int32 a, exint num,
428  const uint32 *disabled)
429  {
430  if (theSIMD)
431  setSIMD(d, a, num, disabled);
432  else
433  {
434  exint i;
435  for (i = 0; i < num; i++)
436  d[i] = disabled[i] ? d[i] : a;
437  }
438  }
439  VM_SIMDFUNC(static void setSIMD(int32 *d, int32 a, exint num,
440  const uint32 *disabled))
441 
442 
 /// swap: exchange the contents of two fpreal32 arrays, SIMD when enabled.
 /// NOTE(review): the signature line (original line 443) was lost in this
 /// extraction; only the body remains below.
444  {
445  if (theSIMD) swapSIMD(a, b, num);
446  else swapSISD<fpreal32>(a, b, num);
447  }
 /// fpreal64 swap: always the scalar path.
 /// NOTE(review): its signature line (original line 448) is also missing.
449  {
450  swapSISD<fpreal64>(a, b, num);
451  }
452  template <typename T>
453  VM_SIV swapSISD(T *a, T *b, exint num)
454  {
455  for (exint i = 0; i < num; i++)
456  {
457  T t = a[i];
458  a[i] = b[i];
459  b[i] = t;
460  }
461  }
462  VM_SIMDFUNC(static void swapSIMD(fpreal32 *a, fpreal32 *b, exint num))
463 
464 
465  /// VM_Math::lerp(d, a, b, t) := d[i] = a[i] + (b[i]-a[i])*t[i]
 /// NOTE(review): the lerp() signature line (original line 466) was lost in
 /// this extraction. The SIMD path builds d = b - a, then d = d*t + a
 /// via scaleoffsetSIMD.
467  const fpreal32 *t, exint num)
468  {
469  if (theSIMD)
470  {
471  subSIMD(d, b, a, num);
472  scaleoffsetSIMD(d, t, a, num);
473  }
474  else
475  {
476  for (exint n=0; n<num; n++)
477  d[n] = a[n] + (b[n]-a[n])*t[n];
478  }
479  }
 /// Scalar-t lerp: the SIMD path computes d = a*(1-t) + b*t, which matches
 /// the SYSlerp fallback in exact arithmetic (rounding may differ slightly).
480  VM_SIV lerp(fpreal32 *d, const fpreal32 *a, const fpreal32 *b,
481  fpreal32 t, exint num)
482  {
483  if (theSIMD)
484  {
485  mulSIMD (d, a, 1-t, num);
486  maddSIMD(d, b, t, num);
487  }
488  else
489  {
490  for (exint n=0; n<num; n++)
491  d[n] = SYSlerp(a[n], b[n], t);
492  }
493  }
494 
495  /// Vector Functions.
496  /// The following methods assume that the values coming in are "vectors".
497  /// The mnemonics are:
498  /// 3 - Vector3 or Matrix3
499  /// 4 - Vector4 or Matrix4
500  /// The enable_flags are an array corresponding 1-1 to the vectors to be
501  /// processed. Unlike typical flags, the vector will be processed
502  /// if the flag is set to 0. This is to match the VEX style calling.
503  /// If the VEX processor mask flag changes, the mask type here should
504  /// change too.
505  static void mulRowVec44(fpreal32 *v4, const fpreal32 m1[4][4], exint nv,
506  const uint32 *enable_flags=0);
507  static void mulRowVec34(fpreal32 *v3, const fpreal32 m1[4][4], exint nv,
508  const uint32 *enable_flags=0);
509  static void mulColVec44(fpreal32 *v4, const fpreal32 m1[4][4], exint nv,
510  const uint32 *enable_flags=0);
511  static void mulColVec34(fpreal32 *v3, const fpreal32 m1[4][4], exint nv,
512  const uint32 *enable_flags=0);
513  /// Multiplication, but treating the matrix as a 3x3 (i.e. no translate)
514  static void mulRowVec44_3(fpreal32 *v4, const fpreal32 m1[4][4], exint nv,
515  const uint32 *enable_flags=0);
516  static void mulRowVec34_3(fpreal32 *v3, const fpreal32 m1[4][4], exint nv,
517  const uint32 *enable_flags=0);
518  static void mulColVec44_3(fpreal32 *v4, const fpreal32 m1[4][4], exint nv,
519  const uint32 *enable_flags=0);
520  static void mulColVec34_3(fpreal32 *v3, const fpreal32 m1[4][4], exint nv,
521  const uint32 *enable_flags=0);
522 
523  // Component-wise add/subtract/multiply a constant 4-tuple against an
524  // array of floats assumed to hold packed 4-tuples.
524  static void vadd4u4(fpreal32 *v4, const fpreal32 a[4], exint nv,
525  const uint32 *enable_flags=0);
526  static void vsub4u4(fpreal32 *v4, const fpreal32 a[4], exint nv,
527  const uint32 *enable_flags=0);
528  static void vmul4u4(fpreal32 *v4, const fpreal32 a[4], exint nv,
529  const uint32 *enable_flags=0);
530 
 /// Force the SIMD code paths on or off at runtime (see theSIMD below).
531  static void forceSIMD(bool onoff) { theSIMD = onoff; }
532  static void setSPUMath(VM_SPUMath *math) { theSPUMath = math; }
533  static VM_SPUMath *getSPUMath() { return theSPUMath; }
534 
535 private:
 // Runtime flag consulted by every dispatching method above.
536  static bool theSIMD;
537  static VM_SPUMath *theSPUMath;
538 };
539 
540 #endif
#define SYSmax(a, b)
Definition: SYS_Math.h:1367
GA_API const UT_StringHolder div
#define VM_DECL_IWP(type, name, expr)
Definition: VM_Math.h:148
#define VM_DECL_vFuFuF(name, expr)
Definition: VM_Math.h:70
GLboolean invert
Definition: glcorearb.h:548
const GLdouble * v
Definition: glcorearb.h:836
#define VM_DECL_uIvI(name, expr)
Definition: VM_Math.h:170
#define VM_SIMDFUNC(signature)
Definition: VM_Math.h:31
virtual bool lerp(GA_AttributeOperand &d, GA_AttributeOperand &a, GA_AttributeOperand &b, GA_AttributeOperand &t) const
d = SYSlerp(a, b, t);
#define VM_API
Definition: VM_API.h:10
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1221
#define SYSabs(a)
Definition: SYS_Math.h:1369
UT_Matrix2T< T > SYSlerp(const UT_Matrix2T< T > &v1, const UT_Matrix2T< T > &v2, S t)
Definition: UT_Matrix2.h:595
static void setSPUMath(VM_SPUMath *math)
Definition: VM_Math.h:532
GLfloat GLfloat GLfloat GLfloat v3
Definition: glcorearb.h:818
png_uint_32 i
Definition: png.h:2877
VM_SIV swapSISD(T *a, T *b, exint num)
Definition: VM_Math.h:453
VM_SIV set(int32 *d, int32 a, exint num, const uint32 *disabled)
Definition: VM_Math.h:427
#define VM_DECL_vIvIuI(name, expr)
Definition: VM_Math.h:163
GLdouble n
Definition: glcorearb.h:2007
#define VM_DECL_vFvFvF(name, expr)
Definition: VM_Math.h:35
GLfloat f
Definition: glcorearb.h:1925
#define VM_DECL_vIvVFvF(name, expr)
Definition: VM_Math.h:123
IMATH_INTERNAL_NAMESPACE_HEADER_ENTER T abs(T a)
Definition: ImathFun.h:55
#define VM_DECL_CMP(name, op)
Definition: VM_Math.h:186
int64 exint
Definition: SYS_Types.h:116
const std::enable_if<!VecTraits< T >::IsVec, T >::type & max(const T &a, const T &b)
Definition: Composite.h:133
static VM_SPUMath * getSPUMath()
Definition: VM_Math.h:533
double fpreal64
Definition: SYS_Types.h:192
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
Definition: CE_Vector.h:218
VM_SIV lerp(fpreal32 *d, const fpreal32 *a, const fpreal32 *b, fpreal32 t, exint num)
Definition: VM_Math.h:480
GLintptr offset
Definition: glcorearb.h:664
#define VM_DECL_vFvFuF(name, expr)
Definition: VM_Math.h:47
#define VM_DECL_vFvF(name, expr)
Definition: VM_Math.h:82
int int32
Definition: SYS_Types.h:35
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1221
#define VM_DECL_vIvF_nonconst(name, expr)
Definition: VM_Math.h:106
int floor(T x)
Definition: ImathFun.h:150
#define VM_SIMDFUNCR(signature)
Definition: VM_Math.h:32
unsigned short uint16
Definition: SYS_Types.h:34
double fpreal
Definition: SYS_Types.h:270
unsigned char uint8
Definition: SYS_Types.h:32
VM_SIV set(fpreal32 *d, const fpreal32 *a, exint num, const uint32 *disabled)
VM_Math::set(d, a, disabled) := d[i] = disabled[i] ? d[i] : a[i].
Definition: VM_Math.h:404
static bool isSIMD()
Definition: VM_Math.h:212
static void forceSIMD(bool onoff)
Definition: VM_Math.h:531
void swap(TfErrorTransport &l, TfErrorTransport &r)
#define VM_SIV
Definition: VM_Math.h:22
png_infop png_uint_32 int num
Definition: png.h:2158
#define const
Definition: zconf.h:214
#define VM_DECL_vFuF(name, expr)
Definition: VM_Math.h:92
#define VM_DECL_vIvIvI(name, expr)
Definition: VM_Math.h:156
#define SYSmin(a, b)
Definition: SYS_Math.h:1368
const std::enable_if<!VecTraits< T >::IsVec, T >::type & min(const T &a, const T &b)
Definition: Composite.h:129
fpreal32 SYSfloorIL(fpreal32 val)
Definition: SYS_Floor.h:59
float fpreal32
Definition: SYS_Types.h:191
#define VM_DECL_vIvF(name, expr)
Definition: VM_Math.h:99
VM_SIV set(int32 *d, const int32 *a, exint num, const uint32 *disabled)
Definition: VM_Math.h:407
VM_SIV swap(fpreal64 *a, fpreal64 *b, exint num)
Definition: VM_Math.h:448
#define VM_DECL_WP(type, name, expr)
Definition: VM_Math.h:140
#define VM_DECL_vFuFvF(name, expr)
Definition: VM_Math.h:59
GLenum clamp
Definition: glcorearb.h:1233
unsigned int uint32
Definition: SYS_Types.h:36