HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
UT_Vector.C
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: Vector of arbitrary size (C++)
7  *
8  * COMMENTS:
9  *
10  */
11 
12 #ifdef WIN32
13  #include <memory.h>
14 #else
15 #ifdef LINUX
16  #include <string.h>
17 #endif
18 #endif
19 
20 #include <iostream>
21 
22 #include <SYS/SYS_Align.h>
23 #include <VM/VM_Math.h>
24 
25 #include <SYS/SYS_Math.h>
26 #include "UT_Vector.h"
27 
28 #include "UT_Vector2.h"
29 #include "UT_Vector3.h"
30 #include "UT_Vector4.h"
31 #include "UT_EnvControl.h"
32 
33 template <typename T>
// Sized constructor (signature line lost in this capture; presumably
// UT_VectorT(exint nl, exint nh) -- confirm against UT_Vector.h).
// Clears the members so init() sees no prior allocation, then allocates
// storage for the inclusive index range via init(nl, nh).
35 {
36  myVector = 0;
37  myOwnData = 0;
38  init(nl, nh);
39 }
40 
41 template <typename T>
// Copy constructor (signature line lost in capture).  Allocates a fresh
// buffer over v's index range and deep-copies the components one by one.
43 {
44  exint i;
45 
46  myVector = 0;
47  myOwnData = 0;
48  init(v.myNL, v.myNH);
49  for (i = v.myNL; i <= v.myNH; i++)
50  myVector[i] = v.myVector[i];
51 }
52 
53 template <typename T>
// Destructor.  Frees the buffer only when this vector owns it (a
// subvector() view sets myOwnData = 0 and must not free).  Note the
// pointer is biased by myNL, so the allocation start is myVector + myNL.
55 {
56  if (myOwnData && myVector)
57  free(myVector + myNL);
58 }
59 
60 template <typename T>
61 void
// subvector(v, nl, nh): become a non-owning view of v's range [nl..nh],
// re-based so this vector's indices run [1..n].  Any previously owned
// storage is released first; myOwnData = 0 marks the aliased state.
63 {
64  exint n = nh - nl + 1;
65 
66  if (myOwnData && myVector)
67  free(myVector + myNL);
68  myOwnData = 0;
69  myNL = 1;
70  myNH = n;
// Bias the pointer so index 1 maps to v's element nl.
71  myVector = &v.myVector[nl] - 1;
72 }
73 
74 // Assign from a float array into the given subvector
75 template <typename T, typename S>
76 static void
77 utAssign(const T *data, S *vec, exint nl, exint nh)
78 {
79  const exint n = nh - nl + 1;
80  S *dst;
81  exint i;
82 
83  // assign the data, converting from single to double precision
84  // NB: this may be vectorized into VM on x86 using the CVTPS2PD instr.
85  dst = vec + nl;
86  for (i = 0; i < n; i++)
87  *dst++ = *data++;
88 }
89 
90 template <typename T>
91 void
// assign(const fpreal32 *data, exint nl, exint nh): copy/convert a raw
// float array into the subrange [nl..nh] (per the member index, defined
// at UT_Vector.C:92).  The assert checks the target range is in bounds.
93 {
94  UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
95  utAssign(data, myVector, nl, nh);
96 }
97 
98 template <typename T>
99 void
// fpreal64 overload of assign(); identical except for the source type.
101 {
102  UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
103  utAssign(data, myVector, nl, nh);
104 }
105 
106 template <typename T>
107 void
// init(exint nl, exint nh): initialize nl, nh and allocate space (per
// member index, defined at UT_Vector.C:108).  The stored pointer is
// biased so myVector[i] is valid for i in [nl..nh].
109 {
110  UT_ASSERT((nh - nl + 1) >= 0);
111 
112  if (myVector && myOwnData)
113  {
// Growing: realloc the existing buffer.
// NOTE(review): the realloc return is unchecked -- on failure myVector
// becomes null and the old allocation leaks.
114  if (nh-nl > myNH-myNL)
115  {
116  myVector = (T *)realloc(myVector + myNL, (nh - nl + 1)*sizeof(T));
117  myVector = myVector - nl;
118  }
119  else
120  {
// Shrinking or same size: keep the allocation, just re-bias the pointer
// so the new nl maps onto the start of the existing buffer.
121  myVector = myVector + myNL - nl;
122  }
123  }
124  else
125  {
// No owned storage yet: fresh allocation (also unchecked).
126  myVector = (T *)malloc((nh - nl + 1)*sizeof(T));
127  myVector = myVector - nl;
128  myOwnData = 1;
129  }
130 
131  myNL = nl;
132  myNH = nh;
133 }
134 
135 template <typename T>
136 void
// getPartialRange(start, end, info): determines the [start,end) interval
// of element indices this job should work on (per member index, defined
// at UT_Vector.C:137).  divideWork() splits [0,length) and the result is
// then offset by getNL() into real index space.
138 {
139  exint units = length();
140  info.divideWork(units, start, end);
141  start += getNL();
142  end += getNL();
143 }
144 
145 template <typename T>
146 void
// getPartialBlockRange: same idea but in units of whole blocks of
// `blocksize` elements; returns block numbers, NOT offset by getNL().
148  exint &start, exint &end, const exint blocksize, const UT_JobInfo &info) const
149 {
150  exint units = (length() + blocksize-1)/blocksize;
151  info.divideWork(units, start, end);
152 }
153 
154 template <typename T>
155 void
// zero(nl, nh): zero-fill the subrange [nl..nh] with memset.  Assumes an
// all-zero byte pattern represents T(0) (true for the numeric T used here).
157 {
158  UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
159  memset(myVector + nl, 0, (nh - nl + 1)*sizeof(T));
160 }
161 
162 template <typename T>
163 void
// getSubvector2(v, idx): read components [idx, idx+1] into a UT_Vector2.
165 {
166  v.x() = (*this)(idx);
167  v.y() = (*this)(idx+1);
168 }
169 
170 template <typename T>
171 void
// setSubvector2(idx, v): write a UT_Vector2 into components [idx, idx+1].
173 {
174  (*this)(idx) = v.x();
175  (*this)(idx+1) = v.y();
176 }
177 
178 template <typename T>
179 void
// getSubvector3(v, idx): read components [idx..idx+2] into a UT_Vector3.
181 {
182  v.x() = (*this)(idx);
183  v.y() = (*this)(idx+1);
184  v.z() = (*this)(idx+2);
185 }
186 
187 template <typename T>
188 void
// setSubvector3(idx, v): write a UT_Vector3 into components [idx..idx+2].
190 {
191  (*this)(idx) = v.x();
192  (*this)(idx+1) = v.y();
193  (*this)(idx+2) = v.z();
194 }
195 
196 template <typename T>
197 void
// getSubvector4(v, idx): read components [idx..idx+3] into a UT_Vector4.
199 {
200  v.x() = (*this)(idx);
201  v.y() = (*this)(idx+1);
202  v.z() = (*this)(idx+2);
203  v.w() = (*this)(idx+3);
204 }
205 
206 template <typename T>
207 void
// setSubvector4(idx, v): write a UT_Vector4 into components [idx..idx+3].
209 {
210  (*this)(idx) = v.x();
211  (*this)(idx+1) = v.y();
212  (*this)(idx+2) = v.z();
213  (*this)(idx+3) = v.w();
214 }
215 
216 template <typename T>
217 void
219 {
220  exint diff = myNL-nl;
221 
222  myNL = nl;
223  myNH -= diff;
224  myVector += diff;
225 }
226 
227 template <typename T>
228 T
// norm(int type): per the member index this is T norm(int type = 2) const,
// defined at UT_Vector.C:229.  type 0 = max-abs (L-infinity), 1 = L1 (sum
// of |x|), 2 = L2 (sqrt of sum of squares) -- see normInternalPartial.
230 {
// One fpreal64 accumulator per PARALLEL_BLOCK_SIZE-sized block.
231  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
232  if (nblocks <= 0)
233  return 0;
234 
235  UT_StackBuffer<fpreal64> accumulators(nblocks);
236 
237  normInternal((fpreal64*)accumulators, type);
238 
239  fpreal64 result = accumulators[0];
240 
// Reduce the per-block results: max for L-infinity, sum otherwise.
241  if (type == 0)
242  {
243  for (exint i = 1; i < nblocks; ++i)
244  {
245  fpreal64 v = accumulators[i];
246  if (v > result)
247  result = v;
248  }
249  }
250  else
251  {
252  for (exint i = 1; i < nblocks; ++i)
253  result += accumulators[i];
254 
255  // normalize result.
256  if (type == 2) // L2 norm
257  result = SYSsqrt(result);
258  }
259 
260  return result;
261 }
262 
263 
264 template <typename T>
265 T
266 UT_VectorT<T>::norm2() const
267 {
268  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
269  if (nblocks <= 0)
270  return 0;
271 
272  UT_StackBuffer<fpreal64> accumulators(nblocks);
273 
274  normInternal((fpreal64*)accumulators, 2);
275 
276  fpreal64 result = accumulators[0];
277  for (exint i = 1; i < nblocks; ++i)
278  result += accumulators[i];
279 
280  return result;
281 }
282 
283 template <typename T>
284 void
// Threaded partial for norm()/norm2() (name line lost in capture;
// presumably normInternalPartial).  Each job processes a contiguous run
// of blocks and writes one accumulator per block into `output`:
// type 0 -> block max of |x|, type 1 -> block sum of |x|,
// otherwise -> block sum of x*x.
286  fpreal64 *output, int type, const UT_JobInfo &info) const
287 {
288  exint startblock;
289  exint endblock;
290  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
291 
// First element index of this job's first block.
292  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
293 
294  if (type == 0)
295  {
296  for (exint block = startblock; block < endblock; ++block)
297  {
// Blocks are never empty, so seeding from element i is safe.
298  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
299  fpreal64 result = SYSabs(myVector[i]);
300  for (++i; i < blockend; ++i)
301  {
302  T v = SYSabs(myVector[i]);
303  if (v > result)
304  result = v;
305  }
306  output[block] = result;
307  }
308  }
309  else if (type == 1)
310  {
311  for (exint block = startblock; block < endblock; ++block)
312  {
313  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
314  fpreal64 result = SYSabs(myVector[i]);
315  for (++i; i < blockend; ++i)
316  {
317  result += SYSabs(myVector[i]);
318  }
319  output[block] = result;
320  }
321  }
322  else // L2-norm
323  {
324  for (exint block = startblock; block < endblock; ++block)
325  {
326  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
327  T v = myVector[i];
328  fpreal64 result = v*v;
329  for (++i; i < blockend; ++i)
330  {
331  v = myVector[i];
332  result += v*v;
333  }
334  output[block] = result;
335  }
336  }
337 }
338 
339 template <typename T>
340 T
// distance2(v): squared Euclidean distance between this and v (per the
// member index, defined at UT_Vector.C:341).  Per-block partials are
// computed by distance2Internal, then reduced serially.
342 {
343  UT_ASSERT_P(myNL == v.myNL && myNH == v.myNH);
344 
345  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
346  if (nblocks <= 0)
347  return 0;
348 
349  UT_StackBuffer<fpreal64> accumulators(nblocks);
350 
351  distance2Internal((fpreal64*)accumulators, v);
352 
353  fpreal64 result = accumulators[0];
354  for (exint i = 1; i < nblocks; ++i)
355  result += accumulators[i];
356 
357  return result;
358 }
359 
360 template <typename T>
361 void
// Threaded partial for distance2() (name line lost in capture).  For each
// block of this job's range, accumulates sum((this[i] - v[i])^2) into one
// entry of `output`.
363  fpreal64 *output, const UT_VectorT<T> &v, const UT_JobInfo &info) const
364 {
365  exint startblock;
366  exint endblock;
367  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
368 
369  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
370 
371  for (exint block = startblock; block < endblock; ++block)
372  {
373  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
374  fpreal64 diff = myVector[i] - v.myVector[i];
375  fpreal64 result = diff*diff;
376  for (++i; i < blockend; ++i)
377  {
378  diff = myVector[i] - v.myVector[i];
379  result += diff*diff;
380  }
381  output[block] = result;
382  }
383 }
384 
385 template <typename T>
386 void
// negPartial(info): negate this job's range in place (per member index,
// defined at UT_Vector.C:387).
388 {
389  exint i, end;
390 
391  getPartialRange(i, end, info);
392 
393 #if 0
394  for (; i < end; i++)
395  myVector[i] = -myVector[i];
396 #else
397  // NOTE: Compilers get confused, thinking that i or end
398  // might change underneath us, because we passed a non-const reference
399  // to them to getPartialRange, so they don't make any assumptions
400  // and have terrible performance, unless we make separate
401  // variables j and jend that are entirely local.
402  for (exint j = i, jend = end; j < jend; j++)
403  myVector[j] = -myVector[j];
404 #endif
405 }
406 
407 template <typename T>
408 void
// (name line lost in capture)  For this job's range computes
// this[j] = v[j] - this[j]; matches the fpreal32 specialization that
// uses VM_Math::scaleoffset(dst, -1.0F, v) below.
410 {
411  exint i, end;
412 
413  getPartialRange(i, end, info);
414 
415 #if 0
416  for (; i < end; i++)
417  myVector[i] = v.myVector[i] - myVector[i];
418 #else
419  // NOTE: Compilers get confused, thinking that i or end
420  // might change underneath us, because we passed a non-const reference
421  // to them to getPartialRange, so they don't make any assumptions
422  // and have terrible performance, unless we make separate
423  // variables j and jend that are entirely local.
424  for (exint j = i, jend = end; j < jend; j++)
425  myVector[j] = v.myVector[j] - myVector[j];
426 #endif
427 }
428 
429 template <typename T>
430 void
// addScaledVecPartial (name line lost in capture): this[j] += s * v[j]
// over this job's range -- the axpy building block of addScaledVec().
432  const UT_JobInfo &info)
433 {
434  exint i, end;
435 
436  getPartialRange(i, end, info);
437 
438 #if 0
439  for (; i < end; i++)
440  myVector[i] += s * v.myVector[i];
441 #else
442  // NOTE: Compilers get confused, thinking that i or end
443  // might change underneath us, because we passed a non-const reference
444  // to them to getPartialRange, so they don't make any assumptions
445  // and have terrible performance (2.4x slower for this function),
446  // unless we make separate variables j and jend that are entirely local.
447  for (exint j = i, jend = end; j < jend; j++)
448  myVector[j] += s * v.myVector[j];
449 #endif
450 }
451 
452 template <typename T>
453 void
// addScaledVecNorm2(s, v, norm2): this += s*v, and the squared L2 norm of
// the UPDATED vector is written to *norm2 (per member index, defined at
// UT_Vector.C:454).
455  T s, const UT_VectorT<T> &v, fpreal64 *norm2)
456 {
457  UT_ASSERT_P(myNL == v.myNL && myNH == v.myNH);
458 
459  // Just in case, handle null
460  if (!norm2)
461  {
462  addScaledVec(s, v);
463  return;
464  }
465 
466  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
467  if (nblocks <= 0)
468  {
469  *norm2 = 0;
470  return;
471  }
472 
473  UT_StackBuffer<fpreal64> accumulators(nblocks);
474 
475  addScaledVecNorm2Internal(s, v, (fpreal64*)accumulators);
476 
// Serial reduction of the per-block squared norms.
477  fpreal64 result = accumulators[0];
478  for (exint i = 1; i < nblocks; ++i)
479  result += accumulators[i];
480 
481  *norm2 = result;
482 }
483 
484 template <typename T>
485 void
// Threaded partial for addScaledVecNorm2 (name line lost in capture):
// per block, update this[i] = this[i] + s*v[i] and accumulate the square
// of the new value into norm2[block].
487  T s, const UT_VectorT<T> &v, fpreal64 *norm2, const UT_JobInfo &info)
488 {
489  exint startblock;
490  exint endblock;
491  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
492 
493  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
494 
495  for (exint block = startblock; block < endblock; ++block)
496  {
497  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
498  T val = myVector[i] + s*v.myVector[i];
499  myVector[i] = val;
500  fpreal64 result = val*val;
501  for (++i; i < blockend; ++i)
502  {
503  val = myVector[i] + s*v.myVector[i];
504  myVector[i] = val;
505  result += val*val;
506  }
507  norm2[block] = result;
508  }
509 }
510 
511 template <typename T>
512 void
// addScaledVecNorm2UpTo(s, v, norm2, normlimit): like addScaledVecNorm2
// but the squared norm only includes indices below normlimit (per member
// index, defined at UT_Vector.C:513); the axpy update itself covers the
// whole vector.
514  T s, const UT_VectorT<T> &v, fpreal64 *norm2, exint normlimit)
515 {
516  UT_ASSERT_P(myNL == v.myNL && myNH == v.myNH);
517 
518  // Just in case, handle null
519  if (!norm2 || normlimit <= 0)
520  {
521  addScaledVec(s, v);
522 
523  if (norm2)
524  *norm2 = 0;
525 
526  return;
527  }
528 
529  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
530  if (nblocks <= 0)
531  {
532  *norm2 = 0;
533  return;
534  }
535 
536  UT_StackBuffer<fpreal64> accumulators(nblocks);
537 
538  addScaledVecNorm2UpToInternal(s, v, (fpreal64*)accumulators, normlimit);
539 
540  fpreal64 result = accumulators[0];
541  for (exint i = 1; i < nblocks; ++i)
542  result += accumulators[i];
543 
544  *norm2 = result;
545 }
546 
547 template <typename T>
548 void
// Threaded partial for addScaledVecNorm2UpTo (name line lost in capture):
// indices below normlimit both update and accumulate; the remainder of
// each block is updated without contributing to the norm.
550  T s, const UT_VectorT<T> &v, fpreal64 *norm2, exint normlimit, const UT_JobInfo &info)
551 {
552  exint startblock;
553  exint endblock;
554  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
555 
556  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
557 
558  for (exint block = startblock; block < endblock; ++block)
559  {
560  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
561  const exint normmax = SYSmin(blockend, normlimit);
562  fpreal64 result = 0;
563  for (; i < normmax; ++i)
564  {
565  T val = myVector[i] + s*v.myVector[i];
566  myVector[i] = val;
567  result += val*val;
568  }
569  for (; i < blockend; ++i)
570  {
571  myVector[i] += s*v.myVector[i];
572  }
573  norm2[block] = result;
574  }
575 }
576 
577 template <typename T>
578 void
// (name line lost in capture)  For this job's range computes
// this[j] = s * this[j] + v[j], i.e. scale in place then add v.
580  const UT_JobInfo &info)
581 {
582  exint i, end;
583 
584  getPartialRange(i, end, info);
585 
586 #if 0
587  for (; i < end; i++)
588  myVector[i] = s * myVector[i] + v.myVector[i];
589 #else
590  // NOTE: Compilers get confused, thinking that i or end
591  // might change underneath us, because we passed a non-const reference
592  // to them to getPartialRange, so they don't make any assumptions
593  // and have terrible performance, unless we make separate
594  // variables j and jend that are entirely local.
595  for (exint j = i, jend = end; j < jend; j++)
596  myVector[j] = s * myVector[j] + v.myVector[j];
597 #endif
598 }
599 
600 template <typename T>
601 void
// multAndSetPartial (name line lost in capture): componentwise product
// this[j] = a[j] * b[j] over this job's range.
603  const UT_JobInfo &info)
604 {
605  exint i, end;
606 
607  getPartialRange(i, end, info);
608 
609 #if 0
610  for (; i < end; i++)
611  myVector[i] = a.myVector[i] * b.myVector[i];
612 #else
613  // NOTE: Compilers get confused, thinking that i or end
614  // might change underneath us, because we passed a non-const reference
615  // to them to getPartialRange, so they don't make any assumptions
616  // and have terrible performance, unless we make separate
617  // variables j and jend that are entirely local.
618  for (exint j = i, jend = end; j < jend; j++)
619  myVector[j] = a.myVector[j] * b.myVector[j];
620 #endif
621 }
622 
623 template <typename T>
624 void
// multSetAndDotUpTo(a, b, dot_aba, dotlimit): set this = a*b
// componentwise and write dot(a*b, a), restricted to indices below
// dotlimit, into *dot_aba (per member index, defined at UT_Vector.C:625).
626  const UT_VectorT<T> &a, const UT_VectorT<T> &b,
627  fpreal64 *dot_aba, exint dotlimit)
628 {
629  UT_ASSERT_P(myNL == a.myNL && myNH == a.myNH);
630  UT_ASSERT_P(myNL == b.myNL && myNH == b.myNH);
631 
632  if (!dot_aba)
633  {
634  multAndSet(a, b);
635  return;
636  }
637 
638  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
639  if (nblocks <= 0)
640  {
641  *dot_aba = 0;
642  return;
643  }
644 
645  UT_StackBuffer<fpreal64> accumulators(nblocks);
646 
647  multSetAndDotUpToInternal(a, b, (fpreal64*)accumulators, dotlimit);
648 
649  fpreal64 result = accumulators[0];
650  for (exint i = 1; i < nblocks; ++i)
651  result += accumulators[i];
652 
653  *dot_aba = result;
654 }
655 
656 template <typename T>
657 void
659  const UT_VectorT<T> &a, const UT_VectorT<T> &b,
660  fpreal64 *dot_aba, exint dotlimit,
661  const UT_JobInfo &info)
662 {
663  exint startblock;
664  exint endblock;
665  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
666 
667  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
668 
669  for (exint block = startblock; block < endblock; ++block)
670  {
671  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
672  const exint dotmax = SYSmin(blockend, dotlimit);
673  fpreal64 result = 0;
674  for (; i < dotmax; ++i)
675  {
676  T av = a.myVector[i];
677  T bv = b.myVector[i];
678  T val = av*bv;
679  myVector[i] = val;
680  result += val*av;
681  }
682  for (; i < blockend; ++i)
683  {
684  myVector[i] += a.myVector[i]*b.myVector[i];
685  }
686  dot_aba[block] = result;
687  }
688 }
689 
690 template <typename T>
691 void
// divAndSetPartial (name line lost in capture): componentwise quotient
// this[j] = a[j] / b[j] over this job's range.  No guard against b[j] == 0.
693  const UT_JobInfo &info)
694 {
695  exint i, end;
696 
697  getPartialRange(i, end, info);
698 
699 #if 0
700  for (; i < end; i++)
701  myVector[i] = a.myVector[i] / b.myVector[i];
702 #else
703  // NOTE: Compilers get confused, thinking that i or end
704  // might change underneath us, because we passed a non-const reference
705  // to them to getPartialRange, so they don't make any assumptions
706  // and have terrible performance, unless we make separate
707  // variables j and jend that are entirely local.
708  for (exint j = i, jend = end; j < jend; j++)
709  myVector[j] = a.myVector[j] / b.myVector[j];
710 #endif
711 }
712 
713 template <typename T>
714 void
// Safe componentwise inversion (name line lost in capture):
// this[j] = SYSsafediv(1, a[j]), which guards division by zero.
716  const UT_JobInfo &info)
717 {
718  exint i, end;
719 
720  getPartialRange(i, end, info);
721 
722 #if 0
723  for (; i < end; i++)
724  {
725  myVector[i] = SYSsafediv(T(1.0), a.myVector[i]);
726  }
727 #else
728  // NOTE: Compilers get confused, thinking that i or end
729  // might change underneath us, because we passed a non-const reference
730  // to them to getPartialRange, so they don't make any assumptions
731  // and have terrible performance, unless we make separate
732  // variables j and jend that are entirely local.
733  for (exint j = i, jend = end; j < jend; j++)
734  myVector[j] = SYSsafediv(T(1.0), a.myVector[j]);
735 #endif
736 }
737 
738 template <typename T>
739 void
// Plain componentwise inversion (name line lost in capture):
// this[j] = 1 / a[j], with no zero guard.
741  const UT_JobInfo &info)
742 {
743  exint i, end;
744 
745  getPartialRange(i, end, info);
746 
747 #if 0
748  for (; i < end; i++)
749  {
750  myVector[i] = T(1.0) / a.myVector[i];
751  }
752 #else
753  // NOTE: Compilers get confused, thinking that i or end
754  // might change underneath us, because we passed a non-const reference
755  // to them to getPartialRange, so they don't make any assumptions
756  // and have terrible performance, unless we make separate
757  // variables j and jend that are entirely local.
758  for (exint j = i, jend = end; j < jend; j++)
759  myVector[j] = T(1.0) / a.myVector[j];
760 #endif
761 }
762 
763 template <typename T>
// operator=(const UT_VectorT<T> &v): assignment (per member index,
// defined at UT_Vector.C:765).  Requires equal lengths; delegates the
// element copy to copyFrom().
766 {
767  UT_ASSERT_P(length() == v.length());
768  copyFrom(v);
769  return *this;
770 }
771 
772 template <typename T>
773 void
// copyFromPartial (name line lost in capture): memcpy this job's range
// from v.  Valid because the stored T here is a trivially copyable
// numeric type.
775  const UT_JobInfo &info)
776 {
777  UT_ASSERT_P(length() == v.length());
778  exint i, end;
779 
780  getPartialRange(i, end, info);
781 
782  memcpy(&myVector[i], &v.myVector[i],
783  (end - i) * sizeof(T));
784 }
785 
786 template <typename T>
789 {
790  exint i;
791 
792  for (i=myNL; i<=myNH; i++)
793  myVector[i] += v.myVector[i];
794  return *this;
795 }
796 
797 template <typename T>
800 {
801  exint i;
802 
803  for (i=myNL; i<=myNH; i++)
804  myVector[i] -= v.myVector[i];
805  return *this;
806 }
807 
808 template <typename T>
811 {
812  exint i;
813 
814  for (i=myNL; i<=myNH; i++)
815  myVector[i] *= v.myVector[i];
816  return *this;
817 }
818 
819 template <typename T>
822 {
823  exint i;
824 
825  for (i=myNL; i<=myNH; i++)
826  myVector[i] /= v.myVector[i];
827  return *this;
828 }
829 
830 template <typename T>
833 {
834  exint i;
835 
836  for (i=myNL; i<=myNH; i++)
837  myVector[i] *= scalar;
838  return *this;
839 }
840 
841 
842 template <typename T>
845 {
846  exint i;
847 
848  scalar = 1.0F / scalar;
849  for (i=myNL; i<=myNH; i++)
850  myVector[i] *= scalar;
851  return *this;
852 }
853 
854 
855 template <typename T>
856 T
858 {
859  UT_ASSERT_P(myNL == v.myNL && myNH == v.myNH);
860 
861  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
862  if (nblocks <= 0)
863  return 0;
864 
865  UT_StackBuffer<fpreal64> accumulators(nblocks);
866 
867  dotInternal((fpreal64*)accumulators, v);
868 
869  fpreal64 result = accumulators[0];
870  for (exint i = 1; i < nblocks; ++i)
871  result += accumulators[i];
872 
873  return result;
874 }
875 
876 template <typename T>
877 void
878 UT_VectorT<T>::dotInternalPartial(fpreal64 *output, const UT_VectorT<T> &v, const UT_JobInfo &info) const
879 {
880  exint startblock;
881  exint endblock;
882  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
883 
884  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
885 
886  for (exint block = startblock; block < endblock; ++block)
887  {
888  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
889  fpreal64 result = myVector[i]*v.myVector[i];
890  for (++i; i < blockend; ++i)
891  {
892  result += myVector[i]*v.myVector[i];
893  }
894  output[block] = result;
895  }
896 }
897 
898 template <typename T>
899 std::ostream &
900 UT_VectorT<T>::save(std::ostream &os) const
901 {
902  exint i;
903 
904  for (i=myNL; i<=myNH; i++)
905  os << i << ": " << myVector[i] << "\n";
906 
907  return os;
908 }
909 
910 template <typename T>
911 bool
913 {
914  if (getNL() != v.getNL() || getNH() != v.getNH())
915  return false;
916  for (exint i = getNL(); i < getNH(); i++)
917  {
918  if (!SYSalmostEqual((*this)(i), v(i), ulps))
919  return false;
920  }
921  return true;
922 }
923 
924 template <typename T>
925 bool
927 {
928  for (exint i = myNL; i <= myNH; i++)
929  {
930  if (SYSisNan(myVector[i]))
931  return true;
932  }
933  return false;
934 }
935 
936 template <typename T>
937 void
939 {
941  {
942  std::cerr << "NAN found in UT_VectorT\n";
943  UT_ASSERT(0);
944  }
945 }
946 
947 //////////////////////////////
948 // UT_Vector Specializations
949 //////////////////////////////
950 
951 #if 1
952 
953 template <>
954 inline void
// fpreal32 specialization of negPartial (signature line lost in capture):
// negate the range with the SIMD helper VM_Math::negate.
956 {
957  exint i, end;
958 
959  getPartialRange(i, end, info);
960 
961  VM_Math::negate(&myVector[i], &myVector[i], end-i);
962 }
963 
964 template <>
965 inline void
// fpreal32 specialization of the "this = v - this" partial:
// scaleoffset computes dst = dst*(-1) + v in one vectorized pass.
967 {
968  exint i, end;
969 
970  getPartialRange(i, end, info);
971 
972  VM_Math::scaleoffset(&myVector[i], -1.0F, &v.myVector[i], end-i);
973 }
974 
975 template <>
976 inline void
// fpreal32 specialization of addScaledVecPartial: vectorized multiply-add
// this += s*v via VM_Math::madd.
978  const UT_VectorT<fpreal32> &v,
979  const UT_JobInfo &info)
980 {
981  exint i, end;
982 
983  getPartialRange(i, end, info);
984 
985  VM_Math::madd(&myVector[i], &v.myVector[i], s, end-i);
986 }
987 
988 template <>
989 inline void
// fpreal32 specialization of the addScaledVecNorm2 partial: per block,
// maddAndNorm fuses this += s*v with the squared-norm accumulation.
991  fpreal32 s,
992  const UT_VectorT<fpreal32> &v,
993  fpreal64 *norm2,
994  const UT_JobInfo &info)
995 {
996  exint startblock;
997  exint endblock;
998  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
999 
1000  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1001 
1002  for (exint block = startblock; block < endblock; ++block)
1003  {
1004  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1005 
1006  fpreal64 result = VM_Math::maddAndNorm(&myVector[i], &v.myVector[i], s, blockend-i);
1007 
1008  norm2[block] = result;
1009 
1010  i = blockend;
1011  }
1012 }
1013 
1014 template <>
1015 inline void
// fpreal32 specialization of the addScaledVecNorm2UpTo partial: below
// normlimit use maddAndNorm (update + accumulate squared norm); the rest
// of each block is updated with plain madd and contributes nothing.
1017  fpreal32 s,
1018  const UT_VectorT<fpreal32> &v,
1019  fpreal64 *norm2,
1020  exint normlimit,
1021  const UT_JobInfo &info)
1022 {
1023  exint startblock;
1024  exint endblock;
1025  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1026 
1027  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1028 
1029  for (exint block = startblock; block < endblock; ++block)
1030  {
1031  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1032 
1033  fpreal64 result = 0;
1034  if (i < normlimit)
1035  {
1036  const exint normmax = SYSmin(blockend, normlimit);
1037  result = VM_Math::maddAndNorm(&myVector[i], &v.myVector[i], s, normmax-i);
1038 
1039  i = normmax;
1040  if (i < blockend)
1041  VM_Math::madd(&myVector[i], &v.myVector[i], s, blockend-i);
1042  }
1043  else
1044  VM_Math::madd(&myVector[i], &v.myVector[i], s, blockend-i);
1045 
1046  norm2[block] = result;
1047 
1048  i = blockend;
1049  }
1050 }
1051 
1052 template <>
1053 inline void
// fpreal32 specialization of the "this = s*this + v" partial, using the
// vectorized VM_Math::scaleoffset.
1055  const UT_VectorT<fpreal32> &v,
1056  const UT_JobInfo &info)
1057 {
1058  exint i, end;
1059 
1060  getPartialRange(i, end, info);
1061 
1062  VM_Math::scaleoffset(&myVector[i], s, &v.myVector[i], end-i);
1063 }
1064 
1065 
1066 template <>
1067 inline void
// fpreal32 specialization of multAndSetPartial: this = a*b via
// vectorized VM_Math::mul.
1069  const UT_VectorT<fpreal32> &b,
1070  const UT_JobInfo &info)
1071 {
1072  exint i, end;
1073 
1074  getPartialRange(i, end, info);
1075 
1076  VM_Math::mul(&myVector[i], &a.myVector[i], &b.myVector[i], end-i);
1077 }
1078 
1079 template <>
1080 inline void
// fpreal32 specialization of the multSetAndDotUpTo partial: below
// dotlimit, mulAndDotDA fuses this = a*b with the dot(a*b, a)
// accumulation; past the limit VM_Math::mul just assigns the product.
1082  const UT_VectorT<fpreal32> &a,
1083  const UT_VectorT<fpreal32> &b,
1084  fpreal64 *dot_aba, exint dotlimit,
1085  const UT_JobInfo &info)
1086 {
1087  exint startblock;
1088  exint endblock;
1089  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1090 
1091  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1092 
1093  for (exint block = startblock; block < endblock; ++block)
1094  {
1095  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1096 
1097  fpreal64 result = 0;
1098  if (i < dotlimit)
1099  {
1100  const exint dotmax = SYSmin(blockend, dotlimit);
1101 
1102  result = VM_Math::mulAndDotDA(&myVector[i], &a.myVector[i], &b.myVector[i], dotmax-i);
1103 
1104  i = dotmax;
1105  if (i < blockend)
1106  VM_Math::mul(&myVector[i], &a.myVector[i], &b.myVector[i], blockend-i);
1107  }
1108  else
1109  VM_Math::mul(&myVector[i], &a.myVector[i], &b.myVector[i], blockend-i);
1110 
1111  dot_aba[block] = result;
1112 
1113  i = blockend;
1114  }
1115 }
1116 
1117 template <>
1118 inline void
// fpreal32 specialization of divAndSetPartial: this = a / b via
// vectorized VM_Math::div.
1120  const UT_VectorT<fpreal32> &b,
1121  const UT_JobInfo &info)
1122 {
1123  exint i, end;
1124 
1125  getPartialRange(i, end, info);
1126 
1127  VM_Math::div(&myVector[i], &a.myVector[i], &b.myVector[i], end-i);
1128 }
1129 
1130 template <>
1131 inline void
// fpreal32 specialization of the safe-invert partial: this = 1/a with a
// divide-by-zero guard (VM_Math::safediv).
1133  const UT_JobInfo &info)
1134 {
1135  exint i, end;
1136 
1137  getPartialRange(i, end, info);
1138 
1139  VM_Math::safediv(&myVector[i], 1.0f, &a.myVector[i], end-i);
1140 }
1141 
1142 template <>
1143 inline void
// fpreal32 specialization of the plain invert partial: this = 1/a with
// no zero guard.
1145  const UT_JobInfo &info)
1146 {
1147  exint i, end;
1148 
1149  getPartialRange(i, end, info);
1150 
1151  VM_Math::div(&myVector[i], 1.0f, &a.myVector[i], end-i);
1152 }
1153 
1154 template <>
1155 inline UT_VectorT<fpreal32> &
// fpreal32 operator+=: one vectorized add over the whole range.
1157 {
1158  VM_Math::add(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);
1159  return *this;
1160 }
1161 
1162 template <>
1163 inline UT_VectorT<fpreal32> &
// fpreal32 operator-=: vectorized subtract.
1165 {
1166  VM_Math::sub(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);
1167  return *this;
1168 }
1169 
1170 template <>
1171 inline UT_VectorT<fpreal32> &
// fpreal32 operator*= (vector): vectorized componentwise multiply.
1173 {
1174  VM_Math::mul(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);
1175  return *this;
1176 }
1177 
1178 template <>
1179 inline UT_VectorT<fpreal32> &
// fpreal32 operator/= (vector): vectorized componentwise divide.
1181 {
1182  VM_Math::div(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);
1183  return *this;
1184 }
1185 
1186 template <>
1187 inline UT_VectorT<fpreal32> &
// fpreal32 operator*= (scalar): vectorized scale.
1189 {
1190  VM_Math::mul(&myVector[myNL], &myVector[myNL], scalar, myNH-myNL+1);
1191  return *this;
1192 }
1193 
1194 template <>
1195 inline UT_VectorT<fpreal32> &
// fpreal32 operator/= (scalar): a true vectorized divide.  NOTE(review):
// the generic template multiplies by the reciprocal instead, so results
// can differ in the last ulp between T = fpreal32 and other types.
1197 {
1198  VM_Math::div(&myVector[myNL], &myVector[myNL], scalar, myNH-myNL+1);
1199  return *this;
1200 }
1201 
1202 template <>
1203 inline UT_VectorT<fpreal32> &
// fpreal32 specialization of dotInternalPartial: one VM_Math::dot per
// block writes the partial inner products into `output`.
1205 {
1206  exint startblock;
1207  exint endblock;
1208  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1209 
1210  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1211 
1212  for (exint block = startblock; block < endblock; ++block)
1213  {
1214  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1215 
1216  fpreal64 result = VM_Math::dot(&myVector[i], &v.myVector[i], blockend-i);
1217 
1218  output[block] = result;
1219 
1220  i = blockend;
1221  }
1222 }
1223 
1224 template <>
1225 inline void
// fpreal32 specialization of normInternalPartial.  Same per-block contract
// as the generic version (type 0 = max-abs, 1 = sum-abs, else sum of
// squares), but the L2 case uses the vectorized VM_Math::dot(x, x).
1227  fpreal64 *output, int type,
1228  const UT_JobInfo &info) const
1229 {
1230  exint startblock;
1231  exint endblock;
1232  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1233 
1234  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1235 
1236  if (type == 0)
1237  {
1238  for (exint block = startblock; block < endblock; ++block)
1239  {
1240  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1241  fpreal32 result = SYSabs(myVector[i]);
1242  for (++i; i < blockend; ++i)
1243  {
1244  fpreal32 v = SYSabs(myVector[i]);
1245  if (v > result)
1246  result = v;
1247  }
1248  output[block] = result;
1249  }
1250  }
1251  else if (type == 1)
1252  {
1253  for (exint block = startblock; block < endblock; ++block)
1254  {
1255  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1256  fpreal64 result = SYSabs(myVector[i]);
1257  for (++i; i < blockend; ++i)
1258  {
1259  result += SYSabs(myVector[i]);
1260  }
1261  output[block] = result;
1262  }
1263  }
1264  else // L2-norm
1265  {
1266  for (exint block = startblock; block < endblock; ++block)
1267  {
1268  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1269 
1270  fpreal64 result = VM_Math::dot(&myVector[i], &myVector[i], blockend-i);
1271 
1272  output[block] = result;
1273 
1274  i = blockend;
1275  }
1276  }
1277 }
1278 #endif
1279 
1280 
1281 //////////////////////////
1282 // UT_Permutation class
1283 //////////////////////////
1284 
1285 
1286 template <typename T>
// UT_PermutationT(exint nl, exint nh) (per member index, defined at
// UT_Vector.C:1287): allocate storage for the inclusive range [nl..nh],
// biasing the stored pointer so myVector[i] is valid for i in [nl..nh].
// NOTE(review): the malloc return is unchecked.
1288 {
1289  myNL = nl;
1290  myNH = nh;
1291  myVector = (T *)malloc((nh - nl + 1)*sizeof(T));
1292  myVector = myVector - nl;
1293 }
1294 
1295 template <typename T>
// Destructor: undo the nl bias before freeing.
1297 {
1298  free(myVector + myNL);
1299 }
1300 
1301 template <typename T>
// Copy constructor: deep copy via clone().
1303 {
1304  clone(p);
1305 }
1306 
1307 template <typename T>
// operator=(const UT_PermutationT<T> &p) (per member index, defined at
// UT_Vector.C:1309): self-assignment safe; frees the old buffer then
// deep-copies via clone().
1310 {
1311  if (&p != this)
1312  {
1313  free(myVector + myNL);
1314  clone(p);
1315  }
1316  return *this;
1317 }
1318 
1319 template <typename T>
1320 void
// clone(p): allocate a fresh biased buffer matching p's range and memcpy
// the contents.  Must not be called with p == *this (asserted), since the
// caller is expected to have freed this object's buffer already.
1322 {
1323  UT_ASSERT(&p != this);
1324  myNL = p.myNL;
1325  myNH = p.myNH;
1326  myVector = (T *)malloc((myNH - myNL + 1)*sizeof(T));
1327  myVector = myVector - myNL;
1328 
1329  memcpy(myVector + myNL, p.myVector + myNL, (myNH - myNL + 1)*sizeof(T));
1330 }
1331 
1332 template <typename T>
1333 void
// zero(): byte-wise clear of the whole range (assumes all-zero bytes are
// a valid T(0), true for the index types stored here).
1335 {
1336  memset(myVector + myNL, 0, (myNH - myNL + 1)*sizeof(T));
1337 }
1338 
1339 template <typename T>
1340 void
// changeNL(nl) (per member index, defined at UT_Vector.C:1342): shift the
// index origin; length is preserved and the data is untouched.
1342 {
1343  exint diff = myNL-nl;
1344 
1345  myNL = nl;
1346  myNH -= diff;
1347  myVector += diff;
1348 }
1349 
1350 
UT_PermutationT< T > & operator=(const UT_PermutationT< T > &p)
Definition: UT_Vector.C:1309
bool hasNan() const
Definition: UT_Vector.C:926
void zero()
Definition: UT_Vector.h:58
void getPartialRange(exint &start, exint &end, const UT_JobInfo &info) const
Determines the [start,end) interval that should be worked on.
Definition: UT_Vector.C:137
GA_API const UT_StringHolder div
void negPartial(const UT_JobInfo &info)
Negate.
Definition: UT_Vector.C:387
T & z(void)
Definition: UT_Vector4.h:372
void divideWork(int units, int &start, int &end) const
void init(exint nl, exint nh)
Initialize nl, nh and allocate space.
Definition: UT_Vector.C:108
UT_VectorT & operator+=(const UT_VectorT< T > &v)
Definition: UT_Vector.C:788
const GLdouble * v
Definition: glcorearb.h:836
GLuint start
Definition: glcorearb.h:474
exint getNH() const
Get the high index.
Definition: UT_Vector.h:76
T & x(void)
Definition: UT_Vector2.h:284
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1221
void getPartialBlockRange(exint &startblock, exint &endblock, const exint blocksize, const UT_JobInfo &info) const
Definition: UT_Vector.C:147
bool SYSisNan(const F f)
Definition: SYS_Math.h:173
#define SYSabs(a)
Definition: SYS_Math.h:1367
void addScaledVecNorm2(T s, const UT_VectorT< T > &v, fpreal64 *norm2)
Add scaled vector and compute the squared L2 norm.
Definition: UT_Vector.C:454
SYS_FORCE_INLINE T & x(void)
Definition: UT_Vector3.h:581
T dot(const UT_VectorT< T > &v) const
Definition: UT_Vector.C:857
void multSetAndDotUpTo(const UT_VectorT< T > &a, const UT_VectorT< T > &b, fpreal64 *dot_aba, exint dotlimit)
Definition: UT_Vector.C:625
png_uint_32 i
Definition: png.h:2877
void testForNan() const
Definition: UT_Vector.C:938
void addScaledVecNorm2UpTo(T s, const UT_VectorT< T > &v, fpreal64 *norm2, exint normlimit)
Definition: UT_Vector.C:513
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:101
SYS_FORCE_INLINE T & z(void)
Definition: UT_Vector3.h:585
~UT_VectorT()
Definition: UT_Vector.C:54
void changeNL(exint nl)
Change the low index, and the high index will adjust itself.
Definition: UT_Vector.C:218
long long int64
Definition: SYS_Types.h:100
GLdouble n
Definition: glcorearb.h:2007
GLfloat f
Definition: glcorearb.h:1925
UT_PermutationT(exint nl, exint nh)
Definition: UT_Vector.C:1287
UT_VectorT & operator/=(const UT_VectorT< T > &v)
Definition: UT_Vector.C:821
T norm(int type=2) const
Definition: UT_Vector.C:229
#define UT_ASSERT(ZZ)
Definition: UT_Assert.h:102
int64 exint
Definition: SYS_Types.h:109
double fpreal64
Definition: SYS_Types.h:185
UT_VectorT()
Input the index range [nl..nh].
Definition: UT_Vector.h:38
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
Definition: CE_Vector.h:218
GLuint GLuint end
Definition: glcorearb.h:474
std::ostream & save(std::ostream &os) const
Output.
Definition: UT_Vector.C:900
void getSubvector4(UT_Vector4 &v, exint idx) const
Definition: UT_Vector.C:198
T distance2(const UT_VectorT< T > &v) const
Definition: UT_Vector.C:341
void changeNL(exint nl)
Definition: UT_Vector.C:1341
UT_VectorT & operator-=(const UT_VectorT< T > &v)
Definition: UT_Vector.C:799
T norm2() const
Square of L2-norm.
GLfloat units
Definition: glcorearb.h:407
GLboolean * data
Definition: glcorearb.h:130
T & y(void)
Definition: UT_Vector4.h:370
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1221
void setSubvector3(exint idx, const UT_Vector3 &v)
Definition: UT_Vector.C:189
static int getInt(UT_IntControl i)
void getSubvector2(UT_Vector2 &v, exint idx) const
Definition: UT_Vector.C:164
GLenum GLenum dst
Definition: glcorearb.h:1792
SYS_FORCE_INLINE T & y(void)
Definition: UT_Vector3.h:583
void setSubvector4(exint idx, const UT_Vector4 &v)
Definition: UT_Vector.C:208
void setSubvector2(exint idx, const UT_Vector2 &v)
Definition: UT_Vector.C:172
void getSubvector3(UT_Vector3 &v, exint idx) const
Definition: UT_Vector.C:180
GLuint GLfloat * val
Definition: glcorearb.h:1607
GLint GLint GLsizei GLint GLenum GLenum type
Definition: glcorearb.h:107
void subvector(const UT_VectorT< T > &v, exint nl, exint nh)
Steal from another vector, resulting vector has origin at 1.
Definition: UT_Vector.C:62
T & x(void)
Definition: UT_Vector4.h:368
T & y(void)
Definition: UT_Vector2.h:286
void assign(const fpreal32 *data, exint nl, exint nh)
Definition: UT_Vector.C:92
UT_VectorT & operator=(const UT_VectorT< T > &v)
Definition: UT_Vector.C:765
UT_VectorT & operator*=(const UT_VectorT< T > &v)
Componentwise multiplication & division.
Definition: UT_Vector.C:810
T & w(void)
Definition: UT_Vector4.h:374
exint length() const
Get dimension of this vector.
Definition: UT_Vector.h:79
#define SYSmin(a, b)
Definition: SYS_Math.h:1366
float fpreal32
Definition: SYS_Types.h:184
bool isEqual(const UT_VectorT< T > &v, int64 ulps)
Definition: UT_Vector.C:912
exint getNL() const
Get the low index.
Definition: UT_Vector.h:73
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:794