HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_Vector.C
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: Vector of arbitrary size (C++)
7  *
8  * COMMENTS:
9  *
10  */
11 
12 #ifdef WIN32
13  #include <memory.h>
14 #else
15 #ifdef LINUX
16  #include <string.h>
17 #endif
18 #endif
19 
20 #include <iostream>
21 
22 #include <SYS/SYS_Align.h>
23 #include <VM/VM_Math.h>
24 
25 #include <SYS/SYS_Math.h>
26 #include "UT_Vector.h"
27 
28 #include "UT_Vector2.h"
29 #include "UT_Vector3.h"
30 #include "UT_Vector4.h"
31 #include "UT_EnvControl.h"
32 #include "UT_Format.h"
33 #include "UT_StringStream.h"
34 
35 template <typename T>
37 {
38  myVector = 0;
39  myOwnData = 0;
40  init(nl, nh);
41 }
42 
43 template <typename T>
45 {
46  exint i;
47 
48  myVector = 0;
49  myOwnData = 0;
50  init(v.myNL, v.myNH);
51  for (i = v.myNL; i <= v.myNH; i++)
52  myVector[i] = v.myVector[i];
53 }
54 
55 template <typename T>
57 {
58  if (myOwnData && myVector)
59  free(myVector + myNL);
60 }
61 
62 template <typename T>
63 void
65 {
66  exint n = nh - nl + 1;
67 
68  if (myOwnData && myVector)
69  free(myVector + myNL);
70  myOwnData = 0;
71  myNL = 1;
72  myNH = n;
73  myVector = &v.myVector[nl] - 1;
74 }
75 
 76 // Assign from an array of one element type into the given subvector,
 76 // converting element type as needed (e.g. single to double precision).
77 template <typename T, typename S>
78 static void
79 utAssign(const T *data, S *vec, exint nl, exint nh)
80 {
81  const exint n = nh - nl + 1;
82  S *dst;
83  exint i;
84 
85  // assign the data, converting from single to double precision
86  // NB: this may be vectorized into VM on x86 using the CVTPS2PD instr.
87  dst = vec + nl;
88  for (i = 0; i < n; i++)
89  *dst++ = *data++;
90 }
91 
92 template <typename T>
93 void
95 {
96  UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
97  utAssign(data, myVector, nl, nh);
98 }
99 
100 template <typename T>
101 void
103 {
104  UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
105  utAssign(data, myVector, nl, nh);
106 }
107 
108 template <typename T>
109 void
111 {
112  UT_ASSERT((nh - nl + 1) >= 0);
113 
114  if (myVector && myOwnData)
115  {
116  if (nh-nl > myNH-myNL)
117  {
118  myVector = (T *)realloc(myVector + myNL, (nh - nl + 1)*sizeof(T));
119  myVector = myVector - nl;
120  }
121  else
122  {
123  myVector = myVector + myNL - nl;
124  }
125  }
126  else
127  {
128  myVector = (T *)malloc((nh - nl + 1)*sizeof(T));
129  myVector = myVector - nl;
130  myOwnData = 1;
131  }
132 
133  myNL = nl;
134  myNH = nh;
135 }
136 
137 template <typename T>
138 void
140 {
141  exint units = length();
142  info.divideWork(units, start, end);
143  start += getNL();
144  end += getNL();
145 }
146 
147 template <typename T>
148 void
150  exint &start, exint &end, const exint blocksize, const UT_JobInfo &info) const
151 {
152  exint units = (length() + blocksize-1)/blocksize;
153  info.divideWork(units, start, end);
154 }
155 
156 template <typename T>
157 void
159 {
160  UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
161  memset(myVector + nl, 0, (nh - nl + 1)*sizeof(T));
162 }
163 
164 template <typename T>
165 void
167 {
168  v.x() = (*this)(idx);
169  v.y() = (*this)(idx+1);
170 }
171 
172 template <typename T>
173 void
175 {
176  (*this)(idx) = v.x();
177  (*this)(idx+1) = v.y();
178 }
179 
180 template <typename T>
181 void
183 {
184  v.x() = (*this)(idx);
185  v.y() = (*this)(idx+1);
186  v.z() = (*this)(idx+2);
187 }
188 
189 template <typename T>
190 void
192 {
193  (*this)(idx) = v.x();
194  (*this)(idx+1) = v.y();
195  (*this)(idx+2) = v.z();
196 }
197 
198 template <typename T>
199 void
201 {
202  v.x() = (*this)(idx);
203  v.y() = (*this)(idx+1);
204  v.z() = (*this)(idx+2);
205  v.w() = (*this)(idx+3);
206 }
207 
208 template <typename T>
209 void
211 {
212  (*this)(idx) = v.x();
213  (*this)(idx+1) = v.y();
214  (*this)(idx+2) = v.z();
215  (*this)(idx+3) = v.w();
216 }
217 
218 template <typename T>
219 void
221 {
222  exint diff = myNL-nl;
223 
224  myNL = nl;
225  myNH -= diff;
226  myVector += diff;
227 }
228 
229 template <typename T>
230 T
232 {
233  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
234  if (nblocks <= 0)
235  return 0;
236 
237  UT_StackBuffer<fpreal64> accumulators(nblocks);
238 
239  normInternal((fpreal64*)accumulators, type);
240 
241  fpreal64 result = accumulators[0];
242 
243  if (type == 0)
244  {
245  for (exint i = 1; i < nblocks; ++i)
246  {
247  fpreal64 v = accumulators[i];
248  if (v > result)
249  result = v;
250  }
251  }
252  else
253  {
254  for (exint i = 1; i < nblocks; ++i)
255  result += accumulators[i];
256 
257  // normalize result.
258  if (type == 2) // L2 norm
259  result = SYSsqrt(result);
260  }
261 
262  return result;
263 }
264 
265 
266 template <typename T>
267 T
268 UT_VectorT<T>::norm2() const
269 {
270  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
271  if (nblocks <= 0)
272  return 0;
273 
274  UT_StackBuffer<fpreal64> accumulators(nblocks);
275 
276  normInternal((fpreal64*)accumulators, 2);
277 
278  fpreal64 result = accumulators[0];
279  for (exint i = 1; i < nblocks; ++i)
280  result += accumulators[i];
281 
282  return result;
283 }
284 
285 template <typename T>
286 void
288  fpreal64 *output, int type, const UT_JobInfo &info) const
289 {
290  exint startblock;
291  exint endblock;
292  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
293 
294  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
295 
296  if (type == 0)
297  {
298  for (exint block = startblock; block < endblock; ++block)
299  {
300  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
301  fpreal64 result = SYSabs(myVector[i]);
302  for (++i; i < blockend; ++i)
303  {
304  T v = SYSabs(myVector[i]);
305  if (v > result)
306  result = v;
307  }
308  output[block] = result;
309  }
310  }
311  else if (type == 1)
312  {
313  for (exint block = startblock; block < endblock; ++block)
314  {
315  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
316  fpreal64 result = SYSabs(myVector[i]);
317  for (++i; i < blockend; ++i)
318  {
319  result += SYSabs(myVector[i]);
320  }
321  output[block] = result;
322  }
323  }
324  else // L2-norm
325  {
326  for (exint block = startblock; block < endblock; ++block)
327  {
328  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
329  T v = myVector[i];
330  fpreal64 result = v*v;
331  for (++i; i < blockend; ++i)
332  {
333  v = myVector[i];
334  result += v*v;
335  }
336  output[block] = result;
337  }
338  }
339 }
340 
341 template <typename T>
342 T
344 {
345  UT_ASSERT_P(myNL == v.myNL && myNH == v.myNH);
346 
347  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
348  if (nblocks <= 0)
349  return 0;
350 
351  UT_StackBuffer<fpreal64> accumulators(nblocks);
352 
353  distance2Internal((fpreal64*)accumulators, v);
354 
355  fpreal64 result = accumulators[0];
356  for (exint i = 1; i < nblocks; ++i)
357  result += accumulators[i];
358 
359  return result;
360 }
361 
362 template <typename T>
363 void
365  fpreal64 *output, const UT_VectorT<T> &v, const UT_JobInfo &info) const
366 {
367  exint startblock;
368  exint endblock;
369  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
370 
371  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
372 
373  for (exint block = startblock; block < endblock; ++block)
374  {
375  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
376  fpreal64 diff = myVector[i] - v.myVector[i];
377  fpreal64 result = diff*diff;
378  for (++i; i < blockend; ++i)
379  {
380  diff = myVector[i] - v.myVector[i];
381  result += diff*diff;
382  }
383  output[block] = result;
384  }
385 }
386 
387 template <typename T>
388 void
390 {
391  exint i, end;
392 
393  getPartialRange(i, end, info);
394 
395 #if 0
396  for (; i < end; i++)
397  myVector[i] = -myVector[i];
398 #else
399  // NOTE: Compilers get confused, thinking that i or end
400  // might change underneath us, because we passed a non-const reference
401  // to them to getPartialRange, so they don't make any assumptions
402  // and have terrible performance, unless we make separate
403  // variables j and jend that are entirely local.
404  for (exint j = i, jend = end; j < jend; j++)
405  myVector[j] = -myVector[j];
406 #endif
407 }
408 
409 template <typename T>
410 void
412 {
413  exint i, end;
414 
415  getPartialRange(i, end, info);
416 
417 #if 0
418  for (; i < end; i++)
419  myVector[i] = v.myVector[i] - myVector[i];
420 #else
421  // NOTE: Compilers get confused, thinking that i or end
422  // might change underneath us, because we passed a non-const reference
423  // to them to getPartialRange, so they don't make any assumptions
424  // and have terrible performance, unless we make separate
425  // variables j and jend that are entirely local.
426  for (exint j = i, jend = end; j < jend; j++)
427  myVector[j] = v.myVector[j] - myVector[j];
428 #endif
429 }
430 
431 template <typename T>
432 void
434  const UT_JobInfo &info)
435 {
436  exint i, end;
437 
438  getPartialRange(i, end, info);
439 
440 #if 0
441  for (; i < end; i++)
442  myVector[i] += s * v.myVector[i];
443 #else
444  // NOTE: Compilers get confused, thinking that i or end
445  // might change underneath us, because we passed a non-const reference
446  // to them to getPartialRange, so they don't make any assumptions
447  // and have terrible performance (2.4x slower for this function),
448  // unless we make separate variables j and jend that are entirely local.
449  for (exint j = i, jend = end; j < jend; j++)
450  myVector[j] += s * v.myVector[j];
451 #endif
452 }
453 
454 template <typename T>
455 void
457  T s, const UT_VectorT<T> &v, fpreal64 *norm2)
458 {
459  UT_ASSERT_P(myNL == v.myNL && myNH == v.myNH);
460 
461  // Just in case, handle null
462  if (!norm2)
463  {
464  addScaledVec(s, v);
465  return;
466  }
467 
468  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
469  if (nblocks <= 0)
470  {
471  *norm2 = 0;
472  return;
473  }
474 
475  UT_StackBuffer<fpreal64> accumulators(nblocks);
476 
477  addScaledVecNorm2Internal(s, v, (fpreal64*)accumulators);
478 
479  fpreal64 result = accumulators[0];
480  for (exint i = 1; i < nblocks; ++i)
481  result += accumulators[i];
482 
483  *norm2 = result;
484 }
485 
486 template <typename T>
487 void
489  T s, const UT_VectorT<T> &v, fpreal64 *norm2, const UT_JobInfo &info)
490 {
491  exint startblock;
492  exint endblock;
493  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
494 
495  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
496 
497  for (exint block = startblock; block < endblock; ++block)
498  {
499  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
500  T val = myVector[i] + s*v.myVector[i];
501  myVector[i] = val;
502  fpreal64 result = val*val;
503  for (++i; i < blockend; ++i)
504  {
505  val = myVector[i] + s*v.myVector[i];
506  myVector[i] = val;
507  result += val*val;
508  }
509  norm2[block] = result;
510  }
511 }
512 
513 template <typename T>
514 void
516  T s, const UT_VectorT<T> &v, fpreal64 *norm2, exint normlimit)
517 {
518  UT_ASSERT_P(myNL == v.myNL && myNH == v.myNH);
519 
520  // Just in case, handle null
521  if (!norm2 || normlimit <= 0)
522  {
523  addScaledVec(s, v);
524 
525  if (norm2)
526  *norm2 = 0;
527 
528  return;
529  }
530 
531  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
532  if (nblocks <= 0)
533  {
534  *norm2 = 0;
535  return;
536  }
537 
538  UT_StackBuffer<fpreal64> accumulators(nblocks);
539 
540  addScaledVecNorm2UpToInternal(s, v, (fpreal64*)accumulators, normlimit);
541 
542  fpreal64 result = accumulators[0];
543  for (exint i = 1; i < nblocks; ++i)
544  result += accumulators[i];
545 
546  *norm2 = result;
547 }
548 
549 template <typename T>
550 void
552  T s, const UT_VectorT<T> &v, fpreal64 *norm2, exint normlimit, const UT_JobInfo &info)
553 {
554  exint startblock;
555  exint endblock;
556  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
557 
558  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
559 
560  for (exint block = startblock; block < endblock; ++block)
561  {
562  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
563  const exint normmax = SYSmin(blockend, normlimit);
564  fpreal64 result = 0;
565  for (; i < normmax; ++i)
566  {
567  T val = myVector[i] + s*v.myVector[i];
568  myVector[i] = val;
569  result += val*val;
570  }
571  for (; i < blockend; ++i)
572  {
573  myVector[i] += s*v.myVector[i];
574  }
575  norm2[block] = result;
576  }
577 }
578 
579 template <typename T>
580 void
582  const UT_JobInfo &info)
583 {
584  exint i, end;
585 
586  getPartialRange(i, end, info);
587 
588 #if 0
589  for (; i < end; i++)
590  myVector[i] = s * myVector[i] + v.myVector[i];
591 #else
592  // NOTE: Compilers get confused, thinking that i or end
593  // might change underneath us, because we passed a non-const reference
594  // to them to getPartialRange, so they don't make any assumptions
595  // and have terrible performance, unless we make separate
596  // variables j and jend that are entirely local.
597  for (exint j = i, jend = end; j < jend; j++)
598  myVector[j] = s * myVector[j] + v.myVector[j];
599 #endif
600 }
601 
602 template <typename T>
603 void
605  const UT_JobInfo &info)
606 {
607  exint i, end;
608 
609  getPartialRange(i, end, info);
610 
611 #if 0
612  for (; i < end; i++)
613  myVector[i] = a.myVector[i] * b.myVector[i];
614 #else
615  // NOTE: Compilers get confused, thinking that i or end
616  // might change underneath us, because we passed a non-const reference
617  // to them to getPartialRange, so they don't make any assumptions
618  // and have terrible performance, unless we make separate
619  // variables j and jend that are entirely local.
620  for (exint j = i, jend = end; j < jend; j++)
621  myVector[j] = a.myVector[j] * b.myVector[j];
622 #endif
623 }
624 
625 template <typename T>
626 void
628  const UT_VectorT<T> &a, const UT_VectorT<T> &b,
629  fpreal64 *dot_aba, exint dotlimit)
630 {
631  UT_ASSERT_P(myNL == a.myNL && myNH == a.myNH);
632  UT_ASSERT_P(myNL == b.myNL && myNH == b.myNH);
633 
634  if (!dot_aba)
635  {
636  multAndSet(a, b);
637  return;
638  }
639 
640  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
641  if (nblocks <= 0)
642  {
643  *dot_aba = 0;
644  return;
645  }
646 
647  UT_StackBuffer<fpreal64> accumulators(nblocks);
648 
649  multSetAndDotUpToInternal(a, b, (fpreal64*)accumulators, dotlimit);
650 
651  fpreal64 result = accumulators[0];
652  for (exint i = 1; i < nblocks; ++i)
653  result += accumulators[i];
654 
655  *dot_aba = result;
656 }
657 
658 template <typename T>
659 void
661  const UT_VectorT<T> &a, const UT_VectorT<T> &b,
662  fpreal64 *dot_aba, exint dotlimit,
663  const UT_JobInfo &info)
664 {
665  exint startblock;
666  exint endblock;
667  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
668 
669  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
670 
671  for (exint block = startblock; block < endblock; ++block)
672  {
673  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
674  const exint dotmax = SYSmin(blockend, dotlimit);
675  fpreal64 result = 0;
676  for (; i < dotmax; ++i)
677  {
678  T av = a.myVector[i];
679  T bv = b.myVector[i];
680  T val = av*bv;
681  myVector[i] = val;
682  result += val*av;
683  }
684  for (; i < blockend; ++i)
685  {
686  myVector[i] += a.myVector[i]*b.myVector[i];
687  }
688  dot_aba[block] = result;
689  }
690 }
691 
692 template <typename T>
693 void
695  const UT_JobInfo &info)
696 {
697  exint i, end;
698 
699  getPartialRange(i, end, info);
700 
701 #if 0
702  for (; i < end; i++)
703  myVector[i] = a.myVector[i] / b.myVector[i];
704 #else
705  // NOTE: Compilers get confused, thinking that i or end
706  // might change underneath us, because we passed a non-const reference
707  // to them to getPartialRange, so they don't make any assumptions
708  // and have terrible performance, unless we make separate
709  // variables j and jend that are entirely local.
710  for (exint j = i, jend = end; j < jend; j++)
711  myVector[j] = a.myVector[j] / b.myVector[j];
712 #endif
713 }
714 
715 template <typename T>
716 void
718  const UT_JobInfo &info)
719 {
720  exint i, end;
721 
722  getPartialRange(i, end, info);
723 
724 #if 0
725  for (; i < end; i++)
726  {
727  myVector[i] = SYSsafediv(T(1.0), a.myVector[i]);
728  }
729 #else
730  // NOTE: Compilers get confused, thinking that i or end
731  // might change underneath us, because we passed a non-const reference
732  // to them to getPartialRange, so they don't make any assumptions
733  // and have terrible performance, unless we make separate
734  // variables j and jend that are entirely local.
735  for (exint j = i, jend = end; j < jend; j++)
736  myVector[j] = SYSsafediv(T(1.0), a.myVector[j]);
737 #endif
738 }
739 
740 template <typename T>
741 void
743  const UT_JobInfo &info)
744 {
745  exint i, end;
746 
747  getPartialRange(i, end, info);
748 
749 #if 0
750  for (; i < end; i++)
751  {
752  myVector[i] = T(1.0) / a.myVector[i];
753  }
754 #else
755  // NOTE: Compilers get confused, thinking that i or end
756  // might change underneath us, because we passed a non-const reference
757  // to them to getPartialRange, so they don't make any assumptions
758  // and have terrible performance, unless we make separate
759  // variables j and jend that are entirely local.
760  for (exint j = i, jend = end; j < jend; j++)
761  myVector[j] = T(1.0) / a.myVector[j];
762 #endif
763 }
764 
765 template <typename T>
768 {
769  UT_ASSERT_P(length() == v.length());
770  copyFrom(v);
771  return *this;
772 }
773 
774 template <typename T>
775 void
777  const UT_JobInfo &info)
778 {
779  UT_ASSERT_P(length() == v.length());
780  exint i, end;
781 
782  getPartialRange(i, end, info);
783 
784  memcpy(&myVector[i], &v.myVector[i],
785  (end - i) * sizeof(T));
786 }
787 
788 template <typename T>
791 {
792  exint i;
793 
794  for (i=myNL; i<=myNH; i++)
795  myVector[i] += v.myVector[i];
796  return *this;
797 }
798 
799 template <typename T>
802 {
803  exint i;
804 
805  for (i=myNL; i<=myNH; i++)
806  myVector[i] -= v.myVector[i];
807  return *this;
808 }
809 
810 template <typename T>
813 {
814  exint i;
815 
816  for (i=myNL; i<=myNH; i++)
817  myVector[i] *= v.myVector[i];
818  return *this;
819 }
820 
821 template <typename T>
824 {
825  exint i;
826 
827  for (i=myNL; i<=myNH; i++)
828  myVector[i] /= v.myVector[i];
829  return *this;
830 }
831 
832 template <typename T>
835 {
836  exint i;
837 
838  for (i=myNL; i<=myNH; i++)
839  myVector[i] *= scalar;
840  return *this;
841 }
842 
843 
844 template <typename T>
847 {
848  exint i;
849 
850  scalar = 1.0F / scalar;
851  for (i=myNL; i<=myNH; i++)
852  myVector[i] *= scalar;
853  return *this;
854 }
855 
856 
857 template <typename T>
858 T
860 {
861  UT_ASSERT_P(myNL == v.myNL && myNH == v.myNH);
862 
863  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
864  if (nblocks <= 0)
865  return 0;
866 
867  UT_StackBuffer<fpreal64> accumulators(nblocks);
868 
869  dotInternal((fpreal64*)accumulators, v);
870 
871  fpreal64 result = accumulators[0];
872  for (exint i = 1; i < nblocks; ++i)
873  result += accumulators[i];
874 
875  return result;
876 }
877 
// Per-job worker for dot(): computes one double-precision partial dot
// product per PARALLEL_BLOCK_SIZE block and stores it in output[block];
// the caller (dot()) reduces the per-block sums serially.
template <typename T>
void
UT_VectorT<T>::dotInternalPartial(fpreal64 *output, const UT_VectorT<T> &v, const UT_JobInfo &info) const
{
    exint startblock;
    exint endblock;
    getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);

    // 'i' advances continuously across this job's blocks; it is NOT reset
    // per block, so the inner loops below depend on this carried index.
    exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;

    for (exint block = startblock; block < endblock; ++block)
    {
	// Clamp the last block to the inclusive upper bound myNH.
	exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
	// Seed with the first element's product so the loop starts at ++i.
	fpreal64 result = myVector[i]*v.myVector[i];
	for (++i; i < blockend; ++i)
	{
	    result += myVector[i]*v.myVector[i];
	}
	output[block] = result;
    }
}
899 
900 template <typename T>
901 std::ostream &
902 UT_VectorT<T>::save(std::ostream &os) const
903 {
904  exint i;
905 
906  for (i=myNL; i<=myNH; i++)
907  os << i << ": " << myVector[i] << "\n";
908 
909  return os;
910 }
911 
912 template <typename T>
913 bool
915 {
916  if (getNL() != v.getNL() || getNH() != v.getNH())
917  return false;
918  for (exint i = getNL(); i < getNH(); i++)
919  {
920  if (!SYSalmostEqual((*this)(i), v(i), ulps))
921  return false;
922  }
923  return true;
924 }
925 
926 template <typename T>
927 bool
929 {
930  for (exint i = myNL; i <= myNH; i++)
931  {
932  if (SYSisNan(myVector[i]))
933  return true;
934  }
935  return false;
936 }
937 
938 template <typename T>
939 void
941 {
943  {
944  std::cerr << "NAN found in UT_VectorT\n";
945  UT_ASSERT(0);
946  }
947 }
948 
949 //////////////////////////////
950 // UT_Vector Specializations
951 //////////////////////////////
952 
953 #if 1
954 
955 template <>
956 inline void
958 {
959  exint i, end;
960 
961  getPartialRange(i, end, info);
962 
963  VM_Math::negate(&myVector[i], &myVector[i], end-i);
964 }
965 
966 template <>
967 inline void
969 {
970  exint i, end;
971 
972  getPartialRange(i, end, info);
973 
974  VM_Math::scaleoffset(&myVector[i], -1.0F, &v.myVector[i], end-i);
975 }
976 
977 template <>
978 inline void
980  const UT_VectorT<fpreal32> &v,
981  const UT_JobInfo &info)
982 {
983  exint i, end;
984 
985  getPartialRange(i, end, info);
986 
987  VM_Math::madd(&myVector[i], &v.myVector[i], s, end-i);
988 }
989 
990 template <>
991 inline void
993  fpreal32 s,
994  const UT_VectorT<fpreal32> &v,
995  fpreal64 *norm2,
996  const UT_JobInfo &info)
997 {
998  exint startblock;
999  exint endblock;
1000  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1001 
1002  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1003 
1004  for (exint block = startblock; block < endblock; ++block)
1005  {
1006  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1007 
1008  fpreal64 result = VM_Math::maddAndNorm(&myVector[i], &v.myVector[i], s, blockend-i);
1009 
1010  norm2[block] = result;
1011 
1012  i = blockend;
1013  }
1014 }
1015 
1016 template <>
1017 inline void
1019  fpreal32 s,
1020  const UT_VectorT<fpreal32> &v,
1021  fpreal64 *norm2,
1022  exint normlimit,
1023  const UT_JobInfo &info)
1024 {
1025  exint startblock;
1026  exint endblock;
1027  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1028 
1029  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1030 
1031  for (exint block = startblock; block < endblock; ++block)
1032  {
1033  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1034 
1035  fpreal64 result = 0;
1036  if (i < normlimit)
1037  {
1038  const exint normmax = SYSmin(blockend, normlimit);
1039  result = VM_Math::maddAndNorm(&myVector[i], &v.myVector[i], s, normmax-i);
1040 
1041  i = normmax;
1042  if (i < blockend)
1043  VM_Math::madd(&myVector[i], &v.myVector[i], s, blockend-i);
1044  }
1045  else
1046  VM_Math::madd(&myVector[i], &v.myVector[i], s, blockend-i);
1047 
1048  norm2[block] = result;
1049 
1050  i = blockend;
1051  }
1052 }
1053 
1054 template <>
1055 inline void
1057  const UT_VectorT<fpreal32> &v,
1058  const UT_JobInfo &info)
1059 {
1060  exint i, end;
1061 
1062  getPartialRange(i, end, info);
1063 
1064  VM_Math::scaleoffset(&myVector[i], s, &v.myVector[i], end-i);
1065 }
1066 
1067 
1068 template <>
1069 inline void
1071  const UT_VectorT<fpreal32> &b,
1072  const UT_JobInfo &info)
1073 {
1074  exint i, end;
1075 
1076  getPartialRange(i, end, info);
1077 
1078  VM_Math::mul(&myVector[i], &a.myVector[i], &b.myVector[i], end-i);
1079 }
1080 
1081 template <>
1082 inline void
1084  const UT_VectorT<fpreal32> &a,
1085  const UT_VectorT<fpreal32> &b,
1086  fpreal64 *dot_aba, exint dotlimit,
1087  const UT_JobInfo &info)
1088 {
1089  exint startblock;
1090  exint endblock;
1091  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1092 
1093  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1094 
1095  for (exint block = startblock; block < endblock; ++block)
1096  {
1097  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1098 
1099  fpreal64 result = 0;
1100  if (i < dotlimit)
1101  {
1102  const exint dotmax = SYSmin(blockend, dotlimit);
1103 
1104  result = VM_Math::mulAndDotDA(&myVector[i], &a.myVector[i], &b.myVector[i], dotmax-i);
1105 
1106  i = dotmax;
1107  if (i < blockend)
1108  VM_Math::mul(&myVector[i], &a.myVector[i], &b.myVector[i], blockend-i);
1109  }
1110  else
1111  VM_Math::mul(&myVector[i], &a.myVector[i], &b.myVector[i], blockend-i);
1112 
1113  dot_aba[block] = result;
1114 
1115  i = blockend;
1116  }
1117 }
1118 
1119 template <>
1120 inline void
1122  const UT_VectorT<fpreal32> &b,
1123  const UT_JobInfo &info)
1124 {
1125  exint i, end;
1126 
1127  getPartialRange(i, end, info);
1128 
1129  VM_Math::div(&myVector[i], &a.myVector[i], &b.myVector[i], end-i);
1130 }
1131 
1132 template <>
1133 inline void
1135  const UT_JobInfo &info)
1136 {
1137  exint i, end;
1138 
1139  getPartialRange(i, end, info);
1140 
1141  VM_Math::safediv(&myVector[i], 1.0f, &a.myVector[i], end-i);
1142 }
1143 
1144 template <>
1145 inline void
1147  const UT_JobInfo &info)
1148 {
1149  exint i, end;
1150 
1151  getPartialRange(i, end, info);
1152 
1153  VM_Math::div(&myVector[i], 1.0f, &a.myVector[i], end-i);
1154 }
1155 
1156 template <>
1157 inline UT_VectorT<fpreal32> &
1159 {
1160  VM_Math::add(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);
1161  return *this;
1162 }
1163 
1164 template <>
1165 inline UT_VectorT<fpreal32> &
1167 {
1168  VM_Math::sub(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);
1169  return *this;
1170 }
1171 
1172 template <>
1173 inline UT_VectorT<fpreal32> &
1175 {
1176  VM_Math::mul(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);
1177  return *this;
1178 }
1179 
1180 template <>
1181 inline UT_VectorT<fpreal32> &
1183 {
1184  VM_Math::div(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);
1185  return *this;
1186 }
1187 
1188 template <>
1189 inline UT_VectorT<fpreal32> &
1191 {
1192  VM_Math::mul(&myVector[myNL], &myVector[myNL], scalar, myNH-myNL+1);
1193  return *this;
1194 }
1195 
1196 template <>
1197 inline UT_VectorT<fpreal32> &
1199 {
1200  VM_Math::div(&myVector[myNL], &myVector[myNL], scalar, myNH-myNL+1);
1201  return *this;
1202 }
1203 
1204 template <>
1205 inline void
1207 {
1208  exint startblock;
1209  exint endblock;
1210  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1211 
1212  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1213 
1214  for (exint block = startblock; block < endblock; ++block)
1215  {
1216  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1217 
1218  fpreal64 result = VM_Math::dot(&myVector[i], &v.myVector[i], blockend-i);
1219 
1220  output[block] = result;
1221 
1222  i = blockend;
1223  }
1224 }
1225 
1226 template <>
1227 inline void
1229  fpreal64 *output, int type,
1230  const UT_JobInfo &info) const
1231 {
1232  exint startblock;
1233  exint endblock;
1234  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1235 
1236  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1237 
1238  if (type == 0)
1239  {
1240  for (exint block = startblock; block < endblock; ++block)
1241  {
1242  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1243  fpreal32 result = SYSabs(myVector[i]);
1244  for (++i; i < blockend; ++i)
1245  {
1246  fpreal32 v = SYSabs(myVector[i]);
1247  if (v > result)
1248  result = v;
1249  }
1250  output[block] = result;
1251  }
1252  }
1253  else if (type == 1)
1254  {
1255  for (exint block = startblock; block < endblock; ++block)
1256  {
1257  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1258  fpreal64 result = SYSabs(myVector[i]);
1259  for (++i; i < blockend; ++i)
1260  {
1261  result += SYSabs(myVector[i]);
1262  }
1263  output[block] = result;
1264  }
1265  }
1266  else // L2-norm
1267  {
1268  for (exint block = startblock; block < endblock; ++block)
1269  {
1270  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1271 
1272  fpreal64 result = VM_Math::dot(&myVector[i], &myVector[i], blockend-i);
1273 
1274  output[block] = result;
1275 
1276  i = blockend;
1277  }
1278  }
1279 }
1280 #endif
1281 
1282 template <typename V> inline size_t
1283 format(char *buffer, size_t bufsize, const UT_VectorT<V> &v)
1284 {
1286  for (int c = v.getNL(); c <= v.getNH(); c++) {
1287  if (c != v.getNL())
1288  row << ", ";
1289  UTformat(row, "{}", v(c));
1290  }
1291 
1292  UT::Format::Writer writer(buffer, bufsize);
1294  return f.format(writer, "[{}]", {row.str()});
1295 }
1296 
1297 // Explicit instantiation for format
1298 #define VECTOR_INSTANTIATE_FMT(T) \
1299  template UT_API size_t format<T>(char*, size_t, const UT_VectorT<T>&);
1302 
1303 //////////////////////////
1304 // UT_Permutation class
1305 //////////////////////////
1306 
1307 
1308 template <typename T>
1310 {
1311  myNL = nl;
1312  myNH = nh;
1313  myVector = (T *)malloc((nh - nl + 1)*sizeof(T));
1314  myVector = myVector - nl;
1315 }
1316 
1317 template <typename T>
1319 {
1320  free(myVector + myNL);
1321 }
1322 
1323 template <typename T>
1325 {
1326  clone(p);
1327 }
1328 
1329 template <typename T>
1332 {
1333  if (&p != this)
1334  {
1335  free(myVector + myNL);
1336  clone(p);
1337  }
1338  return *this;
1339 }
1340 
1341 template <typename T>
1342 void
1344 {
1345  UT_ASSERT(&p != this);
1346  myNL = p.myNL;
1347  myNH = p.myNH;
1348  myVector = (T *)malloc((myNH - myNL + 1)*sizeof(T));
1349  myVector = myVector - myNL;
1350 
1351  memcpy(myVector + myNL, p.myVector + myNL, (myNH - myNL + 1)*sizeof(T));
1352 }
1353 
1354 template <typename T>
1355 void
1357 {
1358  memset(myVector + myNL, 0, (myNH - myNL + 1)*sizeof(T));
1359 }
1360 
1361 template <typename T>
1362 void
1364 {
1365  exint diff = myNL-nl;
1366 
1367  myNL = nl;
1368  myNH -= diff;
1369  myVector += diff;
1370 }
1371 
1372 
UT_PermutationT< T > & operator=(const UT_PermutationT< T > &p)
Definition: UT_Vector.C:1331
bool hasNan() const
Definition: UT_Vector.C:928
void zero()
Definition: UT_Vector.h:58
void getPartialRange(exint &start, exint &end, const UT_JobInfo &info) const
Determines the [start,end) interval that should be worked on.
Definition: UT_Vector.C:139
GA_API const UT_StringHolder div
GLenum GLuint GLsizei bufsize
Definition: glcorearb.h:1817
size_t format(W &writer, const char *format, std::initializer_list< ArgValue > args)
void negPartial(const UT_JobInfo &info)
Negate.
Definition: UT_Vector.C:389
#define VECTOR_INSTANTIATE_FMT(T)
Definition: UT_Vector.C:1298
T & z(void)
Definition: UT_Vector4.h:379
void divideWork(int units, int &start, int &end) const
void init(exint nl, exint nh)
Initialize nl, nh and allocate space.
Definition: UT_Vector.C:110
UT_VectorT & operator+=(const UT_VectorT< T > &v)
Definition: UT_Vector.C:790
const GLdouble * v
Definition: glcorearb.h:836
GLuint start
Definition: glcorearb.h:474
exint getNH() const
Get the high index.
Definition: UT_Vector.h:76
T & x(void)
Definition: UT_Vector2.h:285
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1221
void getPartialBlockRange(exint &startblock, exint &endblock, const exint blocksize, const UT_JobInfo &info) const
Definition: UT_Vector.C:149
bool SYSisNan(const F f)
Definition: SYS_Math.h:173
#define SYSabs(a)
Definition: SYS_Math.h:1369
void addScaledVecNorm2(T s, const UT_VectorT< T > &v, fpreal64 *norm2)
Add scaled vector and compute the squared L2 norm.
Definition: UT_Vector.C:456
An output stream object that owns its own string buffer storage.
const UT_WorkBuffer & str() const
Returns a read-only reference to the underlying UT_WorkBuffer.
SYS_FORCE_INLINE T & x(void)
Definition: UT_Vector3.h:498
T dot(const UT_VectorT< T > &v) const
Definition: UT_Vector.C:859
void multSetAndDotUpTo(const UT_VectorT< T > &a, const UT_VectorT< T > &b, fpreal64 *dot_aba, exint dotlimit)
Definition: UT_Vector.C:627
GLuint buffer
Definition: glcorearb.h:659
png_uint_32 i
Definition: png.h:2877
void testForNan() const
Definition: UT_Vector.C:940
void addScaledVecNorm2UpTo(T s, const UT_VectorT< T > &v, fpreal64 *norm2, exint normlimit)
Definition: UT_Vector.C:515
SYS_FORCE_INLINE T & z(void)
Definition: UT_Vector3.h:502
~UT_VectorT()
Definition: UT_Vector.C:56
void changeNL(exint nl)
Change the low index, and the high index will adjust itself.
Definition: UT_Vector.C:220
long long int64
Definition: SYS_Types.h:107
GLdouble n
Definition: glcorearb.h:2007
GLfloat f
Definition: glcorearb.h:1925
UT_PermutationT(exint nl, exint nh)
Definition: UT_Vector.C:1309
UT_VectorT & operator/=(const UT_VectorT< T > &v)
Definition: UT_Vector.C:823
T norm(int type=2) const
Definition: UT_Vector.C:231
int64 exint
Definition: SYS_Types.h:116
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:125
double fpreal64
Definition: SYS_Types.h:192
UT_VectorT()
Input the index range [nl..nh].
Definition: UT_Vector.h:38
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
Definition: CE_Vector.h:218
GLuint GLuint end
Definition: glcorearb.h:474
std::ostream & save(std::ostream &os) const
Output.
Definition: UT_Vector.C:902
void getSubvector4(UT_Vector4 &v, exint idx) const
Definition: UT_Vector.C:200
T distance2(const UT_VectorT< T > &v) const
Definition: UT_Vector.C:343
void changeNL(exint nl)
Definition: UT_Vector.C:1363
size_t format(char *buffer, size_t bufsize, const UT_VectorT< V > &v)
Definition: UT_Vector.C:1283
UT_VectorT & operator-=(const UT_VectorT< T > &v)
Definition: UT_Vector.C:801
T norm2() const
Square of L2-norm.
GLfloat units
Definition: glcorearb.h:407
GLboolean * data
Definition: glcorearb.h:130
png_bytepp row
Definition: png.h:1836
T & y(void)
Definition: UT_Vector4.h:377
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1221
void setSubvector3(exint idx, const UT_Vector3 &v)
Definition: UT_Vector.C:191
static int getInt(UT_IntControl i)
void getSubvector2(UT_Vector2 &v, exint idx) const
Definition: UT_Vector.C:166
GLenum GLenum dst
Definition: glcorearb.h:1792
SYS_FORCE_INLINE T & y(void)
Definition: UT_Vector3.h:500
void setSubvector4(exint idx, const UT_Vector4 &v)
Definition: UT_Vector.C:210
void setSubvector2(exint idx, const UT_Vector2 &v)
Definition: UT_Vector.C:174
void getSubvector3(UT_Vector3 &v, exint idx) const
Definition: UT_Vector.C:182
GLuint GLfloat * val
Definition: glcorearb.h:1607
Type-safe formatting, modeled on the Python str.format function.
size_t UTformat(FILE *file, const char *format, const Args &...args)
GLint GLint GLsizei GLint GLenum GLenum type
Definition: glcorearb.h:107
void subvector(const UT_VectorT< T > &v, exint nl, exint nh)
Steal from another vector, resulting vector has origin at 1.
Definition: UT_Vector.C:64
T & x(void)
Definition: UT_Vector4.h:375
T & y(void)
Definition: UT_Vector2.h:287
#define UT_ASSERT(ZZ)
Definition: UT_Assert.h:126
void assign(const fpreal32 *data, exint nl, exint nh)
Definition: UT_Vector.C:94
UT_VectorT & operator=(const UT_VectorT< T > &v)
Definition: UT_Vector.C:767
UT_VectorT & operator*=(const UT_VectorT< T > &v)
Componentwise multiplication & division.
Definition: UT_Vector.C:812
T & w(void)
Definition: UT_Vector4.h:381
exint length() const
Get dimension of this vector.
Definition: UT_Vector.h:79
#define SYSmin(a, b)
Definition: SYS_Math.h:1368
float fpreal32
Definition: SYS_Types.h:191
bool isEqual(const UT_VectorT< T > &v, int64 ulps)
Definition: UT_Vector.C:914
exint getNL() const
Get the low index.
Definition: UT_Vector.h:73
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:794