HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_Vector.C
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: Vector of arbitrary size (C++)
7  *
8  * COMMENTS:
9  *
10  */
11 
12 #ifdef WIN32
13  #include <memory.h>
14 #else
15 #ifdef LINUX
16  #include <string.h>
17 #endif
18 #endif
19 
20 #include <iostream>
21 
22 #include <SYS/SYS_Align.h>
23 #include <VM/VM_Math.h>
24 
25 #include <SYS/SYS_Math.h>
26 #include "UT_Vector.h"
27 
28 #include "UT_Vector2.h"
29 #include "UT_Vector3.h"
30 #include "UT_Vector4.h"
31 #include "UT_EnvControl.h"
32 #include "UT_Format.h"
33 #include "UT_StringStream.h"
34 
35 template <typename T>
37 {
38  myVector = 0;
39  myOwnData = 0;
40  init(nl, nh);
41 }
42 
43 template <typename T>
45 {
46  myVector = 0;
47  myOwnData = 0;
48  init(v.myNL, v.myNH);
49  copyFrom(v);
50 }
51 
52 template <typename T>
54 {
55  if (myOwnData && myVector)
56  free(myVector + myNL);
57 }
58 
59 template <typename T>
60 void
62 {
63  exint n = nh - nl + 1;
64 
65  if (myOwnData && myVector)
66  free(myVector + myNL);
67  myOwnData = 0;
68  myNL = 1;
69  myNH = n;
70  myVector = &v.myVector[nl] - 1;
71 }
72 
73 // Assign from a float array into the given subvector
74 template <typename T, typename S>
75 static void
76 utAssign(const T *data, S *vec, exint nl, exint nh)
77 {
78  const exint n = nh - nl + 1;
79  S *dst;
80  exint i;
81 
82  // assign the data, converting from single to double precision
83  // NB: this may be vectorized into VM on x86 using the CVTPS2PD instr.
84  dst = vec + nl;
85  for (i = 0; i < n; i++)
86  *dst++ = *data++;
87 }
88 
89 template <typename T>
90 void
92 {
93  UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
94  utAssign(data, myVector, nl, nh);
95 }
96 
97 template <typename T>
98 void
100 {
101  UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
102  utAssign(data, myVector, nl, nh);
103 }
104 
// Initialize the inclusive index range [nl, nh] and (re)allocate storage.
// myVector is always kept offset by -nl so that indices nl..nh address it
// directly.
// NOTE(review): the signature line was lost in extraction; per the doxygen
// index this is UT_VectorT<T>::init(exint nl, exint nh).
template <typename T>
void
{
    UT_ASSERT((nh - nl + 1) >= 0);

    if (myVector && myOwnData)
    {
	// Growing: need a larger allocation. Shrinking/equal: reuse the
	// existing allocation and just rebase the offset pointer.
	if (nh-nl > myNH-myNL)
	{
	    // NOTE(review): realloc's return value is not checked; on
	    // failure the old block would leak and myVector would dangle.
	    myVector = (T *)realloc(myVector + myNL, (nh - nl + 1)*sizeof(T));
	    myVector = myVector - nl;
	}
	else
	{
	    myVector = myVector + myNL - nl;
	}
    }
    else
    {
	// No owned storage yet (fresh object, or currently aliasing another
	// vector's data): allocate our own.
	myVector = (T *)malloc((nh - nl + 1)*sizeof(T));
	myVector = myVector - nl;
	myOwnData = 1;
    }

    myNL = nl;
    myNH = nh;
}
133 
// Determine the [start, end) index interval this job should work on:
// divide length() elements among the jobs, then shift into the vector's
// index space by adding the low bound.
// NOTE(review): the signature line was lost in extraction; per the doxygen
// index this is UT_VectorT<T>::getPartialRange(exint &start, exint &end,
// const UT_JobInfo &info) const.
template <typename T>
void
{
    exint units = length();
    info.divideWork(units, start, end);
    // divideWork produces 0-based offsets; rebase to [myNL, myNH+1).
    start += getNL();
    end += getNL();
}
143 
// Determine the [startblock, endblock) range of fixed-size blocks this job
// should process; the block count rounds up so a final partial block is
// included. Unlike getPartialRange(), the result is in block units, not
// element indices.
// NOTE(review): the leading signature line was lost in extraction; per the
// doxygen index this is UT_VectorT<T>::getPartialBlockRange(.
template <typename T>
void
	exint &start, exint &end, const exint blocksize, const UT_JobInfo &info) const
{
    exint units = (length() + blocksize-1)/blocksize;
    info.divideWork(units, start, end);
}
152 
153 template <typename T>
154 void
156 {
157  UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
158 
159  exint start, end;
160  info.divideWork(nh - nl + 1, start, end);
161  start += nl;
162  end += nl;
163  memset(myVector + start, 0, (end - start) * sizeof(T));
164 }
165 
166 template <typename T>
167 void
169  const UT_JobInfo &info)
170 {
171  UT_ASSERT_P(nl >= myNL && nh <= myNH && (nh - nl + 1) >= 0);
172 
173  exint start, end;
174  info.divideWork(nh - nl + 1, start, end);
175  start += nl;
176  end += nl;
177 
178  for(exint i = start; i < end; i++)
179  myVector[i] = c;
180 }
181 
182 template <>
183 void
185 {
186  if(c == 0)
187  zero(nl, nh);
188  else
189  VM_Math::set(myVector + nl, c, nh - nl + 1);
190 }
191 template <>
192 void
194 {
195  if(c == 0)
196  zero(nl, nh);
197  else
198  VM_Math::set(myVector + nl, c, nh - nl + 1);
199 }
200 
201 template <typename T>
202 void
204 {
205  if(c == 0)
206  zero(nl, nh);
207  else
208  constantInternal(nl, nh, c);
209 }
210 
211 template <typename T>
212 void
214 {
215  v.x() = (*this)(idx);
216  v.y() = (*this)(idx+1);
217 }
218 
219 template <typename T>
220 void
222 {
223  (*this)(idx) = v.x();
224  (*this)(idx+1) = v.y();
225 }
226 
227 template <typename T>
228 void
230 {
231  v.x() = (*this)(idx);
232  v.y() = (*this)(idx+1);
233  v.z() = (*this)(idx+2);
234 }
235 
236 template <typename T>
237 void
239 {
240  (*this)(idx) = v.x();
241  (*this)(idx+1) = v.y();
242  (*this)(idx+2) = v.z();
243 }
244 
245 template <typename T>
246 void
248 {
249  v.x() = (*this)(idx);
250  v.y() = (*this)(idx+1);
251  v.z() = (*this)(idx+2);
252  v.w() = (*this)(idx+3);
253 }
254 
255 template <typename T>
256 void
258 {
259  (*this)(idx) = v.x();
260  (*this)(idx+1) = v.y();
261  (*this)(idx+2) = v.z();
262  (*this)(idx+3) = v.w();
263 }
264 
265 template <typename T>
266 void
268 {
269  exint diff = myNL-nl;
270 
271  myNL = nl;
272  myNH -= diff;
273  myVector += diff;
274 }
275 
276 template <typename T>
277 T
279 {
280  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
281  if (nblocks <= 0)
282  return 0;
283 
284  UT_StackBuffer<fpreal64> accumulators(nblocks);
285 
286  normInternal((fpreal64*)accumulators, type);
287 
288  fpreal64 result = accumulators[0];
289 
290  if (type == 0)
291  {
292  for (exint i = 1; i < nblocks; ++i)
293  {
294  fpreal64 v = accumulators[i];
295  if (v > result)
296  result = v;
297  }
298  }
299  else
300  {
301  for (exint i = 1; i < nblocks; ++i)
302  result += accumulators[i];
303 
304  // normalize result.
305  if (type == 2) // L2 norm
306  result = SYSsqrt(result);
307  }
308 
309  return result;
310 }
311 
312 
313 template <typename T>
314 T
315 UT_VectorT<T>::norm2() const
316 {
317  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
318  if (nblocks <= 0)
319  return 0;
320 
321  UT_StackBuffer<fpreal64> accumulators(nblocks);
322 
323  normInternal((fpreal64*)accumulators, 2);
324 
325  fpreal64 result = accumulators[0];
326  for (exint i = 1; i < nblocks; ++i)
327  result += accumulators[i];
328 
329  return result;
330 }
331 
// Threaded worker computing one partial norm per block into output[block]:
// type 0 = max |x| (L-infinity), type 1 = sum |x| (L1), otherwise sum x*x
// (squared L2). The caller reduces the per-block results afterwards.
// NOTE(review): the leading signature line was lost in extraction; this is
// the Partial worker for norm(), i.e. UT_VectorT<T>::normInternalPartial(.
template <typename T>
void
	fpreal64 *output, int type, const UT_JobInfo &info) const
{
    exint startblock;
    exint endblock;
    getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);

    // 'i' is the element index; it carries forward across blocks, so each
    // block's inner loop resumes exactly where the previous one stopped.
    exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;

    if (type == 0)
    {
	for (exint block = startblock; block < endblock; ++block)
	{
	    // Clamp the final block to the inclusive high index myNH.
	    exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
	    // Seed with the first element, then scan the rest of the block.
	    fpreal64 result = SYSabs(myVector[i]);
	    for (++i; i < blockend; ++i)
	    {
		T v = SYSabs(myVector[i]);
		if (v > result)
		    result = v;
	    }
	    output[block] = result;
	}
    }
    else if (type == 1)
    {
	for (exint block = startblock; block < endblock; ++block)
	{
	    exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
	    fpreal64 result = SYSabs(myVector[i]);
	    for (++i; i < blockend; ++i)
	    {
		result += SYSabs(myVector[i]);
	    }
	    output[block] = result;
	}
    }
    else // L2-norm
    {
	for (exint block = startblock; block < endblock; ++block)
	{
	    exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
	    T v = myVector[i];
	    fpreal64 result = v*v;
	    for (++i; i < blockend; ++i)
	    {
		v = myVector[i];
		result += v*v;
	    }
	    output[block] = result;
	}
    }
}
387 
// Squared Euclidean distance between this vector and v (sum of squared
// element differences; no square root). Both vectors must share the same
// inclusive index range.
// NOTE(review): the signature line was lost in extraction; per the call
// to distance2Internal below this is
// UT_VectorT<T>::distance2(const UT_VectorT<T> &v) const.
template <typename T>
T
{
    UT_ASSERT_P(myNL == v.myNL && myNH == v.myNH);

    exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
    if (nblocks <= 0)
	return 0;

    // Per-block partial sums produced by the threaded helper.
    UT_StackBuffer<fpreal64> accumulators(nblocks);

    distance2Internal((fpreal64*)accumulators, v);

    // Serial reduction of the per-block partial sums.
    fpreal64 result = accumulators[0];
    for (exint i = 1; i < nblocks; ++i)
	result += accumulators[i];

    return result;
}
408 
409 template <typename T>
410 void
412  fpreal64 *output, const UT_VectorT<T> &v, const UT_JobInfo &info) const
413 {
414  exint startblock;
415  exint endblock;
416  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
417 
418  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
419 
420  for (exint block = startblock; block < endblock; ++block)
421  {
422  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
423  fpreal64 diff = myVector[i] - v.myVector[i];
424  fpreal64 result = diff*diff;
425  for (++i; i < blockend; ++i)
426  {
427  diff = myVector[i] - v.myVector[i];
428  result += diff*diff;
429  }
430  output[block] = result;
431  }
432 }
433 
434 template <typename T>
435 void
437 {
438  exint i, end;
439 
440  getPartialRange(i, end, info);
441 
442 #if 0
443  for (; i < end; i++)
444  myVector[i] = -myVector[i];
445 #else
446  // NOTE: Compilers get confused, thinking that i or end
447  // might change underneath us, because we passed a non-const reference
448  // to them to getPartialRange, so they don't make any assumptions
449  // and have terrible performance, unless we make separate
450  // variables j and jend that are entirely local.
451  for (exint j = i, jend = end; j < jend; j++)
452  myVector[j] = -myVector[j];
453 #endif
454 }
455 
456 template <typename T>
457 void
459 {
460  exint i, end;
461 
462  getPartialRange(i, end, info);
463 
464 #if 0
465  for (; i < end; i++)
466  myVector[i] = v.myVector[i] - myVector[i];
467 #else
468  // NOTE: Compilers get confused, thinking that i or end
469  // might change underneath us, because we passed a non-const reference
470  // to them to getPartialRange, so they don't make any assumptions
471  // and have terrible performance, unless we make separate
472  // variables j and jend that are entirely local.
473  for (exint j = i, jend = end; j < jend; j++)
474  myVector[j] = v.myVector[j] - myVector[j];
475 #endif
476 }
477 
478 template <typename T>
479 void
481  const UT_JobInfo &info)
482 {
483  exint i, end;
484 
485  getPartialRange(i, end, info);
486 
487 #if 0
488  for (; i < end; i++)
489  myVector[i] += s * v.myVector[i];
490 #else
491  // NOTE: Compilers get confused, thinking that i or end
492  // might change underneath us, because we passed a non-const reference
493  // to them to getPartialRange, so they don't make any assumptions
494  // and have terrible performance (2.4x slower for this function),
495  // unless we make separate variables j and jend that are entirely local.
496  for (exint j = i, jend = end; j < jend; j++)
497  myVector[j] += s * v.myVector[j];
498 #endif
499 }
500 
501 template <typename T>
502 void
504  T s, const UT_VectorT<T> &v, fpreal64 *norm2)
505 {
506  UT_ASSERT_P(myNL == v.myNL && myNH == v.myNH);
507 
508  // Just in case, handle null
509  if (!norm2)
510  {
511  addScaledVec(s, v);
512  return;
513  }
514 
515  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
516  if (nblocks <= 0)
517  {
518  *norm2 = 0;
519  return;
520  }
521 
522  UT_StackBuffer<fpreal64> accumulators(nblocks);
523 
524  addScaledVecNorm2Internal(s, v, (fpreal64*)accumulators);
525 
526  fpreal64 result = accumulators[0];
527  for (exint i = 1; i < nblocks; ++i)
528  result += accumulators[i];
529 
530  *norm2 = result;
531 }
532 
533 template <typename T>
534 void
536  T s, const UT_VectorT<T> &v, fpreal64 *norm2, const UT_JobInfo &info)
537 {
538  exint startblock;
539  exint endblock;
540  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
541 
542  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
543 
544  for (exint block = startblock; block < endblock; ++block)
545  {
546  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
547  T val = myVector[i] + s*v.myVector[i];
548  myVector[i] = val;
549  fpreal64 result = val*val;
550  for (++i; i < blockend; ++i)
551  {
552  val = myVector[i] + s*v.myVector[i];
553  myVector[i] = val;
554  result += val*val;
555  }
556  norm2[block] = result;
557  }
558 }
559 
560 template <typename T>
561 void
563  T s, const UT_VectorT<T> &v, fpreal64 *norm2, exint normlimit)
564 {
565  UT_ASSERT_P(myNL == v.myNL && myNH == v.myNH);
566 
567  // Just in case, handle null
568  if (!norm2 || normlimit <= 0)
569  {
570  addScaledVec(s, v);
571 
572  if (norm2)
573  *norm2 = 0;
574 
575  return;
576  }
577 
578  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
579  if (nblocks <= 0)
580  {
581  *norm2 = 0;
582  return;
583  }
584 
585  UT_StackBuffer<fpreal64> accumulators(nblocks);
586 
587  addScaledVecNorm2UpToInternal(s, v, (fpreal64*)accumulators, normlimit);
588 
589  fpreal64 result = accumulators[0];
590  for (exint i = 1; i < nblocks; ++i)
591  result += accumulators[i];
592 
593  *norm2 = result;
594 }
595 
596 template <typename T>
597 void
599  T s, const UT_VectorT<T> &v, fpreal64 *norm2, exint normlimit, const UT_JobInfo &info)
600 {
601  exint startblock;
602  exint endblock;
603  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
604 
605  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
606 
607  for (exint block = startblock; block < endblock; ++block)
608  {
609  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
610  const exint normmax = SYSmin(blockend, normlimit);
611  fpreal64 result = 0;
612  for (; i < normmax; ++i)
613  {
614  T val = myVector[i] + s*v.myVector[i];
615  myVector[i] = val;
616  result += val*val;
617  }
618  for (; i < blockend; ++i)
619  {
620  myVector[i] += s*v.myVector[i];
621  }
622  norm2[block] = result;
623  }
624 }
625 
626 template <typename T>
627 void
629  const UT_JobInfo &info)
630 {
631  exint i, end;
632 
633  getPartialRange(i, end, info);
634 
635 #if 0
636  for (; i < end; i++)
637  myVector[i] = s * myVector[i] + v.myVector[i];
638 #else
639  // NOTE: Compilers get confused, thinking that i or end
640  // might change underneath us, because we passed a non-const reference
641  // to them to getPartialRange, so they don't make any assumptions
642  // and have terrible performance, unless we make separate
643  // variables j and jend that are entirely local.
644  for (exint j = i, jend = end; j < jend; j++)
645  myVector[j] = s * myVector[j] + v.myVector[j];
646 #endif
647 }
648 
649 template <typename T>
650 void
652  const UT_JobInfo &info)
653 {
654  exint i, end;
655 
656  getPartialRange(i, end, info);
657 
658 #if 0
659  for (; i < end; i++)
660  myVector[i] = a.myVector[i] * b.myVector[i];
661 #else
662  // NOTE: Compilers get confused, thinking that i or end
663  // might change underneath us, because we passed a non-const reference
664  // to them to getPartialRange, so they don't make any assumptions
665  // and have terrible performance, unless we make separate
666  // variables j and jend that are entirely local.
667  for (exint j = i, jend = end; j < jend; j++)
668  myVector[j] = a.myVector[j] * b.myVector[j];
669 #endif
670 }
671 
672 template <typename T>
673 void
675  const UT_VectorT<T> &a, const UT_VectorT<T> &b,
676  fpreal64 *dot_aba, exint dotlimit)
677 {
678  UT_ASSERT_P(myNL == a.myNL && myNH == a.myNH);
679  UT_ASSERT_P(myNL == b.myNL && myNH == b.myNH);
680 
681  if (!dot_aba)
682  {
683  multAndSet(a, b);
684  return;
685  }
686 
687  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
688  if (nblocks <= 0)
689  {
690  *dot_aba = 0;
691  return;
692  }
693 
694  UT_StackBuffer<fpreal64> accumulators(nblocks);
695 
696  multSetAndDotUpToInternal(a, b, (fpreal64*)accumulators, dotlimit);
697 
698  fpreal64 result = accumulators[0];
699  for (exint i = 1; i < nblocks; ++i)
700  result += accumulators[i];
701 
702  *dot_aba = result;
703 }
704 
705 template <typename T>
706 void
708  const UT_VectorT<T> &a, const UT_VectorT<T> &b,
709  fpreal64 *dot_aba, exint dotlimit,
710  const UT_JobInfo &info)
711 {
712  exint startblock;
713  exint endblock;
714  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
715 
716  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
717 
718  for (exint block = startblock; block < endblock; ++block)
719  {
720  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
721  const exint dotmax = SYSmin(blockend, dotlimit);
722  fpreal64 result = 0;
723  for (; i < dotmax; ++i)
724  {
725  T av = a.myVector[i];
726  T bv = b.myVector[i];
727  T val = av*bv;
728  myVector[i] = val;
729  result += val*av;
730  }
731  for (; i < blockend; ++i)
732  {
733  myVector[i] += a.myVector[i]*b.myVector[i];
734  }
735  dot_aba[block] = result;
736  }
737 }
738 
739 template <typename T>
740 void
742  const UT_JobInfo &info)
743 {
744  exint i, end;
745 
746  getPartialRange(i, end, info);
747 
748 #if 0
749  for (; i < end; i++)
750  myVector[i] = a.myVector[i] / b.myVector[i];
751 #else
752  // NOTE: Compilers get confused, thinking that i or end
753  // might change underneath us, because we passed a non-const reference
754  // to them to getPartialRange, so they don't make any assumptions
755  // and have terrible performance, unless we make separate
756  // variables j and jend that are entirely local.
757  for (exint j = i, jend = end; j < jend; j++)
758  myVector[j] = a.myVector[j] / b.myVector[j];
759 #endif
760 }
761 
762 template <typename T>
763 void
765  const UT_JobInfo &info)
766 {
767  exint i, end;
768 
769  getPartialRange(i, end, info);
770 
771 #if 0
772  for (; i < end; i++)
773  {
774  myVector[i] = SYSsafediv(T(1.0), a.myVector[i]);
775  }
776 #else
777  // NOTE: Compilers get confused, thinking that i or end
778  // might change underneath us, because we passed a non-const reference
779  // to them to getPartialRange, so they don't make any assumptions
780  // and have terrible performance, unless we make separate
781  // variables j and jend that are entirely local.
782  for (exint j = i, jend = end; j < jend; j++)
783  myVector[j] = SYSsafediv(T(1.0), a.myVector[j]);
784 #endif
785 }
786 
787 template <typename T>
788 void
790  const UT_JobInfo &info)
791 {
792  exint i, end;
793 
794  getPartialRange(i, end, info);
795 
796 #if 0
797  for (; i < end; i++)
798  {
799  myVector[i] = T(1.0) / a.myVector[i];
800  }
801 #else
802  // NOTE: Compilers get confused, thinking that i or end
803  // might change underneath us, because we passed a non-const reference
804  // to them to getPartialRange, so they don't make any assumptions
805  // and have terrible performance, unless we make separate
806  // variables j and jend that are entirely local.
807  for (exint j = i, jend = end; j < jend; j++)
808  myVector[j] = T(1.0) / a.myVector[j];
809 #endif
810 }
811 
812 template <typename T>
815 {
816  UT_ASSERT_P(length() == v.length());
817  copyFrom(v);
818  return *this;
819 }
820 
821 template <typename T>
822 void
824  const UT_JobInfo &info)
825 {
826  UT_ASSERT_P(length() == v.length());
827  exint i, end;
828 
829  getPartialRange(i, end, info);
830 
831  memcpy(&myVector[i], &v.myVector[i],
832  (end - i) * sizeof(T));
833 }
834 
835 template <typename T>
838 {
839  exint i;
840 
841  for (i=myNL; i<=myNH; i++)
842  myVector[i] += v.myVector[i];
843  return *this;
844 }
845 
846 template <typename T>
849 {
850  exint i;
851 
852  for (i=myNL; i<=myNH; i++)
853  myVector[i] -= v.myVector[i];
854  return *this;
855 }
856 
857 template <typename T>
860 {
861  exint i;
862 
863  for (i=myNL; i<=myNH; i++)
864  myVector[i] *= v.myVector[i];
865  return *this;
866 }
867 
868 template <typename T>
871 {
872  exint i;
873 
874  for (i=myNL; i<=myNH; i++)
875  myVector[i] /= v.myVector[i];
876  return *this;
877 }
878 
879 template <typename T>
882 {
883  exint i;
884 
885  for (i=myNL; i<=myNH; i++)
886  myVector[i] *= scalar;
887  return *this;
888 }
889 
890 
891 template <typename T>
894 {
895  exint i;
896 
897  scalar = 1.0F / scalar;
898  for (i=myNL; i<=myNH; i++)
899  myVector[i] *= scalar;
900  return *this;
901 }
902 
903 
904 template <typename T>
905 T
907 {
908  UT_ASSERT_P(myNL == v.myNL && myNH == v.myNH);
909 
910  exint nblocks = (length()+PARALLEL_BLOCK_SIZE-1)/PARALLEL_BLOCK_SIZE;
911  if (nblocks <= 0)
912  return 0;
913 
914  UT_StackBuffer<fpreal64> accumulators(nblocks);
915 
916  dotInternal((fpreal64*)accumulators, v);
917 
918  fpreal64 result = accumulators[0];
919  for (exint i = 1; i < nblocks; ++i)
920  result += accumulators[i];
921 
922  return result;
923 }
924 
// Threaded worker for dot(): computes one partial dot product per block
// into output[block]; the caller sums the per-block results.
template <typename T>
void
UT_VectorT<T>::dotInternalPartial(fpreal64 *output, const UT_VectorT<T> &v, const UT_JobInfo &info) const
{
    exint startblock;
    exint endblock;
    getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);

    // Element index; carries forward across blocks so each block's inner
    // loop resumes where the previous one stopped.
    exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;

    for (exint block = startblock; block < endblock; ++block)
    {
	// Clamp the final block to the inclusive high index myNH.
	exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
	// Seed with the first element's product, then scan the rest.
	fpreal64 result = myVector[i]*v.myVector[i];
	for (++i; i < blockend; ++i)
	{
	    result += myVector[i]*v.myVector[i];
	}
	output[block] = result;
    }
}
946 
947 template <typename T>
948 std::ostream &
949 UT_VectorT<T>::save(std::ostream &os) const
950 {
951  exint i;
952 
953  for (i=myNL; i<=myNH; i++)
954  os << i << ": " << myVector[i] << "\n";
955 
956  return os;
957 }
958 
959 template <typename T>
960 bool
962 {
963  if (getNL() != v.getNL() || getNH() != v.getNH())
964  return false;
965  for (exint i = getNL(); i < getNH(); i++)
966  {
967  if (!SYSalmostEqual((*this)(i), v(i), ulps))
968  return false;
969  }
970  return true;
971 }
972 
// Return true if any element in the inclusive range [myNL, myNH] is NaN.
// NOTE(review): the signature line was lost in extraction; per the doxygen
// index this is UT_VectorT<T>::hasNan() const.
template <typename T>
bool
{
    for (exint i = myNL; i <= myNH; i++)
    {
	if (SYSisNan(myVector[i]))
	    return true;
    }
    return false;
}
984 
985 template <typename T>
986 void
988 {
990  {
991  std::cerr << "NAN found in UT_VectorT\n";
992  UT_ASSERT(0);
993  }
994 }
995 
996 //////////////////////////////
997 // UT_Vector Specializations
998 //////////////////////////////
999 
1000 #if 1
1001 
1002 template <>
1003 inline void
1005 {
1006  exint i, end;
1007 
1008  getPartialRange(i, end, info);
1009 
1010  VM_Math::negate(&myVector[i], &myVector[i], end-i);
1011 }
1012 
1013 template <>
1014 inline void
1016 {
1017  exint i, end;
1018 
1019  getPartialRange(i, end, info);
1020 
1021  VM_Math::scaleoffset(&myVector[i], -1.0F, &v.myVector[i], end-i);
1022 }
1023 
1024 template <>
1025 inline void
1027  const UT_VectorT<fpreal32> &v,
1028  const UT_JobInfo &info)
1029 {
1030  exint i, end;
1031 
1032  getPartialRange(i, end, info);
1033 
1034  VM_Math::madd(&myVector[i], &v.myVector[i], s, end-i);
1035 }
1036 
1037 template <>
1038 inline void
1040  fpreal32 s,
1041  const UT_VectorT<fpreal32> &v,
1042  fpreal64 *norm2,
1043  const UT_JobInfo &info)
1044 {
1045  exint startblock;
1046  exint endblock;
1047  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1048 
1049  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1050 
1051  for (exint block = startblock; block < endblock; ++block)
1052  {
1053  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1054 
1055  fpreal64 result = VM_Math::maddAndNorm(&myVector[i], &v.myVector[i], s, blockend-i);
1056 
1057  norm2[block] = result;
1058 
1059  i = blockend;
1060  }
1061 }
1062 
1063 template <>
1064 inline void
1066  fpreal32 s,
1067  const UT_VectorT<fpreal32> &v,
1068  fpreal64 *norm2,
1069  exint normlimit,
1070  const UT_JobInfo &info)
1071 {
1072  exint startblock;
1073  exint endblock;
1074  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1075 
1076  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1077 
1078  for (exint block = startblock; block < endblock; ++block)
1079  {
1080  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1081 
1082  fpreal64 result = 0;
1083  if (i < normlimit)
1084  {
1085  const exint normmax = SYSmin(blockend, normlimit);
1086  result = VM_Math::maddAndNorm(&myVector[i], &v.myVector[i], s, normmax-i);
1087 
1088  i = normmax;
1089  if (i < blockend)
1090  VM_Math::madd(&myVector[i], &v.myVector[i], s, blockend-i);
1091  }
1092  else
1093  VM_Math::madd(&myVector[i], &v.myVector[i], s, blockend-i);
1094 
1095  norm2[block] = result;
1096 
1097  i = blockend;
1098  }
1099 }
1100 
1101 template <>
1102 inline void
1104  const UT_VectorT<fpreal32> &v,
1105  const UT_JobInfo &info)
1106 {
1107  exint i, end;
1108 
1109  getPartialRange(i, end, info);
1110 
1111  VM_Math::scaleoffset(&myVector[i], s, &v.myVector[i], end-i);
1112 }
1113 
1114 
1115 template <>
1116 inline void
1118  const UT_VectorT<fpreal32> &b,
1119  const UT_JobInfo &info)
1120 {
1121  exint i, end;
1122 
1123  getPartialRange(i, end, info);
1124 
1125  VM_Math::mul(&myVector[i], &a.myVector[i], &b.myVector[i], end-i);
1126 }
1127 
1128 template <>
1129 inline void
1131  const UT_VectorT<fpreal32> &a,
1132  const UT_VectorT<fpreal32> &b,
1133  fpreal64 *dot_aba, exint dotlimit,
1134  const UT_JobInfo &info)
1135 {
1136  exint startblock;
1137  exint endblock;
1138  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1139 
1140  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1141 
1142  for (exint block = startblock; block < endblock; ++block)
1143  {
1144  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1145 
1146  fpreal64 result = 0;
1147  if (i < dotlimit)
1148  {
1149  const exint dotmax = SYSmin(blockend, dotlimit);
1150 
1151  result = VM_Math::mulAndDotDA(&myVector[i], &a.myVector[i], &b.myVector[i], dotmax-i);
1152 
1153  i = dotmax;
1154  if (i < blockend)
1155  VM_Math::mul(&myVector[i], &a.myVector[i], &b.myVector[i], blockend-i);
1156  }
1157  else
1158  VM_Math::mul(&myVector[i], &a.myVector[i], &b.myVector[i], blockend-i);
1159 
1160  dot_aba[block] = result;
1161 
1162  i = blockend;
1163  }
1164 }
1165 
1166 template <>
1167 inline void
1169  const UT_VectorT<fpreal32> &b,
1170  const UT_JobInfo &info)
1171 {
1172  exint i, end;
1173 
1174  getPartialRange(i, end, info);
1175 
1176  VM_Math::div(&myVector[i], &a.myVector[i], &b.myVector[i], end-i);
1177 }
1178 
1179 template <>
1180 inline void
1182  const UT_JobInfo &info)
1183 {
1184  exint i, end;
1185 
1186  getPartialRange(i, end, info);
1187 
1188  VM_Math::safediv(&myVector[i], 1.0f, &a.myVector[i], end-i);
1189 }
1190 
1191 template <>
1192 inline void
1194  const UT_JobInfo &info)
1195 {
1196  exint i, end;
1197 
1198  getPartialRange(i, end, info);
1199 
1200  VM_Math::div(&myVector[i], 1.0f, &a.myVector[i], end-i);
1201 }
1202 
1203 template <>
1204 inline UT_VectorT<fpreal32> &
1206 {
1207  VM_Math::add(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);
1208  return *this;
1209 }
1210 
1211 template <>
1212 inline UT_VectorT<fpreal32> &
1214 {
1215  VM_Math::sub(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);
1216  return *this;
1217 }
1218 
1219 template <>
1220 inline UT_VectorT<fpreal32> &
1222 {
1223  VM_Math::mul(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);
1224  return *this;
1225 }
1226 
1227 template <>
1228 inline UT_VectorT<fpreal32> &
1230 {
1231  VM_Math::div(&myVector[myNL], &myVector[myNL], &v.myVector[myNL], myNH-myNL+1);
1232  return *this;
1233 }
1234 
1235 template <>
1236 inline UT_VectorT<fpreal32> &
1238 {
1239  VM_Math::mul(&myVector[myNL], &myVector[myNL], scalar, myNH-myNL+1);
1240  return *this;
1241 }
1242 
1243 template <>
1244 inline UT_VectorT<fpreal32> &
1246 {
1247  VM_Math::div(&myVector[myNL], &myVector[myNL], scalar, myNH-myNL+1);
1248  return *this;
1249 }
1250 
1251 template <>
1252 inline void
1254 {
1255  exint startblock;
1256  exint endblock;
1257  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1258 
1259  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1260 
1261  for (exint block = startblock; block < endblock; ++block)
1262  {
1263  const exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1264 
1265  fpreal64 result = VM_Math::dot(&myVector[i], &v.myVector[i], blockend-i);
1266 
1267  output[block] = result;
1268 
1269  i = blockend;
1270  }
1271 }
1272 
1273 template <>
1274 inline void
1276  fpreal64 *output, int type,
1277  const UT_JobInfo &info) const
1278 {
1279  exint startblock;
1280  exint endblock;
1281  getPartialBlockRange(startblock, endblock, PARALLEL_BLOCK_SIZE, info);
1282 
1283  exint i = startblock*PARALLEL_BLOCK_SIZE + myNL;
1284 
1285  if (type == 0)
1286  {
1287  for (exint block = startblock; block < endblock; ++block)
1288  {
1289  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1290  fpreal32 result = SYSabs(myVector[i]);
1291  for (++i; i < blockend; ++i)
1292  {
1293  fpreal32 v = SYSabs(myVector[i]);
1294  if (v > result)
1295  result = v;
1296  }
1297  output[block] = result;
1298  }
1299  }
1300  else if (type == 1)
1301  {
1302  for (exint block = startblock; block < endblock; ++block)
1303  {
1304  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1305  fpreal64 result = SYSabs(myVector[i]);
1306  for (++i; i < blockend; ++i)
1307  {
1308  result += SYSabs(myVector[i]);
1309  }
1310  output[block] = result;
1311  }
1312  }
1313  else // L2-norm
1314  {
1315  for (exint block = startblock; block < endblock; ++block)
1316  {
1317  exint blockend = SYSmin(i+PARALLEL_BLOCK_SIZE, myNH+1);
1318 
1319  fpreal64 result = VM_Math::dot(&myVector[i], &myVector[i], blockend-i);
1320 
1321  output[block] = result;
1322 
1323  i = blockend;
1324  }
1325  }
1326 }
1327 #endif
1328 
1329 template <typename V> inline size_t
1330 format(char *buffer, size_t bufsize, const UT_VectorT<V> &v)
1331 {
1333  for (int c = v.getNL(); c <= v.getNH(); c++) {
1334  if (c != v.getNL())
1335  row << ", ";
1336  UTformat(row, "{}", v(c));
1337  }
1338 
1339  UT::Format::Writer writer(buffer, bufsize);
1341  return f.format(writer, "[{}]", {row.str()});
1342 }
1343 
1344 // Explicit instantiation for format
1345 #define VECTOR_INSTANTIATE_FMT(T) \
1346  template UT_API size_t format<T>(char*, size_t, const UT_VectorT<T>&);
1349 
1350 //////////////////////////
1351 // UT_Permutation class
1352 //////////////////////////
1353 
1354 
1355 template <typename T>
1357 {
1358  myNL = nl;
1359  myNH = nh;
1360  myVector = (T *)malloc((nh - nl + 1)*sizeof(T));
1361  myVector = myVector - nl;
1362 }
1363 
1364 template <typename T>
1366 {
1367  free(myVector + myNL);
1368 }
1369 
1370 template <typename T>
1372 {
1373  clone(p);
1374 }
1375 
1376 template <typename T>
1379 {
1380  if (&p != this)
1381  {
1382  free(myVector + myNL);
1383  clone(p);
1384  }
1385  return *this;
1386 }
1387 
1388 template <typename T>
1389 void
1391 {
1392  UT_ASSERT(&p != this);
1393  myNL = p.myNL;
1394  myNH = p.myNH;
1395  myVector = (T *)malloc((myNH - myNL + 1)*sizeof(T));
1396  myVector = myVector - myNL;
1397 
1398  memcpy(myVector + myNL, p.myVector + myNL, (myNH - myNL + 1)*sizeof(T));
1399 }
1400 
1401 template <typename T>
1402 void
1404 {
1405  memset(myVector + myNL, 0, (myNH - myNL + 1)*sizeof(T));
1406 }
1407 
1408 template <typename T>
1409 void
1411 {
1412  exint diff = myNL-nl;
1413 
1414  myNL = nl;
1415  myNH -= diff;
1416  myVector += diff;
1417 }
1418 
1419 
GLdouble s
Definition: glew.h:1390
UT_PermutationT< T > & operator=(const UT_PermutationT< T > &p)
Definition: UT_Vector.C:1378
bool hasNan() const
Definition: UT_Vector.C:975
void getPartialRange(exint &start, exint &end, const UT_JobInfo &info) const
Determines the [start,end) interval that should be worked on.
Definition: UT_Vector.C:136
GA_API const UT_StringHolder div
size_t format(W &writer, const char *format, std::initializer_list< ArgValue > args)
void negPartial(const UT_JobInfo &info)
Negate.
Definition: UT_Vector.C:436
int int32
Definition: SYS_Types.h:39
#define VECTOR_INSTANTIATE_FMT(T)
Definition: UT_Vector.C:1345
GLsizei bufsize
Definition: glew.h:2581
void divideWork(int units, int &start, int &end) const
void init(exint nl, exint nh)
Initialize nl, nh and allocate space.
Definition: UT_Vector.C:107
UT_VectorT & operator+=(const UT_VectorT< T > &v)
Definition: UT_Vector.C:837
GLuint const GLfloat * val
Definition: glew.h:2794
GLboolean GLboolean GLboolean GLboolean a
Definition: glew.h:9477
exint getNH() const
Get the high index.
Definition: UT_Vector.h:82
int64 exint
Definition: SYS_Types.h:125
constexpr bool SYSisNan(const F f)
Definition: SYS_Math.h:178
void getPartialBlockRange(exint &startblock, exint &endblock, const exint blocksize, const UT_JobInfo &info) const
Definition: UT_Vector.C:146
#define SYSabs(a)
Definition: SYS_Math.h:1528
void addScaledVecNorm2(T s, const UT_VectorT< T > &v, fpreal64 *norm2)
Add scaled vector and compute the squared L2 norm.
Definition: UT_Vector.C:503
An output stream object that owns its own string buffer storage.
basic_writer< back_insert_range< internal::buffer > > writer
Definition: format.h:361
void constantInternalPartial(exint nl, exint nh, T c, const UT_JobInfo &info)
Definition: UT_Vector.C:168
const GLdouble * v
Definition: glew.h:1391
T dot(const UT_VectorT< T > &v) const
Definition: UT_Vector.C:906
void multSetAndDotUpTo(const UT_VectorT< T > &a, const UT_VectorT< T > &b, fpreal64 *dot_aba, exint dotlimit)
Definition: UT_Vector.C:674
float fpreal32
Definition: SYS_Types.h:200
const UT_WorkBuffer & str()
Returns a read-only reference to the underlying UT_WorkBuffer.
void testForNan() const
Definition: UT_Vector.C:987
void addScaledVecNorm2UpTo(T s, const UT_VectorT< T > &v, fpreal64 *norm2, exint normlimit)
Definition: UT_Vector.C:562
OIIO_HOSTDEVICE float madd(float a, float b, float c)
Fused multiply and add: (a*b + c)
Definition: fmath.h:267
double fpreal64
Definition: SYS_Types.h:201
~UT_VectorT()
Definition: UT_Vector.C:53
void changeNL(exint nl)
Change the low index, and the high index will adjust itself.
Definition: UT_Vector.C:267
UT_PermutationT(exint nl, exint nh)
Definition: UT_Vector.C:1356
SYS_FORCE_INLINE T & y()
Definition: UT_Vector3.h:513
ImageBuf OIIO_API sub(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
UT_VectorT & operator/=(const UT_VectorT< T > &v)
Definition: UT_Vector.C:870
T norm(int type=2) const
Definition: UT_Vector.C:278
GLclampf f
Definition: glew.h:3499
void zeroPartial(exint nl, exint nh, const UT_JobInfo &info)
Definition: UT_Vector.C:155
GLuint buffer
Definition: glew.h:1680
GLint GLenum GLsizei GLint GLsizei const void * data
Definition: glew.h:1379
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:134
UT_VectorT()
Input the index range [nl..nh].
Definition: UT_Vector.h:38
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
Definition: CE_Vector.h:127
std::ostream & save(std::ostream &os) const
Output.
Definition: UT_Vector.C:949
SYS_FORCE_INLINE T & z()
Definition: UT_Vector3.h:515
void getSubvector4(UT_Vector4 &v, exint idx) const
Definition: UT_Vector.C:247
GLuint GLuint end
Definition: glew.h:1253
T distance2(const UT_VectorT< T > &v) const
Definition: UT_Vector.C:390
void changeNL(exint nl)
Definition: UT_Vector.C:1410
GLsizei n
Definition: glew.h:4040
const GLfloat * c
Definition: glew.h:16296
GLuint GLsizei GLsizei * length
Definition: glew.h:1825
GLenum GLenum dst
Definition: glew.h:2410
size_t format(char *buffer, size_t bufsize, const UT_VectorT< V > &v)
Definition: UT_Vector.C:1330
UT_VectorT & operator-=(const UT_VectorT< T > &v)
Definition: UT_Vector.C:848
T norm2() const
Square of L2-norm.
long long int64
Definition: SYS_Types.h:116
void setSubvector3(exint idx, const UT_Vector3 &v)
Definition: UT_Vector.C:238
GLuint GLuint GLsizei GLenum type
Definition: glew.h:1253
static int getInt(UT_IntControl i)
GLuint start
Definition: glew.h:1253
void getSubvector2(UT_Vector2 &v, exint idx) const
Definition: UT_Vector.C:213
GLdouble GLdouble GLdouble b
Definition: glew.h:9122
GLfloat GLfloat p
Definition: glew.h:16321
void setSubvector4(exint idx, const UT_Vector4 &v)
Definition: UT_Vector.C:257
void setSubvector2(exint idx, const UT_Vector2 &v)
Definition: UT_Vector.C:221
VM_SIV set(fpreal32 *d, const fpreal32 *a, exint num, const uint32 *disabled)
VM_Math::set(d, a, disabled) := d[i] = disabled[i] ? d[i] : a[i].
Definition: VM_Math.h:403
void getSubvector3(UT_Vector3 &v, exint idx) const
Definition: UT_Vector.C:229
Type-safe formatting, modeled on the Python str.format function.
size_t UTformat(FILE *file, const char *format, const Args &...args)
SYS_FORCE_INLINE T & x()
Definition: UT_Vector3.h:511
void subvector(const UT_VectorT< T > &v, exint nl, exint nh)
Steal from another vector, resulting vector has origin at 1.
Definition: UT_Vector.C:61
GLuint64EXT * result
Definition: glew.h:14007
#define UT_ASSERT(ZZ)
Definition: UT_Assert.h:135
void assign(const fpreal32 *data, exint nl, exint nh)
Definition: UT_Vector.C:91
UT_VectorT & operator=(const UT_VectorT< T > &v)
Definition: UT_Vector.C:814
ImageBuf OIIO_API add(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
UT_VectorT & operator*=(const UT_VectorT< T > &v)
Componentwise multiplication & division.
Definition: UT_Vector.C:859
GLenum GLenum void * row
Definition: glew.h:4965
exint length() const
Get dimension of this vector.
Definition: UT_Vector.h:85
ImageBuf OIIO_API zero(ROI roi, int nthreads=0)
#define SYSmin(a, b)
Definition: SYS_Math.h:1527
void constant(T c)
Initialize to the given constant.
Definition: UT_Vector.h:63
* for(int i=0;i< n_subtasks;++i)*tasks.push(pool-> push(myfunc))
GLfloat units
Definition: glew.h:10289
bool isEqual(const UT_VectorT< T > &v, int64 ulps)
Definition: UT_Vector.C:961
exint getNL() const
Get the low index.
Definition: UT_Vector.h:79
ImageBuf OIIO_API mul(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)