16 #ifndef __UT_BVHImpl_h__
17 #define __UT_BVHImpl_h__
29 #define BVH_TRY_ALL_AXES 0
// NOTE(review): extraction-damaged fragment (interior lines missing; the
// original line numbers embedded below are not contiguous). The visible code
// tests a Box for NaN/Inf components: a generic per-axis loop, followed by a
// version that reinterprets the box memory as int32 and checks whether all
// IEEE-754 exponent bits (mask 0x7F800000) are set, which is true exactly
// for NaN and +/-Inf in binary32.
33 template<
typename T,u
int NAXES>
37 for (
uint axis = 1; axis < NAXES; ++axis)
42 return has_nan_or_inf;
// Integer-reinterpret variant: two int32 values per axis (the min and max
// of that axis interval), so indices are 2*axis and 2*axis+1.
46 const int32 *pboxints =
reinterpret_cast<const int32*
>(&box);
48 bool has_nan_or_inf = ((pboxints[0] & 0x7F800000) == 0x7F800000);
49 has_nan_or_inf |= ((pboxints[1] & 0x7F800000) == 0x7F800000);
50 for (
uint axis = 1; axis < NAXES; ++axis)
52 has_nan_or_inf |= ((pboxints[2*axis] & 0x7F800000) == 0x7F800000);
53 has_nan_or_inf |= ((pboxints[2*axis + 1] & 0x7F800000) == 0x7F800000);
55 return has_nan_or_inf;
// NOTE(review): fragment — only a template header and one statement that
// fetches the [min,max] pair of one axis from a box survive here. The
// enclosing function's signature and the rest of its body are not visible;
// presumably this is a per-axis accessor — TODO confirm against the full file.
57 template<
typename T,u
int NAXES>
59 const T*
v = box.vals[axis];
// NOTE(review): extraction-damaged fragments. These check a point (one value
// per axis, unlike the box variant's two) for NaN/Inf via the IEEE-754
// exponent mask 0x7F800000: a generic loop version, then what appear to be
// unrolled versions for 2, 3 and 4 components — TODO confirm the NAXES of
// each specialization against the full file.
67 template<
typename T,u
int NAXES>
70 for (
uint axis = 1; axis < NAXES; ++axis)
72 return has_nan_or_inf;
// Generic integer-reinterpret version: loop over all axes.
78 bool has_nan_or_inf = ((ppositionints[0] & 0x7F800000) == 0x7F800000);
79 for (
uint axis = 1; axis < NAXES; ++axis)
80 has_nan_or_inf |= ((ppositionints[axis] & 0x7F800000) == 0x7F800000);
81 return has_nan_or_inf;
83 template<
typename T,u
int NAXES>
87 template<
typename T,u
int NAXES>
// Unrolled: 2 components.
108 bool has_nan_or_inf = ((ppositionints[0] & 0x7F800000) == 0x7F800000);
109 has_nan_or_inf |= ((ppositionints[1] & 0x7F800000) == 0x7F800000);
110 return has_nan_or_inf;
// Unrolled: 3 components.
116 bool has_nan_or_inf = ((ppositionints[0] & 0x7F800000) == 0x7F800000);
117 has_nan_or_inf |= ((ppositionints[1] & 0x7F800000) == 0x7F800000);
118 has_nan_or_inf |= ((ppositionints[2] & 0x7F800000) == 0x7F800000);
119 return has_nan_or_inf;
// Unrolled: 4 components.
125 bool has_nan_or_inf = ((ppositionints[0] & 0x7F800000) == 0x7F800000);
126 has_nan_or_inf |= ((ppositionints[1] & 0x7F800000) == 0x7F800000);
127 has_nan_or_inf |= ((ppositionints[2] & 0x7F800000) == 0x7F800000);
128 has_nan_or_inf |= ((ppositionints[3] & 0x7F800000) == 0x7F800000);
129 return has_nan_or_inf;
// NOTE(review): extraction-damaged fragment (signature and many interior
// lines missing). Visible behavior: compacts an index array in place,
// removing entries whose boxes are excluded (presumably NaN/Inf boxes, per
// the helpers above — TODO confirm), and returns the number excluded.
// Runs serially below a 65536-element threshold, otherwise splits the range
// into tasks, compacts each sub-range independently, then stitches the
// surviving runs together with memmove.
159 template<
typename BOX_TYPE,
typename SRC_INT_TYPE,
typename INT_TYPE>
162 constexpr INT_TYPE PARALLEL_THRESHOLD = 65536;
164 if (nboxes >= PARALLEL_THRESHOLD)
167 ntasks = (nprocessors > 1) ?
SYSmin(4*nprocessors, nboxes/(PARALLEL_THRESHOLD/2)) : 1;
173 const SRC_INT_TYPE* indices_end =
indices + nboxes;
176 SRC_INT_TYPE* psrc_index =
indices;
177 for (; psrc_index != indices_end; ++psrc_index)
183 if (psrc_index == indices_end)
187 SRC_INT_TYPE* nan_start = psrc_index;
188 for (++psrc_index; psrc_index != indices_end; ++psrc_index)
193 *nan_start = *psrc_index;
198 return indices_end - nan_start;
// Parallel path: each task compacts its own [start,end) slice of the index
// array and records how many entries it excluded in nexcluded[taski].
211 for (INT_TYPE taski = r.begin(), task_end = r.end(); taski < task_end; ++taski)
213 SRC_INT_TYPE* indices_start =
indices + (taski*
exint(nboxes))/ntasks;
214 const SRC_INT_TYPE* indices_end =
indices + ((taski+1)*
exint(nboxes))/ntasks;
215 SRC_INT_TYPE* psrc_index = indices_start;
216 for (; psrc_index != indices_end; ++psrc_index)
222 if (psrc_index == indices_end)
224 nexcluded[taski] = 0;
229 SRC_INT_TYPE* nan_start = psrc_index;
230 for (++psrc_index; psrc_index != indices_end; ++psrc_index)
235 *nan_start = *psrc_index;
239 nexcluded[taski] = indices_end - nan_start;
// Serial fixup: total the per-task exclusion counts, then slide each task's
// surviving run left over the gaps left by earlier tasks.
244 INT_TYPE total_excluded = nexcluded[0];
245 for (INT_TYPE taski = 1; taski < ntasks; ++taski)
247 total_excluded += nexcluded[taski];
250 if (total_excluded == 0)
256 while (nexcluded[taski] == 0)
261 SRC_INT_TYPE* dest_indices =
indices + ((taski+1)*
exint(nboxes))/ntasks - nexcluded[taski];
263 SRC_INT_TYPE* dest_end =
indices + nboxes - total_excluded;
264 for (++taski; taski < ntasks && dest_indices < dest_end; ++taski)
266 const SRC_INT_TYPE* psrc_index =
indices + (taski*
exint(nboxes))/ntasks;
267 const SRC_INT_TYPE* psrc_end =
indices + ((taski+1)*
exint(nboxes))/ntasks - nexcluded[taski];
268 INT_TYPE
count = psrc_end - psrc_index;
270 memmove(dest_indices, psrc_index,
sizeof(SRC_INT_TYPE)*count);
271 dest_indices += count;
273 nboxes -= total_excluded;
274 return total_excluded;
// NOTE(review): extraction-damaged fragments of two BVH::init overloads.
// First overload: computes the full bounding box over all items, then
// forwards to the main init.
278 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
281 computeFullBoundingBox(axes_minmax, boxes, nboxes,
indices);
283 init<H>(axes_minmax, boxes, nboxes,
indices, reorder_indices, max_items_per_leaf);
// Main overload: creates trivial indices when none are supplied, excludes
// bad boxes (recomputing the bound if any were excluded), builds the node
// array via initNodeReorder/initNode, trims >12.5% slack capacity, and
// transfers ownership of the node array to myRoot.
287 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
301 createTrivialIndices(
indices, nboxes);
307 if (nexcluded != 0) {
312 computeFullBoundingBox(axes_minmax, boxes, nboxes,
indices);
320 initNodeReorder<H>(nodes, nodes[0], axes_minmax, boxes,
indices, nboxes, 0, max_items_per_leaf);
322 initNode<H>(nodes, nodes[0], axes_minmax, boxes,
indices, nboxes);
325 if (8*nodes.capacity() > 9*nodes.size()) {
326 nodes.setCapacity(nodes.size());
329 myRoot.reset(nodes.array());
331 nodes.unsafeClearData();
// NOTE(review): extraction-damaged fragments of the depth-query helpers.
// getMaxDepthHelper: recurses into internal (non-EMPTY) children, tracking
// the maximum depth reached; getMaxDepth starts it at node 0, depth 1.
339 int newdepth =
depth;
341 for (
int s = 0;
s <
N;
s++)
343 const INT_TYPE node_int = curnode->child[
s];
344 if (Node::isInternal(node_int))
346 if (node_int == Node::EMPTY)
351 newdepth =
SYSmax(newdepth,
352 getMaxDepthHelper(Node::getInternalNum(node_int),
372 return getMaxDepthHelper(0, 1);
// getPureInternalDepthHelper: takes the MINIMUM child depth (SYSmin) —
// i.e. the depth to which every path is still internal; asserts on any
// empty slot other than the Node::EMPTY sentinel.
382 for (
int s = 0;
s <
N;
s++)
384 const INT_TYPE node_int = curnode->child[
s];
385 if (Node::isInternal(node_int))
387 if (node_int == Node::EMPTY)
393 int d = getPureInternalDepthHelper(Node::getInternalNum(node_int),
398 childdepth =
SYSmin(childdepth, d);
414 UT_ASSERT(!
"Empty node other than Node::EMPTY encountered.");
428 return getPureInternalDepthHelper(0, 0);
// NOTE(review): extraction-damaged fragment. traverse() starts a serial
// depth-first traversal at the root (node 0, parent sentinel INT_TYPE(-1)).
432 template<
typename LOCAL_DATA,
typename FUNCTORS>
435 LOCAL_DATA* data_for_parent)
const noexcept
441 traverseHelper(0,
INT_TYPE(-1), functors, data_for_parent);
// traverseHelper: calls functors.pre() to decide descent, recurses into
// internal children (skipping Node::EMPTY), calls functors.item() on leaf
// entries, then functors.post() with the per-child local_data.
444 template<
typename LOCAL_DATA,
typename FUNCTORS>
447 INT_TYPE parent_nodei,
449 LOCAL_DATA* data_for_parent)
const noexcept
452 bool descend = functors.pre(nodei, data_for_parent);
455 LOCAL_DATA local_data[
N];
457 for (s = 0; s <
N; ++
s) {
458 const INT_TYPE node_int = node.child[
s];
459 if (Node::isInternal(node_int)) {
460 if (node_int == Node::EMPTY) {
464 traverseHelper(Node::getInternalNum(node_int), nodei, functors, &local_data[s]);
467 functors.item(node_int, nodei, local_data[s]);
471 functors.post(nodei, parent_nodei, data_for_parent, s, local_data);
// NOTE(review): extraction-damaged fragment. traverseParallel() starts the
// parallel traversal at the root, passing myNumNodes as the initial
// next_node_id bound.
475 template<
typename LOCAL_DATA,
typename FUNCTORS>
479 LOCAL_DATA* data_for_parent)
const noexcept
485 traverseParallelHelper(0,
INT_TYPE(-1), parallel_threshold,
myNumNodes, functors, data_for_parent);
// traverseParallelHelper: first sizes each child subtree by walking the
// children BACKWARD (s --> 0): because child subtrees are laid out
// consecutively, next_node_id - child_node_id is the node count of child s.
// Children whose subtree size meets parallel_threshold are counted for
// parallel dispatch.
488 template<
typename LOCAL_DATA,
typename FUNCTORS>
491 INT_TYPE parent_nodei,
492 INT_TYPE parallel_threshold,
493 INT_TYPE next_node_id,
495 LOCAL_DATA* data_for_parent)
const noexcept
498 bool descend = functors.pre(nodei, data_for_parent);
504 INT_TYPE next_nodes[
N];
506 INT_TYPE nchildren =
N;
507 INT_TYPE nparallel = 0;
508 for (INT_TYPE s = N; s --> 0; )
510 const INT_TYPE node_int = node.child[
s];
511 if (node_int == Node::EMPTY)
516 next_nodes[
s] = next_node_id;
517 if (Node::isInternal(node_int))
521 INT_TYPE child_node_id = Node::getInternalNum(node_int);
522 nnodes[
s] = next_node_id - child_node_id;
523 next_node_id = child_node_id;
529 nparallel += (nnodes[
s] >= parallel_threshold);
// With >= 2 large children: handle small children serially first, then
// dispatch the large ones as parallel tasks (matched to taski by counting
// children above the threshold); otherwise fall through to a serial loop.
532 LOCAL_DATA local_data[
N];
533 if (nparallel >= 2) {
535 if (nparallel < nchildren) {
536 for (INT_TYPE s = 0; s <
N; ++
s) {
537 if (nnodes[s] >= parallel_threshold) {
540 const INT_TYPE node_int = node.child[
s];
541 if (Node::isInternal(node_int)) {
542 if (node_int == Node::EMPTY) {
546 traverseHelper(Node::getInternalNum(node_int), nodei, functors, &local_data[s]);
549 functors.item(node_int, nodei, local_data[s]);
555 for (INT_TYPE taski = r.begin(); taski < r.end(); ++taski) {
556 INT_TYPE parallel_count = 0;
560 for (s = 0; s <
N; ++
s) {
561 if (nnodes[s] < parallel_threshold) {
564 if (parallel_count == taski) {
569 const INT_TYPE node_int = node.child[
s];
570 if (Node::isInternal(node_int)) {
571 UT_ASSERT_MSG_P(node_int != Node::EMPTY,
"Empty entries should have been excluded above.");
572 traverseParallelHelper(Node::getInternalNum(node_int), nodei, parallel_threshold, next_nodes[s], functors, &local_data[s]);
575 functors.item(node_int, nodei, local_data[s]);
582 for (INT_TYPE s = 0; s <
N; ++
s) {
583 const INT_TYPE node_int = node.child[
s];
584 if (Node::isInternal(node_int)) {
585 if (node_int == Node::EMPTY) {
589 traverseHelper(Node::getInternalNum(node_int), nodei, functors, &local_data[s]);
592 functors.item(node_int, nodei, local_data[s]);
596 functors.post(nodei, parent_nodei, data_for_parent, nchildren, local_data);
// NOTE(review): extraction-damaged fragment. traverseVector() starts the
// bitmask-driven traversal at the root.
600 template<
typename LOCAL_DATA,
typename FUNCTORS>
603 LOCAL_DATA* data_for_parent)
const noexcept
609 traverseVectorHelper(0,
INT_TYPE(-1), functors, data_for_parent);
// traverseVectorHelper: functors.pre() returns an INT_TYPE bitmask, one bit
// per child; only children whose bit is set are visited. Hitting an EMPTY
// child clears that bit and all higher bits (descend &= (1<<s)-1) —
// presumably safe because EMPTY entries are packed at the end — TODO confirm.
// The final mask is passed to functors.post().
612 template<
typename LOCAL_DATA,
typename FUNCTORS>
615 INT_TYPE parent_nodei,
617 LOCAL_DATA* data_for_parent)
const noexcept
620 INT_TYPE descend = functors.pre(nodei, data_for_parent);
623 LOCAL_DATA local_data[
N];
625 for (s = 0; s <
N; ++
s) {
626 if ((descend>>s) & 1) {
627 const INT_TYPE node_int = node.child[
s];
628 if (Node::isInternal(node_int)) {
629 if (node_int == Node::EMPTY) {
631 descend &= (INT_TYPE(1)<<
s)-1;
634 traverseVectorHelper(Node::getInternalNum(node_int), nodei, functors, &local_data[s]);
637 functors.item(node_int, nodei, local_data[s]);
642 functors.post(nodei, parent_nodei, data_for_parent, s, local_data, descend);
// NOTE(review): extraction-damaged fragment. Fills an index array with the
// identity mapping (the fill loop bodies are missing from this view),
// switching to task-partitioned fills above a 65536-element threshold using
// the same 4*nprocessors task-count formula as the other helpers here.
646 template<
typename SRC_INT_TYPE>
648 constexpr
INT_TYPE PARALLEL_THRESHOLD = 65536;
650 if (
n >= PARALLEL_THRESHOLD) {
652 ntasks = (nprocessors > 1) ?
SYSmin(4*nprocessors,
n/(PARALLEL_THRESHOLD/2)) : 1;
661 for (
INT_TYPE taski = r.begin(), taskend = r.end(); taski != taskend; ++taski) {
// NOTE(review): extraction-damaged fragment. Computes the union bounding box
// of all boxes — either indirected through `indices` or direct when indices
// are absent. Serial accumulation below 8192 boxes; above that, per-task
// partial boxes are accumulated into parallel_boxes and combined serially.
673 template<
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
676 axes_minmax.initBounds();
680 if (nboxes >= 2*4096) {
682 ntasks = (nprocessors > 1) ?
SYSmin(4*nprocessors, nboxes/4096) : 1;
687 box.initBounds(boxes[
indices[0]]);
688 for (INT_TYPE i = 1; i < nboxes; ++i) {
689 box.combine(boxes[indices[i]]);
693 box.initBounds(boxes[0]);
694 for (INT_TYPE i = 1; i < nboxes; ++i) {
695 box.combine(boxes[i]);
703 parallel_boxes.
setSize(ntasks);
705 for (INT_TYPE taski = r.begin(),
end = r.end(); taski <
end; ++taski) {
706 const INT_TYPE startbox = (taski*
uint64(nboxes))/ntasks;
707 const INT_TYPE endbox = ((taski+1)*
uint64(nboxes))/ntasks;
710 box.initBounds(boxes[indices[startbox]]);
711 for (INT_TYPE i = startbox+1; i < endbox; ++i) {
712 box.combine(boxes[indices[i]]);
716 box.initBounds(boxes[startbox]);
717 for (INT_TYPE i = startbox+1; i < endbox; ++i) {
718 box.combine(boxes[i]);
721 parallel_boxes[taski] = box;
727 for (INT_TYPE i = 1; i < ntasks; ++i) {
728 box.combine(parallel_boxes[i]);
// NOTE(review): extraction-damaged fragment of BVH<N>::initNode (signature
// visible; many interior lines missing). Recursively builds the node array:
// tiny inputs become direct leaf children (remaining slots = Node::EMPTY);
// otherwise the index range is split into up to N groups (split<H> for N==2,
// multiSplit<H> otherwise). Groups of >= 1024 boxes are built in parallel
// into per-task local node arrays, then spliced into `nodes` with their
// internal child references rebased by local_nodes_start; smaller groups
// recurse serially in place.
736 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
737 void BVH<N>::initNode(
UT_Array<Node>& nodes,
Node &node,
const Box<T,NAXES>& axes_minmax,
const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes) noexcept {
740 for (INT_TYPE i = 0; i < nboxes; ++i) {
741 node.child[i] = indices[i];
743 for (INT_TYPE i = nboxes; i <
N; ++i) {
744 node.child[i] = Node::EMPTY;
749 SRC_INT_TYPE* sub_indices[N+1];
754 sub_indices[2] = indices+nboxes;
755 split<H>(axes_minmax, boxes,
indices, nboxes, sub_indices[1], &sub_boxes[0]);
758 multiSplit<H>(axes_minmax, boxes,
indices, nboxes, sub_indices, sub_boxes);
// Count groups large enough to hand to parallel tasks.
762 INT_TYPE nparallel = 0;
763 static constexpr INT_TYPE PARALLEL_THRESHOLD = 1024;
764 for (INT_TYPE i = 0; i <
N; ++i) {
765 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
766 if (sub_nboxes == 1) {
767 node.child[i] = sub_indices[i][0];
769 else if (sub_nboxes >= PARALLEL_THRESHOLD) {
780 if (nparallel >= 2) {
786 parallel_nodes.
setSize(nparallel);
788 parallel_parent_nodes.
setSize(nparallel);
790 for (INT_TYPE taski = r.begin(), end = r.end(); taski <
end; ++taski) {
792 INT_TYPE counted_parallel = 0;
795 for (childi = 0; childi <
N; ++childi) {
796 sub_nboxes = sub_indices[childi+1]-sub_indices[childi];
797 if (sub_nboxes >= PARALLEL_THRESHOLD) {
798 if (counted_parallel == taski) {
813 Node& parent_node = parallel_parent_nodes[taski];
816 initNode<H>(local_nodes, parent_node, sub_boxes[childi], boxes, sub_indices[childi], sub_nboxes);
// Serial merge: splice each parallel subtree into `nodes`, rebasing its
// internal child indices by local_nodes_start; non-parallel groups recurse
// directly into `nodes`.
820 INT_TYPE counted_parallel = 0;
821 for (INT_TYPE i = 0; i <
N; ++i) {
822 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
823 if (sub_nboxes != 1) {
824 INT_TYPE local_nodes_start = nodes.size();
825 node.child[i] = Node::markInternal(local_nodes_start);
826 if (sub_nboxes >= PARALLEL_THRESHOLD) {
828 Node child_node = parallel_parent_nodes[counted_parallel];
830 for (INT_TYPE childi = 0; childi <
N; ++childi) {
831 INT_TYPE child_child = child_node.child[childi];
832 if (Node::isInternal(child_child) && child_child != Node::EMPTY) {
833 child_child += local_nodes_start;
834 child_node.child[childi] = child_child;
839 const UT_Array<Node>& local_nodes = parallel_nodes[counted_parallel];
841 INT_TYPE
n = local_nodes.
size();
842 nodes.bumpCapacity(local_nodes_start + n);
843 nodes.setSizeNoInit(local_nodes_start + n);
844 nodes[local_nodes_start-1] = child_node;
847 nodes.bumpCapacity(local_nodes_start + 1);
848 nodes.setSizeNoInit(local_nodes_start + 1);
849 initNode<H>(nodes, nodes[local_nodes_start], sub_boxes[i], boxes, sub_indices[i], sub_nboxes);
855 adjustParallelChildNodes<PARALLEL_THRESHOLD>(nparallel, nodes, node, parallel_nodes.
array(), sub_indices);
858 for (INT_TYPE i = 0; i <
N; ++i) {
859 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
860 if (sub_nboxes != 1) {
861 INT_TYPE local_nodes_start = nodes.size();
862 node.child[i] = Node::markInternal(local_nodes_start);
863 nodes.bumpCapacity(local_nodes_start + 1);
864 nodes.setSizeNoInit(local_nodes_start + 1);
865 initNode<H>(nodes, nodes[local_nodes_start], sub_boxes[i], boxes, sub_indices[i], sub_nboxes);
// NOTE(review): extraction-damaged fragment of BVH<N>::initNodeReorder
// (signature visible; many interior lines missing). Like initNode, but
// supports leaves holding up to max_items_per_leaf items: after splitting,
// groups small enough to be leaves have their indices gathered into
// leaf_indices, the remaining (internal) groups are shifted right in the
// index array, and the leaf indices are written back at the front so each
// leaf's items are contiguous starting at indices_offset.
872 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
873 void BVH<N>::initNodeReorder(
UT_Array<Node>& nodes,
Node &node,
const Box<T,NAXES>& axes_minmax,
const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes,
const INT_TYPE indices_offset,
const INT_TYPE max_items_per_leaf) noexcept {
876 for (INT_TYPE i = 0; i < nboxes; ++i) {
877 node.child[i] = indices_offset+i;
879 for (INT_TYPE i = nboxes; i <
N; ++i) {
880 node.child[i] = Node::EMPTY;
885 SRC_INT_TYPE* sub_indices[N+1];
890 sub_indices[2] = indices+nboxes;
891 split<H>(axes_minmax, boxes,
indices, nboxes, sub_indices[1], &sub_boxes[0]);
894 multiSplit<H>(axes_minmax, boxes,
indices, nboxes, sub_indices, sub_boxes);
// Collect indices of groups that fit in a leaf (<= max_items_per_leaf).
899 INT_TYPE nleaves = 0;
901 SRC_INT_TYPE leaf_sizes[
N];
902 INT_TYPE sub_nboxes0 = sub_indices[1]-sub_indices[0];
903 if (sub_nboxes0 <= max_items_per_leaf) {
904 leaf_sizes[0] = sub_nboxes0;
905 for (
int j = 0;
j < sub_nboxes0; ++
j)
906 leaf_indices.
append(sub_indices[0][
j]);
909 INT_TYPE sub_nboxes1 = sub_indices[2]-sub_indices[1];
910 if (sub_nboxes1 <= max_items_per_leaf) {
911 leaf_sizes[nleaves] = sub_nboxes1;
912 for (
int j = 0;
j < sub_nboxes1; ++
j)
913 leaf_indices.
append(sub_indices[1][
j]);
916 for (INT_TYPE i = 2; i <
N; ++i) {
917 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
918 if (sub_nboxes <= max_items_per_leaf) {
919 leaf_sizes[nleaves] = sub_nboxes;
920 for (
int j = 0;
j < sub_nboxes; ++
j)
921 leaf_indices.
append(sub_indices[i][
j]);
// Shift internal (non-leaf) groups toward the back (iterating i from N-1
// down to 0), then copy the gathered leaf indices into the freed front.
926 INT_TYPE move_distance = 0;
927 INT_TYPE index_move_distance = 0;
928 for (INT_TYPE i = N; i --> 0; )
930 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
931 if (sub_nboxes <= max_items_per_leaf)
934 index_move_distance += sub_nboxes;
936 else if (move_distance > 0)
938 SRC_INT_TYPE *start_src_index = sub_indices[i];
939 for (SRC_INT_TYPE *src_index = sub_indices[i+1]-1; src_index >= start_src_index; --src_index) {
940 src_index[index_move_distance] = src_index[0];
942 sub_indices[i+move_distance] = sub_indices[i]+index_move_distance;
945 index_move_distance = 0;
946 for (INT_TYPE i = 0; i < nleaves; ++i)
948 INT_TYPE sub_nboxes = leaf_sizes[i];
949 sub_indices[i] = indices+index_move_distance;
950 for (
int j = 0;
j < sub_nboxes; ++
j)
951 indices[index_move_distance+
j] = leaf_indices[index_move_distance+
j];
952 index_move_distance += sub_nboxes;
// Same parallel/serial recursion scheme as initNode, with leaf groups
// becoming direct children (offset into the reordered index array).
957 INT_TYPE nparallel = 0;
958 static constexpr INT_TYPE PARALLEL_THRESHOLD = 1024;
959 for (INT_TYPE i = 0; i <
N; ++i) {
960 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
961 if (sub_nboxes <= max_items_per_leaf) {
962 node.child[i] = indices_offset+(sub_indices[i]-sub_indices[0]);
964 else if (sub_nboxes >= PARALLEL_THRESHOLD) {
975 if (nparallel >= 2) {
981 parallel_nodes.
setSize(nparallel);
983 parallel_parent_nodes.
setSize(nparallel);
985 for (INT_TYPE taski = r.begin(), end = r.end(); taski <
end; ++taski) {
987 INT_TYPE counted_parallel = 0;
990 for (childi = 0; childi <
N; ++childi) {
991 sub_nboxes = sub_indices[childi+1]-sub_indices[childi];
992 if (sub_nboxes >= PARALLEL_THRESHOLD) {
993 if (counted_parallel == taski) {
1007 local_nodes.
setCapacity(nodeEstimate(sub_nboxes));
1008 Node& parent_node = parallel_parent_nodes[taski];
1011 initNodeReorder<H>(local_nodes, parent_node, sub_boxes[childi], boxes, sub_indices[childi], sub_nboxes,
1012 indices_offset+(sub_indices[childi]-sub_indices[0]), max_items_per_leaf);
1016 INT_TYPE counted_parallel = 0;
1017 for (INT_TYPE i = 0; i <
N; ++i) {
1018 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
1019 if (sub_nboxes > max_items_per_leaf) {
1020 INT_TYPE local_nodes_start = nodes.size();
1021 node.child[i] = Node::markInternal(local_nodes_start);
1022 if (sub_nboxes >= PARALLEL_THRESHOLD) {
1024 Node child_node = parallel_parent_nodes[counted_parallel];
1025 ++local_nodes_start;
1026 for (INT_TYPE childi = 0; childi <
N; ++childi) {
1027 INT_TYPE child_child = child_node.child[childi];
1028 if (Node::isInternal(child_child) && child_child != Node::EMPTY) {
1029 child_child += local_nodes_start;
1030 child_node.child[childi] = child_child;
1035 const UT_Array<Node>& local_nodes = parallel_nodes[counted_parallel];
1037 INT_TYPE n = local_nodes.
size();
1038 nodes.bumpCapacity(local_nodes_start + n);
1039 nodes.setSizeNoInit(local_nodes_start + n);
1040 nodes[local_nodes_start-1] = child_node;
1043 nodes.bumpCapacity(local_nodes_start + 1);
1044 nodes.setSizeNoInit(local_nodes_start + 1);
1045 initNodeReorder<H>(nodes, nodes[local_nodes_start], sub_boxes[i], boxes, sub_indices[i], sub_nboxes,
1046 indices_offset+(sub_indices[i]-sub_indices[0]), max_items_per_leaf);
1052 adjustParallelChildNodes<PARALLEL_THRESHOLD>(nparallel, nodes, node, parallel_nodes.
array(), sub_indices);
1055 for (INT_TYPE i = 0; i <
N; ++i) {
1056 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
1057 if (sub_nboxes > max_items_per_leaf) {
1058 INT_TYPE local_nodes_start = nodes.size();
1059 node.child[i] = Node::markInternal(local_nodes_start);
1060 nodes.bumpCapacity(local_nodes_start + 1);
1061 nodes.setSizeNoInit(local_nodes_start + 1);
1062 initNodeReorder<H>(nodes, nodes[local_nodes_start], sub_boxes[i], boxes, sub_indices[i], sub_nboxes,
1063 indices_offset+(sub_indices[i]-sub_indices[0]), max_items_per_leaf);
// NOTE(review): extraction-damaged fragment of BVH<N>::multiSplit (signature
// visible). Splits one index range into N sub-ranges by repeatedly applying
// binary split<H>. If any axis of the bound is degenerate (min == max), it
// re-dispatches to the BOX_AREA or BOX_PERIMETER heuristic variant. Two
// strategies are visible: an "unsorted start/end pairs" scheme that is later
// sorted back into pointer order, and a greedy scheme that repeatedly splits
// the sub-box with the largest heuristic*count.
1070 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
1071 void BVH<N>::multiSplit(
const Box<T,NAXES>& axes_minmax,
const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes, SRC_INT_TYPE* sub_indices[N+1],
Box<T,NAXES> sub_boxes[N]) noexcept {
1078 uint num_axes_zero = 0;
1079 for (
uint axis = 0; axis < NAXES; ++axis)
1081 const T*
v = axes_minmax[axis];
1082 num_axes_zero += (v[0] == v[1]);
1084 if (num_axes_zero != 0)
1088 multiSplit<BVH_Heuristic::BOX_AREA>(axes_minmax, boxes,
indices, nboxes, sub_indices, sub_boxes);
1091 multiSplit<BVH_Heuristic::BOX_PERIMETER>(axes_minmax, boxes,
indices, nboxes, sub_indices, sub_boxes);
1097 sub_indices[2] = indices+nboxes;
1098 split<H>(axes_minmax, boxes,
indices, nboxes, sub_indices[1], &sub_boxes[0]);
// Variant keeping (start,end) pairs unsorted while splitting, then
// re-sorting sub-ranges into increasing pointer order at the end.
1105 SRC_INT_TYPE* sub_indices_startend[2*
N];
1107 sub_boxes_unsorted[0] = sub_boxes[0];
1108 sub_boxes_unsorted[1] = sub_boxes[1];
1109 sub_indices_startend[0] = sub_indices[0];
1110 sub_indices_startend[1] = sub_indices[1];
1111 sub_indices_startend[2] = sub_indices[1];
1112 sub_indices_startend[3] = sub_indices[2];
1113 for (INT_TYPE nsub = 2; nsub <
N; ++nsub) {
1114 SRC_INT_TYPE* selected_start = sub_indices_startend[0];
1115 SRC_INT_TYPE* selected_end = sub_indices_startend[1];
1119 for (INT_TYPE i = 0; i < nsub-1; ++i) {
1120 sub_indices_startend[2*i ] = sub_indices_startend[2*i+2];
1121 sub_indices_startend[2*i+1] = sub_indices_startend[2*i+3];
1123 for (INT_TYPE i = 0; i < nsub-1; ++i) {
1124 sub_boxes_unsorted[i] = sub_boxes_unsorted[i-1];
1128 split<H>(sub_box, boxes, selected_start, selected_end-selected_start, sub_indices_startend[2*nsub-1], &sub_boxes_unsorted[nsub]);
1129 sub_indices_startend[2*nsub-2] = selected_start;
1130 sub_indices_startend[2*nsub] = sub_indices_startend[2*nsub-1];
1131 sub_indices_startend[2*nsub+1] = selected_end;
1134 sub_indices[
N] = indices+nboxes;
1135 for (INT_TYPE i = 0; i <
N; ++i) {
1136 SRC_INT_TYPE* prev_pointer = (i != 0) ? sub_indices[i-1] :
nullptr;
1137 SRC_INT_TYPE* min_pointer =
nullptr;
1139 for (INT_TYPE
j = 0;
j <
N; ++
j) {
1140 SRC_INT_TYPE* cur_pointer = sub_indices_startend[2*
j];
1141 if ((cur_pointer > prev_pointer) && (!min_pointer || (cur_pointer < min_pointer))) {
1142 min_pointer = cur_pointer;
1143 box = sub_boxes_unsorted[
j];
1147 sub_indices[i] = min_pointer;
// Greedy variant: repeatedly split the sub-range with the largest
// heuristic-area * index-count, keeping sub_indices/sub_boxes sorted by
// shifting entries above the chosen slot.
1154 sub_box_areas[0] = unweightedHeuristic<H>(sub_boxes[0]);
1155 sub_box_areas[1] = unweightedHeuristic<H>(sub_boxes[1]);
1156 for (INT_TYPE nsub = 2; nsub <
N; ++nsub) {
1158 INT_TYPE split_choice = INT_TYPE(-1);
1160 for (INT_TYPE i = 0; i < nsub; ++i) {
1161 const INT_TYPE index_count = (sub_indices[i+1]-sub_indices[i]);
1162 if (index_count > 1) {
1163 const T heuristic = sub_box_areas[i]*index_count;
1164 if (split_choice == INT_TYPE(-1) || heuristic > max_heuristic) {
1166 max_heuristic = heuristic;
1170 UT_ASSERT_MSG_P(split_choice != INT_TYPE(-1),
"There should always be at least one that can be split!");
1172 SRC_INT_TYPE* selected_start = sub_indices[split_choice];
1173 SRC_INT_TYPE* selected_end = sub_indices[split_choice+1];
1176 for (INT_TYPE i = nsub; i > split_choice; --i) {
1177 sub_indices[i+1] = sub_indices[i];
1179 for (INT_TYPE i = nsub-1; i > split_choice; --i) {
1180 sub_boxes[i+1] = sub_boxes[i];
1182 for (INT_TYPE i = nsub-1; i > split_choice; --i) {
1183 sub_box_areas[i+1] = sub_box_areas[i];
1187 split<H>(sub_boxes[split_choice], boxes, selected_start, selected_end-selected_start, sub_indices[split_choice+1], &sub_boxes[split_choice]);
1188 sub_box_areas[split_choice] = unweightedHeuristic<H>(sub_boxes[split_choice]);
1189 sub_box_areas[split_choice+1] = unweightedHeuristic<H>(sub_boxes[split_choice+1]);
// NOTE(review): extraction-damaged fragment of BVH<N>::split (signature
// visible). Binary split of an index range, choosing split_indices and the
// two child boxes. Visible strategies: degenerate-axis re-dispatch to
// BOX_AREA/BOX_PERIMETER; exhaustive 2^(n-1) partition search for <= 6
// boxes; a degenerate fallback when the longest axis has zero length;
// span-based splitting (splitMiddleCountAxis / splitLargeCountAxis) with an
// optional multi-axis retry under BVH_TRY_ALL_AXES; and an nth_element
// fallback when no span split was found.
1195 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
1196 void BVH<N>::split(
const Box<T,NAXES>& axes_minmax,
const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes, SRC_INT_TYPE*& split_indices,
Box<T,NAXES>* split_boxes) noexcept {
1198 split_boxes[0].initBounds(boxes[indices[0]]);
1199 split_boxes[1].initBounds(boxes[indices[1]]);
1200 split_indices = indices+1;
1203 UT_ASSERT_MSG_P(nboxes > 2,
"Cases with less than 3 boxes should have already been handled!");
1215 uint num_axes_zero = 0;
1216 for (
uint axis = 0; axis < NAXES; ++axis)
1218 const T*
v = axes_minmax[axis];
1219 num_axes_zero += (v[0] == v[1]);
1221 if (num_axes_zero != 0)
1225 split<BVH_Heuristic::BOX_AREA>(axes_minmax, boxes,
indices, nboxes, split_indices, split_boxes);
1228 split<BVH_Heuristic::BOX_PERIMETER>(axes_minmax, boxes,
indices, nboxes, split_indices, split_boxes);
// Small-count path: try every 2-way partition (box 0 pinned to side 0) and
// keep the one minimizing sum of heuristic*count.
1233 constexpr INT_TYPE SMALL_LIMIT = 6;
1234 if (nboxes <= SMALL_LIMIT) {
1239 for (INT_TYPE box = 0; box < nboxes; ++box) {
1240 local_boxes[box].initBounds(boxes[indices[box]]);
1243 const INT_TYPE partition_limit = (INT_TYPE(1)<<(nboxes-1));
1244 INT_TYPE best_partition = INT_TYPE(-1);
1246 for (INT_TYPE partition_bits = 1; partition_bits < partition_limit; ++partition_bits) {
1248 sub_boxes[0] = local_boxes[0];
1249 sub_boxes[1].initBounds();
1250 INT_TYPE sub_counts[2] = {1,0};
1251 for (INT_TYPE bit = 0; bit < nboxes-1; ++bit) {
1252 INT_TYPE dest = (partition_bits>>bit)&1;
1253 sub_boxes[dest].combine(local_boxes[bit+1]);
1259 unweightedHeuristic<H>(sub_boxes[0])*sub_counts[0] +
1260 unweightedHeuristic<H>(sub_boxes[1])*sub_counts[1];
1262 if (best_partition == INT_TYPE(-1) || heuristic < best_heuristic) {
1264 best_partition = partition_bits;
1265 best_heuristic = heuristic;
1266 split_boxes[0] = sub_boxes[0];
1267 split_boxes[1] = sub_boxes[1];
1271 #if 0 // This isn't actually necessary with the current design, because I changed how the number of subtree nodes is determined.
1277 if (best_partition == partition_limit - 1) {
1279 SRC_INT_TYPE last_index = indices[0];
1280 SRC_INT_TYPE* dest_indices =
indices;
1281 SRC_INT_TYPE* local_split_indices = indices + nboxes-1;
1282 for (; dest_indices != local_split_indices; ++dest_indices) {
1283 dest_indices[0] = dest_indices[1];
1285 *local_split_indices = last_index;
1286 split_indices = local_split_indices;
1290 sub_boxes[0] = sub_boxes[1];
1291 sub_boxes[1] = temp_box;
// Reorder the index array to match the chosen partition: side-0 entries
// first, then side-1 entries.
1298 SRC_INT_TYPE local_indices[SMALL_LIMIT-1];
1299 for (INT_TYPE box = 0; box < nboxes-1; ++box) {
1300 local_indices[box] = indices[box+1];
1302 SRC_INT_TYPE* dest_indices = indices+1;
1303 SRC_INT_TYPE* src_indices = local_indices;
1305 for (INT_TYPE bit = 0; bit < nboxes-1; ++bit, ++src_indices) {
1306 if (!((best_partition>>bit)&1)) {
1308 *dest_indices = *src_indices;
1312 split_indices = dest_indices;
1314 src_indices = local_indices;
1315 for (INT_TYPE bit = 0; bit < nboxes-1; ++bit, ++src_indices) {
1316 if ((best_partition>>bit)&1) {
1318 *dest_indices = *src_indices;
// General path: find the longest axis; if it has no extent, fall back to an
// arbitrary middle split with both child boxes equal to the full bound.
1326 T max_axis_length = axes_minmax.vals[0][1] - axes_minmax.vals[0][0];
1327 for (
uint axis = 1; axis < NAXES; ++axis) {
1328 const T axis_length = axes_minmax.vals[axis][1] - axes_minmax.vals[axis][0];
1329 if (axis_length > max_axis_length) {
1331 max_axis_length = axis_length;
1335 if (!(max_axis_length >
T(0))) {
1338 split_indices = indices + nboxes/2;
1339 split_boxes[0] = axes_minmax;
1340 split_boxes[1] = axes_minmax;
1344 #if BVH_TRY_ALL_AXES
1346 half_box.vals[max_axis][1] = 0.5*(half_box.vals[max_axis][0] + half_box.vals[max_axis][1]);
1347 T good_heuristic = unweightedHeuristic<H>(half_box)*nboxes;
1349 const INT_TYPE axis = max_axis;
1352 constexpr INT_TYPE MID_LIMIT = 2*NSPANS;
1353 if (nboxes <= MID_LIMIT) {
1354 #if BVH_TRY_ALL_AXES
1356 split_indices =
nullptr;
1358 splitMiddleCountAxis<H>(max_axis, best_heuristic, boxes,
indices, nboxes, split_indices, split_boxes);
1361 if (best_heuristic > 1.2*good_heuristic)
1363 uint axis1 = (max_axis==0) ? 2 : (max_axis-1);
1364 splitMiddleCountAxis<H>(axis1, best_heuristic, boxes,
indices, nboxes, split_indices, split_boxes);
1366 uint axis2 = (axis1==0) ? 2 : (axis1-1);
1367 splitMiddleCountAxis<H>(axis2, best_heuristic, boxes,
indices, nboxes, split_indices, split_boxes);
// Large-count path: span-bucket split on the longest axis, optionally
// retrying the other two axes when the result looks poor (> 1.2x the
// half-box reference heuristic, BVH_TRY_ALL_AXES only).
1376 INT_TYPE best_left_count_split;
1377 INT_TYPE maxaxis_first_left_count = -1;
1378 INT_TYPE maxaxis_last_left_count = -1;
1381 INT_TYPE best_axis = -1;
1382 INT_TYPE best_split_index = -1;
1383 splitLargeCountAxis<H>(max_axis, axes_minmax.vals[max_axis][0], max_axis_length,
1384 best_heuristic, best_axis, best_split_index,
1385 best_left_split_box, best_right_split_box, best_left_count_split,
1386 maxaxis_first_left_count, maxaxis_last_left_count,
1390 if (best_heuristic > 1.2*good_heuristic)
1392 uint axis1 = (max_axis==0) ? 2 : (max_axis-1);
1393 splitLargeCountAxis<H>(axis1, axes_minmax.vals[axis1][0], axes_minmax.vals[axis1][1] - axes_minmax.vals[axis1][0],
1394 best_heuristic, best_axis, best_split_index,
1395 best_left_split_box, best_right_split_box, best_left_count_split,
1396 maxaxis_first_left_count, maxaxis_last_left_count,
1399 uint axis2 = (axis1==0) ? 2 : (axis1-1);
1400 splitLargeCountAxis<H>(axis2, axes_minmax.vals[axis2][0], axes_minmax.vals[axis2][1] - axes_minmax.vals[axis2][0],
1401 best_heuristic, best_axis, best_split_index,
1402 best_left_split_box, best_right_split_box, best_left_count_split,
1403 maxaxis_first_left_count, maxaxis_last_left_count,
1407 SRC_INT_TYPE*
const indices_end = indices+nboxes;
// No acceptable span split: partition with nthElement around a clamped
// midpoint so neither side is smaller than nboxes/MIN_FRACTION.
1409 if (best_split_index == -1) {
1412 const INT_TYPE min_count = nboxes/MIN_FRACTION;
1413 const INT_TYPE max_count = ((MIN_FRACTION-1)*
uint64(nboxes))/MIN_FRACTION;
1422 SRC_INT_TYPE* nth_index;
1423 if (maxaxis_first_left_count > max_count) {
1425 nth_index = indices+max_count;
1428 else if (maxaxis_last_left_count < min_count) {
1430 nth_index = indices+min_count;
1435 nth_index = indices+nboxes/2;
1445 nthElement<T>(boxes,
indices,indices+nboxes,max_axis,nth_index);
1447 split_indices = nth_index;
1449 for (SRC_INT_TYPE* left_indices = indices+1; left_indices < nth_index; ++left_indices) {
1450 left_box.combine(boxes[*left_indices]);
1453 for (SRC_INT_TYPE* right_indices = nth_index+1; right_indices < indices_end; ++right_indices) {
1454 right_box.combine(boxes[*right_indices]);
1456 split_boxes[0] = left_box;
1457 split_boxes[1] = right_box;
// Span split found: partition by the pivot plane; reuse the precomputed
// child boxes when the split point lands inside the pivot run, otherwise
// clamp to the run boundary and recompute both boxes.
1461 const T best_axis_min_x2 = center_scale*axes_minmax.vals[best_axis][0];
1462 const T best_axis_length = axes_minmax.vals[best_axis][1] - axes_minmax.vals[best_axis][0];
1463 const T pivotx2 = best_axis_min_x2 + (best_split_index+1)*best_axis_length/(NSPANS/center_scale);
1464 SRC_INT_TYPE* ppivot_start;
1465 SRC_INT_TYPE* ppivot_end;
1466 partitionByCentre(boxes,indices,indices+nboxes,best_axis,pivotx2,ppivot_start,ppivot_end);
1468 split_indices = indices + best_left_count_split;
1473 if (split_indices >= ppivot_start && split_indices <= ppivot_end) {
1474 split_boxes[0] = best_left_split_box;
1475 split_boxes[1] = best_right_split_box;
1480 if (split_indices < ppivot_start) {
1481 split_indices = ppivot_start;
1484 split_indices = ppivot_end;
1488 if (split_indices == indices) {
1491 else if (split_indices == indices_end) {
1496 for (SRC_INT_TYPE* left_indices = indices+1; left_indices < split_indices; ++left_indices) {
1497 left_box.combine(boxes[*left_indices]);
1500 for (SRC_INT_TYPE* right_indices = split_indices+1; right_indices < indices_end; ++right_indices) {
1501 right_box.combine(boxes[*right_indices]);
1503 split_boxes[0] = left_box;
1504 split_boxes[1] = right_box;
// NOTE(review): lossy doc-extraction of BVH<N>::splitMiddleCountAxis.
// Interior source lines/comments are missing and tokens are split across
// lines ("u"+"int" == "uint"); code is left byte-identical, comments only.
// Purpose (hedged, from visible code): for a small box count, sort the box
// indices by centroid along `axis` (insertion-sort of 4 chunks + 3-way
// std::merge), build left/right prefix/suffix bounding boxes, and choose the
// split position minimizing unweightedHeuristic<H> — TODO confirm against
// the unextracted header.
1509 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
1510 void BVH<N>::splitMiddleCountAxis(
const INT_TYPE axis, T &best_heuristic,
const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes, SRC_INT_TYPE*& split_indices,
Box<T,NAXES>* split_boxes) noexcept {
1515 #if BVH_TRY_ALL_AXES
// midpointsx2 appears to hold 2*centroid per box (utBoxCenter), indexed
// by local position; MID_LIMIT bounds nboxes for these stack arrays.
1516 constexpr INT_TYPE MID_LIMIT = 2*NSPANS;
1521 T midpointsx2[MID_LIMIT];
1522 for (INT_TYPE i = 0; i < nboxes; ++i) {
1523 midpointsx2[i] =
utBoxCenter(boxes[indices[i]], axis);
1525 SRC_INT_TYPE local_indices[MID_LIMIT];
1526 for (INT_TYPE i = 0; i < nboxes; ++i) {
1527 local_indices[i] = i;
// Four roughly-equal chunks, each insertion-sorted below, then merged.
1530 const INT_TYPE chunk_starts[5] = {0, nboxes/4, nboxes/2, INT_TYPE((3*
uint64(nboxes))/4), nboxes};
1533 for (INT_TYPE chunk = 0; chunk < 4; ++chunk) {
1534 const INT_TYPE
start = chunk_starts[chunk];
1535 const INT_TYPE end = chunk_starts[chunk+1];
1536 for (INT_TYPE i = start+1; i <
end; ++i) {
1537 SRC_INT_TYPE indexi = local_indices[i];
1538 T vi = midpointsx2[indexi];
1539 for (INT_TYPE
j = start;
j < i; ++
j) {
1540 SRC_INT_TYPE indexj = local_indices[
j];
1541 T vj = midpointsx2[indexj];
1544 local_indices[
j] = indexi;
1548 local_indices[
j] = indexi;
1551 indexj = local_indices[
j];
// Pairwise merges of the 4 sorted chunks (1+2, 3+4, then the two halves),
// ordered by midpointsx2.
1559 SRC_INT_TYPE local_indices_temp[MID_LIMIT];
1560 std::merge(local_indices, local_indices+chunk_starts[1],
1561 local_indices+chunk_starts[1], local_indices+chunk_starts[2],
1562 local_indices_temp, [&midpointsx2](
const SRC_INT_TYPE
a,
const SRC_INT_TYPE
b)->
bool {
1563 return midpointsx2[
a] < midpointsx2[
b];
1565 std::merge(local_indices+chunk_starts[2], local_indices+chunk_starts[3],
1566 local_indices+chunk_starts[3], local_indices+chunk_starts[4],
1567 local_indices_temp+chunk_starts[2], [&midpointsx2](
const SRC_INT_TYPE a,
const SRC_INT_TYPE b)->
bool {
1568 return midpointsx2[
a] < midpointsx2[
b];
1570 std::merge(local_indices_temp, local_indices_temp+chunk_starts[2],
1571 local_indices_temp+chunk_starts[2], local_indices_temp+chunk_starts[4],
1572 local_indices, [&midpointsx2](
const SRC_INT_TYPE a,
const SRC_INT_TYPE b)->
bool {
1573 return midpointsx2[
a] < midpointsx2[
b];
// Map local positions back to the caller's source indices.
1577 for (INT_TYPE i = 0; i < nboxes; ++i) {
1578 local_indices[i] = indices[local_indices[i]];
1580 #if !BVH_TRY_ALL_AXES
1582 for (INT_TYPE i = 0; i < nboxes; ++i) {
1583 indices[i] = local_indices[i];
// Non-try-all-axes path: stable_sort by centroid along max_axis instead.
1587 std::stable_sort(indices, indices+nboxes, [boxes,max_axis](SRC_INT_TYPE a, SRC_INT_TYPE b)->
bool {
// Prefix (left_boxes) and suffix (right_boxes) bounds for every split.
1595 const INT_TYPE nsplits = nboxes-1;
1597 left_boxes[0] = box_accumulator;
1598 for (INT_TYPE i = 1; i < nsplits; ++i) {
1599 box_accumulator.combine(boxes[local_indices[i]]);
1600 left_boxes[i] = box_accumulator;
1602 box_accumulator.initBounds(boxes[local_indices[nsplits-1]]);
1603 right_boxes[nsplits-1] = box_accumulator;
1604 for (INT_TYPE i = nsplits-1; i > 0; --i) {
1605 box_accumulator.combine(boxes[local_indices[i]]);
1606 right_boxes[i-1] = box_accumulator;
// Pick the split with the smallest weighted heuristic (SAH-like:
// area-ish term times child box count on each side).
1609 INT_TYPE best_split = 0;
1610 T best_local_heuristic =
1611 unweightedHeuristic<H>(left_boxes[0]) +
1612 unweightedHeuristic<H>(right_boxes[0])*(nboxes-1);
1615 unweightedHeuristic<H>(left_boxes[
split])*(
split+1) +
1616 unweightedHeuristic<H>(right_boxes[
split])*(nboxes-(
split+1));
1617 if (heuristic < best_local_heuristic) {
1619 best_local_heuristic = heuristic;
1623 #if BVH_TRY_ALL_AXES
// Only commit this axis's ordering/split if it beats the best so far.
1624 if (!split_indices || best_local_heuristic < best_heuristic) {
1626 for (INT_TYPE i = 0; i < nboxes; ++i) {
1627 indices[i] = local_indices[i];
1629 split_indices = indices+best_split+1;
1630 split_boxes[0] = left_boxes[best_split];
1631 split_boxes[1] = right_boxes[best_split];
1632 best_heuristic = best_local_heuristic;
1635 split_indices = indices+best_split+1;
1636 split_boxes[0] = left_boxes[best_split];
1637 split_boxes[1] = right_boxes[best_split];
// NOTE(review): lossy doc-extraction of BVH<N>::splitLargeCountAxis.
// Interior lines are missing and "&" captures were HTML-mangled
// ("¶llel_boxes" was presumably "&parallel_boxes"). Code byte-identical;
// comments only. Purpose (hedged): binned SAH split for large box counts —
// bucket boxes into NSPANS spans along `axis` (serial or UTparallelFor),
// build prefix/suffix span boxes/counts, pick the cheapest span boundary,
// then partition the index array at that boundary — TODO confirm.
1641 #if BVH_TRY_ALL_AXES
1645 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
1646 void BVH<N>::splitLargeCountAxis(
1647 const INT_TYPE axis,
const T axis_min,
const T axis_length,
1648 T &best_heuristic, INT_TYPE &best_axis, INT_TYPE &best_split_index,
1651 INT_TYPE &best_left_count_split,
1652 INT_TYPE &maxaxis_first_left_count,
1653 INT_TYPE &maxaxis_last_left_count,
1654 const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes
1657 const T axis_min = axes_minmax.vals[max_axis][0];
1658 const T axis_length = max_axis_length;
// Reset per-span bounds and counts before binning.
1661 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1662 span_boxes[i].initBounds();
1664 INT_TYPE span_counts[NSPANS];
1665 for (INT_TYPE i = 0; i < NSPANS; ++i) {
// Decide serial vs. parallel binning by box count.
1672 constexpr INT_TYPE BOX_SPANS_PARALLEL_THRESHOLD = 2048;
1673 INT_TYPE ntasks = 1;
1674 if (nboxes >= BOX_SPANS_PARALLEL_THRESHOLD) {
1676 ntasks = (nprocessors > 1) ?
SYSmin(4*nprocessors, nboxes/(BOX_SPANS_PARALLEL_THRESHOLD/2)) : 1;
// Serial binning path.
1679 for (INT_TYPE indexi = 0; indexi < nboxes; ++indexi) {
1680 const auto& box = boxes[indices[indexi]];
1682 const uint span_index =
SYSclamp(
int((sum-axis_min_x2)*axis_index_scale),
int(0),
int(NSPANS-1));
1683 ++span_counts[span_index];
1685 span_box.combine(box);
// Parallel binning path: per-task span boxes/counts, reduced below.
1691 parallel_boxes.
setSize(NSPANS*ntasks);
1693 parallel_counts.
setSize(NSPANS*ntasks);
1694 UTparallelFor(
UT_BlockedRange<INT_TYPE>(0,ntasks), [¶llel_boxes,¶llel_counts,ntasks,boxes,nboxes,indices,axis,axis_min_x2,axis_index_scale](
const UT_BlockedRange<INT_TYPE>& r) {
1695 for (INT_TYPE taski = r.begin(), end = r.end(); taski <
end; ++taski) {
1697 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1698 span_boxes[i].initBounds();
1700 INT_TYPE span_counts[NSPANS];
1701 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1704 const INT_TYPE startbox = (taski*
uint64(nboxes))/ntasks;
1705 const INT_TYPE endbox = ((taski+1)*
uint64(nboxes))/ntasks;
1706 for (INT_TYPE indexi = startbox; indexi != endbox; ++indexi) {
1707 const auto& box = boxes[indices[indexi]];
1709 const uint span_index =
SYSclamp(
int((sum-axis_min_x2)*axis_index_scale),
int(0),
int(NSPANS-1));
1710 ++span_counts[span_index];
1712 span_box.combine(box);
1715 const INT_TYPE dest_array_start = taski*NSPANS;
1716 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1717 parallel_boxes[dest_array_start+i] = span_boxes[i];
1719 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1720 parallel_counts[dest_array_start+i] = span_counts[i];
// Reduce per-task results into the shared span arrays.
1726 for (INT_TYPE taski = 0; taski < ntasks; ++taski) {
1727 const INT_TYPE dest_array_start = taski*NSPANS;
1728 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1729 span_boxes[i].combine(parallel_boxes[dest_array_start+i]);
1732 for (INT_TYPE taski = 0; taski < ntasks; ++taski) {
1733 const INT_TYPE dest_array_start = taski*NSPANS;
1734 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1735 span_counts[i] += parallel_counts[dest_array_start+i];
// Prefix boxes (left of each split) and suffix boxes (right of each split).
1747 left_boxes[0] = box_accumulator;
1748 for (INT_TYPE i = 1; i < NSPLITS; ++i) {
1749 box_accumulator.combine(span_boxes[i]);
1750 left_boxes[i] = box_accumulator;
1752 box_accumulator = span_boxes[NSPANS-1];
1753 right_boxes[NSPLITS-1] = box_accumulator;
1754 for (INT_TYPE i = NSPLITS-1; i > 0; --i) {
1755 box_accumulator.combine(span_boxes[i]);
1756 right_boxes[i-1] = box_accumulator;
// Running left-side item counts per candidate split.
1759 INT_TYPE left_counts[NSPLITS];
1762 INT_TYPE count_accumulator = span_counts[0];
1763 left_counts[0] = count_accumulator;
1764 for (INT_TYPE spliti = 1; spliti < NSPLITS; ++spliti) {
1765 count_accumulator += span_counts[spliti];
1766 left_counts[spliti] = count_accumulator;
// Reject splits that leave fewer than nboxes/MIN_FRACTION on either side.
1770 const INT_TYPE min_count = nboxes/MIN_FRACTION;
1771 UT_ASSERT_MSG_P(min_count > 0,
"MID_LIMIT above should have been large enough that nboxes would be > MIN_FRACTION");
1772 const INT_TYPE max_count = ((MIN_FRACTION-1)*
uint64(nboxes))/MIN_FRACTION;
1773 UT_ASSERT_MSG_P(max_count < nboxes,
"I'm not sure how this could happen mathematically, but it needs to be checked.");
1774 T smallest_heuristic = std::numeric_limits<T>::infinity();
1775 INT_TYPE split_index = -1;
1776 for (INT_TYPE spliti = 0; spliti < NSPLITS; ++spliti) {
1777 const INT_TYPE left_count = left_counts[spliti];
1778 if (left_count < min_count || left_count > max_count) {
1781 const INT_TYPE right_count = nboxes-left_count;
1783 left_count*unweightedHeuristic<H>(left_boxes[spliti]) +
1784 right_count*unweightedHeuristic<H>(right_boxes[spliti]);
1785 if (heuristic < smallest_heuristic) {
1786 smallest_heuristic = heuristic;
1787 split_index = spliti;
1791 #if BVH_TRY_ALL_AXES
1792 if (maxaxis_first_left_count == -1) {
1793 maxaxis_first_left_count = left_counts[0];
1794 maxaxis_last_left_count = left_counts[NSPLITS-1];
1796 if (split_index == -1) {
1800 if (best_axis < 0 || smallest_heuristic < best_heuristic) {
1802 best_split_index = split_index;
1803 best_heuristic = smallest_heuristic;
1804 best_left_split_box = left_boxes[split_index];
1805 best_right_split_box = right_boxes[split_index];
1806 best_left_count_split = left_counts[split_index];
1810 SRC_INT_TYPE*
const indices_end = indices+nboxes;
// Fallback when no span split qualified: nth_element at a forced position
// (max_count, min_count, or the middle) — see nthElement below.
1812 if (split_index == -1) {
1822 SRC_INT_TYPE* nth_index;
1823 if (left_counts[0] > max_count) {
1825 nth_index = indices+max_count;
1828 else if (left_counts[NSPLITS-1] < min_count) {
1830 nth_index = indices+min_count;
1835 nth_index = indices+nboxes/2;
1845 nthElement<T>(boxes,
indices,indices+nboxes,max_axis,nth_index);
1847 split_indices = nth_index;
1849 for (SRC_INT_TYPE* left_indices = indices+1; left_indices < nth_index; ++left_indices) {
1850 left_box.combine(boxes[*left_indices]);
1853 for (SRC_INT_TYPE* right_indices = nth_index+1; right_indices < indices_end; ++right_indices) {
1854 right_box.combine(boxes[*right_indices]);
1856 split_boxes[0] = left_box;
1857 split_boxes[1] = right_box;
// Normal path: three-way partition around the chosen span boundary value.
1861 SRC_INT_TYPE* ppivot_start;
1862 SRC_INT_TYPE* ppivot_end;
1863 partitionByCentre(boxes,indices,indices+nboxes,max_axis,pivotx2,ppivot_start,ppivot_end);
1865 split_indices = indices + left_counts[split_index];
// If the target split lands inside the pivot-equal run, the precomputed
// span boxes are already exact.
1870 if (split_indices >= ppivot_start && split_indices <= ppivot_end) {
1871 split_boxes[0] = left_boxes[split_index];
1872 split_boxes[1] = right_boxes[split_index];
1877 if (split_indices < ppivot_start) {
1878 split_indices = ppivot_start;
1881 split_indices = ppivot_end;
1885 if (split_indices == indices) {
1888 else if (split_indices == indices_end) {
// Recompute exact child boxes for the clamped split position.
1893 for (SRC_INT_TYPE* left_indices = indices+1; left_indices < split_indices; ++left_indices) {
1894 left_box.combine(boxes[*left_indices]);
1897 for (SRC_INT_TYPE* right_indices = split_indices+1; right_indices < indices_end; ++right_indices) {
1898 right_box.combine(boxes[*right_indices]);
1900 split_boxes[0] = left_box;
1901 split_boxes[1] = right_box;
// NOTE(review): lossy doc-extraction of BVH<N>::adjustParallelChildNodes.
// Interior lines are missing (e.g. the UTparallelFor wrapper providing `r`
// is not visible); code byte-identical, comments only. Purpose (hedged):
// after children were built in parallel into separate UT_Array<Node>
// buffers, copy each buffer into the main `nodes` array and rebase its
// internal child indices by the destination offset — TODO confirm.
1907 template<u
int PARALLEL_THRESHOLD,
typename SRC_INT_TYPE>
1908 void BVH<N>::adjustParallelChildNodes(INT_TYPE nparallel,
UT_Array<Node>& nodes,
Node& node,
UT_Array<Node>* parallel_nodes, SRC_INT_TYPE* sub_indices) noexcept
1911 INT_TYPE counted_parallel = 0;
1912 INT_TYPE childi = 0;
// `r` is presumably a UT_BlockedRange from an enclosing UTparallelFor
// (the wrapper lines are missing from this extraction).
1913 for (INT_TYPE taski = r.begin(), end = r.end(); taski <
end; ++taski) {
// Find the taski-th child that was large enough to be built in parallel.
1915 INT_TYPE sub_nboxes;
1916 for (; childi <
N; ++childi) {
1917 sub_nboxes = sub_indices[childi+1]-sub_indices[childi];
1918 if (sub_nboxes >= PARALLEL_THRESHOLD) {
1919 if (counted_parallel == taski) {
1927 const UT_Array<Node>& local_nodes = parallel_nodes[counted_parallel];
1928 INT_TYPE n = local_nodes.
size();
// Destination offset: one past this child's node slot in `nodes`.
1929 INT_TYPE local_nodes_start = Node::getInternalNum(node.child[childi])+1;
1933 for (INT_TYPE
j = 0;
j <
n; ++
j) {
1934 Node local_node = local_nodes[
j];
// Rebase internal (non-leaf, non-empty) child references.
1935 for (INT_TYPE childj = 0; childj <
N; ++childj) {
1936 INT_TYPE local_child = local_node.child[childj];
1937 if (Node::isInternal(local_child) && local_child != Node::EMPTY) {
1938 local_child += local_nodes_start;
1939 local_node.child[childj] = local_child;
1942 nodes[local_nodes_start+
j] = local_node;
// NOTE(review): lossy doc-extraction of BVH<N>::nthElement; some interior
// lines (pivot initializers, swap tails, loop wrapper) are missing. Code
// byte-identical, comments only. Purpose: quickselect-style partial sort —
// repeatedly three-way-partition [indices, indices_end) by a median-of-3
// pivot of box centroids along `axis` until `nth` is positioned correctly.
1949 template<
typename T,
typename BOX_TYPE,
typename SRC_INT_TYPE>
1950 void BVH<N>::nthElement(
const BOX_TYPE* boxes, SRC_INT_TYPE* indices,
const SRC_INT_TYPE* indices_end,
const uint axis, SRC_INT_TYPE*
const nth) noexcept {
// pivots[] appears to sample first/middle/last centroids (x2 values).
1955 utBoxCenter(boxes[indices[(indices_end-indices)/2]], axis),
// Manual 3-element sort network so pivots[1] becomes the median.
1958 if (pivots[0] < pivots[1]) {
1959 const T temp = pivots[0];
1960 pivots[0] = pivots[1];
1963 if (pivots[0] < pivots[2]) {
1964 const T temp = pivots[0];
1965 pivots[0] = pivots[2];
1968 if (pivots[1] < pivots[2]) {
1969 const T temp = pivots[1];
1970 pivots[1] = pivots[2];
1973 T mid_pivotx2 = pivots[1];
// Clamp the pivot into the range's value bounds to guarantee progress.
1976 if (mid_pivotx2 < min_pivotx2) {
1977 mid_pivotx2 = min_pivotx2;
1979 else if (mid_pivotx2 > max_pivotx2) {
1980 mid_pivotx2 = max_pivotx2;
1983 SRC_INT_TYPE* pivot_start;
1984 SRC_INT_TYPE* pivot_end;
1985 partitionByCentre(boxes,indices,indices_end,axis,mid_pivotx2,pivot_start,pivot_end);
// Recurse (iteratively) into whichever side still contains `nth`;
// if nth falls inside the pivot-equal run, we're done.
1986 if (nth < pivot_start) {
1987 indices_end = pivot_start;
1989 else if (nth < pivot_end) {
1995 indices = pivot_end;
// Ranges of length <= 1 need no further work.
1997 if (indices_end <= indices+1) {
// NOTE(review): lossy doc-extraction of BVH<N>::partitionByCentre (some
// closing braces / else lines missing). Code byte-identical, comments only.
// Purpose: Dutch-national-flag style three-way partition of the index range
// by each box's centroid (x2) along `axis` versus `pivotx2`:
//   [indices, pivot_start)  -> centroid <  pivotx2
//   [pivot_start, pivot_end)-> centroid == pivotx2
//   [pivot_end, indices_end)-> centroid >  pivotx2
// Outputs the run boundaries through ppivot_start / ppivot_end.
2004 template<
typename T,
typename BOX_TYPE,
typename SRC_INT_TYPE>
2005 void BVH<N>::partitionByCentre(
const BOX_TYPE* boxes, SRC_INT_TYPE*
const indices,
const SRC_INT_TYPE*
const indices_end,
const uint axis,
const T pivotx2, SRC_INT_TYPE*& ppivot_start, SRC_INT_TYPE*& ppivot_end) noexcept {
2009 SRC_INT_TYPE* pivot_start =
indices;
2011 SRC_INT_TYPE* pivot_end =
indices;
2014 for (SRC_INT_TYPE* psrc_index = indices; psrc_index != indices_end; ++psrc_index) {
2015 const T srcsum =
utBoxCenter(boxes[*psrc_index], axis);
2016 if (srcsum < pivotx2) {
2017 if (psrc_index != pivot_start) {
// No equal-run yet: simple two-way swap into the "less" region.
2018 if (pivot_start == pivot_end) {
2020 const SRC_INT_TYPE temp = *psrc_index;
2021 *psrc_index = *pivot_start;
2022 *pivot_start = temp;
// Equal-run exists: rotate three slots so the equal run shifts right.
2026 const SRC_INT_TYPE temp = *psrc_index;
2027 *psrc_index = *pivot_end;
2028 *pivot_end = *pivot_start;
2029 *pivot_start = temp;
2035 else if (srcsum == pivotx2) {
// Append to the equal run (swap into pivot_end if not already there).
2037 if (psrc_index != pivot_end) {
2038 const SRC_INT_TYPE temp = *psrc_index;
2039 *psrc_index = *pivot_end;
2045 ppivot_start = pivot_start;
2046 ppivot_end = pivot_end;
// NOTE(review): lossy doc-extraction of BVH<N>::debugDump (the printing
// statements and loop structure are largely missing). Code byte-identical,
// comments only. Purpose (hedged): iterative depth-first walk of the tree
// using an explicit stack of (node index, child cursor) pairs, presumably
// printing each node for debugging — TODO confirm.
2051 void BVH<N>::debugDump()
const {
// Stack layout appears to be pairs: [..., nodei, child cursor].
2063 INT_TYPE cur_nodei = stack[stack.
size()-2];
2064 INT_TYPE cur_i = stack[stack.
size()-1];
// Advance this node's child cursor before descending.
2072 ++stack[stack.
size()-1];
2074 INT_TYPE child_nodei = cur_node.child[cur_i];
2075 if (Node::isInternal(child_nodei)) {
2076 if (child_nodei == Node::EMPTY) {
2083 INT_TYPE internal_node = Node::getInternalNum(child_nodei);
2086 stack.
append(internal_node);
// NOTE(review): lossy doc-extraction; "¤t_" was presumably "&current_"
// (HTML-entity mangling) and several brace/return lines are missing. Code
// byte-identical, comments only. Purpose (hedged): traversal functor set for
// BVH::traverseParallel that computes one bounding box per BVH node by
// combining child/item boxes bottom-up (the `post` callback), plus the
// driver tail that instantiates it — TODO confirm.
2098 template<u
int BVH_N,
typename ITEM_BOX,
typename NODE_BOX>
2101 struct NodeBoxFunctors
2103 NODE_BOX *myNodeData;
2105 const ITEM_BOX *myItemBoxes;
2108 NODE_BOX *node_data,
2110 const ITEM_BOX *item_boxes)
2111 : myNodeData(node_data)
2113 , myItemBoxes(item_boxes)
// pre(): presumably always descend (body elided in extraction).
2115 constexpr
SYS_FORCE_INLINE bool pre(
const int nodei, ITEM_BOX *data_for_parent)
const
// item(): leaf item's box is passed up to the parent unchanged.
2119 void SYS_FORCE_INLINE item(
const int itemi,
const int parent_nodei, ITEM_BOX &data_for_parent)
const
2121 data_for_parent = myItemBoxes[itemi];
// post(): union the children's boxes; store per-node and pass to parent.
2124 void post(
const int nodei,
const int parent_nodei, ITEM_BOX *data_for_parent,
const int nchildren,
const ITEM_BOX *child_data_array)
const
2126 ITEM_BOX box(child_data_array[0]);
2127 for (
int i = 1; i < nchildren; ++i)
2128 box.combine(child_data_array[i]);
2130 if (data_for_parent)
2131 *data_for_parent = box;
2133 NODE_BOX ¤t_node_data = myNodeData[nodei];
2134 current_node_data = box;
// Driver tail: run the traversal with a large-subtree parallel threshold.
2138 constexpr
uint PARALLEL_THRESHOLD = 4096;
2139 NodeBoxFunctors functors(node_boxes, item_boxes);
2140 bvh.template traverseParallel<ITEM_BOX>(PARALLEL_THRESHOLD, functors);
// NOTE(review): lossy doc-extraction of the expand-factor variant of
// NodeBoxFunctors ("¤t_" was presumably "&current_"). Code byte-identical,
// comments only. Purpose (hedged): same bottom-up node-box computation, but
// post() writes the box into interleaved per-child SIMD node storage and
// optionally pads each axis by myExpandFactor*(extent) plus a magnitude-
// scaled epsilon derived from the float exponent bits — TODO confirm.
2143 template<u
int NAXES,
typename T,u
int BVH_N,
typename ITEM_BOX,
typename NODE_BOX>
2146 struct NodeBoxFunctors
2148 NODE_BOX *
const myNodeData;
2150 const ITEM_BOX *
const myItemBoxes;
2151 const float myExpandFactor;
2154 NODE_BOX *node_data,
2156 const ITEM_BOX *item_boxes,
2157 const float expand_factor=0.0
f)
2158 : myNodeData(node_data)
2160 , myItemBoxes(item_boxes)
2161 , myExpandFactor(expand_factor)
2163 constexpr
SYS_FORCE_INLINE bool pre(
const int nodei, ITEM_BOX *data_for_parent)
const
2167 void SYS_FORCE_INLINE item(
const int itemi,
const int parent_nodei, ITEM_BOX &data_for_parent)
const
2169 data_for_parent = myItemBoxes[itemi];
2172 void post(
const int nodei,
const int parent_nodei, ITEM_BOX *data_for_parent,
const int nchildren,
const ITEM_BOX *child_data_array)
const
2174 ITEM_BOX box(child_data_array[0]);
2175 for (
int i = 1; i < nchildren; ++i)
2176 box.combine(child_data_array[i]);
2178 if (data_for_parent)
2179 *data_for_parent = box;
2181 NODE_BOX ¤t_node_data = myNodeData[nodei];
// Fast path: no expansion — copy child boxes into the interleaved node.
2185 if (!myExpandFactor)
2187 for (
int i = 0; i < nchildren; ++i)
2189 const ITEM_BOX &local_box = child_data_array[i];
2190 for (
int axis = 0; axis < NAXES; ++axis)
2192 ((
T*)¤t_node_data.vals[axis][0])[i] = local_box.vals[axis][0];
2193 ((
T*)¤t_node_data.vals[axis][1])[i] = local_box.vals[axis][1];
// Expanded path: pad each axis by factor*(extent) + epsilon, where the
// epsilon is built from the larger exponent of the two bounds
// (0x7F800000 masks the IEEE-754 float exponent bits).
2199 for (
int i = 0; i < nchildren; ++i)
2201 const ITEM_BOX &local_box = child_data_array[i];
2202 for (
int axis = 0; axis < NAXES; ++axis)
2204 float minv = local_box.vals[axis][0];
2205 float maxv = local_box.vals[axis][1];
2210 ((
int32*)local_box.vals[axis])[0] & 0x7F800000,
2211 ((
int32*)local_box.vals[axis])[1] & 0x7F800000) - ((23-1)<<23));
2212 float diff = myExpandFactor*(maxv-minv) + (*(
fpreal32*)&epsilon);
2215 ((
T*)¤t_node_data.vals[axis][0])[i] = minv;
2216 ((
T*)¤t_node_data.vals[axis][1])[i] = maxv;
// Unused child slots presumably get sentinel bounds (lines elided).
2220 for (
int i = nchildren; i < BVH_N; ++i)
2223 for (
int axis = 0; axis < NAXES; ++axis)
// Driver tail: lower threshold (64) than the non-expanding variant (4096).
2237 constexpr
uint PARALLEL_THRESHOLD = 64;
2238 NodeBoxFunctors functors(node_boxes, item_boxes, expand_factor);
2239 bvh.template traverseParallel<ITEM_BOX>(PARALLEL_THRESHOLD, functors);
// NOTE(review): lossy doc-extraction of utCreateBVHInterleavedBoxesHelper
// (the function name is taken from its recursive calls; its own signature
// line is elided, as is the UTparallelFor wrapper providing `r`). Code
// byte-identical, comments only. Purpose (hedged): recursively compute
// interleaved per-node bounding boxes; subtrees with >= myParallelThreshold
// nodes are processed via parallel tasks, smaller ones serially.
2242 template<
typename T,u
int NAXES,
bool PARALLEL,
typename INT_TYPE0,
typename DATA,
typename INT_TYPE>
2245 INT_TYPE next_node_id,
2249 const auto &node =
data.myRoot[nodei];
2251 static constexpr
uint N = 4;
2256 INT_TYPE next_nodes[
N];
2258 INT_TYPE nchildren =
N;
2259 INT_TYPE nparallel = 0;
// Walk children right-to-left to compute each subtree's node count
// (nnodes[s]) from consecutive internal-node numbering.
2262 for (INT_TYPE s = N; s --> 0; )
2264 const INT_TYPE node_int = node.child[
s];
2265 if (node_int == Node::EMPTY)
2270 next_nodes[
s] = next_node_id;
2271 if (Node::isInternal(node_int))
2275 INT_TYPE child_node_id = Node::getInternalNum(node_int);
2276 nnodes[
s] = next_node_id - child_node_id;
2277 next_node_id = child_node_id;
2283 nparallel += (nnodes[
s] >=
data.myParallelThreshold);
2288 LOCAL_DATA local_data[
N];
// Leaf handler: accumulate item boxes (optionally via an index mapping)
// into this child's local bounds.
2290 auto&& item_functor = [](
const DATA&
data, LOCAL_DATA& local_data, INT_TYPE
s, INT_TYPE nodei, INT_TYPE node_int) {
2294 INT_TYPE0
index = data.myIndices ? data.myIndices[node_int] : INT_TYPE0(node_int);
2295 local_data.initBounds(data.myItemBoxes[index]);
2300 if (data.myIndices) {
2301 index0 = data.myIndices[node_int];
2302 index1 = data.myIndices[node_int+1];
2305 index0 = INT_TYPE0(node_int);
2306 index1 = INT_TYPE0(node_int+1);
2308 local_data.initBoundsUnordered(data.myItemBoxes[index0], data.myItemBoxes[index1]);
2309 const INT_TYPE end_node_int = node_int+
count;
2311 for (; node_int < end_node_int; ++node_int) {
2312 const INT_TYPE0
index = data.myIndices ? data.myIndices[node_int] : INT_TYPE0(node_int);
2313 local_data.combine(data.myItemBoxes[index]);
// Mixed case: handle the small children serially first...
2318 if (nparallel >= 2) {
2320 if (nparallel < nchildren) {
2321 for (INT_TYPE s = 0; s <
N; ++
s) {
2322 if (nnodes[s] >= data.myParallelThreshold) {
2325 const INT_TYPE node_int = node.child[
s];
2326 if (Node::isInternal(node_int)) {
2327 if (node_int == Node::EMPTY) {
2332 utCreateBVHInterleavedBoxesHelper<T,NAXES,false,INT_TYPE0>(Node::getInternalNum(node_int), INT_TYPE(-1),
data, &local_data[
s]);
2335 item_functor(data, local_data[s], s, nodei, node_int);
// ...then dispatch each large child as a task (`r` comes from an elided
// UTparallelFor wrapper).
2341 for (INT_TYPE taski = r.begin(); taski < r.end(); ++taski) {
2342 INT_TYPE parallel_count = 0;
2346 for (s = 0; s <
N; ++
s) {
2347 if (nnodes[s] < data.myParallelThreshold) {
2350 if (parallel_count == taski) {
2355 const INT_TYPE node_int = node.child[
s];
2356 UT_ASSERT_MSG_P(Node::isInternal(node_int),
"item_functor is fairly fast, so we don't support having leaves run in parallel, unlike traverseParallel");
2357 UT_ASSERT_MSG_P(node_int != Node::EMPTY,
"Empty entries should have been excluded above.");
2358 utCreateBVHInterleavedBoxesHelper<T,NAXES,true,INT_TYPE0>(Node::getInternalNum(node_int), next_nodes[
s],
data, &local_data[
s]);
// Fully serial path (fewer than 2 large children).
2364 for (INT_TYPE s = 0; s <
N; ++
s) {
2365 const INT_TYPE node_int = node.child[
s];
2366 if (Node::isInternal(node_int)) {
2367 if (node_int == Node::EMPTY) {
2372 utCreateBVHInterleavedBoxesHelper<T,NAXES,false,INT_TYPE0>(Node::getInternalNum(node_int), INT_TYPE(-1),
data, &local_data[
s]);
2375 item_functor(data, local_data[s], s, nodei, node_int);
// Combine children, propagate to the parent, and write the interleaved
// per-child SIMD bounds ("¤t_" was presumably "&current_").
2381 for (
int i = 1; i < nchildren; ++i)
2384 if (data_for_parent)
2385 *data_for_parent = box;
2387 auto ¤t_node_data = data.myNodeData[nodei];
2391 for (
int i = 0; i < nchildren; ++i)
2394 for (
int axis = 0; axis < NAXES; ++axis)
2396 ((
T*)¤t_node_data.vals[axis][0])[i] = local_box.
vals[axis][0];
2397 ((
T*)¤t_node_data.vals[axis][1])[i] = local_box.
vals[axis][1];
2400 for (
int i = nchildren; i <
N; ++i)
2403 for (
int axis = 0; axis < NAXES; ++axis)
// NOTE(review): lossy doc-extraction of utCreateBVHInterleavedBoxes
// (function name inferred from the helper it calls; its own name line is
// elided). Code byte-identical, comments only. Purpose (hedged): entry
// point that packages the BVH root, item boxes, optional index mapping, and
// output arrays into a DATA struct, then runs the recursive helper from
// the root — TODO confirm.
2414 template<u
int NAXES,
typename T,
typename ITEM_BOX,
typename NODE_BOX,
typename INT_TYPE0>
2417 const ITEM_BOX* item_boxes,
2418 NODE_BOX* node_boxes,
2419 const v4uu* node_nitems,
2420 const INT_TYPE0* indices_mapping) noexcept
// Empty tree: nothing to fill in.
2422 if (!bvh.getNodes())
2425 static constexpr
uint PARALLEL_THRESHOLD = 4096;
// Local DATA struct fields consumed by the helper above.
2429 uint myParallelThreshold;
2430 const INT_TYPE0* myIndices;
2431 const ITEM_BOX* myItemBoxes;
2432 const v4uu* myNodeNItems;
2433 NODE_BOX *myNodeData;
2437 data.myRoot = bvh.getNodes();
2438 data.myParallelThreshold = PARALLEL_THRESHOLD;
2439 data.myIndices = indices_mapping;
2440 data.myItemBoxes = item_boxes;
2441 data.myNodeNItems = node_nitems;
2442 data.myNodeData = node_boxes;
2445 utCreateBVHInterleavedBoxesHelper<T,NAXES,true,INT_TYPE0>(
uint(0), bvh.getNumNodes(),
data, &box);
// NOTE(review): only the template header and one statement of this function
// survived extraction (original lines 2449-2455 and the rest of the body
// are missing); cannot document further. Code byte-identical.
2448 template<u
int NAXES,
typename INT_TYPE>
2456 const uint nnodes = bvh.getNumNodes();
// NOTE(review): lossy doc-extraction; the function's own name/signature
// lines are elided. Code byte-identical, comments only. Purpose (hedged):
// stack-based BVH traversal collecting into `box_indices` the leaves/nodes
// whose interleaved SIMD boxes overlap `query_box`; per-lane hits come from
// sign bits of the per-axis interval-overlap mask — TODO confirm.
2471 template<u
int NAXES,
typename INT_TYPE>
2481 const NodeType *nodes = bvh.getNodes();
2483 auto stack_ptr = stack.
data();
// Raw-pointer append window over box_indices; grown on demand below.
2487 auto box_ptr = box_indices.data();
2488 auto box_end = box_ptr + box_indices.capacity();
2489 box_ptr += box_indices.entries();
2491 const BoxType query_boxes(query_box);
2496 uint current_node = stack_ptr[stack_entries];
2500 const BoxType &box = node_boxes[current_node];
// SIMD overlap test: a lane hits iff every axis interval is non-empty.
2502 BoxType intersection;
2503 box.intersect(query_boxes, intersection);
2505 v4uu hit_mask = (intersection[0][0] <= intersection[0][1]);
2506 for (
uint axis = 1; axis < NAXES; ++axis)
2508 hit_mask &= (intersection[axis][0] <= intersection[axis][1]);
2513 const NodeType &node = nodes[current_node];
2514 uint local_index = SYSfirstBitSet(hit_bits);
2515 current_node = node.child[local_index];
// Single hit: descend directly without touching the stack.
2516 if (!(hit_bits & (hit_bits-1)))
2519 if (NodeType::isInternal(current_node))
2521 current_node = NodeType::getInternalNum(current_node);
// Leaf: append, growing the output array if the window is full.
2525 if (box_ptr >= box_end)
2527 box_indices.setSizeNoInit(box_ptr-box_indices.data());
2528 box_indices.bumpCapacity(box_end-box_indices.data()+1);
2530 box_ptr = box_indices.data();
2531 box_end = box_ptr + box_indices.capacity();
2532 box_ptr += box_indices.entries();
2534 *box_ptr++ = current_node;
// Multiple hits: handle the first, push/append the rest.
2539 if (NodeType::isInternal(current_node))
2542 current_node = NodeType::getInternalNum(current_node);
2548 if (box_ptr >= box_end)
2550 box_indices.setSizeNoInit(box_ptr-box_indices.data());
2551 box_indices.bumpCapacity(box_end-box_indices.data()+1);
2553 box_ptr = box_indices.data();
2554 box_end = box_ptr + box_indices.capacity();
2555 box_ptr += box_indices.entries();
2557 *box_ptr++ = current_node;
// Clear the lowest handled bit: uint(-2)<<i masks bits <= i.
2561 hit_bits &= (
uint(-2)<<local_index);
2565 local_index = SYSfirstBitSet(hit_bits);
2566 uint local_node = node.child[local_index];
2567 if (NodeType::isInternal(local_node))
2569 local_node = NodeType::getInternalNum(local_node);
2571 current_node = local_node;
// Remaining internal hits go on the (growable) traversal stack.
2574 if (stack_entries >= stack_capacity)
2579 stack_ptr = stack.
data();
2581 stack_ptr[stack_entries++] = local_node;
2587 if (box_ptr >= box_end)
2589 box_indices.setSizeNoInit(box_ptr-box_indices.data());
2590 box_indices.bumpCapacity(box_end-box_indices.data()+1);
2592 box_ptr = box_indices.data();
2593 box_end = box_ptr + box_indices.capacity();
2594 box_ptr += box_indices.entries();
2596 *box_ptr++ = local_node;
2600 hit_bits &= (
uint(-2)<<local_index);
2608 }
while (stack_entries);
// Commit the final write position back to the array's size.
2610 box_indices.setSizeNoInit(box_ptr - box_indices.data());
// NOTE(review): lossy doc-extraction; the function's own name/signature
// lines are elided. Code byte-identical, comments only. Purpose (hedged):
// BVH traversal collecting into `node_indices` internal nodes that either
// have any non-internal child (internalmask != 0xf) or whose per-lane box
// extents are not all larger than the query box's (size cut-off), then
// descending into lanes whose boxes overlap the query — TODO confirm.
2613 template<u
int NAXES,
typename INT_TYPE>
2623 const NodeType *nodes = bvh.getNodes();
2624 const uint nnodes = bvh.getNumNodes();
2632 const BoxType query_boxes(query_box);
// Broadcast per-axis query extent for the size comparison below.
2634 v4uf queryboxsize =
v4uf(query_box.axis_sum());
2638 auto stack_ptr = stack.
data();
2640 exint stack_entries = 0;
// Seed the explicit stack with the root node (index 0).
2642 if (stack_entries >= stack_capacity)
2646 stack_ptr = stack.
data();
2648 stack_ptr[stack_entries++] = 0;
2650 auto box_ptr = node_indices.data();
2651 auto box_end = box_ptr + node_indices.capacity();
2652 box_ptr += node_indices.entries();
2657 uint current_node = stack_ptr[stack_entries];
2661 const NodeType &node = nodes[current_node];
// Sign bits of the child words flag internal children.
2662 v4uu nodechild = *((
const v4uu *)node.child);
2664 int internalmask = signbits(nodechild);
// Any non-internal child: record this node (grow output if needed).
2666 if (internalmask != 0xf)
2670 if (box_ptr >= box_end)
2672 node_indices.setSizeNoInit(box_ptr-node_indices.data());
2673 node_indices.bumpCapacity(box_end-node_indices.data()+1);
2675 box_ptr = node_indices.data();
2676 box_end = box_ptr + node_indices.capacity();
2677 box_ptr += node_indices.entries();
2679 *box_ptr++ = current_node;
2685 const BoxType &box = node_boxes[current_node];
// Size cut-off: record nodes whose lanes aren't all bigger than the query.
2688 v4uf boxsize = box.axis_sum();
2690 int bigenough = signbits(boxsize > queryboxsize);
2691 if ((negative | bigenough) != 0xf)
2696 if (box_ptr >= box_end)
2698 node_indices.setSizeNoInit(box_ptr-node_indices.data());
2699 node_indices.bumpCapacity(box_end-node_indices.data()+1);
2701 box_ptr = node_indices.data();
2702 box_end = box_ptr + node_indices.capacity();
2703 box_ptr += node_indices.entries();
2705 *box_ptr++ = current_node;
// Standard SIMD overlap test per lane.
2709 BoxType intersection;
2710 box.intersect(query_boxes, intersection);
2712 v4uu hit_mask = (intersection[0][0] <= intersection[0][1]);
2713 for (
uint axis = 1; axis < NAXES; ++axis)
2715 hit_mask &= (intersection[axis][0] <= intersection[axis][1]);
2717 uint hit_bits = signbits(hit_mask);
2721 uint local_index = SYSfirstBitSet(hit_bits);
2722 current_node = node.child[local_index];
2723 current_node = NodeType::getInternalNum(current_node);
// Single hit: descend directly; otherwise stack the extra hits.
2724 if (!(hit_bits & (hit_bits-1)))
2731 hit_bits &= (
uint(-2)<<local_index);
2735 local_index = SYSfirstBitSet(hit_bits);
2736 uint local_node = node.child[local_index];
2737 local_node = NodeType::getInternalNum(local_node);
2739 if (stack_entries >= stack_capacity)
2744 stack_ptr = stack.
data();
2746 stack_ptr[stack_entries++] = local_node;
2749 hit_bits &= (
uint(-2)<<local_index);
2755 }
while (stack_entries);
2757 node_indices.setSizeNoInit(box_ptr - node_indices.data());
// NOTE(review): lossy doc-extraction; the function's own name/signature
// lines are elided and the traversal is truncated near its end. Code
// byte-identical, comments only. Purpose (hedged): like the single-query
// traversal above, but tests BATCH_SIZE query boxes at once, ORs the lane
// masks to drive one shared descent, and appends hits into a separate
// box_indices[i] list per query — TODO confirm.
2760 template<u
int NAXES,
typename INT_TYPE,
int BATCH_SIZE>
2770 const NodeType *nodes = bvh.getNodes();
2771 const uint nnodes = bvh.getNumNodes();
2779 BoxType query_boxes[BATCH_SIZE];
2780 for (
int i = 0; i < BATCH_SIZE; i++)
2781 query_boxes[i] = query_box[i];
2793 const BoxType &box = node_boxes[current_node];
// Per-query lane masks; hit_bits is their union for shared descent.
2795 uint local_hit_bits[BATCH_SIZE];
2798 for (
int i = 0; i < BATCH_SIZE; i++)
2800 BoxType intersection;
2801 box.intersect(query_boxes[i], intersection);
2803 v4uu hit_mask = (intersection[0][0] <= intersection[0][1]);
2804 for (
uint axis = 1; axis < NAXES; ++axis)
2806 hit_mask &= (intersection[axis][0] <= intersection[axis][1]);
2808 local_hit_bits[i] = signbits(hit_mask);
2809 hit_bits |= local_hit_bits[i];
2815 const NodeType &node = nodes[current_node];
2816 uint local_index = SYSfirstBitSet(hit_bits);
2817 current_node = node.child[local_index];
// Single combined hit: descend or record per matching query.
2818 if (!(hit_bits & (hit_bits-1)))
2821 if (NodeType::isInternal(current_node))
2823 current_node = NodeType::getInternalNum(current_node);
2828 for (
int i = 0; i < BATCH_SIZE; i++)
2830 if (local_hit_bits[i])
2831 box_indices[i].append(current_node);
// Multiple hits: first one descends; leaves append only to the queries
// whose own lane bit (curbit) was set.
2836 if (NodeType::isInternal(current_node))
2839 current_node = NodeType::getInternalNum(current_node);
2844 uint curbit = 1 << local_index;
2845 for (
int i = 0; i < BATCH_SIZE; i++)
2847 if (local_hit_bits[i] & curbit)
2848 box_indices[i].append(current_node);
2852 hit_bits &= (
uint(-2)<<local_index);
2856 local_index = SYSfirstBitSet(hit_bits);
2857 uint local_node = node.child[local_index];
2858 if (NodeType::isInternal(local_node))
2860 local_node = NodeType::getInternalNum(local_node);
2862 current_node = local_node;
2864 stack.
append(local_node);
2869 uint curbit = 1 << local_index;
2870 for (
int i = 0; i < BATCH_SIZE; i++)
2872 if (local_hit_bits[i] & curbit)
2873 box_indices[i].append(local_node);
2878 hit_bits &= (
uint(-2)<<local_index);
// NOTE(review): lossy doc-extraction of utComputeNodeNItemsHelper (name
// from its recursive calls; its own signature line is elided, as is the
// UTparallelFor wrapper providing `r`). Code byte-identical, comments only.
// Purpose (hedged): convert per-child item start offsets into per-child
// item counts for each BVH node, recursing serially for small subtrees and
// via parallel tasks for subtrees >= myParallelThreshold nodes.
2889 template<
bool PARALLEL,
typename DATA>
2894 const DATA &
data) noexcept
2896 const auto &node =
data.myNodes[nodei];
2898 static constexpr
uint N = 4;
// Negative entries appear to mark empty child slots; others are start
// offsets converted here to counts via the next child's start (or
// next_item_id for the last occupied slot).
2905 for (INT_TYPE i = 0; i < N-1; ++i) {
2906 if (current_counts[i] < 0) {
2907 current_counts[i] = 0;
2909 else if (current_counts[i+1] < 0) {
2910 current_counts[i] = next_item_id-current_counts[i];
2911 next_items[i] = next_item_id;
2914 current_counts[i] = current_counts[i+1]-current_counts[i];
2915 next_items[i] = current_counts[i+1];
2918 if (current_counts[N-1] < 0) {
2919 current_counts[N-1] = 0;
2922 current_counts[N-1] = next_item_id-current_counts[N-1];
2923 next_items[N-1] = next_item_id;
2928 INT_TYPE next_nodes[
N];
2930 INT_TYPE nchildren =
N;
2931 INT_TYPE nparallel = 0;
// Same right-to-left subtree-size computation as the box helper above.
2934 for (INT_TYPE s = N; s --> 0; )
2936 const INT_TYPE node_int = node.child[
s];
2937 if (node_int == Node::EMPTY)
2942 next_nodes[
s] = next_node_id;
2943 if (Node::isInternal(node_int))
2947 INT_TYPE child_node_id = Node::getInternalNum(node_int);
2948 nnodes[
s] = next_node_id - child_node_id;
2949 next_node_id = child_node_id;
2955 nparallel += (nnodes[
s] >=
data.myParallelThreshold);
// Mixed case: small children serially, large children as parallel tasks.
2959 if (nparallel >= 2) {
2961 if (nparallel < nchildren) {
2962 for (INT_TYPE s = 0; s <
N; ++
s) {
2963 if (nnodes[s] >=
data.myParallelThreshold) {
2966 const INT_TYPE node_int = node.child[
s];
2967 if (Node::isInternal(node_int)) {
2968 if (node_int == Node::EMPTY) {
2973 utComputeNodeNItemsHelper<false>(Node::getInternalNum(node_int), INT_TYPE(-1), next_items[
s],
data);
2979 for (INT_TYPE taski = r.begin(); taski < r.end(); ++taski) {
2980 INT_TYPE parallel_count = 0;
2984 for (s = 0; s <
N; ++
s) {
2985 if (nnodes[s] <
data.myParallelThreshold) {
2988 if (parallel_count == taski) {
2993 const INT_TYPE node_int = node.child[
s];
2994 UT_ASSERT_MSG_P(Node::isInternal(node_int),
"item_functor is fairly fast, so we don't support having leaves run in parallel, unlike traverseParallel");
2995 UT_ASSERT_MSG_P(node_int != Node::EMPTY,
"Empty entries should have been excluded above.");
2996 utComputeNodeNItemsHelper<true>(Node::getInternalNum(node_int), next_nodes[
s], next_items[
s],
data);
// Fully serial fallback.
3002 for (INT_TYPE s = 0; s <
N; ++
s) {
3003 const INT_TYPE node_int = node.child[
s];
3004 if (Node::isInternal(node_int)) {
3005 if (node_int == Node::EMPTY) {
3010 utComputeNodeNItemsHelper<false>(Node::getInternalNum(node_int), INT_TYPE(-1), next_items[
s],
data);
// NOTE(review): lossy doc-extraction; the driver's signature opens on
// elided lines (only the trailing "exint nitems) noexcept" survives). Code
// byte-identical, comments only. Purpose (hedged): two-phase computation —
// first a traverseParallel pass (ItemStartFunctors) records each node's
// child item-start offsets into node_nitems (unused slots = -1), then
// utComputeNodeNItemsHelper converts starts to counts — TODO confirm.
3019 exint nitems) noexcept
3022 if (bvh.getNumNodes() == 0)
// Phase-1 functor set: propagate item start indices bottom-up.
3027 struct ItemStartFunctors
3031 ItemStartFunctors() =
default;
3034 : myNodeData(node_data)
// item(): a leaf's item index becomes the parent's start offset.
3042 data_for_parent = itemi;
3045 void post(
const int nodei,
const int parent_nodei,
int32 *data_for_parent,
const int nchildren,
const int32 *child_data_array)
const
3047 if (data_for_parent)
3048 *data_for_parent = child_data_array[0];
// Write the child start offsets; pad unused slots with -1 sentinels.
3051 int32 *current_node_data = (
int32*)&myNodeData[nodei];
3053 current_node_data[0] = child_data_array[0];
3055 for (; i < nchildren; ++i)
3056 current_node_data[i] = child_data_array[i];
3058 current_node_data[i] = -1;
3062 constexpr
uint PARALLEL_THRESHOLD = 4096;
3063 ItemStartFunctors functors(node_nitems);
3064 bvh.template traverseParallel<int32>(PARALLEL_THRESHOLD, functors);
// Phase 2: starts -> counts via the recursive helper.
3068 struct ItemCountData
3070 uint myParallelThreshold;
3075 data.myParallelThreshold = PARALLEL_THRESHOLD;
3076 data.myNodes = bvh.getNodes();
3077 data.myNodeData = node_nitems;
3078 utComputeNodeNItemsHelper<true>(0, bvh.getNumNodes(), nitems,
data);
// NOTE(review): only fragments of this radius-aware distance helper
// survived extraction (its signature and most of its body are elided).
// Visible behavior: reads a per-item radius and restores a signed
// distance-squared convention (negative distance => negative d2).
// Code byte-identical.
3081 template<
typename RADIUS_ARRAY>
3084 float radius = radii[
index];
3089 d2 = (distance < 0) ? -d2 : d2;
// NOTE(review): heavily elided extraction of a point-distance query functor
// ("ex"+"int" == "exint") and several thin derived structs that inherit its
// constructors. Visible behavior: boxHitAndDist2-style test returning sign
// bits of a <=/>= comparison against max_dist_squared, flipped for
// farthest-point queries. Code byte-identical, comments only.
3093 template<ex
int NAXES>
3102 static constexpr
bool theAllPointsValid =
true;
3105 template<
typename TS>
3122 template<
typename TS,
typename RADIUS_ARRAY>
// Nearest queries keep lanes with dist2 <= bound; farthest keep >=.
3137 template<
bool farthest>
3141 v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3142 return signbits(hit_mask);
// Derived query variants re-exporting the base constructors.
3146 template<ex
int NAXES>
3150 using Parent::Parent;
3156 using Parent::Parent;
3162 using Parent::Parent;
3168 using Parent::Parent;
3174 using Parent::Parent;
// NOTE(review): lossy doc-extraction of a segment-distance query functor
// (struct name elided). Visible behavior: distance from box centers to the
// segment P0 + t*Diff with t clamped to [0, 1] via the proj_t tests; SIMD
// lanes select endpoint vs projected point with bit masks. Code
// byte-identical, comments only.
3181 template<u
int NAXES>
3196 static constexpr
bool theAllPointsValid =
true;
3199 template<
typename TS>
3205 , myDiff2(length2(myDiff))
3222 template<
typename TS,
typename RADIUS_ARRAY>
// Scalar path: clamp the projection parameter to the segment.
3240 else if (proj_t >= myDiff2)
3249 vec = (myP0 + (proj_t * myDiff));
3261 template<
bool farthest>
// SIMD path: box centers, projection onto the segment direction, then
// per-lane select between P0, P1, and the projected point.
3269 for (
uint axis = 0; axis < NAXES; ++axis)
3270 center[axis] = (boxes[axis][0] + boxes[axis][1]) * 0.5f;
3275 v4uf proj_t =
dot(myVDiff,center - myVP0);
3276 v4uu isp0_mask = (proj_t <= 0.0f);
3277 v4uu isp1_mask = andn(isp0_mask, (proj_t >= myVDiff2));
3282 for (
int i = 0; i < NAXES; ++i)
3284 vec[i] = (myVP0[i] & isp0_mask) | (myVP1[i] & isp1_mask) | andn((isp0_mask | isp1_mask), proj_vec[i]);
3288 v4uf dist2s = length2(vec);
// Signed-distance handling: 0x80000000 isolates the float sign bit so
// squaring preserves the sign convention.
3296 v4uf negative_mask = dists &
v4uu(0x80000000);
3298 dist2 = dists ^ negative_mask;
3304 dist2 = dists * dists;
3307 v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3308 return signbits(hit_mask);
// NOTE(review): lossy doc-extraction of an (infinite) line-distance query
// functor (struct name elided). Visible behavior: subtract the component
// along the unit direction myDir/myVDir, leaving the perpendicular offset
// whose length gives the distance. Code byte-identical, comments only.
3314 template<u
int NAXES>
3325 static constexpr
bool theAllPointsValid =
true;
3328 template<
typename TS>
3349 template<
typename TS,
typename RADIUS_ARRAY>
// Scalar path: remove the parallel component (myDir presumably unit).
3358 diff -=
dot(diff,myDir)*myDir;
3368 template<
bool farthest>
// SIMD path over box centers.
3374 for (
uint axis = 0; axis < NAXES; ++axis)
3375 diffs[axis] = (boxes[axis][0] + boxes[axis][1]) * 0.5f - myVP0[axis];
3378 diffs -=
dot( diffs, myVDir )*myVDir;
// Signed-distance handling (0x80000000 = float sign bit), as in the
// segment query above.
3386 v4uf negative_mask = dists &
v4uu(0x80000000);
3388 dist2 = dists ^ negative_mask;
3394 dist2 = dists * dists;
3397 v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3398 return signbits(hit_mask);
3402 template<u
int NAXES,
typename POSITION_ARRAY,
typename RADIUS_ARRAY>
3415 static constexpr
bool theAllPointsValid =
false;
3418 template<
typename TS>
3424 const POSITION_ARRAY &positions,
3425 const RADIUS_ARRAY &radii)
3430 , myCosAngle(SYScos(angle))
3431 , mySinAngle(SYSsin(angle))
3432 , myPositions(positions)
3438 template <
typename T>
3443 const VectorType p0(myP0), dir(myDir), diff = point - p0;
3444 const T pz =
dot(diff,dir);
3445 const VectorType tmp = diff - pz * dir;
3446 const T q =
sqrt(length2(tmp));
3447 const T res = myCosAngle * q - mySinAngle * pz;
3451 template <
typename T>
3455 return getDistToSurface(center) <= rad;
3464 return sphereIntersects(tree_pos, myRadii[tree_point_index]);
3468 template<
typename TS>
3485 template<
bool farthest>
3491 for (
int axis = 0; axis < NAXES; ++axis)
3492 center[axis] = (boxes[axis][0] + boxes[axis][1]) * 0.5f;
3495 const v4uf dist_to_surface = getDistToSurface(center);
3499 const v4uf inside_mask = dist_to_surface | (dist_to_surface*dist_to_surface <= rad2);
3500 const uint inside_bits = signbits(inside_mask);
3505 const v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3506 return inside_bits & signbits(hit_mask);
3514 template<u
int NAXES>
3521 static constexpr
bool theAllPointsValid =
true;
3539 template<
typename RADIUS_ARRAY>
3543 for (
int axis = 0; axis < NAXES; ++axis)
3545 float p = myP[axis];
3546 float q = tree_point[axis];
3548 float axisdist =
SYSmin(absdiff, 1.0
f - absdiff);
3549 d2 += axisdist*axisdist;
3559 template<
bool farthest>
3563 for (
uint axis = 0; axis < NAXES; ++axis)
3564 center[axis] = (boxes[axis][0] + boxes[axis][1]) * 0.5f;
3573 for (
int axis = 0; axis < NAXES; ++axis)
3575 v4uf p = myVP[axis];
3576 v4uf c = center[axis];
3580 v4uf axisdist = centredist - (boxes[axis][1] - boxes[axis][0])*0.5
f;
3581 maxdist =
SYSmax(maxdist, axisdist);
3583 v4uf signmask = maxdist &
v4uu(0x80000000);
3584 dist2 = (maxdist*maxdist)|signmask;
3591 for (
int axis = 0; axis < NAXES; ++axis)
3593 v4uf p = myVP[axis];
3594 v4uf c = center[axis];
3597 v4uf axisdist = centredist + (boxes[axis][1] - boxes[axis][0])*0.5
f;
3599 sums += axisdist*axisdist;
3604 v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3605 return signbits(hit_mask);
3631 static constexpr
bool theAllPointsValid =
true;
3643 float la =
length(sidea);
3644 float lb =
length(sideb);
3645 float lc =
length(sidec);
3646 float p = la + lb + lc;
3647 myIncentre = (la*a + lb*b + lc*
c)/p;
3649 myDist = SYSsqrt((s-la)*(s-lb)*(s-lc)/s);
3661 if (
dot(myDirs[0], b - myIncentre) < 0)
3663 myDirs[0] = -myDirs[0];
3664 myDirs[1] = -myDirs[1];
3665 myDirs[2] = -myDirs[2];
3667 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[0], c - myIncentre), 0));
3668 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[1], c - myIncentre), 0));
3669 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[1], a - myIncentre), 0));
3670 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[2], a - myIncentre), 0));
3671 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[2], b - myIncentre), 0));
3677 myVDist =
v4uf(myDist);
3688 template<
typename RADIUS_ARRAY>
3693 float d0 =
dot(dir, myDirs[0]);
3694 float d1 =
dot(dir, myDirs[1]);
3695 float d2 =
dot(dir, myDirs[2]);
3696 float d =
SYSmax(d0, d1, d2) - myDist;
3699 float dsquared = d*d;
3700 return (d < 0) ? -dsquared : dsquared;
3706 template<
bool farthest>
3712 for (
uint axis = 0; axis < NAXES; ++axis)
3713 dirs[axis] = (boxes[axis][0] + boxes[axis][1])*0.5f - myVIncentre[axis];
3718 const v4uf d0 =
dot(dirs, myVDirs[0]);
3719 const v4uf d1 =
dot(dirs, myVDirs[1]);
3720 const v4uf d2 =
dot(dirs, myVDirs[2]);
3726 v4uf negative_mask = dists &
v4uu(0x80000000);
3728 dist2 = dists ^ negative_mask;
3734 dist2 = dists * dists;
3737 v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3738 return signbits(hit_mask);
3764 static constexpr
bool theAllPointsValid =
true;
3784 float areaa = 0.5f*(
normalize(myDirs[0]));
3785 float areab = 0.5f*(
normalize(myDirs[1]));
3786 float areac = 0.5f*(
normalize(myDirs[2]));
3787 float aread = 0.5f*(
normalize(myDirs[3]));
3788 float area = areaa + areab + areac + aread;
3789 myDist = volume_times_3/
area;
3790 myIncentre = (areaa*a + areab*b + areac*c + aread*d)/area;
3797 if (
dot(myDirs[0], b - myIncentre) < 0)
3799 myDirs[0] = -myDirs[0];
3800 myDirs[1] = -myDirs[1];
3801 myDirs[2] = -myDirs[2];
3802 myDirs[3] = -myDirs[3];
3804 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[0], c - myIncentre), 0));
3805 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[0], d - myIncentre), 0));
3806 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[1], c - myIncentre), 0));
3807 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[1], d - myIncentre), 0));
3808 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[1], a - myIncentre), 0));
3809 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[2], d - myIncentre), 0));
3810 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[2], a - myIncentre), 0));
3811 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[2], b - myIncentre), 0));
3812 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[3], a - myIncentre), 0));
3813 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[3], b - myIncentre), 0));
3814 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[3], c - myIncentre), 0));
3821 myVDist =
v4uf(myDist);
3832 template<
typename RADIUS_ARRAY>
3836 auto dir{ tree_point - myIncentre };
3837 float d0 =
dot(dir, myDirs[0]);
3838 float d1 =
dot(dir, myDirs[1]);
3839 float d2 =
dot(dir, myDirs[2]);
3840 float d3 =
dot(dir, myDirs[3]);
3841 float d =
SYSmax(d0, d1, d2, d3) - myDist;
3844 float dsquared = d*d;
3845 return (d < 0) ? -dsquared : dsquared;
3851 template<
bool farthest>
3857 for (
uint axis = 0; axis < NAXES; ++axis)
3858 dirs[axis] = (boxes[axis][0] + boxes[axis][1])*0.5f - myVIncentre[axis];
3863 const v4uf d0 =
dot(dirs, myVDirs[0]);
3864 const v4uf d1 =
dot(dirs, myVDirs[1]);
3865 const v4uf d2 =
dot(dirs, myVDirs[2]);
3866 const v4uf d3 =
dot(dirs, myVDirs[3]);
3872 v4uf negative_mask = dists &
v4uu(0x80000000);
3874 dist2 = dists ^ negative_mask;
3880 dist2 = dists * dists;
3883 v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3884 return signbits(hit_mask);
3888 template<
bool farthest,
typename INT_TYPE0>
3894 : myIndicesMapping(indices_mapping)
3896 UT_ASSERT_MSG_P(indices_mapping,
"Use std::less or std::greater if not mapping.");
3901 if (a.myDistSquared != b.myDistSquared) {
3903 ? (a.myDistSquared < b.myDistSquared)
3904 : (a.myDistSquared > b.myDistSquared);
3909 ? (myIndicesMapping[a.myNode] < myIndicesMapping[b.myNode])
3910 : (myIndicesMapping[a.myNode] > myIndicesMapping[b.myNode]);
3913 const INT_TYPE0* myIndicesMapping;
3916 template<
bool farthest,
bool reordered,
bool use_max_po
ints,u
int NAXES,
typename QUERY_POINT,
typename INT_TYPE0,
typename POSITION_ARRAY,
typename RADIUS_ARRAY>
3919 const INT_TYPE0* indices_mapping,
3920 const POSITION_ARRAY& positions,
3921 QUERY_POINT& query_point,
3923 const RADIUS_ARRAY& radii,
3925 float& max_dist_squared) noexcept
3929 if (!QUERY_POINT::theAllPointsValid && !query_point.isValid(
index))
3935 float d2 = query_point.queryDistance2(positions[
index], radii, index);
3936 if ((!farthest) ? (d2 > max_dist_squared) : (d2 < max_dist_squared))
3939 if (!use_max_points || output_queue.size() < max_points) {
3940 output_queue.append(StackEntry(d2, index));
3941 if (use_max_points && output_queue.size() == max_points) {
3942 if (max_points > 1) {
3945 if (reordered || indices_mapping) {
3947 std::make_heap(output_queue.data(), output_queue.data()+output_queue.size(), comparator);
3950 std::make_heap(output_queue.data(), output_queue.data()+output_queue.size());
3955 if (reordered || indices_mapping) {
3957 std::make_heap(output_queue.data(), output_queue.data()+output_queue.size(), comparator);
3960 std::make_heap(output_queue.data(), output_queue.data()+output_queue.size(), std::greater<StackEntry>());
3963 max_dist_squared = output_queue[0].myDistSquared;
3966 max_dist_squared = d2;
3970 else if (max_points == 1) {
3971 bool select_new =
true;
3972 if (d2 == max_dist_squared)
3975 INT_TYPE0 other_ptoff;
3976 if (reordered || indices_mapping)
3977 other_ptoff = indices_mapping[output_queue[0].myNode];
3979 other_ptoff = INT_TYPE0(output_queue[0].myNode);
3980 INT_TYPE0 ptoff = indices_mapping[
index];
3981 select_new = (!farthest) ? (ptoff < other_ptoff) : (ptoff > other_ptoff);
3985 output_queue[0].myDistSquared = d2;
3986 output_queue[0].myNode =
index;
3987 max_dist_squared = d2;
3994 auto start = output_queue.data();
3995 auto end = output_queue.data()+output_queue.size();
3997 if (reordered || indices_mapping) {
3999 if (d2 == max_dist_squared && indices_mapping[start->myNode] < indices_mapping[index])
4003 std::pop_heap(start, end, comparator);
4004 end[-1].myDistSquared = d2;
4005 end[-1].myNode =
index;
4006 std::push_heap(start, end, comparator);
4010 if (d2 == max_dist_squared && start->myNode < index)
4013 std::pop_heap(start, end);
4014 end[-1].myDistSquared = d2;
4015 end[-1].myNode =
index;
4016 std::push_heap(start, end);
4020 if (reordered || indices_mapping) {
4022 if (d2 == max_dist_squared && indices_mapping[start->myNode] > indices_mapping[index])
4026 std::pop_heap(start, end, comparator);
4027 end[-1].myDistSquared = d2;
4028 end[-1].myNode =
index;
4029 std::push_heap(start, end, comparator);
4033 if (d2 == max_dist_squared && start->myNode > index)
4036 std::pop_heap(start, end, std::greater<StackEntry>());
4037 end[-1].myDistSquared = d2;
4038 end[-1].myNode =
index;
4039 std::push_heap(start, end, std::greater<StackEntry>());
4043 max_dist_squared = start[0].myDistSquared;
4047 template<
bool farthest,
bool reordered,
bool use_max_po
ints,u
int NAXES,
typename QUERY_POINT,
typename INT_TYPE0,
typename POSITION_ARRAY,
typename RADIUS_ARRAY>
4051 const v4uu* node_nitems,
4052 const INT_TYPE0* indices_mapping,
4053 const POSITION_ARRAY& positions,
4054 QUERY_POINT& query_point,
4057 const RADIUS_ARRAY& radii,
4059 float max_dist_squared) noexcept
4063 const NodeType* nodes = bvh.getNodes();
4064 const uint nnodes = bvh.getNumNodes();
4066 if (nnodes == 0 || (use_max_points && max_points<=0))
4086 StackEntry entry = stack.
last();
4089 uint next_node = entry.myNode;
4092 if ((!farthest) ? (entry.myDistSquared > max_dist_squared) : (entry.myDistSquared < max_dist_squared))
4097 if (reordered || NodeType::isInternal(next_node))
4102 if (next_node == (reordered ? NodeType::getInternalNum(NodeType::EMPTY) : NodeType::EMPTY))
4105 next_node = NodeType::getInternalNum(next_node);
4106 const BoxType &box = node_boxes[next_node];
4108 uint hit_bits = query_point_wrapper.template boxHitAndDist2<farthest>(box, max_dist_squared, next_node, dist2);
4111 const NodeType &node = nodes[next_node];
4113 StackEntry *stack_last = stack.
data()+stack_size-1;
4117 union {
v4sf d2v;
float d2s[4]; };
4119 uint cur_node = next_node;
4120 next_node =
uint(-1);
4124 uint local_index = SYSfirstBitSet(hit_bits);
4126 if (!NodeType::isInternal(index))
4128 const uint count = ((
const uint32*)(node_nitems+cur_node))[local_index];
4131 utHandleSingleClosePoint<farthest,reordered,use_max_points,NAXES>(
4132 index, indices_mapping, positions, query_point_wrapper,
4133 output_queue, radii, max_points, max_dist_squared);
4136 else if (next_node ==
uint(-1))
4138 next_node = NodeType::getInternalNum(index);
4139 d20 = d2s[local_index];
4144 uint child1 = NodeType::getInternalNum(index);
4145 float d21 = d2s[local_index];
4146 if ((!farthest) ? (d20 > d21) : (d20 < d21))
4148 uint childtmp = next_node;
4156 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= d20) : (stack_last->myDistSquared <= d20)))
4160 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= d21) : (stack_last->myDistSquared <= d21)))
4162 stack.
append(StackEntry(d21, child1));
4170 for (i = stack_size-2; i >= limiti; --i)
4172 if ((!farthest) ? (stack[i].myDistSquared >= d21) : (stack[i].myDistSquared <= d21))
4174 stack.
insert(StackEntry(d21, child1), i+1);
4180 stack.
insert(StackEntry(d21, child1), i+1);
4186 StackEntry entry = stack.
last();
4187 stack_last->myNode = child1;
4188 stack_last->myDistSquared = d21;
4189 stack.
append(StackEntry(d20, next_node));
4190 next_node = entry.myNode;
4191 d20 = entry.myDistSquared;
4193 stack_last = stack.
data()+stack_size;
4197 hit_bits &= (
uint(-2)<<local_index);
4198 }
while (hit_bits != 0);
4200 if (next_node ==
uint(-1))
4207 if (!(hit_bits & (hit_bits-1)))
4210 uint local_index = SYSfirstBitSet(hit_bits);
4212 next_node = node.child[local_index];
4214 float local_d2 = d2s[local_index];
4215 uint child_index = node.child[local_index];
4216 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= local_d2) : (stack_last->myDistSquared <= local_d2)))
4217 next_node = child_index;
4220 next_node = stack_last->myNode;
4221 stack_last->myNode = child_index;
4222 stack_last->myDistSquared = local_d2;
4236 while (stack_size != 0 && ((!farthest) ? (stack_last->myDistSquared > max_dist_squared) : (stack_last->myDistSquared < max_dist_squared)))
4242 static constexpr
uint two_bit_indices[16][2] = {
4243 {4, 4}, {4, 4}, {4, 4}, {0, 1},
4244 {4, 4}, {0, 2}, {1, 2}, {0, 1},
4245 {4, 4}, {0, 3}, {1, 3}, {0, 1},
4246 {2, 3}, {0, 2}, {1, 2}, {0, 1}
4248 static constexpr
uint third_bit_index[16] = {
4255 union {
v4sf d2v;
float d2s[4]; };
4257 uint local_index0 = two_bit_indices[hit_bits][0];
4258 uint local_index1 = two_bit_indices[hit_bits][1];
4259 if (third_bit_index[hit_bits] == 4)
4262 float d20 = d2s[local_index0];
4263 float d21 = d2s[local_index1];
4264 uint child0 = node.child[local_index0];
4265 uint child1 = node.child[local_index1];
4266 if ((!farthest) ? (d20 > d21) : (d20 < d21))
4268 uint childtmp = child0;
4276 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= d20) : (stack_last->myDistSquared <= d20)))
4282 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= d21) : (stack_last->myDistSquared <= d21)))
4284 stack.
append(StackEntry(d21, child1));
4292 for (i = stack_size-2; i >= limiti; --i)
4294 if ((!farthest) ? (stack[i].myDistSquared >= d21) : (stack[i].myDistSquared <= d21))
4296 stack.
insert(StackEntry(d21, child1), i+1);
4302 stack.
insert(StackEntry(d21, child1), i+1);
4308 next_node = stack_last->myNode;
4309 stack_last->myNode = child1;
4310 stack_last->myDistSquared = d21;
4311 stack.
append(StackEntry(d20, child0));
4317 uint nhit = (hit_bits == 0xF) ? 4 : 3;
4318 uint local_index2 = third_bit_index[hit_bits];
4319 uint children[4] = {
4320 node.child[local_index0],
4321 node.child[local_index1],
4322 node.child[local_index2],
4325 d2s[0] = d2s[local_index0];
4326 d2s[1] = d2s[local_index1];
4327 d2s[2] = d2s[local_index2];
4331 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= d2s[0]) : (stack_last->myDistSquared <= d2s[0])))
4334 next_node = children[0];
4338 new_d2 = stack_last->myDistSquared;
4339 next_node = stack_last->myNode;
4340 stack_last->myDistSquared = d2s[0];
4341 stack_last->myNode = children[0];
4343 for (
uint hit = 1; hit < nhit; ++hit)
4345 float d2 = d2s[hit];
4346 uint child = children[hit];
4347 if ((!farthest) ? (d2 < new_d2) : (d2 > new_d2))
4349 float tmpd2 = new_d2;
4350 uint tmpchild = next_node;
4358 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= d2) : (stack_last->myDistSquared <= d2)))
4360 stack.
append(StackEntry(d2, child));
4368 for (i = stack_size-2; i >= limiti; --i)
4370 if ((!farthest) ? (stack[i].myDistSquared <= d2) : (stack[i].myDistSquared >= d2))
4372 stack.
insert(StackEntry(d2, child), i+1);
4378 stack.
insert(StackEntry(d2, child), i+1);
4381 stack_last = stack.
data()+stack_size;
4389 utHandleSingleClosePoint<farthest,reordered,use_max_points,NAXES>(
4390 index, indices_mapping, positions, query_point_wrapper,
4391 output_queue, radii, max_points, max_dist_squared);
4399 #undef BVH_TRY_ALL_AXES
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const FloatType max_dist_squared, const uint internal_node_num, v4uf &dist2) const
void utCreateBVHInterleavedBoxesHelper(INT_TYPE nodei, INT_TYPE next_node_id, const DATA &data, UT::Box< T, NAXES > *data_for_parent) noexcept
SYS_FORCE_INLINE T utBoxCenter(const UT::Box< T, NAXES > &box, uint axis) noexcept
const UT_FixedVector< FloatType, NAXES > myDiff
void UTparallelFor(const Range &range, const Body &body, const int subscribe_ratio=2, const int min_grain_size=1, const bool force_use_task_scope=true)
#define SYS_STATIC_ASSERT(expr)
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
GLsizei GLenum const void * indices
SYS_FORCE_INLINE exint length() const
const UT_FixedVector< v4uf, NAXES > myVDir
constexpr bool allRadiiZero(const T &array) noexcept
void findClosestPoints(const UT::BVH< 4 > &bvh, const UT::Box< v4uf, NAXES > *node_boxes, const v4uu *node_nitems, const INT_TYPE0 *indices_mapping, const POSITION_ARRAY &positions, QUERY_POINT &query_point, BVHOrderedStack &stack, BVHOrderedStack &output_queue, const RADIUS_ARRAY &radii=ZeroRadiiWrapper(), exint max_points=std::numeric_limits< exint >::max(), float max_dist_squared=std::numeric_limits< float >::max()) noexcept
void bumpCapacity(exint min_capacity)
void traverseParallel(INT_TYPE parallel_threshold, FUNCTORS &functors, LOCAL_DATA *data_for_parent=nullptr) const noexcept
SIM_API const UT_StringHolder angle
UT_Vector2T< float > UT_Vector2
const UT_FixedVector< FloatType, NAXES > myP0
T negative(const T &val)
Return the unary negation of the given value.
SYS_FORCE_INLINE void removeLast()
auto printf(const S &fmt, const T &...args) -> int
bool SYSisFinite(fpreal64 f)
int getPureInternalDepth() const noexcept
Returns the minimum depth of the first non-internal node.
const UT_FixedVector< FloatType, NAXES > myQueryPoint
void setSizeNoInit(exint newsize)
static constexpr uint scale
SYS_FORCE_INLINE T utBoxCenter(const UT_Vector4T< T > &position, uint axis) noexcept
vfloat4 sqrt(const vfloat4 &a)
constexpr UT_FixedVector< SYS_FixedArrayElement_t< TS >, SYS_FixedArraySize_v< TS > > UTmakeFixedVector(const TS &as) noexcept
SYS_FORCE_INLINE float queryDistance2(const TS &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
fpreal64 distance2(const UT_VectorD &v1, const UT_VectorD &v2)
Distance squared (L2) aka quadrance.
INT_TYPE utExcludeNaNInfBoxIndices(const BOX_TYPE *boxes, SRC_INT_TYPE *indices, INT_TYPE &nboxes) noexcept
SYS_FORCE_INLINE void utHandleRadiiLinearly(float &d2, const RADIUS_ARRAY &radii, uint index)
SYS_FORCE_INLINE const char * buffer() const
GLboolean GLboolean GLboolean GLboolean a
GLuint GLsizei GLsizei * length
void setCapacity(exint new_capacity)
#define UT_ASSERT_MSG_P(ZZ,...)
SYS_FORCE_INLINE BVHQuerySegment(const TS &p0, const TS &p1)
int getMaxDepth() const noexcept
Returns the maximum depth of any leaf.
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
GLdouble GLdouble GLdouble q
unsigned long long uint64
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const FloatType max_dist_squared, const uint internal_node_num, v4uf &dist2) const
SYS_FORCE_INLINE FloatType queryDistance2(const TS &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
void setSize(exint newsize)
UT_FixedVector< v4uf, NAXES > myVIncentre
SYS_FORCE_INLINE float queryDistance2(const UT_FixedVector< float, NAXES > &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
SYS_FORCE_INLINE FloatType queryDistance2(const TS &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
void getIntersectingBoxesFromStack(const UT::BVH< 4 > &bvh, const UT::Box< v4uf, NAXES > *node_boxes, const UT::Box< float, NAXES > &query_box, UT_Array< INT_TYPE > &box_indices, BVHUnorderedStack &stack) noexcept
SYS_FORCE_INLINE T minDistance2(const UT_FixedVector< T, NAXES > &p) const noexcept
SYS_FORCE_INLINE float queryDistance2(const UT_FixedVector< float, NAXES > &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
#define UT_ASSERT_MSG(ZZ,...)
SYS_FORCE_INLINE void utHandleSingleClosePoint(const uint index, const INT_TYPE0 *indices_mapping, const POSITION_ARRAY &positions, QUERY_POINT &query_point, BVHOrderedStack &output_queue, const RADIUS_ARRAY &radii, exint max_points, float &max_dist_squared) noexcept
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const float max_dist_squared, const uint internal_node_num, v4uf &dist2) const
GA_API const UT_StringHolder scale
SYS_FORCE_INLINE BVHQueryInfLine(const TS &p0, const TS &dir)
void computeNodeNItems(const UT::BVH< 4 > &bvh, v4uu *node_nitems, exint nitems) noexcept
Computes the number of items per node entry and fills in node_nitems.
const UT_FixedVector< v4uf, NAXES > myVP0
const UT_FixedVector< v4uf, NAXES > myVP0
const RADIUS_ARRAY & myRadii
void getIntersectingNodes(const UT::BVH< 4 > &bvh, const UT::Box< v4uf, NAXES > *node_boxes, const UT::Box< float, NAXES > &query_box, UT_Array< INT_TYPE > &box_indices, BVHUnorderedStack &stack) noexcept
const UT_FixedVector< v4uf, NAXES > myVP0
const UT_FixedVector< float, NAXES > myP
SYS_FORCE_INLINE T maxDistance2(const UT_FixedVector< T, NAXES > &p) const noexcept
SYS_FORCE_INLINE T getDistToSurface(const UT_FixedVector< T, NAXES > &point) const
const UT_FixedVector< FloatType, NAXES > myP0
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
static int getNumProcessors()
SYS_FORCE_INLINE ut_BVHOrderedStackCompare(const INT_TYPE0 *indices_mapping) noexcept
UT_Vector3T< T > SYSclamp(const UT_Vector3T< T > &v, const UT_Vector3T< T > &min, const UT_Vector3T< T > &max)
T diameter2() const noexcept
const UT_FixedVector< v4uf, NAXES > myVP
SYS_FORCE_INLINE void createBVHNodeBoxes(const UT::BVH< BVH_N > &bvh, const ITEM_BOX *item_boxes, NODE_BOX *node_boxes) noexcept
SYS_FORCE_INLINE void createBVHInterleavedBoxes(const UT::BVH< BVH_N > &bvh, const ITEM_BOX *item_boxes, NODE_BOX *node_boxes, float expand_factor=0.0f) noexcept
const UT_FixedVector< float, NAXES > myP0
STATIC_INLINE uint64_t H(uint64_t x, uint64_t y, uint64_t mul, int r)
UT_FixedVector< v4uf, NAXES > myVIncentre
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const float max_dist_squared, const uint internal_node_num, v4uf &dist2) const
UT_FixedVector< float, NAXES > myIncentre
GLboolean GLboolean GLboolean b
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
SYS_FORCE_INLINE BVHQueryTetrahedron(const UT_FixedVector< float, NAXES > &a, const UT_FixedVector< float, NAXES > &b, const UT_FixedVector< float, NAXES > &c, const UT_FixedVector< float, NAXES > &d)
GLint GLint GLsizei GLsizei GLsizei depth
void init(const BOX_TYPE *boxes, const INT_TYPE nboxes, SRC_INT_TYPE *indices=nullptr, bool reorder_indices=false, INT_TYPE max_items_per_leaf=1) noexcept
void traverse(FUNCTORS &functors, LOCAL_DATA *data_for_parent=nullptr) const noexcept
exint entries() const
Alias of size(). size() is preferred.
static void createTrivialIndices(SRC_INT_TYPE *indices, const INT_TYPE n) noexcept
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
const UT_FixedVector< v4uf, NAXES > myVQueryPoint
const UT_FixedVector< v4uf, NAXES > myVP1
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const FloatType max_dist_squared, const uint internal_node_num, v4uf &dist2) const
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
IMATH_NAMESPACE::V2f IMATH_NAMESPACE::Box2i std::string this attribute is obsolete as of OpenEXR v3 float
SYS_FORCE_INLINE bool sphereIntersects(const UT_FixedVector< T, NAXES > ¢er, const T rad) const
void getIntersectingBoxesBatch(const UT::BVH< 4 > &bvh, const UT::Box< v4uf, NAXES > *node_boxes, const UT::Box< float, NAXES > *query_box, UT_Array< INT_TYPE > *box_indices, BVHUnorderedStack &stack) noexcept
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const float max_dist_squared, const uint internal_node_num, v4uf &dist2) const
constexpr UT_Vector3T< SYS_FixedArrayElement_t< TS > > UTmakeVector3T(const TS &as) noexcept
ImageBuf OIIO_API absdiff(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
void utComputeNodeNItemsHelper(uint nodei, uint next_node_id, uint next_item_id, const DATA &data) noexcept
SIM_API const UT_StringHolder position
SYS_FORCE_INLINE void append(char character)
const UT_FixedVector< v4uf, NAXES > myVDiff
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
SYS_FORCE_INLINE BVHQueryPointWrapper(const TS &query_point)
GA_API const UT_StringHolder N
SYS_FORCE_INLINE float queryDistance2(const UT_FixedVector< float, NAXES > &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
SYS_FORCE_INLINE BVHQueryCone(const TS &p0, const TS &dir, const float angle, const POSITION_ARRAY &positions, const RADIUS_ARRAY &radii)
const UT_FixedVector< float, NAXES > myDir
const UT_FixedVector< FloatType, NAXES > myDir
IMATH_INTERNAL_NAMESPACE_HEADER_ENTER IMATH_HOSTDEVICE constexpr T abs(T a) IMATH_NOEXCEPT
SYS_FORCE_INLINE bool operator()(const BVHOrderedStackEntry &a, const BVHOrderedStackEntry &b) const noexcept
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
const POSITION_ARRAY & myPositions
void getIntersectingBoxes(const UT::BVH< 4 > &bvh, const UT::Box< v4uf, NAXES > *node_boxes, const UT::Box< float, NAXES > &query_box, UT_Array< INT_TYPE > &box_indices, BVHUnorderedStack &stack) noexcept
SYS_FORCE_INLINE bool utBoxExclude(const UT::Box< T, NAXES > &box) noexcept
void OIIO_UTIL_API split(string_view str, std::vector< string_view > &result, string_view sep=string_view(), int maxsplit=-1)
SYS_FORCE_INLINE BVHQueryTriangle(const UT_FixedVector< float, NAXES > &a, const UT_FixedVector< float, NAXES > &b, const UT_FixedVector< float, NAXES > &c)
SIM_API const UT_StringHolder distance
void clear()
Resets list to an empty list.
UT_FixedVector< float, NAXES > myIncentre
SYS_FORCE_INLINE void combine(const Box< T, NAXES > &src) noexcept
SYS_FORCE_INLINE FloatType queryDistance2(const TS &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const float max_dist_squared, const uint internal_node_num, v4uf &dist2) const
SYS_FORCE_INLINE BVHQueryPtUnitWrap(const UT_FixedVector< float, NAXES > &p)
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
SIM_DerVector3 cross(const SIM_DerVector3 &lhs, const SIM_DerVector3 &rhs)
const UT_FixedVector< FloatType, NAXES > myP1
exint insert(exint index)
void traverseVector(FUNCTORS &functors, LOCAL_DATA *data_for_parent=nullptr) const noexcept
GA_API const UT_StringHolder area
bool isEmpty() const
Returns true iff there are no occupied elements in the array.