16 #ifndef __UT_BVHImpl_h__
17 #define __UT_BVHImpl_h__
29 #define BVH_TRY_ALL_AXES 0
// NOTE(review): lossy fragment (original lines 33-42; intervening lines
// missing). "u" + "int" below is a hard line wrap inside the token "uint".
// Appears to test a Box<T,NAXES> for NaN/Inf bounds -- TODO confirm against
// the full source; axis 0 is presumably handled on a missing line above.
33 template<
typename T,u
int NAXES>
37 for (
uint axis = 1; axis < NAXES; ++axis)
// Returns true if any inspected bound was NaN or infinite.
42 return has_nan_or_inf;
// NOTE(review): fragment (original lines 46-55) of the single-precision box
// NaN/Inf check. Reinterprets the box's float bounds as int32 and tests the
// IEEE-754 binary32 exponent field: (bits & 0x7F800000) == 0x7F800000 holds
// exactly when the value is +/-Inf or NaN. Type-punning via reinterpret_cast
// is formally UB; presumably relied on as a compiler-specific fast path.
46 const int32 *pboxints =
reinterpret_cast<const int32*
>(&box);
// Axis 0 min/max checked explicitly; remaining axes in the loop below.
// Layout implied by the indexing: pboxints[2*axis] = min, [2*axis+1] = max.
48 bool has_nan_or_inf = ((pboxints[0] & 0x7F800000) == 0x7F800000);
49 has_nan_or_inf |= ((pboxints[1] & 0x7F800000) == 0x7F800000);
50 for (
uint axis = 1; axis < NAXES; ++axis)
52 has_nan_or_inf |= ((pboxints[2*axis] & 0x7F800000) == 0x7F800000);
53 has_nan_or_inf |= ((pboxints[2*axis + 1] & 0x7F800000) == 0x7F800000);
55 return has_nan_or_inf;
// NOTE(review): fragment (original lines 57-59). Fetches the [min,max] pair
// for one axis of a box; presumably the box-center helper (returning
// v[0]+v[1], i.e. 2x the centre) -- the return statement is not visible here.
57 template<
typename T,u
int NAXES>
59 const T*
v = box.vals[axis];
// NOTE(review): fragments (original lines 66-80) of the point (position)
// NaN/Inf checks: a generic per-axis loop, then a single-precision variant
// using the same IEEE-754 exponent-mask trick as the box version above.
66 template<
typename T,u
int NAXES>
69 for (
uint axis = 1; axis < NAXES; ++axis)
71 return has_nan_or_inf;
// fp32 specialization: one int32 per component (ppositionints[axis]).
77 bool has_nan_or_inf = ((ppositionints[0] & 0x7F800000) == 0x7F800000);
78 for (
uint axis = 1; axis < NAXES; ++axis)
79 has_nan_or_inf |= ((ppositionints[axis] & 0x7F800000) == 0x7F800000);
80 return has_nan_or_inf;
// NOTE(review): fragments (original lines 82-127) of fully-unrolled
// single-precision NaN/Inf checks for fixed component counts (2, 3 and 4
// components respectively). Signatures are missing from this view.
82 template<
typename T,u
int NAXES>
86 template<
typename T,u
int NAXES>
// 2-component variant.
106 bool has_nan_or_inf = ((ppositionints[0] & 0x7F800000) == 0x7F800000);
107 has_nan_or_inf |= ((ppositionints[1] & 0x7F800000) == 0x7F800000);
108 return has_nan_or_inf;
// 3-component variant.
114 bool has_nan_or_inf = ((ppositionints[0] & 0x7F800000) == 0x7F800000);
115 has_nan_or_inf |= ((ppositionints[1] & 0x7F800000) == 0x7F800000);
116 has_nan_or_inf |= ((ppositionints[2] & 0x7F800000) == 0x7F800000);
117 return has_nan_or_inf;
// 4-component variant.
123 bool has_nan_or_inf = ((ppositionints[0] & 0x7F800000) == 0x7F800000);
124 has_nan_or_inf |= ((ppositionints[1] & 0x7F800000) == 0x7F800000);
125 has_nan_or_inf |= ((ppositionints[2] & 0x7F800000) == 0x7F800000);
126 has_nan_or_inf |= ((ppositionints[3] & 0x7F800000) == 0x7F800000);
127 return has_nan_or_inf;
// NOTE(review): fragment (original lines 154-269) of the routine that
// compacts an index array, dropping entries whose boxes contain NaN/Inf.
// Returns the number of excluded entries; `nboxes` is reduced in place.
// Serial path below threshold; task-parallel path with per-task compaction
// followed by a memmove-based merge otherwise.
154 template<
typename BOX_TYPE,
typename SRC_INT_TYPE,
typename INT_TYPE>
157 constexpr INT_TYPE PARALLEL_THRESHOLD = 65536;
159 if (nboxes >= PARALLEL_THRESHOLD)
// Cap task count at 4x processors, and at nboxes/(threshold/2).
162 ntasks = (nprocessors > 1) ?
SYSmin(4*nprocessors, nboxes/(PARALLEL_THRESHOLD/2)) : 1;
// ---- Serial path ----
168 const SRC_INT_TYPE* indices_end =
indices + nboxes;
171 SRC_INT_TYPE* psrc_index =
indices;
// First loop: scan until the first bad entry (body missing in this view).
172 for (; psrc_index != indices_end; ++psrc_index)
178 if (psrc_index == indices_end)
// Second loop: compact remaining good entries over the bad ones.
182 SRC_INT_TYPE* nan_start = psrc_index;
183 for (++psrc_index; psrc_index != indices_end; ++psrc_index)
188 *nan_start = *psrc_index;
193 return indices_end - nan_start;
// ---- Parallel path: each task compacts its own slice ----
206 for (INT_TYPE taski = r.begin(), task_end = r.end(); taski < task_end; ++taski)
208 SRC_INT_TYPE* indices_start =
indices + (taski*
exint(nboxes))/ntasks;
209 const SRC_INT_TYPE* indices_end =
indices + ((taski+1)*
exint(nboxes))/ntasks;
210 SRC_INT_TYPE* psrc_index = indices_start;
211 for (; psrc_index != indices_end; ++psrc_index)
217 if (psrc_index == indices_end)
219 nexcluded[taski] = 0;
224 SRC_INT_TYPE* nan_start = psrc_index;
225 for (++psrc_index; psrc_index != indices_end; ++psrc_index)
230 *nan_start = *psrc_index;
234 nexcluded[taski] = indices_end - nan_start;
// Sum per-task exclusion counts; early-out if nothing was excluded.
239 INT_TYPE total_excluded = nexcluded[0];
240 for (INT_TYPE taski = 1; taski < ntasks; ++taski)
242 total_excluded += nexcluded[taski];
245 if (total_excluded == 0)
// Skip leading tasks that excluded nothing; their slices are already packed.
251 while (nexcluded[taski] == 0)
256 SRC_INT_TYPE* dest_indices =
indices + ((taski+1)*
exint(nboxes))/ntasks - nexcluded[taski];
258 SRC_INT_TYPE* dest_end =
indices + nboxes - total_excluded;
// Slide each later task's surviving entries left over the holes.
259 for (++taski; taski < ntasks && dest_indices < dest_end; ++taski)
261 const SRC_INT_TYPE* psrc_index =
indices + (taski*
exint(nboxes))/ntasks;
262 const SRC_INT_TYPE* psrc_end =
indices + ((taski+1)*
exint(nboxes))/ntasks - nexcluded[taski];
263 INT_TYPE
count = psrc_end - psrc_index;
// memmove: source and destination ranges may overlap.
265 memmove(dest_indices, psrc_index,
sizeof(SRC_INT_TYPE)*count);
266 dest_indices += count;
268 nboxes -= total_excluded;
269 return total_excluded;
// NOTE(review): fragment (original lines 273-278). Convenience overload:
// computes the overall bounding box of the inputs, then forwards to the
// main init() below.
273 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
276 computeFullBoundingBox(axes_minmax, boxes, nboxes,
indices);
278 init<H>(axes_minmax, boxes, nboxes,
indices, reorder_indices, max_items_per_leaf);
// NOTE(review): fragment (original lines 282-326) of the main BVH build
// entry point: create identity indices if none were supplied, exclude
// NaN/Inf boxes (recomputing the full bounds if any were dropped), build the
// tree via initNodeReorder/initNode, then trim and take ownership of the
// node array.
282 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
296 createTrivialIndices(
indices, nboxes);
// If any boxes were excluded, the precomputed bounds are stale -- redo them.
302 if (nexcluded != 0) {
307 computeFullBoundingBox(axes_minmax, boxes, nboxes,
indices);
// reorder_indices selects the leaf-reordering build; else the plain build.
315 initNodeReorder<H>(nodes, nodes[0], axes_minmax, boxes,
indices, nboxes, 0, max_items_per_leaf);
317 initNode<H>(nodes, nodes[0], axes_minmax, boxes,
indices, nboxes);
// Shrink-to-fit only when capacity exceeds size by more than ~12.5%.
320 if (8*nodes.capacity() > 9*nodes.size()) {
321 nodes.setCapacity(nodes.size());
// Transfer the node buffer to myRoot without freeing it in `nodes`.
324 myRoot.reset(nodes.array());
326 nodes.unsafeClearData();
// NOTE(review): fragment (original lines 334-367) of getMaxDepthHelper plus
// the getMaxDepth entry point: recursively takes the max depth over all
// internal (non-EMPTY) children of a node.
334 int newdepth =
depth;
336 for (
int s = 0;
s <
N;
s++)
338 const INT_TYPE node_int = curnode->child[
s];
339 if (Node::isInternal(node_int))
// EMPTY is encoded as a special internal value; skip it.
341 if (node_int == Node::EMPTY)
346 newdepth =
SYSmax(newdepth,
347 getMaxDepthHelper(Node::getInternalNum(node_int),
// Entry point: root node 0, starting depth 1.
367 return getMaxDepthHelper(0, 1);
// NOTE(review): fragment (original lines 377-423) of
// getPureInternalDepthHelper plus its entry point: computes the minimum
// depth at which a leaf (item) child appears, taking SYSmin over children.
377 for (
int s = 0;
s <
N;
s++)
379 const INT_TYPE node_int = curnode->child[
s];
380 if (Node::isInternal(node_int))
382 if (node_int == Node::EMPTY)
388 int d = getPureInternalDepthHelper(Node::getInternalNum(node_int),
393 childdepth =
SYSmin(childdepth, d);
// Invariant: only trailing EMPTY markers are expected in a node.
409 UT_ASSERT(!
"Empty node other than Node::EMPTY encountered.");
423 return getPureInternalDepthHelper(0, 0);
// NOTE(review): fragment (original lines 427-436). Public serial traversal:
// delegates to traverseHelper starting at root node 0 with no parent
// (INT_TYPE(-1)).
427 template<
typename LOCAL_DATA,
typename FUNCTORS>
430 LOCAL_DATA* data_for_parent)
const noexcept
436 traverseHelper(0,
INT_TYPE(-1), functors, data_for_parent);
// NOTE(review): fragment (original lines 439-466) of the recursive serial
// traversal. Protocol visible here: functors.pre() gates descent;
// functors.item() fires for leaf children; functors.post() fires after all
// children, receiving the per-child LOCAL_DATA array.
439 template<
typename LOCAL_DATA,
typename FUNCTORS>
442 INT_TYPE parent_nodei,
444 LOCAL_DATA* data_for_parent)
const noexcept
447 bool descend = functors.pre(nodei, data_for_parent);
450 LOCAL_DATA local_data[
N];
452 for (s = 0; s <
N; ++
s) {
453 const INT_TYPE node_int = node.child[
s];
454 if (Node::isInternal(node_int)) {
// EMPTY children terminate the child list (break presumed on missing line).
455 if (node_int == Node::EMPTY) {
459 traverseHelper(Node::getInternalNum(node_int), nodei, functors, &local_data[s]);
462 functors.item(node_int, nodei, local_data[s]);
// `s` here is the number of children actually visited.
466 functors.post(nodei, parent_nodei, data_for_parent, s, local_data);
// NOTE(review): fragment (original lines 470-480). Public parallel
// traversal: starts the helper at the root with the total node count so
// subtree sizes can be estimated for task splitting.
470 template<
typename LOCAL_DATA,
typename FUNCTORS>
474 LOCAL_DATA* data_for_parent)
const noexcept
480 traverseParallelHelper(0,
INT_TYPE(-1), parallel_threshold,
myNumNodes, functors, data_for_parent);
// NOTE(review): fragment (original lines 483-591) of the parallel traversal.
// Strategy visible here: estimate each child subtree's node count from the
// contiguous node numbering (children are laid out depth-first, so
// next_node_id - child_node_id bounds the subtree size), then run large
// subtrees as parallel tasks and the rest serially.
483 template<
typename LOCAL_DATA,
typename FUNCTORS>
486 INT_TYPE parent_nodei,
487 INT_TYPE parallel_threshold,
488 INT_TYPE next_node_id,
490 LOCAL_DATA* data_for_parent)
const noexcept
493 bool descend = functors.pre(nodei, data_for_parent);
499 INT_TYPE next_nodes[
N];
501 INT_TYPE nchildren =
N;
502 INT_TYPE nparallel = 0;
// Reverse scan so next_node_id can be propagated right-to-left.
503 for (INT_TYPE s = N; s --> 0; )
505 const INT_TYPE node_int = node.child[
s];
506 if (node_int == Node::EMPTY)
511 next_nodes[
s] = next_node_id;
512 if (Node::isInternal(node_int))
516 INT_TYPE child_node_id = Node::getInternalNum(node_int);
517 nnodes[
s] = next_node_id - child_node_id;
518 next_node_id = child_node_id;
524 nparallel += (nnodes[
s] >= parallel_threshold);
527 LOCAL_DATA local_data[
N];
// Parallelize only if at least two children are large enough.
528 if (nparallel >= 2) {
// Small children are handled serially first...
530 if (nparallel < nchildren) {
531 for (INT_TYPE s = 0; s <
N; ++
s) {
532 if (nnodes[s] >= parallel_threshold) {
535 const INT_TYPE node_int = node.child[
s];
536 if (Node::isInternal(node_int)) {
537 if (node_int == Node::EMPTY) {
541 traverseHelper(Node::getInternalNum(node_int), nodei, functors, &local_data[s]);
544 functors.item(node_int, nodei, local_data[s]);
// ...then each large child becomes one parallel task (taski selects the
// taski-th large child by counting).
550 for (INT_TYPE taski = r.begin(); taski < r.end(); ++taski) {
551 INT_TYPE parallel_count = 0;
555 for (s = 0; s <
N; ++
s) {
556 if (nnodes[s] < parallel_threshold) {
559 if (parallel_count == taski) {
564 const INT_TYPE node_int = node.child[
s];
565 if (Node::isInternal(node_int)) {
566 UT_ASSERT_MSG_P(node_int != Node::EMPTY,
"Empty entries should have been excluded above.");
567 traverseParallelHelper(Node::getInternalNum(node_int), nodei, parallel_threshold, next_nodes[s], functors, &local_data[s]);
570 functors.item(node_int, nodei, local_data[s]);
// Serial fallback: fewer than two large children.
577 for (INT_TYPE s = 0; s <
N; ++
s) {
578 const INT_TYPE node_int = node.child[
s];
579 if (Node::isInternal(node_int)) {
580 if (node_int == Node::EMPTY) {
584 traverseHelper(Node::getInternalNum(node_int), nodei, functors, &local_data[s]);
587 functors.item(node_int, nodei, local_data[s]);
591 functors.post(nodei, parent_nodei, data_for_parent, nchildren, local_data);
// NOTE(review): fragment (original lines 595-604). Public vectorized
// traversal entry point; delegates to traverseVectorHelper at root 0.
595 template<
typename LOCAL_DATA,
typename FUNCTORS>
598 LOCAL_DATA* data_for_parent)
const noexcept
604 traverseVectorHelper(0,
INT_TYPE(-1), functors, data_for_parent);
// NOTE(review): fragment (original lines 607-637) of the vectorized
// traversal: functors.pre() returns a BITMASK of children to descend into
// (bit s == child s), instead of a single bool.
607 template<
typename LOCAL_DATA,
typename FUNCTORS>
610 INT_TYPE parent_nodei,
612 LOCAL_DATA* data_for_parent)
const noexcept
615 INT_TYPE descend = functors.pre(nodei, data_for_parent);
618 LOCAL_DATA local_data[
N];
620 for (s = 0; s <
N; ++
s) {
621 if ((descend>>s) & 1) {
622 const INT_TYPE node_int = node.child[
s];
623 if (Node::isInternal(node_int)) {
624 if (node_int == Node::EMPTY) {
// Hit an EMPTY child: clear this bit and all higher bits of the mask.
626 descend &= (INT_TYPE(1)<<
s)-1;
629 traverseVectorHelper(Node::getInternalNum(node_int), nodei, functors, &local_data[s]);
632 functors.item(node_int, nodei, local_data[s]);
// post() also receives the (possibly truncated) descend mask.
637 functors.post(nodei, parent_nodei, data_for_parent, s, local_data, descend);
// NOTE(review): fragment (original lines 641-656); presumably
// createTrivialIndices, filling indices[i] = i, parallelized over tasks for
// large n -- the loop bodies are missing from this view, so confirm against
// the full source.
641 template<
typename SRC_INT_TYPE>
643 constexpr
INT_TYPE PARALLEL_THRESHOLD = 65536;
645 if (
n >= PARALLEL_THRESHOLD) {
647 ntasks = (nprocessors > 1) ?
SYSmin(4*nprocessors,
n/(PARALLEL_THRESHOLD/2)) : 1;
656 for (
INT_TYPE taski = r.begin(), taskend = r.end(); taski != taskend; ++taski) {
// NOTE(review): fragment (original lines 668-723) of
// computeFullBoundingBox: union of all input boxes, optionally indirected
// through `indices` (null indices => direct iteration), serial below 2*4096
// boxes, otherwise per-task partial boxes combined at the end.
668 template<
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
671 axes_minmax.initBounds();
675 if (nboxes >= 2*4096) {
677 ntasks = (nprocessors > 1) ?
SYSmin(4*nprocessors, nboxes/4096) : 1;
// ---- Serial: indexed variant ----
682 box.initBounds(boxes[
indices[0]]);
683 for (INT_TYPE i = 1; i < nboxes; ++i) {
684 box.combine(boxes[indices[i]]);
// ---- Serial: direct variant ----
688 box.initBounds(boxes[0]);
689 for (INT_TYPE i = 1; i < nboxes; ++i) {
690 box.combine(boxes[i]);
// ---- Parallel: one partial box per task ----
698 parallel_boxes.
setSize(ntasks);
700 for (INT_TYPE taski = r.begin(),
end = r.end(); taski <
end; ++taski) {
// uint64 arithmetic avoids overflow in taski*nboxes.
701 const INT_TYPE startbox = (taski*
uint64(nboxes))/ntasks;
702 const INT_TYPE endbox = ((taski+1)*
uint64(nboxes))/ntasks;
705 box.initBounds(boxes[indices[startbox]]);
706 for (INT_TYPE i = startbox+1; i < endbox; ++i) {
707 box.combine(boxes[indices[i]]);
711 box.initBounds(boxes[startbox]);
712 for (INT_TYPE i = startbox+1; i < endbox; ++i) {
713 box.combine(boxes[i]);
716 parallel_boxes[taski] = box;
// Reduce the per-task boxes into the final result.
722 for (INT_TYPE i = 1; i < ntasks; ++i) {
723 box.combine(parallel_boxes[i]);
// NOTE(review): fragment (original lines 731-860) of the recursive node
// build. Visible flow: tiny inputs become direct leaf children; otherwise
// the boxes are partitioned into N groups (split for N==2, multiSplit
// otherwise); groups over PARALLEL_THRESHOLD are built into private node
// arrays in parallel, then spliced into `nodes` with child offsets patched.
731 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
732 void BVH<N>::initNode(
UT_Array<Node>& nodes,
Node &node,
const Box<T,NAXES>& axes_minmax,
const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes) noexcept {
// Base case: few enough boxes that each becomes a direct leaf child.
735 for (INT_TYPE i = 0; i < nboxes; ++i) {
736 node.child[i] = indices[i];
738 for (INT_TYPE i = nboxes; i <
N; ++i) {
739 node.child[i] = Node::EMPTY;
// Partition `indices` into up to N contiguous groups.
744 SRC_INT_TYPE* sub_indices[N+1];
749 sub_indices[2] = indices+nboxes;
750 split<H>(axes_minmax, boxes,
indices, nboxes, sub_indices[1], &sub_boxes[0]);
753 multiSplit<H>(axes_minmax, boxes,
indices, nboxes, sub_indices, sub_boxes);
// Count groups big enough to build in parallel.
757 INT_TYPE nparallel = 0;
758 static constexpr INT_TYPE PARALLEL_THRESHOLD = 1024;
759 for (INT_TYPE i = 0; i <
N; ++i) {
760 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
761 if (sub_nboxes == 1) {
762 node.child[i] = sub_indices[i][0];
764 else if (sub_nboxes >= PARALLEL_THRESHOLD) {
775 if (nparallel >= 2) {
// Each large group gets its own node array + parent node, built in a task.
781 parallel_nodes.
setSize(nparallel);
783 parallel_parent_nodes.
setSize(nparallel);
785 for (INT_TYPE taski = r.begin(), end = r.end(); taski <
end; ++taski) {
787 INT_TYPE counted_parallel = 0;
790 for (childi = 0; childi <
N; ++childi) {
791 sub_nboxes = sub_indices[childi+1]-sub_indices[childi];
792 if (sub_nboxes >= PARALLEL_THRESHOLD) {
793 if (counted_parallel == taski) {
808 Node& parent_node = parallel_parent_nodes[taski];
811 initNode<H>(local_nodes, parent_node, sub_boxes[childi], boxes, sub_indices[childi], sub_nboxes);
// Splice the per-task results back into `nodes` in child order.
815 INT_TYPE counted_parallel = 0;
816 for (INT_TYPE i = 0; i <
N; ++i) {
817 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
818 if (sub_nboxes != 1) {
819 INT_TYPE local_nodes_start = nodes.size();
820 node.child[i] = Node::markInternal(local_nodes_start);
821 if (sub_nboxes >= PARALLEL_THRESHOLD) {
823 Node child_node = parallel_parent_nodes[counted_parallel];
// Internal child indices were local to the task array; rebase them.
825 for (INT_TYPE childi = 0; childi <
N; ++childi) {
826 INT_TYPE child_child = child_node.child[childi];
827 if (Node::isInternal(child_child) && child_child != Node::EMPTY) {
828 child_child += local_nodes_start;
829 child_node.child[childi] = child_child;
834 const UT_Array<Node>& local_nodes = parallel_nodes[counted_parallel];
836 INT_TYPE
n = local_nodes.
size();
837 nodes.bumpCapacity(local_nodes_start + n);
838 nodes.setSizeNoInit(local_nodes_start + n);
839 nodes[local_nodes_start-1] = child_node;
// Small group: append one node slot and recurse serially.
842 nodes.bumpCapacity(local_nodes_start + 1);
843 nodes.setSizeNoInit(local_nodes_start + 1);
844 initNode<H>(nodes, nodes[local_nodes_start], sub_boxes[i], boxes, sub_indices[i], sub_nboxes);
850 adjustParallelChildNodes<PARALLEL_THRESHOLD>(nparallel, nodes, node, parallel_nodes.
array(), sub_indices);
// Fully serial path: recurse into every non-leaf group in order.
853 for (INT_TYPE i = 0; i <
N; ++i) {
854 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
855 if (sub_nboxes != 1) {
856 INT_TYPE local_nodes_start = nodes.size();
857 node.child[i] = Node::markInternal(local_nodes_start);
858 nodes.bumpCapacity(local_nodes_start + 1);
859 nodes.setSizeNoInit(local_nodes_start + 1);
860 initNode<H>(nodes, nodes[local_nodes_start], sub_boxes[i], boxes, sub_indices[i], sub_nboxes);
// NOTE(review): fragment (original lines 867-1058) of the leaf-reordering
// node build. Same overall shape as initNode, but groups of size
// <= max_items_per_leaf become leaves whose items are moved to the FRONT of
// this node's index range (so a leaf is addressed by indices_offset + local
// position), and recursion passes the rebased indices_offset down.
867 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
868 void BVH<N>::initNodeReorder(
UT_Array<Node>& nodes,
Node &node,
const Box<T,NAXES>& axes_minmax,
const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes,
const INT_TYPE indices_offset,
const INT_TYPE max_items_per_leaf) noexcept {
// Base case: children reference reordered positions, not original indices.
871 for (INT_TYPE i = 0; i < nboxes; ++i) {
872 node.child[i] = indices_offset+i;
874 for (INT_TYPE i = nboxes; i <
N; ++i) {
875 node.child[i] = Node::EMPTY;
880 SRC_INT_TYPE* sub_indices[N+1];
885 sub_indices[2] = indices+nboxes;
886 split<H>(axes_minmax, boxes,
indices, nboxes, sub_indices[1], &sub_boxes[0]);
889 multiSplit<H>(axes_minmax, boxes,
indices, nboxes, sub_indices, sub_boxes);
// Collect the items of every leaf-sized group into leaf_indices,
// remembering each leaf's size.
894 INT_TYPE nleaves = 0;
896 SRC_INT_TYPE leaf_sizes[
N];
897 INT_TYPE sub_nboxes0 = sub_indices[1]-sub_indices[0];
898 if (sub_nboxes0 <= max_items_per_leaf) {
899 leaf_sizes[0] = sub_nboxes0;
900 for (
int j = 0;
j < sub_nboxes0; ++
j)
901 leaf_indices.
append(sub_indices[0][
j]);
904 INT_TYPE sub_nboxes1 = sub_indices[2]-sub_indices[1];
905 if (sub_nboxes1 <= max_items_per_leaf) {
906 leaf_sizes[nleaves] = sub_nboxes1;
907 for (
int j = 0;
j < sub_nboxes1; ++
j)
908 leaf_indices.
append(sub_indices[1][
j]);
911 for (INT_TYPE i = 2; i <
N; ++i) {
912 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
913 if (sub_nboxes <= max_items_per_leaf) {
914 leaf_sizes[nleaves] = sub_nboxes;
915 for (
int j = 0;
j < sub_nboxes; ++
j)
916 leaf_indices.
append(sub_indices[i][
j]);
// Shift non-leaf groups right (back-to-front to allow overlap) to make
// room for the leaf items at the front of the range.
921 INT_TYPE move_distance = 0;
922 INT_TYPE index_move_distance = 0;
923 for (INT_TYPE i = N; i --> 0; )
925 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
926 if (sub_nboxes <= max_items_per_leaf)
929 index_move_distance += sub_nboxes;
931 else if (move_distance > 0)
933 SRC_INT_TYPE *start_src_index = sub_indices[i];
934 for (SRC_INT_TYPE *src_index = sub_indices[i+1]-1; src_index >= start_src_index; --src_index) {
935 src_index[index_move_distance] = src_index[0];
937 sub_indices[i+move_distance] = sub_indices[i]+index_move_distance;
// Copy the collected leaf items into the freed-up front slots.
940 index_move_distance = 0;
941 for (INT_TYPE i = 0; i < nleaves; ++i)
943 INT_TYPE sub_nboxes = leaf_sizes[i];
944 sub_indices[i] = indices+index_move_distance;
945 for (
int j = 0;
j < sub_nboxes; ++
j)
946 indices[index_move_distance+
j] = leaf_indices[index_move_distance+
j];
947 index_move_distance += sub_nboxes;
// From here the structure mirrors initNode (parallel vs serial recursion).
952 INT_TYPE nparallel = 0;
953 static constexpr INT_TYPE PARALLEL_THRESHOLD = 1024;
954 for (INT_TYPE i = 0; i <
N; ++i) {
955 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
956 if (sub_nboxes <= max_items_per_leaf) {
957 node.child[i] = indices_offset+(sub_indices[i]-sub_indices[0]);
959 else if (sub_nboxes >= PARALLEL_THRESHOLD) {
970 if (nparallel >= 2) {
976 parallel_nodes.
setSize(nparallel);
978 parallel_parent_nodes.
setSize(nparallel);
980 for (INT_TYPE taski = r.begin(), end = r.end(); taski <
end; ++taski) {
982 INT_TYPE counted_parallel = 0;
985 for (childi = 0; childi <
N; ++childi) {
986 sub_nboxes = sub_indices[childi+1]-sub_indices[childi];
987 if (sub_nboxes >= PARALLEL_THRESHOLD) {
988 if (counted_parallel == taski) {
// Pre-size the task-local node array from an estimate.
1002 local_nodes.
setCapacity(nodeEstimate(sub_nboxes));
1003 Node& parent_node = parallel_parent_nodes[taski];
1006 initNodeReorder<H>(local_nodes, parent_node, sub_boxes[childi], boxes, sub_indices[childi], sub_nboxes,
1007 indices_offset+(sub_indices[childi]-sub_indices[0]), max_items_per_leaf);
1011 INT_TYPE counted_parallel = 0;
1012 for (INT_TYPE i = 0; i <
N; ++i) {
1013 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
1014 if (sub_nboxes > max_items_per_leaf) {
1015 INT_TYPE local_nodes_start = nodes.size();
1016 node.child[i] = Node::markInternal(local_nodes_start);
1017 if (sub_nboxes >= PARALLEL_THRESHOLD) {
1019 Node child_node = parallel_parent_nodes[counted_parallel];
1020 ++local_nodes_start;
// Rebase the task-local internal child indices into `nodes`.
1021 for (INT_TYPE childi = 0; childi <
N; ++childi) {
1022 INT_TYPE child_child = child_node.child[childi];
1023 if (Node::isInternal(child_child) && child_child != Node::EMPTY) {
1024 child_child += local_nodes_start;
1025 child_node.child[childi] = child_child;
1030 const UT_Array<Node>& local_nodes = parallel_nodes[counted_parallel];
1032 INT_TYPE n = local_nodes.
size();
1033 nodes.bumpCapacity(local_nodes_start + n);
1034 nodes.setSizeNoInit(local_nodes_start + n);
1035 nodes[local_nodes_start-1] = child_node;
1038 nodes.bumpCapacity(local_nodes_start + 1);
1039 nodes.setSizeNoInit(local_nodes_start + 1);
1040 initNodeReorder<H>(nodes, nodes[local_nodes_start], sub_boxes[i], boxes, sub_indices[i], sub_nboxes,
1041 indices_offset+(sub_indices[i]-sub_indices[0]), max_items_per_leaf);
1047 adjustParallelChildNodes<PARALLEL_THRESHOLD>(nparallel, nodes, node, parallel_nodes.
array(), sub_indices);
// Fully serial recursion path.
1050 for (INT_TYPE i = 0; i <
N; ++i) {
1051 INT_TYPE sub_nboxes = sub_indices[i+1]-sub_indices[i];
1052 if (sub_nboxes > max_items_per_leaf) {
1053 INT_TYPE local_nodes_start = nodes.size();
1054 node.child[i] = Node::markInternal(local_nodes_start);
1055 nodes.bumpCapacity(local_nodes_start + 1);
1056 nodes.setSizeNoInit(local_nodes_start + 1);
1057 initNodeReorder<H>(nodes, nodes[local_nodes_start], sub_boxes[i], boxes, sub_indices[i], sub_nboxes,
1058 indices_offset+(sub_indices[i]-sub_indices[0]), max_items_per_leaf);
// NOTE(review): fragment (original lines 1065-1184) of multiSplit:
// repeatedly bisects the group with the worst heuristic*count score until N
// sub-groups exist. Degenerate boxes (some axis with zero extent) are
// redirected to BOX_AREA / BOX_PERIMETER heuristics to avoid zero scores.
1065 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
1066 void BVH<N>::multiSplit(
const Box<T,NAXES>& axes_minmax,
const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes, SRC_INT_TYPE* sub_indices[N+1],
Box<T,NAXES> sub_boxes[N]) noexcept {
// Detect zero-extent axes; they make volume-style heuristics degenerate.
1073 uint num_axes_zero = 0;
1074 for (
uint axis = 0; axis < NAXES; ++axis)
1076 const T*
v = axes_minmax[axis];
1077 num_axes_zero += (v[0] == v[1]);
1079 if (num_axes_zero != 0)
1083 multiSplit<BVH_Heuristic::BOX_AREA>(axes_minmax, boxes,
indices, nboxes, sub_indices, sub_boxes);
1086 multiSplit<BVH_Heuristic::BOX_PERIMETER>(axes_minmax, boxes,
indices, nboxes, sub_indices, sub_boxes);
// Initial bisection into two groups.
1092 sub_indices[2] = indices+nboxes;
1093 split<H>(axes_minmax, boxes,
indices, nboxes, sub_indices[1], &sub_boxes[0]);
// ---- Variant A (start/end pair bookkeeping) ----
1100 SRC_INT_TYPE* sub_indices_startend[2*
N];
1102 sub_boxes_unsorted[0] = sub_boxes[0];
1103 sub_boxes_unsorted[1] = sub_boxes[1];
1104 sub_indices_startend[0] = sub_indices[0];
1105 sub_indices_startend[1] = sub_indices[1];
1106 sub_indices_startend[2] = sub_indices[1];
1107 sub_indices_startend[3] = sub_indices[2];
1108 for (INT_TYPE nsub = 2; nsub <
N; ++nsub) {
1109 SRC_INT_TYPE* selected_start = sub_indices_startend[0];
1110 SRC_INT_TYPE* selected_end = sub_indices_startend[1];
1114 for (INT_TYPE i = 0; i < nsub-1; ++i) {
1115 sub_indices_startend[2*i ] = sub_indices_startend[2*i+2];
1116 sub_indices_startend[2*i+1] = sub_indices_startend[2*i+3];
// NOTE(review): i starts at 0, so sub_boxes_unsorted[i-1] reads index -1 on
// the first iteration as written here -- lines are missing from this view,
// so verify against the full source before assuming a bug.
1118 for (INT_TYPE i = 0; i < nsub-1; ++i) {
1119 sub_boxes_unsorted[i] = sub_boxes_unsorted[i-1];
1123 split<H>(sub_box, boxes, selected_start, selected_end-selected_start, sub_indices_startend[2*nsub-1], &sub_boxes_unsorted[nsub]);
1124 sub_indices_startend[2*nsub-2] = selected_start;
1125 sub_indices_startend[2*nsub] = sub_indices_startend[2*nsub-1];
1126 sub_indices_startend[2*nsub+1] = selected_end;
// Sort the unordered groups back into address order.
1129 sub_indices[
N] = indices+nboxes;
1130 for (INT_TYPE i = 0; i <
N; ++i) {
1131 SRC_INT_TYPE* prev_pointer = (i != 0) ? sub_indices[i-1] :
nullptr;
1132 SRC_INT_TYPE* min_pointer =
nullptr;
1134 for (INT_TYPE
j = 0;
j <
N; ++
j) {
1135 SRC_INT_TYPE* cur_pointer = sub_indices_startend[2*
j];
1136 if ((cur_pointer > prev_pointer) && (!min_pointer || (cur_pointer < min_pointer))) {
1137 min_pointer = cur_pointer;
1138 box = sub_boxes_unsorted[
j];
1142 sub_indices[i] = min_pointer;
// ---- Variant B (pick group with max area*count, split it in place) ----
1149 sub_box_areas[0] = unweightedHeuristic<H>(sub_boxes[0]);
1150 sub_box_areas[1] = unweightedHeuristic<H>(sub_boxes[1]);
1151 for (INT_TYPE nsub = 2; nsub <
N; ++nsub) {
1153 INT_TYPE split_choice = INT_TYPE(-1);
1155 for (INT_TYPE i = 0; i < nsub; ++i) {
1156 const INT_TYPE index_count = (sub_indices[i+1]-sub_indices[i]);
// Only groups with >1 item can be split further.
1157 if (index_count > 1) {
1158 const T heuristic = sub_box_areas[i]*index_count;
1159 if (split_choice == INT_TYPE(-1) || heuristic > max_heuristic) {
1161 max_heuristic = heuristic;
1165 UT_ASSERT_MSG_P(split_choice != INT_TYPE(-1),
"There should always be at least one that can be split!");
1167 SRC_INT_TYPE* selected_start = sub_indices[split_choice];
1168 SRC_INT_TYPE* selected_end = sub_indices[split_choice+1];
// Shift later groups one slot right to make room for the new split.
1171 for (INT_TYPE i = nsub; i > split_choice; --i) {
1172 sub_indices[i+1] = sub_indices[i];
1174 for (INT_TYPE i = nsub-1; i > split_choice; --i) {
1175 sub_boxes[i+1] = sub_boxes[i];
1177 for (INT_TYPE i = nsub-1; i > split_choice; --i) {
1178 sub_box_areas[i+1] = sub_box_areas[i];
1182 split<H>(sub_boxes[split_choice], boxes, selected_start, selected_end-selected_start, sub_indices[split_choice+1], &sub_boxes[split_choice]);
1183 sub_box_areas[split_choice] = unweightedHeuristic<H>(sub_boxes[split_choice]);
1184 sub_box_areas[split_choice+1] = unweightedHeuristic<H>(sub_boxes[split_choice+1]);
// NOTE(review): fragment (original lines 1190-1499) of the two-way SAH-style
// split. Visible tiers: trivial (<=2 boxes); degenerate-axis redirection;
// exhaustive bit-mask partition search for <=6 boxes; exact sorted-midpoint
// search for medium counts; span-bucketed approximate search (with
// nth_element fallback) for large counts.
1190 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
1191 void BVH<N>::split(
const Box<T,NAXES>& axes_minmax,
const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes, SRC_INT_TYPE*& split_indices,
Box<T,NAXES>* split_boxes) noexcept {
// 2-box case: one box on each side.
1193 split_boxes[0].initBounds(boxes[indices[0]]);
1194 split_boxes[1].initBounds(boxes[indices[1]]);
1195 split_indices = indices+1;
1198 UT_ASSERT_MSG_P(nboxes > 2,
"Cases with less than 3 boxes should have already been handled!");
// Zero-extent axes degrade volume heuristics; fall back to area/perimeter.
1210 uint num_axes_zero = 0;
1211 for (
uint axis = 0; axis < NAXES; ++axis)
1213 const T*
v = axes_minmax[axis];
1214 num_axes_zero += (v[0] == v[1]);
1216 if (num_axes_zero != 0)
1220 split<BVH_Heuristic::BOX_AREA>(axes_minmax, boxes,
indices, nboxes, split_indices, split_boxes);
1223 split<BVH_Heuristic::BOX_PERIMETER>(axes_minmax, boxes,
indices, nboxes, split_indices, split_boxes);
// ---- Small case: brute-force all 2^(n-1) partitions (box 0 pinned left) --
1228 constexpr INT_TYPE SMALL_LIMIT = 6;
1229 if (nboxes <= SMALL_LIMIT) {
1234 for (INT_TYPE box = 0; box < nboxes; ++box) {
1235 local_boxes[box].initBounds(boxes[indices[box]]);
1238 const INT_TYPE partition_limit = (INT_TYPE(1)<<(nboxes-1));
1239 INT_TYPE best_partition = INT_TYPE(-1);
1241 for (INT_TYPE partition_bits = 1; partition_bits < partition_limit; ++partition_bits) {
1243 sub_boxes[0] = local_boxes[0];
1244 sub_boxes[1].initBounds();
1245 INT_TYPE sub_counts[2] = {1,0};
1246 for (INT_TYPE bit = 0; bit < nboxes-1; ++bit) {
// Bit b routes box b+1 to side 0 or 1.
1247 INT_TYPE dest = (partition_bits>>bit)&1;
1248 sub_boxes[dest].combine(local_boxes[bit+1]);
// SAH cost: unweighted heuristic of each side times its box count.
1254 unweightedHeuristic<H>(sub_boxes[0])*sub_counts[0] +
1255 unweightedHeuristic<H>(sub_boxes[1])*sub_counts[1];
1257 if (best_partition == INT_TYPE(-1) || heuristic < best_heuristic) {
1259 best_partition = partition_bits;
1260 best_heuristic = heuristic;
1261 split_boxes[0] = sub_boxes[0];
1262 split_boxes[1] = sub_boxes[1];
1266 #if 0 // This isn't actually necessary with the current design, because I changed how the number of subtree nodes is determined.
1272 if (best_partition == partition_limit - 1) {
1274 SRC_INT_TYPE last_index = indices[0];
1275 SRC_INT_TYPE* dest_indices =
indices;
1276 SRC_INT_TYPE* local_split_indices = indices + nboxes-1;
1277 for (; dest_indices != local_split_indices; ++dest_indices) {
1278 dest_indices[0] = dest_indices[1];
1280 *local_split_indices = last_index;
1281 split_indices = local_split_indices;
1285 sub_boxes[0] = sub_boxes[1];
1286 sub_boxes[1] = temp_box;
// Rearrange indices[1..] so side-0 boxes precede side-1 boxes.
1293 SRC_INT_TYPE local_indices[SMALL_LIMIT-1];
1294 for (INT_TYPE box = 0; box < nboxes-1; ++box) {
1295 local_indices[box] = indices[box+1];
1297 SRC_INT_TYPE* dest_indices = indices+1;
1298 SRC_INT_TYPE* src_indices = local_indices;
1300 for (INT_TYPE bit = 0; bit < nboxes-1; ++bit, ++src_indices) {
1301 if (!((best_partition>>bit)&1)) {
1303 *dest_indices = *src_indices;
1307 split_indices = dest_indices;
1309 src_indices = local_indices;
1310 for (INT_TYPE bit = 0; bit < nboxes-1; ++bit, ++src_indices) {
1311 if ((best_partition>>bit)&1) {
1313 *dest_indices = *src_indices;
// ---- Pick the longest axis of the overall bounds ----
1321 T max_axis_length = axes_minmax.vals[0][1] - axes_minmax.vals[0][0];
1322 for (
uint axis = 1; axis < NAXES; ++axis) {
1323 const T axis_length = axes_minmax.vals[axis][1] - axes_minmax.vals[axis][0];
1324 if (axis_length > max_axis_length) {
1326 max_axis_length = axis_length;
// All axes degenerate (or NaN-poisoned): arbitrary even split.
1330 if (!(max_axis_length >
T(0))) {
1333 split_indices = indices + nboxes/2;
1334 split_boxes[0] = axes_minmax;
1335 split_boxes[1] = axes_minmax;
// Reference cost of a naive halving, used to decide whether trying the
// other axes is worthwhile (BVH_TRY_ALL_AXES builds only).
1339 #if BVH_TRY_ALL_AXES
1341 half_box.vals[max_axis][1] = 0.5*(half_box.vals[max_axis][0] + half_box.vals[max_axis][1]);
1342 T good_heuristic = unweightedHeuristic<H>(half_box)*nboxes;
1344 const INT_TYPE axis = max_axis;
// ---- Medium case: exact search over sorted midpoints ----
1347 constexpr INT_TYPE MID_LIMIT = 2*NSPANS;
1348 if (nboxes <= MID_LIMIT) {
1349 #if BVH_TRY_ALL_AXES
1351 split_indices =
nullptr;
1353 splitMiddleCountAxis<H>(max_axis, best_heuristic, boxes,
indices, nboxes, split_indices, split_boxes);
// Retry the other two axes only if the best found is notably worse
// than the naive-halving reference cost.
1356 if (best_heuristic > 1.2*good_heuristic)
1358 uint axis1 = (max_axis==0) ? 2 : (max_axis-1);
1359 splitMiddleCountAxis<H>(axis1, best_heuristic, boxes,
indices, nboxes, split_indices, split_boxes);
1361 uint axis2 = (axis1==0) ? 2 : (axis1-1);
1362 splitMiddleCountAxis<H>(axis2, best_heuristic, boxes,
indices, nboxes, split_indices, split_boxes);
// ---- Large case: bucketed span search ----
1371 INT_TYPE best_left_count_split;
1372 INT_TYPE maxaxis_first_left_count = -1;
1373 INT_TYPE maxaxis_last_left_count = -1;
1376 INT_TYPE best_axis = -1;
1377 INT_TYPE best_split_index = -1;
1378 splitLargeCountAxis<H>(max_axis, axes_minmax.vals[max_axis][0], max_axis_length,
1379 best_heuristic, best_axis, best_split_index,
1380 best_left_split_box, best_right_split_box, best_left_count_split,
1381 maxaxis_first_left_count, maxaxis_last_left_count,
1385 if (best_heuristic > 1.2*good_heuristic)
1387 uint axis1 = (max_axis==0) ? 2 : (max_axis-1);
1388 splitLargeCountAxis<H>(axis1, axes_minmax.vals[axis1][0], axes_minmax.vals[axis1][1] - axes_minmax.vals[axis1][0],
1389 best_heuristic, best_axis, best_split_index,
1390 best_left_split_box, best_right_split_box, best_left_count_split,
1391 maxaxis_first_left_count, maxaxis_last_left_count,
1394 uint axis2 = (axis1==0) ? 2 : (axis1-1);
1395 splitLargeCountAxis<H>(axis2, axes_minmax.vals[axis2][0], axes_minmax.vals[axis2][1] - axes_minmax.vals[axis2][0],
1396 best_heuristic, best_axis, best_split_index,
1397 best_left_split_box, best_right_split_box, best_left_count_split,
1398 maxaxis_first_left_count, maxaxis_last_left_count,
1402 SRC_INT_TYPE*
const indices_end = indices+nboxes;
// Fallback when no acceptable span split: nth_element for a balanced split,
// clamped to [min_count, max_count] of the range.
1404 if (best_split_index == -1) {
1407 const INT_TYPE min_count = nboxes/MIN_FRACTION;
1408 const INT_TYPE max_count = ((MIN_FRACTION-1)*
uint64(nboxes))/MIN_FRACTION;
1417 SRC_INT_TYPE* nth_index;
1418 if (maxaxis_first_left_count > max_count) {
1420 nth_index = indices+max_count;
1423 else if (maxaxis_last_left_count < min_count) {
1425 nth_index = indices+min_count;
1430 nth_index = indices+nboxes/2;
1440 nthElement<T>(boxes,
indices,indices+nboxes,max_axis,nth_index);
1442 split_indices = nth_index;
// Recompute exact side boxes after partitioning.
1444 for (SRC_INT_TYPE* left_indices = indices+1; left_indices < nth_index; ++left_indices) {
1445 left_box.combine(boxes[*left_indices]);
1448 for (SRC_INT_TYPE* right_indices = nth_index+1; right_indices < indices_end; ++right_indices) {
1449 right_box.combine(boxes[*right_indices]);
1451 split_boxes[0] = left_box;
1452 split_boxes[1] = right_box;
// Span split accepted: partition around the chosen pivot plane (values are
// doubled -- "x2" -- to avoid a 0.5 factor when using box centres).
1456 const T best_axis_min_x2 = center_scale*axes_minmax.vals[best_axis][0];
1457 const T best_axis_length = axes_minmax.vals[best_axis][1] - axes_minmax.vals[best_axis][0];
1458 const T pivotx2 = best_axis_min_x2 + (best_split_index+1)*best_axis_length/(NSPANS/center_scale);
1459 SRC_INT_TYPE* ppivot_start;
1460 SRC_INT_TYPE* ppivot_end;
1461 partitionByCentre(boxes,indices,indices+nboxes,best_axis,pivotx2,ppivot_start,ppivot_end);
1463 split_indices = indices + best_left_count_split;
// If the intended split lands inside the pivot-equal run, the precomputed
// boxes are valid as-is.
1468 if (split_indices >= ppivot_start && split_indices <= ppivot_end) {
1469 split_boxes[0] = best_left_split_box;
1470 split_boxes[1] = best_right_split_box;
// Otherwise clamp to the run boundary and recompute both side boxes.
1475 if (split_indices < ppivot_start) {
1476 split_indices = ppivot_start;
1479 split_indices = ppivot_end;
1483 if (split_indices == indices) {
1486 else if (split_indices == indices_end) {
1491 for (SRC_INT_TYPE* left_indices = indices+1; left_indices < split_indices; ++left_indices) {
1492 left_box.combine(boxes[*left_indices]);
1495 for (SRC_INT_TYPE* right_indices = split_indices+1; right_indices < indices_end; ++right_indices) {
1496 right_box.combine(boxes[*right_indices]);
1498 split_boxes[0] = left_box;
1499 split_boxes[1] = right_box;
// NOTE(review): fragment (original lines 1504-1632) of the exact
// medium-count split; the function continues past the end of this view.
// Visible algorithm: sort boxes by doubled centre ("x2") along `axis` via a
// 4-chunk insertion sort + std::merge merge tree, build prefix/suffix box
// arrays, then take the split position minimizing the SAH-style cost. In
// BVH_TRY_ALL_AXES builds the result is only committed if it beats the best
// heuristic found so far on earlier axes.
1504 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
1505 void BVH<N>::splitMiddleCountAxis(
const INT_TYPE axis, T &best_heuristic,
const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes, SRC_INT_TYPE*& split_indices,
Box<T,NAXES>* split_boxes) noexcept {
1510 #if BVH_TRY_ALL_AXES
1511 constexpr INT_TYPE MID_LIMIT = 2*NSPANS;
// Cache each box's doubled centre coordinate on the chosen axis.
1516 T midpointsx2[MID_LIMIT];
1517 for (INT_TYPE i = 0; i < nboxes; ++i) {
1518 midpointsx2[i] =
utBoxCenter(boxes[indices[i]], axis);
1520 SRC_INT_TYPE local_indices[MID_LIMIT];
1521 for (INT_TYPE i = 0; i < nboxes; ++i) {
1522 local_indices[i] = i;
// Split into 4 roughly equal chunks (uint64 math avoids overflow).
1525 const INT_TYPE chunk_starts[5] = {0, nboxes/4, nboxes/2, INT_TYPE((3*
uint64(nboxes))/4), nboxes};
// Insertion-sort each chunk by midpoint.
1528 for (INT_TYPE chunk = 0; chunk < 4; ++chunk) {
1529 const INT_TYPE
start = chunk_starts[chunk];
1530 const INT_TYPE end = chunk_starts[chunk+1];
1531 for (INT_TYPE i = start+1; i <
end; ++i) {
1532 SRC_INT_TYPE indexi = local_indices[i];
1533 T vi = midpointsx2[indexi];
1534 for (INT_TYPE
j = start;
j < i; ++
j) {
1535 SRC_INT_TYPE indexj = local_indices[
j];
1536 T vj = midpointsx2[indexj];
1539 local_indices[
j] = indexi;
1543 local_indices[
j] = indexi;
1546 indexj = local_indices[
j];
// Merge the 4 sorted chunks pairwise, then merge the two halves.
1554 SRC_INT_TYPE local_indices_temp[MID_LIMIT];
1555 std::merge(local_indices, local_indices+chunk_starts[1],
1556 local_indices+chunk_starts[1], local_indices+chunk_starts[2],
1557 local_indices_temp, [&midpointsx2](
const SRC_INT_TYPE
a,
const SRC_INT_TYPE
b)->
bool {
1558 return midpointsx2[
a] < midpointsx2[
b];
1560 std::merge(local_indices+chunk_starts[2], local_indices+chunk_starts[3],
1561 local_indices+chunk_starts[3], local_indices+chunk_starts[4],
1562 local_indices_temp+chunk_starts[2], [&midpointsx2](
const SRC_INT_TYPE a,
const SRC_INT_TYPE b)->
bool {
1563 return midpointsx2[
a] < midpointsx2[
b];
1565 std::merge(local_indices_temp, local_indices_temp+chunk_starts[2],
1566 local_indices_temp+chunk_starts[2], local_indices_temp+chunk_starts[4],
1567 local_indices, [&midpointsx2](
const SRC_INT_TYPE a,
const SRC_INT_TYPE b)->
bool {
1568 return midpointsx2[
a] < midpointsx2[
b];
// Translate sort positions back into actual box indices.
1572 for (INT_TYPE i = 0; i < nboxes; ++i) {
1573 local_indices[i] = indices[local_indices[i]];
// Non-TRY_ALL_AXES builds commit the sorted order immediately.
1575 #if !BVH_TRY_ALL_AXES
1577 for (INT_TYPE i = 0; i < nboxes; ++i) {
1578 indices[i] = local_indices[i];
1582 std::stable_sort(indices, indices+nboxes, [boxes,max_axis](SRC_INT_TYPE a, SRC_INT_TYPE b)->
bool {
// Prefix (left_boxes) and suffix (right_boxes) bounding boxes per split.
1590 const INT_TYPE nsplits = nboxes-1;
1592 left_boxes[0] = box_accumulator;
1593 for (INT_TYPE i = 1; i < nsplits; ++i) {
1594 box_accumulator.combine(boxes[local_indices[i]]);
1595 left_boxes[i] = box_accumulator;
1597 box_accumulator.initBounds(boxes[local_indices[nsplits-1]]);
1598 right_boxes[nsplits-1] = box_accumulator;
1599 for (INT_TYPE i = nsplits-1; i > 0; --i) {
1600 box_accumulator.combine(boxes[local_indices[i]]);
1601 right_boxes[i-1] = box_accumulator;
// Pick the split minimizing left_cost*left_count + right_cost*right_count.
1604 INT_TYPE best_split = 0;
1605 T best_local_heuristic =
1606 unweightedHeuristic<H>(left_boxes[0]) +
1607 unweightedHeuristic<H>(right_boxes[0])*(nboxes-1);
1610 unweightedHeuristic<H>(left_boxes[
split])*(
split+1) +
1611 unweightedHeuristic<H>(right_boxes[
split])*(nboxes-(
split+1));
1612 if (heuristic < best_local_heuristic) {
1614 best_local_heuristic = heuristic;
// TRY_ALL_AXES: only overwrite the caller's result if this axis wins.
1618 #if BVH_TRY_ALL_AXES
1619 if (!split_indices || best_local_heuristic < best_heuristic) {
1621 for (INT_TYPE i = 0; i < nboxes; ++i) {
1622 indices[i] = local_indices[i];
1624 split_indices = indices+best_split+1;
1625 split_boxes[0] = left_boxes[best_split];
1626 split_boxes[1] = right_boxes[best_split];
1627 best_heuristic = best_local_heuristic;
1630 split_indices = indices+best_split+1;
1631 split_boxes[0] = left_boxes[best_split];
1632 split_boxes[1] = right_boxes[best_split];
1636 #if BVH_TRY_ALL_AXES
1640 template<BVH_Heuristic H,
typename T,u
int NAXES,
typename BOX_TYPE,
typename SRC_INT_TYPE>
1641 void BVH<N>::splitLargeCountAxis(
1642 const INT_TYPE axis,
const T axis_min,
const T axis_length,
1643 T &best_heuristic, INT_TYPE &best_axis, INT_TYPE &best_split_index,
1646 INT_TYPE &best_left_count_split,
1647 INT_TYPE &maxaxis_first_left_count,
1648 INT_TYPE &maxaxis_last_left_count,
1649 const BOX_TYPE* boxes, SRC_INT_TYPE* indices, INT_TYPE nboxes
1652 const T axis_min = axes_minmax.vals[max_axis][0];
1653 const T axis_length = max_axis_length;
1656 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1657 span_boxes[i].initBounds();
1659 INT_TYPE span_counts[NSPANS];
1660 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1667 constexpr INT_TYPE BOX_SPANS_PARALLEL_THRESHOLD = 2048;
1668 INT_TYPE ntasks = 1;
1669 if (nboxes >= BOX_SPANS_PARALLEL_THRESHOLD) {
1671 ntasks = (nprocessors > 1) ?
SYSmin(4*nprocessors, nboxes/(BOX_SPANS_PARALLEL_THRESHOLD/2)) : 1;
1674 for (INT_TYPE indexi = 0; indexi < nboxes; ++indexi) {
1675 const auto& box = boxes[indices[indexi]];
1677 const uint span_index =
SYSclamp(
int((sum-axis_min_x2)*axis_index_scale),
int(0),
int(NSPANS-1));
1678 ++span_counts[span_index];
1680 span_box.combine(box);
1686 parallel_boxes.
setSize(NSPANS*ntasks);
1688 parallel_counts.
setSize(NSPANS*ntasks);
1689 UTparallelFor(
UT_BlockedRange<INT_TYPE>(0,ntasks), [¶llel_boxes,¶llel_counts,ntasks,boxes,nboxes,indices,axis,axis_min_x2,axis_index_scale](
const UT_BlockedRange<INT_TYPE>& r) {
1690 for (INT_TYPE taski = r.begin(), end = r.end(); taski <
end; ++taski) {
1692 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1693 span_boxes[i].initBounds();
1695 INT_TYPE span_counts[NSPANS];
1696 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1699 const INT_TYPE startbox = (taski*
uint64(nboxes))/ntasks;
1700 const INT_TYPE endbox = ((taski+1)*
uint64(nboxes))/ntasks;
1701 for (INT_TYPE indexi = startbox; indexi != endbox; ++indexi) {
1702 const auto& box = boxes[indices[indexi]];
1704 const uint span_index =
SYSclamp(
int((sum-axis_min_x2)*axis_index_scale),
int(0),
int(NSPANS-1));
1705 ++span_counts[span_index];
1707 span_box.combine(box);
1710 const INT_TYPE dest_array_start = taski*NSPANS;
1711 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1712 parallel_boxes[dest_array_start+i] = span_boxes[i];
1714 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1715 parallel_counts[dest_array_start+i] = span_counts[i];
1721 for (INT_TYPE taski = 0; taski < ntasks; ++taski) {
1722 const INT_TYPE dest_array_start = taski*NSPANS;
1723 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1724 span_boxes[i].combine(parallel_boxes[dest_array_start+i]);
1727 for (INT_TYPE taski = 0; taski < ntasks; ++taski) {
1728 const INT_TYPE dest_array_start = taski*NSPANS;
1729 for (INT_TYPE i = 0; i < NSPANS; ++i) {
1730 span_counts[i] += parallel_counts[dest_array_start+i];
1742 left_boxes[0] = box_accumulator;
1743 for (INT_TYPE i = 1; i < NSPLITS; ++i) {
1744 box_accumulator.combine(span_boxes[i]);
1745 left_boxes[i] = box_accumulator;
1747 box_accumulator = span_boxes[NSPANS-1];
1748 right_boxes[NSPLITS-1] = box_accumulator;
1749 for (INT_TYPE i = NSPLITS-1; i > 0; --i) {
1750 box_accumulator.combine(span_boxes[i]);
1751 right_boxes[i-1] = box_accumulator;
1754 INT_TYPE left_counts[NSPLITS];
1757 INT_TYPE count_accumulator = span_counts[0];
1758 left_counts[0] = count_accumulator;
1759 for (INT_TYPE spliti = 1; spliti < NSPLITS; ++spliti) {
1760 count_accumulator += span_counts[spliti];
1761 left_counts[spliti] = count_accumulator;
1765 const INT_TYPE min_count = nboxes/MIN_FRACTION;
1766 UT_ASSERT_MSG_P(min_count > 0,
"MID_LIMIT above should have been large enough that nboxes would be > MIN_FRACTION");
1767 const INT_TYPE max_count = ((MIN_FRACTION-1)*
uint64(nboxes))/MIN_FRACTION;
1768 UT_ASSERT_MSG_P(max_count < nboxes,
"I'm not sure how this could happen mathematically, but it needs to be checked.");
1769 T smallest_heuristic = std::numeric_limits<T>::infinity();
1770 INT_TYPE split_index = -1;
1771 for (INT_TYPE spliti = 0; spliti < NSPLITS; ++spliti) {
1772 const INT_TYPE left_count = left_counts[spliti];
1773 if (left_count < min_count || left_count > max_count) {
1776 const INT_TYPE right_count = nboxes-left_count;
1778 left_count*unweightedHeuristic<H>(left_boxes[spliti]) +
1779 right_count*unweightedHeuristic<H>(right_boxes[spliti]);
1780 if (heuristic < smallest_heuristic) {
1781 smallest_heuristic = heuristic;
1782 split_index = spliti;
1786 #if BVH_TRY_ALL_AXES
1787 if (maxaxis_first_left_count == -1) {
1788 maxaxis_first_left_count = left_counts[0];
1789 maxaxis_last_left_count = left_counts[NSPLITS-1];
1791 if (split_index == -1) {
1795 if (best_axis < 0 || smallest_heuristic < best_heuristic) {
1797 best_split_index = split_index;
1798 best_heuristic = smallest_heuristic;
1799 best_left_split_box = left_boxes[split_index];
1800 best_right_split_box = right_boxes[split_index];
1801 best_left_count_split = left_counts[split_index];
1805 SRC_INT_TYPE*
const indices_end = indices+nboxes;
1807 if (split_index == -1) {
1817 SRC_INT_TYPE* nth_index;
1818 if (left_counts[0] > max_count) {
1820 nth_index = indices+max_count;
1823 else if (left_counts[NSPLITS-1] < min_count) {
1825 nth_index = indices+min_count;
1830 nth_index = indices+nboxes/2;
1840 nthElement<T>(boxes,
indices,indices+nboxes,max_axis,nth_index);
1842 split_indices = nth_index;
1844 for (SRC_INT_TYPE* left_indices = indices+1; left_indices < nth_index; ++left_indices) {
1845 left_box.combine(boxes[*left_indices]);
1848 for (SRC_INT_TYPE* right_indices = nth_index+1; right_indices < indices_end; ++right_indices) {
1849 right_box.combine(boxes[*right_indices]);
1851 split_boxes[0] = left_box;
1852 split_boxes[1] = right_box;
1856 SRC_INT_TYPE* ppivot_start;
1857 SRC_INT_TYPE* ppivot_end;
1858 partitionByCentre(boxes,indices,indices+nboxes,max_axis,pivotx2,ppivot_start,ppivot_end);
1860 split_indices = indices + left_counts[split_index];
1865 if (split_indices >= ppivot_start && split_indices <= ppivot_end) {
1866 split_boxes[0] = left_boxes[split_index];
1867 split_boxes[1] = right_boxes[split_index];
1872 if (split_indices < ppivot_start) {
1873 split_indices = ppivot_start;
1876 split_indices = ppivot_end;
1880 if (split_indices == indices) {
1883 else if (split_indices == indices_end) {
1888 for (SRC_INT_TYPE* left_indices = indices+1; left_indices < split_indices; ++left_indices) {
1889 left_box.combine(boxes[*left_indices]);
1892 for (SRC_INT_TYPE* right_indices = split_indices+1; right_indices < indices_end; ++right_indices) {
1893 right_box.combine(boxes[*right_indices]);
1895 split_boxes[0] = left_box;
1896 split_boxes[1] = right_box;
1902 template<u
int PARALLEL_THRESHOLD,
typename SRC_INT_TYPE>
1903 void BVH<N>::adjustParallelChildNodes(INT_TYPE nparallel,
UT_Array<Node>& nodes,
Node& node,
UT_Array<Node>* parallel_nodes, SRC_INT_TYPE* sub_indices) noexcept
1906 INT_TYPE counted_parallel = 0;
1907 INT_TYPE childi = 0;
1908 for (INT_TYPE taski = r.begin(), end = r.end(); taski <
end; ++taski) {
1910 INT_TYPE sub_nboxes;
1911 for (; childi <
N; ++childi) {
1912 sub_nboxes = sub_indices[childi+1]-sub_indices[childi];
1913 if (sub_nboxes >= PARALLEL_THRESHOLD) {
1914 if (counted_parallel == taski) {
1922 const UT_Array<Node>& local_nodes = parallel_nodes[counted_parallel];
1923 INT_TYPE n = local_nodes.
size();
1924 INT_TYPE local_nodes_start = Node::getInternalNum(node.child[childi])+1;
1928 for (INT_TYPE
j = 0;
j <
n; ++
j) {
1929 Node local_node = local_nodes[
j];
1930 for (INT_TYPE childj = 0; childj <
N; ++childj) {
1931 INT_TYPE local_child = local_node.child[childj];
1932 if (Node::isInternal(local_child) && local_child != Node::EMPTY) {
1933 local_child += local_nodes_start;
1934 local_node.child[childj] = local_child;
1937 nodes[local_nodes_start+
j] = local_node;
1944 template<
typename T,
typename BOX_TYPE,
typename SRC_INT_TYPE>
1945 void BVH<N>::nthElement(
const BOX_TYPE* boxes, SRC_INT_TYPE* indices,
const SRC_INT_TYPE* indices_end,
const uint axis, SRC_INT_TYPE*
const nth) noexcept {
1950 utBoxCenter(boxes[indices[(indices_end-indices)/2]], axis),
1953 if (pivots[0] < pivots[1]) {
1954 const T temp = pivots[0];
1955 pivots[0] = pivots[1];
1958 if (pivots[0] < pivots[2]) {
1959 const T temp = pivots[0];
1960 pivots[0] = pivots[2];
1963 if (pivots[1] < pivots[2]) {
1964 const T temp = pivots[1];
1965 pivots[1] = pivots[2];
1968 T mid_pivotx2 = pivots[1];
1971 if (mid_pivotx2 < min_pivotx2) {
1972 mid_pivotx2 = min_pivotx2;
1974 else if (mid_pivotx2 > max_pivotx2) {
1975 mid_pivotx2 = max_pivotx2;
1978 SRC_INT_TYPE* pivot_start;
1979 SRC_INT_TYPE* pivot_end;
1980 partitionByCentre(boxes,indices,indices_end,axis,mid_pivotx2,pivot_start,pivot_end);
1981 if (nth < pivot_start) {
1982 indices_end = pivot_start;
1984 else if (nth < pivot_end) {
1990 indices = pivot_end;
1992 if (indices_end <= indices+1) {
1999 template<
typename T,
typename BOX_TYPE,
typename SRC_INT_TYPE>
2000 void BVH<N>::partitionByCentre(
const BOX_TYPE* boxes, SRC_INT_TYPE*
const indices,
const SRC_INT_TYPE*
const indices_end,
const uint axis,
const T pivotx2, SRC_INT_TYPE*& ppivot_start, SRC_INT_TYPE*& ppivot_end) noexcept {
2004 SRC_INT_TYPE* pivot_start =
indices;
2006 SRC_INT_TYPE* pivot_end =
indices;
2009 for (SRC_INT_TYPE* psrc_index = indices; psrc_index != indices_end; ++psrc_index) {
2010 const T srcsum =
utBoxCenter(boxes[*psrc_index], axis);
2011 if (srcsum < pivotx2) {
2012 if (psrc_index != pivot_start) {
2013 if (pivot_start == pivot_end) {
2015 const SRC_INT_TYPE temp = *psrc_index;
2016 *psrc_index = *pivot_start;
2017 *pivot_start = temp;
2021 const SRC_INT_TYPE temp = *psrc_index;
2022 *psrc_index = *pivot_end;
2023 *pivot_end = *pivot_start;
2024 *pivot_start = temp;
2030 else if (srcsum == pivotx2) {
2032 if (psrc_index != pivot_end) {
2033 const SRC_INT_TYPE temp = *psrc_index;
2034 *psrc_index = *pivot_end;
2040 ppivot_start = pivot_start;
2041 ppivot_end = pivot_end;
2046 void BVH<N>::debugDump()
const {
2058 INT_TYPE cur_nodei = stack[stack.
size()-2];
2059 INT_TYPE cur_i = stack[stack.
size()-1];
2067 ++stack[stack.
size()-1];
2069 INT_TYPE child_nodei = cur_node.child[cur_i];
2070 if (Node::isInternal(child_nodei)) {
2071 if (child_nodei == Node::EMPTY) {
2078 INT_TYPE internal_node = Node::getInternalNum(child_nodei);
2081 stack.
append(internal_node);
2093 template<u
int BVH_N,
typename ITEM_BOX,
typename NODE_BOX>
2096 struct NodeBoxFunctors
2098 NODE_BOX *myNodeData;
2100 const ITEM_BOX *myItemBoxes;
2103 NODE_BOX *node_data,
2105 const ITEM_BOX *item_boxes)
2106 : myNodeData(node_data)
2108 , myItemBoxes(item_boxes)
2110 constexpr
SYS_FORCE_INLINE bool pre(
const int nodei, ITEM_BOX *data_for_parent)
const
2114 void SYS_FORCE_INLINE item(
const int itemi,
const int parent_nodei, ITEM_BOX &data_for_parent)
const
2116 data_for_parent = myItemBoxes[itemi];
2119 void post(
const int nodei,
const int parent_nodei, ITEM_BOX *data_for_parent,
const int nchildren,
const ITEM_BOX *child_data_array)
const
2121 ITEM_BOX box(child_data_array[0]);
2122 for (
int i = 1; i < nchildren; ++i)
2123 box.combine(child_data_array[i]);
2125 if (data_for_parent)
2126 *data_for_parent = box;
2128 NODE_BOX ¤t_node_data = myNodeData[nodei];
2129 current_node_data = box;
2133 constexpr
uint PARALLEL_THRESHOLD = 4096;
2134 NodeBoxFunctors functors(node_boxes, item_boxes);
2135 bvh.template traverseParallel<ITEM_BOX>(PARALLEL_THRESHOLD, functors);
2138 template<u
int NAXES,
typename T,u
int BVH_N,
typename ITEM_BOX,
typename NODE_BOX>
2141 struct NodeBoxFunctors
2143 NODE_BOX *
const myNodeData;
2145 const ITEM_BOX *
const myItemBoxes;
2146 const float myExpandFactor;
2149 NODE_BOX *node_data,
2151 const ITEM_BOX *item_boxes,
2152 const float expand_factor=0.0
f)
2153 : myNodeData(node_data)
2155 , myItemBoxes(item_boxes)
2156 , myExpandFactor(expand_factor)
2158 constexpr
SYS_FORCE_INLINE bool pre(
const int nodei, ITEM_BOX *data_for_parent)
const
2162 void SYS_FORCE_INLINE item(
const int itemi,
const int parent_nodei, ITEM_BOX &data_for_parent)
const
2164 data_for_parent = myItemBoxes[itemi];
2167 void post(
const int nodei,
const int parent_nodei, ITEM_BOX *data_for_parent,
const int nchildren,
const ITEM_BOX *child_data_array)
const
2169 ITEM_BOX box(child_data_array[0]);
2170 for (
int i = 1; i < nchildren; ++i)
2171 box.combine(child_data_array[i]);
2173 if (data_for_parent)
2174 *data_for_parent = box;
2176 NODE_BOX ¤t_node_data = myNodeData[nodei];
2180 if (!myExpandFactor)
2182 for (
int i = 0; i < nchildren; ++i)
2184 const ITEM_BOX &local_box = child_data_array[i];
2185 for (
int axis = 0; axis < NAXES; ++axis)
2187 ((
T*)¤t_node_data.vals[axis][0])[i] = local_box.vals[axis][0];
2188 ((
T*)¤t_node_data.vals[axis][1])[i] = local_box.vals[axis][1];
2194 for (
int i = 0; i < nchildren; ++i)
2196 const ITEM_BOX &local_box = child_data_array[i];
2197 for (
int axis = 0; axis < NAXES; ++axis)
2199 float minv = local_box.vals[axis][0];
2200 float maxv = local_box.vals[axis][1];
2205 ((
int32*)local_box.vals[axis])[0] & 0x7F800000,
2206 ((
int32*)local_box.vals[axis])[1] & 0x7F800000) - ((23-1)<<23));
2207 float diff = myExpandFactor*(maxv-minv) + (*(
fpreal32*)&epsilon);
2210 ((
T*)¤t_node_data.vals[axis][0])[i] = minv;
2211 ((
T*)¤t_node_data.vals[axis][1])[i] = maxv;
2215 for (
int i = nchildren; i < BVH_N; ++i)
2218 for (
int axis = 0; axis < NAXES; ++axis)
2232 constexpr
uint PARALLEL_THRESHOLD = 64;
2233 NodeBoxFunctors functors(node_boxes, item_boxes, expand_factor);
2234 bvh.template traverseParallel<ITEM_BOX>(PARALLEL_THRESHOLD, functors);
2237 template<
typename T,u
int NAXES,
bool PARALLEL,
typename INT_TYPE0,
typename DATA,
typename INT_TYPE>
2240 INT_TYPE next_node_id,
2244 const auto &node =
data.myRoot[nodei];
2246 static constexpr
uint N = 4;
2251 INT_TYPE next_nodes[
N];
2253 INT_TYPE nchildren =
N;
2254 INT_TYPE nparallel = 0;
2257 for (INT_TYPE s = N; s --> 0; )
2259 const INT_TYPE node_int = node.child[
s];
2260 if (node_int == Node::EMPTY)
2265 next_nodes[
s] = next_node_id;
2266 if (Node::isInternal(node_int))
2270 INT_TYPE child_node_id = Node::getInternalNum(node_int);
2271 nnodes[
s] = next_node_id - child_node_id;
2272 next_node_id = child_node_id;
2278 nparallel += (nnodes[
s] >=
data.myParallelThreshold);
2283 LOCAL_DATA local_data[
N];
2285 auto&& item_functor = [](
const DATA&
data, LOCAL_DATA& local_data, INT_TYPE
s, INT_TYPE nodei, INT_TYPE node_int) {
2289 INT_TYPE0
index = data.myIndices ? data.myIndices[node_int] : INT_TYPE0(node_int);
2290 local_data.initBounds(data.myItemBoxes[index]);
2295 if (data.myIndices) {
2296 index0 = data.myIndices[node_int];
2297 index1 = data.myIndices[node_int+1];
2300 index0 = INT_TYPE0(node_int);
2301 index1 = INT_TYPE0(node_int+1);
2303 local_data.initBoundsUnordered(data.myItemBoxes[index0], data.myItemBoxes[index1]);
2304 const INT_TYPE end_node_int = node_int+
count;
2306 for (; node_int < end_node_int; ++node_int) {
2307 const INT_TYPE0
index = data.myIndices ? data.myIndices[node_int] : INT_TYPE0(node_int);
2308 local_data.combine(data.myItemBoxes[index]);
2313 if (nparallel >= 2) {
2315 if (nparallel < nchildren) {
2316 for (INT_TYPE s = 0; s <
N; ++
s) {
2317 if (nnodes[s] >= data.myParallelThreshold) {
2320 const INT_TYPE node_int = node.child[
s];
2321 if (Node::isInternal(node_int)) {
2322 if (node_int == Node::EMPTY) {
2327 utCreateBVHInterleavedBoxesHelper<T,NAXES,false,INT_TYPE0>(Node::getInternalNum(node_int), INT_TYPE(-1),
data, &local_data[
s]);
2330 item_functor(data, local_data[s], s, nodei, node_int);
2336 for (INT_TYPE taski = r.begin(); taski < r.end(); ++taski) {
2337 INT_TYPE parallel_count = 0;
2341 for (s = 0; s <
N; ++
s) {
2342 if (nnodes[s] < data.myParallelThreshold) {
2345 if (parallel_count == taski) {
2350 const INT_TYPE node_int = node.child[
s];
2351 UT_ASSERT_MSG_P(Node::isInternal(node_int),
"item_functor is fairly fast, so we don't support having leaves run in parallel, unlike traverseParallel");
2352 UT_ASSERT_MSG_P(node_int != Node::EMPTY,
"Empty entries should have been excluded above.");
2353 utCreateBVHInterleavedBoxesHelper<T,NAXES,true,INT_TYPE0>(Node::getInternalNum(node_int), next_nodes[
s],
data, &local_data[
s]);
2359 for (INT_TYPE s = 0; s <
N; ++
s) {
2360 const INT_TYPE node_int = node.child[
s];
2361 if (Node::isInternal(node_int)) {
2362 if (node_int == Node::EMPTY) {
2367 utCreateBVHInterleavedBoxesHelper<T,NAXES,false,INT_TYPE0>(Node::getInternalNum(node_int), INT_TYPE(-1),
data, &local_data[
s]);
2370 item_functor(data, local_data[s], s, nodei, node_int);
2376 for (
int i = 1; i < nchildren; ++i)
2379 if (data_for_parent)
2380 *data_for_parent = box;
2382 auto ¤t_node_data = data.myNodeData[nodei];
2386 for (
int i = 0; i < nchildren; ++i)
2389 for (
int axis = 0; axis < NAXES; ++axis)
2391 ((
T*)¤t_node_data.vals[axis][0])[i] = local_box.
vals[axis][0];
2392 ((
T*)¤t_node_data.vals[axis][1])[i] = local_box.
vals[axis][1];
2395 for (
int i = nchildren; i <
N; ++i)
2398 for (
int axis = 0; axis < NAXES; ++axis)
2409 template<u
int NAXES,
typename T,
typename ITEM_BOX,
typename NODE_BOX,
typename INT_TYPE0>
2412 const ITEM_BOX* item_boxes,
2413 NODE_BOX* node_boxes,
2414 const v4uu* node_nitems,
2415 const INT_TYPE0* indices_mapping) noexcept
2417 if (!bvh.getNodes())
2420 static constexpr
uint PARALLEL_THRESHOLD = 4096;
2424 uint myParallelThreshold;
2425 const INT_TYPE0* myIndices;
2426 const ITEM_BOX* myItemBoxes;
2427 const v4uu* myNodeNItems;
2428 NODE_BOX *myNodeData;
2432 data.myRoot = bvh.getNodes();
2433 data.myParallelThreshold = PARALLEL_THRESHOLD;
2434 data.myIndices = indices_mapping;
2435 data.myItemBoxes = item_boxes;
2436 data.myNodeNItems = node_nitems;
2437 data.myNodeData = node_boxes;
2440 utCreateBVHInterleavedBoxesHelper<T,NAXES,true,INT_TYPE0>(
uint(0), bvh.getNumNodes(),
data, &box);
2443 template<u
int NAXES,
typename INT_TYPE>
2451 const uint nnodes = bvh.getNumNodes();
2466 template<u
int NAXES,
typename INT_TYPE>
2476 const NodeType *nodes = bvh.getNodes();
2478 auto stack_ptr = stack.
data();
2482 auto box_ptr = box_indices.data();
2483 auto box_end = box_ptr + box_indices.capacity();
2484 box_ptr += box_indices.entries();
2486 const BoxType query_boxes(query_box);
2491 uint current_node = stack_ptr[stack_entries];
2495 const BoxType &box = node_boxes[current_node];
2497 BoxType intersection;
2498 box.intersect(query_boxes, intersection);
2500 v4uu hit_mask = (intersection[0][0] <= intersection[0][1]);
2501 for (
uint axis = 1; axis < NAXES; ++axis)
2503 hit_mask &= (intersection[axis][0] <= intersection[axis][1]);
2508 const NodeType &node = nodes[current_node];
2509 uint local_index = SYSfirstBitSet(hit_bits);
2510 current_node = node.child[local_index];
2511 if (!(hit_bits & (hit_bits-1)))
2514 if (NodeType::isInternal(current_node))
2516 current_node = NodeType::getInternalNum(current_node);
2520 if (box_ptr >= box_end)
2522 box_indices.setSizeNoInit(box_ptr-box_indices.data());
2523 box_indices.bumpCapacity(box_end-box_indices.data()+1);
2525 box_ptr = box_indices.data();
2526 box_end = box_ptr + box_indices.capacity();
2527 box_ptr += box_indices.entries();
2529 *box_ptr++ = current_node;
2534 if (NodeType::isInternal(current_node))
2537 current_node = NodeType::getInternalNum(current_node);
2543 if (box_ptr >= box_end)
2545 box_indices.setSizeNoInit(box_ptr-box_indices.data());
2546 box_indices.bumpCapacity(box_end-box_indices.data()+1);
2548 box_ptr = box_indices.data();
2549 box_end = box_ptr + box_indices.capacity();
2550 box_ptr += box_indices.entries();
2552 *box_ptr++ = current_node;
2556 hit_bits &= (
uint(-2)<<local_index);
2560 local_index = SYSfirstBitSet(hit_bits);
2561 uint local_node = node.child[local_index];
2562 if (NodeType::isInternal(local_node))
2564 local_node = NodeType::getInternalNum(local_node);
2566 current_node = local_node;
2569 if (stack_entries >= stack_capacity)
2574 stack_ptr = stack.
data();
2576 stack_ptr[stack_entries++] = local_node;
2582 if (box_ptr >= box_end)
2584 box_indices.setSizeNoInit(box_ptr-box_indices.data());
2585 box_indices.bumpCapacity(box_end-box_indices.data()+1);
2587 box_ptr = box_indices.data();
2588 box_end = box_ptr + box_indices.capacity();
2589 box_ptr += box_indices.entries();
2591 *box_ptr++ = local_node;
2595 hit_bits &= (
uint(-2)<<local_index);
2603 }
while (stack_entries);
2605 box_indices.setSizeNoInit(box_ptr - box_indices.data());
2608 template<u
int NAXES,
typename INT_TYPE>
2618 const NodeType *nodes = bvh.getNodes();
2619 const uint nnodes = bvh.getNumNodes();
2627 const BoxType query_boxes(query_box);
2629 v4uf queryboxsize =
v4uf(query_box.axis_sum());
2633 auto stack_ptr = stack.
data();
2635 exint stack_entries = 0;
2637 if (stack_entries >= stack_capacity)
2641 stack_ptr = stack.
data();
2643 stack_ptr[stack_entries++] = 0;
2645 auto box_ptr = node_indices.data();
2646 auto box_end = box_ptr + node_indices.capacity();
2647 box_ptr += node_indices.entries();
2652 uint current_node = stack_ptr[stack_entries];
2656 const NodeType &node = nodes[current_node];
2657 v4uu nodechild = *((
const v4uu *)node.child);
2659 int internalmask = signbits(nodechild);
2661 if (internalmask != 0xf)
2665 if (box_ptr >= box_end)
2667 node_indices.setSizeNoInit(box_ptr-node_indices.data());
2668 node_indices.bumpCapacity(box_end-node_indices.data()+1);
2670 box_ptr = node_indices.data();
2671 box_end = box_ptr + node_indices.capacity();
2672 box_ptr += node_indices.entries();
2674 *box_ptr++ = current_node;
2680 const BoxType &box = node_boxes[current_node];
2683 v4uf boxsize = box.axis_sum();
2685 int bigenough = signbits(boxsize > queryboxsize);
2686 if ((negative | bigenough) != 0xf)
2691 if (box_ptr >= box_end)
2693 node_indices.setSizeNoInit(box_ptr-node_indices.data());
2694 node_indices.bumpCapacity(box_end-node_indices.data()+1);
2696 box_ptr = node_indices.data();
2697 box_end = box_ptr + node_indices.capacity();
2698 box_ptr += node_indices.entries();
2700 *box_ptr++ = current_node;
2704 BoxType intersection;
2705 box.intersect(query_boxes, intersection);
2707 v4uu hit_mask = (intersection[0][0] <= intersection[0][1]);
2708 for (
uint axis = 1; axis < NAXES; ++axis)
2710 hit_mask &= (intersection[axis][0] <= intersection[axis][1]);
2712 uint hit_bits = signbits(hit_mask);
2716 uint local_index = SYSfirstBitSet(hit_bits);
2717 current_node = node.child[local_index];
2718 current_node = NodeType::getInternalNum(current_node);
2719 if (!(hit_bits & (hit_bits-1)))
2726 hit_bits &= (
uint(-2)<<local_index);
2730 local_index = SYSfirstBitSet(hit_bits);
2731 uint local_node = node.child[local_index];
2732 local_node = NodeType::getInternalNum(local_node);
2734 if (stack_entries >= stack_capacity)
2739 stack_ptr = stack.
data();
2741 stack_ptr[stack_entries++] = local_node;
2744 hit_bits &= (
uint(-2)<<local_index);
2750 }
while (stack_entries);
2752 node_indices.setSizeNoInit(box_ptr - node_indices.data());
2755 template<u
int NAXES,
typename INT_TYPE,
int BATCH_SIZE>
2765 const NodeType *nodes = bvh.getNodes();
2766 const uint nnodes = bvh.getNumNodes();
2774 BoxType query_boxes[BATCH_SIZE];
2775 for (
int i = 0; i < BATCH_SIZE; i++)
2776 query_boxes[i] = query_box[i];
2788 const BoxType &box = node_boxes[current_node];
2790 uint local_hit_bits[BATCH_SIZE];
2793 for (
int i = 0; i < BATCH_SIZE; i++)
2795 BoxType intersection;
2796 box.intersect(query_boxes[i], intersection);
2798 v4uu hit_mask = (intersection[0][0] <= intersection[0][1]);
2799 for (
uint axis = 1; axis < NAXES; ++axis)
2801 hit_mask &= (intersection[axis][0] <= intersection[axis][1]);
2803 local_hit_bits[i] = signbits(hit_mask);
2804 hit_bits |= local_hit_bits[i];
2810 const NodeType &node = nodes[current_node];
2811 uint local_index = SYSfirstBitSet(hit_bits);
2812 current_node = node.child[local_index];
2813 if (!(hit_bits & (hit_bits-1)))
2816 if (NodeType::isInternal(current_node))
2818 current_node = NodeType::getInternalNum(current_node);
2823 for (
int i = 0; i < BATCH_SIZE; i++)
2825 if (local_hit_bits[i])
2826 box_indices[i].append(current_node);
2831 if (NodeType::isInternal(current_node))
2834 current_node = NodeType::getInternalNum(current_node);
2839 uint curbit = 1 << local_index;
2840 for (
int i = 0; i < BATCH_SIZE; i++)
2842 if (local_hit_bits[i] & curbit)
2843 box_indices[i].append(current_node);
2847 hit_bits &= (
uint(-2)<<local_index);
2851 local_index = SYSfirstBitSet(hit_bits);
2852 uint local_node = node.child[local_index];
2853 if (NodeType::isInternal(local_node))
2855 local_node = NodeType::getInternalNum(local_node);
2857 current_node = local_node;
2859 stack.
append(local_node);
2864 uint curbit = 1 << local_index;
2865 for (
int i = 0; i < BATCH_SIZE; i++)
2867 if (local_hit_bits[i] & curbit)
2868 box_indices[i].append(local_node);
2873 hit_bits &= (
uint(-2)<<local_index);
2884 template<
bool PARALLEL,
typename DATA>
2889 const DATA &
data) noexcept
2891 const auto &node =
data.myNodes[nodei];
2893 static constexpr
uint N = 4;
2900 for (INT_TYPE i = 0; i < N-1; ++i) {
2901 if (current_counts[i] < 0) {
2902 current_counts[i] = 0;
2904 else if (current_counts[i+1] < 0) {
2905 current_counts[i] = next_item_id-current_counts[i];
2906 next_items[i] = next_item_id;
2909 current_counts[i] = current_counts[i+1]-current_counts[i];
2910 next_items[i] = current_counts[i+1];
2913 if (current_counts[N-1] < 0) {
2914 current_counts[N-1] = 0;
2917 current_counts[N-1] = next_item_id-current_counts[N-1];
2918 next_items[N-1] = next_item_id;
2923 INT_TYPE next_nodes[
N];
2925 INT_TYPE nchildren =
N;
2926 INT_TYPE nparallel = 0;
2929 for (INT_TYPE s = N; s --> 0; )
2931 const INT_TYPE node_int = node.child[
s];
2932 if (node_int == Node::EMPTY)
2937 next_nodes[
s] = next_node_id;
2938 if (Node::isInternal(node_int))
2942 INT_TYPE child_node_id = Node::getInternalNum(node_int);
2943 nnodes[
s] = next_node_id - child_node_id;
2944 next_node_id = child_node_id;
2950 nparallel += (nnodes[
s] >=
data.myParallelThreshold);
2954 if (nparallel >= 2) {
2956 if (nparallel < nchildren) {
2957 for (INT_TYPE s = 0; s <
N; ++
s) {
2958 if (nnodes[s] >=
data.myParallelThreshold) {
2961 const INT_TYPE node_int = node.child[
s];
2962 if (Node::isInternal(node_int)) {
2963 if (node_int == Node::EMPTY) {
2968 utComputeNodeNItemsHelper<false>(Node::getInternalNum(node_int), INT_TYPE(-1), next_items[
s],
data);
2974 for (INT_TYPE taski = r.begin(); taski < r.end(); ++taski) {
2975 INT_TYPE parallel_count = 0;
2979 for (s = 0; s <
N; ++
s) {
2980 if (nnodes[s] <
data.myParallelThreshold) {
2983 if (parallel_count == taski) {
2988 const INT_TYPE node_int = node.child[
s];
2989 UT_ASSERT_MSG_P(Node::isInternal(node_int),
"item_functor is fairly fast, so we don't support having leaves run in parallel, unlike traverseParallel");
2990 UT_ASSERT_MSG_P(node_int != Node::EMPTY,
"Empty entries should have been excluded above.");
2991 utComputeNodeNItemsHelper<true>(Node::getInternalNum(node_int), next_nodes[
s], next_items[
s],
data);
2997 for (INT_TYPE s = 0; s <
N; ++
s) {
2998 const INT_TYPE node_int = node.child[
s];
2999 if (Node::isInternal(node_int)) {
3000 if (node_int == Node::EMPTY) {
3005 utComputeNodeNItemsHelper<false>(Node::getInternalNum(node_int), INT_TYPE(-1), next_items[
s],
data);
3014 exint nitems) noexcept
3017 if (bvh.getNumNodes() == 0)
3022 struct ItemStartFunctors
3026 ItemStartFunctors() =
default;
3029 : myNodeData(node_data)
3037 data_for_parent = itemi;
3040 void post(
const int nodei,
const int parent_nodei,
int32 *data_for_parent,
const int nchildren,
const int32 *child_data_array)
const
3042 if (data_for_parent)
3043 *data_for_parent = child_data_array[0];
3046 int32 *current_node_data = (
int32*)&myNodeData[nodei];
3048 current_node_data[0] = child_data_array[0];
3050 for (; i < nchildren; ++i)
3051 current_node_data[i] = child_data_array[i];
3053 current_node_data[i] = -1;
3057 constexpr
uint PARALLEL_THRESHOLD = 4096;
3058 ItemStartFunctors functors(node_nitems);
3059 bvh.template traverseParallel<int32>(PARALLEL_THRESHOLD, functors);
3063 struct ItemCountData {
3064 uint myParallelThreshold;
3069 data.myParallelThreshold = PARALLEL_THRESHOLD;
3070 data.myNodes = bvh.getNodes();
3071 data.myNodeData = node_nitems;
3072 utComputeNodeNItemsHelper<true>(0, bvh.getNumNodes(), nitems,
data);
3075 template<
typename RADIUS_ARRAY>
3078 float radius = radii[
index];
3083 d2 = (distance < 0) ? -d2 : d2;
3087 template<ex
int NAXES>
3096 static constexpr
bool theAllPointsValid =
true;
3099 template<
typename TS>
3116 template<
typename TS,
typename RADIUS_ARRAY>
3131 template<
bool farthest>
3135 v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3136 return signbits(hit_mask);
3140 template<ex
int NAXES>
3144 using Parent::Parent;
3150 using Parent::Parent;
3156 using Parent::Parent;
3162 using Parent::Parent;
3168 using Parent::Parent;
3175 template<u
int NAXES>
3190 static constexpr
bool theAllPointsValid =
true;
3193 template<
typename TS>
3199 , myDiff2(length2(myDiff))
3216 template<
typename TS,
typename RADIUS_ARRAY>
3234 else if (proj_t >= myDiff2)
3243 vec = (myP0 + (proj_t * myDiff));
3255 template<
bool farthest>
3263 for (
uint axis = 0; axis < NAXES; ++axis)
3264 center[axis] = (boxes[axis][0] + boxes[axis][1]) * 0.5f;
3269 v4uf proj_t =
dot(myVDiff,center - myVP0);
3270 v4uu isp0_mask = (proj_t <= 0.0f);
3271 v4uu isp1_mask = andn(isp0_mask, (proj_t >= myVDiff2));
3276 for (
int i = 0; i < NAXES; ++i)
3278 vec[i] = (myVP0[i] & isp0_mask) | (myVP1[i] & isp1_mask) | andn((isp0_mask | isp1_mask), proj_vec[i]);
3282 v4uf dist2s = length2(vec);
3290 v4uf negative_mask = dists &
v4uu(0x80000000);
3292 dist2 = dists ^ negative_mask;
3298 dist2 = dists * dists;
3301 v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3302 return signbits(hit_mask);
3308 template<u
int NAXES>
3319 static constexpr
bool theAllPointsValid =
true;
3322 template<
typename TS>
3343 template<
typename TS,
typename RADIUS_ARRAY>
3352 diff -=
dot(diff,myDir)*myDir;
3362 template<
bool farthest>
3368 for (
uint axis = 0; axis < NAXES; ++axis)
3369 diffs[axis] = (boxes[axis][0] + boxes[axis][1]) * 0.5f - myVP0[axis];
3372 diffs -=
dot( diffs, myVDir )*myVDir;
3380 v4uf negative_mask = dists &
v4uu(0x80000000);
3382 dist2 = dists ^ negative_mask;
3388 dist2 = dists * dists;
3391 v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3392 return signbits(hit_mask);
3396 template<u
int NAXES,
typename POSITION_ARRAY,
typename RADIUS_ARRAY>
3409 static constexpr
bool theAllPointsValid =
false;
3412 template<
typename TS>
3418 const POSITION_ARRAY &positions,
3419 const RADIUS_ARRAY &radii)
3424 , myCosAngle(SYScos(angle))
3425 , mySinAngle(SYSsin(angle))
3426 , myPositions(positions)
3432 template <
typename T>
3437 const VectorType p0(myP0), dir(myDir), diff = point - p0;
3438 const T pz =
dot(diff,dir);
3439 const VectorType tmp = diff - pz * dir;
3440 const T q =
sqrt(length2(tmp));
3441 const T res = myCosAngle * q - mySinAngle * pz;
3445 template <
typename T>
3449 return getDistToSurface(center) <= rad;
3458 return sphereIntersects(tree_pos, myRadii[tree_point_index]);
3462 template<
typename TS>
3479 template<
bool farthest>
3485 for (
int axis = 0; axis < NAXES; ++axis)
3486 center[axis] = (boxes[axis][0] + boxes[axis][1]) * 0.5f;
3489 const v4uf dist_to_surface = getDistToSurface(center);
3493 const v4uf inside_mask = dist_to_surface | (dist_to_surface*dist_to_surface <= rad2);
3494 const uint inside_bits = signbits(inside_mask);
3499 const v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3500 return inside_bits & signbits(hit_mask);
3508 template<u
int NAXES>
3514 static constexpr
bool theAllPointsValid =
true;
3532 template<
typename RADIUS_ARRAY>
3536 for (
int axis = 0; axis < NAXES; ++axis)
3538 float p = myP[axis];
3539 float q = tree_point[axis];
3541 float axisdist =
SYSmin(absdiff, 1.0
f - absdiff);
3542 d2 += axisdist*axisdist;
3552 template<
bool farthest>
3556 for (
uint axis = 0; axis < NAXES; ++axis)
3557 center[axis] = (boxes[axis][0] + boxes[axis][1]) * 0.5f;
3566 for (
int axis = 0; axis < NAXES; ++axis)
3568 v4uf p = myVP[axis];
3569 v4uf c = center[axis];
3573 v4uf axisdist = centredist - (boxes[axis][1] - boxes[axis][0])*0.5
f;
3574 maxdist =
SYSmax(maxdist, axisdist);
3576 v4uf signmask = maxdist &
v4uu(0x80000000);
3577 dist2 = (maxdist*maxdist)|signmask;
3584 for (
int axis = 0; axis < NAXES; ++axis)
3586 v4uf p = myVP[axis];
3587 v4uf c = center[axis];
3590 v4uf axisdist = centredist + (boxes[axis][1] - boxes[axis][0])*0.5
f;
3592 sums += axisdist*axisdist;
3597 v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3598 return signbits(hit_mask);
3623 static constexpr
bool theAllPointsValid =
true;
3635 float la =
length(sidea);
3636 float lb =
length(sideb);
3637 float lc =
length(sidec);
3638 float p = la + lb + lc;
3639 myIncentre = (la*a + lb*b + lc*
c)/p;
3641 myDist = SYSsqrt((s-la)*(s-lb)*(s-lc)/s);
3653 if (
dot(myDirs[0], b - myIncentre) < 0)
3655 myDirs[0] = -myDirs[0];
3656 myDirs[1] = -myDirs[1];
3657 myDirs[2] = -myDirs[2];
3659 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[0], c - myIncentre), 0));
3660 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[1], c - myIncentre), 0));
3661 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[1], a - myIncentre), 0));
3662 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[2], a - myIncentre), 0));
3663 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[2], b - myIncentre), 0));
3669 myVDist =
v4uf(myDist);
3680 template<
typename RADIUS_ARRAY>
3685 float d0 =
dot(dir, myDirs[0]);
3686 float d1 =
dot(dir, myDirs[1]);
3687 float d2 =
dot(dir, myDirs[2]);
3688 float d =
SYSmax(d0, d1, d2) - myDist;
3691 float dsquared = d*d;
3692 return (d < 0) ? -dsquared : dsquared;
3698 template<
bool farthest>
3704 for (
uint axis = 0; axis < NAXES; ++axis)
3705 dirs[axis] = (boxes[axis][0] + boxes[axis][1])*0.5f - myVIncentre[axis];
3710 const v4uf d0 =
dot(dirs, myVDirs[0]);
3711 const v4uf d1 =
dot(dirs, myVDirs[1]);
3712 const v4uf d2 =
dot(dirs, myVDirs[2]);
3718 v4uf negative_mask = dists &
v4uu(0x80000000);
3720 dist2 = dists ^ negative_mask;
3726 dist2 = dists * dists;
3729 v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3730 return signbits(hit_mask);
3755 static constexpr
bool theAllPointsValid =
true;
3775 float areaa = 0.5f*(
normalize(myDirs[0]));
3776 float areab = 0.5f*(
normalize(myDirs[1]));
3777 float areac = 0.5f*(
normalize(myDirs[2]));
3778 float aread = 0.5f*(
normalize(myDirs[3]));
3779 float area = areaa + areab + areac + aread;
3780 myDist = volume_times_3/
area;
3781 myIncentre = (areaa*a + areab*b + areac*c + aread*d)/area;
3788 if (
dot(myDirs[0], b - myIncentre) < 0)
3790 myDirs[0] = -myDirs[0];
3791 myDirs[1] = -myDirs[1];
3792 myDirs[2] = -myDirs[2];
3793 myDirs[3] = -myDirs[3];
3795 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[0], c - myIncentre), 0));
3796 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[0], d - myIncentre), 0));
3797 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[1], c - myIncentre), 0));
3798 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[1], d - myIncentre), 0));
3799 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[1], a - myIncentre), 0));
3800 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[2], d - myIncentre), 0));
3801 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[2], a - myIncentre), 0));
3802 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[2], b - myIncentre), 0));
3803 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[3], a - myIncentre), 0));
3804 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[3], b - myIncentre), 0));
3805 UT_ASSERT_P(SYSisGreaterOrEqual(
dot(myDirs[3], c - myIncentre), 0));
3812 myVDist =
v4uf(myDist);
3823 template<
typename RADIUS_ARRAY>
3827 auto dir{ tree_point - myIncentre };
3828 float d0 =
dot(dir, myDirs[0]);
3829 float d1 =
dot(dir, myDirs[1]);
3830 float d2 =
dot(dir, myDirs[2]);
3831 float d3 =
dot(dir, myDirs[3]);
3832 float d =
SYSmax(d0, d1, d2, d3) - myDist;
3835 float dsquared = d*d;
3836 return (d < 0) ? -dsquared : dsquared;
3842 template<
bool farthest>
3848 for (
uint axis = 0; axis < NAXES; ++axis)
3849 dirs[axis] = (boxes[axis][0] + boxes[axis][1])*0.5f - myVIncentre[axis];
3854 const v4uf d0 =
dot(dirs, myVDirs[0]);
3855 const v4uf d1 =
dot(dirs, myVDirs[1]);
3856 const v4uf d2 =
dot(dirs, myVDirs[2]);
3857 const v4uf d3 =
dot(dirs, myVDirs[3]);
3863 v4uf negative_mask = dists &
v4uu(0x80000000);
3865 dist2 = dists ^ negative_mask;
3871 dist2 = dists * dists;
3874 v4uu hit_mask = (!farthest) ? (dist2 <= max_dist_squared) : (dist2 >= max_dist_squared);
3875 return signbits(hit_mask);
3879 template<
bool farthest,
typename INT_TYPE0>
3885 : myIndicesMapping(indices_mapping)
3887 UT_ASSERT_MSG_P(indices_mapping,
"Use std::less or std::greater if not mapping.");
3892 if (a.myDistSquared != b.myDistSquared) {
3894 ? (a.myDistSquared < b.myDistSquared)
3895 : (a.myDistSquared > b.myDistSquared);
3900 ? (myIndicesMapping[a.myNode] < myIndicesMapping[b.myNode])
3901 : (myIndicesMapping[a.myNode] > myIndicesMapping[b.myNode]);
3904 const INT_TYPE0* myIndicesMapping;
3907 template<
bool farthest,
bool reordered,
bool use_max_po
ints,u
int NAXES,
typename QUERY_POINT,
typename INT_TYPE0,
typename POSITION_ARRAY,
typename RADIUS_ARRAY>
3910 const INT_TYPE0* indices_mapping,
3911 const POSITION_ARRAY& positions,
3912 QUERY_POINT& query_point,
3914 const RADIUS_ARRAY& radii,
3916 float& max_dist_squared) noexcept
3920 if (!QUERY_POINT::theAllPointsValid && !query_point.isValid(
index))
3926 float d2 = query_point.queryDistance2(positions[
index], radii, index);
3927 if ((!farthest) ? (d2 > max_dist_squared) : (d2 < max_dist_squared))
3930 if (!use_max_points || output_queue.size() < max_points) {
3931 output_queue.append(StackEntry(d2, index));
3932 if (use_max_points && output_queue.size() == max_points) {
3933 if (max_points > 1) {
3936 if (reordered || indices_mapping) {
3938 std::make_heap(output_queue.data(), output_queue.data()+output_queue.size(), comparator);
3941 std::make_heap(output_queue.data(), output_queue.data()+output_queue.size());
3946 if (reordered || indices_mapping) {
3948 std::make_heap(output_queue.data(), output_queue.data()+output_queue.size(), comparator);
3951 std::make_heap(output_queue.data(), output_queue.data()+output_queue.size(), std::greater<StackEntry>());
3954 max_dist_squared = output_queue[0].myDistSquared;
3957 max_dist_squared = d2;
3961 else if (max_points == 1) {
3962 bool select_new =
true;
3963 if (d2 == max_dist_squared)
3966 INT_TYPE0 other_ptoff;
3967 if (reordered || indices_mapping)
3968 other_ptoff = indices_mapping[output_queue[0].myNode];
3970 other_ptoff = INT_TYPE0(output_queue[0].myNode);
3971 INT_TYPE0 ptoff = indices_mapping[
index];
3972 select_new = (!farthest) ? (ptoff < other_ptoff) : (ptoff > other_ptoff);
3976 output_queue[0].myDistSquared = d2;
3977 output_queue[0].myNode =
index;
3978 max_dist_squared = d2;
3985 auto start = output_queue.data();
3986 auto end = output_queue.data()+output_queue.size();
3988 if (reordered || indices_mapping) {
3990 if (d2 == max_dist_squared && indices_mapping[start->myNode] < indices_mapping[index])
3994 std::pop_heap(start, end, comparator);
3995 end[-1].myDistSquared = d2;
3996 end[-1].myNode =
index;
3997 std::push_heap(start, end, comparator);
4001 if (d2 == max_dist_squared && start->myNode < index)
4004 std::pop_heap(start, end);
4005 end[-1].myDistSquared = d2;
4006 end[-1].myNode =
index;
4007 std::push_heap(start, end);
4011 if (reordered || indices_mapping) {
4013 if (d2 == max_dist_squared && indices_mapping[start->myNode] > indices_mapping[index])
4017 std::pop_heap(start, end, comparator);
4018 end[-1].myDistSquared = d2;
4019 end[-1].myNode =
index;
4020 std::push_heap(start, end, comparator);
4024 if (d2 == max_dist_squared && start->myNode > index)
4027 std::pop_heap(start, end, std::greater<StackEntry>());
4028 end[-1].myDistSquared = d2;
4029 end[-1].myNode =
index;
4030 std::push_heap(start, end, std::greater<StackEntry>());
4034 max_dist_squared = start[0].myDistSquared;
4038 template<
bool farthest,
bool reordered,
bool use_max_po
ints,u
int NAXES,
typename QUERY_POINT,
typename INT_TYPE0,
typename POSITION_ARRAY,
typename RADIUS_ARRAY>
4042 const v4uu* node_nitems,
4043 const INT_TYPE0* indices_mapping,
4044 const POSITION_ARRAY& positions,
4045 QUERY_POINT& query_point,
4048 const RADIUS_ARRAY& radii,
4050 float max_dist_squared) noexcept
4054 const NodeType* nodes = bvh.getNodes();
4055 const uint nnodes = bvh.getNumNodes();
4057 if (nnodes == 0 || (use_max_points && max_points<=0))
4077 StackEntry entry = stack.
last();
4080 uint next_node = entry.myNode;
4083 if ((!farthest) ? (entry.myDistSquared > max_dist_squared) : (entry.myDistSquared < max_dist_squared))
4088 if (reordered || NodeType::isInternal(next_node))
4093 if (next_node == (reordered ? NodeType::getInternalNum(NodeType::EMPTY) : NodeType::EMPTY))
4096 next_node = NodeType::getInternalNum(next_node);
4097 const BoxType &box = node_boxes[next_node];
4099 uint hit_bits = query_point_wrapper.template boxHitAndDist2<farthest>(box, max_dist_squared, next_node, dist2);
4102 const NodeType &node = nodes[next_node];
4104 StackEntry *stack_last = stack.
data()+stack_size-1;
4108 union {
v4sf d2v;
float d2s[4]; };
4110 uint cur_node = next_node;
4111 next_node =
uint(-1);
4115 uint local_index = SYSfirstBitSet(hit_bits);
4117 if (!NodeType::isInternal(index))
4119 const uint count = ((
const uint32*)(node_nitems+cur_node))[local_index];
4122 utHandleSingleClosePoint<farthest,reordered,use_max_points,NAXES>(
4123 index, indices_mapping, positions, query_point_wrapper,
4124 output_queue, radii, max_points, max_dist_squared);
4127 else if (next_node ==
uint(-1))
4129 next_node = NodeType::getInternalNum(index);
4130 d20 = d2s[local_index];
4135 uint child1 = NodeType::getInternalNum(index);
4136 float d21 = d2s[local_index];
4137 if ((!farthest) ? (d20 > d21) : (d20 < d21))
4139 uint childtmp = next_node;
4147 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= d20) : (stack_last->myDistSquared <= d20)))
4151 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= d21) : (stack_last->myDistSquared <= d21)))
4153 stack.
append(StackEntry(d21, child1));
4161 for (i = stack_size-2; i >= limiti; --i)
4163 if ((!farthest) ? (stack[i].myDistSquared >= d21) : (stack[i].myDistSquared <= d21))
4165 stack.
insert(StackEntry(d21, child1), i+1);
4171 stack.
insert(StackEntry(d21, child1), i+1);
4177 StackEntry entry = stack.
last();
4178 stack_last->myNode = child1;
4179 stack_last->myDistSquared = d21;
4180 stack.
append(StackEntry(d20, next_node));
4181 next_node = entry.myNode;
4182 d20 = entry.myDistSquared;
4184 stack_last = stack.
data()+stack_size;
4188 hit_bits &= (
uint(-2)<<local_index);
4189 }
while (hit_bits != 0);
4191 if (next_node ==
uint(-1))
4198 if (!(hit_bits & (hit_bits-1)))
4201 uint local_index = SYSfirstBitSet(hit_bits);
4203 next_node = node.child[local_index];
4205 float local_d2 = d2s[local_index];
4206 uint child_index = node.child[local_index];
4207 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= local_d2) : (stack_last->myDistSquared <= local_d2)))
4208 next_node = child_index;
4211 next_node = stack_last->myNode;
4212 stack_last->myNode = child_index;
4213 stack_last->myDistSquared = local_d2;
4227 while (stack_size != 0 && ((!farthest) ? (stack_last->myDistSquared > max_dist_squared) : (stack_last->myDistSquared < max_dist_squared)))
4233 static constexpr
uint two_bit_indices[16][2] = {
4234 {4, 4}, {4, 4}, {4, 4}, {0, 1},
4235 {4, 4}, {0, 2}, {1, 2}, {0, 1},
4236 {4, 4}, {0, 3}, {1, 3}, {0, 1},
4237 {2, 3}, {0, 2}, {1, 2}, {0, 1}
4239 static constexpr
uint third_bit_index[16] = {
4246 union {
v4sf d2v;
float d2s[4]; };
4248 uint local_index0 = two_bit_indices[hit_bits][0];
4249 uint local_index1 = two_bit_indices[hit_bits][1];
4250 if (third_bit_index[hit_bits] == 4)
4253 float d20 = d2s[local_index0];
4254 float d21 = d2s[local_index1];
4255 uint child0 = node.child[local_index0];
4256 uint child1 = node.child[local_index1];
4257 if ((!farthest) ? (d20 > d21) : (d20 < d21))
4259 uint childtmp = child0;
4267 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= d20) : (stack_last->myDistSquared <= d20)))
4273 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= d21) : (stack_last->myDistSquared <= d21)))
4275 stack.
append(StackEntry(d21, child1));
4283 for (i = stack_size-2; i >= limiti; --i)
4285 if ((!farthest) ? (stack[i].myDistSquared >= d21) : (stack[i].myDistSquared <= d21))
4287 stack.
insert(StackEntry(d21, child1), i+1);
4293 stack.
insert(StackEntry(d21, child1), i+1);
4299 next_node = stack_last->myNode;
4300 stack_last->myNode = child1;
4301 stack_last->myDistSquared = d21;
4302 stack.
append(StackEntry(d20, child0));
4308 uint nhit = (hit_bits == 0xF) ? 4 : 3;
4309 uint local_index2 = third_bit_index[hit_bits];
4310 uint children[4] = {
4311 node.child[local_index0],
4312 node.child[local_index1],
4313 node.child[local_index2],
4316 d2s[0] = d2s[local_index0];
4317 d2s[1] = d2s[local_index1];
4318 d2s[2] = d2s[local_index2];
4322 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= d2s[0]) : (stack_last->myDistSquared <= d2s[0])))
4325 next_node = children[0];
4329 new_d2 = stack_last->myDistSquared;
4330 next_node = stack_last->myNode;
4331 stack_last->myDistSquared = d2s[0];
4332 stack_last->myNode = children[0];
4334 for (
uint hit = 1; hit < nhit; ++hit)
4336 float d2 = d2s[hit];
4337 uint child = children[hit];
4338 if ((!farthest) ? (d2 < new_d2) : (d2 > new_d2))
4340 float tmpd2 = new_d2;
4341 uint tmpchild = next_node;
4349 if (!stack_size || ((!farthest) ? (stack_last->myDistSquared >= d2) : (stack_last->myDistSquared <= d2)))
4351 stack.
append(StackEntry(d2, child));
4359 for (i = stack_size-2; i >= limiti; --i)
4361 if ((!farthest) ? (stack[i].myDistSquared <= d2) : (stack[i].myDistSquared >= d2))
4363 stack.
insert(StackEntry(d2, child), i+1);
4369 stack.
insert(StackEntry(d2, child), i+1);
4372 stack_last = stack.
data()+stack_size;
4380 utHandleSingleClosePoint<farthest,reordered,use_max_points,NAXES>(
4381 index, indices_mapping, positions, query_point_wrapper,
4382 output_queue, radii, max_points, max_dist_squared);
4390 #undef BVH_TRY_ALL_AXES
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const FloatType max_dist_squared, const uint internal_node_num, v4uf &dist2) const
void utCreateBVHInterleavedBoxesHelper(INT_TYPE nodei, INT_TYPE next_node_id, const DATA &data, UT::Box< T, NAXES > *data_for_parent) noexcept
SYS_FORCE_INLINE T utBoxCenter(const UT::Box< T, NAXES > &box, uint axis) noexcept
const UT_FixedVector< FloatType, NAXES > myDiff
void UTparallelFor(const Range &range, const Body &body, const int subscribe_ratio=2, const int min_grain_size=1, const bool force_use_task_scope=true)
#define SYS_STATIC_ASSERT(expr)
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
GLsizei GLenum const void * indices
SYS_FORCE_INLINE exint length() const
const UT_FixedVector< v4uf, NAXES > myVDir
constexpr bool allRadiiZero(const T &array) noexcept
void findClosestPoints(const UT::BVH< 4 > &bvh, const UT::Box< v4uf, NAXES > *node_boxes, const v4uu *node_nitems, const INT_TYPE0 *indices_mapping, const POSITION_ARRAY &positions, QUERY_POINT &query_point, BVHOrderedStack &stack, BVHOrderedStack &output_queue, const RADIUS_ARRAY &radii=ZeroRadiiWrapper(), exint max_points=std::numeric_limits< exint >::max(), float max_dist_squared=std::numeric_limits< float >::max()) noexcept
void bumpCapacity(exint min_capacity)
void traverseParallel(INT_TYPE parallel_threshold, FUNCTORS &functors, LOCAL_DATA *data_for_parent=nullptr) const noexcept
SIM_API const UT_StringHolder angle
UT_Vector2T< float > UT_Vector2
const UT_FixedVector< FloatType, NAXES > myP0
T negative(const T &val)
Return the unary negation of the given value.
SYS_FORCE_INLINE void removeLast()
auto printf(const S &fmt, const T &...args) -> int
bool SYSisFinite(fpreal64 f)
int getPureInternalDepth() const noexcept
Returns the minimum depth of the first non-internal node.
const UT_FixedVector< FloatType, NAXES > myQueryPoint
void setSizeNoInit(exint newsize)
static constexpr uint scale
SYS_FORCE_INLINE T utBoxCenter(const UT_Vector4T< T > &position, uint axis) noexcept
vfloat4 sqrt(const vfloat4 &a)
constexpr UT_FixedVector< SYS_FixedArrayElement_t< TS >, SYS_FixedArraySize_v< TS > > UTmakeFixedVector(const TS &as) noexcept
SYS_FORCE_INLINE float queryDistance2(const TS &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
fpreal64 distance2(const UT_VectorD &v1, const UT_VectorD &v2)
Distance squared (L2) aka quadrance.
INT_TYPE utExcludeNaNInfBoxIndices(const BOX_TYPE *boxes, SRC_INT_TYPE *indices, INT_TYPE &nboxes) noexcept
SYS_FORCE_INLINE void utHandleRadiiLinearly(float &d2, const RADIUS_ARRAY &radii, uint index)
SYS_FORCE_INLINE const char * buffer() const
GLboolean GLboolean GLboolean GLboolean a
GLuint GLsizei GLsizei * length
void setCapacity(exint new_capacity)
#define UT_ASSERT_MSG_P(ZZ,...)
SYS_FORCE_INLINE BVHQuerySegment(const TS &p0, const TS &p1)
int getMaxDepth() const noexcept
Returns the maximum depth of any leaf.
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
GLdouble GLdouble GLdouble q
unsigned long long uint64
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const FloatType max_dist_squared, const uint internal_node_num, v4uf &dist2) const
SYS_FORCE_INLINE FloatType queryDistance2(const TS &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
void setSize(exint newsize)
UT_FixedVector< v4uf, NAXES > myVIncentre
SYS_FORCE_INLINE float queryDistance2(const UT_FixedVector< float, NAXES > &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
SYS_FORCE_INLINE FloatType queryDistance2(const TS &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
void getIntersectingBoxesFromStack(const UT::BVH< 4 > &bvh, const UT::Box< v4uf, NAXES > *node_boxes, const UT::Box< float, NAXES > &query_box, UT_Array< INT_TYPE > &box_indices, BVHUnorderedStack &stack) noexcept
SYS_FORCE_INLINE T minDistance2(const UT_FixedVector< T, NAXES > &p) const noexcept
SYS_FORCE_INLINE float queryDistance2(const UT_FixedVector< float, NAXES > &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
#define UT_ASSERT_MSG(ZZ,...)
SYS_FORCE_INLINE void utHandleSingleClosePoint(const uint index, const INT_TYPE0 *indices_mapping, const POSITION_ARRAY &positions, QUERY_POINT &query_point, BVHOrderedStack &output_queue, const RADIUS_ARRAY &radii, exint max_points, float &max_dist_squared) noexcept
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const float max_dist_squared, const uint internal_node_num, v4uf &dist2) const
GA_API const UT_StringHolder scale
SYS_FORCE_INLINE BVHQueryInfLine(const TS &p0, const TS &dir)
void computeNodeNItems(const UT::BVH< 4 > &bvh, v4uu *node_nitems, exint nitems) noexcept
Computes the number of items per node entry and fills in node_nitems.
const UT_FixedVector< v4uf, NAXES > myVP0
const UT_FixedVector< v4uf, NAXES > myVP0
const RADIUS_ARRAY & myRadii
void getIntersectingNodes(const UT::BVH< 4 > &bvh, const UT::Box< v4uf, NAXES > *node_boxes, const UT::Box< float, NAXES > &query_box, UT_Array< INT_TYPE > &box_indices, BVHUnorderedStack &stack) noexcept
const UT_FixedVector< v4uf, NAXES > myVP0
const UT_FixedVector< float, NAXES > myP
IMATH_NAMESPACE::V2f float
SYS_FORCE_INLINE T maxDistance2(const UT_FixedVector< T, NAXES > &p) const noexcept
SYS_FORCE_INLINE T getDistToSurface(const UT_FixedVector< T, NAXES > &point) const
const UT_FixedVector< FloatType, NAXES > myP0
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
static int getNumProcessors()
SYS_FORCE_INLINE ut_BVHOrderedStackCompare(const INT_TYPE0 *indices_mapping) noexcept
UT_Vector3T< T > SYSclamp(const UT_Vector3T< T > &v, const UT_Vector3T< T > &min, const UT_Vector3T< T > &max)
T diameter2() const noexcept
const UT_FixedVector< v4uf, NAXES > myVP
SYS_FORCE_INLINE void createBVHNodeBoxes(const UT::BVH< BVH_N > &bvh, const ITEM_BOX *item_boxes, NODE_BOX *node_boxes) noexcept
SYS_FORCE_INLINE void createBVHInterleavedBoxes(const UT::BVH< BVH_N > &bvh, const ITEM_BOX *item_boxes, NODE_BOX *node_boxes, float expand_factor=0.0f) noexcept
const UT_FixedVector< float, NAXES > myP0
STATIC_INLINE uint64_t H(uint64_t x, uint64_t y, uint64_t mul, int r)
UT_FixedVector< v4uf, NAXES > myVIncentre
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const float max_dist_squared, const uint internal_node_num, v4uf &dist2) const
UT_FixedVector< float, NAXES > myIncentre
GLboolean GLboolean GLboolean b
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
SYS_FORCE_INLINE BVHQueryTetrahedron(const UT_FixedVector< float, NAXES > &a, const UT_FixedVector< float, NAXES > &b, const UT_FixedVector< float, NAXES > &c, const UT_FixedVector< float, NAXES > &d)
GLint GLint GLsizei GLsizei GLsizei depth
void init(const BOX_TYPE *boxes, const INT_TYPE nboxes, SRC_INT_TYPE *indices=nullptr, bool reorder_indices=false, INT_TYPE max_items_per_leaf=1) noexcept
void traverse(FUNCTORS &functors, LOCAL_DATA *data_for_parent=nullptr) const noexcept
exint entries() const
Alias of size(). size() is preferred.
static void createTrivialIndices(SRC_INT_TYPE *indices, const INT_TYPE n) noexcept
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
const UT_FixedVector< v4uf, NAXES > myVQueryPoint
const UT_FixedVector< v4uf, NAXES > myVP1
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const FloatType max_dist_squared, const uint internal_node_num, v4uf &dist2) const
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
SYS_FORCE_INLINE bool sphereIntersects(const UT_FixedVector< T, NAXES > ¢er, const T rad) const
void getIntersectingBoxesBatch(const UT::BVH< 4 > &bvh, const UT::Box< v4uf, NAXES > *node_boxes, const UT::Box< float, NAXES > *query_box, UT_Array< INT_TYPE > *box_indices, BVHUnorderedStack &stack) noexcept
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const float max_dist_squared, const uint internal_node_num, v4uf &dist2) const
constexpr UT_Vector3T< SYS_FixedArrayElement_t< TS > > UTmakeVector3T(const TS &as) noexcept
ImageBuf OIIO_API absdiff(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
void utComputeNodeNItemsHelper(uint nodei, uint next_node_id, uint next_item_id, const DATA &data) noexcept
SIM_API const UT_StringHolder position
SYS_FORCE_INLINE void append(char character)
const UT_FixedVector< v4uf, NAXES > myVDiff
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
SYS_FORCE_INLINE BVHQueryPointWrapper(const TS &query_point)
GA_API const UT_StringHolder N
SYS_FORCE_INLINE float queryDistance2(const UT_FixedVector< float, NAXES > &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
SYS_FORCE_INLINE BVHQueryCone(const TS &p0, const TS &dir, const float angle, const POSITION_ARRAY &positions, const RADIUS_ARRAY &radii)
const UT_FixedVector< float, NAXES > myDir
const UT_FixedVector< FloatType, NAXES > myDir
IMATH_INTERNAL_NAMESPACE_HEADER_ENTER IMATH_HOSTDEVICE constexpr T abs(T a) IMATH_NOEXCEPT
SYS_FORCE_INLINE bool operator()(const BVHOrderedStackEntry &a, const BVHOrderedStackEntry &b) const noexcept
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
const POSITION_ARRAY & myPositions
void getIntersectingBoxes(const UT::BVH< 4 > &bvh, const UT::Box< v4uf, NAXES > *node_boxes, const UT::Box< float, NAXES > &query_box, UT_Array< INT_TYPE > &box_indices, BVHUnorderedStack &stack) noexcept
SYS_FORCE_INLINE bool utBoxExclude(const UT::Box< T, NAXES > &box) noexcept
void OIIO_UTIL_API split(string_view str, std::vector< string_view > &result, string_view sep=string_view(), int maxsplit=-1)
SYS_FORCE_INLINE BVHQueryTriangle(const UT_FixedVector< float, NAXES > &a, const UT_FixedVector< float, NAXES > &b, const UT_FixedVector< float, NAXES > &c)
SIM_API const UT_StringHolder distance
void clear()
Resets list to an empty list.
UT_FixedVector< float, NAXES > myIncentre
SYS_FORCE_INLINE void combine(const Box< T, NAXES > &src) noexcept
SYS_FORCE_INLINE FloatType queryDistance2(const TS &tree_point, const RADIUS_ARRAY &radii, uint index) const
This must be the exact distance squared.
SYS_FORCE_INLINE uint boxHitAndDist2(const BoxType &boxes, const float max_dist_squared, const uint internal_node_num, v4uf &dist2) const
SYS_FORCE_INLINE BVHQueryPtUnitWrap(const UT_FixedVector< float, NAXES > &p)
SYS_FORCE_INLINE bool isValid(uint tree_point_index) const
SIM_DerVector3 cross(const SIM_DerVector3 &lhs, const SIM_DerVector3 &rhs)
const UT_FixedVector< FloatType, NAXES > myP1
exint insert(exint index)
void traverseVector(FUNCTORS &functors, LOCAL_DATA *data_for_parent=nullptr) const noexcept
constexpr T normalize(UT_FixedVector< T, D > &a) noexcept
GA_API const UT_StringHolder area
bool isEmpty() const
Returns true iff there are no occupied elements in the array.