HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
reduce.h
Go to the documentation of this file.
1 //
2 // Copyright 2018 Pixar
3 //
4 // Licensed under the terms set forth in the LICENSE.txt file available at
5 // https://openusd.org/license.
6 //
7 #ifndef PXR_BASE_WORK_REDUCE_H
8 #define PXR_BASE_WORK_REDUCE_H
9 
10 /// \file work/reduce.h
11 #include "pxr/pxr.h"
13 #include "pxr/base/work/api.h"
14 
15 #include <tbb/blocked_range.h>
16 #include <tbb/parallel_reduce.h>
17 #include <tbb/task_group.h>
18 
20 
21 
22 ///////////////////////////////////////////////////////////////////////////////
23 ///
24 /// Recursively splits the range [0, \p n) into subranges, which are then
25 /// reduced by invoking \p loopCallback in parallel. Each invocation of
26 /// \p loopCallback returns a single value that is the result of joining the
27 /// elements in the respective subrange. These values are then further joined
28 /// using the binary operator \p reductionCallback, until only a single value
29 /// remains. This single value is then the result of joining all elements over
30 /// the entire range [0, \p n).
31 ///
32 /// The \p loopCallback must be of the form:
33 ///
34 /// V LoopCallback(size_t begin, size_t end, const V &identity);
35 ///
36 /// The \p reductionCallback must be of the form:
37 ///
38 /// V ReductionCallback(const V &lhs, const V &rhs);
39 ///
40 /// For example, the following code reduces an array of mesh points into a
41 /// single bounding box:
42 ///
43 /// ```{.cpp}
44 ///
45 /// // Get the mesh points from which we are going to generate the bounding box.
46 /// const std::vector<Vector3> &points = GetMeshPoints();
47 ///
48 /// // Generate the bounding box by parallel reducing the points.
49 /// BoundingBox bbox = WorkParallelReduceN(
50 /// BoundingBox(),
51 /// points.size(),
52 /// [&points](size_t b, size_t e, const BoundingBox &identity){
53 /// BoundingBox bbox(identity);
54 ///
55 /// // Insert each point in this subrange into the local bounding box.
56 /// for (size_t i = b; i != e; ++i) {
57 /// bbox.InsertPoint(points[i]);
58 /// }
59 ///
60 /// // Return the local bounding box, which now encapsulates all the
61 /// // points in this subrange.
62 /// return bbox;
63 /// },
64 /// [](const BoundingBox &lhs, const BoundingBox &rhs){
65 /// // Join two bounding boxes into a single bounding box. The
66 /// // algorithm will apply this reduction step recursively until there
67 /// // is only a single bounding box left.
68 /// BoundingBox bbox(lhs);
69 /// bbox.UnionWith(rhs);
70 /// return bbox;
71 /// }
72 /// );
73 ///
74 /// ```
75 ///
76 /// \p grainSize specifies a minimum amount of work to be done per-thread.
77 /// There is overhead to launching a task and a typical guideline is that
78 /// you want to have at least 10,000 instructions to count for the overhead of
79 /// launching that task.
80 ///
81 template <typename Fn, typename Rn, typename V>
82 V
84  const V &identity,
85  size_t n,
86  Fn &&loopCallback,
87  Rn &&reductionCallback,
88  size_t grainSize)
89 {
90  if (n == 0)
91  return identity;
92 
93  // Don't bother with parallel_reduce, if concurrency is limited to 1.
94  if (WorkHasConcurrency()) {
95 
96  class Work_Body_TBB
97  {
98  public:
99  Work_Body_TBB(Fn &fn) : _fn(fn) { }
100 
101  V operator()(
102  const tbb::blocked_range<size_t> &r,
103  const V &value) const {
104  // Note that we std::forward _fn using Fn in order get the
105  // right operator().
106  // We maintain the right type in this way:
107  // If Fn is T&, then reference collapsing gives us T& for _fn
108  // If Fn is T, then std::forward correctly gives us T&& for _fn
109  return std::forward<Fn>(_fn)(r.begin(), r.end(), value);
110  }
111  private:
112  Fn &_fn;
113  };
114 
115  // In most cases we do not want to inherit cancellation state from the
116  // parent context, so we create an isolated task group context.
117  tbb::task_group_context ctx(tbb::task_group_context::isolated);
118  return tbb::parallel_reduce(tbb::blocked_range<size_t>(0,n,grainSize),
119  identity,
120  Work_Body_TBB(loopCallback),
121  std::forward<Rn>(reductionCallback),
122  tbb::auto_partitioner(),
123  ctx);
124  }
125 
126  // If concurrency is limited to 1, execute serially.
127  return std::forward<Fn>(loopCallback)(0, n, identity);
128 }
129 
130 ///////////////////////////////////////////////////////////////////////////////
131 ///
132 /// \overload
133 ///
134 /// This overload does not accept a grain size parameter and instead attempts
135 /// to automatically deduce a grain size that is optimal for the current
136 /// resource utilization and provided workload.
137 ///
138 template <typename Fn, typename Rn, typename V>
139 V
141  const V &identity,
142  size_t n,
143  Fn &&loopCallback,
144  Rn &&reductionCallback)
145 {
146  return WorkParallelReduceN(identity, n, loopCallback, reductionCallback, 1);
147 }
148 
150 
151 #endif // PXR_BASE_WORK_REDUCE_H
GLsizei const GLfloat * value
Definition: glcorearb.h:824
GLdouble n
Definition: glcorearb.h:2008
PXR_NAMESPACE_OPEN_SCOPE V WorkParallelReduceN(const V &identity, size_t n, Fn &&loopCallback, Rn &&reductionCallback, size_t grainSize)
Definition: reduce.h:83
WORK_API bool WorkHasConcurrency()
PXR_NAMESPACE_CLOSE_SCOPE PXR_NAMESPACE_OPEN_SCOPE
Definition: path.h:1425
#define PXR_NAMESPACE_CLOSE_SCOPE
Definition: pxr.h:74
GLboolean r
Definition: glcorearb.h:1222