HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_TaskExclusive.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_TaskExclusive.h (UT Library, C++)
7  *
8  * COMMENTS:
9  */
10 
11 #pragma once
12 
13 #ifndef __UT_TaskExclusive__
14 #define __UT_TaskExclusive__
15 
16 #include "UT_Task.h"
17 #include "UT_TaskScope.h"
18 #include "UT_TaskState.h"
19 #include "UT_TaskArena.h"
20 #include "UT_Thread.h"
21 #include <SYS/SYS_Pragma.h>
22 #include <utility>
23 
24 /// This is a lock-free implementation for exclusive task execution. That is,
25 /// a task which needs to be performed once (cf. std::call_once). However, this
26 /// construct will allow TBB to recycle blocked tasks so they can be used for
27 /// other processing.
28 ///
29 /// This can be used as an alternative to a lock. If code underneath a lock
30 /// calls into TBB, this can lead to a deadlock since TBB can steal a child
31 /// task to complete a parent task outside the lock. This typically requires a
32 /// separate task arena. A lock will also block a thread, preventing it from
33 /// participating in other tasks.
34 ///
35 /// UT_TaskExclusive provides a good alternative, ensuring that only one thread
36 /// will execute the functor, and all other threads which wait for the functor
37 /// to finish will be allowed to participate in other computation (even to help
38 /// computing parallel tasks in the functor).
39 ///
40 /// The class is templated on a functor which is used to actually perform the
41 /// execution. The template functor needs to have an operator()() method.
42 ///
43 /// For example, given a single Object which has a deferredInitialize() method
44 /// that may get called from multiple threads: @code
45 /// class Object
46 /// {
47 /// UT_TaskExclusive<Object> myExclusive;
48 ///
49 /// public:
50 /// // Users call deferredInitialize when they want the object to be
51 /// // initialized. However, it's possible the object may need to be
52 /// // initialized by multiple threads. The object uses UT_TaskExclusive
53 /// // to ensure doInitialization() is only executed one time.
54 /// void deferredInitialize()
55 /// {
56 /// myExclusive.execute(*this);
57 /// }
58 /// void operator()()
59 /// {
60 /// // This method will only be called once, even if
61 /// // multiple threads invoke the deferredInitialize() method
62 /// // simultaneously.
63 /// doInitialization();
64 /// }
65 /// }
66 /// @endcode
67 /// If you have multiple methods that should only be called one time, you can
68 /// always create a nested object functor.
69 ///
70 /// If the functor is likely to create further tbb tasks, you can ensure these
71 /// tasks are run in their own task arena by setting @c run_in_task_arena to
72 /// true (the default).
73 template <class T>
75 {
76 public:
78  : myState()
79  {
80  }
81 
82  /// Execute the compute task. This will guarantee the function has been
83  /// run before the execute() function returns. However, no locking will be
84  /// done.
85  ///
86  /// If multiple threads try to call the function simultaneously, only one
87  /// function will run, while the other will yield its cycles to other
88  /// parallel tasks. When the first task completes, both threads will
89  /// return.
90  ///
91  /// If the functor is likely to create further tbb tasks, you can ensure
92  /// these tasks are run in their own task arena by setting @c
93  /// run_in_task_arena to true (the default). The primary reason for having
94  /// a separate task arena is that if the functor creates further tasks,
95  /// and one of these tasks is also dependent on the task exclusive, this
96  /// can lead to a tbb deadlock (a tbb stack lock).
97  void execute(
98  T &func,
99  bool run_in_task_arena = true,
100  bool use_spinlock = false)
101  {
102  UT_TaskState::TaskStatus status = myState.tryMarkAsBusy(run_in_task_arena);
104  {
105  // The task has been run, so just return
106  return;
107  }
108 
109  if (run_in_task_arena && status == UT_TaskState::FREE)
110  {
111  // I win and I get to run the compute function
114  // NOTE: The arena & group must be on the heap, since this
115  // thread might have returned before all tasks in the
116  // arena, waiting on the group, have finished.
117  auto *arena_group = new UT_TaskState::ArenaAndGroup();
118  UT_TaskArena *arena = &arena_group->first;
119  const UT_TaskScope *parent_task_scope = UT_TaskScope::getCurrent();
120 
121  // execute will be called on the arena from multiple threads,
122  // but this one will always run before any others can start,
123  // so we don't need to explicitly call initialize.
124  //arena->initialize();
125  arena->execute([this,&func,arena_group,parent_task_scope]()
126  {
127  UT_TaskGroup *group = &arena_group->second;
128  group->run([this,&func,parent_task_scope]()
129  {
130  UT_TaskScope task_scope(parent_task_scope);
131  func();
132  // Can't explicitly wake up waiting tasks,
133  // so we must pass nullptr here.
134  myState.markAsDone(nullptr, true);
135  });
136  // Ensure the initialized arena and group are written out to main memory
137  // before the accessible pointer is set.
138  SYSstoreFence();
139  // The arena/group pointer must be set after run has been
140  // called on the group, to ensure that other threads,
141  // waiting for this pointer to be non-null, won't try to
142  // call wait on the group until there's a task in the group.
143  myState.setAndRetainArenaGroup(arena_group);
144 
145  // NOTE: We probably don't need a UT_TaskScope here; the one
146  // inside the lambda passed to run() should suffice.
147  group->wait();
148  });
150  myState.decrefArenaGroup();
151  }
152  else if (status == UT_TaskState::BUSY_WITH_ARENA)
153  {
154  // NOTE: Even if run_in_task_arena is false, if the first one
155  // called it with true, we need to be consistent.
156  // e.g. VGEO_MotionBlurTree was clearing myBoxes during its
157  // building, and code outside the exclusive was checking
158  // the size to determine whether to parallelize,
159  // (i.e. whether to use a task arena.)
160 
161  auto *arena_group = myState.getArenaGroup();
162  if (!arena_group)
163  {
164  // Spin until the group task gets created.
165  UT_ThreadBackoff backoff;
166  do
167  {
168  backoff.wait();
169  arena_group = myState.getArenaGroup();
170  } while (!arena_group);
171  }
172 
173  // NOTE: We probably don't need a SYSloadFence() here,
174  // because speculative reads of memory pointed to by
175  // arena_group would fail, since it was previously null.
176  // As soon as it's non-null, it's safe to read, as ensured
177  // by the SYSstoreFence() above.
178 
179  // Enter the task arena running the group task,
180  // and wait on the group.
181  UT_TaskArena *arena = &arena_group->first;
182  UT_TaskGroup *group = &arena_group->second;
183  arena->execute([group]()
184  {
185  // NOTE: This task doesn't need its own task scope,
186  // because any tasks executed during the group->wait()
187  // should have their own task scopes, which are
188  // automatically descendents of the task scope in the
189  // main compute task.
190  group->wait();
191  });
192  myState.decrefArenaGroup();
193  }
194  else if (use_spinlock)
195  {
196  if (status == UT_TaskState::FREE)
197  {
198  // If other threads will be using a spin lock, we don't
199  // need to create a new task.
200  func();
201  myState.markAsDone(nullptr, false);
202  }
203  else
204  {
205  UT_ThreadBackoff backoff;
206  do
207  {
208  backoff.wait();
209  } while (myState.relaxedLoadStatus() == UT_TaskState::BUSY_NO_ARENA);
210  }
211  }
212  else
213  {
214  if (status == UT_TaskState::FREE)
215  {
216  // We need a compute task, just so that we can specify
217  // a parent task for spawning child tasks upon completion.
218  // There might be some way to eliminate this.
219  ComputeTask *root = ComputeTask::createRoot(myState, func);
220  ComputeTask::spawnRootAndWait(*root);
221  }
222  else
223  {
224  // I lost and I have to wait for the opportunity to help.
225  DummyTask *root = DummyTask::createRoot(myState);
226  DummyTask::spawnRootAndWait(*root);
227  }
228  }
229  }
230 
231  /// Executes the compute task in this thread without any locking
232  /// or protection. Useful if the caller has already setup the
233  /// appropriate lock.
235  {
236  if (!hasRun())
237  {
238  func();
239  myState.markAsDoneNoThread();
240  }
241  }
242 
243  /// Resetting the exclusive task should only be done when there's no
244  /// possibility that any threads are trying to execute or relying on the
245  /// results of the computation.
246  void reset() { myState.reset(); }
247 
248  /// Test whether the function has been executed. This is thread-safe, but
249  /// doesn't count on other threads which may be in the process of running
250  /// it.
251  bool hasRun() const { return myState.isDone(); }
252 
253 private:
254  class ComputeTask : public UT_Task
255  {
256  public:
257  ComputeTask(UT_TaskState &state, T &func)
258  : myState(state)
259  , myFunc(func)
260  {
261  }
262  static ComputeTask *createRoot(UT_TaskState &state, T &func)
263  {
264  return new (allocate_root()) ComputeTask(state, func);
265  }
266 
267  virtual UT_Task *run()
268  {
269  myFunc();
270  myState.markAsDone(this, false);
271  return nullptr;
272  }
273  T &myFunc;
274  UT_TaskState &myState;
275  };
276  class DummyTask : public UT_Task
277  {
278  public:
279  DummyTask(UT_TaskState &state)
280  : myState(state)
281  {
282  }
283  static DummyTask *createRoot(UT_TaskState &state)
284  {
285  return new (allocate_root()) DummyTask(state);
286  }
287 
288  virtual UT_Task *run()
289  {
290  if (myState.relaxedLoadStatus() != UT_TaskState::BUSY_NO_ARENA)
291  return nullptr;
292 
293  // Someone else is performing the computation, so
294  // a) Add a child task to this task (introducing a dependency)
295  // b) Add the child task to myState so it can be run when the
296  // compute is done... The child (wait task) isn't run
297  // until the compute is complete.
298  // c) Recycle my task so that it's put back on the TBB
299  // scheduler. When the child task is complete, this task
300  // will be run again, but the state will be marked as
301  // done, so we can return.
302 
303  // Increment the reference count 2 times
304  // - Once since I have a child
305  // - Once since I'm recycling myself
308  myState.addWaitingTask(*this);
310  return nullptr;
311  }
312  UT_TaskState &myState;
313  };
314 
315  UT_TaskState myState;
316 };
317 
318 #endif
319 
Thread has acquired responsibility to evaluate node.
Definition: UT_TaskState.h:56
void markAsDoneNoThread()
Non-threaded version of marking as done.
Definition: UT_TaskState.h:208
void execute(F &functor)
Definition: UT_TaskArena.h:37
#define SYS_PRAGMA_PUSH_WARN()
Definition: SYS_Pragma.h:34
static const UT_TaskScope * getCurrent()
Definition: UT_TaskScope.h:94
A task node for managing which thread is currently working on a given task.
Definition: UT_TaskState.h:42
The node has been evaluated with an arena.
Definition: UT_TaskState.h:59
void decrefArenaGroup()
Definition: UT_TaskState.h:241
ArenaAndGroup * getArenaGroup() const
Definition: UT_TaskState.h:233
void markAsDone(UT_Task *parent_task, bool run_in_task_arena)
Definition: UT_TaskState.h:182
void reset()
Definition: UT_TaskState.h:80
TaskStatus tryMarkAsBusy(bool run_in_task_arena=false)
Definition: UT_TaskState.h:100
tbb::task_group_status wait()
Definition: UT_Task.h:238
void recycleAsSafeContinuation()
Definition: UT_Task.h:53
void incrementRefCount()
Definition: UT_Task.h:40
#define SYSstoreFence()
#define SYS_PRAGMA_POP_WARN()
Definition: SYS_Pragma.h:35
#define SYS_PRAGMA_DISABLE_ATTRIBUTES()
Definition: SYS_Pragma.h:152
bool isDone() const
Test whether the task state is marked as DONE.
Definition: UT_TaskState.h:91
void executeNoThread(T &func)
GLenum func
Definition: glcorearb.h:782
void setAndRetainArenaGroup(ArenaAndGroup *p)
Definition: UT_TaskState.h:227
Another thread is busy evaluating the node without an arena.
Definition: UT_TaskState.h:58
Another thread is busy evaluating the node with an arena.
Definition: UT_TaskState.h:57
std::pair< UT_TaskArena, UT_TaskGroup > ArenaAndGroup
Definition: UT_TaskState.h:225
void run(const F &f)
Definition: UT_Task.h:210
bool hasRun() const
void execute(T &func, bool run_in_task_arena=true, bool use_spinlock=false)
TaskStatus relaxedLoadStatus() const
This does a fast (non-atomic) check of the status.
Definition: UT_TaskState.h:220