24 #include "core/platform/env.h"
119 class ThreadPoolInterface;
122 namespace onnxruntime {
131 namespace concurrency {
133 template <
typename Environment>
134 class ThreadPoolTempl;
136 class ExtendedThreadPoolInterface;
138 class ThreadPoolParallelSection;
157 const ThreadOptions& thread_options,
159 int degree_of_parallelism,
160 bool low_latency_hint,
161 bool force_hybrid =
false);
230 std::function<
void()> fn) {
251 const std::function<
void(std::ptrdiff_t
first, std::ptrdiff_t last)>& fn) {
256 const std::function<
void(std::ptrdiff_t first, std::ptrdiff_t last)>& fn);
262 const std::function<
void(std::ptrdiff_t)>& fn) {
264 tp->SimpleParallelFor(total, fn);
266 for (std::ptrdiff_t i = 0; i < total; ++i) {
283 template <
typename F>
286 for (std::ptrdiff_t i = 0; i < total; ++i) {
300 if (num_batches <= 0) {
304 if (num_batches <= 1) {
305 for (
int i = 0; i < total; i++) {
311 tp->SimpleParallelFor(num_batches, [&](std::ptrdiff_t batch_index) {
313 for (std::ptrdiff_t i = work.start; i < work.end; i++) {
327 constexpr
static WorkInfo PartitionWork(std::ptrdiff_t batch_idx, std::ptrdiff_t num_batches, std::ptrdiff_t total_work) {
328 const std::ptrdiff_t work_per_batch = total_work / num_batches;
329 const std::ptrdiff_t work_per_batch_extra = total_work % num_batches;
332 if (batch_idx < work_per_batch_extra) {
333 info.
start = (work_per_batch + 1) * batch_idx;
334 info.
end = info.
start + work_per_batch + 1;
336 info.
start = work_per_batch * batch_idx + work_per_batch_extra;
337 info.
end = info.
start + work_per_batch;
375 int NumThreads()
const;
379 int CurrentThreadId()
const;
386 void RunInParallel(std::function<
void(
unsigned idx)> fn,
unsigned n, std::ptrdiff_t block_size);
394 void ParallelForFixedBlockSizeScheduling(std::ptrdiff_t total, std::ptrdiff_t block_size,
395 const std::function<
void(std::ptrdiff_t, std::ptrdiff_t)>& fn);
400 bool ShouldParallelizeLoop(
const std::ptrdiff_t num_iterations,
401 const std::ptrdiff_t block_size = 1)
const;
405 void ParallelFor(std::ptrdiff_t total,
double cost_per_unit,
406 const std::function<
void(std::ptrdiff_t first, std::ptrdiff_t last)>& fn);
408 void ParallelFor(std::ptrdiff_t total,
const TensorOpCost& cost_per_unit,
409 const std::function<
void(std::ptrdiff_t first, std::ptrdiff_t)>& fn);
411 void SimpleParallelFor(std::ptrdiff_t total,
const std::function<
void(std::ptrdiff_t)>& fn);
413 void Schedule(std::function<
void()> fn);
419 ThreadOptions thread_options_;
428 std::unique_ptr<ThreadPoolTempl<Env> > extended_eigen_threadpool_;
431 bool force_hybrid_ =
false;
GLsizei const GLchar *const * string
static void TrySimpleParallelFor(ThreadPool *tp, std::ptrdiff_t total, const std::function< void(std::ptrdiff_t)> &fn)
static void Schedule(ThreadPool *tp, std::function< void()> fn)
static void TryBatchParallelFor(ThreadPool *tp, std::ptrdiff_t total, F &&fn, std::ptrdiff_t num_batches)
ThreadPool(Env *env, const ThreadOptions &thread_options, const NAME_CHAR_TYPE *name, int degree_of_parallelism, bool low_latency_hint, bool force_hybrid=false)
static bool ShouldParallelize(const ThreadPool *tp)
static void TryParallelFor(ThreadPool *tp, std::ptrdiff_t total, double cost_per_unit, const std::function< void(std::ptrdiff_t first, std::ptrdiff_t last)> &fn)
ParallelSection(ThreadPool *tp)
#define ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(TypeName)
GLuint const GLchar * name
static int DegreeOfParallelism(const ThreadPool *tp)
static std::string StopProfiling(concurrency::ThreadPool *tp)
static void StartProfiling(concurrency::ThreadPool *tp)
ORT_DISALLOW_COPY_AND_ASSIGNMENT(ThreadPool)
static constexpr WorkInfo PartitionWork(std::ptrdiff_t batch_idx, std::ptrdiff_t num_batches, std::ptrdiff_t total_work)