18 #if (((OIIO_GNUC_VERSION && NDEBUG) || OIIO_CLANG_VERSION >= 30500 || OIIO_APPLE_CLANG_VERSION >= 70000 || defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)) \
19 && (defined(__x86_64__) || defined(__i386__))) \
21 #define OIIO_DONOTOPT_FORECINLINE OIIO_FORCEINLINE
23 #define OIIO_DONOTOPT_FORECINLINE inline
64 template<
typename T,
typename ...Ts>
68 if (
sizeof...(Ts) > 0)
143 template<
typename FUNC,
typename... ARGS>
149 std::cout << (*this) << std::endl;
155 double avg()
const {
return m_avg; }
156 double stddev()
const {
return m_stddev; }
157 double range()
const {
return m_range; }
158 double median()
const {
return m_median; }
165 m_user_iterations =
val;
176 size_t trials()
const {
return m_trials; }
191 size_t work()
const {
return m_work; }
200 m_exclude_outliers = e;
238 const std::string&
name()
const {
return m_name; }
241 size_t m_iterations = 0;
242 size_t m_user_iterations = 0;
243 size_t m_trials = 10;
246 std::vector<double> m_times;
251 int m_exclude_outliers = 1;
254 Unit m_units = Unit::autounit;
256 template<
typename FUNC,
typename... ARGS>
257 double run(FUNC
func, ARGS&&...
args)
259 if (m_user_iterations)
260 m_iterations = m_user_iterations;
262 m_iterations = determine_iterations(func,
args...);
263 m_times.resize(m_trials);
265 double overhead = iteration_overhead() * iterations();
266 for (
auto&
t : m_times)
267 t =
std::max(0.0, do_trial(m_iterations, func,
args...) - overhead);
272 template<
typename FUNC,
typename... ARGS>
273 size_t determine_iterations(FUNC func, ARGS&&...
args)
276 const double target_time = 0.01;
279 double t = do_trial (i, func,
args...);
281 if (t > target_time * 1.5 && i > 2)
283 if (t > target_time * 0.75 || i > (
size_t(1) << 30))
285 if (t < target_time / 16)
292 template<
typename FUNC,
typename... ARGS>
293 double do_trial(
size_t iterations, FUNC func, ARGS&&...
args)
296 while (iterations--) {
303 void compute_stats() { compute_stats(m_times, m_iterations); }
304 void compute_stats(std::vector<double>& times,
size_t iterations);
305 double iteration_overhead();
320 template<
typename FUNC>
324 double mintime = 1.0e30, maxtime = 0.0;
325 while (ntrials-- > 0) {
327 for (
int i = 0; i < nrepeats; ++i) {
339 *
range = maxtime - mintime;
344 template<
typename FUNC>
379 int total_iterations,
int ntrials,
380 cspan<int> threadcounts = {1,2,4,8,12,16,24,32,48,64,128});
387 int maxthreads,
int total_iterations,
int ntrials,
388 cspan<int> threadcounts = {1,2,4,8,12,16,24,32,48,64,128});
404 #if ((OIIO_GNUC_VERSION && NDEBUG) || OIIO_CLANG_VERSION >= 30500 || OIIO_APPLE_CLANG_VERSION >= 70000 || defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)) \
405 && (defined(__x86_64__) || defined(__i386__))
412 #if defined(__clang__)
416 asm volatile(
"" : :
"g"(
val) :
"memory");
418 asm volatile(
"" : :
"i,r,m"(
val) :
"memory");
426 #pragma optimize("", off)
430 _ReadWriteBarrier ();
433 #pragma optimize("", on)
435 #elif __has_attribute(__optnone__)
440 inline T const & __attribute__((__optnone__))
458 #if ((OIIO_GNUC_VERSION && NDEBUG) || OIIO_CLANG_VERSION >= 30500 || OIIO_APPLE_CLANG_VERSION >= 70000 || defined(__INTEL_COMPILER)) && (defined(__x86_64__) || defined(__i386__))
462 asm volatile (
"" : : :
"memory");
468 _ReadWriteBarrier ();
OIIO_NAMESPACE_BEGIN OIIO_DONOTOPT_FORECINLINE T const & DoNotOptimize(T const &val)
void OIIO_UTIL_API clobber(void *p)
typedef int(APIENTRYP RE_PFNGLXSWAPINTERVALSGIPROC)(int)
Benchmarker & indent(int spaces)
size_t iterations() const
void OIIO_UTIL_API use_char_ptr(char const volatile *)
String-related utilities, all in namespace Strutil.
Benchmarker & verbose(int v)
OIIO_FORCEINLINE void clobber_all_memory()
std::ostream & operator<<(std::ostream &ostr, const DataType &a)
int exclude_outliers() const
const std::string & name() const
Benchmarker & units(Unit s)
GLuint const GLchar * name
double time_trial(FUNC func, int ntrials=1, int nrepeats=1, double *range=NULL)
Benchmarker & work(size_t val)
Benchmarker & iterations(size_t val)
Benchmarker & exclude_outliers(int e)
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
**If you just want to fire and args
#define OIIO_NAMESPACE_END
double operator()(string_view name, FUNC func, ARGS &&...args)
Benchmarker & trials(size_t val)
OIIO_UTIL_API std::vector< double > timed_thread_wedge(function_view< void(int)> task, function_view< void()> pretask, function_view< void()> posttask, std::ostream *out, int maxthreads, int total_iterations, int ntrials, cspan< int > threadcounts={1, 2, 4, 8, 12, 16, 24, 32, 48, 64, 128})
#define OIIO_DONOTOPT_FORECINLINE
#define OIIO_NAMESPACE_BEGIN