HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
timing.h
Go to the documentation of this file.
1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the terms set forth in the LICENSE.txt file available at
5 // https://openusd.org/license.
6 //
7 #ifndef PXR_BASE_ARCH_TIMING_H
8 #define PXR_BASE_ARCH_TIMING_H
9 
10 /// \file arch/timing.h
11 /// \ingroup group_arch_SystemFunctions
12 /// High-resolution, low-cost timing routines.
13 
14 #include "pxr/pxr.h"
15 #include "pxr/base/arch/api.h"
16 #include "pxr/base/arch/defines.h"
17 #include "pxr/base/arch/inttypes.h"
18 
19 /// \addtogroup group_arch_SystemFunctions
20 ///@{
21 
22 #if defined(ARCH_OS_LINUX) && defined(ARCH_CPU_INTEL)
23 #include <x86intrin.h>
24 #elif defined(ARCH_OS_DARWIN)
25 #include <mach/mach_time.h>
26 #elif defined(ARCH_OS_WINDOWS)
27 #include <intrin.h>
28 #endif
29 
30 #include <algorithm>
31 #include <atomic>
32 #include <iterator>
33 #include <numeric>
34 
36 
37 /// Return the current time in system-dependent units.
38 ///
39 /// The current time is returned as a number of "ticks", where each tick
40 /// represents some system-dependent amount of time. The resolution of the
41 /// timing routines varies, but on all systems, it is well under one
42 /// microsecond. The cost of this routine is in the 10s-to-100s of nanoseconds
43 /// on GHz class machines. Tick counts can be converted with ArchTicksToNanoseconds() or ArchTicksToSeconds().
44 inline uint64_t
45 ArchGetTickTime()
46 {
47 #if defined(ARCH_OS_DARWIN)
48  // On Darwin we'll use mach_absolute_time().
49  return mach_absolute_time();
50 #elif defined(ARCH_CPU_INTEL)
51  // On Intel we'll use the rdtsc instruction.
52  return __rdtsc();
53 #elif defined (ARCH_CPU_ARM)
54  uint64_t result;
55  #if defined(ARCH_COMPILER_MSVC)
56  // MSVC does not support inline assembly on ARM64 platforms
57  // 0x5F02 == ARM64_CNTVCT - manually calculated value avoids <windows.h>
58  result = _ReadStatusReg(0x5F02);
59  #else
60  __asm __volatile("mrs %0, CNTVCT_EL0" : "=&r" (result)); // read the CNTVCT_EL0 register into result
61  #endif
62  return result;
63 #else
64 #error Unknown architecture.
65 #endif
66 }
67 
68 
69 /// Get a "start" tick time for measuring an interval of time, followed by a
70 /// later call to ArchGetStopTickTime(). Or see ArchIntervalTimer. This is
71 /// like ArchGetTickTime but it includes compiler & CPU fencing & reordering
72 /// constraints in an attempt to get the best measurement possible.
73 inline uint64_t
74 ArchGetStartTickTime()
75 {
76  uint64_t t;
77 #if defined (ARCH_OS_DARWIN) || \
78  (defined (ARCH_CPU_ARM) && defined (ARCH_COMPILER_MSVC))
79  return ArchGetTickTime(); // these platforms just defer to the plain tick read
80 #elif defined (ARCH_CPU_ARM)
81  std::atomic_signal_fence(std::memory_order_seq_cst); // Prevent reorders by the compiler.
82  asm volatile("mrs %0, cntvct_el0" : "=r"(t));
83  std::atomic_signal_fence(std::memory_order_seq_cst);
84 #elif defined (ARCH_COMPILER_MSVC)
85  _mm_lfence();
86  std::atomic_signal_fence(std::memory_order_seq_cst);
87  t = __rdtsc();
88  _mm_lfence();
89  std::atomic_signal_fence(std::memory_order_seq_cst);
90 #elif defined(ARCH_CPU_INTEL) && \
91  (defined(ARCH_COMPILER_CLANG) || defined(ARCH_COMPILER_GCC))
92  // Prevent reorders by the compiler.
93  std::atomic_signal_fence(std::memory_order_seq_cst);
94  asm volatile(
95  "lfence\n\t"
96  "rdtsc\n\t"
97  "shl $32, %%rdx\n\t" // move rdx's value into the upper 32 bits...
98  "or %%rdx, %0\n\t" // ...and merge it into the "a" output operand (t)
99  "lfence"
100  : "=a"(t)
101  :
102  // rdtsc writes rdx
103  // shl modifies cc flags
104  : "rdx", "cc");
105 #else
106 #error "Unsupported architecture."
107 #endif
108  return t;
109 }
110 
111 /// Get a "stop" tick time for measuring an interval of time. See
112 /// ArchGetStartTickTime() or ArchIntervalTimer. This is like ArchGetTickTime
113 /// but it includes compiler & CPU fencing & reordering constraints in an
114 /// attempt to get the best measurement possible.
115 inline uint64_t
116 ArchGetStopTickTime()
117 {
118  uint64_t t;
119 #if defined (ARCH_OS_DARWIN) || \
120  (defined (ARCH_CPU_ARM) && defined (ARCH_COMPILER_MSVC))
121  return ArchGetTickTime(); // these platforms just defer to the plain tick read
122 #elif defined (ARCH_CPU_ARM)
123  std::atomic_signal_fence(std::memory_order_seq_cst); // Prevent reorders by the compiler.
124  asm volatile("mrs %0, cntvct_el0" : "=r"(t));
125  std::atomic_signal_fence(std::memory_order_seq_cst);
126 #elif defined (ARCH_COMPILER_MSVC)
127  std::atomic_signal_fence(std::memory_order_seq_cst);
128  unsigned aux; // written by __rdtscp through the pointer; not read afterwards
129  t = __rdtscp(&aux);
130  _mm_lfence();
131  std::atomic_signal_fence(std::memory_order_seq_cst);
132 #elif defined(ARCH_CPU_INTEL) && \
133  (defined(ARCH_COMPILER_CLANG) || defined(ARCH_COMPILER_GCC))
134  std::atomic_signal_fence(std::memory_order_seq_cst); // Prevent reorders by the compiler.
135  asm volatile(
136  "rdtscp\n\t"
137  "shl $32, %%rdx\n\t" // move rdx's value into the upper 32 bits...
138  "or %%rdx, %0\n\t" // ...and merge it into the "a" output operand (t)
139  "lfence"
140  : "=a"(t)
141  :
142  // rdtscp writes rcx & rdx
143  // shl modifies cc flags
144  : "rcx", "rdx", "cc");
145 #else
146 #error "Unsupported architecture."
147 #endif
148  return t;
149 }
150 
151 #if defined (doxygen) || \
152  (!defined(ARCH_OS_DARWIN) && defined(ARCH_CPU_INTEL) && \
153  (defined(ARCH_COMPILER_CLANG) || defined(ARCH_COMPILER_GCC)))
154 
155 /// A simple timer class for measuring an interval of time using the
156 /// tick timing facilities. This variant keeps the start time as two 32-bit halves (_startLow/_startHigh).
157 struct ArchIntervalTimer
158 {
159  /// Construct a timer and start timing if \p start is true.
160  explicit ArchIntervalTimer(bool start=true)
161  : _started(start) {
162  if (_started) {
163  Start();
164  }
165  }
166 
167  /// Start the timer, or reset the start time if it has already been started.
168  void Start() {
169  _started = true;
170  std::atomic_signal_fence(std::memory_order_seq_cst); // Prevent reorders by the compiler.
171  asm volatile(
172  "lfence\n\t"
173  "rdtsc\n\t"
174  "lfence"
175  : "=a"(_startLow), "=d"(_startHigh) :: ); // the "a" output is stored as the low half, the "d" output as the high half
176  }
177 
178  /// Return true if this timer is started.
179  bool IsStarted() const {
180  return _started;
181  }
182 
183  /// Return this timer's start time, or 0 if it hasn't been started.
184  uint64_t GetStartTicks() const {
185  return (uint64_t(_startHigh) << 32) + _startLow; // rebuild the 64-bit value from its two halves
186  }
187 
188  /// Read and return the current time. Does not consult or modify the stored start time.
189  uint64_t GetCurrentTicks() {
190  return ArchGetStopTickTime();
191  }
192 
193  /// Read the current time and return the difference between it and the start
194  /// time. If the timer was not started, return 0.
195  uint64_t GetElapsedTicks() {
196  if (!_started) {
197  return 0;
198  }
199  uint32_t stopLow, stopHigh;
200  std::atomic_signal_fence(std::memory_order_seq_cst); // Prevent reorders by the compiler.
201  asm volatile(
202  "rdtscp\n\t"
203  "lfence"
204  : "=a"(stopLow), "=d"(stopHigh)
205  :
206  // rdtscp writes rcx
207  : "rcx");
208  return ((uint64_t(stopHigh) << 32) + stopLow) - // stop time, rebuilt from its halves...
209  ((uint64_t(_startHigh) << 32) + _startLow); // ...minus the start time, rebuilt the same way
210  }
211 private:
212  bool _started = false;
213  uint32_t _startLow = 0, _startHigh = 0; // zero-initialized, so GetStartTicks() is 0 before the first Start()
214 };
215 
216 #else
217 
218 struct ArchIntervalTimer // Portable variant built on ArchGetStartTickTime() / ArchGetStopTickTime().
219 {
220  explicit ArchIntervalTimer(bool start=true) // Construct a timer and start timing if \p start is true.
221  : _started(start) {
222  if (_started) {
223  _startTicks = ArchGetStartTickTime();
224  }
225  }
226 
227  void Start() { // Start the timer, or reset the start time if already started.
228  _started = true;
229  _startTicks = ArchGetStartTickTime();
230  }
231 
232  bool IsStarted() const { // Return true if this timer is started.
233  return _started;
234  }
235 
236  uint64_t GetStartTicks() const { // Return the start time, or 0 if the timer hasn't been started.
237  return _startTicks;
238  }
239 
240  uint64_t GetCurrentTicks() { // Read and return the current time.
241  return ArchGetStopTickTime();
242  }
243 
244  uint64_t GetElapsedTicks() { // Ticks since the start time, or 0 if the timer was not started.
245  if (!_started) {
246  return 0;
247  }
248  return ArchGetStopTickTime() - _startTicks;
249  }
250 private:
251  bool _started = false;
252  uint64_t _startTicks = 0; // zero-initialized so GetStartTicks() is well-defined (0) before the first start
253 };
254 
255 #endif
256 
257 /// Return the tick time resolution. Although the number of ticks per second
258 /// may be very large, on many current systems the tick timers do not update at
259 /// that rate. Rather, sequential calls to ArchGetTickTime() may report
260 /// increases of 10s to 100s of ticks, with a minimum increment between calls.
261 /// This function returns that minimum increment as measured at startup time.
262 ///
263 /// Note that if this value is of sufficient size, then short times measured
264 /// with tick timers are potentially subject to significant noise. In
265 /// particular, an interval of measured tick time is liable to be off by +/- one
266 /// ArchGetTickQuantum().
267 ARCH_API
268 uint64_t ArchGetTickQuantum();
269 
270 /// Return the ticks taken to record an interval of time with ArchIntervalTimer,
271 /// as measured at startup time.
272 ARCH_API
273 uint64_t ArchGetIntervalTimerTickOverhead();
274 
275 
276 /// Convert a duration measured in "ticks", as returned by
277 /// \c ArchGetTickTime(), to nanoseconds.
278 ///
279 /// An example to test the timing routines would be:
280 /// \code
281 /// ArchIntervalTimer iTimer;
282 /// sleep(10);
283 ///
284 /// // duration should be approximately 10 * 1e9 = 1e10 nanoseconds.
285 /// int64_t duration = ArchTicksToNanoseconds(iTimer.GetElapsedTicks());
286 /// \endcode
287 ///
288 ARCH_API
289 int64_t ArchTicksToNanoseconds(uint64_t nTicks);
290 
291 /// Convert a duration measured in "ticks", as returned by
292 /// \c ArchGetTickTime(), to seconds. \p nTicks is the tick count to convert; the result is a \c double.
293 ARCH_API
294 double ArchTicksToSeconds(uint64_t nTicks);
295 
296 /// Convert a duration in seconds to "ticks", as returned by
297 /// \c ArchGetTickTime(). This is the reverse direction of ArchTicksToSeconds().
298 ARCH_API
299 uint64_t ArchSecondsToTicks(double seconds);
300 
301 /// Get nanoseconds per tick. Useful when converting ticks obtained from
302 /// \c ArchGetTickTime()
303 ARCH_API
304 double ArchGetNanosecondsPerTick();
305 
306 ARCH_API // Non-template back end that the ArchMeasureExecutionTime() template forwards to.
307 uint64_t // ArchMeasureExecutionTime() returns this value to its caller unchanged.
308 Arch_MeasureExecutionTime(uint64_t maxTicks, bool *reachedConsensus,
309  void const *m, uint64_t (*callM)(void const *, int)); // \p m is an opaque measurer; presumably invoked as callM(m, nTimes) -- confirm in the .cpp
310 
311 /// Run \p fn repeatedly attempting to determine a consensus fastest execution
312 /// time with low noise, for up to \p maxTicks, then return the consensus
313 /// fastest execution time. If a consensus is not reached in that time, return
314 /// a best estimate instead. If \p reachedConsensus is not null, set it to
315 /// indicate whether or not a consensus was reached. This function ignores \p
316 /// maxTicks greater than 5 billion ticks and runs for up to 5 billion ticks
317 /// instead. The \p fn will run for an indeterminate number of times, so it
318 /// should be side-effect free. Also, it should do essentially the same work
319 /// on every invocation so that timing its execution makes sense.
320 template <class Fn>
321 uint64_t
322 ArchMeasureExecutionTime(
323  Fn const &fn,
324  uint64_t maxTicks = 1e7,
325  bool *reachedConsensus = nullptr)
326 {
327  auto measureN = [&fn](int nTimes) -> uint64_t { // time nTimes back-to-back calls of fn
328  ArchIntervalTimer iTimer;
329  for (int i = nTimes; i--; ) {
330  std::atomic_signal_fence(std::memory_order_seq_cst); // Prevent reorders by the compiler.
331  (void)fn();
332  std::atomic_signal_fence(std::memory_order_seq_cst);
333  }
334  return iTimer.GetElapsedTicks();
335  };
336 
337  using MeasureNType = decltype(measureN);
338 
339  return Arch_MeasureExecutionTime(
340  maxTicks, reachedConsensus,
341  static_cast<void const *>(&measureN), // type-erase the lambda so the non-template back end can take it
342  [](void const *mN, int nTimes) { // trampoline: recover the lambda's type and call it
343  return (*static_cast<MeasureNType const *>(mN))(nTimes);
344  });
345 }
346 
347 ///@}
348 
350 
351 #endif // PXR_BASE_ARCH_TIMING_H
ARCH_API double ArchTicksToSeconds(uint64_t nTicks)
void
Definition: png.h:1083
GLuint start
Definition: glcorearb.h:475
bool IsStarted() const
Definition: timing.h:232
ARCH_API double ArchGetNanosecondsPerTick()
ARCH_API int64_t ArchTicksToNanoseconds(uint64_t nTicks)
**But if you need a result
Definition: thread.h:622
ARCH_API uint64_t ArchGetIntervalTimerTickOverhead()
uint64_t GetElapsedTicks()
Definition: timing.h:244
uint64_t GetCurrentTicks()
Definition: timing.h:240
ArchIntervalTimer(bool start=true)
Definition: timing.h:220
uint64_t GetStartTicks() const
Definition: timing.h:236
uint64_t ArchMeasureExecutionTime(Fn const &fn, uint64_t maxTicks=1e7, bool *reachedConsensus=nullptr)
Definition: timing.h:322
ARCH_API uint64_t ArchSecondsToTicks(double seconds)
GLdouble t
Definition: glad.h:2397
void Start()
Definition: timing.h:227
PXR_NAMESPACE_OPEN_SCOPE uint64_t ArchGetTickTime()
Definition: timing.h:45
uint64_t ArchGetStartTickTime()
Definition: timing.h:74
PXR_NAMESPACE_CLOSE_SCOPE PXR_NAMESPACE_OPEN_SCOPE
Definition: path.h:1425
#define PXR_NAMESPACE_CLOSE_SCOPE
Definition: pxr.h:74
#define ARCH_API
Definition: api.h:23
uint64_t ArchGetStopTickTime()
Definition: timing.h:116
ARCH_API uint64_t Arch_MeasureExecutionTime(uint64_t maxTicks, bool *reachedConsensus, void const *m, uint64_t(*callM)(void const *, int))
ARCH_API uint64_t ArchGetTickQuantum()