HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
timing.h
Go to the documentation of this file.
1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 // names, trademarks, service marks, or product names of the Licensor
11 // and its affiliates, except as required to comply with Section 4(c) of
12 // the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 // http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_ARCH_TIMING_H
25 #define PXR_BASE_ARCH_TIMING_H
26 
27 /// \file arch/timing.h
28 /// \ingroup group_arch_SystemFunctions
29 /// High-resolution, low-cost timing routines.
30 
31 #include "pxr/pxr.h"
32 #include "pxr/base/arch/api.h"
33 #include "pxr/base/arch/defines.h"
34 #include "pxr/base/arch/inttypes.h"
35 
36 /// \addtogroup group_arch_SystemFunctions
37 ///@{
38 
39 #if defined(ARCH_OS_LINUX) && defined(ARCH_CPU_INTEL)
40 #include <x86intrin.h>
41 #elif defined(ARCH_OS_DARWIN)
42 #include <mach/mach_time.h>
43 #elif defined(ARCH_OS_WINDOWS)
44 #include <intrin.h>
45 #endif
46 
47 #include <algorithm>
48 #include <atomic>
49 #include <iterator>
50 #include <numeric>
51 
53 
54 /// Return the current time in system-dependent units.
55 ///
56 /// The current time is returned as a number of "ticks", where each tick
57 /// represents some system-dependent amount of time. The resolution of the
58 /// timing routines varies, but on all systems, it is well under one
59 /// microsecond. The cost of this routine is in the 10s-to-100s of nanoseconds
60 /// on GHz class machines.
61 inline uint64_t
63 {
64 #if defined(ARCH_OS_DARWIN)
65  // On Darwin we'll use mach_absolute_time().
66  return mach_absolute_time();
67 #elif defined(ARCH_CPU_INTEL)
68  // On Intel we'll use the rdtsc instruction.
69  return __rdtsc();
70 #elif defined (ARCH_CPU_ARM)
71  uint64_t result;
72  __asm __volatile("mrs %0, CNTVCT_EL0" : "=&r" (result));
73  return result;
74 #else
75 #error Unknown architecture.
76 #endif
77 }
78 
79 
80 /// Get a "start" tick time for measuring an interval of time, followed by a
81 /// later call to ArchGetStopTickTime(). Or see ArchIntervalTimer. This is
82 /// like ArchGetTickTime but it includes compiler & CPU fencing & reordering
83 /// constraints in an attempt to get the best measurement possible.
84 inline uint64_t
86 {
87  uint64_t t;
88 #if defined (ARCH_OS_DARWIN)
89  return ArchGetTickTime();
90 #elif defined (ARCH_CPU_ARM)
91  std::atomic_signal_fence(std::memory_order_seq_cst);
92  asm volatile("mrs %0, cntvct_el0" : "=r"(t));
93  std::atomic_signal_fence(std::memory_order_seq_cst);
94 #elif defined (ARCH_COMPILER_MSVC)
95  _mm_lfence();
96  std::atomic_signal_fence(std::memory_order_seq_cst);
97  t = __rdtsc();
98  _mm_lfence();
99  std::atomic_signal_fence(std::memory_order_seq_cst);
100 #elif defined(ARCH_CPU_INTEL) && \
101  (defined(ARCH_COMPILER_CLANG) || defined(ARCH_COMPILER_GCC))
102  // Prevent reorders by the compiler.
103  std::atomic_signal_fence(std::memory_order_seq_cst);
104  asm volatile(
105  "lfence\n\t"
106  "rdtsc\n\t"
107  "shl $32, %%rdx\n\t"
108  "or %%rdx, %0\n\t"
109  "lfence"
110  : "=a"(t)
111  :
112  // rdtsc writes rdx
113  // shl modifies cc flags
114  : "rdx", "cc");
115 #else
116 #error "Unsupported architecture."
117 #endif
118  return t;
119 }
120 
121 /// Get a "stop" tick time for measuring an interval of time. See
122 /// ArchGetStartTickTime() or ArchIntervalTimer. This is like ArchGetTickTime
123 /// but it includes compiler & CPU fencing & reordering constraints in an
124 /// attempt to get the best measurement possible.
125 inline uint64_t
127 {
128  uint64_t t;
129 #if defined (ARCH_OS_DARWIN)
130  return ArchGetTickTime();
131 #elif defined (ARCH_CPU_ARM)
132  std::atomic_signal_fence(std::memory_order_seq_cst);
133  asm volatile("mrs %0, cntvct_el0" : "=r"(t));
134  std::atomic_signal_fence(std::memory_order_seq_cst);
135 #elif defined (ARCH_COMPILER_MSVC)
136  std::atomic_signal_fence(std::memory_order_seq_cst);
137  unsigned aux;
138  t = __rdtscp(&aux);
139  _mm_lfence();
140  std::atomic_signal_fence(std::memory_order_seq_cst);
141 #elif defined(ARCH_CPU_INTEL) && \
142  (defined(ARCH_COMPILER_CLANG) || defined(ARCH_COMPILER_GCC))
143  std::atomic_signal_fence(std::memory_order_seq_cst);
144  asm volatile(
145  "rdtscp\n\t"
146  "shl $32, %%rdx\n\t"
147  "or %%rdx, %0\n\t"
148  "lfence"
149  : "=a"(t)
150  :
151  // rdtscp writes rcx & rdx
152  // shl modifies cc flags
153  : "rcx", "rdx", "cc");
154 #else
155 #error "Unsupported architecture."
156 #endif
157  return t;
158 }
159 
160 #if defined (doxygen) || \
161  (!defined(ARCH_OS_DARWIN) && defined(ARCH_CPU_INTEL) && \
162  (defined(ARCH_COMPILER_CLANG) || defined(ARCH_COMPILER_GCC)))
163 
164 /// A simple timer class for measuring an interval of time using the
165 /// ArchTickTimer facilities.
166 struct ArchIntervalTimer
167 {
168  /// Construct a timer and start timing if \p start is true.
169  explicit ArchIntervalTimer(bool start=true)
170  : _started(start) {
171  if (_started) {
172  Start();
173  }
174  }
175 
176  /// Start the timer, or reset the start time if it has already been started.
177  void Start() {
178  _started = true;
179  std::atomic_signal_fence(std::memory_order_seq_cst);
180  asm volatile(
181  "lfence\n\t"
182  "rdtsc\n\t"
183  "lfence"
184  : "=a"(_startLow), "=d"(_startHigh) :: );
185  }
186 
187  /// Return true if this timer is started.
188  bool IsStarted() const {
189  return _started;
190  }
191 
192  /// Return this timer's start time, or 0 if it hasn't been started.
193  uint64_t GetStartTicks() const {
194  return (uint64_t(_startHigh) << 32) + _startLow;
195  }
196 
197  /// Read and return the current time.
198  uint64_t GetCurrentTicks() {
199  return ArchGetStopTickTime();
200  }
201 
202  /// Read the current time and return the difference between it and the start
203  /// time. If the timer was not started, return 0.
204  uint64_t GetElapsedTicks() {
205  if (!_started) {
206  return 0;
207  }
208  uint32_t stopLow, stopHigh;
209  std::atomic_signal_fence(std::memory_order_seq_cst);
210  asm volatile(
211  "rdtscp\n\t"
212  "lfence"
213  : "=a"(stopLow), "=d"(stopHigh)
214  :
215  // rdtscp writes rcx
216  : "rcx");
217  return (uint64_t(stopHigh - _startHigh) << 32) + (stopLow - _startLow);
218  }
219 private:
220  bool _started = false;
221  uint32_t _startLow = 0, _startHigh = 0;
222 };
223 
224 #else
225 
227 {
228  explicit ArchIntervalTimer(bool start=true)
229  : _started(start) {
230  if (_started) {
231  _startTicks = ArchGetStartTickTime();
232  }
233  }
234 
235  void Start() {
236  _started = true;
237  _startTicks = ArchGetStartTickTime();
238  }
239 
240  bool IsStarted() const {
241  return _started;
242  }
243 
244  uint64_t GetStartTicks() const {
245  return _startTicks;
246  }
247 
248  uint64_t GetCurrentTicks() {
249  return ArchGetStopTickTime();
250  }
251 
252  uint64_t GetElapsedTicks() {
253  if (!_started) {
254  return 0;
255  }
256  return ArchGetStopTickTime() - _startTicks;
257  }
258 private:
259  bool _started = false;
260  uint64_t _startTicks;
261 };
262 
263 #endif
264 
265 /// Return the tick time resolution. Although the number of ticks per second
266 /// may be very large, on many current systems the tick timers do not update at
267 /// that rate. Rather, sequential calls to ArchGetTickTime() may report
268 /// increases of 10s to 100s of ticks, with a minimum increment between calls.
269 /// This function returns that minimum increment as measured at startup time.
270 ///
271 /// Note that if this value is of sufficient size, then short times measured
272 /// with tick timers are potentially subject to significant noise. In
273 /// particular, an interval of measured tick time is liable to be off by +/- one
274 /// ArchGetTickQuantum().
275 ARCH_API
276 uint64_t ArchGetTickQuantum();
277 
278 /// Return the ticks taken to record an interval of time with ArchIntervalTimer,
279 /// as measured at startup time.
280 ARCH_API
282 
283 
284 /// Convert a duration measured in "ticks", as returned by
285 /// \c ArchGetTickTime(), to nanoseconds.
286 ///
287 /// An example to test the timing routines would be:
288 /// \code
289 /// ArchIntervalTimer iTimer;
290 /// sleep(10);
291 ///
292 /// // duration should be approximately 10 * 1e9 = 1e10 nanoseconds.
293 /// int64_t duration = ArchTicksToNanoseconds(iTimer.GetElapsedTicks());
294 /// \endcode
295 ///
296 ARCH_API
297 int64_t ArchTicksToNanoseconds(uint64_t nTicks);
298 
299 /// Convert a duration measured in "ticks", as returned by
300 /// \c ArchGetTickTime(), to seconds.
301 ARCH_API
302 double ArchTicksToSeconds(uint64_t nTicks);
303 
304 /// Convert a duration in seconds to "ticks", as returned by
305 /// \c ArchGetTickTime().
306 ARCH_API
307 uint64_t ArchSecondsToTicks(double seconds);
308 
309 /// Get nanoseconds per tick. Useful when converting ticks obtained from
310 /// \c ArchTickTime()
311 ARCH_API
313 
314 ARCH_API
315 uint64_t
316 Arch_MeasureExecutionTime(uint64_t maxMicroseconds, bool *reachedConsensus,
317  void const *m, uint64_t (*callM)(void const *, int));
318 
319 /// Run \p fn repeatedly attempting to determine a consensus fastest execution
320 /// time with low noise, for up to \p maxMicroseconds, then return the consensus
321 /// fastest execution time. If a consensus is not reached in that time, return
322 /// a best estimate instead. If \p reachedConsensus is not null, set it to
323 /// indicate whether or not a consensus was reached. This function ignores \p
324 /// maxMicroseconds greater than 5 seconds and runs for up to 5 seconds instead.
325 /// The \p fn will run for an indeterminate number of times, so it should be
326 /// side-effect free. Also, it should do essentially the same work on every
327 /// invocation so that timing its execution makes sense.
328 template <class Fn>
329 uint64_t
331  Fn const &fn,
332  uint64_t maxMicroSeconds = 10000, /* 10 msec */
333  bool *reachedConsensus = nullptr)
334 {
335  auto measureN = [&fn](int nTimes) -> uint64_t {
336  ArchIntervalTimer iTimer;
337  for (int i = nTimes; i--; ) {
338  std::atomic_signal_fence(std::memory_order_seq_cst);
339  (void)fn();
340  std::atomic_signal_fence(std::memory_order_seq_cst);
341  }
342  return iTimer.GetElapsedTicks();
343  };
344 
345  using MeasureNType = decltype(measureN);
346 
348  maxMicroSeconds, reachedConsensus,
349  static_cast<void const *>(&measureN),
350  [](void const *mN, int nTimes) {
351  return (*static_cast<MeasureNType const *>(mN))(nTimes);
352  });
353 }
354 
355 ///@}
356 
358 
359 #endif // PXR_BASE_ARCH_TIMING_H
ARCH_API double ArchTicksToSeconds(uint64_t nTicks)
void
Definition: png.h:1083
GLuint start
Definition: glcorearb.h:475
bool IsStarted() const
Definition: timing.h:240
ARCH_API double ArchGetNanosecondsPerTick()
ARCH_API int64_t ArchTicksToNanoseconds(uint64_t nTicks)
**But if you need a result
Definition: thread.h:613
ARCH_API uint64_t ArchGetIntervalTimerTickOverhead()
uint64_t GetElapsedTicks()
Definition: timing.h:252
uint64_t ArchMeasureExecutionTime(Fn const &fn, uint64_t maxMicroSeconds=10000, bool *reachedConsensus=nullptr)
Definition: timing.h:330
uint64_t GetCurrentTicks()
Definition: timing.h:248
ArchIntervalTimer(bool start=true)
Definition: timing.h:228
uint64_t GetStartTicks() const
Definition: timing.h:244
ARCH_API uint64_t Arch_MeasureExecutionTime(uint64_t maxMicroseconds, bool *reachedConsensus, void const *m, uint64_t(*callM)(void const *, int))
ARCH_API uint64_t ArchSecondsToTicks(double seconds)
GLdouble t
Definition: glad.h:2397
void Start()
Definition: timing.h:235
PXR_NAMESPACE_OPEN_SCOPE uint64_t ArchGetTickTime()
Definition: timing.h:62
uint64_t ArchGetStartTickTime()
Definition: timing.h:85
PXR_NAMESPACE_CLOSE_SCOPE PXR_NAMESPACE_OPEN_SCOPE
Definition: path.h:1441
#define PXR_NAMESPACE_CLOSE_SCOPE
Definition: pxr.h:91
#define ARCH_API
Definition: api.h:40
uint64_t ArchGetStopTickTime()
Definition: timing.h:126
ARCH_API uint64_t ArchGetTickQuantum()