HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SYS_BitUtil.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: SYS_BitUtil.h (SYS Library, C++)
7  *
8  * COMMENTS: Functions for fast bit manipulation.
9  *
10  * SYSfirstBitSet - Get the index of the first (lowest-order) bit
11  * equal 1, else -1 if input is zero.
12  * SYSlastBitSet - Get the index of the last (highest-order) bit
13  * equal 1, else -1 if input is zero.
14  * SYScountBitsSet - Counts the number of bits equal to 1
15  * SYSisPow2 - Is it a power of two?
16  * SYSmakePow2 - Round up to nearest power of two
17  * SYSceilLog2 - ceil(log2(n))
18  */
19 
20 #ifndef __SYS_BitUtil__
21 #define __SYS_BitUtil__
22 
23 #include "SYS_Inline.h"
24 #include "SYS_Types.h"
25 
26 #if defined(_MSC_VER)
27  #include <intrin.h>
28 #endif
29 
30 static SYS_FORCE_INLINE int SYSfirstBitSet(uint v)
31 {
32 #if defined(_MSC_VER)
33  unsigned long result;
34  if (_BitScanForward(&result, v))
35  return int(result);
36  return -1;
37 #elif defined(GCC3) && !defined(MBSD_ARM)
38  int c;
39  asm(
40  "bsf %1, %0\n"
41  "cmove %2, %0\n"
42  : "=r" (c) : "r" (v), "r" (-1));
43  return c;
44 #else
45  if (!v)
46  return -1;
47  int result = 0;
48  if (v & 0x0000FFFF)
49  v &= 0x0000FFFF;
50  else
51  result |= 0x10;
52 
53  if (v & 0x00FF00FF)
54  v &= 0x00FF00FF;
55  else
56  result |= 0x08;
57 
58  if (v & 0x0F0F0F0F)
59  v &= 0x0F0F0F0F;
60  else
61  result |= 0x04;
62 
63  if (v & 0x33333333)
64  v &= 0x33333333;
65  else
66  result |= 0x02;
67 
68  if (v & 0x55555555)
69  v &= 0x55555555;
70  else
71  result |= 0x01;
72 
73  return result;
74 #endif
75 }
76 
77 static SYS_FORCE_INLINE int SYSfirstBitSet(uint64 v)
78 {
79 #if defined(_MSC_VER) && defined(AMD64)
80  unsigned long result;
81  if (_BitScanForward64(&result, v))
82  return int(result);
83  return -1;
84 #elif defined(GCC4) && defined(AMD64)
85  uint64 c;
86  asm(
87  "bsf %1, %0\n"
88  "cmove %2, %0\n"
89  : "=r" (c) : "r" (v), "r" (uint64(-1)));
90  return c;
91 #else
92  // Unfortunately, 64-bit BSF isn't supported on 32-bit CPUs
93  if (v & 0x00000000FFFFFFFFULL)
94  return SYSfirstBitSet(uint(v));
95  int result = SYSfirstBitSet(uint(v>>32));
96  if (result >= 0)
97  result += 32;
98  return result;
99 #endif
100 }
101 
102 static SYS_FORCE_INLINE int SYSfirstBitSet(int v)
103 {
104  return SYSfirstBitSet(uint(v));
105 }
106 
107 static SYS_FORCE_INLINE int SYSfirstBitSet(int64 v)
108 {
109  return SYSfirstBitSet(uint64(v));
110 }
111 
112 #if defined(MBSD)
113 static SYS_FORCE_INLINE int SYSfirstBitSet(size_t v)
114 {
115  return SYSfirstBitSet(uint64(v));
116 }
117 #endif
118 
119 static SYS_FORCE_INLINE int SYSlastBitSet(uint v)
120 {
121 #if defined(_MSC_VER)
122  unsigned long result;
123  if (_BitScanReverse(&result, v))
124  return int(result);
125  return -1;
126 // TODO: TBD for Apple Silicon.
127 /*#elif defined(GCC3) && defined(ARM64)
128  int c;
129  asm(
130  "bsr %w1, %w0\n"
131  "cmove %w2, %w0\n"
132  : "=r" (c) : "r" (v), "r" (-1));
133  return c;
134 */
135 #elif defined(GCC3) && defined(AMD64)
136  int c;
137  asm(
138  "bsr %1, %0\n"
139  "cmove %2, %0\n"
140  : "=r" (c) : "r" (v), "r" (-1));
141  return c;
142 #else
143  if (!v)
144  return -1;
145  int result = 0;
146  if (v & 0xFFFF0000)
147  {
148  v &= 0xFFFF0000;
149  result |= 0x10;
150  }
151  if (v & 0xFF00FF00)
152  {
153  v &= 0xFF00FF00;
154  result |= 0x08;
155  }
156  if (v & 0xF0F0F0F0)
157  {
158  v &= 0xF0F0F0F0;
159  result |= 0x04;
160  }
161  if (v & 0xCCCCCCCC)
162  {
163  v &= 0xCCCCCCCC;
164  result |= 0x02;
165  }
166  if (v & 0xAAAAAAAA)
167  {
168  v &= 0xAAAAAAAA;
169  result |= 0x01;
170  }
171  return result;
172 #endif
173 }
174 
175 static SYS_FORCE_INLINE int SYSlastBitSet(uint64 v)
176 {
177 #if defined(_MSC_VER) && defined(AMD64)
178  unsigned long result;
179  if (_BitScanReverse64(&result, v))
180  return int(result);
181  return -1;
182 #elif defined(GCC4) && defined(AMD64)
183  uint64 c;
184  asm(
185  "bsr %1, %0\n"
186  "cmove %2, %0\n"
187  : "=r" (c) : "r" (v), "r" (uint64(-1)));
188  return c;
189 #else
190  // Unfortunately, 64-bit BSR isn't supported on 32-bit CPUs
191  if (v & 0xFFFFFFFF00000000ULL)
192  return SYSlastBitSet(uint(v>>32)) + 32;
193  return SYSlastBitSet(uint(v));
194 #endif
195 }
196 
197 static SYS_FORCE_INLINE int SYSlastBitSet(int v)
198 {
199  return SYSlastBitSet(uint(v));
200 }
201 
202 static SYS_FORCE_INLINE int SYSlastBitSet(int64 v)
203 {
204  return SYSlastBitSet(uint64(v));
205 }
206 
207 /// For a given bit index, finds the next bit above it that is set and returns
208 /// its index, or -1 if no bits are set. Use SYSfirstBitSet to get a seed bit,
209 /// if iterating over bits from lowest to highest set.
210 template<typename T>
211 static SYS_FORCE_INLINE int SYSnextBitSet(T v, int bit)
212 {
213  bit++;
214  if (bit <= 0 || bit >= std::numeric_limits<T>::digits)
215  return -1;
216 
217  int nbit = SYSfirstBitSet(v >> bit);
218  if (nbit == -1)
219  return -1;
220  return nbit + bit;
221 }
222 
223 #if defined(MBSD)
224 static SYS_FORCE_INLINE int SYSlastBitSet(size_t v)
225 {
226  return SYSlastBitSet(uint64(v));
227 }
228 #endif
229 
230 // Count number of bits set without using a lookup table.
231 // NOTE: There are intrinsics on recent Intel/AMD CPUs to count bits
232 // in hardware (e.g. __builtin_popcount on gcc and __popcnt in
233 // MSVC). However, they're dependent on certain CPU features
234 // being set, so we won't use them here. The code below pipelines
235 // pretty well anyway.
236 static SYS_FORCE_INLINE constexpr int SYScountBitsSet(uint v)
237 {
238  v = (v&0x55555555U) + ((v>>1 )&0x55555555U);
239  v = (v&0x33333333U) + ((v>>2 )&0x33333333U);
240  v = (v&0x0F0F0F0FU) + ((v>>4 )&0x0F0F0F0FU);
241  // The additions won't overflow 8 bits, so no more masks
242  // are needed until the end
243  v += (v>>8 );
244  v += (v>>16);
245  return ((int)v)&0xFF;
246 }
247 
248 static SYS_FORCE_INLINE constexpr int SYScountBitsSet(uint64 v)
249 {
250  v = (v&0x5555555555555555ULL) + ((v>>1 )&0x5555555555555555ULL);
251  v = (v&0x3333333333333333ULL) + ((v>>2 )&0x3333333333333333ULL);
252  v = (v&0x0F0F0F0F0F0F0F0FULL) + ((v>>4 )&0x0F0F0F0F0F0F0F0FULL);
253  // The additions won't overflow 8 bits, so no more masks
254  // are needed until the end
255  v += (v>>8 );
256  v += (v>>16);
257  v += (v>>32);
258  return ((int)v)&0xFF;
259 }
260 
261 /// NOTE: This does *not* consider zero to be a power of two
262 template <typename T>
263 static SYS_FORCE_INLINE constexpr bool SYSisPow2(T v)
264 {
265  return v>0 && !(v&(v-1));
266 }
267 
268 /// Round up to the nearest power of 2 greater (or equal) to v
269 template <typename T>
270 static SYS_FORCE_INLINE constexpr T SYSmakePow2(T v)
271 {
272  if (v <= 0)
273  return 1;
274  if (SYSisPow2(v))
275  return v;
276  T v2 = (T(1) << (SYSlastBitSet(v) + 1));
277  return v2;
278 }
279 
280 template <typename T>
281 static SYS_FORCE_INLINE constexpr int SYSceilLog2(T v)
282 {
283  if (v <= 0)
284  return 0;
285  bool isPow2 = SYSisPow2(v);
286  return SYSlastBitSet(v) + !isPow2;
287 }
288 
289 template <typename T>
290 static SYS_FORCE_INLINE constexpr int SYSfloorLog2(T v)
291 {
292  if (v <= 0)
293  return 0;
294  return SYSlastBitSet(v);
295 }
296 
297 // A macro to compute floor(log2(x)) of an 8-bit number at compile time. Useful for computing
298 // bit masks.
299 #define SYS_LOG2F(x) ((((x)>=(1<< 1))?1:0) + (((x)>=(1<< 2))?1:0) + (((x)>=(1<< 3))?1:0) + (((x)>=(1<< 4))?1:0) + \
300  (((x)>=(1<< 5))?1:0) + (((x)>=(1<< 6))?1:0) + (((x)>=(1<< 7))?1:0) + (((x)>=(1<< 8))?1:0))
301 
302 static SYS_FORCE_INLINE constexpr int SYSfloorLog2(float v)
303 {
304  if (v <= 0)
305  return 0x80000000;
306  SYS_FPRealUnionF fori(v);
307  int e = (fori.uval >> SYS_FPRealUnionF::MANTISSA_BITS) - 127;
308  // NOTE: Not quite right for denormal numbers or infinities.
309  return e;
310 }
311 static SYS_FORCE_INLINE constexpr int SYSfloorLog2(double v)
312 {
313  if (v <= 0)
314  return 0x80000000;
315  SYS_FPRealUnionD fori(v);
316  int e = (fori.uval >> SYS_FPRealUnionD::MANTISSA_BITS) - 1023;
317  // NOTE: Not quite right for denormal numbers or infinities.
318  return e;
319 }
320 static SYS_FORCE_INLINE constexpr int SYSroundLog2(float v)
321 {
322  if (v <= 0)
323  return 0x80000000;
324  SYS_FPRealUnionF fori(v);
325  int e = (fori.uval >> SYS_FPRealUnionF::MANTISSA_BITS) - 127;
326  // NOTE: Not quite right for denormal numbers or infinities.
327  // Magic number 3474676 is ceil((2^0.5 - 1)*(2^23))
328  if ((fori.uval & ((1<<SYS_FPRealUnionF::MANTISSA_BITS)-1)) >= 3474676)
329  ++e;
330  return e;
331 }
332 static SYS_FORCE_INLINE constexpr int SYSroundLog2(double v)
333 {
334  if (v <= 0)
335  return 0x80000000;
336  SYS_FPRealUnionD fori(v);
337  int e = (fori.uval >> SYS_FPRealUnionD::MANTISSA_BITS) - 1023;
338  // NOTE: Not quite right for denormal numbers or infinities.
339  // Magic number 1865452045155277 is ceil((2^0.5 - 1)*(2^52))
340  if ((fori.uval & ((1ULL<<SYS_FPRealUnionD::MANTISSA_BITS)-1)) >= 1865452045155277ULL)
341  ++e;
342  return e;
343 }
344 static SYS_FORCE_INLINE constexpr int SYSceilLog2(float v)
345 {
346  if (v <= 0)
347  return 0x80000000;
348  SYS_FPRealUnionF fori(v);
349  int e = (fori.uval >> SYS_FPRealUnionF::MANTISSA_BITS) - 127;
350  // NOTE: Not quite right for denormal numbers or infinities.
351  if ((fori.uval & ((1<<SYS_FPRealUnionF::MANTISSA_BITS)-1)) != 0)
352  ++e;
353  return e;
354 }
355 static SYS_FORCE_INLINE constexpr int SYSceilLog2(double v)
356 {
357  if (v <= 0)
358  return 0x80000000;
359  SYS_FPRealUnionD fori(v);
360  int e = (fori.uval >> SYS_FPRealUnionD::MANTISSA_BITS) - 1023;
361  // NOTE: Not quite right for denormal numbers or infinities.
362  if ((fori.uval & ((1ULL<<SYS_FPRealUnionD::MANTISSA_BITS)-1)) != 0)
363  ++e;
364  return e;
365 }
366 
367 static SYS_FORCE_INLINE constexpr float SYSintExp2AsFloat(int e)
368 {
369  // Avoid include for SYSclamp by doing it manually
370  if (e < -127)
371  e = -127;
372  else if (e > 128)
373  e = 128;
375  return fori.fval;
376 }
377 
378 static SYS_FORCE_INLINE constexpr double SYSintExp2AsDouble(int e)
379 {
380  // Avoid include for SYSclamp by doing it manually
381  if (e < -1023)
382  e = -1023;
383  else if (e > 1024)
384  e = 1024;
386  return fori.fval;
387 }
388 
389 /// Reverse the bits in a 8 bit byte
390 static SYS_FORCE_INLINE constexpr uint8
391 SYSreverseBits(uint8 n)
392 {
393 #if 0
394  n = ((n >> 1) & 0x55) | ((n << 1) & 0xaa);
395  n = ((n >> 2) & 0x33) | ((n << 2) & 0xcc);
396  n = ((n >> 4) & 0x0f) | ((n << 4) & 0xf0);
397  return n;
398 #else
399  // Table driven method is about 2x faster than shifting
400  #define R2(n) n, n + 2*64, n + 1*64, n + 3*64
401  #define R4(n) R2(n), R2(n + 2*16), R2(n + 1*16), R2(n + 3*16)
402  #define R6(n) R4(n), R4(n + 2*4 ), R4(n + 1*4 ), R4(n + 3*4 )
403  constexpr uint8 table[256] = { R6(0), R6(2), R6(1), R6(3) };
404  #undef R2
405  #undef R4
406  #undef R6
407  return table[n];
408 #endif
409 }
410 
411 /// Reverse the bits in a 16 bit word
412 static SYS_FORCE_INLINE constexpr uint16
413 SYSreverseBits(uint16 n)
414 {
415  n = ((n >> 1) & 0x5555) | ((n << 1) & 0xaaaa);
416  n = ((n >> 2) & 0x3333) | ((n << 2) & 0xcccc);
417  n = ((n >> 4) & 0x0f0f) | ((n << 4) & 0xf0f0);
418  n = ((n >> 8) & 0x00ff) | ((n << 8) & 0xff00);
419  return n;
420 }
421 
422 /// Reverse the bits in a 32 bit word
423 static SYS_FORCE_INLINE constexpr uint32
424 SYSreverseBits(uint32 n)
425 {
426  n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xaaaaaaaa);
427  n = ((n >> 2) & 0x33333333) | ((n << 2) & 0xcccccccc);
428  n = ((n >> 4) & 0x0f0f0f0f) | ((n << 4) & 0xf0f0f0f0);
429  n = ((n >> 8) & 0x00ff00ff) | ((n << 8) & 0xff00ff00);
430  n = ((n >> 16) & 0x0000ffff) | ((n << 16) & 0xffff0000);
431  return n;
432 }
433 
434 /// Reverse the bits in a 32 bit word
435 static SYS_FORCE_INLINE constexpr uint64
436 SYSreverseBits(uint64 n)
437 {
438  n = ((n >> 1) & 0x5555555555555555) | ((n << 1) & 0xaaaaaaaaaaaaaaaa);
439  n = ((n >> 2) & 0x3333333333333333) | ((n << 2) & 0xcccccccccccccccc);
440  n = ((n >> 4) & 0x0f0f0f0f0f0f0f0f) | ((n << 4) & 0xf0f0f0f0f0f0f0f0);
441  n = ((n >> 8) & 0x00ff00ff00ff00ff) | ((n << 8) & 0xff00ff00ff00ff00);
442  n = ((n >> 16) & 0x0000ffff0000ffff) | ((n << 16) & 0xffff0000ffff0000);
443  n = ((n >> 32) & 0x00000000ffffffff) | ((n << 32) & 0xffffffff00000000);
444  return n;
445 }
446 
447 #endif
unsigned short uint16
Definition: SYS_Types.h:38
const GLfloat * c
Definition: glew.h:16631
unsigned long long uint64
Definition: SYS_Types.h:117
unsigned char uint8
Definition: SYS_Types.h:36
GLfloat GLfloat GLfloat v2
Definition: glcorearb.h:817
const GLdouble * v
Definition: glcorearb.h:836
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
long long int64
Definition: SYS_Types.h:116
typedef int(WINAPI *PFNWGLRELEASEPBUFFERDCARBPROC)(HPBUFFERARB hPbuffer
#define R6(n)
GLdouble n
Definition: glcorearb.h:2007
unsigned int uint32
Definition: SYS_Types.h:40
unsigned int uint
Definition: SYS_Types.h:45
GLenum GLsizei GLenum GLenum const void * table
Definition: glew.h:4970