HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
fpreal16.h
Go to the documentation of this file.
1 ///////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
4 // Digital Ltd. LLC
5 //
6 // All rights reserved.
7 //
8 // Redistribution and use in source and binary forms, with or without
9 // modification, are permitted provided that the following conditions are
10 // met:
11 // * Redistributions of source code must retain the above copyright
12 // notice, this list of conditions and the following disclaimer.
13 // * Redistributions in binary form must reproduce the above
14 // copyright notice, this list of conditions and the following disclaimer
15 // in the documentation and/or other materials provided with the
16 // distribution.
17 // * Neither the name of Industrial Light & Magic nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 //
33 ///////////////////////////////////////////////////////////////////////////
34 
35 // Primary authors:
36 // Florian Kainz <kainz@ilm.com>
37 // Rod Bogart <rgb@ilm.com>
38 
39 //---------------------------------------------------------------------------
40 //
41 // fpreal16 -- a 16-bit floating point number class:
42 //
43 // Type fpreal16 can represent positive and negative numbers, whose
44 // magnitude is between roughly 6.1e-5 and 6.5e+4, with a relative
45 // error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
46 // with an absolute error of 6.0e-8. All integers from -2048 to
47 // +2048 can be represented exactly.
48 //
49 // Type fpreal16 behaves (almost) like the built-in C++ floating point
50 // types. In arithmetic expressions, fpreal16, float and double can be
51 // mixed freely. Here are a few examples:
52 //
53 // fpreal16 a (3.5);
54 // float b (a + sqrt (a));
55 // a += b;
56 // b += a;
57 // b = a + 7;
58 //
59 // Conversions from fpreal16 to float are lossless; all fpreal16 numbers
60 // are exactly representable as floats.
61 //
62 // Conversions from float to fpreal16 may not preserve the float's
63 // value exactly. If a float is not representable as a fpreal16, the
64 // float value is rounded to the nearest representable fpreal16. If
65 // a float value is exactly in the middle between the two closest
66 // representable fpreal16 values, then the float value is rounded to
67 // the fpreal16 with the greater magnitude.
68 //
69 // Overflows during float-to-fpreal16 conversions cause arithmetic
70 // exceptions. An overflow occurs when the float value to be
71 // converted is too large to be represented as a fpreal16, or if the
72 // float value is an infinity or a NAN.
73 //
74 // The implementation of type fpreal16 makes the following assumptions
75 // about the implementation of the built-in C++ types:
76 //
77 // float is an IEEE 754 single-precision number
78 // sizeof (float) == 4
79 // sizeof (unsigned int) == sizeof (float)
80 // alignof (unsigned int) == alignof (float)
81 // sizeof (unsigned short) == 2
82 //
83 //---------------------------------------------------------------------------
84 
85 #ifndef _H_REAL16_H_
86 #define _H_REAL16_H_
87 
88 #include "SYS_API.h"
89 #ifndef __SYS_Types__
90 #error This file must be included from SYS_Types.h
91 #endif
92 
93 #include "SYS_Inline.h"
94 #include "SYS_TypeDecorate.h"
95 #include <iosfwd>
96 
97 
99 {
100  public:
101 
102  //-------------
103  // Constructors
104  //-------------
105 
106  fpreal16() = default; // no initialization
107  fpreal16(const fpreal16 &h) = default;
109 
110 
111  //--------------------
112  // Conversion to float
113  //--------------------
114 
115  operator fpreal32() const;
116 
117 
118  //------------
119  // Unary minus
120  //------------
121 
122  fpreal16 operator-() const;
123 
124 
125  //-----------
126  // Assignment
127  //-----------
128 
129  fpreal16 &operator=(const fpreal16 &h) = default;
130  fpreal16 operator=(fpreal32 f);
131 
134 
137 
140 
143 
144 
145  //---------------------------------------------------------
146  // Round to n-bit precision (n should be between 0 and 10).
147  // After rounding, the significand's 10-n least significant
148  // bits will be zero.
149  //---------------------------------------------------------
150 
151  fpreal16 round(unsigned int n) const;
152 
153 
154  //--------------------------------------------------------------------
155  // Classification:
156  //
157  // h.isFinite() returns true if h is a normalized number,
158  // a denormalized number or zero
159  //
160  // h.isNormalized() returns true if h is a normalized number
161  //
162  // h.isDenormalized() returns true if h is a denormalized number
163  //
164  // h.isZero() returns true if h is zero
165  //
166  // h.isNan() returns true if h is a NAN
167  //
168  // h.isInfinity() returns true if h is a positive
169  // or a negative infinity
170  //
171  // h.isNegative() returns true if the sign bit of h
172  // is set (negative)
173  //--------------------------------------------------------------------
174 
175  bool isFinite() const;
176  bool isNormalized() const;
177  bool isDenormalized() const;
178  bool isZero() const;
179  bool isNan() const;
180  bool isInfinity() const;
181  bool isNegative() const;
182 
183 
184  //--------------------------------------------
185  // Special values
186  //
187  // posInf() returns +infinity
188  //
189 // negInf() returns -infinity
190  //
191  // qNan() returns a NAN with the bit
192  // pattern 0111111111111111
193  //
194  // sNan() returns a NAN with the bit
195  // pattern 0111110111111111
196  //--------------------------------------------
197 
198  static fpreal16 posInf();
199  static fpreal16 negInf();
200  static fpreal16 qNan();
201  static fpreal16 sNan();
202 
203 
204  //--------------------------------------
205  // Access to the internal representation
206  //--------------------------------------
207 
208  unsigned short bits() const;
209  void setBits(unsigned short bits);
210 
211 
212  static void ensureStaticDataIsInitialized();
213 
214  public:
215 
216  union uif
217  {
220  };
221 
222  private:
223 
224  static int16 convert(int i);
225  static fpreal32 overflow();
226 
227  unsigned short _h;
228 
229  protected:
230  static bool selftest();
231  static const uif *_toFloat;
232  static const unsigned short *_eLut;
233  static bool _itWorks;
234 };
235 
236 
237 //-----------
238 // Type Traits Compatibility
239 //-----------
240 
243 
244 
245 //-----------
246 // Stream I/O
247 //-----------
248 
249 SYS_API extern std::ostream & operator << (std::ostream &os, fpreal16 h);
250 SYS_API extern std::istream & operator >> (std::istream &is, fpreal16 &h);
251 
252 
253 //----------
254 // Debugging
255 //----------
256 
257 SYS_API extern void SYSprintBits (std::ostream &os, fpreal16 h);
258 SYS_API extern void SYSprintBits (std::ostream &os, fpreal32 f);
259 SYS_API extern void SYSprintBits (char c[19], fpreal16 h);
260 SYS_API extern void SYSprintBits (char c[35], fpreal32 f);
261 
262 
263 //-------
264 // Limits
265 //-------
266 
267 #define H_REAL16_MIN 5.96046448e-08 // Smallest positive fpreal16 (denormalized)
268 #define H_REAL16_NRM_MIN 6.10351562e-05 // Smallest positive normalized fpreal16
269 
270 #define H_REAL16_MAX 65504.0 // Largest positive fpreal16
271 
272 // Smallest positive e for which fpreal16 (1.0 + e) != fpreal16 (1.0)
273 #define H_REAL16_EPSILON 0.00097656
274 
275 #define H_REAL16_MANT_DIG 11 // Number of base-H_REAL16_RADIX digits in mantissa
276  // (significand + hidden leading 1)
277 
278 #define H_REAL16_DIG 2 // Number of base 10 digits that
279  // can be represented without change
280 
281 #define H_REAL16_RADIX 2 // Base of the exponent
282 
283 #define H_REAL16_MIN_EXP -13 // Minimum negative integer such that
284  // H_REAL16_RADIX raised to the power of
285  // one less than that integer is a
286  // normalized fpreal16
287 
288 #define H_REAL16_MAX_EXP 16 // Maximum positive integer such that
289  // H_REAL16_RADIX raised to the power of
290  // one less than that integer is a
291  // normalized fpreal16
292 
293 #define H_REAL16_MIN_10_EXP -4 // Minimum negative integer such
294  // that 10 raised to that power is
295  // a normalized fpreal16
296 
297 #define H_REAL16_MAX_10_EXP 4 // Maximum positive integer such
298  // that 10 raised to that power is
299  // a normalized fpreal16
300 
301 
302 //---------------------------------------------------------------------------
303 //
304 // Implementation --
305 //
306 // Representation of a float:
307 //
308 // We assume that a float, f, is an IEEE 754 single-precision
309 // floating point number, whose bits are arranged as follows:
310 //
311 // 31 (msb)
312 // |
313 // | 30 23
314 // | | |
315 // | | | 22 0 (lsb)
316 // | | | | |
317 // X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
318 //
319 // s e m
320 //
321 // s is the sign-bit, e is the exponent and m is the significand.
322 //
323 // If e is between 1 and 254, f is a normalized number:
324 //
325 // s e-127
326 // f = (-1) * 2 * 1.m
327 //
328 // If e is 0, and m is not zero, f is a denormalized number:
329 //
330 // s -126
331 // f = (-1) * 2 * 0.m
332 //
333 // If e and m are both zero, f is zero:
334 //
335 // f = 0.0
336 //
337 // If e is 255, f is an "infinity" or "not a number" (NAN),
338 // depending on whether m is zero or not.
339 //
340 // Examples:
341 //
342 // 0 00000000 00000000000000000000000 = 0.0
343 // 0 01111110 00000000000000000000000 = 0.5
344 // 0 01111111 00000000000000000000000 = 1.0
345 // 0 10000000 00000000000000000000000 = 2.0
346 // 0 10000000 10000000000000000000000 = 3.0
347 // 1 10000101 11110000010000000000000 = -124.0625
348 // 0 11111111 00000000000000000000000 = +infinity
349 // 1 11111111 00000000000000000000000 = -infinity
350 // 0 11111111 10000000000000000000000 = NAN
351 // 1 11111111 11111111111111111111111 = NAN
352 //
353 // Representation of a fpreal16:
354 //
355 // Here is the bit-layout for a fpreal16 number, h:
356 //
357 // 15 (msb)
358 // |
359 // | 14 10
360 // | | |
361 // | | | 9 0 (lsb)
362 // | | | | |
363 // X XXXXX XXXXXXXXXX
364 //
365 // s e m
366 //
367 // s is the sign-bit, e is the exponent and m is the significand.
368 //
369 // If e is between 1 and 30, h is a normalized number:
370 //
371 // s e-15
372 // h = (-1) * 2 * 1.m
373 //
374 // If e is 0, and m is not zero, h is a denormalized number:
375 //
376 // s -14
377 // h = (-1) * 2 * 0.m
378 //
379 // If e and m are both zero, h is zero:
380 //
381 // h = 0.0
382 //
383 // If e is 31, h is an "infinity" or "not a number" (NAN),
384 // depending on whether m is zero or not.
385 //
386 // Examples:
387 //
388 // 0 00000 0000000000 = 0.0
389 // 0 01110 0000000000 = 0.5
390 // 0 01111 0000000000 = 1.0
391 // 0 10000 0000000000 = 2.0
392 // 0 10000 1000000000 = 3.0
393 // 1 10101 1111000001 = -124.0625
394 // 0 11111 0000000000 = +infinity
395 // 1 11111 0000000000 = -infinity
396 // 0 11111 1000000000 = NAN
397 // 1 11111 1111111111 = NAN
398 //
399 // Conversion:
400 //
401 // Converting from a float to a fpreal16 requires some non-trivial bit
402 // manipulations. In some cases, this makes conversion relatively
403 // slow, but the most common case is accelerated via table lookups.
404 //
405 // Converting back from a fpreal16 to a float is easier because we don't
406 // have to do any rounding. In addition, there are only 65536
407 // different fpreal16 numbers; we can convert each of those numbers once
408 // and store the results in a table. Later, all conversions can be
409 // done using only simple table lookups.
410 //
411 //---------------------------------------------------------------------------
412 
413 
414 //----------------------------
415 // fpreal16-from-float constructor
416 //----------------------------
417 
420 {
421  if (f == 0)
422  {
423  //
424  // Common special case - zero.
425  // For speed, we don't preserve the zero's sign.
426  //
427 
428  _h = 0;
429  }
430  else
431  {
432  //
433  // We extract the combined sign and exponent, e, from our
434  // floating-point number, f. Then we convert e to the sign
435  // and exponent of the fpreal16 number via a table lookup.
436  //
437  // For the most common case, where a normalized fpreal16 is produced,
438  // the table lookup returns a non-zero value; in this case, all
439  // we have to do, is round f's significand to 10 bits and combine
440  // the result with e.
441  //
442  // For all other cases (overflow, zeroes, denormalized numbers
443  // resulting from underflow, infinities and NANs), the table
444  // lookup returns zero, and we call a longer, non-inline function
445  // to do the float-to-fpreal16 conversion.
446  //
447 
448  uif x;
449 
450  x.f = f;
451 
452  int e = (x.i >> 23) & 0x000001ff;
453 
454  e = _eLut[e];
455 
456  if (e)
457  {
458  //
459  // Simple case - round the significand and
460  // combine it with the sign and exponent.
461  //
462 
463  _h = e + (((x.i & 0x007fffff) + 0x00001000) >> 13);
464  }
465  else
466  {
467  //
468  // Difficult case - call a function.
469  //
470 
471  _h = convert (x.i);
472  }
473  }
474 }
475 
476 
477 //------------------------------------------
478 // fpreal16-to-float conversion via table lookup
479 //------------------------------------------
480 
482 fpreal16::operator fpreal32() const
483 {
484  return _toFloat[_h].f; // the raw 16-bit pattern _h is the index into the _toFloat table
485 }
486 
487 //-------------------------
488 // Round to n-bit precision
489 //-------------------------
490 
491 inline fpreal16
492 fpreal16::round(unsigned int n) const
493 {
494  //
495  // Parameter check: for n >= 10 the value is returned unchanged.
496  //
497 
498  if (n >= 10)
499  return *this;
500 
501  //
502  // Disassemble h into the sign, s,
503  // and the combined exponent and significand, e.
504  //
505 
506  unsigned short s = _h & 0x8000;
507  unsigned short e = _h & 0x7fff;
508 
509  //
510  // Round the combined exponent and significand to the nearest value
511  // whose (10-n) least significant bits are zero (ties round up in
512  // magnitude). Note that the exponent adjusts automatically if
513  // rounding up causes the significand to overflow.
514  //
515 
516  e >>= 9 - n; // bit 0 is now the most significant of the bits being dropped
517  e += e & 1; // if that bit is set, carry into the kept bits; bit 0 ends up 0
518  e <<= 9 - n;
519 
520  //
521  // Check for exponent overflow (exponent field reached 31).
522  //
523 
524  if (e >= 0x7c00)
525  {
526  //
527  // Overflow occurred -- truncate instead of rounding.
528  //
529 
530  e = _h; // includes the sign bit, which the shifts below preserve
531  e >>= 10 - n;
532  e <<= 10 - n;
533  }
534 
535  //
536  // Put the original sign bit back.
537  //
538 
539  fpreal16 h;
540  h._h = s | e;
541 
542  return h;
543 }
544 
545 
546 //-----------------------
547 // Other inline functions
548 //-----------------------
549 
552 {
553  fpreal16 h;
554  h._h = _h ^ 0x8000;
555  return h;
556 }
557 
558 
561 {
562  *this = fpreal16 (f);
563  return *this;
564 }
565 
566 
567 inline fpreal16
569 {
570  *this = fpreal16 (fpreal32(*this) + fpreal32(h));
571  return *this;
572 }
573 
574 
575 inline fpreal16
577 {
578  *this = fpreal16 (fpreal32(*this) + f);
579  return *this;
580 }
581 
582 
583 inline fpreal16
585 {
586  *this = fpreal16 (fpreal32(*this) - fpreal32(h));
587  return *this;
588 }
589 
590 
591 inline fpreal16
593 {
594  *this = fpreal16 (fpreal32 (*this) - f);
595  return *this;
596 }
597 
598 
599 inline fpreal16
601 {
602  *this = fpreal16 (fpreal32 (*this) * fpreal32 (h));
603  return *this;
604 }
605 
606 
607 inline fpreal16
609 {
610  *this = fpreal16 (fpreal32 (*this) * f);
611  return *this;
612 }
613 
614 
615 inline fpreal16
617 {
618  *this = fpreal16 (fpreal32 (*this) / fpreal32 (h));
619  return *this;
620 }
621 
622 
623 inline fpreal16
625 {
626  *this = fpreal16 (fpreal32 (*this) / f);
627  return *this;
628 }
629 
630 
631 SYS_FORCE_INLINE bool
633 {
634  unsigned short e = (_h >> 10) & 0x001f;
635  return e < 31;
636 }
637 
638 
639 SYS_FORCE_INLINE bool
641 {
642  unsigned short e = (_h >> 10) & 0x001f;
643  return e > 0 && e < 31;
644 }
645 
646 
647 SYS_FORCE_INLINE bool
649 {
650  unsigned short e = (_h >> 10) & 0x001f;
651  unsigned short m = _h & 0x3ff;
652  return e == 0 && m != 0;
653 }
654 
655 
656 SYS_FORCE_INLINE bool
658 {
659  return (_h & 0x7fff) == 0;
660 }
661 
662 
663 SYS_FORCE_INLINE bool
665 {
666  unsigned short e = (_h >> 10) & 0x001f;
667  unsigned short m = _h & 0x3ff;
668  return e == 31 && m != 0;
669 }
670 
671 
672 SYS_FORCE_INLINE bool
674 {
675  unsigned short e = (_h >> 10) & 0x001f;
676  unsigned short m = _h & 0x3ff;
677  return e == 31 && m == 0;
678 }
679 
680 
681 SYS_FORCE_INLINE bool
683 {
684  return (_h & 0x8000) != 0;
685 }
686 
687 
690 {
691  fpreal16 h;
692  h._h = 0x7c00;
693  return h;
694 }
695 
696 
699 {
700  fpreal16 h;
701  h._h = 0xfc00;
702  return h;
703 }
704 
705 
708 {
709  fpreal16 h;
710  h._h = 0x7fff;
711  return h;
712 }
713 
714 
717 {
718  fpreal16 h;
719  h._h = 0x7dff;
720  return h;
721 }
722 
723 
724 SYS_FORCE_INLINE unsigned short
726 {
727  return _h;
728 }
729 
730 
731 SYS_FORCE_INLINE void
732 fpreal16::setBits(unsigned short b)
733 {
734  _h = b; // store b as the raw bit pattern; no conversion is applied
735 }
736 
737 // namespace std overloads of the classification functions for fpreal16
738 namespace std
739 {
740 // gcc defines these as macros in <math.h>; save and undefine them here, restore them below
741 #pragma push_macro("isnormal")
742 #pragma push_macro("isfinite")
743 #pragma push_macro("isinf")
744 #pragma push_macro("isnan")
745 #undef isnormal
746 #undef isfinite
747 #undef isinf
748 #undef isnan
749 
750 static SYS_FORCE_INLINE bool isnormal(fpreal16 v) { return v.isNormalized(); }
751 static SYS_FORCE_INLINE bool isfinite(fpreal16 v) { return v.isFinite(); }
752 static SYS_FORCE_INLINE bool isinf(fpreal16 v) { return v.isInfinity(); }
753 static SYS_FORCE_INLINE bool isnan(fpreal16 v) { return v.isNan(); }
754 #pragma pop_macro("isnormal")
755 #pragma pop_macro("isfinite")
756 #pragma pop_macro("isinf")
757 #pragma pop_macro("isnan")
758 }
759 
760 #endif
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1222
static fpreal16 negInf()
Definition: fpreal16.h:698
OIIO_FORCEINLINE const vint4 & operator/=(vint4 &a, const vint4 &b)
Definition: simd.h:4438
SYS_API void SYSprintBits(std::ostream &os, fpreal16 h)
bool isZero() const
Definition: fpreal16.h:657
const GLfloat * c
Definition: glew.h:16631
static const uif * _toFloat
Definition: fpreal16.h:231
IMATH_HOSTDEVICE constexpr Plane3< T > operator-(const Plane3< T > &plane) IMATH_NOEXCEPT
Reflect the pla.
Definition: ImathPlane.h:253
fpreal16 operator-=(fpreal16 h)
Definition: fpreal16.h:584
fpreal16 operator-() const
Definition: fpreal16.h:551
Tto convert(const Tfrom &source)
fpreal16 operator/=(fpreal16 h)
Definition: fpreal16.h:616
float fpreal32
Definition: SYS_Types.h:200
bool isNegative(const Type &x)
Return true if x is less than zero.
Definition: Math.h:368
uint32 i
Definition: fpreal16.h:218
GLint GLenum GLint x
Definition: glcorearb.h:409
fpreal16 operator*=(fpreal16 h)
Definition: fpreal16.h:600
bool isNegative() const
Definition: fpreal16.h:682
static fpreal16 qNan()
Definition: fpreal16.h:707
OIIO_FORCEINLINE const vint4 & operator+=(vint4 &a, const vint4 &b)
Definition: simd.h:4369
#define SYS_DECLARE_IS_FLOATING_POINT(T)
Declare a type as floating point.
fpreal32 f
Definition: fpreal16.h:219
fpreal16 operator+=(fpreal16 h)
Definition: fpreal16.h:568
const GLdouble * v
Definition: glcorearb.h:837
fpreal16()=default
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
#define SYS_DECLARE_IS_POD(T)
Declare a type as POD.
bool isNan(const float x)
Return true if x is a NaN (Not-A-Number) value.
Definition: Math.h:396
bool isFinite() const
Definition: fpreal16.h:632
static bool _itWorks
Definition: fpreal16.h:233
vfloat4 round(const vfloat4 &a)
Definition: simd.h:7436
bool isInfinity() const
Definition: fpreal16.h:673
IMATH_HOSTDEVICE const Vec2< S > & operator*=(Vec2< S > &v, const Matrix22< T > &m) IMATH_NOEXCEPT
Vector-matrix multiplication: v *= m.
Definition: ImathMatrix.h:4660
static fpreal16 sNan()
Definition: fpreal16.h:716
GLdouble n
Definition: glcorearb.h:2008
GLfloat GLfloat GLfloat GLfloat h
Definition: glcorearb.h:2002
void setBits(unsigned short bits)
Definition: fpreal16.h:732
short int16
Definition: SYS_Types.h:37
fpreal16 round(unsigned int n) const
Definition: fpreal16.h:492
static fpreal16 posInf()
Definition: fpreal16.h:689
bool isNan() const
Definition: fpreal16.h:664
const GLdouble * m
Definition: glew.h:9166
GLfloat f
Definition: glcorearb.h:1926
unsigned int uint32
Definition: SYS_Types.h:40
fpreal16 & operator=(const fpreal16 &h)=default
OIIO_FORCEINLINE const vint4 & operator-=(vint4 &a, const vint4 &b)
Definition: simd.h:4392
#define SYS_API
Definition: SYS_API.h:11
unsigned short bits() const
Definition: fpreal16.h:725
static const unsigned short * _eLut
Definition: fpreal16.h:232
GLdouble s
Definition: glew.h:1395
bool isZero(const Type &x)
Return true if x is exactly equal to zero.
Definition: Math.h:338
bool isNormalized() const
Definition: fpreal16.h:640
bool isFinite(const float x)
Return true if x is finite.
Definition: Math.h:376
bool isDenormalized() const
Definition: fpreal16.h:648