11 namespace onnxruntime_float16 {
20 #elif defined(__GNUC__) || defined(__clang__)
21 little = __ORDER_LITTLE_ENDIAN__,
22 big = __ORDER_BIG_ENDIAN__,
25 #error onnxruntime_float16::detail::endian is not implemented in this environment.
31 "Only little-endian or big-endian native byte orders are supported.");
38 template <
class Derived>
91 return static_cast<int16_t
>(
val) < 0;
169 Derived
Abs() const noexcept {
return Derived::FromBits(
AbsImpl()); }
186 return static_cast<uint16_t
>((lhs.val | rhs.val) & ~
kSignMask) == 0;
190 if (
IsNaN() || rhs.IsNaN()) {
194 return val == rhs.val;
200 if (
IsNaN() || rhs.IsNaN()) {
206 if (left_is_negative != rhs.IsNegative()) {
210 return left_is_negative && !
AreZero(*
this, rhs);
212 return (
val != rhs.val) && ((
val < rhs.val) ^ left_is_negative);
245 template <
class Derived>
253 constexpr
unsigned int sign_mask = 0x80000000u;
254 uint16_t
val =
static_cast<uint16_t
>(0x0u);
256 unsigned int sign =
f.u & sign_mask;
264 if (
f.u >= f16max.u) {
265 val = (
f.u > f32infty.u) ? 0x7e00 : 0x7c00;
267 if (
f.u < (113 << 23)) {
271 f.f += denorm_magic.f;
274 val =
static_cast<uint16_t
>(
f.u - denorm_magic.u);
276 unsigned int mant_odd = (
f.u >> 13) & 1;
285 val =
static_cast<uint16_t
>(
f.u >> 13);
289 val |=
static_cast<uint16_t
>(sign >> 16);
293 template <
class Derived>
296 constexpr
unsigned int shifted_exp = 0x7c00 << 13;
299 o.
u = (
val & 0x7fff) << 13;
300 unsigned int exp = shifted_exp & o.u;
301 o.u += (127 - 15) << 23;
304 if (exp == shifted_exp) {
305 o.u += (128 - 16) << 23;
306 }
else if (exp == 0) {
313 #if (defined _MSC_VER) && (defined _M_ARM || defined _M_ARM64 || defined _M_ARM64EC)
319 o.u |= (
val & 0x8000U) << 16U;
325 template <
class Derived>
379 return static_cast<int16_t
>(
val) < 0;
457 Derived
Abs() const noexcept {
return Derived::FromBits(
AbsImpl()); }
477 return static_cast<uint16_t
>((lhs.val | rhs.val) & ~
kSignMask) == 0;
481 template <
class Derived>
485 result = kPositiveQNaNBits;
487 auto get_msb_half = [](
float fl) {
489 #ifdef __cpp_if_constexpr
490 if constexpr (detail::endian::native == detail::endian::little) {
492 if (detail::endian::native == detail::endian::little) {
494 std::memcpy(&result, reinterpret_cast<char*>(&fl) +
sizeof(uint16_t),
sizeof(uint16_t));
496 std::memcpy(&result, &fl,
sizeof(uint16_t));
501 uint16_t upper_bits = get_msb_half(
v);
507 U32 += (upper_bits & 1) + kRoundToNearest;
508 result = get_msb_half(F32);
513 template <
class Derived>
516 return std::numeric_limits<float>::quiet_NaN();
519 char*
const first =
reinterpret_cast<char*
>(&
result);
520 char*
const second = first +
sizeof(uint16_t);
521 #ifdef __cpp_if_constexpr
522 if constexpr (detail::endian::native == detail::endian::little) {
524 if (detail::endian::native == detail::endian::little) {
526 std::memset(first, 0,
sizeof(uint16_t));
527 std::memcpy(second, &
val,
sizeof(uint16_t));
529 std::memcpy(first, &
val,
sizeof(uint16_t));
530 std::memset(second, 0,
sizeof(uint16_t));
static constexpr uint16_t kOneBits
bool operator<(const Float16Impl &rhs) const noexcept
bool IsNaN() const noexcept
Tests if the value is NaN
Derived Abs() const noexcept
Creates an instance that represents absolute value.
Derived Negate() const noexcept
Creates a new instance with the sign flipped.
static constexpr uint16_t kOneBits
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
static constexpr uint16_t kPositiveQNaNBits
bool operator!=(const Float16Impl &rhs) const noexcept
static constexpr uint16_t kNegativeInfinityBits
bool operator==(const Float16Impl &rhs) const noexcept
float ToFloatImpl() const noexcept
Converts bfloat16 to float
static bool AreZero(const Float16Impl &lhs, const Float16Impl &rhs) noexcept
IEEE defines that positive and negative zero are equal, this gives us a quick equality check for two ...
**But if you need a result
bool IsNegative() const noexcept
Checks if the value is negative
static constexpr uint16_t kMaxValueBits
uint16_t NegateImpl() const noexcept
Creates a new instance with the sign flipped.
static constexpr uint16_t ToUint16Impl(float v) noexcept
Converts from float to uint16_t float16 representation
bool IsFinite() const noexcept
Tests if the value is finite
static constexpr uint16_t kBiasedExponentMask
static constexpr uint16_t kNegativeQNaNBits
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
bool IsFinite() const noexcept
Tests if the value is finite
Shared implementation between public and internal classes. CRTP pattern.
bool IsNegative() const noexcept
Checks if the value is negative
static constexpr uint16_t kBiasedExponentMask
static constexpr uint16_t kNegativeInfinityBits
static constexpr uint16_t kMinusOneBits
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity
float ToFloatImpl() const noexcept
Converts float16 to float
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
static constexpr uint16_t kPositiveQNaNBits
static constexpr uint16_t kMinusOneBits
IMATH_HOSTDEVICE constexpr int sign(T a) IMATH_NOEXCEPT
static constexpr uint16_t kSignMask
Derived Negate() const noexcept
Creates a new instance with the sign flipped.
uint16_t AbsImpl() const noexcept
Creates an instance that represents absolute value.
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
static constexpr uint16_t kNegativeQNaNBits
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
Derived Abs() const noexcept
Creates an instance that represents absolute value.
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
static constexpr uint16_t kPositiveInfinityBits
static constexpr uint16_t kMaxValueBits
static constexpr uint16_t kPositiveInfinityBits
bool IsNaN() const noexcept
Tests if the value is NaN
static constexpr uint16_t kRoundToNearest
static constexpr uint16_t kSignMask
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
uint16_t AbsImpl() const noexcept
Creates an instance that represents absolute value.
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
static bool AreZero(const BFloat16Impl &lhs, const BFloat16Impl &rhs) noexcept
IEEE defines that positive and negative zero are equal, this gives us a quick equality check for two ...
IMATH_INTERNAL_NAMESPACE_HEADER_ENTER IMATH_HOSTDEVICE constexpr T abs(T a) IMATH_NOEXCEPT
uint16_t NegateImpl() const noexcept
Creates a new instance with the sign flipped.
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
Shared implementation between public and internal classes. CRTP pattern.
static uint16_t ToUint16Impl(float v) noexcept
Converts from float to uint16_t float16 representation
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity