8 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
12 #if !defined(__CUDACC__) && !defined(__HIPCC__)
20 namespace onnxruntime {
22 #if defined(__CUDACC__) || defined(__HIPCC__)
23 #define ORT_HOST_DEVICE __host__ __device__
25 #define ORT_HOST_DEVICE
81 using Base::operator==;
82 using Base::operator!=;
83 using Base::operator<;
105 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800
106 val = __bfloat16_as_ushort(__float2bfloat16(
v));
107 #elif defined(__HIP__)
110 val = UINT16_C(0x7FC0);
118 uint32_t rounding_bias = ((U32 >> 16) & 1) + UINT32_C(0x7FFF);
119 val =
static_cast<uint16_t
>((U32 + rounding_bias) >> 16);
127 auto get_msb_half = [](
float fl) {
129 if constexpr (onnxruntime_float16::detail::endian::native == onnxruntime_float16::detail::endian::little) {
130 std::memcpy(&result, reinterpret_cast<char*>(&fl) +
sizeof(uint16_t),
sizeof(uint16_t));
132 std::memcpy(&result, &fl,
sizeof(uint16_t));
137 uint16_t upper_bits = get_msb_half(
v);
144 val = get_msb_half(F32);
150 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
151 return __bfloat162float(*reinterpret_cast<const __nv_bfloat16*>(&
val));
152 #elif defined(__HIP__)
157 float* tempRes =
reinterpret_cast<float*
>(&tmp);
163 return std::numeric_limits<float>::quiet_NaN();
167 char*
const first =
reinterpret_cast<char*
>(&
result);
169 char*
const second = first +
sizeof(uint16_t);
170 std::memcpy(second, &
val,
sizeof(uint16_t));
172 std::memcpy(first, &
val,
sizeof(uint16_t));
211 #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
213 explicit ORT_HOST_DEVICE operator __nv_bfloat16()
const {
return *
reinterpret_cast<const __nv_bfloat16*
>(&
val); }
221 return val == rhs.val;
225 return !(*
this == rhs);
235 if (left_is_negative != rhs.IsNegativeHostDevice()) {
241 return (
val != rhs.val) && ((
val < rhs.val) ^ left_is_negative);
256 return static_cast<uint16_t
>((lhs.val | rhs.val) & ~
kSignMask) == 0;
263 #if !defined(__CUDACC__) && !defined(__HIPCC__)
264 inline MLFloat16 operator"" _f16(
unsigned long long int v) noexcept {
272 inline BFloat16 operator"" _b16(
unsigned long long int v) noexcept {
276 inline BFloat16 operator"" _bfp16(
long double v) noexcept {
292 for (; size != 0; ++
src, ++d, --
size) {
302 class numeric_limits<onnxruntime::MLFloat16> {
342 static constexpr
bool is_specialized =
true;
346 static constexpr
bool is_exact =
false;
347 static constexpr
bool has_infinity =
true;
348 static constexpr
bool has_quiet_NaN =
true;
349 static constexpr
bool has_signaling_NaN =
true;
350 static constexpr float_denorm_style has_denorm = denorm_present;
351 static constexpr
bool has_denorm_loss =
false;
353 static constexpr
bool is_bounded =
true;
354 static constexpr
bool is_iec559 =
true;
355 static constexpr
bool is_modulo =
false;
357 static constexpr
int digits = 11;
359 static constexpr
int max_digits10 = 5;
360 static constexpr
int radix = 2;
361 static constexpr
int min_exponent = -13;
362 static constexpr
int min_exponent10 = -4;
363 static constexpr
int max_exponent = 16;
364 static constexpr
int max_exponent10 = 4;
366 static constexpr
bool traps =
false;
367 static constexpr
bool tinyness_before =
false;
368 static constexpr std::float_round_style round_style = std::round_to_nearest;
372 class numeric_limits<onnxruntime::BFloat16> {
413 static constexpr
bool is_specialized =
true;
416 static constexpr
bool is_exact =
false;
417 static constexpr
bool has_infinity =
true;
418 static constexpr
bool has_quiet_NaN =
true;
419 static constexpr
bool has_signaling_NaN =
true;
420 static constexpr float_denorm_style has_denorm = denorm_present;
421 static constexpr
bool has_denorm_loss =
false;
423 static constexpr
bool is_bounded =
true;
424 static constexpr
bool is_iec559 =
false;
425 static constexpr
bool is_modulo =
false;
427 static constexpr
int digits = 8;
429 static constexpr
int max_digits10 = 4;
430 static constexpr
int radix = 2;
431 static constexpr
int min_exponent = -125;
432 static constexpr
int min_exponent10 = -37;
433 static constexpr
int max_exponent = 128;
434 static constexpr
int max_exponent10 = 38;
436 static constexpr
bool traps =
false;
437 static constexpr
bool tinyness_before =
false;
438 static constexpr float_round_style round_style = round_to_nearest;
bool IsNaN() const noexcept
Tests if the value is NaN
static constexpr onnxruntime::MLFloat16 infinity() noexcept
MLFloat16 Abs() const noexcept
Creates an instance that represents absolute value.
ORT_HOST_DEVICE bool operator<(const BFloat16 &rhs) const noexcept
bool_constant< is_integral< T >::value &&!std::is_same< T, bool >::value &&!std::is_same< T, char >::value &&!std::is_same< T, wchar_t >::value > is_integer
BFloat16 Negate() const noexcept
Creates a new instance with the sign flipped.
static const MLFloat16 NegativeInfinity
MLFloat16(float v) noexcept
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
GLsizei const GLfloat * value
static constexpr onnxruntime::MLFloat16 denorm_min() noexcept
void BFloat16ToFloat(const BFloat16 *blf, float *flt, size_t size) noexcept
static const BFloat16 MinusOne
static constexpr onnxruntime::BFloat16 max() noexcept
static const BFloat16 NaN
**But if you need a result
void FloatToBFloat16(const float *flt, BFloat16 *blf, size_t size)
bool IsNegative() const noexcept
Checks if the value is negative
static constexpr MLFloat16 FromBits(uint16_t x) noexcept
ORT_HOST_DEVICE BFloat16(float v) noexcept
ORT_HOST_DEVICE bool IsNegativeHostDevice() const noexcept
static constexpr uint16_t ToUint16Impl(float v) noexcept
Converts from float to uint16_t float16 representation
static constexpr onnxruntime::BFloat16 quiet_NaN() noexcept
bool IsFinite() const noexcept
Tests if the value is finite
ORT_HOST_DEVICE bool operator!=(const BFloat16 &rhs) const noexcept
static constexpr onnxruntime::BFloat16 min() noexcept
static ORT_HOST_DEVICE bool AreZeroHostDevice(const BFloat16Impl &lhs, const BFloat16Impl &rhs) noexcept
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
static constexpr ORT_HOST_DEVICE FromBitsT FromBits() noexcept
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
bool IsFinite() const noexcept
Tests if the value is finite
static constexpr onnxruntime::MLFloat16 lowest() noexcept
Shared implementation between public and internal classes. CRTP pattern.
bool IsNegative() const noexcept
Checks if the value is negative
ORT_HOST_DEVICE bool IsNaNHostDevice() const noexcept
static constexpr onnxruntime::MLFloat16 quiet_NaN() noexcept
ORT_HOST_DEVICE float ToFloat() const noexcept
ORT_HOST_DEVICE bool operator==(const BFloat16 &rhs) const noexcept
constexpr ORT_HOST_DEVICE BFloat16(unsigned short bits, FromBitsT) noexcept
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity
float ToFloatImpl() const noexcept
Converts float16 to float
static constexpr onnxruntime::MLFloat16 round_error() noexcept
bool IsPositiveInfinity() const noexcept
Tests if the value represents positive infinity.
static constexpr uint16_t kPositiveQNaNBits
static constexpr onnxruntime::MLFloat16 min() noexcept
static const BFloat16 Infinity
MLFloat16 Negate() const noexcept
Creates a new instance with the sign flipped.
static const BFloat16 MaxValue
static constexpr onnxruntime::BFloat16 denorm_min() noexcept
static const BFloat16 NegativeNaN
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
static const MLFloat16 Infinity
static const MLFloat16 NegativeNaN
BFloat16 Abs() const noexcept
Creates an instance that represents absolute value.
bool IsSubnormal() const noexcept
Tests if the value is subnormal (denormal).
static constexpr onnxruntime::BFloat16 round_error() noexcept
static constexpr onnxruntime::BFloat16 signaling_NaN() noexcept
static constexpr onnxruntime::MLFloat16 epsilon() noexcept
static const MLFloat16 Zero
std::integral_constant< bool, std::numeric_limits< T >::is_signed||std::is_same< T, int128_opt >::value > is_signed
IMATH_NAMESPACE::V2f IMATH_NAMESPACE::Box2i std::string this attribute is obsolete as of OpenEXR v3 float
static constexpr uint16_t kPositiveInfinityBits
bool IsNaN() const noexcept
Tests if the value is NaN
static const BFloat16 One
static const MLFloat16 MinusOne
static constexpr onnxruntime::BFloat16 infinity() noexcept
static const BFloat16 NegativeInfinity
static constexpr uint16_t kRoundToNearest
static constexpr uint16_t kSignMask
static constexpr onnxruntime::MLFloat16 signaling_NaN() noexcept
static const MLFloat16 One
static const MLFloat16 MaxValue
bool IsNormal() const noexcept
Tests if the value is normal (not zero, subnormal, infinite, or NaN).
bool IsNaNOrZero() const noexcept
Tests if the value is NaN or zero. Useful for comparisons.
float ToFloat() const noexcept
static constexpr ORT_HOST_DEVICE BFloat16 FromBits(uint16_t bits) noexcept
static constexpr onnxruntime::BFloat16 epsilon() noexcept
static const MLFloat16 NaN
static constexpr onnxruntime::BFloat16 lowest() noexcept
bool IsInfinity() const noexcept
Tests if the value is either positive or negative infinity.
Shared implementation between public and internal classes. CRTP pattern.
static constexpr onnxruntime::MLFloat16 max() noexcept
bool IsNegativeInfinity() const noexcept
Tests if the value represents negative infinity
static const BFloat16 Zero