#ifndef __VM_AVXFunc__
#define __VM_AVXFunc__

// SYS support headers (SYS_FORCE_INLINE, SYS_ALIGN, int32/uint32).  The
// exact include list was lost in extraction; these two are assumed.
#include <SYS/SYS_Inline.h>
#include <SYS/SYS_Types.h>

#define CPU_HAS_AVX_INSTR 1
#define VM_AVX_STYLE 1

#include <immintrin.h>

typedef __m256  v8sf;
typedef __m256i v8si;
// Plain bit-pattern casts between the float and integer views of a 256-bit
// register.  MSVC cannot cast directly between __m256 and __m256i, so it
// reinterprets through a union; elsewhere a C-style cast suffices.
#if defined(_MSC_VER)

static SYS_FORCE_INLINE v8sf
vm_v8sf(const v8si &a)
{
    union { v8si ival; v8sf fval; } v;
    v.ival = a;
    return v.fval;
}

static SYS_FORCE_INLINE v8si
vm_v8si(const v8sf &a)
{
    union { v8si ival; v8sf fval; } v;
    v.fval = a;
    return v.ival;
}

#define V8SF(A) vm_v8sf(A)
#define V8SI(A) vm_v8si(A)

#else

#define V8SF(A) (v8sf)A
#define V8SI(A) (v8si)A

#endif
// Compose a 256-bit integer register from two 128-bit halves
// (v0 becomes the upper half, v1 the lower half).
#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)

#define VM_SHUFFLE_MASK_AVX(a0,a1, b0,b1) ((b1)<<6|(b0)<<4 | (a1)<<2|(a0))
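
// VM_SHUFFLE_MASK_AVX packs four 2-bit lane selectors into the immediate
// expected by _mm256_shuffle_ps.  Note that _mm256_shuffle_ps applies the
// same selection independently to each 128-bit half: the low two selectors
// pick lanes from the first operand, the high two from the second.
// For example (illustrative, not from the original header),
//     vm_shuffle_avx<0,1, 0,1>(a, b)
// yields { a0, a1, b0, b1, a4, a5, b4, b5 }.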
template <int mask>
static SYS_FORCE_INLINE v8sf
vm_shuffle_avx(const v8sf &a, const v8sf &b)
{
    return _mm256_shuffle_ps(a, b, mask);
}

template <int A, int B, int C, int D, typename T>
static SYS_FORCE_INLINE T
vm_shuffle_avx(const T &a, const T &b)
{
    return vm_shuffle_avx<VM_SHUFFLE_MASK_AVX(A,B,C,D)>(a, b);
}

template <int mask, typename T>
static SYS_FORCE_INLINE T
vm_shuffle_avx(const T &a)
{
    return vm_shuffle_avx<mask>(a, a);
}

template <int A, int B, int C, int D, typename T>
static SYS_FORCE_INLINE T
vm_shuffle_avx(const T &a)
{
    return vm_shuffle_avx<A,B,C,D>(a, a);
}
#if defined(_MSC_VER)
// Some MSVC versions do not provide _mm256_insert_epi32, so go through a
// union instead.
static SYS_FORCE_INLINE v8si
vm_insert_avx(const v8si v, int32 a, int n)
{
    union { v8si vector; int32 comp[8]; };
    vector = v;
    comp[n] = a;
    return vector;
}
#else
static SYS_FORCE_INLINE v8si
vm_insert_avx(const v8si v, int32 a, int n)
{
    switch (n)
    {
        case 0 : return _mm256_insert_epi32(v, a, 0);
        case 1 : return _mm256_insert_epi32(v, a, 1);
        case 2 : return _mm256_insert_epi32(v, a, 2);
        case 3 : return _mm256_insert_epi32(v, a, 3);
        case 4 : return _mm256_insert_epi32(v, a, 4);
        case 5 : return _mm256_insert_epi32(v, a, 5);
        case 6 : return _mm256_insert_epi32(v, a, 6);
        case 7 : return _mm256_insert_epi32(v, a, 7);
    }
    return v;
}
#endif

static SYS_FORCE_INLINE v8sf
vm_insert_avx(const v8sf v, float a, int n)
{
    union { v8sf vector; float comp[8]; };
    vector = v;
    comp[n] = a;
    return vector;
}
#if defined(_MSC_VER)
// Likewise, some MSVC versions lack _mm256_extract_epi32.
static SYS_FORCE_INLINE int
vm_extract_avx(const v8si v, int n)
{
    union { v8si vector; int32 comp[8]; };
    vector = v;
    return comp[n];
}
#else
static SYS_FORCE_INLINE int
vm_extract_avx(const v8si v, int n)
{
    switch (n)
    {
        case 0 : return _mm256_extract_epi32(v, 0);
        case 1 : return _mm256_extract_epi32(v, 1);
        case 2 : return _mm256_extract_epi32(v, 2);
        case 3 : return _mm256_extract_epi32(v, 3);
        case 4 : return _mm256_extract_epi32(v, 4);
        case 5 : return _mm256_extract_epi32(v, 5);
        case 6 : return _mm256_extract_epi32(v, 6);
        case 7 : return _mm256_extract_epi32(v, 7);
    }
    return 0;
}
#endif

static SYS_FORCE_INLINE float
vm_extract_avx(const v8sf v, int n)
{
    union { v8sf vector; float comp[8]; };
    vector = v;
    return comp[n];
}
static SYS_FORCE_INLINE v8sf
vm_splats_avx(float a)
{
    return _mm256_set1_ps(a);
}

static SYS_FORCE_INLINE v8si
vm_splats_avx(uint32 a)
{
    // Splat the raw bit pattern by reinterpreting it as a float first.
    union { uint32 uval; float fval; } tmp;
    tmp.uval = a;
    return V8SI(vm_splats_avx(tmp.fval));
}

static SYS_FORCE_INLINE v8si
vm_splats_avx(int32 a)
{
    return _mm256_set1_epi32(a);
}

static SYS_FORCE_INLINE v8sf
vm_splats_avx(float a0, float a1, float a2, float a3,
              float a4, float a5, float a6, float a7)
{
    return _mm256_set_ps(a7, a6, a5, a4, a3, a2, a1, a0);
}

static SYS_FORCE_INLINE v8si
vm_splats_avx(int32 a0, int32 a1, int32 a2, int32 a3,
              int32 a4, int32 a5, int32 a6, int32 a7)
{
    return _mm256_set_epi32(a7, a6, a5, a4, a3, a2, a1, a0);
}
static SYS_FORCE_INLINE v8si
vm_load_avx(const int32 v[8])
{
    return _mm256_loadu_si256((v8si *) v);
}

static SYS_FORCE_INLINE v8sf
vm_load_avx(const float v[8])
{
    return _mm256_loadu_ps(v);
}

static SYS_FORCE_INLINE void
vm_store_avx(int32 dst[8], v8si value)
{
    _mm256_storeu_si256((__m256i*) dst, value);
}

static SYS_FORCE_INLINE void
vm_store_avx(float dst[8], v8sf value)
{
    _mm256_storeu_ps(dst, value);
}

static SYS_FORCE_INLINE v8si
vm_izero_avx()
{
    return _mm256_setzero_si256();
}

static SYS_FORCE_INLINE v8sf
vm_zero_avx()
{
    return _mm256_setzero_ps();
}
static SYS_FORCE_INLINE v8sf
vm_negate_avx(v8sf a)
{
    return _mm256_sub_ps(_mm256_setzero_ps(), a);
}

static SYS_FORCE_INLINE v8sf
vm_abs_avx(v8sf a)
{
    return _mm256_max_ps(a, vm_negate_avx(a));
}

// Fast (approximate) division and square root built on the AVX reciprocal
// and reciprocal-square-root estimates.
static SYS_FORCE_INLINE v8sf
vm_fdiv_avx(v8sf a, v8sf b)
{
    return _mm256_mul_ps(a, _mm256_rcp_ps(b));
}

static SYS_FORCE_INLINE v8sf
vm_fsqrt_avx(v8sf a)
{
    return _mm256_rcp_ps(_mm256_rsqrt_ps(a));
}

static SYS_FORCE_INLINE v8sf
vm_madd_avx(v8sf a, v8sf b, v8sf c)
{
    return _mm256_add_ps(_mm256_mul_ps(a, b), c);
}
// Integer operations without a suitable 256-bit form here are emulated by
// splitting the register into two 128-bit halves and using the SSE op.
#define SSE_WRAPPER_I(NAME, OP) \
static SYS_FORCE_INLINE v8si \
NAME(v8si a, v8si b) \
{ \
    __m128i la = _mm256_extractf128_si256(a, 0); \
    __m128i ua = _mm256_extractf128_si256(a, 1); \
    __m128i lb = _mm256_extractf128_si256(b, 0); \
    __m128i ub = _mm256_extractf128_si256(b, 1); \
    return _mm256_set_m128i(OP(ua, ub), \
                            OP(la, lb)); \
}
SSE_WRAPPER_I(vm_int_cmplt_avx, _mm_cmplt_epi32)

static const v8si theSSETrue_avx = vm_splats_avx(0xFFFFFFFF);

static SYS_FORCE_INLINE bool
vm_allbits_avx(const v8si &a)
{
    return _mm256_movemask_ps(
            V8SF(_mm256_cmpeq_epi32(a, theSSETrue_avx))) == 0xFF;
}
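
// Illustrative use (hypothetical, not part of the original header): the
// comparison wrappers below return a per-lane mask of all-ones/all-zeros,
// and vm_allbits_avx() reports whether every lane passed.
//
//     v8si mask = VM_CMPLT_AVX(a, b);    // per-lane a[i] < b[i]
//     if (vm_allbits_avx(mask))
//         ...;                           // true for all eight lanes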
#define VM_EXTRACT_AVX  vm_extract_avx
#define VM_INSERT_AVX   vm_insert_avx
#define VM_SPLATS_AVX   vm_splats_avx
#define VM_LOAD_AVX     vm_load_avx
#define VM_STORE_AVX    vm_store_avx
#define VM_ZERO_AVX     vm_zero_avx
#define VM_IZERO_AVX    vm_izero_avx

#define VM_CMPLT_AVX(A,B)   V8SI(_mm256_cmp_ps(A,B,_CMP_LT_OQ))
#define VM_CMPLE_AVX(A,B)   V8SI(_mm256_cmp_ps(A,B,_CMP_LE_OQ))
#define VM_CMPGT_AVX(A,B)   V8SI(_mm256_cmp_ps(A,B,_CMP_GT_OQ))
#define VM_CMPGE_AVX(A,B)   V8SI(_mm256_cmp_ps(A,B,_CMP_GE_OQ))
#define VM_CMPEQ_AVX(A,B)   V8SI(_mm256_cmp_ps(A,B,_CMP_EQ_OQ))
#define VM_CMPNE_AVX(A,B)   V8SI(_mm256_cmp_ps(A,B,_CMP_NEQ_OQ))

#define VM_ICMPLT_AVX   vm_int_cmplt_avx
#define VM_ICMPGT_AVX   _mm256_cmpgt_epi32
#define VM_ICMPEQ_AVX   _mm256_cmpeq_epi32

#define VM_IADD_AVX     _mm256_add_epi32
#define VM_ISUB_AVX     _mm256_sub_epi32
#define VM_IMUL_AVX     _mm256_mullo_epi32

#define VM_ADD_AVX      _mm256_add_ps
#define VM_SUB_AVX      _mm256_sub_ps
#define VM_MUL_AVX      _mm256_mul_ps
#define VM_DIV_AVX      _mm256_div_ps
#define VM_SQRT_AVX     _mm256_sqrt_ps
#define VM_ISQRT_AVX    _mm256_rsqrt_ps
#define VM_INVERT_AVX   _mm256_rcp_ps
#define VM_ABS_AVX      vm_abs_avx

#define VM_FDIV_AVX     vm_fdiv_avx
#define VM_NEG_AVX      vm_negate_avx
#define VM_FSQRT_AVX    vm_fsqrt_avx
#define VM_MADD_AVX     vm_madd_avx

#define VM_MIN_AVX      _mm256_min_ps
#define VM_MAX_AVX      _mm256_max_ps

#define VM_AND_AVX      _mm256_and_si256
#define VM_ANDNOT_AVX   _mm256_andnot_si256
#define VM_OR_AVX       _mm256_or_si256
#define VM_XOR_AVX      _mm256_xor_si256

#define VM_ALLBITS_AVX  vm_allbits_avx

#define VM_SHUFFLE_AVX  vm_shuffle_avx

// MXCSR rounding-control field (bits 13-14).
#define VM_SSE_ROUND_MASK_AVX   0x6000
#define VM_SSE_ROUND_ZERO_AVX   0x6000
#define VM_SSE_ROUND_UP_AVX     0x4000
#define VM_SSE_ROUND_DOWN_AVX   0x2000
#define VM_SSE_ROUND_NEAR_AVX   0x0000

#define GETROUND_AVX()  (_mm_getcsr()&VM_SSE_ROUND_MASK_AVX)
#define SETROUND_AVX(x) (_mm_setcsr(x|(_mm_getcsr()&~VM_SSE_ROUND_MASK_AVX)))

// The P_FLOOR/E_FLOOR macros bracket a run of floor conversions: save the
// current rounding mode, switch to round-down, and restore it afterwards.
#define VM_P_FLOOR_AVX()    uint rounding = GETROUND_AVX(); \
                            SETROUND_AVX(VM_SSE_ROUND_DOWN_AVX);
#define VM_FLOOR_AVX        _mm256_cvtps_epi32
#define VM_INT_AVX          _mm256_cvttps_epi32
#define VM_E_FLOOR_AVX()    SETROUND_AVX(rounding);

#define VM_IFLOAT_AVX   _mm256_cvtepi32_ps

// Integer shifts by a scalar count (the count sits in the low dword of an
// XMM register for _mm256_sll/srl_epi32).
#define VM_SHIFTLEFT_AVX(A,C)   _mm256_sll_epi32(A,_mm_setr_epi32(C,0,0,0))
#define VM_SHIFTRIGHT_AVX(A,C)  _mm256_srl_epi32(A,_mm_setr_epi32(C,0,0,0))
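
// Illustrative use of the wrapper layer (hypothetical helper, not part of
// the original header): an 8-wide multiply-add, and a floor conversion
// bracketed by the rounding-mode guards.
//
//     static inline void
//     madd8(float d[8], const float a[8], const float b[8], const float c[8])
//     {
//         v8sf r = VM_MADD_AVX(VM_LOAD_AVX(a), VM_LOAD_AVX(b), VM_LOAD_AVX(c));
//         VM_STORE_AVX(d, r);
//     }
//
//     VM_P_FLOOR_AVX();                   // switch MXCSR to round-down
//     v8si fl = VM_FLOOR_AVX(vals);       // vals is a v8sf
//     VM_E_FLOOR_AVX();                   // restore the previous mode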
#define _PI32AVX_CONST(Name, Val) \
    static const SYS_ALIGN(32) int _pi32avx_##Name[4] = \
        { Val, Val, Val, Val }

#define _PS256_CONST(Name, Val) \
    static const SYS_ALIGN(32) float _ps256_##Name[8] = \
        { Val, Val, Val, Val, Val, Val, Val, Val }
#define _PS256_CONST_TYPE(Name, Type, Val) \
    static const SYS_ALIGN(32) Type _ps256_##Name[8] = \
        { Val, Val, Val, Val, Val, Val, Val, Val }
_PS256_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
_PS256_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
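// NOTE: the remaining coefficient tables referenced by vm_sincos_avx()
// below were lost in extraction.  The values here are the standard
// Cephes-derived constants used by the well-known avx_mathfun sin/cos
// routines; they are reproduced as an assumption and should be verified
// against the original header.
_PS256_CONST(minus_cephes_DP1, -0.78515625);
_PS256_CONST(sincof_p0, -1.9515295891E-4);
_PS256_CONST(sincof_p1,  8.3321608736E-3);
_PS256_CONST(sincof_p2, -1.6666654611E-1);
_PS256_CONST(coscof_p0,  2.443315711809948E-005);
_PS256_CONST(coscof_p1, -1.388731625493765E-003);
_PS256_CONST(coscof_p2,  4.166664568298827E-002);
_PS256_CONST(cephes_FOPI, 1.27323954473516);    // 4 / Pi
_PS256_CONST(0p5, 0.5f);
_PS256_CONST(1, 1.0f);
_PS256_CONST_TYPE(sign_mask, int, (int)0x80000000);
_PS256_CONST_TYPE(inv_sign_mask, int, ~0x80000000);
_PI32AVX_CONST(1, 1);
_PI32AVX_CONST(inv1, ~1);
_PI32AVX_CONST(2, 2);
_PI32AVX_CONST(4, 4);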
#undef _PI32AVX_CONST
#undef _PS256_CONST
#undef _PS256_CONST_TYPE
typedef union imm_xmm_union {
    v8si    imm;
    __m128i xmm[2];
} imm_xmm_union;

#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) { \
    SYS_ALIGN(32) imm_xmm_union u; \
    u.imm = imm_; \
    xmm0_ = u.xmm[0]; \
    xmm1_ = u.xmm[1]; \
}

#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) { \
    SYS_ALIGN(32) imm_xmm_union u; \
    u.xmm[0]=xmm0_; u.xmm[1]=xmm1_; imm_ = u.imm; \
}
// Vectorized sin/cos over 8 floats, following the Cephes-style evaluation
// used by the avx_mathfun routines; the integer steps run on the two
// 128-bit halves so only AVX1 is required.
static SYS_FORCE_INLINE void
vm_sincos_avx(v8sf x, v8sf *s, v8sf *c)
{
    v8sf xmm1, xmm2, xmm3, sign_bit_sin, y;
    v8si imm0, imm2, imm4;

    __m128i imm0_1, imm0_2;
    __m128i imm2_1, imm2_2;
    __m128i imm4_1, imm4_2;

    sign_bit_sin = x;
    // take the absolute value
    x = _mm256_and_ps(x, *(v8sf*)_ps256_inv_sign_mask);
    // extract the sign bit (upper one)
    sign_bit_sin = _mm256_and_ps(sign_bit_sin, *(v8sf*)_ps256_sign_mask);

    // scale by 4/Pi
    y = _mm256_mul_ps(x, *(v8sf*)_ps256_cephes_FOPI);

    // store the integer part of y; the integer work is done on the halves
    COPY_IMM_TO_XMM(_mm256_cvttps_epi32(y), imm2_1, imm2_2);

    // j = (j+1) & ~1  (see the Cephes sources)
    imm2_1 = _mm_add_epi32(imm2_1, *(__m128i*)_pi32avx_1);
    imm2_2 = _mm_add_epi32(imm2_2, *(__m128i*)_pi32avx_1);

    imm2_1 = _mm_and_si128(imm2_1, *(__m128i*)_pi32avx_inv1);
    imm2_2 = _mm_and_si128(imm2_2, *(__m128i*)_pi32avx_inv1);

    COPY_XMM_TO_IMM(imm2_1, imm2_2, imm2);
    y = _mm256_cvtepi32_ps(imm2);

    imm4_1 = imm2_1;
    imm4_2 = imm2_2;

    imm0_1 = _mm_and_si128(imm2_1, *(__m128i*)_pi32avx_4);
    imm0_2 = _mm_and_si128(imm2_2, *(__m128i*)_pi32avx_4);

    imm0_1 = _mm_slli_epi32(imm0_1, 29);
    imm0_2 = _mm_slli_epi32(imm0_2, 29);

    COPY_XMM_TO_IMM(imm0_1, imm0_2, imm0);

    imm2_1 = _mm_and_si128(imm2_1, *(__m128i*)_pi32avx_2);
    imm2_2 = _mm_and_si128(imm2_2, *(__m128i*)_pi32avx_2);

    imm2_1 = _mm_cmpeq_epi32(imm2_1, _mm_setzero_si128());
    imm2_2 = _mm_cmpeq_epi32(imm2_2, _mm_setzero_si128());

    COPY_XMM_TO_IMM(imm2_1, imm2_2, imm2);

    v8sf swap_sign_bit_sin = _mm256_castsi256_ps(imm0);
    v8sf poly_mask = _mm256_castsi256_ps(imm2);

    // The magic pass: "Extended precision modular arithmetic"
    // x = ((x - y * DP1) - y * DP2) - y * DP3;
    xmm1 = *(v8sf*)_ps256_minus_cephes_DP1;
    xmm2 = *(v8sf*)_ps256_minus_cephes_DP2;
    xmm3 = *(v8sf*)_ps256_minus_cephes_DP3;
    xmm1 = _mm256_mul_ps(y, xmm1);
    xmm2 = _mm256_mul_ps(y, xmm2);
    xmm3 = _mm256_mul_ps(y, xmm3);
    x = _mm256_add_ps(x, xmm1);
    x = _mm256_add_ps(x, xmm2);
    x = _mm256_add_ps(x, xmm3);

    imm4_1 = _mm_sub_epi32(imm4_1, *(__m128i*)_pi32avx_2);
    imm4_2 = _mm_sub_epi32(imm4_2, *(__m128i*)_pi32avx_2);

    imm4_1 = _mm_andnot_si128(imm4_1, *(__m128i*)_pi32avx_4);
    imm4_2 = _mm_andnot_si128(imm4_2, *(__m128i*)_pi32avx_4);

    imm4_1 = _mm_slli_epi32(imm4_1, 29);
    imm4_2 = _mm_slli_epi32(imm4_2, 29);

    COPY_XMM_TO_IMM(imm4_1, imm4_2, imm4);

    v8sf sign_bit_cos = _mm256_castsi256_ps(imm4);

    sign_bit_sin = _mm256_xor_ps(sign_bit_sin, swap_sign_bit_sin);

    // Evaluate the first polynomial (0 <= x <= Pi/4)
    v8sf z = _mm256_mul_ps(x, x);
    y = *(v8sf*)_ps256_coscof_p0;

    y = _mm256_mul_ps(y, z);
    y = _mm256_add_ps(y, *(v8sf*)_ps256_coscof_p1);
    y = _mm256_mul_ps(y, z);
    y = _mm256_add_ps(y, *(v8sf*)_ps256_coscof_p2);
    y = _mm256_mul_ps(y, z);
    y = _mm256_mul_ps(y, z);
    v8sf tmp = _mm256_mul_ps(z, *(v8sf*)_ps256_0p5);
    y = _mm256_sub_ps(y, tmp);
    y = _mm256_add_ps(y, *(v8sf*)_ps256_1);

    // Evaluate the second polynomial (Pi/4 <= x <= 0)
    v8sf y2 = *(v8sf*)_ps256_sincof_p0;
    y2 = _mm256_mul_ps(y2, z);
    y2 = _mm256_add_ps(y2, *(v8sf*)_ps256_sincof_p1);
    y2 = _mm256_mul_ps(y2, z);
    y2 = _mm256_add_ps(y2, *(v8sf*)_ps256_sincof_p2);
    y2 = _mm256_mul_ps(y2, z);
    y2 = _mm256_mul_ps(y2, x);
    y2 = _mm256_add_ps(y2, x);

    // Select the correct result from the two polynomials
    xmm3 = poly_mask;
    v8sf ysin2 = _mm256_and_ps(xmm3, y2);
    v8sf ysin1 = _mm256_andnot_ps(xmm3, y);
    y2 = _mm256_sub_ps(y2, ysin2);
    y = _mm256_sub_ps(y, ysin1);

    xmm1 = _mm256_add_ps(ysin1, ysin2);
    xmm2 = _mm256_add_ps(y, y2);

    // Update the signs
    *s = _mm256_xor_ps(xmm1, sign_bit_sin);
    *c = _mm256_xor_ps(xmm2, sign_bit_cos);
}
#undef COPY_IMM_TO_XMM
#undef COPY_XMM_TO_IMM
static SYS_FORCE_INLINE v8sf
vm_sin_avx(v8sf x)
{
    v8sf s, c;
    vm_sincos_avx(x, &s, &c);
    return s;
}

static SYS_FORCE_INLINE v8sf
vm_cos_avx(v8sf x)
{
    v8sf s, c;
    vm_sincos_avx(x, &s, &c);
    return c;
}

static SYS_FORCE_INLINE v8sf
vm_tan_avx(v8sf x)
{
    v8sf s, c;
    vm_sincos_avx(x, &s, &c);
    return _mm256_div_ps(s, c);
}
#define VM_SINCOS_AVX   vm_sincos_avx
#define VM_SIN_AVX      vm_sin_avx
#define VM_COS_AVX      vm_cos_avx
#define VM_TAN_AVX      vm_tan_avx
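
// Illustrative use of the trig wrappers (hypothetical caller, not part of
// the original header):
//
//     v8sf angle = VM_LOAD_AVX(theta);    // theta is float[8]
//     v8sf s, c;
//     VM_SINCOS_AVX(angle, &s, &c);       // sine and cosine of all 8 lanes
//     VM_STORE_AVX(sines, s);
//     VM_STORE_AVX(cosines, c);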
#endif // __VM_AVXFunc__