16 #include <cuda_runtime.h>
17 #ifndef USE_CUDA_MINIMAL
18 #include <cublas_v2.h>
45 void Init(
const OrtKernelContext& kernel_ctx) {
66 if constexpr (
sizeof(
T) >
sizeof(
void*)) {
68 OrtErrorCode::ORT_INVALID_ARGUMENT);
72 OrtStatus* status = ort_api.KernelContext_GetResource(
76 OrtErrorCode::ORT_RUNTIME_EXCEPTION);
79 memcpy(&t, &resource,
sizeof(
T));
91 ORT_CXX_API_THROW(
"failed to allocate deferred cpu memory", OrtErrorCode::ORT_RUNTIME_EXCEPTION);
101 ORT_CXX_API_THROW(
"failed to free deferred cpu memory", OrtErrorCode::ORT_RUNTIME_EXCEPTION);
int32_t arena_extend_strategy
OrtAllocator * deferred_cpu_allocator
bool enable_skip_layer_norm_strict_mode
void FreeDeferredCpuMem(void *mem) const
const OrtApi & GetApi() noexcept
This returns a reference to the OrtApi interface in use.
void Init(const OrtKernelContext &kernel_ctx)
void * AllocDeferredCpuMem(size_t size) const
T FetchResource(const OrtKernelContext &kernel_ctx, CudaResource resource_type)
Use a manually-specified time code range.
cudnnHandle_t cudnn_handle
struct CUstream_st * cudaStream_t
int32_t cudnn_conv_algo_search
#define ORT_CUDA_RESOURCE_VERSION
bool cudnn_conv_use_max_workspace
bool cudnn_conv1d_pad_to_nc1d
cublasHandle_t cublas_handle
#define ORT_CXX_API_THROW(string, code)