HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
allocator.h
Go to the documentation of this file.
1 // Copyright (c) Microsoft Corporation. All rights reserved.
2 // Licensed under the MIT License.
3 
4 #pragma once
5 
6 #include <map>
7 
8 #include "core/common/common.h"
9 #include "core/framework/allocator_stats.h"
10 // some enums are defined in session/onnxruntime_c_api.h but used in ortdevice.h/ortmemory.h
11 #include "core/session/onnxruntime_c_api.h"
14 
// This configures the arena based allocator used by ORT
// See docs/C_API.md for details on what these mean and how to choose these values
struct OrtArenaCfg {
  // Leaves every setting at the "allow ORT to choose the default" sentinel documented on each field below.
  OrtArenaCfg() = default;

  // Sets every field explicitly. Pass a field's sentinel value (see the field comments) to let ORT
  // choose the default for that particular setting.
  OrtArenaCfg(size_t max_mem, int arena_extend_strategy, int initial_chunk_size_bytes,
              int max_dead_bytes_per_chunk, int initial_growth_chunk_size_bytes,
              int64_t max_power_of_two_extend_bytes)
      : max_mem(max_mem),
        arena_extend_strategy(arena_extend_strategy),
        initial_chunk_size_bytes(initial_chunk_size_bytes),
        max_dead_bytes_per_chunk(max_dead_bytes_per_chunk),
        initial_growth_chunk_size_bytes(initial_growth_chunk_size_bytes),
        max_power_of_two_extend_bytes(max_power_of_two_extend_bytes) {}

  size_t max_mem = 0;                          // use 0 to allow ORT to choose the default
  int arena_extend_strategy = -1;              // use -1 to allow ORT to choose the default, 0 = kNextPowerOfTwo, 1 = kSameAsRequested
  int initial_chunk_size_bytes = -1;           // use -1 to allow ORT to choose the default
  int max_dead_bytes_per_chunk = -1;           // use -1 to allow ORT to choose the default
  int initial_growth_chunk_size_bytes = -1;    // use -1 to allow ORT to choose the default
  int64_t max_power_of_two_extend_bytes = -1;  // use -1 to allow ORT to choose the default
};
41 
42 namespace onnxruntime {
// String names for the kinds of memory an allocator can manage. For example, CPU is the name that
// CPUAllocator's default constructor passes to OrtMemoryInfo.

// Host memory.
constexpr const char* CPU = "Cpu";

// CUDA.
constexpr const char* CUDA = "Cuda";
constexpr const char* CUDA_PINNED = "CudaPinned";

// CANN.
constexpr const char* CANN = "Cann";
constexpr const char* CANN_PINNED = "CannPinned";

// DirectML.
constexpr const char* DML = "DML";

// HIP.
constexpr const char* HIP = "Hip";
constexpr const char* HIP_PINNED = "HipPinned";

// OpenVINO.
constexpr const char* OpenVINO_CPU = "OpenVINO_CPU";
constexpr const char* OpenVINO_GPU = "OpenVINO_GPU";
constexpr const char* OpenVINO_RT = "OpenVINO_RT";
constexpr const char* OpenVINO_RT_NPU = "OpenVINO_RT_NPU";

// Web backends.
constexpr const char* WEBGPU_BUFFER = "WebGPU_Buffer";
constexpr const char* WEBNN_TENSOR = "WebNN_Tensor";

// Allocation alignment constant (presumably in bytes -- confirm against the allocator implementations).
constexpr size_t kAllocAlignment = 256;
59 
// Forward declarations; only references and pointers to these types are needed at this point.
class IAllocator;
class Stream;

namespace synchronize {
class Notification;
}  // namespace synchronize

// Callback handed to the allocation helpers. Per the MakeUniquePtr documentation it is used to sync on
// the target stream when an allocator reuses a chunk that belongs to another stream.
using WaitNotificationFn = std::function<void(Stream&, synchronize::Notification&)>;

// Allocates `size` bytes from `allocator`; declared here and defined elsewhere. IAllocator::MakeUniquePtr
// forwards its use_reserve/stream/wait_fn arguments to this function.
void* AllocateBufferWithOptions(IAllocator& allocator, size_t size, bool use_reserve, Stream* stream,
                                WaitNotificationFn wait_fn);

// std::unique_ptr whose deleter is a type-erased callable, so the pointer type does not depend on which
// allocator will release the memory.
template <typename T>
using IAllocatorUniquePtr = std::unique_ptr<T, std::function<void(T*)>>;
70 
71 class IAllocator {
72  public:
73  IAllocator(const OrtMemoryInfo& info) : memory_info_(info) {}
74  virtual ~IAllocator() = default;
75  /**
76  * Allocate memory of the specified size.
77  * If size is 0, nullptr is returned.
78  * If allocation fails, an exception is thrown.
79  *
80  * @remarks Use SafeInt when calculating the size of memory to allocate using Alloc.
81  */
82  virtual void* Alloc(size_t size) = 0;
83 
84  virtual void Free(void* p) = 0;
85 
86  // Reserve() is an interface exposed for an implementation of IAllocator
87  // to optionally implement some allocation logic that by-passes any arena-based
88  // logic that may be housed in the Alloc() implementation.
89  // There are SessionOptions config(s) that allow users to allocate some memory
90  // by-passing arena-based logic.
91  // By default, the base implementation just calls Alloc().
92  virtual void* Reserve(size_t size) { return Alloc(size); }
93 
94  const OrtMemoryInfo& Info() const { return memory_info_; };
95 
96  // Each implementation of IAllocator can override and provide their own implementation
97  virtual void GetStats(AllocatorStats* /*stats*/) { return; }
98 
99  static bool CalcMemSizeForArray(size_t nmemb, size_t size, size_t* out) noexcept {
100  return CalcMemSizeForArrayWithAlignment(nmemb, size, 0, out);
101  }
102 
103  /**
104  * Calculate the memory size for an array. The size is bounds checked using SafeInt.
105  * \tparam alignment must be power of 2
106  * \param nmemb Number of members or elements in the array
107  * \param size Size of each element
108  * \param out Total size required after any alignment is applied
109  * \return true, successful. false, overflow
110  */
111  [[nodiscard]] static bool CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t alignment,
112  size_t* out) noexcept;
113 
114  /**
115  * https://cwe.mitre.org/data/definitions/190.html
116  * \param alignment must be power of 2
117  * \param nmemb Number of members or elements in the array
118  * \param size Size of each element
119  * \param out Total size required after any alignment is applied
120  * \return true, successful. false, overflow
121  * \remarks This was the original API and was implemented in the header. Replaced with the above version
122  * implemented in the .cc file so that the SafeInt dependency is internal.
123  */
124  template <size_t alignment>
125  [[nodiscard]] static bool CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t* out) noexcept;
126 
127  /**
128  * allocate memory for an array which has nmemb items of data, each size bytes long
129  */
130  void* AllocArray(size_t nmemb, size_t size) {
131  size_t len;
132  if (!CalcMemSizeForArray(nmemb, size, &len)) {
133  ORT_THROW("Invalid size requested for allocation: ", nmemb, " * ", size);
134  }
135 
136  return Alloc(len);
137  }
138 
139  /**
140  * allocate memory for an array which has nmemb items of data, each size bytes long
141  */
142  template <size_t alignment>
143  void* AllocArrayWithAlignment(size_t nmemb, size_t size) {
144  size_t len;
145  if (!CalcMemSizeForArrayWithAlignment(nmemb, size, alignment, &len)) {
146  ORT_THROW("Invalid size requested for allocation: ", nmemb, " * ", size, " with alignment ", alignment);
147  }
148 
149  return Alloc(len);
150  }
151 
152  /**
153  Create a std::unique_ptr that is allocated and freed by the provided IAllocator.
154  @param allocator The allocator.
155  @param count_or_bytes The exact bytes to allocate if T is void, otherwise the number of elements to allocate.
156  @param use_reserve If true, call Reserve() instead of Alloc() to allocate memory.
157  @param stream Which stream instance allocated chunk will be used with.
158  @param wait_fn If the allocator want to dynamic reuse a chunk from another stream, use this wait_fn to sync on
159  the target stream to make the reuse safe.
160  @returns std::unique_ptr with allocated memory and deleter. Throws if it cannot allocate memory.
161  */
162  template <typename T>
163  static IAllocatorUniquePtr<T> MakeUniquePtr(std::shared_ptr<IAllocator> allocator, size_t count_or_bytes,
164  bool use_reserve = false,
165  Stream* stream = nullptr, WaitNotificationFn wait_fn = nullptr) {
166  ValidateAllocator(allocator);
167 
168  // for now limit to fundamental types. we could support others, but to do so either we or the caller
169  // needs to call the dtor for the objects, for buffers allocated on device we don't have destructor
170  // static_assert(std::is_fundamental<T>::value, "Fundamental type required as no destructors are called.");
171 
172  size_t alloc_size = count_or_bytes;
173 
174  // if T is not void, 'count_or_bytes' == number of items so allow for that
175  if constexpr (!std::is_void<T>::value) {
176  // sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't
177  // reachable if T is void. use std::conditional to 'use' void* in the sizeof call
178  constexpr auto size = sizeof(typename std::conditional<std::is_void<T>::value, void*, T>::type);
179  alloc_size = ValidatedCalcMemSizeForArray(count_or_bytes, size);
180  }
181 
182  // allocate
183  T* p = static_cast<T*>(AllocateBufferWithOptions(*allocator, alloc_size, use_reserve, stream, std::move(wait_fn)));
184  ValidateAllocation(p, alloc_size);
185 
186  return IAllocatorUniquePtr<T>{p,
187  [allocator = std::move(allocator)](T* p) {
188  allocator->Free(p);
189  }};
190  }
191 
192  /**
193  Create a std::unique_ptr that is allocated and freed by the provided OrtAllocator.
194  @param ort_allocator The allocator.
195  @param count_or_bytes The exact bytes to allocate if T is void, otherwise the number of elements to allocate.
196  @returns std::unique_ptr with allocated memory and deleter. Throws if it cannot allocate memory.
197  */
198  template <typename T>
199  static IAllocatorUniquePtr<T> MakeUniquePtrFromOrtAllocator(OrtAllocator* ort_allocator, size_t count_or_bytes) {
200  ValidateAllocator(ort_allocator);
201 
202  size_t alloc_size = count_or_bytes;
203  // if T is not void, 'count_or_bytes' == number of items so allow for that
204  if constexpr (!std::is_void<T>::value) {
205  // sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't
206  // reachable if T is void. use std::conditional to 'use' void* in the sizeof call
207  constexpr auto size = sizeof(typename std::conditional<std::is_void<T>::value, void*, T>::type);
208  alloc_size = ValidatedCalcMemSizeForArray(count_or_bytes, size);
209  }
210 
211  T* p = static_cast<T*>(ort_allocator->Alloc(ort_allocator, alloc_size));
212  ValidateAllocation(p, alloc_size);
213 
214  return IAllocatorUniquePtr<T>{p,
215  [ort_allocator](T* p) {
216  ort_allocator->Free(ort_allocator, p);
217  }};
218  }
219 
220  private:
221  //
222  // validation functions. split out from methods that are templatized on the data type to minimize binary size.
223  //
224 
225  template <typename T>
226  static void ValidateAllocator(const T& allocator) {
227  ORT_ENFORCE(allocator != nullptr);
228  }
229 
230  static size_t ValidatedCalcMemSizeForArray(size_t count, size_t size) {
231  size_t alloc_size = 0;
232  if (!CalcMemSizeForArray(count, size, &alloc_size)) {
233  ORT_THROW("Invalid size requested for allocation: ", count, " * ", size);
234  }
235 
236  return alloc_size;
237  }
238 
239  static void ValidateAllocation(void* p, size_t size) {
240  // allocator should throw directly but in case it didn't ensure we do here so that calling code doesn't
241  // need to check for nullptr when an actual allocation was expected.
242  ORT_ENFORCE(p != nullptr || size == 0, "Memory allocation failed. Size=", size);
243  };
244 
245  OrtMemoryInfo memory_info_;
246 };
247 
248 template <size_t alignment>
249 bool IAllocator::CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t* out) noexcept {
250  return CalcMemSizeForArrayWithAlignment(nmemb, size, alignment, out);
251 }
252 
253 class CPUAllocator : public IAllocator {
254  public:
255  explicit CPUAllocator(const OrtMemoryInfo& memory_info) : IAllocator(memory_info) {}
256 
257  CPUAllocator() : IAllocator(OrtMemoryInfo(CPU, OrtAllocatorType::OrtDeviceAllocator)) {}
258 
259  void* Alloc(size_t size) override;
260  void Free(void* p) override;
261 };
262 
263 using AllocatorPtr = std::shared_ptr<IAllocator>;
264 using AllocatorMap = std::map<OrtDevice, AllocatorPtr>;
265 
266 void* AllocatorDefaultAlloc(size_t size);
267 void AllocatorDefaultFree(void* p);
268 } // namespace onnxruntime
GLuint GLuint stream
Definition: glcorearb.h:1832
type
Definition: core.h:556
virtual void Free(void *p)=0
constexpr const char * WEBGPU_BUFFER
Definition: allocator.h:55
void * AllocatorDefaultAlloc(size_t size)
constexpr const char * OpenVINO_GPU
Definition: allocator.h:52
size_t max_mem
Definition: allocator.h:34
constexpr const char * CPU
Definition: allocator.h:43
virtual void GetStats(AllocatorStats *)
Definition: allocator.h:97
constexpr size_t kAllocAlignment
Definition: allocator.h:58
constexpr const char * CUDA_PINNED
Definition: allocator.h:45
GLsizei const GLfloat * value
Definition: glcorearb.h:824
std::function< void(Stream &, synchronize::Notification &)> WaitNotificationFn
Definition: allocator.h:65
void * AllocateBufferWithOptions(IAllocator &allocator, size_t size, bool use_reserve, Stream *stream, WaitNotificationFn wait_fn)
#define ORT_ENFORCE(condition,...)
Definition: common.h:172
constexpr const char * CUDA
Definition: allocator.h:44
constexpr const char * HIP_PINNED
Definition: allocator.h:50
constexpr const char * OpenVINO_RT_NPU
Definition: allocator.h:54
const OrtMemoryInfo & Info() const
Definition: allocator.h:94
constexpr const char * HIP
Definition: allocator.h:49
int initial_growth_chunk_size_bytes
Definition: allocator.h:38
OrtArenaCfg(size_t max_mem, int arena_extend_strategy, int initial_chunk_size_bytes, int max_dead_bytes_per_chunk, int initial_growth_chunk_size_bytes, int64_t max_power_of_two_extend_bytes)
Definition: allocator.h:24
constexpr const char * WEBNN_TENSOR
Definition: allocator.h:56
static bool CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t alignment, size_t *out) noexcept
int arena_extend_strategy
Definition: allocator.h:35
static IAllocatorUniquePtr< T > MakeUniquePtr(std::shared_ptr< IAllocator > allocator, size_t count_or_bytes, bool use_reserve=false, Stream *stream=nullptr, WaitNotificationFn wait_fn=nullptr)
Definition: allocator.h:163
std::unique_ptr< T, std::function< void(T *)>> IAllocatorUniquePtr
Definition: allocator.h:69
static bool CalcMemSizeForArray(size_t nmemb, size_t size, size_t *out) noexcept
Definition: allocator.h:99
void Free(void *p) override
virtual void * Reserve(size_t size)
Definition: allocator.h:92
std::map< OrtDevice, AllocatorPtr > AllocatorMap
Definition: allocator.h:264
void AllocatorDefaultFree(void *p)
constexpr const char * CANN
Definition: allocator.h:46
CPUAllocator(const OrtMemoryInfo &memory_info)
Definition: allocator.h:255
void * AllocArray(size_t nmemb, size_t size)
Definition: allocator.h:130
int64_t max_power_of_two_extend_bytes
Definition: allocator.h:39
#define ORT_THROW(...)
Definition: common.h:162
constexpr const char * DML
Definition: allocator.h:48
GLsizeiptr size
Definition: glcorearb.h:664
std::shared_ptr< IAllocator > AllocatorPtr
Definition: allocator.h:263
int initial_chunk_size_bytes
Definition: allocator.h:36
constexpr const char * CANN_PINNED
Definition: allocator.h:47
void * Alloc(size_t size) override
virtual ~IAllocator()=default
void * AllocArrayWithAlignment(size_t nmemb, size_t size)
Definition: allocator.h:143
int max_dead_bytes_per_chunk
Definition: allocator.h:37
IAllocator(const OrtMemoryInfo &info)
Definition: allocator.h:73
constexpr const char * OpenVINO_RT
Definition: allocator.h:53
virtual void * Alloc(size_t size)=0
GLint GLsizei count
Definition: glcorearb.h:405
static IAllocatorUniquePtr< T > MakeUniquePtrFromOrtAllocator(OrtAllocator *ort_allocator, size_t count_or_bytes)
Definition: allocator.h:199
constexpr const char * OpenVINO_CPU
Definition: allocator.h:51