HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CE_Context.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: CE_Context.h ( CE Library, C++)
7  *
8  * COMMENTS: Compute Engine Contexts.
9  */
10 
11 #ifndef __CE_Context__
12 #define __CE_Context__
13 
14 #include "CE_API.h"
15 #include "CE_Tracing.h"
16 
17 #include <UT/UT_Array.h>
18 #include <UT/UT_Error.h>
19 #include <UT/UT_Map.h>
20 #include <UT/UT_NonCopyable.h>
21 #include <UT/UT_StringMap.h>
22 #include <SYS/SYS_Types.h>
23 #include <SYS/SYS_Handle.h>
24 #include <iosfwd>
25 
26 class CE_MemoryPool;
27 class UT_MemoryResource;
28 
29 typedef void (*CE_ErrorCB)(const char *errmsg, UT_ErrorSeverity severity,
30  void *data);
31 
33 {
34 public:
37 
39 
40  virtual void rebindOGLBuffer( uint buf_obj ) = 0;
41  virtual void unbindOGLBuffer() = 0;
42  virtual bool isBinded() = 0;
43 };
44 
45 /// An OpenCL buffer that is backed by external memory. External memory is accessed
46 /// through a platform-specific memory handle.
47 /// Requires that the OpenCL driver implement the following extensions:
48 /// cl_khr_external_memory
49 /// cl_khr_external_memory_opaque_fd (only on Linux and possibly Mac)
50 /// cl_khr_external_memory_win32 (only on Windows)
51 class CE_API CE_ExternalBuffer : public cl::Buffer
52 {
53 public:
55  const cl::Context &context,
57  ::size_t size,
58  SYS_Handle handle,
59  cl_int *err = nullptr);
60 
61  CE_ExternalBuffer() : cl::Buffer() {}
62 };
63 /// An OpenCL image that is backed by external memory, similar to
64 /// CE_ExternalBuffer.
66 {
67 public:
69  const cl::Context &context,
71  const cl_image_format *format,
72  const cl_image_desc *desc,
73  SYS_Handle handle,
74  cl_int *err = nullptr);
75 
76  CE_ExternalImage() : cl::Image() {}
77 };
78 
79 #ifndef CL_UUID_SIZE_KHR
80 #define CL_UUID_SIZE_KHR 16
81 #endif
82 
83 /// CE_Context encapsulates the OpenCL context and provides various convenience
84 /// functions for loading kernel programs and allocating GPU memory.
86 {
87 public:
88  CE_Context();
89  virtual ~CE_Context();
90 
92 
93  /// Returns a pointer to the singleton CE_Context object. This function
94  /// attempts to initialize OpenCL if it has not been initialized yet.
95  /// gl_shared should be true if the context will be expected to interoperate
96  /// with the OpenGL context. If both gl_shared and shared_fallback are true,
97  /// then the function will try to make an unshared context in case the
98  /// shared context fails to create.
99  static CE_Context *getContext(bool gl_shared = true,
100  bool shared_fallback = true);
101  /// Returns true if interoperability between CL and GL is possible.
102  static bool isGLSharingPossible();
103 
104  /// Returns the underlying cl::Context object.
105  cl::Context getCLContext() const {return myContext;}
106 
107  /// Returns the cl::Queue object that is used to enqueue OpenCL kernels
108  /// and memory transfers.
109  cl::CommandQueue getQueue() const {return myQueue;}
110 
111  /// Returns the OpenCL Device object.
112  cl::Device getDevice() const {return myDevice;}
113 
114  ceTraceCtx getTraceContext() const {return myTraceCtx;} ///< Returns the trace context stored in myTraceCtx.
115 
116  // Write OpenCL Device info to the supplied buffer.
117  static void getInfo(const cl::Device &device, UT_WorkBuffer &buffer );
118  static void getExtendedInfo(const cl::Device &device, UT_WorkBuffer &buffer );
119 
120  // Write info for all available OpenCL platforms to the supplied buffer.
121  static void getAllPlatformsInfo(UT_WorkBuffer &buffer);
122 
123  /// Get the suggested global and local ranges for the given 1-D kernel over
124  /// the specified number of items.
125  void get1DRanges(const cl::Kernel &k, size_t items,
126  cl::NDRange &g, cl::NDRange &l);
127 
128  /// Get the suggested global and local ranges for a reduction for the
129  /// given 1-D kernel over the specified number of items.
130  void getReductionRanges(const cl::Kernel &k, size_t items, size_t accumsize,
131  cl::NDRange &global_range, cl::NDRange &local_range,
132  uint &groupsize, uint &ngroups) const;
133 
134  /// Get the maximum workgroup size for the given kernel.
135  size_t getMaxWorkgroupSize(const cl::Kernel &k);
136  /// Get the array of maximum work items along each dimension supported by the
137  /// compute device.
138  std::vector<size_t> getMaxWorkItemSizes();
139 
140  /// Round up a provided group size to a larger clean one as some
141  /// drivers die with prime-based group sizes.
142  /// Sizes less than 1024 are raised to the next power of 2; larger ones to a
143  /// multiple of 1024.
144  static size_t roundUpGroupSize(size_t gsize);
145 
146  /// Loads the OpenCL program specified by progname. This function searches
147  /// for the file in the HOUDINI_OCL_PATH environment variable. Any compile-
148  /// time options can be passed in the options parameter. If the program
149  /// load succeeds, the program will be cached, using the progname and
150  /// options strings together as the lookup key. In this way the same
151  /// OpenCL program can be loaded several times with different compile-time
152  /// flags.
153  cl::Program loadProgram(const char *progname,
154  const char *options = nullptr,
155  bool recompile = false);
156  cl::Program compileProgram(const char *progtext,
157  const char *options = nullptr,
158  bool recompile = false);
159 
160  /// Create an OpenCL kernel named kernelname from the program specified by
161  /// progname. For some types of devices these kernels will be cached, as
162  /// kernels can be expensive to create. This is the recommended method
163  /// for creating kernels.
164  cl::Kernel loadKernel(const cl::Program &prog, const UT_StringRef &kernelname);
165  cl::Kernel loadKernel(const char *progname, const UT_StringRef &kernelname,
166  const char *options = nullptr)
167  { return loadKernel(loadProgram(progname, options), kernelname); }
168 
169  /// Returns whether the CE_Context has been successfully initialized.
170  bool isValid() const {return myIsValid;}
171 
172  /// Returns whether the singleton CE_Context has been initialized yet. This
173  /// can be used to test whether OpenCL has been initialized without calling
174  /// getContext and forcing an attempt at initialization.
175  static bool isInitialized(bool gl_shared=false);
176 
177  /// Returns true if the OpenCL device is running on the CPU.
178  bool isCPU() const;
179 
180  /// Returns true if the OpenCL device supports double precision.
181  bool hasDoubleSupport() const {return mySupportsDouble;}
182  /// Returns true if the OpenCL device supports writing to 3D image objects.
183  bool has3DImageWriteSupport() const {return mySupports3DImageWrites;}
184 
185  /// Block until any outstanding kernel or memory transfers on the main
186  /// CommandQueue have executed. If sweep_pool is true, the context's
187  /// CE_MemoryPool will sweep for any buffers that were in use when their
188  /// CE_Grids went out of scope, but that were still active in kernels.
189  void finish(bool sweep_pool=true);
190 
191  /// Allocate a buffer of specified size on the CE_Device.
192  /// use_pool= true, attempts to use the underlying CE_MemoryPool to possibly return
193  /// an already allocated, unused buffer.
194  /// read=true, creates a buffer that is readable inside kernels.
195  /// write=true, creates a buffer that is writable inside kernels.
196  /// ogl_bind, specifies an OGL buffer to bind to.
197  cl::Buffer allocBuffer(int64 size, bool use_pool=true, bool read=true, bool write=true, uint32 ogl_bind=SYS_UINT32_MAX);
198 
199  /// Release the specified buffer, possibly to the CE_MemoryPool.
200  void releaseBuffer(cl::Buffer &&buf, bool use_pool=true);
201 
202  /// Read the specified number of bytes from the buffer.
203  void readBuffer(const cl::Buffer &buf, size_t size, void *p, bool blocking = true, size_t offset = 0);
204 
205  /// Write the specified number of bytes to the buffer.
206  void writeBuffer(const cl::Buffer &buf, size_t size, const void *p, bool blocking = true, size_t offset = 0);
207 
208  /// Copy the specified amount of data from source buffer to destination
209  /// buffer.
210  void copyBuffer(const cl::Buffer &src, const cl::Buffer &dst, size_t size,
211  size_t src_offset = 0, size_t dst_offset = 0);
212 
213  /// Copy data from the source buffer into the specified region of the
214  /// destination image.
215  void copyBufferToImage(const cl::Buffer &src, const cl::Image &dst,
216  const cl::size_t<3>& dst_origin,
217  const cl::size_t<3>& dst_region,
218  size_t src_offset = 0);
219 
220  /// Copy data from the specified region of the source image to the
221  /// destination buffer.
222  void copyImageToBuffer(const cl::Image &src, const cl::Buffer &dst,
223  const cl::size_t<3>& src_origin,
224  const cl::size_t<3>& src_region,
225  size_t dst_offset = 0);
226 
227  /// Enqueue the kernel over the provided ranges.
228  void enqueueKernel(const cl::Kernel &kernel, const cl::NDRange &global, const cl::NDRange &local);
229 
230  /// Keep a map of buffers to bind at render time.
231  /// The first time a cl::Buffer is created it can be registered to rebind to an OGL vertex buffer at drawing time.
232  /// The uint returned by the register call can be attached to a detail attribute and the drawing code can convert
233  /// the CL Buffer to a CL BufferGL.
234  uint32 registerDelayedOGLBindBuffer(CE_DelayedOGLBindBuffer* buffer);
235  void unregisterDelayedOGLBindBuffer(uint32 id);
236  CE_DelayedOGLBindBuffer* lookupDelayedOGLBindBuffer( uint id );
237 
238  /// Returns true if the context supports querying for device and driver
239  /// UUIDs that are unique across APIs (allowing for example, to match Vulkan
240  /// device selection). This requires the cl_khr_device_uuid extension.
241  bool supportsUUID();
242  /// Writes the OpenCL device UUID. Check that supportsUUID() returns true
243  /// before trying to query the UUID.
244  void getDeviceUUID(cl_uchar (&uuid)[CL_UUID_SIZE_KHR]);
245  /// Writes the OpenCL driver UUID. Check that supportsUUID() returns true
246  /// before trying to query the UUID.
247  void getDriverUUID(cl_uchar (&uuid)[CL_UUID_SIZE_KHR]);
248 
249  /// Returns true if the context supports the creation of buffers backed by
250  /// external memory. Mainly for use in sharing buffers with Vulkan.
251  bool supportsExternalMemory();
252 
253  /// Create a buffer backed by external memory. The handle will be the reference
254  /// to the actual memory object, and its lifetime is externally managed. The
255  /// memory might be owned by another GPU API, such as Vulkan. In those cases,
256  /// that handle can be obtained by a call to
257  /// vkGetMemoryFdKHR or vkGetMemoryWin32HandleKHR
258  /// This requires a few extensions to be available. Check that the context
259  /// supports these by calling CE_Context::supportsExternalMemory first.
260  CE_ExternalBuffer createExternalMemoryBuffer(SYS_Handle handle, int64_t size, bool read=true, bool write=true);
261 
262  CE_ExternalImage createExternalImage(SYS_Handle handle,
263  const cl_image_format& format,
264  const cl_image_desc& image_desc,
265  bool read = true, bool write = true);
266 
267  /// Clear the CE_MemoryPool object.
268  void clearMemoryPool();
269 
270  /// Sweep the memory pool for in use buffers. We call this on every finish
271  /// call as well, but if we have a large sequence of operations that use
272  /// temporary buffers without a finish, it's a good idea to call this directly
273  /// occasionally.
274  void sweepInUseMemory();
275 
276  /// Return a pointer to pinned (page-locked) host memory. On some devices
277  /// (Nvidia), using this type of memory for the PCI/E host/device transfers
278  /// can double the throughput. Will return NULL if the memory can't be
279  /// allocated, or if the device is not a GPU.
280  fpreal32 *getPinnedBuffer(int64 size);
281 
282  cl::Buffer getXNoiseData();
283 
284  /// Standard error reporting for OpenCL exceptions. They should generally
285  /// take the form:
286  /// @code
287  /// try
288  /// {
289  /// OpenCL calls...
290  /// }
291  /// catch(cl::Error &err)
292  /// {
293  /// CE_Context::reportError(err);
294  /// ///cleanup
295  /// }
296  /// @endcode
297  /// This will not capture delayed errors, however. Instead
298  /// you will need to add a callback to intercept them.
299  static void reportError(const cl::Error &err);
300  static void outputErrorMessage(const char *err_msg);
301  static void setErrorCB(CE_ErrorCB callback, void *data);
302  static void outputWarningMessage(const char *err_msg);
303 
304  static void initMainSharedGLContext( int devicetype, void* context, void* display, const cl_uchar (&uuid)[16]);
305  static bool useHalfNormalDelayedBindBuffer();
306 
307  /// Records whether an operation has run out of memory, allowing us
308  /// to report elsewhere. Pass hasfailed=false to clear the flag.
309  void setOutOfMemoryFailure(bool hasfailed = true) { myOutOfMemoryFailure = hasfailed; }
310  bool hasOutOfMemoryFailureHappened() const { return myOutOfMemoryFailure; } ///< Returns the flag written by setOutOfMemoryFailure().
311 
312  /// Combination of environment variable, but overridden to be false by known buggy drivers
313  static bool shouldUseOCLOGLInterop();
314 
315  /// This structure holds a device name, vendor, device number, and driver version with respect to
316  /// its vendor platform.
318  {
322  int number;
324  };
325  /// Get the vector of available devices of the given type.
326  static void getDevices(UT_Array<DeviceDescriptor>&, cl_device_type t);
327 
328  /// Get an index to the preferred/default device for the specified device
329  /// type and the list of available devices.
330  static int getDefaultDevice(
332 
333  /// Returns true if environment variables are set that override preferences.
334  static bool isEnvironmentOverride();
335 
336  // Queries the device by calling clGetDeviceInfo, but returning false and setting
337  // result to zero for unknown flags or flags that are disabled with environment
338  // variables.
339  template <class T>
340  static bool getDeviceInfoRestricted(cl_device_id device, cl_uint flag, T &result);
341 
342  /// Queries the current device given the specified flag using clGetDeviceInfo,
343  /// used by ocldeviceinfo EXPR function. Returns false for unrecognized flag.
344  bool getDeviceInfo(const char *flag, fpreal &result);
345 
346  /// This function returns the total size of addressable compute memory for
347  /// the current device.
348  size_t getAddressableMemory() const;
349 
350  /// Report memory usage
351  void reportUsage(std::ostream &os) const;
352 
353  /// Requests the specified amount of memory to be freed from the OpenCL
354  /// device and returns the amount actually freed.
355  int64 requestMemoryFromResource(int64 amount);
356  /// Returns the memory resource object for the OpenCL device. All objects
357  /// that use OpenCL memory should ideally be registered as clients for this
358  /// resource.
360  {
361  return myMemResource;
362  }
363 
364 protected:
365  cl::Program *doCompileProgram(const char *progtext, const char *options,
366  bool recompile);
367 
368  /// Initialize the context for the given device.
369  void init(cl::Context &context, cl::Device &device, bool gl_shared);
370 
371  /// Releases the pinned, page-locked memory buffer.
372  void releasePinnedBuffer();
373 
374 
380  bool myIsValid;
384 
385  struct KernelInfo
386  {
389  };
390 
393 
396 
397  // The pinned buffer is unique to the main thread.
400 
402 
404 
405  static void* theGLContext;
406  static void* theGLDisplay;
407  static int theGLDeviceType;
408  static char theGLDeviceUUID[16];
409 };
410 
411 /// NOTE: this function will retry after freeing some memory if it fails
412 /// due to an allocation failure.
414 ce_enqueueKernel(const cl::CommandQueue& queue, const cl::Kernel &kernel,
415  const cl::NDRange &offset, const cl::NDRange &global, const cl::NDRange &local,
416  const std::vector<cl::Event>* events,
417  cl::Event* event);
418 
419 #endif
#define CE_API
Definition: CE_API.h:13
struct _cl_device_id * cl_device_id
Definition: cl.h:30
uint32_t cl_uint
Definition: cl_platform.h:261
bool has3DImageWriteSupport() const
Returns true if the OpenCL device supports writing to 3D image objects.
Definition: CE_Context.h:183
bool myIsValid
Definition: CE_Context.h:380
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glcorearb.h:2540
GLbitfield flags
Definition: glcorearb.h:1596
cl::Device getDevice() const
Returns the OpenCL Device object.
Definition: CE_Context.h:112
Unsorted map container.
Definition: UT_Map.h:109
*get result *(waiting if necessary)*A common idiom is to fire a bunch of sub tasks at the and then *wait for them to all complete We provide a helper class
Definition: thread.h:632
void
Definition: png.h:1083
GLboolean * data
Definition: glcorearb.h:131
bool myOutOfMemoryFailure
Definition: CE_Context.h:401
GLboolean GLboolean g
Definition: glcorearb.h:1222
UT_StringHolder platformVendor
Definition: CE_Context.h:321
bool mySupports3DImageWrites
Definition: CE_Context.h:382
UT_ErrorSeverity
Definition: UT_Error.h:25
int32_t cl_int
Definition: cl_platform.h:260
uint8_t cl_uchar
Definition: cl_platform.h:257
ceTraceCtx getTraceContext() const
Definition: CE_Context.h:114
CE_API cl_int ce_enqueueKernel(const cl::CommandQueue &queue, const cl::Kernel &kernel, const cl::NDRange &offset, const cl::NDRange &global, const cl::NDRange &local, const std::vector< cl::Event > *events, cl::Event *event)
**But if you need a result
Definition: thread.h:622
virtual ~CE_DelayedOGLBindBuffer()
Definition: CE_Context.h:36
Definition: Image.h:45
float fpreal32
Definition: SYS_Types.h:200
void * ceTraceCtx
Definition: CE_Tracing.h:59
GLuint buffer
Definition: glcorearb.h:660
cl::CommandQueue getQueue() const
Definition: CE_Context.h:109
#define CL_UUID_SIZE_KHR
Definition: CE_Context.h:80
struct _cl_event * event
Definition: glcorearb.h:2961
fpreal32 * myPinnedData
Definition: CE_Context.h:399
Event interface for cl_event.
Definition: cl.hpp:1647
cl::CommandQueue myQueue
Definition: CE_Context.h:376
bool isValid() const
Returns whether the CE_Context has been successfully initialized.
Definition: CE_Context.h:170
UT_StringHolder driverVersion
Definition: CE_Context.h:323
GLintptr offset
Definition: glcorearb.h:665
UT_Map< uint32, CE_DelayedOGLBindBuffer * > myDelayedOGLBindBuffers
Definition: CE_Context.h:403
ceTraceCtx myTraceCtx
Definition: CE_Context.h:378
cl_bitfield cl_device_type
Definition: cl.h:42
cl::CommandQueue myDeviceQueue
Definition: CE_Context.h:377
GLuint writeBuffer
Definition: glcorearb.h:2674
GLint GLint GLsizei GLint GLenum format
Definition: glcorearb.h:108
CE_MemoryPool * myMemPool
Definition: CE_Context.h:394
#define UT_NON_COPYABLE(CLASS)
Define deleted copy constructor and assignment operator inside a class.
*get result *(waiting if necessary)*A common idiom is to fire a bunch of sub tasks at the queue
Definition: thread.h:632
long long int64
Definition: SYS_Types.h:116
cl::Device myDevice
Definition: CE_Context.h:379
static void * theGLDisplay
Definition: CE_Context.h:406
void setOutOfMemoryFailure(bool hasfailed=true)
Definition: CE_Context.h:309
GLenum GLenum severity
Definition: glcorearb.h:2539
#define SYS_UINT32_MAX
Definition: SYS_Types.h:172
GLdouble t
Definition: glad.h:2397
GLsizeiptr size
Definition: glcorearb.h:664
GLenum GLenum dst
Definition: glcorearb.h:1793
cl::Context myContext
Definition: CE_Context.h:375
CommandQueue interface for cl_command_queue.
Definition: cl.hpp:2850
static void * theGLContext
Definition: CE_Context.h:405
cl_int getInfo(Func f, cl_uint name, T *param)
Definition: cl.hpp:1030
cl::Kernel loadKernel(const char *progname, const UT_StringRef &kernelname, const char *options=nullptr)
Definition: CE_Context.h:165
fpreal64 fpreal
Definition: SYS_Types.h:278
UT_MemoryResource * getMemoryResource() const
Definition: CE_Context.h:359
bool mySupportsDouble
Definition: CE_Context.h:381
Base class interface for all images.
Definition: cl.hpp:2098
unsigned int uint32
Definition: SYS_Types.h:40
Memory buffer interface.
Definition: cl.hpp:1867
cl::Buffer myXNoiseData
Definition: CE_Context.h:383
NDRange interface.
Definition: cl.hpp:2466
UT_StringHolder name
Definition: CE_Context.h:387
void(* CE_ErrorCB)(const char *errmsg, UT_ErrorSeverity severity, void *data)
Definition: CE_Context.h:29
Kernel interface that implements cl_kernel.
Definition: cl.hpp:2544
cl::Context getCLContext() const
Returns the underlying cl::Context object.
Definition: CE_Context.h:105
UT_MemoryResource * myMemResource
Definition: CE_Context.h:395
static int theGLDeviceType
Definition: CE_Context.h:407
UT_Map< const _cl_program *, UT_Array< KernelInfo > * > myKernelTable
Definition: CE_Context.h:392
Device interface for cl_device_id.
Definition: cl.hpp:1265
bool hasDoubleSupport() const
Returns true if the OpenCL device supports double precision.
Definition: CE_Context.h:181
Program interface that implements cl_program.
Definition: cl.hpp:2649
bool hasOutOfMemoryFailureHappened() const
Definition: CE_Context.h:310
unsigned int uint
Definition: SYS_Types.h:45
cl::Kernel * kernel
Definition: CE_Context.h:388
cl_bitfield cl_mem_flags
Definition: cl.h:66
Definition: format.h:1821
cl::Buffer myPinnedBuffer
Definition: CE_Context.h:398
UT_StringMap< cl::Program * > myProgramTable
Definition: CE_Context.h:391
GLenum src
Definition: glcorearb.h:1793