HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CE_Context.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: CE_Context.h ( CE Library, C++)
7  *
8  * COMMENTS: Compute Engine Contexts.
9  */
10 
11 #ifndef __CE_Context__
12 #define __CE_Context__
13 
14 #include "CE_API.h"
15 
16 #ifdef CE_ENABLED
17 
18 #include "CE_Tracing.h"
19 
20 #include <UT/UT_Array.h>
21 #include <UT/UT_Error.h>
22 #include <UT/UT_Map.h>
23 #include <UT/UT_NonCopyable.h>
24 #include <UT/UT_StringMap.h>
25 #include <SYS/SYS_Types.h>
26 
27 class CE_MemoryPool;
28 
29 typedef void (*CE_ErrorCB)(const char *errmsg, UT_ErrorSeverity severity,
30  void *data);
31 
33 {
34 public:
37 
39 
40  virtual void rebindOGLBuffer( uint buf_obj ) = 0;
41  virtual void unbindOGLBuffer() = 0;
42  virtual bool isBinded() = 0;
43 };
44 
45 /// CE_Context encapsulates the OpenCL context and provides various convenience
46 /// functions for loading kernel programs and allocating GPU memory.
48 {
49 public:
50  CE_Context();
51  virtual ~CE_Context();
52 
53  UT_NON_COPYABLE(CE_Context)
54 
55  /// Returns a pointer to the singleton CE_Context object. This function
56  /// attempts to initialize OpenCL if it has not yet been initialized.
57  /// gl_shared should be true if the context will be expected to interoperate
58  /// with the OpenGL context. If both gl_shared and shared_fallback are true,
59  /// then the function will try to make an unshared context in case the
60  /// shared context fails to create.
61  static CE_Context *getContext(bool gl_shared = true,
62  bool shared_fallback = true);
63  /// Returns true if interoperability between CL and GL is possible.
64  static bool isGLSharingPossible();
65 
66  /// Returns the underlying cl::Context object.
67  cl::Context getCLContext() const {return myContext;}
68 
69  /// Returns the cl::CommandQueue object that is used to enqueue OpenCL kernels
70  /// and memory transfers.
71  cl::CommandQueue getQueue() const {return myQueue;}
72 
73  /// Returns the OpenCL Device object.
74  cl::Device getDevice() const {return myDevice;}
75 
76  ceTraceCtx getTraceContext() const {return myTraceCtx;}
77 
78  // Write OpenCL Device info to the supplied buffer.
79  static void getInfo(const cl::Device &device, UT_WorkBuffer &buffer );
80  static void getExtendedInfo(const cl::Device &device, UT_WorkBuffer &buffer );
81 
82  // Write info for all available OpenCL platforms to the supplied buffer.
83  static void getAllPlatformsInfo(UT_WorkBuffer &buffer);
84 
85  /// Get the suggested global and local ranges for the given 1-D kernel over
86  /// the specified number of items.
87  void get1DRanges(const cl::Kernel &k, size_t items,
89 
90  /// Get the maximum workgroup size for the given kernel.
91  size_t getMaxWorkgroupSize(const cl::Kernel &k);
92  /// Get the array of maximum work items along each dimension supported by the
93  /// compute device.
94  std::vector<size_t> getMaxWorkItemSizes();
95 
96  /// Round up a provided group size to a larger clean one as some
97  /// drivers die with prime-based group sizes.
98  /// Less than 1024 is raised to next power of 2, greater is to a multiple
99  /// of 1024.
100  static size_t roundUpGroupSize(size_t gsize);
101 
102  /// Loads the OpenCL program specified by progname. This function searches
103  /// for the file in the HOUDINI_OCL_PATH environment variable. Any compile-
104  /// time options can be passed in the options parameter. If the program
105  /// load succeeds, the progname will be cached, using the progname and
106  /// options strings together as a hash value lookup. In this way the same
107  /// OpenCL program can be loaded several times with different compile-time
108  /// flags.
109  cl::Program loadProgram(const char *progname, const char *options = NULL,
110  bool recompile = false);
111  cl::Program compileProgram(const char *progtext, const char *options = NULL,
112  bool recompile = false);
113 
114  /// Create an OpenCL kernel named kernelname from the program specified by
115  /// progname. For some types of devices these kernels will be cached, as
116  /// kernels can be expensive to create. This is the recommended method
117  /// for creating kernels.
118  cl::Kernel loadKernel(const cl::Program &prog, const UT_StringRef &kernelname);
119  cl::Kernel loadKernel(const char *progname, const UT_StringRef &kernelname,
120  const char *options = NULL)
121  { return loadKernel(loadProgram(progname, options), kernelname); }
122 
123  /// Returns whether the CE_Context has been successfully initialized.
124  bool isValid() const {return myIsValid;}
125 
126  /// Returns whether the singleton CE_Context has been initialized yet. This
127  /// can be used to test whether OpenCL has been initialized without calling
128  /// getContext and forcing an attempt at initialization.
129  static bool isInitialized(bool gl_shared=false);
130 
131  /// Returns true if the OpenCL device is running on the CPU.
132  bool isCPU() const;
133 
134  /// Returns true if the OpenCL device supports double precision.
135  bool hasDoubleSupport() const {return mySupportsDouble;}
136  /// Returns true if the OpenCL device supports writing to 3D image objects.
137  bool has3DImageWriteSupport() const {return mySupports3DImageWrites;}
138 
139  /// Block until any outstanding kernel or memory transfers on the main
140  /// CommandQueue have executed. If sweepPool is true, the context's
141  /// CE_MemoryPool will sweep for any buffers that were in use when their
142  /// CE_Grid's went out of scope, but that were still active in kernels.
143  void finish(bool sweepPool=true);
144 
145  /// Allocate a buffer of specified size on the CE_Device.
146  /// usePool= true, attempts to use the underlying CE_MemoryPool to possibly return
147  /// an already allocated, unused buffer.
148  /// read=true, creates a buffer that is readable inside kernels.
149  /// write=true, creates a buffer that is writable inside kernels.
150  /// ogl_bind, specifies an OGL buffer to bind to.
151  cl::Buffer allocBuffer(int64 size, bool usePool=true, bool read=true, bool write=true, uint32 ogl_bind=SYS_UINT32_MAX);
152 
153  /// Release the specified buffer, possibly to the CE_MemoryPool.
154  void releaseBuffer(cl::Buffer &&buf);
155 
156  /// Read the specified number of bytes from the buffer.
157  void readBuffer(const cl::Buffer &buf, size_t size, void *p, bool blocking = true, size_t offset = 0);
158 
159  /// Write the specified number of bytes to the buffer.
160  void writeBuffer(const cl::Buffer &buf, size_t size, const void *p, bool blocking = true, size_t offset = 0);
161 
162  /// Enqueue the kernel over the provided ranges.
163  void enqueueKernel(const cl::Kernel &kernel, const cl::NDRange &global, const cl::NDRange &local);
164 
165  /// Keep a map of buffers to bind at render time.
166  /// The first time a cl::Buffer is created it can be registered to rebind to an OGL vertex buffer at drawing time.
167  /// The uint returned by the register call can be attached to a detail attribute and the drawing code can convert
168  /// the CL Buffer to a CL BufferGL.
169  uint32 registerDelayedOGLBindBuffer(CE_DelayedOGLBindBuffer* buffer);
170  void unregisterDelayedOGLBindBuffer(uint32 id);
171  CE_DelayedOGLBindBuffer* lookupDelayedOGLBindBuffer( uint id );
172 
173  /// Clear the CE_MemoryPool object.
174  void clearMemoryPool();
175 
176  /// Return a pointer to pinned (page-locked) host memory. On some devices
177  /// (Nvidia), using this type of memory for the PCI/E host/device transfers
178  /// can double the throughput. Will return NULL if the memory can't be
179  /// allocated, or if the device is not a GPU.
180  fpreal32 *getPinnedBuffer(int64 size);
181 
182  cl::Buffer getXNoiseData();
183 
184  /// Standard error reporting for OpenCL exceptions. They should generally
185  /// take the form:
186  /// @code
187  /// try
188  /// {
189  /// OpenCL calls...
190  /// }
191  /// catch(cl::Error &err)
192  /// {
193  /// CE_Context::reportError(err);
194  /// ///cleanup
195  /// }
196  /// @endcode
197  /// This will not capture delayed errors, however. Instead
198  /// you will need to add a callback to intercept them.
199  static void reportError(const cl::Error &err);
200  static void outputErrorMessage(const char *errMsg);
201  static void setErrorCB(CE_ErrorCB callback, void *data);
202  static void outputWarningMessage(const char *errMsg);
203 
204  static void initMainSharedGLContext( int devicetype, void* context, void* display );
205  static bool useHalfNormalDelayedBindBuffer();
206 
207  /// Records whether an operation has run out of memory (pass false to
208  /// clear the flag), allowing us to report the failure elsewhere.
209  void setOutOfMemoryFailure(bool hasfailed = true) { myOutOfMemoryFailure = hasfailed; }
210  bool hasOutOfMemoryFailureHappened() const { return myOutOfMemoryFailure; }
211 
212  /// This structure holds a device name, vendor, and device number with respect to
213  /// its vendor platform.
215  {
219  int number;
220  };
221  /// Get the vector of available devices of the given type.
222  static void getDevices(UT_Array<DeviceDescriptor>&, cl_device_type t);
223 
224  /// Get an index to the preferred/default device for the specified device
225  /// type and the list of available devices.
226  static int getDefaultDevice(
228 
229  /// Returns true if environment variables are set that override preferences.
230  static bool isEnvironmentOverride();
231 
232  // Queries the device by calling clGetDeviceInfo, but returning false and setting
233  // result to zero for unknown flags or flags that are disabled with environment
234  // variables.
235  template <class T>
236  static bool getDeviceInfoRestricted(cl_device_id device, cl_uint flag, T &result);
237 
238  /// Queries the current device given the specified flag using clGetDeviceInfo,
239  /// used by ocldeviceinfo EXPR function. Returns false for unrecognized flag.
240  bool getDeviceInfo(const char *flag, fpreal &result);
241 
242 protected:
243  cl::Program *doCompileProgram(const char *progtext, const char *options);
244 
245  /// Initialize the context for the given device.
246  void init(cl::Context &context, cl::Device &device);
247 
248  /// Releases the pinned, page-locked memory buffer.
249  void releasePinnedBuffer();
250 
251 
257  bool myIsValid;
261 
262  struct KernelInfo
263  {
266  };
267 
270 
272 
273  // The pinned buffer is unique to the main thread.
276 
278 
280 
281  static void* theGLContext;
282  static void* theGLDisplay;
283  static int theGLDeviceType;
284 };
285 
287 ce_enqueueKernel(const cl::CommandQueue& queue, const cl::Kernel &kernel,
288  const cl::NDRange &offset, const cl::NDRange &global, const cl::NDRange &local,
289  const std::vector<cl::Event>* events,
290  cl::Event* event);
291 
292 #endif
293 #endif
294 
#define CE_API
Definition: CE_API.h:10
struct _cl_device_id * cl_device_id
Definition: cl.h:30
uint32_t cl_uint
Definition: cl_platform.h:261
bool has3DImageWriteSupport() const
Returns true if the OpenCL device supports writing to 3D image objects.
Definition: CE_Context.h:137
bool myIsValid
Definition: CE_Context.h:257
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glcorearb.h:2540
cl::Device getDevice() const
Returns the OpenCL Device object.
Definition: CE_Context.h:74
Unsorted map container.
Definition: UT_Map.h:107
*get result *(waiting if necessary)*A common idiom is to fire a bunch of sub tasks at the and then *wait for them to all complete We provide a helper class
Definition: thread.h:623
void
Definition: png.h:1083
GLboolean * data
Definition: glcorearb.h:131
bool myOutOfMemoryFailure
Definition: CE_Context.h:277
GLboolean GLboolean g
Definition: glcorearb.h:1222
cl::Kernel loadKernel(const char *progname, const UT_StringRef &kernelname, const char *options=NULL)
Definition: CE_Context.h:119
bool mySupports3DImageWrites
Definition: CE_Context.h:259
UT_ErrorSeverity
Definition: UT_Error.h:25
int32_t cl_int
Definition: cl_platform.h:260
ceTraceCtx getTraceContext() const
Definition: CE_Context.h:76
CE_API cl_int ce_enqueueKernel(const cl::CommandQueue &queue, const cl::Kernel &kernel, const cl::NDRange &offset, const cl::NDRange &global, const cl::NDRange &local, const std::vector< cl::Event > *events, cl::Event *event)
**But if you need a result
Definition: thread.h:613
virtual ~CE_DelayedOGLBindBuffer()
Definition: CE_Context.h:36
float fpreal32
Definition: SYS_Types.h:200
void read(T &in, bool &v)
Definition: ImfXdr.h:502
void * ceTraceCtx
Definition: CE_Tracing.h:59
cl::CommandQueue getQueue() const
Definition: CE_Context.h:71
struct _cl_event * event
Definition: glcorearb.h:2961
fpreal32 * myPinnedData
Definition: CE_Context.h:275
Event interface for cl_event.
Definition: cl.hpp:1647
cl::CommandQueue myQueue
Definition: CE_Context.h:253
bool isValid() const
Returns whether the CE_Context has been successfully initialized.
Definition: CE_Context.h:124
GLintptr offset
Definition: glcorearb.h:665
UT_Map< uint32, CE_DelayedOGLBindBuffer * > myDelayedOGLBindBuffers
Definition: CE_Context.h:279
Definition: core.h:760
ceTraceCtx myTraceCtx
Definition: CE_Context.h:255
cl_bitfield cl_device_type
Definition: cl.h:42
cl::CommandQueue myDeviceQueue
Definition: CE_Context.h:254
GLuint writeBuffer
Definition: glcorearb.h:2674
CE_MemoryPool * myMemPool
Definition: CE_Context.h:271
#define UT_NON_COPYABLE(CLASS)
Define deleted copy constructor and assignment operator inside a class.
*get result *(waiting if necessary)*A common idiom is to fire a bunch of sub tasks at the queue
Definition: thread.h:623
long long int64
Definition: SYS_Types.h:116
cl::Device myDevice
Definition: CE_Context.h:256
static void * theGLDisplay
Definition: CE_Context.h:282
void setOutOfMemoryFailure(bool hasfailed=true)
Definition: CE_Context.h:209
GLenum GLenum severity
Definition: glcorearb.h:2539
#define SYS_UINT32_MAX
Definition: SYS_Types.h:172
GLdouble t
Definition: glad.h:2397
GLsizeiptr size
Definition: glcorearb.h:664
cl::Context myContext
Definition: CE_Context.h:252
CommandQueue interface for cl_command_queue.
Definition: cl.hpp:2850
static void * theGLContext
Definition: CE_Context.h:281
cl_int getInfo(Func f, cl_uint name, T *param)
Definition: cl.hpp:1030
fpreal64 fpreal
Definition: SYS_Types.h:277
bool mySupportsDouble
Definition: CE_Context.h:258
unsigned int uint32
Definition: SYS_Types.h:40
Memory buffer interface.
Definition: cl.hpp:1867
cl::Buffer myXNoiseData
Definition: CE_Context.h:260
NDRange interface.
Definition: cl.hpp:2466
UT_StringHolder name
Definition: CE_Context.h:264
void(* CE_ErrorCB)(const char *errmsg, UT_ErrorSeverity severity, void *data)
Definition: CE_Context.h:29
Kernel interface that implements cl_kernel.
Definition: cl.hpp:2544
cl::Context getCLContext() const
Returns the underlying cl::Context object.
Definition: CE_Context.h:67
static int theGLDeviceType
Definition: CE_Context.h:283
UT_Map< const _cl_program *, UT_Array< KernelInfo > * > myKernelTable
Definition: CE_Context.h:269
Device interface for cl_device_id.
Definition: cl.hpp:1265
bool hasDoubleSupport() const
Returns true if the OpenCL device supports double precision.
Definition: CE_Context.h:135
void write(T &out, bool v)
Definition: ImfXdr.h:287
Program interface that implements cl_program.
Definition: cl.hpp:2649
bool hasOutOfMemoryFailureHappened() const
Definition: CE_Context.h:210
unsigned int uint
Definition: SYS_Types.h:45
cl::Kernel * kernel
Definition: CE_Context.h:265
Definition: format.h:895
cl::Buffer myPinnedBuffer
Definition: CE_Context.h:274
UT_StringMap< cl::Program * > myProgramTable
Definition: CE_Context.h:268