HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CE_Context.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: CE_Context.h ( CE Library, C++)
7  *
8  * COMMENTS: Compute Engine Contexts.
9  */
10 
11 #ifndef __CE_Context__
12 #define __CE_Context__
13 
14 #include "CE_API.h"
15 
16 #ifdef CE_ENABLED
17 
18 #include <SYS/SYS_Types.h>
19 
20 #include <UT/UT_StringMap.h>
21 #include <UT/UT_Map.h>
22 #include <UT/UT_Error.h>
23 #include <UT/UT_Array.h>
24 
25 #include "CE_Tracing.h"
26 
27 class CE_MemoryPool;
28 
29 typedef void (*CE_ErrorCB)(const char *errmsg, UT_ErrorSeverity severity,
30  void *data);
31 
33 {
34 public:
37 
38  virtual void rebindOGLBuffer( uint buf_obj ) = 0;
39  virtual void unbindOGLBuffer() = 0;
40  virtual bool isBinded() = 0;
41 };
42 
43 /// CE_Context encapsulates the OpenCL context and provides various convenience
44 /// functions for loading kernel programs and allocating GPU memory.
46 {
47 public:
48  CE_Context();
49  virtual ~CE_Context();
50 
51  /// Returns a pointer to the singleton CE_Context object. This function
52  /// attempts to initialize OpenCL if it has not yet been initialized.
53  /// gl_shared should be true if the context will be expected to interoperate
54  /// with the OpenGL context. If both gl_shared and shared_fallback are true,
55  /// then the function will try to make an unshared context in case the
56  /// shared context fails to create.
57  static CE_Context *getContext(bool gl_shared = true,
58  bool shared_fallback = true);
59  /// Returns true if interoperability between CL and GL is possible.
60  static bool isGLSharingPossible();
61 
62  /// Returns the underlying cl::Context object.
63  cl::Context getCLContext() const {return myContext;}
64 
65  /// Returns the cl::Queue object that is used to enqueue OpenCL kernels
66  /// and memory transfers.
67  cl::CommandQueue getQueue() const {return myQueue;}
68 
69  /// Returns the OpenCL Device object.
70  cl::Device getDevice() const {return myDevice;}
71 
72  ceTraceCtx getTraceContext() const {return myTraceCtx;} ///< Returns the trace context handle (myTraceCtx) held by this context.
73 
74  // Write OpenCL Device info to the supplied buffer.
75  static void getInfo(const cl::Device &device, UT_WorkBuffer &buffer );
76  static void getExtendedInfo(const cl::Device &device, UT_WorkBuffer &buffer );
77 
78  // Write info for all available OpenCL platforms to the supplied buffer.
79  static void getAllPlatformsInfo(UT_WorkBuffer &buffer);
80 
81  /// Get the suggested global and local ranges for the given 1-D kernel over
82  /// the specified number of items.
83  void get1DRanges(const cl::Kernel &k, size_t items,
85 
86  /// Get the maximum workgroup size for the given kernel.
87  size_t getMaxWorkgroupSize(const cl::Kernel &k);
88 
89  /// Loads the OpenCL program specified by progname. This function searches
90  /// for the file in the HOUDINI_OCL_PATH environment variable. Any compile-
91  /// time options can be passed in the options parameter. If the program
92  /// load succeeds, the program will be cached, using the progname and
93  /// options strings together as a hash value lookup. In this way the same
94  /// OpenCL program can be loaded several times with different compile-time
95  /// flags.
96  cl::Program loadProgram(const char *progname, const char *options = NULL,
97  bool recompile = false);
98  cl::Program compileProgram(const char *progtext, const char *options = NULL,
99  bool recompile = false);
100 
101  /// Create an OpenCL kernel named kernelname from the program specified by
102  /// progname. For some types of devices these kernels will be cached, as
103  /// kernels can be expensive to create. This is the recommended method
104  /// for creating kernels.
105  cl::Kernel loadKernel(const cl::Program &prog, const UT_StringRef &kernelname);
106  cl::Kernel loadKernel(const char *progname, const UT_StringRef &kernelname,
107  const char *options = NULL)
108  { return loadKernel(loadProgram(progname, options), kernelname); }
109 
110  /// Returns whether the CE_Context has been successfully initialized.
111  bool isValid() const {return myIsValid;}
112 
113  /// Returns whether the singleton CE_Context has been initialized yet. This
114  /// can be used to test whether OpenCL has been initialized without calling
115  /// getContext and forcing an attempt at initialization.
116  static bool isInitialized(bool gl_shared=false);
117 
118  /// Returns true if the OpenCL device is running on the CPU.
119  bool isCPU() const;
120 
121  /// Returns true if the OpenCL device supports double precision.
122  bool hasDoubleSupport() const {return mySupportsDouble;}
123  /// Returns true if the OpenCL device supports writing to 3D image objects.
124  bool has3DImageWriteSupport() const {return mySupports3DImageWrites;}
125 
126  /// Block until any outstanding kernel or memory transfers on the main
127  /// CommandQueue have executed. If sweepPool is true, the context's
128  /// CE_MemoryPool will sweep for any buffers that were in use when their
129  /// CE_Grid's went out of scope, but that were still active in kernels.
130  void finish(bool sweepPool=true);
131 
132  /// Allocate a buffer of specified size on the CE_Device.
133  /// usePool= true, attempts to use the underlying CE_MemoryPool to possibly return
134  /// an already allocated, unused buffer.
135  /// read=true, creates a buffer that is readable inside kernels.
136  /// write=true, creates a buffer that is writable inside kernels.
137  /// ogl_bind, specifies an OGL buffer to bind to.
138  cl::Buffer allocBuffer(int64 size, bool usePool=true, bool read=true, bool write=true, uint32 ogl_bind=SYS_UINT32_MAX);
139 
140  /// Release the specified buffer, possibly to the CE_MemoryPool.
141  void releaseBuffer(cl::Buffer &&buf);
142 
143  /// Read the specified number of bytes from the buffer.
144  void readBuffer(const cl::Buffer &buf, size_t size, void *p, bool blocking = true, size_t offset = 0);
145 
146  /// Write the specified number of bytes to the buffer.
147  void writeBuffer(const cl::Buffer &buf, size_t size, const void *p, bool blocking = true, size_t offset = 0);
148 
149  /// Enqueue the kernel over the provided ranges.
150  void enqueueKernel(const cl::Kernel &kernel, const cl::NDRange &global, const cl::NDRange &local);
151 
152  /// Keep a map buffer to bind at render time
153  /// The first time a cl::Buffer is created it can be registered to rebind to an OGL vertex buffer at drawing time.
154  /// The uint returned by the register call can be attached to a detail attribute and the drawing code can convert
155  /// the CL Buffer to a CL BufferGL.
156  uint32 registerDelayedOGLBindBuffer(CE_DelayedOGLBindBuffer* buffer);
157  void unregisterDelayedOGLBindBuffer(uint32 id);
158  CE_DelayedOGLBindBuffer* lookupDelayedOGLBindBuffer( uint id );
159 
160  /// Clear the CE_MemoryPool object.
161  void clearMemoryPool();
162 
163  /// Return a pointer to pinned (page-locked) host memory. On some devices
164  /// (Nvidia), using this type of memory for the PCI/E host/device transfers
165  /// can double the throughput. Will return NULL if the memory can't be
166  /// allocated, or if the device is not a GPU.
167  fpreal32 *getPinnedBuffer(int64 size);
168 
169  cl::Buffer getXNoiseData();
170 
171  /// Standard error reporting for OpenCL exceptions. They should generally
172  /// take the form:
173  /// @code
174  /// try
175  /// {
176  /// OpenCL calls...
177  /// }
178  /// catch(cl::Error &err)
179  /// {
180  /// CE_Context::reportError(err);
181  /// ///cleanup
182  /// }
183  /// @endcode
184  /// This will not capture delayed errors, however. Instead
185  /// you will need to add a callback to intercept them.
186  static void reportError(const cl::Error &err);
187  static void outputErrorMessage(const char *errMsg);
188  static void setErrorCB(CE_ErrorCB callback, void *data);
189  static void outputWarningMessage(const char *errMsg);
190 
191  static void initMainSharedGLContext( int devicetype, void* context, void* display );
192  static bool useHalfNormalDelayedBindBuffer();
193 
194  /// Records whether an operation has run out of memory, allowing us
195  /// to report elsewhere. Pass hasfailed=false to clear the flag again.
196  void setOutOfMemoryFailure(bool hasfailed = true) { myOutOfMemoryFailure = hasfailed; }
197  bool hasOutOfMemoryFailureHappened() const { return myOutOfMemoryFailure; }
198 
199  /// This structure holds a device name, vendor, and device number with respect to
200  /// its vendor platform.
202  {
206  int number;
207  };
208  /// Get the vector of available devices of the given type.
209  static void getDevices(UT_Array<DeviceDescriptor>&, cl_device_type t);
210 
211  /// Get an index to the preferred/default device for the specified device
212  /// type and the list of available devices.
213  static int getDefaultDevice(
215 
216  /// Returns true if environment variables are set that override preferences.
217  static bool isEnvironmentOverride();
218 
219  // Queries the device by calling clGetDeviceInfo, but returning false and setting
220  // result to zero for unknown flags or flags that are disabled with environment
221  // variables.
222  template <class T>
223  static bool getDeviceInfoRestricted(cl_device_id device, cl_uint flag, T &result);
224 
225  /// Queries the current device given the specified flag using clGetDeviceInfo,
226  /// used by ocldeviceinfo EXPR function. Returns false for unrecognized flag.
227  bool getDeviceInfo(const char *flag, fpreal &result);
228 
229 protected:
230  cl::Program *doCompileProgram(const char *progtext, const char *options);
231 
232  /// Initialize the context for the given device.
233  void init(cl::Context &context, cl::Device &device);
234 
235  /// Releases the pinned, page-locked memory buffer.
236  void releasePinnedBuffer();
237 
238 
244  bool myIsValid;
248 
249  struct KernelInfo
250  {
253  };
254 
257 
259 
260  // The pinned buffer is unique to the main thread.
263 
265 
267 
268  static void* theGLContext;
269  static void* theGLDisplay;
270  static int theGLDeviceType;
271 };
272 
273 CE_API cl_int
274 ce_enqueueKernel(const cl::CommandQueue& queue, const cl::Kernel &kernel,
275  const cl::NDRange &offset, const cl::NDRange &global, const cl::NDRange &local,
276  const std::vector<cl::Event>* events,
277  cl::Event* event);
278 
279 #endif
280 #endif
281 
#define CE_API
Definition: CE_API.h:10
struct _cl_device_id * cl_device_id
Definition: cl.h:42
bool has3DImageWriteSupport() const
Returns true if the OpenCL device supports writing to 3D image objects.
Definition: CE_Context.h:124
bool myIsValid
Definition: CE_Context.h:244
cl::Device getDevice() const
Returns the OpenCL Device object.
Definition: CE_Context.h:70
Unsorted map container.
Definition: UT_Map.h:83
void
Definition: png.h:1083
bool myOutOfMemoryFailure
Definition: CE_Context.h:264
GLbitfield GLuint readBuffer
Definition: glew.h:13270
GLboolean GLboolean g
Definition: glcorearb.h:1221
cl::Kernel loadKernel(const char *progname, const UT_StringRef &kernelname, const char *options=NULL)
Definition: CE_Context.h:106
bool mySupports3DImageWrites
Definition: CE_Context.h:246
UT_ErrorSeverity
Definition: UT_Error.h:25
ceTraceCtx getTraceContext() const
Definition: CE_Context.h:72
CE_API cl_int ce_enqueueKernel(const cl::CommandQueue &queue, const cl::Kernel &kernel, const cl::NDRange &offset, const cl::NDRange &global, const cl::NDRange &local, const std::vector< cl::Event > *events, cl::Event *event)
virtual ~CE_DelayedOGLBindBuffer()
Definition: CE_Context.h:36
float fpreal32
Definition: SYS_Types.h:200
void read(T &in, bool &v)
Definition: ImfXdr.h:611
void * ceTraceCtx
Definition: CE_Tracing.h:56
GLdouble GLdouble t
Definition: glew.h:1403
GLuint buffer
Definition: glcorearb.h:659
GLdouble l
Definition: glew.h:9164
cl::CommandQueue getQueue() const
Definition: CE_Context.h:67
struct _cl_event * event
Definition: glcorearb.h:2960
GLsizeiptr size
Definition: glcorearb.h:663
GLuint writeBuffer
Definition: glcorearb.h:2673
fpreal32 * myPinnedData
Definition: CE_Context.h:262
Event interface for cl_event.
Definition: cl.hpp:1645
cl::CommandQueue myQueue
Definition: CE_Context.h:240
bool isValid() const
Returns whether the CE_Context has been successfully initialized.
Definition: CE_Context.h:111
GLuint64EXT * result
Definition: glew.h:14311
UT_Map< uint32, CE_DelayedOGLBindBuffer * > myDelayedOGLBindBuffers
Definition: CE_Context.h:266
ceTraceCtx myTraceCtx
Definition: CE_Context.h:242
cl_bitfield cl_device_type
Definition: cl.h:53
cl::CommandQueue myDeviceQueue
Definition: CE_Context.h:241
CE_MemoryPool * myMemPool
Definition: CE_Context.h:258
*get result *(waiting if necessary)*A common idiom is to fire a bunch of sub tasks at the queue
Definition: thread.h:629
long long int64
Definition: SYS_Types.h:116
cl::Device myDevice
Definition: CE_Context.h:243
GLfloat GLfloat p
Definition: glew.h:16656
static void * theGLDisplay
Definition: CE_Context.h:269
void setOutOfMemoryFailure(bool hasfailed=true)
Definition: CE_Context.h:196
GLenum GLenum severity
Definition: glcorearb.h:2538
#define SYS_UINT32_MAX
Definition: SYS_Types.h:172
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glcorearb.h:2539
GLboolean * data
Definition: glcorearb.h:130
cl::Context myContext
Definition: CE_Context.h:239
CommandQueue interface for cl_command_queue.
Definition: cl.hpp:2847
static void * theGLContext
Definition: CE_Context.h:268
cl_int getInfo(Func f, cl_uint name, T *param)
Definition: cl.hpp:1028
fpreal64 fpreal
Definition: SYS_Types.h:277
bool mySupportsDouble
Definition: CE_Context.h:245
unsigned int uint32
Definition: SYS_Types.h:40
Memory buffer interface.
Definition: cl.hpp:1865
cl::Buffer myXNoiseData
Definition: CE_Context.h:247
NDRange interface.
Definition: cl.hpp:2463
UT_StringHolder name
Definition: CE_Context.h:251
void(* CE_ErrorCB)(const char *errmsg, UT_ErrorSeverity severity, void *data)
Definition: CE_Context.h:29
Kernel interface that implements cl_kernel.
Definition: cl.hpp:2541
cl::Context getCLContext() const
Returns the underlying cl::Context object.
Definition: CE_Context.h:63
GLintptr offset
Definition: glcorearb.h:664
static int theGLDeviceType
Definition: CE_Context.h:270
UT_Map< const _cl_program *, UT_Array< KernelInfo > * > myKernelTable
Definition: CE_Context.h:256
Device interface for cl_device_id.
Definition: cl.hpp:1263
bool hasDoubleSupport() const
Returns true if the OpenCL device supports double precision.
Definition: CE_Context.h:122
void write(T &out, bool v)
Definition: ImfXdr.h:332
Program interface that implements cl_program.
Definition: cl.hpp:2646
bool hasOutOfMemoryFailureHappened() const
Definition: CE_Context.h:197
unsigned int uint
Definition: SYS_Types.h:45
cl::Kernel * kernel
Definition: CE_Context.h:252
cl::Buffer myPinnedBuffer
Definition: CE_Context.h:261
UT_StringMap< cl::Program * > myProgramTable
Definition: CE_Context.h:255