HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CE_Grid.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: CE_Grid.h ( CE Library, C++)
7  *
8  * COMMENTS: Compute Engine Grid.
9  */
10 
11 #ifndef __CE_Grid__
12 #define __CE_Grid__
13 
14 #include "CE_API.h"
15 
16 #ifdef CE_ENABLED
17 
18 #include <UT/UT_VoxelArray.h>
19 #include <UT/UT_String.h>
20 
21 /// This class represents a 3-dimensional array of float values stored on an
22 /// OpenCL device. It is roughly analagous to UT_VoxelArray, although it does
23 /// not support tiling. There are various convenience functions for simplifying
24 /// invocations of OpenCL kernels that operate on grids of values.
25 ///
26 /// CE_Grid supports an arbitrary number of "ghost" cells, which
27 /// are cells at the boundary edges of the grid that can be set according to
28 /// different UT_VoxelBorderType values, as well as arbitrary number of
29 /// "padding" cells. The padding cells can be used to ensure that several
30 /// grids with slightly different resolutions can nonetheless
31 /// have indentical x, y, and z stride values. This makes writing
32 /// kernels that operate on grids that represent face-sampled (MAC) vector
33 /// array much simpler, since the index into the different grids need only be
34 /// calculated once.
35 ///
36 /// Invoking an OpenCL kernel typically involves one of the bind() functions to
37 /// bind the kernel to the range of work items in the grid, assuming one OpenCL
38 /// work item per voxel. There are three different variants of bind(), the
39 /// default assumes the grid is always 3-dimensional; bind2D() which assumes the
40 /// grid has a 2-dimensional axis, and bind2D3D(), which will treat a
41 /// 3-dimensional grid normally, but flatten a 2-d grid for optimal performance
42 /// on OpenCL devices that prefer a large number of work items in the first
43 /// dimension. This last function is the preferred method to call if the grid
44 /// might represent 2D or 3D data.
45 ///
46 /// A simple OpenCL kernel that doubles every value in a CE_Grid might look
47 /// like:
48 /// @code
49 /// void __kernel doubleit(__global float *grid,
50 /// uint offset, uint xstride, uint ystride, uint zstride)
51 /// {
52 /// size_t idx = offset + get_global_id(0) * xstride +
53 /// get_global_id(1) * ystride +
54 /// get_global_id(2) * zstride;
55 /// grid[idx] *= 2;
56 /// }
57 /// @endcode
58 ///
59 /// Because this kernel takes x, y, and z strides, it can be used for 2D and 3D
60 /// data: in the 2D case zstride will always be 0.
61 /// Invoking this kernel then typically looks like:
62 /// @code
63 /// CE_Context *context = CE_Context::getContext();
64 /// cl::Program prog = context->loadProgram("mykernels.cl");
65 /// cl::KernelFunctor doubleit = myGrid.bind2D3D(prog, "doubleit");
66 /// doubleit(myGrid.buffer(), myGrid.getOffset(),
67 /// myGrid.getXStride2D3D(),
68 /// myGrid.getYStride2D3D(),
69 /// myGrid.getZStride2D3D());
70 /// @endcode
72 {
73 public:
74  CE_Grid();
75  CE_Grid(const CE_Grid &src);
76  virtual ~CE_Grid();
77 
78  /// Return the underlying OpenCL buffer that can be used in kernel
79  /// invocations. It allocates this buffer only on demand.
80  const cl::Buffer &buffer() const
81  {
82  if (hasBuffer())
83  return myBuffer;
84  return allocBuffer();
85  }
86 
87  /// If the current OpenCL buffer is valid.
88  bool hasBuffer() const {return (myBuffer() != 0);}
89 
90  /// Size the grid as specified. Note this does not actually allocate
91  /// memory on the OpenCL device.
92  void size(int xres, int yres, int zres,
93  int xghost = 1, int yghost = 1, int zghost = 1,
94  int xpad = 1, int ypad = 1, int zpad = 1);
95 
96  int getXRes() const { return myRes[0]; }
97  int getYRes() const { return myRes[1]; }
98  int getZRes() const { return myRes[2]; }
99 
100  int getRes(int dim) const { return myRes[dim]; }
101  UT_Vector3I getRes() const { return myRes; }
102 
103  UT_Vector3I getGhostRes() const { return myGhostCells; }
104  int getGhostRes(int dim) const { return myGhostCells[dim]; }
105 
106  UT_Vector3I getPadding() const { return myPadding; }
107  int getPadding(int dim) const { return myPadding[dim]; }
108 
109  /// Returns the offset from the beginning of the buffer to the beginning
110  /// of actual data.
111  int getOffset() const
112  {
113  return myGhostCells[0] +
114  myGhostCells[1] * myStrides[1] +
115  myGhostCells[2] * myStrides[2];
116  }
117  /// Returns the offset from the beginning of the buffer to the beginning
118  /// of the data including ghost cells.
119  int getGhostOffset() const;
120 
121  /// Like the identically-named function in UT_VoxelArray, set the values
122  /// that determine grid border behaviour. Note that calling these functions
123  /// does not actually set the ghost cell values; updateBorderCells() does
124  /// that.
125  void setBorder(UT_VoxelBorderType type, fpreal32 t);
126  UT_VoxelBorderType getBorder() const { return myBorderType; }
127  fpreal32 getBorderValue() const { return myBorderValue; }
128  void setBorderScale(fpreal32 scalex,
129  fpreal32 scaley,
130  fpreal32 scalez);
131  fpreal32 getBorderScale(int axis) const
132  { return myBorderScale[axis]; }
133 
134  /// Converts the given voxel coordinates to the normalized [0..1]^3 space,
135  /// similar to the function of the same name on UT_VoxelArray.
136  bool indexToPos(int x, int y, int z, UT_Vector3F &pos) const;
137 
138  /// Invoke a series of kernels to fill the ghost cells along each axis with
139  /// the proper values according to the border value and type.
140  void updateBorderCells() const;
141 
142  /// Returns the number of data voxels in the grid.
143  int64 numVoxels() const
144  { return ((int64)myRes[0]) * myRes[1] * myRes[2]; }
145 
146  /// Returns the number of total voxels in the grid, including ghost cells
147  /// and padding.
149  {
150  return (int64)(myRes[0] + 2 * myGhostCells[0] + myPadding[0]) *
151  (int64)(myRes[1] + 2 * myGhostCells[1] + myPadding[1]) *
152  (int64)(myRes[2] + 2 * myGhostCells[2] + myPadding[2]);
153  }
154 
155  /// Returns the memory required by the entire grid, including ghost cells
156  /// and padding.
158  { return sizeof(fpreal32) * numTotalVoxels(); }
159 
160  /// Initialize the CE_Grid from the supplied UT_VoxelArray. Note that
161  /// in the case that UT_VoxelArray::isConstant(), this will be very fast
162  /// and avoid allocating any actual GPU memory.
163  void initFromVoxels(const UT_VoxelArrayF &src,
164  int xghost = 1, int yghost = 1, int zghost = 1,
165  int xpad = 1, int ypad = 1, int zpad = 1);
166 
167  /// Match destination UT_VoxelArray to this CE_Grid and copy data. Note
168  /// if isConstant() is true, this is very fast. includeGhostCells can be
169  /// set to include the ghost cell values in the sizing and copy operations.
170  void matchAndCopyToVoxels(UT_VoxelArrayF& dest,
171  bool includeGhostCells = false) const;
172 
173  /// Match the src CE_Grid in terms of size and border conditions.
174  void match(const CE_Grid &src);
175 
176  /// Returns true if this CE_Grid matches src in terms of size and
177  /// border conditions.
178  bool isMatching(const CE_Grid &src) const;
179 
180  /// Returns true if this CE_Grid matches src in terms of offset and strides.
181  bool isCongruent(const CE_Grid& src) const;
182 
183  int getXStride() const {return myStrides[0];}
184  int getYStride() const {return myStrides[1];}
185  int getZStride() const {return myStrides[2];}
186  int getStride(int dim) const { return myStrides[dim]; }
187 
188  UT_Vector3I getStrides() const { return myStrides; }
189 
190  /// Returns the stride along axis including ghost cells.
191  int getGhostStride(int axis) const;
192 
193  /// Returns whether the specified axis is 2-dimensional.
194  bool isAxis2D(int axis) const
195  {
196  return (myRes[axis] == 1);
197  }
198 
199  /// Returns the 2-dimensional axis of this grid, or -1 if there is none.
200  int getAxis2D() const {return myAxis2d;}
201 
202  /// When flattening a grid to 2-dimensions, this is the axis to treat as
203  /// the x-axis, usually when using bind2D.
204  static int getXAxis2D(int axis2d)
205  {
206  return (axis2d > 0) ? 0 : 1;
207  }
208 
209  /// When flattening a grid to 2-dimensions, this is the axis to treat as
210  /// the y-axis, usually when using bind2D.
211  static int getYAxis2D(int axis2d)
212  {
213  return (axis2d < 2) ? 2 : 1;
214  }
215 
216  /// 2D strides.
217  int getXStride2D(int axis2d) const {return myStrides[getXAxis2D(axis2d)];}
218  int getYStride2D(int axis2d) const {return myStrides[getYAxis2D(axis2d)];}
219 
220  /// Returns the true x-stride if the grid is 3D, or the flattened x-stride
221  /// if 2D.
222  int getXStride2D3D() const
223  {
224  if (myAxis2d == -1)
225  return myStrides[0];
226  return getXStride2D(myAxis2d);
227  }
228 
229  /// Returns the true Y-stride if the grid is 3D, or the flattened y-stride
230  /// if 2D.
231  int getYStride2D3D() const
232  {
233  if (myAxis2d == -1)
234  return myStrides[1];
235  return getYStride2D(myAxis2d);
236  }
237 
238  /// Returns the true Z-stride if the grid is 3D, or 0 if 2D.
239  int getZStride2D3D() const
240  {
241  if (myAxis2d == -1)
242  return myStrides[2];
243  return 0;
244  }
245 
246  /// Returns the true x resolution if the grid is 3D, or the flattened x
247  /// resolution if 2D.
248  int getXRes2D3D() const
249  {
250  if (myAxis2d == -1)
251  return myRes[0];
252  return myRes[getXAxis2D(myAxis2d)];
253  }
254 
255  /// Returns the true y resolution if the grid is 3D, or the flattened y
256  /// resolution if 2D.
257  int getYRes2D3D() const
258  {
259  if (myAxis2d == -1)
260  return myRes[1];
261  return myRes[getYAxis2D(myAxis2d)];
262  }
263 
264  /// Returns the true z resolution if the grid is 3D, or 1 if 2D.
265  int getZRes2D3D() const
266  {
267  if (myAxis2d == -1)
268  return myRes[2];
269  return 1;
270  }
271 
272  /// Set this grid to the specified constant value. This will release the
273  /// underlying OpenCL buffer and store the constant value in the CE_Grid
274  /// object.
275  void constant(fpreal32 v);
276 
277  void zero() { constant(0); }
278 
279  /// Returns whether the grid is set to a constant value. If checkBorders is
280  /// true, checks that the border type and value are equal as well.
281  bool isConstant(fpreal32 *cval = 0, bool checkBorders = false) const;
282 
283  /// Copy data from the source grid. This requires that the grids have the
284  /// same data resolution, but ghost cells, padding, borders, etc. can differ.
285  void copyData(const CE_Grid &src);
286 
287  /// Assign to this, equivalent to match(src), followed by copyData(src).
288  CE_Grid &operator=(const CE_Grid &src);
289 
290  /// Steal the buffer from the other grid, leaving it unitialized.
291  void stealBuffer(CE_Grid &src);
292 
293  /// Add the source array values to this. Calls linearCombination() and
294  /// requires isCongruent(src).
295  CE_Grid &operator +=(const CE_Grid &src);
296 
297  /// Return an OpenCL NDRange comprised of the entire grid resolution,
298  /// implying one OpenCL work item per voxel.
300  {
301  return cl::NDRange(getXRes(),
302  getYRes(),
303  getZRes());
304  }
305 
306  /// Return an OpenCL NDRange comprised of the flattened 2D grid resolution,
307  /// implying one OpenCL work item per voxel.
308  cl::NDRange getGlobalRange2D(int axis2d) const
309  {
310  return cl::NDRange(myRes[getXAxis2D(axis2d)],
311  myRes[getYAxis2D(axis2d)]);
312  }
313 
314  /// Return an OpenCL NDRange comprised of the entire 3D grid resolution if
315  /// the grid is 3D, else the flattened 2D grid resolution.
317  {
318  return (myAxis2d == -1) ? getGlobalRange() :
319  getGlobalRange2D(myAxis2d);
320  }
321 
322  /// Create a local work item range for the supplied global range.
323  static cl::NDRange getLocalRange(const cl::NDRange &g);
325  { return getLocalRange(getGlobalRange2D3D()); }
326 
327  /// Bind a 3D kernel, with one work item per voxel.
328  cl::KernelFunctor bind(cl::Kernel k) const;
329  cl::KernelFunctor bind(cl::Program prog,
330  const char *kernelname) const;
331 
332  /// Bind a 2D kernel, treating the provided axis as the flat one,
333  /// with one work item per voxel. This also allows calling 2d kernels along
334  /// slices of a 3D grid.
335  cl::KernelFunctor bind2D(int axis, cl::Kernel k) const;
336  cl::KernelFunctor bind2D(int axis, cl::Program prog,
337  const char *kernelname) const;
338 
339  /// Bind a 2D-3D kernel, which should take x-, y-, and z-strides as
340  /// as parameters, automatically flattening a 2D grid if necessary.
341  cl::KernelFunctor bind2D3D(cl::Kernel k) const;
342  cl::KernelFunctor bind2D3D(cl::Program prog,
343  const char *kernelname) const;
344 
345  /// Enqueue kernel that stores the linear combination
346  /// g0 + c1 * g1
347  /// where this is g0. g1 is assumed to be center-sampled, and g0 is
348  /// corner-sampled around g1. Thus, g1 is 8-way (4-way for 2D) averaged
349  /// before the operation.
350  /// Requires this grid to have one more voxel per live dimension.
351  void scaledAddCornerFromCenter(fpreal32 c1, const CE_Grid &g1);
352 
353  /// Enqueue kernel that stores the linear combination
354  /// c0 * g0 + d.
355  /// Requires isCongruent(g0).
356  void linearCombination(fpreal32 c0, const CE_Grid &g0, fpreal32 d);
357 
358  /// Enqueue kernel that stores the linear combination
359  /// c0 * g0 + c1 * g1 + d.
360  /// Requires isCongruent(g0) && isCongruent(g1).
361  void linearCombination(fpreal32 c0, const CE_Grid &g0,
362  fpreal32 c1, const CE_Grid &g1,
363  fpreal32 d);
364 
365  /// Enqueue kernel that stores the linear combination
366  /// c0 * g0 + c1 * g1 + c2 * g2 + d
367  /// Requires isCongruent(g0) && isCongruent(g1) && isCongruent(g2)
368  void linearCombination(fpreal32 c0, const CE_Grid &g0,
369  fpreal32 c1, const CE_Grid &g1,
370  fpreal32 c2, const CE_Grid &g2,
371  fpreal32 d);
372 
373  /// Compute scale * divergence of the vector field represented by the
374  /// supplied grids and voxelsize.
375  void divergence(const CE_Grid &x, const CE_Grid &y, const CE_Grid &z,
376  fpreal32 scale, UT_Vector3 voxelSize);
377 
378  /// Compute scale * divergence of the vector field represented by the
379  /// supplied grids and voxelsize; divergence is calculated at corners of
380  /// the center-sampled vector field.
381  void divergenceCenterToCorner(const CE_Grid &x, const CE_Grid &y,
382  const CE_Grid &z, fpreal32 scale,
383  UT_Vector3 voxelSize);
384 
385  /// Add scale * gradient of the supplied field along the supplied axis
386  /// and voxelsize.
387  void applyGradient(const CE_Grid &p, fpreal32 scale,
388  fpreal32 voxelSize, int axis);
389 
390  /// Add scale * gradient of the supplied field along the supplied axis
391  /// and voxelsize. Assumes this grid is center-sampled, and p is sampled
392  /// at its corners.
393  void applyGradientCornerToCenter(const CE_Grid &p, fpreal32 scale,
394  fpreal32 voxelsize, int axis);
395 
396  /// Reductions of the grid to a single value.
397  fpreal64 sum() const;
398  fpreal64 sumAbs() const;
399  fpreal64 sumSqr() const;
400  fpreal64 min() const;
401  fpreal64 minAbs() const;
402  fpreal64 max() const;
403  fpreal64 maxAbs() const;
404  fpreal64 average() const {return sum() / numVoxels();}
405 
406  fpreal64 localAverage(UT_Vector3I &radius);
407  fpreal64 localSum( UT_Vector3I &radius);
408  fpreal64 localSumSqr( UT_Vector3I &radius);
409  fpreal64 localSumAbs( UT_Vector3I &radius);
410  fpreal64 localMin( UT_Vector3I &radius);
411  fpreal64 localMinAbs( UT_Vector3I &radius);
412  fpreal64 localMax( UT_Vector3I &radius);
413  fpreal64 localMaxAbs( UT_Vector3I &radius);
414 
415 
416  /// Compute the infinity-norm and 2-norm of the grid.
417  void computeNorms(fpreal64 &norminf, fpreal64 &norm2) const;
418 protected:
419 
420  const cl::Buffer &allocBuffer() const;
421  void releaseBuffer();
422 
423  void setValue(fpreal32 cval) const;
424 
425  // Reduction helpers
426  void getReductionRanges(const cl::Kernel &k,
427  cl::NDRange &globalRange, cl::NDRange &localRange,
428  uint &groupsize, uint &ngroups,
429  size_t &accumsize) const;
430  fpreal64 reduceFlat(cl::Buffer outgrid, uint groupsize, uint ngroups,
431  size_t accumsize, const char *reduceFlags) const;
432 
433  // Main reduction function.
434  fpreal64 doReduce(const char* reduceFlags) const;
435  bool doLocalReduce( const char * options, UT_Vector3I &radius );
436 
438  mutable bool myIsConstant;
443  int myAxis2d;
444 
449 };
450 
451 
452 #else
453 
454 class CE_API CE_Grid
455 {
456 };
457 
458 #endif
459 #endif
fpreal32 myConstantVal
Definition: CE_Grid.h:439
#define CE_API
Definition: CE_API.h:10
vint4 max(const vint4 &a, const vint4 &b)
Definition: simd.h:4703
int getYStride2D(int axis2d) const
Definition: CE_Grid.h:218
int64 numVoxels() const
Returns the number of data voxels in the grid.
Definition: CE_Grid.h:143
int myAxis2d
Definition: CE_Grid.h:443
GLsizeiptr size
Definition: glew.h:1681
cl::NDRange getGlobalRange2D3D() const
Definition: CE_Grid.h:316
GLenum src
Definition: glew.h:2410
UT_VoxelBorderType myBorderType
Definition: CE_Grid.h:447
int getXRes() const
Definition: CE_Grid.h:96
int getYRes() const
Definition: CE_Grid.h:97
int getRes(int dim) const
Definition: CE_Grid.h:100
GLenum GLenum GLenum GLenum GLenum scale
Definition: glew.h:13880
int getXStride2D3D() const
Definition: CE_Grid.h:222
UT_Vector3I getGhostRes() const
Definition: CE_Grid.h:103
UT_VoxelBorderType
Definition: UT_VoxelArray.h:67
fpreal32 myBorderValue
Definition: CE_Grid.h:446
const GLdouble * v
Definition: glew.h:1391
UT_Vector3I myStrides
Definition: CE_Grid.h:445
UT_Vector3I myRes
Definition: CE_Grid.h:440
int getXRes2D3D() const
Definition: CE_Grid.h:248
float fpreal32
Definition: SYS_Types.h:200
UT_Vector3I myPadding
Definition: CE_Grid.h:442
GLdouble GLdouble z
Definition: glew.h:1559
int getStride(int dim) const
Definition: CE_Grid.h:186
UT_Vector3I myGhostCells
Definition: CE_Grid.h:441
double fpreal64
Definition: SYS_Types.h:201
int getYStride2D3D() const
Definition: CE_Grid.h:231
int64 totalVoxelMemory() const
Definition: CE_Grid.h:157
UT_Vector3F myBorderScale
Definition: CE_Grid.h:448
fpreal32 getBorderScale(int axis) const
Definition: CE_Grid.h:131
UT_VoxelBorderType getBorder() const
Definition: CE_Grid.h:126
GLint GLint GLint GLint GLint x
Definition: glew.h:1252
GLint GLint GLint GLint GLint GLint y
Definition: glew.h:1252
OIIO_FORCEINLINE const vint4 & operator+=(vint4 &a, const vint4 &b)
Definition: simd.h:4246
cl::NDRange getGlobalRange2D(int axis2d) const
Definition: CE_Grid.h:308
int64 numTotalVoxels() const
Definition: CE_Grid.h:148
void zero()
Definition: CE_Grid.h:277
long long int64
Definition: SYS_Types.h:116
int getOffset() const
Definition: CE_Grid.h:111
int getXStride() const
Definition: CE_Grid.h:183
bool isAxis2D(int axis) const
Returns whether the specified axis is 2-dimensional.
Definition: CE_Grid.h:194
int getXStride2D(int axis2d) const
2D strides.
Definition: CE_Grid.h:217
int getZRes() const
Definition: CE_Grid.h:98
int getGhostRes(int dim) const
Definition: CE_Grid.h:104
UT_Vector3I getStrides() const
Definition: CE_Grid.h:188
static int getXAxis2D(int axis2d)
Definition: CE_Grid.h:204
GLuint GLuint GLsizei GLenum type
Definition: glew.h:1253
cl::NDRange getLocalRange2D3D() const
Definition: CE_Grid.h:324
bool hasBuffer() const
If the current OpenCL buffer is valid.
Definition: CE_Grid.h:88
int getZRes2D3D() const
Returns the true z resolution if the grid is 3D, or 1 if 2D.
Definition: CE_Grid.h:265
GLfloat GLfloat p
Definition: glew.h:16321
fpreal64 average() const
Definition: CE_Grid.h:404
UT_Vector3I getPadding() const
Definition: CE_Grid.h:106
Kernel functor interface.
Definition: cl.hpp:3564
fpreal32 getBorderValue() const
Definition: CE_Grid.h:127
int getAxis2D() const
Returns the 2-dimensional axis of this grid, or -1 if there is none.
Definition: CE_Grid.h:200
bool myIsConstant
Definition: CE_Grid.h:438
Memory buffer interface.
Definition: cl.hpp:1865
static int getYAxis2D(int axis2d)
Definition: CE_Grid.h:211
NDRange interface.
Definition: cl.hpp:2458
int getYRes2D3D() const
Definition: CE_Grid.h:257
int getZStride2D3D() const
Returns the true Z-stride if the grid is 3D, or 0 if 2D.
Definition: CE_Grid.h:239
Kernel interface that implements cl_kernel.
Definition: cl.hpp:2536
int getPadding(int dim) const
Definition: CE_Grid.h:107
VectorToScalarConverter< GridType >::Type::Ptr divergence(const GridType &grid, bool threaded, InterruptT *interrupt)
Compute the divergence of the given vector-valued grid.
vint4 min(const vint4 &a, const vint4 &b)
Definition: simd.h:4694
int getZStride() const
Definition: CE_Grid.h:185
UT_Vector3I getRes() const
Definition: CE_Grid.h:101
Program interface that implements cl_program.
Definition: cl.hpp:2641
cl::Buffer myBuffer
Definition: CE_Grid.h:437
unsigned int uint
Definition: SYS_Types.h:45
GLdouble GLdouble t
Definition: glew.h:1398
int getYStride() const
Definition: CE_Grid.h:184
const cl::Buffer & buffer() const
Definition: CE_Grid.h:80
GLboolean GLboolean g
Definition: glew.h:9477
cl::NDRange getGlobalRange() const
Definition: CE_Grid.h:299