HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CE_Grid.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: CE_Grid.h ( CE Library, C++)
7  *
8  * COMMENTS: Compute Engine Grid.
9  */
10 
11 #ifndef __CE_Grid__
12 #define __CE_Grid__
13 
14 #include "CE_API.h"
15 
16 #ifdef CE_ENABLED
17 
18 #include <UT/UT_VoxelArray.h>
19 #include <UT/UT_String.h>
20 
21 /// This class represents a 3-dimensional array of float values stored on an
22 /// OpenCL device. It is roughly analagous to UT_VoxelArray, although it does
23 /// not support tiling. There are various convenience functions for simplifying
24 /// invocations of OpenCL kernels that operate on grids of values.
25 ///
26 /// CE_Grid supports an arbitrary number of "ghost" cells, which
27 /// are cells at the boundary edges of the grid that can be set according to
28 /// different UT_VoxelBorderType values, as well as arbitrary number of
29 /// "padding" cells. The padding cells can be used to ensure that several
30 /// grids with slightly different resolutions can nonetheless
31 /// have indentical x, y, and z stride values. This makes writing
32 /// kernels that operate on grids that represent face-sampled (MAC) vector
33 /// array much simpler, since the index into the different grids need only be
34 /// calculated once.
35 ///
36 /// Invoking an OpenCL kernel typically involves one of the bind() functions to
37 /// bind the kernel to the range of work items in the grid, assuming one OpenCL
38 /// work item per voxel. There are three different variants of bind(), the
39 /// default assumes the grid is always 3-dimensional; bind2D() which assumes the
40 /// grid has a 2-dimensional axis, and bind2D3D(), which will treat a
41 /// 3-dimensional grid normally, but flatten a 2-d grid for optimal performance
42 /// on OpenCL devices that prefer a large number of work items in the first
43 /// dimension. This last function is the preferred method to call if the grid
44 /// might represent 2D or 3D data.
45 ///
46 /// A simple OpenCL kernel that doubles every value in a CE_Grid might look
47 /// like:
48 /// @code
49 /// void __kernel doubleit(__global float *grid,
50 /// uint offset, uint xstride, uint ystride, uint zstride)
51 /// {
52 /// size_t idx = offset + get_global_id(0) * xstride +
53 /// get_global_id(1) * ystride +
54 /// get_global_id(2) * zstride;
55 /// grid[idx] *= 2;
56 /// }
57 /// @endcode
58 ///
59 /// Because this kernel takes x, y, and z strides, it can be used for 2D and 3D
60 /// data: in the 2D case zstride will always be 0.
61 /// Invoking this kernel then typically looks like:
62 /// @code
63 /// CE_Context *context = CE_Context::getContext();
64 /// cl::Program prog = context->loadProgram("mykernels.cl");
65 /// cl::KernelFunctor doubleit = myGrid.bind2D3D(prog, "doubleit");
66 /// doubleit(myGrid.buffer(), myGrid.getOffset(),
67 /// myGrid.getXStride2D3D(),
68 /// myGrid.getYStride2D3D(),
69 /// myGrid.getZStride2D3D());
70 /// @endcode
72 {
73 public:
74  CE_Grid();
75  CE_Grid(const CE_Grid &src);
76  virtual ~CE_Grid();
77 
78  /// Return the underlying OpenCL buffer that can be used in kernel
79  /// invocations. It allocates this buffer only on demand.
80  const cl::Buffer &buffer() const
81  {
82  if (hasBuffer())
83  return myBuffer;
84  return allocBuffer();
85  }
86 
87  /// If the current OpenCL buffer is valid.
88  bool hasBuffer() const {return (myBuffer() != 0);}
89 
90  /// Size the grid as specified. Note this does not actually allocate
91  /// memory on the OpenCL device.
92  void size(int xres, int yres, int zres,
93  int xghost = 1, int yghost = 1, int zghost = 1,
94  int xpad = 1, int ypad = 1, int zpad = 1);
95 
96  int getXRes() const { return myRes[0]; }
97  int getYRes() const { return myRes[1]; }
98  int getZRes() const { return myRes[2]; }
99 
100  int getRes(int dim) const { return myRes[dim]; }
101  UT_Vector3I getRes() const { return myRes; }
102 
103  UT_Vector3I getGhostRes() const { return myGhostCells; }
104  int getGhostRes(int dim) const { return myGhostCells[dim]; }
105 
106  UT_Vector3I getPadding() const { return myPadding; }
107  int getPadding(int dim) const { return myPadding[dim]; }
108 
109  /// Returns the offset from the beginning of the buffer to the beginning
110  /// of actual data.
111  int getOffset() const
112  {
113  return myGhostCells[0] +
114  myGhostCells[1] * myStrides[1] +
115  myGhostCells[2] * myStrides[2];
116  }
117  /// Returns the offset from the beginning of the buffer to the beginning
118  /// of the data including ghost cells.
119  int getGhostOffset() const;
120 
121  /// Like the identically-named function in UT_VoxelArray, set the values
122  /// that determine grid border behaviour. Note that calling these functions
123  /// does not actually set the ghost cell values; updateBorderCells() does
124  /// that.
125  void setBorder(UT_VoxelBorderType type, fpreal32 t);
126  UT_VoxelBorderType getBorder() const { return myBorderType; }
127  fpreal32 getBorderValue() const { return myBorderValue; }
128  void setBorderScale(fpreal32 scalex,
129  fpreal32 scaley,
130  fpreal32 scalez);
131  fpreal32 getBorderScale(int axis) const
132  { return myBorderScale[axis]; }
133 
134  /// Invoke a series of kernels to fill the ghost cells along each axis with
135  /// the proper values according to the border value and type.
136  void updateBorderCells() const;
137 
138  /// Returns the number of data voxels in the grid.
139  int64 numVoxels() const
140  { return ((int64)myRes[0]) * myRes[1] * myRes[2]; }
141 
142  /// Returns the number of total voxels in the grid, including ghost cells
143  /// and padding.
145  {
146  return (int64)(myRes[0] + 2 * myGhostCells[0] + myPadding[0]) *
147  (int64)(myRes[1] + 2 * myGhostCells[1] + myPadding[1]) *
148  (int64)(myRes[2] + 2 * myGhostCells[2] + myPadding[2]);
149  }
150 
151  /// Returns the memory required by the entire grid, including ghost cells
152  /// and padding.
154  { return sizeof(fpreal32) * numTotalVoxels(); }
155 
156  /// Initialize the CE_Grid from the supplied UT_VoxelArray. Note that
157  /// in the case that UT_VoxelArray::isConstant(), this will be very fast
158  /// and avoid allocating any actual GPU memory.
159  void initFromVoxels(const UT_VoxelArrayF &src,
160  int xghost = 1, int yghost = 1, int zghost = 1,
161  int xpad = 1, int ypad = 1, int zpad = 1);
162 
163  /// Match destination UT_VoxelArray to this CE_Grid and copy data. Note
164  /// if isConstant() is true, this is very fast. includeGhostCells can be
165  /// set to include the ghost cell values in the sizing and copy operations.
166  void matchAndCopyToVoxels(UT_VoxelArrayF& dest,
167  bool includeGhostCells = false) const;
168 
169  /// Match the src CE_Grid in terms of size and border conditions.
170  void match(const CE_Grid &src);
171 
172  /// Returns true if this CE_Grid matches src in terms of size and
173  /// border conditions.
174  bool isMatching(const CE_Grid &src) const;
175 
176  /// Returns true if this CE_Grid matches src in terms of offset and strides.
177  bool isCongruent(const CE_Grid& src) const;
178 
179  int getXStride() const {return myStrides[0];}
180  int getYStride() const {return myStrides[1];}
181  int getZStride() const {return myStrides[2];}
182  int getStride(int dim) const { return myStrides[dim]; }
183 
184  UT_Vector3I getStrides() const { return myStrides; }
185 
186  /// Returns the stride along axis including ghost cells.
187  int getGhostStride(int axis) const;
188 
189  /// Returns whether the specified axis is 2-dimensional.
190  bool isAxis2D(int axis) const
191  {
192  return (myRes[axis] == 1);
193  }
194 
195  /// Returns the 2-dimensional axis of this grid, or -1 if there is none.
196  int getAxis2D() const {return myAxis2d;}
197 
198  /// When flattening a grid to 2-dimensions, this is the axis to treat as
199  /// the x-axis, usually when using bind2D.
200  static int getXAxis2D(int axis2d)
201  {
202  return (axis2d > 0) ? 0 : 1;
203  }
204 
205  /// When flattening a grid to 2-dimensions, this is the axis to treat as
206  /// the y-axis, usually when using bind2D.
207  static int getYAxis2D(int axis2d)
208  {
209  return (axis2d < 2) ? 2 : 1;
210  }
211 
212  /// 2D strides.
213  int getXStride2D(int axis2d) const {return myStrides[getXAxis2D(axis2d)];}
214  int getYStride2D(int axis2d) const {return myStrides[getYAxis2D(axis2d)];}
215 
216  /// Returns the true x-stride if the grid is 3D, or the flattened x-stride
217  /// if 2D.
218  int getXStride2D3D() const
219  {
220  if (myAxis2d == -1)
221  return myStrides[0];
222  return getXStride2D(myAxis2d);
223  }
224 
225  /// Returns the true Y-stride if the grid is 3D, or the flattened y-stride
226  /// if 2D.
227  int getYStride2D3D() const
228  {
229  if (myAxis2d == -1)
230  return myStrides[1];
231  return getYStride2D(myAxis2d);
232  }
233 
234  /// Returns the true Z-stride if the grid is 3D, or 0 if 2D.
235  int getZStride2D3D() const
236  {
237  if (myAxis2d == -1)
238  return myStrides[2];
239  return 0;
240  }
241 
242  /// Returns the true x resolution if the grid is 3D, or the flattened x
243  /// resolution if 2D.
244  int getXRes2D3D() const
245  {
246  if (myAxis2d == -1)
247  return myRes[0];
248  return myRes[getXAxis2D(myAxis2d)];
249  }
250 
251  /// Returns the true y resolution if the grid is 3D, or the flattened y
252  /// resolution if 2D.
253  int getYRes2D3D() const
254  {
255  if (myAxis2d == -1)
256  return myRes[1];
257  return myRes[getYAxis2D(myAxis2d)];
258  }
259 
260  /// Returns the true z resolution if the grid is 3D, or 1 if 2D.
261  int getZRes2D3D() const
262  {
263  if (myAxis2d == -1)
264  return myRes[2];
265  return 1;
266  }
267 
268  /// Set this grid to the specified constant value. This will release the
269  /// underlying OpenCL buffer and store the constant value in the CE_Grid
270  /// object.
271  void constant(fpreal32 v);
272 
273  void zero() { constant(0); }
274 
275  /// Returns whether the grid is set to a constant value. If checkBorders is
276  /// true, checks that the border type and value are equal as well.
277  bool isConstant(fpreal32 *cval = 0, bool checkBorders = false) const;
278 
279  /// Copy data from the source grid. This requires that the grids have the
280  /// same data resolution, but ghost cells, padding, borders, etc. can differ.
281  void copyData(const CE_Grid &src);
282 
283  /// Assign to this, equivalent to match(src), followed by copyData(src).
284  CE_Grid &operator=(const CE_Grid &src);
285 
286  /// Steal the buffer from the other grid, leaving it unitialized.
287  void stealBuffer(CE_Grid &src);
288 
289  /// Add the source array values to this. Calls linearCombination() and
290  /// requires isCongruent(src).
291  CE_Grid &operator +=(const CE_Grid &src);
292 
293  /// Return an OpenCL NDRange comprised of the entire grid resolution,
294  /// implying one OpenCL work item per voxel.
296  {
297  return cl::NDRange(getXRes(),
298  getYRes(),
299  getZRes());
300  }
301 
302  /// Return an OpenCL NDRange comprised of the flattened 2D grid resolution,
303  /// implying one OpenCL work item per voxel.
304  cl::NDRange getGlobalRange2D(int axis2d) const
305  {
306  return cl::NDRange(myRes[getXAxis2D(axis2d)],
307  myRes[getYAxis2D(axis2d)]);
308  }
309 
310  /// Return an OpenCL NDRange comprised of the entire 3D grid resolution if
311  /// the grid is 3D, else the flattened 2D grid resolution.
313  {
314  return (myAxis2d == -1) ? getGlobalRange() :
315  getGlobalRange2D(myAxis2d);
316  }
317 
318  /// Create a local work item range for the supplied global range.
319  static cl::NDRange getLocalRange(const cl::NDRange &g);
321  { return getLocalRange(getGlobalRange2D3D()); }
322 
323  /// Bind a 3D kernel, with one work item per voxel.
324  cl::KernelFunctor bind(cl::Kernel k) const;
325  cl::KernelFunctor bind(cl::Program prog,
326  const char *kernelname) const;
327 
328  /// Bind a 2D kernel, treating the provided axis as the flat one,
329  /// with one work item per voxel. This also allows calling 2d kernels along
330  /// slices of a 3D grid.
331  cl::KernelFunctor bind2D(int axis, cl::Kernel k) const;
332  cl::KernelFunctor bind2D(int axis, cl::Program prog,
333  const char *kernelname) const;
334 
335  /// Bind a 2D-3D kernel, which should take x-, y-, and z-strides as
336  /// as parameters, automatically flattening a 2D grid if necessary.
337  cl::KernelFunctor bind2D3D(cl::Kernel k) const;
338  cl::KernelFunctor bind2D3D(cl::Program prog,
339  const char *kernelname) const;
340 
341 
342  /// Enqueue kernel that stores the linear combination
343  /// c0 * g0 + d.
344  /// Requires isCongruent(g0).
345  void linearCombination(fpreal32 c0, const CE_Grid &g0, fpreal32 d);
346 
347  /// Enqueue kernel that stores the linear combination
348  /// c0 * g0 + c1 * g1 + d.
349  /// Requires isCongruent(g0) && isCongruent(g1).
350  void linearCombination(fpreal32 c0, const CE_Grid &g0,
351  fpreal32 c1, const CE_Grid &g1,
352  fpreal32 d);
353 
354  /// Enqueue kernel that stores the linear combination
355  /// c0 * g0 + c1 * g1 + c2 * g2 + d
356  /// Requires isCongruent(g0) && isCongruent(g1) && isCongruent(g2)
357  void linearCombination(fpreal32 c0, const CE_Grid &g0,
358  fpreal32 c1, const CE_Grid &g1,
359  fpreal32 c2, const CE_Grid &g2,
360  fpreal32 d);
361 
362  /// Compute scale * divergence of the vector field represented by the
363  /// supplied grids and voxelsize.
364  void divergence(const CE_Grid &x, const CE_Grid &y, const CE_Grid &z,
365  fpreal32 scale, const UT_Vector3 voxelSize);
366 
367  /// Add scale * gradient of the supplied field along the supplied axis
368  /// and voxelsize.
369  void applyGradient(const CE_Grid &p, fpreal32 scale,
370  fpreal32 voxelSize, int axis);
371 
372 
373  /// Reductions of the grid to a single value.
374  fpreal64 sum() const;
375  fpreal64 sumAbs() const;
376  fpreal64 sumSqr() const;
377  fpreal64 min() const;
378  fpreal64 minAbs() const;
379  fpreal64 max() const;
380  fpreal64 maxAbs() const;
381  fpreal64 average() const {return sum() / numVoxels();}
382 
383  fpreal64 localAverage(UT_Vector3I &radius);
384  fpreal64 localSum( UT_Vector3I &radius);
385  fpreal64 localSumSqr( UT_Vector3I &radius);
386  fpreal64 localSumAbs( UT_Vector3I &radius);
387  fpreal64 localMin( UT_Vector3I &radius);
388  fpreal64 localMinAbs( UT_Vector3I &radius);
389  fpreal64 localMax( UT_Vector3I &radius);
390  fpreal64 localMaxAbs( UT_Vector3I &radius);
391 
392 
393  /// Compute the infinity-norm and 2-norm of the grid.
394  void computeNorms(fpreal64 &norminf, fpreal64 &norm2) const;
395 protected:
396 
397  const cl::Buffer &allocBuffer() const;
398  void releaseBuffer();
399 
400  void setValue(fpreal32 cval) const;
401 
402  // Reduction helpers
403  void getReductionRanges(const cl::Kernel &k,
404  cl::NDRange &globalRange, cl::NDRange &localRange,
405  uint &groupsize, uint &ngroups,
406  size_t &accumsize) const;
407  fpreal64 reduceFlat(cl::Buffer outgrid, uint groupsize, uint ngroups,
408  size_t accumsize, const char *reduceFlags) const;
409 
410  // Main reduction function.
411  fpreal64 doReduce(const char* reduceFlags) const;
412  bool doLocalReduce( const char * options, UT_Vector3I &radius );
413 
415  mutable bool myIsConstant;
420  int myAxis2d;
421 
426 };
427 
428 
429 #else
430 
431 class CE_API CE_Grid
432 {
433 };
434 
435 #endif
436 #endif
fpreal32 myConstantVal
Definition: CE_Grid.h:416
#define CE_API
Definition: CE_API.h:10
int getYStride2D(int axis2d) const
Definition: CE_Grid.h:214
int64 numVoxels() const
Returns the number of data voxels in the grid.
Definition: CE_Grid.h:139
int myAxis2d
Definition: CE_Grid.h:420
cl::NDRange getGlobalRange2D3D() const
Definition: CE_Grid.h:312
UT_VoxelBorderType myBorderType
Definition: CE_Grid.h:424
int getXRes() const
Definition: CE_Grid.h:96
const hboost::disable_if_c< VecTraits< T >::IsVec, T >::type & min(const T &a, const T &b)
Definition: Composite.h:128
int getYRes() const
Definition: CE_Grid.h:97
int getRes(int dim) const
Definition: CE_Grid.h:100
const GLdouble * v
Definition: glcorearb.h:836
GLdouble GLdouble GLdouble z
Definition: glcorearb.h:847
GLboolean GLboolean g
Definition: glcorearb.h:1221
int getXStride2D3D() const
Definition: CE_Grid.h:218
UT_Vector3I getGhostRes() const
Definition: CE_Grid.h:103
UT_VoxelBorderType
Definition: UT_VoxelArray.h:69
GLint y
Definition: glcorearb.h:102
fpreal32 myBorderValue
Definition: CE_Grid.h:423
UT_Vector3I myStrides
Definition: CE_Grid.h:422
UT_Vector3I myRes
Definition: CE_Grid.h:417
int getXRes2D3D() const
Definition: CE_Grid.h:244
UT_Vector3I myPadding
Definition: CE_Grid.h:419
GLsizeiptr size
Definition: glcorearb.h:663
const hboost::disable_if_c< VecTraits< T >::IsVec, T >::type & max(const T &a, const T &b)
Definition: Composite.h:132
int getStride(int dim) const
Definition: CE_Grid.h:182
UT_Vector3I myGhostCells
Definition: CE_Grid.h:418
int getYStride2D3D() const
Definition: CE_Grid.h:227
int64 totalVoxelMemory() const
Definition: CE_Grid.h:153
long long int64
Definition: SYS_Types.h:100
GA_API const UT_StringHolder scale
UT_Vector3F myBorderScale
Definition: CE_Grid.h:425
fpreal32 getBorderScale(int axis) const
Definition: CE_Grid.h:131
UT_VoxelBorderType getBorder() const
Definition: CE_Grid.h:126
double fpreal64
Definition: SYS_Types.h:185
cl::NDRange getGlobalRange2D(int axis2d) const
Definition: CE_Grid.h:304
int64 numTotalVoxels() const
Definition: CE_Grid.h:144
void zero()
Definition: CE_Grid.h:273
int getOffset() const
Definition: CE_Grid.h:111
int getXStride() const
Definition: CE_Grid.h:179
bool isAxis2D(int axis) const
Returns whether the specified axis is 2-dimensional.
Definition: CE_Grid.h:190
int getXStride2D(int axis2d) const
2D strides.
Definition: CE_Grid.h:213
int getZRes() const
Definition: CE_Grid.h:98
int getGhostRes(int dim) const
Definition: CE_Grid.h:104
UT_Vector3I getStrides() const
Definition: CE_Grid.h:184
static int getXAxis2D(int axis2d)
Definition: CE_Grid.h:200
cl::NDRange getLocalRange2D3D() const
Definition: CE_Grid.h:320
bool hasBuffer() const
If the current OpenCL buffer is valid.
Definition: CE_Grid.h:88
unsigned int uint
Definition: SYS_Types.h:33
int getZRes2D3D() const
Returns the true z resolution if the grid is 3D, or 1 if 2D.
Definition: CE_Grid.h:261
fpreal64 average() const
Definition: CE_Grid.h:381
UT_Vector3I getPadding() const
Definition: CE_Grid.h:106
GLint GLenum GLint x
Definition: glcorearb.h:408
Kernel functor interface.
Definition: cl.hpp:3562
fpreal32 getBorderValue() const
Definition: CE_Grid.h:127
int getAxis2D() const
Returns the 2-dimensional axis of this grid, or -1 if there is none.
Definition: CE_Grid.h:196
bool myIsConstant
Definition: CE_Grid.h:415
Memory buffer interface.
Definition: cl.hpp:1863
static int getYAxis2D(int axis2d)
Definition: CE_Grid.h:207
GLint GLint GLsizei GLint GLenum GLenum type
Definition: glcorearb.h:107
NDRange interface.
Definition: cl.hpp:2456
int getYRes2D3D() const
Definition: CE_Grid.h:253
int getZStride2D3D() const
Returns the true Z-stride if the grid is 3D, or 0 if 2D.
Definition: CE_Grid.h:235
Kernel interface that implements cl_kernel.
Definition: cl.hpp:2534
int getPadding(int dim) const
Definition: CE_Grid.h:107
VectorToScalarConverter< GridType >::Type::Ptr divergence(const GridType &grid, bool threaded, InterruptT *interrupt)
Compute the divergence of the given vector-valued grid.
int getZStride() const
Definition: CE_Grid.h:181
UT_Vector3I getRes() const
Definition: CE_Grid.h:101
Program interface that implements cl_program.
Definition: cl.hpp:2639
cl::Buffer myBuffer
Definition: CE_Grid.h:414
float fpreal32
Definition: SYS_Types.h:184
int getYStride() const
Definition: CE_Grid.h:180
const cl::Buffer & buffer() const
Definition: CE_Grid.h:80
cl::NDRange getGlobalRange() const
Definition: CE_Grid.h:295
GLenum src
Definition: glcorearb.h:1792