HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CE_Grid.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: CE_Grid.h ( CE Library, C++)
7  *
8  * COMMENTS: Compute Engine Grid.
9  */
10 
11 #ifndef __CE_Grid__
12 #define __CE_Grid__
13 
14 #include "CE_API.h"
15 
16 #include <UT/UT_VoxelArray.h>
17 #include <UT/UT_String.h>
18 
19 /// This class represents a 3-dimensional array of float values stored on an
20 /// OpenCL device. It is roughly analagous to UT_VoxelArray, although it does
21 /// not support tiling. There are various convenience functions for simplifying
22 /// invocations of OpenCL kernels that operate on grids of values.
23 ///
24 /// CE_Grid supports an arbitrary number of "ghost" cells, which
25 /// are cells at the boundary edges of the grid that can be set according to
26 /// different UT_VoxelBorderType values, as well as arbitrary number of
27 /// "padding" cells. The padding cells can be used to ensure that several
28 /// grids with slightly different resolutions can nonetheless
29 /// have indentical x, y, and z stride values. This makes writing
30 /// kernels that operate on grids that represent face-sampled (MAC) vector
31 /// array much simpler, since the index into the different grids need only be
32 /// calculated once.
33 ///
34 /// Invoking an OpenCL kernel typically involves one of the bind() functions to
35 /// bind the kernel to the range of work items in the grid, assuming one OpenCL
36 /// work item per voxel. There are three different variants of bind(), the
37 /// default assumes the grid is always 3-dimensional; bind2D() which assumes the
38 /// grid has a 2-dimensional axis, and bind2D3D(), which will treat a
39 /// 3-dimensional grid normally, but flatten a 2-d grid for optimal performance
40 /// on OpenCL devices that prefer a large number of work items in the first
41 /// dimension. This last function is the preferred method to call if the grid
42 /// might represent 2D or 3D data.
43 ///
44 /// A simple OpenCL kernel that doubles every value in a CE_Grid might look
45 /// like:
46 /// @code
47 /// void __kernel doubleit(__global float *grid,
48 /// uint offset, uint xstride, uint ystride, uint zstride)
49 /// {
50 /// size_t idx = offset + get_global_id(0) * xstride +
51 /// get_global_id(1) * ystride +
52 /// get_global_id(2) * zstride;
53 /// grid[idx] *= 2;
54 /// }
55 /// @endcode
56 ///
57 /// Because this kernel takes x, y, and z strides, it can be used for 2D and 3D
58 /// data: in the 2D case zstride will always be 0.
59 /// Invoking this kernel then typically looks like:
60 /// @code
61 /// CE_Context *context = CE_Context::getContext();
62 /// cl::Program prog = context->loadProgram("mykernels.cl");
63 /// cl::KernelFunctor doubleit = myGrid.bind2D3D(prog, "doubleit");
64 /// doubleit(myGrid.buffer(), myGrid.getOffset(),
65 /// myGrid.getXStride2D3D(),
66 /// myGrid.getYStride2D3D(),
67 /// myGrid.getZStride2D3D());
68 /// @endcode
70 {
71 public:
72  CE_Grid();
73  CE_Grid(const CE_Grid &src);
74  virtual ~CE_Grid();
75 
76  /// Return the underlying OpenCL buffer that can be used in kernel
77  /// invocations. It allocates this buffer only on demand.
78  const cl::Buffer &buffer() const
79  {
80  if (hasBuffer())
81  return myBuffer;
82  return allocBuffer();
83  }
84 
85  /// If the current OpenCL buffer is valid.
86  bool hasBuffer() const {return (myBuffer() != 0);}
87  bool isValid() const { return hasBuffer(); }
88 
90  {
91  if (isValid()) return totalVoxelMemory();
92  return 0;
93  }
94 
95  /// Size the grid as specified. Note this does not actually allocate
96  /// memory on the OpenCL device.
97  void size(int xres, int yres, int zres,
98  int xghost = 1, int yghost = 1, int zghost = 1,
99  int xpad = 1, int ypad = 1, int zpad = 1);
100 
101  int getXRes() const { return myRes[0]; }
102  int getYRes() const { return myRes[1]; }
103  int getZRes() const { return myRes[2]; }
104 
105  int getRes(int dim) const { return myRes[dim]; }
106  UT_Vector3I getRes() const { return myRes; }
107 
108  UT_Vector3I getGhostRes() const { return myGhostCells; }
109  int getGhostRes(int dim) const { return myGhostCells[dim]; }
110 
111  UT_Vector3I getPadding() const { return myPadding; }
112  int getPadding(int dim) const { return myPadding[dim]; }
113 
114  /// Returns the offset from the beginning of the buffer to the beginning
115  /// of actual data.
116  int getOffset() const
117  {
118  return myGhostCells[0] +
119  myGhostCells[1] * myStrides[1] +
120  myGhostCells[2] * myStrides[2];
121  }
122  /// Returns the offset from the beginning of the buffer to the beginning
123  /// of the data including ghost cells.
124  int getGhostOffset() const;
125 
126  /// Like the identically-named function in UT_VoxelArray, set the values
127  /// that determine grid border behaviour. Note that calling these functions
128  /// does not actually set the ghost cell values; updateBorderCells() does
129  /// that.
130  void setBorder(UT_VoxelBorderType type, fpreal32 t);
131  UT_VoxelBorderType getBorder() const { return myBorderType; }
132  fpreal32 getBorderValue() const { return myBorderValue; }
133  void setBorderScale(fpreal32 scalex,
134  fpreal32 scaley,
135  fpreal32 scalez);
136  fpreal32 getBorderScale(int axis) const
137  { return myBorderScale[axis]; }
138 
139  /// Converts the given voxel coordinates to the normalized [0..1]^3 space,
140  /// similar to the function of the same name on UT_VoxelArray.
141  bool indexToPos(int x, int y, int z, UT_Vector3F &pos) const;
142 
143  /// Invoke a series of kernels to fill the ghost cells along each axis with
144  /// the proper values according to the border value and type.
145  void updateBorderCells() const;
146 
147  /// Returns the number of data voxels in the grid.
148  int64 numVoxels() const
149  { return ((int64)myRes[0]) * myRes[1] * myRes[2]; }
150 
151  /// Returns the number of total voxels in the grid, including ghost cells
152  /// and padding.
154  {
155  return (int64)(myRes[0] + 2 * myGhostCells[0] + myPadding[0]) *
156  (int64)(myRes[1] + 2 * myGhostCells[1] + myPadding[1]) *
157  (int64)(myRes[2] + 2 * myGhostCells[2] + myPadding[2]);
158  }
159 
160  /// Returns the memory required by the entire grid, including ghost cells
161  /// and padding.
163  { return sizeof(fpreal32) * numTotalVoxels(); }
164 
165  /// Initialize the CE_Grid from the supplied UT_VoxelArray. Note that
166  /// in the case that UT_VoxelArray::isConstant(), this will be very fast
167  /// and avoid allocating any actual GPU memory.
168  void initFromVoxels(const UT_VoxelArrayF &src,
169  int xghost = 1, int yghost = 1, int zghost = 1,
170  int xpad = 1, int ypad = 1, int zpad = 1);
171 
172  /// Match destination UT_VoxelArray to this CE_Grid and copy data. Note
173  /// if isConstant() is true, this is very fast. includeGhostCells can be
174  /// set to include the ghost cell values in the sizing and copy operations.
175  void matchAndCopyToVoxels(UT_VoxelArrayF& dest,
176  bool include_ghost_cells = false) const;
177 
178  /// Match the src CE_Grid in terms of size and border conditions.
179  void match(const CE_Grid &src);
180 
181  /// Returns true if this CE_Grid matches src in terms of size and
182  /// border conditions.
183  bool isMatching(const CE_Grid &src) const;
184 
185  /// Returns true if this CE_Grid matches src in terms of offset and strides.
186  bool isCongruent(const CE_Grid& src) const;
187 
188  int getXStride() const {return myStrides[0];}
189  int getYStride() const {return myStrides[1];}
190  int getZStride() const {return myStrides[2];}
191  int getStride(int dim) const { return myStrides[dim]; }
192 
193  UT_Vector3I getStrides() const { return myStrides; }
194 
195  /// Returns the stride along axis including ghost cells.
196  int getGhostStride(int axis) const;
197 
198  /// Returns whether the specified axis is 2-dimensional.
199  bool isAxis2D(int axis) const
200  {
201  return (myRes[axis] == 1);
202  }
203 
204  /// Returns the 2-dimensional axis of this grid, or -1 if there is none.
205  int getAxis2D() const {return myAxis2d;}
206 
207  /// When flattening a grid to 2-dimensions, this is the axis to treat as
208  /// the x-axis, usually when using bind2D.
209  static int getXAxis2D(int axis2d)
210  {
211  return (axis2d > 0) ? 0 : 1;
212  }
213 
214  /// When flattening a grid to 2-dimensions, this is the axis to treat as
215  /// the y-axis, usually when using bind2D.
216  static int getYAxis2D(int axis2d)
217  {
218  return (axis2d < 2) ? 2 : 1;
219  }
220 
221  /// 2D strides.
222  int getXStride2D(int axis2d) const {return myStrides[getXAxis2D(axis2d)];}
223  int getYStride2D(int axis2d) const {return myStrides[getYAxis2D(axis2d)];}
224 
225  /// Returns the true x-stride if the grid is 3D, or the flattened x-stride
226  /// if 2D.
227  int getXStride2D3D() const
228  {
229  if (myAxis2d == -1)
230  return myStrides[0];
231  return getXStride2D(myAxis2d);
232  }
233 
234  /// Returns the true Y-stride if the grid is 3D, or the flattened y-stride
235  /// if 2D.
236  int getYStride2D3D() const
237  {
238  if (myAxis2d == -1)
239  return myStrides[1];
240  return getYStride2D(myAxis2d);
241  }
242 
243  /// Returns the true Z-stride if the grid is 3D, or 0 if 2D.
244  int getZStride2D3D() const
245  {
246  if (myAxis2d == -1)
247  return myStrides[2];
248  return 0;
249  }
250 
251  /// Returns the true x resolution if the grid is 3D, or the flattened x
252  /// resolution if 2D.
253  int getXRes2D3D() const
254  {
255  if (myAxis2d == -1)
256  return myRes[0];
257  return myRes[getXAxis2D(myAxis2d)];
258  }
259 
260  /// Returns the true y resolution if the grid is 3D, or the flattened y
261  /// resolution if 2D.
262  int getYRes2D3D() const
263  {
264  if (myAxis2d == -1)
265  return myRes[1];
266  return myRes[getYAxis2D(myAxis2d)];
267  }
268 
269  /// Returns the true z resolution if the grid is 3D, or 1 if 2D.
270  int getZRes2D3D() const
271  {
272  if (myAxis2d == -1)
273  return myRes[2];
274  return 1;
275  }
276 
277  /// Set this grid to the specified constant value. This will release the
278  /// underlying OpenCL buffer and store the constant value in the CE_Grid
279  /// object.
280  void constant(fpreal32 v);
281 
282  void zero() { constant(0); }
283 
284  /// Returns whether the grid is set to a constant value. If check_borders
285  /// is true, checks that the border type and value are equal as well.
286  bool isConstant(fpreal32 *cval = 0, bool check_borders = false) const;
287 
288  /// Copy data from the source grid. This requires that the grids have the
289  /// same data resolution, but ghost cells, padding, borders, etc. can differ.
290  void copyData(const CE_Grid &src);
291 
292  /// Assign to this, equivalent to match(src), followed by copyData(src).
293  CE_Grid &operator=(const CE_Grid &src);
294 
295  /// Steal the buffer from the other grid, leaving it unitialized.
296  void stealBuffer(CE_Grid &src);
297 
298  /// Add the source array values to this. Calls linearCombination() and
299  /// requires isCongruent(src).
300  CE_Grid &operator +=(const CE_Grid &src);
301 
302  /// Return an OpenCL NDRange comprised of the entire grid resolution,
303  /// implying one OpenCL work item per voxel.
305  {
306  return cl::NDRange(getXRes(),
307  getYRes(),
308  getZRes());
309  }
310 
311  /// Return an OpenCL NDRange comprised of the flattened 2D grid resolution,
312  /// implying one OpenCL work item per voxel.
313  cl::NDRange getGlobalRange2D(int axis2d) const
314  {
315  return cl::NDRange(myRes[getXAxis2D(axis2d)],
316  myRes[getYAxis2D(axis2d)]);
317  }
318 
319  /// Return an OpenCL NDRange comprised of the entire 3D grid resolution if
320  /// the grid is 3D, else the flattened 2D grid resolution.
322  {
323  return (myAxis2d == -1) ? getGlobalRange() :
324  getGlobalRange2D(myAxis2d);
325  }
326 
327  /// Create a local work item range for the supplied global range.
328  static cl::NDRange getLocalRange(const cl::NDRange &g);
330  { return getLocalRange(getGlobalRange2D3D()); }
331 
332  /// Bind a 3D kernel, with one work item per voxel.
333  /// If a local range is provided, the global range will be padded to ensure
334  /// compliance; in this case, the kernel must manually perform bound checks!
335  /// lrange can point at an array, in which case n specifies its size.
337  const cl::NDRange* lrange = nullptr,
338  int n = 1) const;
339  cl::KernelFunctor bind(cl::Program prog,
340  const char *kernelname) const;
341 
342  /// Bind a 2D kernel, treating the provided axis as the flat one,
343  /// with one work item per voxel. This also allows calling 2d kernels along
344  /// slices of a 3D grid.
345  cl::KernelFunctor bind2D(int axis, cl::Kernel k) const;
346  cl::KernelFunctor bind2D(int axis, cl::Program prog,
347  const char *kernelname) const;
348 
349  /// Bind a 2D-3D kernel, which should take x-, y-, and z-strides as
350  /// as parameters, automatically flattening a 2D grid if necessary.
351  cl::KernelFunctor bind2D3D(cl::Kernel k) const;
352  cl::KernelFunctor bind2D3D(cl::Program prog,
353  const char *kernelname) const;
354 
355  /// Enqueue kernel that stores the linear combination
356  /// g0 + c1 * g1
357  /// where this is g0. g1 is assumed to be center-sampled, and g0 is
358  /// corner-sampled around g1. Thus, g1 is 8-way (4-way for 2D) averaged
359  /// before the operation.
360  /// Requires this grid to have one more voxel per live dimension.
361  void scaledAddCornerFromCenter(fpreal32 c1, const CE_Grid &g1);
362 
363  /// Enqueue kernel that stores the linear combination
364  /// c0 * g0 + d.
365  /// Requires isCongruent(g0).
366  void linearCombination(fpreal32 c0, const CE_Grid &g0, fpreal32 d);
367 
368  /// Enqueue kernel that stores the linear combination
369  /// c0 * g0 + c1 * g1 + d.
370  /// Requires isCongruent(g0) && isCongruent(g1).
371  void linearCombination(fpreal32 c0, const CE_Grid &g0,
372  fpreal32 c1, const CE_Grid &g1,
373  fpreal32 d);
374 
375  /// Enqueue kernel that stores the linear combination
376  /// c0 * g0 + c1 * g1 + c2 * g2 + d
377  /// Requires isCongruent(g0) && isCongruent(g1) && isCongruent(g2)
378  void linearCombination(fpreal32 c0, const CE_Grid &g0,
379  fpreal32 c1, const CE_Grid &g1,
380  fpreal32 c2, const CE_Grid &g2,
381  fpreal32 d);
382 
383  /// Compute scale * divergence of the vector field represented by the
384  /// supplied grids and voxelsize.
385  void divergence(const CE_Grid &x, const CE_Grid &y, const CE_Grid &z,
386  fpreal32 scale, UT_Vector3 voxel_size);
387 
388  /// Compute scale * divergence of the vector field represented by the
389  /// supplied grids and voxelsize; divergence is calculated at corners of
390  /// the center-sampled vector field.
391  void divergenceCenterToCorner(const CE_Grid &x, const CE_Grid &y,
392  const CE_Grid &z, fpreal32 scale,
393  UT_Vector3 voxel_size);
394 
395  /// Add scale * gradient of the supplied field along the supplied axis
396  /// and voxelsize.
397  void applyGradient(const CE_Grid &p, fpreal32 scale,
398  fpreal32 voxel_size, int axis);
399 
400  /// Add scale * gradient of the supplied field along the supplied axis
401  /// and voxelsize. Assumes this grid is center-sampled, and p is sampled
402  /// at its corners.
403  void applyGradientCornerToCenter(const CE_Grid &p, fpreal32 scale,
404  fpreal32 voxelsize, int axis);
405 
406  /// Reductions of the grid to a single value.
407  fpreal64 sum() const;
408  fpreal64 sumAbs() const;
409  fpreal64 sumSqr() const;
410  fpreal64 min() const;
411  fpreal64 minAbs() const;
412  fpreal64 max() const;
413  fpreal64 maxAbs() const;
414  fpreal64 average() const {return sum() / numVoxels();}
415 
416  fpreal64 localAverage(UT_Vector3I &radius);
417  fpreal64 localSum( UT_Vector3I &radius);
418  fpreal64 localSumSqr( UT_Vector3I &radius);
419  fpreal64 localSumAbs( UT_Vector3I &radius);
420  fpreal64 localMin( UT_Vector3I &radius);
421  fpreal64 localMinAbs( UT_Vector3I &radius);
422  fpreal64 localMax( UT_Vector3I &radius);
423  fpreal64 localMaxAbs( UT_Vector3I &radius);
424 
425  void boxBlur(int passes, UT_Vector3 radius);
426 
427  /// Compute the infinity-norm and 2-norm of the grid.
428  void computeNorms(fpreal64 &norminf, fpreal64 &norm2) const;
429 protected:
430 
431  const cl::Buffer &allocBuffer() const;
432  void releaseBuffer();
433 
434  void setValue(fpreal32 cval) const;
435 
436  // Reduction helpers
437  void getReductionRanges(const cl::Kernel &k,
438  cl::NDRange &globalRange, cl::NDRange &localRange,
439  uint &groupsize, uint &ngroups,
440  size_t &accumsize) const;
441  fpreal64 reduceFlat(cl::Buffer outgrid, uint groupsize, uint ngroups,
442  size_t accumsize, const char *reduce_flags) const;
443 
444  // Main reduction function.
445  fpreal64 doReduce(const char* reduce_flags) const;
446  bool doLocalReduce( const char * options, UT_Vector3I &radius );
447 
449  mutable bool myIsConstant;
454  int myAxis2d;
455 
460 };
461 
462 #endif
fpreal32 myConstantVal
Definition: CE_Grid.h:450
#define CE_API
Definition: CE_API.h:13
int getYStride2D(int axis2d) const
Definition: CE_Grid.h:223
int64 numVoxels() const
Returns the number of data voxels in the grid.
Definition: CE_Grid.h:148
int myAxis2d
Definition: CE_Grid.h:454
cl::NDRange getGlobalRange2D3D() const
Definition: CE_Grid.h:321
UT_VoxelBorderType myBorderType
Definition: CE_Grid.h:458
int getXRes() const
Definition: CE_Grid.h:101
int getYRes() const
Definition: CE_Grid.h:102
int getRes(int dim) const
Definition: CE_Grid.h:105
const GLdouble * v
Definition: glcorearb.h:837
GLdouble GLdouble GLdouble z
Definition: glcorearb.h:848
GLboolean GLboolean g
Definition: glcorearb.h:1222
int getXStride2D3D() const
Definition: CE_Grid.h:227
UT_Vector3I getGhostRes() const
Definition: CE_Grid.h:108
int64 exint
Definition: SYS_Types.h:125
UT_VoxelBorderType
Definition: UT_VoxelArray.h:70
ImageBuf OIIO_API min(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
GLint y
Definition: glcorearb.h:103
fpreal32 myBorderValue
Definition: CE_Grid.h:457
UT_Vector3I myStrides
Definition: CE_Grid.h:456
__hostdev__ void setValue(uint32_t offset, bool v)
Definition: NanoVDB.h:5750
UT_Vector3I myRes
Definition: CE_Grid.h:451
int getXRes2D3D() const
Definition: CE_Grid.h:253
float fpreal32
Definition: SYS_Types.h:200
UT_Vector3I myPadding
Definition: CE_Grid.h:453
int getStride(int dim) const
Definition: CE_Grid.h:191
UT_Vector3I myGhostCells
Definition: CE_Grid.h:452
double fpreal64
Definition: SYS_Types.h:201
int getYStride2D3D() const
Definition: CE_Grid.h:236
int64 totalVoxelMemory() const
Definition: CE_Grid.h:162
GA_API const UT_StringHolder scale
GLdouble n
Definition: glcorearb.h:2008
GLint GLint GLsizei GLint GLenum GLenum type
Definition: glcorearb.h:108
UT_Vector3F myBorderScale
Definition: CE_Grid.h:459
fpreal32 getBorderScale(int axis) const
Definition: CE_Grid.h:136
UT_VoxelBorderType getBorder() const
Definition: CE_Grid.h:131
OIIO_FORCEINLINE const vint4 & operator+=(vint4 &a, const vint4 &b)
Definition: simd.h:4512
cl::NDRange getGlobalRange2D(int axis2d) const
Definition: CE_Grid.h:313
int64 numTotalVoxels() const
Definition: CE_Grid.h:153
void zero()
Definition: CE_Grid.h:282
long long int64
Definition: SYS_Types.h:116
int getOffset() const
Definition: CE_Grid.h:116
int getXStride() const
Definition: CE_Grid.h:188
exint getDeviceMemoryUsage() const
Definition: CE_Grid.h:89
bool isAxis2D(int axis) const
Returns whether the specified axis is 2-dimensional.
Definition: CE_Grid.h:199
int getXStride2D(int axis2d) const
2D strides.
Definition: CE_Grid.h:222
int getZRes() const
Definition: CE_Grid.h:103
int getGhostRes(int dim) const
Definition: CE_Grid.h:109
UT_Vector3I getStrides() const
Definition: CE_Grid.h:193
static int getXAxis2D(int axis2d)
Definition: CE_Grid.h:209
GLint GLenum GLint x
Definition: glcorearb.h:409
cl::NDRange getLocalRange2D3D() const
Definition: CE_Grid.h:329
bool hasBuffer() const
If the current OpenCL buffer is valid.
Definition: CE_Grid.h:86
GLdouble t
Definition: glad.h:2397
int getZRes2D3D() const
Returns the true z resolution if the grid is 3D, or 1 if 2D.
Definition: CE_Grid.h:270
GLsizeiptr size
Definition: glcorearb.h:664
fpreal64 average() const
Definition: CE_Grid.h:414
LeafData & operator=(const LeafData &)=delete
UT_Vector3I getPadding() const
Definition: CE_Grid.h:111
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
Kernel functor interface.
Definition: cl.hpp:3585
fpreal32 getBorderValue() const
Definition: CE_Grid.h:132
int getAxis2D() const
Returns the 2-dimensional axis of this grid, or -1 if there is none.
Definition: CE_Grid.h:205
bool myIsConstant
Definition: CE_Grid.h:449
Memory buffer interface.
Definition: cl.hpp:1867
static int getYAxis2D(int axis2d)
Definition: CE_Grid.h:216
NDRange interface.
Definition: cl.hpp:2466
int getYRes2D3D() const
Definition: CE_Grid.h:262
int getZStride2D3D() const
Returns the true Z-stride if the grid is 3D, or 0 if 2D.
Definition: CE_Grid.h:244
Kernel interface that implements cl_kernel.
Definition: cl.hpp:2544
int getPadding(int dim) const
Definition: CE_Grid.h:112
VectorToScalarConverter< GridType >::Type::Ptr divergence(const GridType &grid, bool threaded, InterruptT *interrupt)
Compute the divergence of the given vector-valued grid.
int getZStride() const
Definition: CE_Grid.h:190
bool isValid() const
Definition: CE_Grid.h:87
UT_Vector3I getRes() const
Definition: CE_Grid.h:106
Program interface that implements cl_program.
Definition: cl.hpp:2649
cl::Buffer myBuffer
Definition: CE_Grid.h:448
unsigned int uint
Definition: SYS_Types.h:45
int getYStride() const
Definition: CE_Grid.h:189
const cl::Buffer & buffer() const
Definition: CE_Grid.h:78
cl::NDRange getGlobalRange() const
Definition: CE_Grid.h:304
GLenum src
Definition: glcorearb.h:1793