67 {
swap(*
this, other); other.init(0);
return *
this; }
91 region[0] =
sizeof(
T);
92 region[1] = src.
size();
96 size_t src_row_pitch =
sizeof(V);
97 size_t dst_row_pitch =
sizeof(
T);
101 src_origin, dst_origin, region,
102 src_row_pitch, 0, dst_row_pitch, 0);
110 template <
typename V>
112 int src_tuplesize = 1,
int dst_tuplesize = 1,
115 UT_ASSERT(src_tuplesize >= 1 && dst_tuplesize >= 1);
116 if (!(src_tuplesize >= 1 && dst_tuplesize >= 1))
119 exint nelem = src.
size() / src_tuplesize;
120 init(nelem * dst_tuplesize);
121 convertFrom(src, src_tuplesize, dst_tuplesize, 0, 0, nelem, default_value);
132 template <
typename V>
134 int src_tuplesize = 1,
int dst_tuplesize = 1,
136 exint nelements = -1,
T default_value = 0)
138 UT_ASSERT(src_tuplesize >= 1 && dst_tuplesize >= 1);
139 if (!(src_tuplesize >= 1 && dst_tuplesize >= 1))
142 exint src_nelem = src.
size() / src_tuplesize;
143 exint dst_nelem =
size() / dst_tuplesize;
145 nelements =
SYSmin(src_nelem - src_offset, dst_nelem - dst_offset);
147 UT_ASSERT(src_offset >= 0 && dst_offset >= 0 &&
148 (src_nelem - src_offset) >= nelements &&
149 (dst_nelem - dst_offset) >= nelements);
154 wb.
append(
" -D CEARRAY_VALUE_");
155 appendElemType<V>(wb);
156 const char *opt = wb.
buffer();
157 cl::Kernel k = loadKernel(
"convertFrom", opt);
161 context->
get1DRanges(k, nelements, global_range, local_range);
163 global_range, local_range);
166 src_tuplesize, dst_tuplesize,
167 src_offset, dst_offset,
168 nelements, scalarKernelArg(default_value));
176 bool oneifnonzero=
false);
182 T readValue(
int idx)
const;
185 void writeValue(
int idx,
const T &
val,
bool blocking=
true);
191 void sort(
bool is_descending =
false,
int maxbits = 0)
194 sortInternal(emptyvals, is_descending, maxbits);
201 template <
typename V>
205 sortInternal(vals, is_descending, maxbits);
213 reduce_t min(
int tuplesize = 1,
int comp = 0)
const;
214 reduce_t minAbs(
int tuplesize = 1,
int comp = 0)
const ;
215 reduce_t max(
int tuplesize = 1,
int comp = 0)
const;
216 reduce_t maxAbs(
int tuplesize = 1,
int comp = 0)
const;
217 reduce_t sum(
int tuplesize = 1,
int comp = 0)
const;
218 reduce_t sumAbs(
int tuplesize = 1,
int comp = 0)
const;
219 reduce_t sumSqr(
int tuplesize = 1,
int comp = 0)
const;
225 return fsum / tuplecount;
238 void constant(
T cval);
249 cl::Kernel loadKernel(
const char *kernel_name,
250 const char *opt = NULL)
const;
252 reduce_t doReduce(
const char *reduce_flags,
const CE_Array<T> *
a,
253 int tuplesize = 1,
int comp = 0)
const;
255 template <
typename V>
257 const char *reduce_flags)
const;
260 template <
typename V>
261 void sortInternal(
CE_Array<V> &vals,
bool is_descending,
int maxbits);
264 template <
typename V>
269 std::is_same_v<T, fpreal16>,
294 template <
typename V>
303 int elemsize =
sizeof(V);
GLenum GLuint GLenum GLsizei const GLchar * buf
A simple OpenCL-based array class.
void initFromBuffer(const CE_BufferDevice< V > &src, int offset)
typename std::conditional< B, T, F >::type conditional_t
GLsizei const GLfloat * value
void sortValues(CE_Array< V > &vals, bool is_descending=false, int maxbits=0)
CE_Array(CE_Array< T > &&a) noexcept
Move constructor. Steals the buffer from the original.
SYS_FORCE_INLINE const char * buffer() const
GLboolean GLboolean GLboolean GLboolean a
T prefixSum(std::vector< T > &vec, bool threaded=true, OpT op=OpT())
Computes inclusive prefix sum of a vector.
ImageBuf OIIO_API min(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
void copyFrom(const CE_BufferDevice< T > &b, exint len=-1)
GLdouble GLdouble GLdouble q
exint size() const
Returns the buffer length.
void reorderBuffer(const CE_BufferDevice< V > &src, CE_BufferDevice< V > &dst, const CE_UInt32Array &order)
cl::CommandQueue getQueue() const
fpreal64 average(int tuplesize=1, int comp=0) const
void convertFrom(const CE_Array< V > &src, int src_tuplesize=1, int dst_tuplesize=1, exint src_offset=0, exint dst_offset=0, exint nelements=-1, T default_value=0)
CE_Array< float > CE_FloatArray
void sort(bool is_descending=false, int maxbits=0)
static CE_Context * getContext(bool gl_shared=true, bool shared_fallback=true)
void initAndConvertFrom(const CE_Array< V > &src, int src_tuplesize=1, int dst_tuplesize=1, T default_value=0)
~CE_Array()
CE_BufferDevice base class will release buffer.
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
scalar_arg_t scalarKernelArg(T v)
fpreal64 rms(int tuplesize=1, int comp=0) const
GLdouble GLdouble GLint GLint order
CE_Array< int > CE_Int32Array
typename std::conditional< std::is_integral< int >::value, exint, fpreal64 >::type reduce_t
const cl::Buffer & buffer() const
CE_Array< T > & operator=(CE_Array< T > &&other)
GLboolean GLboolean GLboolean b
void get1DRanges(const cl::Kernel &k, size_t items, cl::NDRange &g, cl::NDRange &l)
CE_Array(cl::Buffer &&buf, exint size=-1)
CE_Array(const CE_Array< T > &a)
friend void swap(CE_BufferDevice< T > &a, CE_BufferDevice< T > &b)
CommandQueue interface for cl_command_queue.
LeafData & operator=(const LeafData &)=delete
SYS_FORCE_INLINE void append(char character)
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
Kernel functor interface.
typename std::conditional_t< std::is_same_v< int, fpreal16 >, fpreal32, int > scalar_arg_t
CE_Array< uint32_t > CE_UInt32Array
const cl::Buffer & buffer() const
Kernel interface that implements cl_kernel.
KernelFunctor bind(const CommandQueue &queue, const NDRange &offset, const NDRange &global, const NDRange &local)
CE_EXTERN_TEMPLATE(CE_Array< uint8 >)
cl::KernelFunctor bind(cl::Kernel &k) const
bool isEmpty() const
Returns true iff there are no occupied elements in the buffer.