HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_StringView.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_StringView.h ( UT Library, C++)
7  *
8  * COMMENTS:
9  */
10 
11 #ifndef __UT_StringView__
12 #define __UT_StringView__
13 
14 #include "UT_API.h"
15 #include "UT_Assert.h"
16 
17 #include <SYS/SYS_Compiler.h>
18 #include <SYS/SYS_Inline.h>
19 #include <SYS/SYS_String.h>
20 
21 #include <limits.h>
22 #include <string.h>
23 #include <memory>
24 #include <iosfwd>
25 
26 class UT_StringLit;
27 class UT_StringRef;
28 class UT_StringHolder;
29 class UT_WorkBuffer;
30 class UT_StringView;
31 template <typename T> class UT_Array;
33 
34 /// @brief A utility class to do read-only operations on a subset of an
35 /// existing string.
36 /// @note This object does not take ownership over the data being looked at,
37 /// so care must be taken that the owning object does not go out of scope
38 /// before the view object.
40 {
41 public:
42  /// Default constructor. Constructs an empty non-string.
43  constexpr SYS_FORCE_INLINE
45  : myStart(nullptr)
46  , myEnd(nullptr)
47  { /**/ }
48 
49  /// Construct a string view from the entirety of a null-terminated string.
50  constexpr SYS_FORCE_INLINE
51  UT_StringView(const char *str)
52  : myStart(str)
53  , myEnd(nullptr)
54  {
55  if (myStart)
56  myEnd = myStart + __builtin_strlen(myStart);
57  }
58 
59  /// Construct a string view on a string of a given length.
60  constexpr SYS_FORCE_INLINE
61  explicit UT_StringView(const char *str, exint len)
62  : myStart(str)
63  , myEnd(nullptr)
64  {
65  //UT_ASSERT(len >= 0 && "String View length should not be negative");
66  if (myStart)
67  myEnd = myStart + len;
68  }
69 
70  /// Construct a string view on a given string range. The @c end pointer
71  /// should point to one past the end of the string (i.e. in the case of
72  /// null terminated strings, it should point at the null character).
74  explicit UT_StringView(const char *start, const char *end)
75  : myStart(start), myEnd(end)
76  {
77  UT_ASSERT((!myStart && !myEnd) || myStart <= myEnd);
78  }
79 
80  /// Copy constructor.
82  UT_StringView(const UT_StringView &o) = default;
83 
88 
89  /// Assignment operator
91  UT_StringView & operator=(const UT_StringView &o) = default;
92 
93  /// The exact meaning depends on context, but generally used either as
94  /// end of view indicator by the functions that expect a view index or
95  /// as the error indicator by the functions that return a view index.
96  static constexpr exint npos = std::numeric_limits<exint>::max();
97 
98  /// @name Query functions
99  /// @{
100 
101  /// Returns the length of the string in bytes.
103  exint length() const { return exint(myEnd - myStart); }
104 
105  /// Returns @c true if the string is empty.
107  bool isEmpty() const { return myStart == myEnd; }
108 
109  /// Returns @c true if the view points to a non-empty string
111  bool isstring() const { return !isEmpty(); }
112 
113  /// Test whether the string is not an empty string (or nullptr)
115  SYS_SAFE_BOOL operator bool() const { return !isEmpty(); }
116 
117  /// Returns the memory, in bytes, used by this object.
119  int64 getMemoryUsage(bool inclusive) const
120  { return inclusive ? sizeof(*this) : 0; }
121 
122  /// Returns the character at index @c i. No bounds checking is performed.
124  char operator[](exint i) const { return myStart[i]; }
125  /// Returns the character at index @c i.
127  const char& at(exint pos) const
128  {
129  UT_ASSERT_P(pos < length() && pos >= 0);
130 
131  return myStart[pos];
132  }
133 
134  /// Returns a pointer to the first character of a view.
136  const char* data() const noexcept { return myStart; }
137 
138  /// Returns a constant reference to the first character in the view.
139  /// Undefined behaviour if view is empty.
141  {
143  !isEmpty(),
144  "Undefined behaviour trying to get front of empty string.");
145  return myStart[0];
146  }
147  /// Returns a constant reference to the last character in the view.
148  /// Undefined behaviour if the view is empty.
150  {
152  !isEmpty(),
153  "Undefined behaviour trying to get back of empty string.");
154  return myStart[length() - 1];
155  }
156 
157  /// Find first character equal to any of the characters in the given
158  /// character sequence.
160  findFirstOf(UT_StringView view, exint pos = 0) const noexcept;
161  SYS_NO_DISCARD_RESULT exint findFirstOf(char item, exint pos = 0) const
162  noexcept
163  {
164  UT_ASSERT(pos >= 0 && myStart + pos >= myStart);
165  for (const char *c = myStart + pos; c < myEnd; ++c)
166  {
167  if (*c == item)
168  return static_cast<exint>(c - myStart);
169  }
170  return UT_StringView::npos;
171  }
173  findFirstOf(const char *s, exint pos, exint count) const
174  {
175  return findFirstOf(UT_StringView(s, count), pos);
176  }
178  {
179  return findFirstOf(UT_StringView(s), pos);
180  }
181 
182  /// Find the last character equal to any of the characters in the given
183  /// character sequence.
185  findLastOf(UT_StringView view, exint pos = npos) const noexcept;
186  SYS_NO_DISCARD_RESULT exint findLastOf(char item, exint pos = npos) const
187  noexcept
188  {
189  return findLastOf(UT_StringView(std::addressof(item), 1), pos);
190  }
192  findLastOf(const char *s, exint pos, exint count) const
193  {
194  return findLastOf(UT_StringView(s, count), pos);
195  }
197  findLastOf(const char *s, exint pos = npos) const
198  {
199  return findLastOf(UT_StringView(s), pos);
200  }
201 
202  /// Find the first character not equal to any of the characters in the given
203  /// character sequence.
205  findFirstNotOf(UT_StringView view, exint pos = 0) const noexcept;
206  SYS_NO_DISCARD_RESULT exint findFirstNotOf(char item, exint pos = 0) const
207  noexcept
208  {
209  UT_ASSERT(pos >= 0 && myStart + pos >= myStart);
210  for (const char *c = myStart + pos; c < myEnd; ++c)
211  {
212  if (*c != item)
213  return static_cast<exint>(c - myStart);
214  }
215 
216  return UT_StringView::npos;
217  }
219  findFirstNotOf(const char *s, exint pos, exint count) const
220  {
221  return findFirstNotOf(UT_StringView(s, count), pos);
222  }
224  {
225  return findFirstNotOf(UT_StringView(s), pos);
226  }
227 
228  /// Find the last character not equal to any of the characters in the given
229  /// character sequence.
231  findLastNotOf(UT_StringView view, exint pos = npos) const noexcept;
232  SYS_NO_DISCARD_RESULT exint findLastNotOf(char item, exint pos = npos) const
233  noexcept
234  {
235  return findLastNotOf(UT_StringView(std::addressof(item), 1), pos);
236  }
238  findLastNotOf(const char *s, exint pos, exint count) const
239  {
240  return findLastNotOf(UT_StringView(s, count), pos);
241  }
243  {
244  return findLastNotOf(UT_StringView(s), pos);
245  }
246  /// @}
247 
248  /// @name Iterators
249  /// @{
250 
251  /// The iterator type. @ref UT_StringView only provides read-only iterators.
252  typedef const char * const_iterator;
253 
254  /// Returns a constant iterator pointing to the beginning of the string.
256  const_iterator begin() const { return myStart; }
257 
258  /// Returns a constant iterator pointing to the end of the string.
260  const_iterator end() const { return myEnd; }
261  /// @}
262 
263  /// @name Manipulators
264  /// @{
265 
266  /// Clears the string. After this operation @ref isstring will return false,
267  /// and @ref isEmpty will return true.
268  void clear() { myStart = myEnd = nullptr; }
269 
270  /// Trim characters from the left- and right-hand side of the string.
271  /// By default this will trim the ASCII space characters.
273  UT_StringView trim(const char *c =" \t\n\r") const
274  { return trimInternal(c, true, true); }
275 
276  /// Trim characters from the left-hand side of the string.
277  /// By default this will trim the ASCII space characters.
279  UT_StringView trimLeft(const char *c =" \t\n\r") const
280  { return trimInternal(c, true, false); }
281 
282  /// Trim characters from the right-hand side of the string.
283  /// By default this will trim the ASCII space characters.
285  UT_StringView trimRight(const char *c =" \t\n\r") const
286  { return trimInternal(c, false, true); }
287 
288  /// Return the first token
290  UT_StringView firstToken(const char *sep_chars = " \t\n\r") const;
291 
292  /// Splits the string into individual tokens, separated by one or more of
293  /// the @c sep characters given. If @c group_separators is false, the
294  /// delimiters are not grouped together, e.g., ",," => ('', '', ''), which
295  /// is equivalent of Python's split().
297  UT_StringViewArray tokenize(const char *sep_chars =" \t\n\r",
298  bool group_separators = true) const;
299 
300  /// Splits the string into a list of words, using sep_str as the separator
301  /// string. Unlike tokenize, consecutive delimiters are not grouped
302  /// together and are instead taken to delimit empty strings.
303  /// If @c max_split is set, the string is split into at most @c max_split
304  /// pieces.
306  UT_StringViewArray split(const char *sep_str = " ",
307  int max_split = INT_MAX) const;
308 
309  /// Returns a sub-string of the current string. If positive, the
310  /// @c index parameter is relative to the start. If negative, it's
311  /// relative to the end (e.g. substr(-1,1) will return the last character
312  /// of the string).
313  /// The empty string will be returned for out-of-range values.
316 
317  /// Move the start of the view forward by n characters.
318  /// If n is greater than length() then it moves to the end.
319  void removePrefix(exint n);
320  /// Move the end of the view backwards by n characters.
321  /// If n is greater than the length() then it moves to the start.
322  void removeSuffix(exint n);
323  /// @}
324 
325  /// @name Operators
326  /// @{
327 
328  /// The @ref compare function compares this string with another, and returns
329  /// an integer less than, equal to, or greater than zero if this string
330  /// is found to be less than, equal to, or greater than the given string,
331  /// respectively. If a length is given, then the strings are compared as
332  /// if they were both of that length, or smaller.
334  int compare(const char *str, exint str_len,
335  bool case_sensitive=true) const;
337  int compare(const UT_StringView &sv,
338  bool case_sensitive=true) const;
339 
340  /// Returns true if the two strings compare as being equal.
341  /// If @c case_sensitive is set to @c false then the strings are compared
342  /// in a case-insensitive fashion.
344  bool equal(const char *str, bool case_sensitive=true) const
345  { return compare(UT_StringView(str),
346  case_sensitive) == 0; }
348  bool equal(const UT_StringView &other,
349  bool case_sensitive=true) const
350  { return compare(other, case_sensitive) == 0; }
351 
352  /// Returns true if the two strings compare as being equal.
354  bool operator==(const char *str) const
355  { return compare(UT_StringView(str)) == 0; }
357  bool operator==(const UT_StringView &other) const
358  { return compare(other) == 0; }
359 
360  /// Returns true if the two strings compare as being not equal.
362  bool operator!=(const char *str) const
363  { return compare(UT_StringView(str)) != 0; }
365  bool operator!=(const UT_StringView &other) const
366  { return compare(other) != 0; }
367 
368  /// Returns true if this string is lexicographically less than the given
369  /// string.
371  bool operator<(const char *str) const
372  { return compare(UT_StringView(str)) < 0; }
374  bool operator<(const UT_StringView &other) const
375  { return compare(other) < 0; }
376 
377  /// Find the first instance of the given character in this string.
378  const_iterator find(char c, const_iterator start) const;
380  const_iterator find(char c) const
381  { return find(c, begin()); }
382 
383  /// Find the first instance of the given substring in this string.
384  const_iterator find(const char *str, const_iterator start) const;
386  const_iterator find(const char *str) const
387  { return find(str, begin()); }
388 
389  /// Find last instance of the given character in this string, searching
390  /// backwards from 'pos'.
392  const_iterator rfind(char c, const_iterator pos) const;
394  const_iterator rfind(char c) const
395  { return rfind(c, end() - 1); }
396  /// Returns true if our string starts with the specified prefix.
398  bool startsWith(const char* prefix,
399  bool case_sensitive = true,
400  exint len = -1) const;
402  bool endsWith(const char* prefix,
403  bool case_sensitive = true,
404  exint len = -1) const;
405  /// @}
406 
407  /// Determine if string can be seen as a single floating point number
409  bool isFloat(bool skip_spaces = false,
410  bool loose = false,
411  bool allow_underscore = false) const;
412  /// Determine if string can be seen as a single integer number
414  bool isInteger(bool skip_spaces = false) const;
415 
417  unsigned hash() const
418  {
419  return SYSstring_hash(myStart, length(), /*allow_nulls*/true);
420  }
421 private:
423  UT_StringView trimInternal(const char *c, bool left, bool right) const;
424 
425  const char *myStart, *myEnd;
426 };
427 
428 // In very specific cases it can be helpful to store string views in a standard
429 // container such as map. When doing so EXTREME CAUTION must be taken!!!
430 namespace std
431 {
432  template <>
433  struct hash<UT_StringView>
434  {
435  size_t operator()(const UT_StringView& s) const
436  {
437  return s.hash();
438  }
439  };
440 }
441 // Implicitly adding support for hboost::hash hash doesn't work right now
442 // because it leads to ambiguity with the one for UT_String, I think because
443 // both of them can be created implicitly from const char pointers.
444 #if 0
445 // Hash for hboost. Same warning as above to use EXTREME CAUTION!!!
446 SYS_FORCE_INLINE size_t
447 hash_value(const UT_StringView &str)
448 {
449  return str.hash();
450 }
451 #endif
452 
453 UT_API std::ostream &
454 operator<<(std::ostream &os, const UT_StringView &sv);
455 
456 
457 #endif // __UT_StringView__
UT_API std::ostream & operator<<(std::ostream &os, const UT_StringView &sv)
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator find(const char *str) const
SYS_NO_DISCARD_RESULT exint findFirstNotOf(const char *s, exint pos) const
Returns the length of the string in bytes.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const char & back() const
constexpr SYS_FORCE_INLINE UT_StringView(const char *str)
Construct a string view from the entirety of a null-terminated string.
Definition: UT_StringView.h:51
SYS_NO_DISCARD_RESULT exint findLastNotOf(const char *s, exint pos) const
Returns the length of the string in bytes.
GLint left
Definition: glcorearb.h:2005
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator find(char c) const
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool operator<(const UT_StringView &other) const
GLuint start
Definition: glcorearb.h:475
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool operator<(const char *str) const
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE UT_StringView trimLeft(const char *c=" \t\n\r") const
GLdouble right
Definition: glad.h:2817
constexpr SYS_FORCE_INLINE UT_StringView()
Default constructor. Constructs an empty non-string.
Definition: UT_StringView.h:44
int64 exint
Definition: SYS_Types.h:125
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator end() const
Returns a constant iterator pointing to the end of the string.
GLdouble s
Definition: glad.h:3009
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:795
#define UT_API
Definition: UT_API.h:14
#define SYS_EXINT_MAX
Definition: SYS_Types.h:181
SYS_NO_DISCARD_RESULT exint findLastOf(const char *s, exint pos, exint count) const
Returns the length of the string in bytes.
size_t operator()(const UT_StringView &s) const
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool operator==(const char *str) const
Returns true if the two strings compare as being equal.
SYS_NO_DISCARD_RESULT unsigned hash() const
const char * const_iterator
The iterator type. UT_StringView only provides read-only iterators.
#define UT_ASSERT_MSG(ZZ,...)
Definition: UT_Assert.h:159
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const char * data() const noexcept
Returns a pointer to the first character of a view.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE int64 getMemoryUsage(bool inclusive) const
Returns the memory, in bytes, used by this object.
CompareResults OIIO_API compare(const ImageBuf &A, const ImageBuf &B, float failthresh, float warnthresh, ROI roi={}, int nthreads=0)
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:39
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool isEmpty() const
Returns true if the string is empty.
GLdouble n
Definition: glcorearb.h:2008
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE exint length() const
Returns the length of the string in bytes.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool equal(const UT_StringView &other, bool case_sensitive=true) const
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator rfind(char c) const
#define SYS_SAFE_BOOL
Definition: SYS_Compiler.h:55
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const char & at(exint pos) const
Returns the character at index i.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const char & front() const
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:155
size_t OIIO_UTIL_API rfind(string_view a, string_view b)
GLuint GLuint end
Definition: glcorearb.h:475
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
static constexpr exint npos
Definition: UT_StringView.h:96
long long int64
Definition: SYS_Types.h:116
#define SYS_NO_DISCARD_RESULT
Definition: SYS_Compiler.h:93
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool operator==(const UT_StringView &other) const
constexpr SYS_FORCE_INLINE UT_StringView(const char *str, exint len)
Construct a string view on a string of a given length.
Definition: UT_StringView.h:61
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE char operator[](exint i) const
Returns the character at index i. No bounds checking is performed.
SYS_NO_DISCARD_RESULT exint findFirstNotOf(const char *s, exint pos, exint count) const
Returns the length of the string in bytes.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool equal(const char *str, bool case_sensitive=true) const
SYS_NO_DISCARD_RESULT exint findFirstOf(const char *s, exint pos, exint count) const
Returns the length of the string in bytes.
SYS_NO_DISCARD_RESULT exint findLastOf(const char *s, exint pos=npos) const
Returns the length of the string in bytes.
GLuint index
Definition: glcorearb.h:786
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator begin() const
Returns a constant iterator pointing to the beginning of the string.
SYS_FORCE_INLINE UT_StringView(const char *start, const char *end)
Definition: UT_StringView.h:74
Definition: core.h:982
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool isstring() const
Returns true if the view points to a non-empty string.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE UT_StringView trimRight(const char *c=" \t\n\r") const
#define UT_ASSERT(ZZ)
Definition: UT_Assert.h:156
UT_Array< UT_StringView > UT_StringViewArray
Definition: UT_StringView.h:31
#define const
Definition: zconf.h:214
void OIIO_UTIL_API split(string_view str, std::vector< string_view > &result, string_view sep=string_view(), int maxsplit=-1)
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool operator!=(const UT_StringView &other) const
SYS_NO_DISCARD_RESULT exint findLastNotOf(const char *s, exint pos, exint count) const
Returns the length of the string in bytes.
size_t hash_value(const CH_ChannelRef &ref)
SYS_NO_DISCARD_RESULT exint findFirstOf(const char *s, exint pos) const
Returns the length of the string in bytes.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE UT_StringView trim(const char *c=" \t\n\r") const
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool operator!=(const char *str) const
Returns true if the two strings compare as being not equal.
GLint GLsizei count
Definition: glcorearb.h:405
FMT_CONSTEXPR auto find(Ptr first, Ptr last, T value, Ptr &out) -> bool
Definition: core.h:2089
PcpNodeRef_ChildrenIterator begin(const PcpNodeRef::child_const_range &r)
Support for range-based for loops for PcpNodeRef children ranges.
Definition: node.h:483