HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_StringView.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_StringView.h ( UT Library, C++)
7  *
8  * COMMENTS:
9  */
10 
11 #ifndef __UT_StringView__
12 #define __UT_StringView__
13 
14 #include "UT_API.h"
15 
16 #include "UT_Assert.h"
17 #include "UT_Array.h"
18 
19 #include <SYS/SYS_Compiler.h>
20 #include <SYS/SYS_Inline.h>
21 #include <SYS/SYS_String.h>
22 
23 #include <string.h>
24 #include <memory>
25 #include <limits>
26 #include <iosfwd>
27 
28 class UT_StringLit;
29 class UT_StringRef;
30 class UT_StringHolder;
31 class UT_WorkBuffer;
34 
35 /// @brief A utility class to do read-only operations on a subset of an
36 /// existing string.
37 /// @note This object does not take ownership over the data being looked at,
38 /// so care must be taken that the owning object does not go out of scope
39 /// before the view object.
41 {
42 public:
43  /// Default constructor. Constructs an empty non-string.
44  constexpr SYS_FORCE_INLINE
46  : myStart(nullptr)
47  , myEnd(nullptr)
48  { /**/ }
49 
50  /// Construct a string view from the entirety of a null-terminated string.
51  constexpr SYS_FORCE_INLINE
52  UT_StringView(const char *str)
53  : myStart(str)
54  , myEnd(nullptr)
55  {
56  if (myStart) // a null str leaves both pointers null (a non-string view)
57  myEnd = myStart + ::strlen(str); // end points at the terminating null character
58  }
59 
60  /// Construct a string view on a string of a given length.
61  constexpr SYS_FORCE_INLINE
62  explicit UT_StringView(const char *str, exint len)
63  : myStart(str)
64  , myEnd(nullptr)
65  {
66  //UT_ASSERT(len >= 0 && "String View length should not be negative");
67  if (myStart) // a null str leaves both pointers null, whatever len is
68  myEnd = myStart + len; // len is not validated here (the assert above is disabled)
69  }
70 
71  /// Construct a string view on a given string range. The @c end pointer
72  /// should point to one past the end of the string (i.e. in the case of
73  /// null terminated strings, it should point at the null character).
75  explicit UT_StringView(const char *start, const char *end)
76  : myStart(start), myEnd(end)
77  {
78  UT_ASSERT((!myStart && !myEnd) || myStart <= myEnd);
79  }
80 
81  /// Copy constructor.
83  UT_StringView(const UT_StringView &o) = default;
84 
89 
90  /// Assignment operator
92  UT_StringView & operator=(const UT_StringView &o) = default;
93 
94  /// The exact meaning depends on context, but generally used either as
95  /// end of view indicator by the functions that expect a view index or
96  /// as the error indicator by the functions that return a view index.
97  static constexpr exint npos = std::numeric_limits<exint>::max();
98 
99  /// @name Query functions
100  /// @{
101 
102  /// Returns the length of the string in bytes.
104  exint length() const { return exint(myEnd - myStart); }
105 
106  /// Returns @c true if the string is empty.
108  bool isEmpty() const { return myStart == myEnd; }
109 
110  /// Reports whether the view refers to an actual string (possibly an empty one), i.e. whether its start pointer is non-null.
112  bool isstring() const { return myStart != nullptr; }
113 
114  /// Test whether the string is not an empty string (or nullptr)
116  SYS_SAFE_BOOL operator bool() const { return !isEmpty(); }
117 
118  /// Returns the memory, in bytes, used by this object.
120  int64 getMemoryUsage(bool inclusive) const
121  { return inclusive ? sizeof(*this) : 0; }
122 
123  /// Returns the character at index @c i. No bounds checking is performed.
125  char operator[](exint i) const { return myStart[i]; }
126  /// Returns the character at index @c pos. The index must satisfy 0 <= pos < length(); this is checked with UT_ASSERT_P only.
128  const char& at(exint pos) const
129  {
130  UT_ASSERT_P(pos >= 0 && pos < length());
131 
132  return myStart[pos];
133  }
134 
135  /// Returns a pointer to the first character of a view.
137  const char* data() const noexcept { return myStart; }
138 
139  /// Returns a constant reference to the first character in the view.
140  /// Undefined behaviour if view is empty.
142  {
144  !isEmpty(),
145  "Undefined behaviour trying to get front of empty string.");
146  return myStart[0];
147  }
148  /// Returns a constant reference to the last character in the view.
149  /// Undefined behaviour if the view is empty.
151  {
153  !isEmpty(),
154  "Undefined behaviour trying to get back of empty string.");
155  return myStart[length() - 1];
156  }
157 
158  /// Find first character equal to any of the characters in the given
159  /// character sequence.
161  findFirstOf(UT_StringView view, exint pos = 0) const noexcept;
162  SYS_NO_DISCARD_RESULT exint findFirstOf(char item, exint pos = 0) const
163  noexcept
164  {
165  UT_ASSERT(pos >= 0 && myStart + pos >= myStart);
166  for (const char *c = myStart + pos; c < myEnd; ++c) // scan forward starting at index pos
167  {
168  if (*c == item)
169  return static_cast<exint>(c - myStart); // index of the match, relative to the view start
170  }
171  return UT_StringView::npos; // item does not occur at or after pos
172  }
174  findFirstOf(const char *s, exint pos, exint count) const
175  {
176  return findFirstOf(UT_StringView(s, count), pos);
177  }
179  {
180  return findFirstOf(UT_StringView(s), pos);
181  }
182 
183  /// Find the last character equal to any of the characters in the given
184  /// character sequence.
186  findLastOf(UT_StringView view, exint pos = npos) const noexcept;
187  SYS_NO_DISCARD_RESULT exint findLastOf(char item, exint pos = npos) const
188  noexcept
189  {
190  return findLastOf(UT_StringView(std::addressof(item), 1), pos);
191  }
193  findLastOf(const char *s, exint pos, exint count) const
194  {
195  return findLastOf(UT_StringView(s, count), pos);
196  }
198  findLastOf(const char *s, exint pos = npos) const
199  {
200  return findLastOf(UT_StringView(s), pos);
201  }
202 
203  /// Find the first character not equal to any of the characters in the given
204  /// character sequence.
206  findFirstNotOf(UT_StringView view, exint pos = 0) const noexcept;
207  SYS_NO_DISCARD_RESULT exint findFirstNotOf(char item, exint pos = 0) const
208  noexcept
209  {
210  UT_ASSERT(pos >= 0 && myStart + pos >= myStart);
211  for (const char *c = myStart + pos; c < myEnd; ++c) // scan forward starting at index pos
212  {
213  if (*c != item)
214  return static_cast<exint>(c - myStart); // index of the first character that differs from item
215  }
216 
217  return UT_StringView::npos; // every character at or after pos equals item
218  }
220  findFirstNotOf(const char *s, exint pos, exint count) const
221  {
222  return findFirstNotOf(UT_StringView(s, count), pos);
223  }
225  {
226  return findFirstNotOf(UT_StringView(s), pos);
227  }
228 
229  /// Find the last character not equal to any of the characters in the given
230  /// character sequence.
232  findLastNotOf(UT_StringView view, exint pos = npos) const noexcept;
233  SYS_NO_DISCARD_RESULT exint findLastNotOf(char item, exint pos = npos) const
234  noexcept
235  {
236  return findLastNotOf(UT_StringView(std::addressof(item), 1), pos);
237  }
239  findLastNotOf(const char *s, exint pos, exint count) const
240  {
241  return findLastNotOf(UT_StringView(s, count), pos);
242  }
244  {
245  return findLastNotOf(UT_StringView(s), pos);
246  }
247  /// @}
248 
249  /// @name Iterators
250  /// @{
251 
252  /// The iterator type. @ref UT_StringView only provides read-only iterators.
253  typedef const char * const_iterator;
254 
255  /// Returns a constant iterator pointing to the beginning of the string.
257  const_iterator begin() const { return myStart; }
258 
259  /// Returns a constant iterator pointing to the end of the string.
261  const_iterator end() const { return myEnd; }
262  /// @}
263 
264  /// @name Manipulators
265  /// @{
266 
267  /// Resets the view to the null state: afterwards @ref isstring returns false
268  /// and @ref isEmpty returns true.
269  void clear() { myStart = nullptr; myEnd = nullptr; }
270 
271  /// Trim characters from the left- and right-hand side of the string.
272  /// By default this will trim the ASCII space characters.
274  UT_StringView trim(const char *c =" \t\n\r") const
275  { return trimInternal(c, true, true); }
276 
277  /// Trim characters from the left-hand side of the string.
278  /// By default this will trim the ASCII space characters.
280  UT_StringView trimLeft(const char *c =" \t\n\r") const
281  { return trimInternal(c, true, false); }
282 
283  /// Trim characters from the right-hand side of the string.
284  /// By default this will trim the ASCII space characters.
286  UT_StringView trimRight(const char *c =" \t\n\r") const
287  { return trimInternal(c, false, true); }
288 
289  /// Return the first token
291  UT_StringView firstToken(const char *sep_chars = " \t\n\r") const;
292 
293  /// Splits the string into individual tokens, separated by one or more of
294  /// the @c sep characters given. If @c group_separators is false, the
295  /// delimiters are not grouped together, e.g., ",," => ('', '', ''), which
296  /// is equivalent of Python's split().
298  UT_StringViewArray tokenize(const char *sep_chars =" \t\n\r",
299  bool group_separators = true) const;
300 
301  /// Splits the string into a list of words, using sep_str as the separator
302  /// string. Unlike tokenize, consecutive delimiters are not grouped
303  /// together and are instead taken to delimit empty strings.
304  /// If @c max_split is set, the string is split into at most @c max_split
305  /// pieces.
307  UT_StringViewArray split(const char *sep_str = " ",
308  int max_split = std::numeric_limits<int>::max()) const;
309 
310  /// Returns a sub-string of the current string. If positive, the
311  /// @c index parameter is relative to the start. If negative, it's
312  /// relative to the end (e.g. substr(-1,1) will return the last character
313  /// of the string).
314  /// The empty string will be returned for out-of-range values.
317 
318  /// Move the start of the view forward by n characters.
319  /// If n is greater than length() then it moves to the end.
320  void removePrefix(exint n);
321  /// Move the end of the view backwards by n characters.
322  /// If n is greater than the length() then it moves to the start.
323  void removeSuffix(exint n);
324  /// @}
325 
326  /// @name Operators
327  /// @{
328 
329  /// The @ref compare function compares this string with another, and returns
330  /// an integer less than, equal to, or greater than zero if this string
331  /// is found to be less than, equal to, or greater than the given string,
332  /// respectively. If a length is given, then the strings are compared as
333  /// if they were both of that length, or smaller.
335  int compare(const char *str, exint str_len,
336  bool case_sensitive=true) const;
338  int compare(const UT_StringView &sv,
339  bool case_sensitive=true) const;
340 
341  /// Returns true if the two strings compare as being equal.
342  /// If @c case_sensitive is set to @c false then the strings are compared
343  /// in a case-insensitive fashion.
345  bool equal(const char *str, bool case_sensitive=true) const
346  { return compare(UT_StringView(str),
347  case_sensitive) == 0; }
349  bool equal(const UT_StringView &other,
350  bool case_sensitive=true) const
351  { return compare(other, case_sensitive) == 0; }
352 
353  /// Returns true if the two strings compare as being equal.
355  bool operator==(const char *str) const
356  { return compare(UT_StringView(str)) == 0; }
358  bool operator==(const UT_StringView &other) const
359  { return compare(other) == 0; }
360 
361  /// Returns true if the two strings compare as being not equal.
363  bool operator!=(const char *str) const
364  { return compare(UT_StringView(str)) != 0; }
366  bool operator!=(const UT_StringView &other) const
367  { return compare(other) != 0; }
368 
369  /// Returns true if this string is lexicographically less than the given
370  /// string.
372  bool operator<(const char *str) const
373  { return compare(UT_StringView(str)) < 0; }
375  bool operator<(const UT_StringView &other) const
376  { return compare(other) < 0; }
377 
378  /// Find the first instance of the given character in this string.
379  const_iterator find(char c, const_iterator start) const;
381  const_iterator find(char c) const
382  { return find(c, begin()); }
383 
384  /// Find the first instance of the given substring in this string.
385  const_iterator find(const char *str, const_iterator start) const;
387  const_iterator find(const char *str) const
388  { return find(str, begin()); }
389 
390  /// Find last instance of the given character in this string, searching
391  /// backwards from 'pos'.
393  const_iterator rfind(char c, const_iterator pos) const;
395  const_iterator rfind(char c) const
396  { return rfind(c, end() - 1); }
397  /// Returns true if our string starts with the specified prefix.
399  bool startsWith(const char* prefix,
400  bool case_sensitive = true,
401  exint len = -1) const;
403  bool endsWith(const char* prefix,
404  bool case_sensitive = true,
405  exint len = -1) const;
406  /// @}
407 
408  /// Determine if string can be seen as a single floating point number
410  bool isFloat(bool skip_spaces = false,
411  bool loose = false,
412  bool allow_underscore = false) const;
413  /// Determine if string can be seen as a single integer number
415  bool isInteger(bool skip_spaces = false) const;
416 
418  unsigned hash() const
419  {
420  return SYSstring_hash(myStart, length(), /*allow_nulls*/true);
421  }
422 private:
424  UT_StringView trimInternal(const char *c, bool left, bool right) const;
425 
426  const char *myStart, *myEnd;
427 };
428 
429 // In very specific cases it can be helpful to store string views in a standard
430 // container such as map. When doing so EXTREME CAUTION must be taken, because a view does not own the characters it points at!!!
431 namespace std
432 {
433  template <>
434  struct hash<UT_StringView>
435  {
436  size_t operator()(const UT_StringView& s) const
437  {
438  return s.hash(); // forwards to UT_StringView::hash(); its unsigned result widens to size_t
439  }
440  };
441 }
442 // Implicitly adding support for hboost::hash hash doesn't work right now
443 // because it leads to ambiguity with the one for UT_String, I think because
444 // both of them can be created implicitly from const char pointers.
445 #if 0
446 // Hash for hboost. Same warning as above to use EXTREME CAUTION!!!
447 SYS_FORCE_INLINE size_t
448 hash_value(const UT_StringView &str)
449 {
450  return str.hash();
451 }
452 #endif
453 
454 UT_API std::ostream &
455 operator<<(std::ostream &os, const UT_StringView &sv);
456 
457 
458 #endif // __UT_StringView__
UT_API std::ostream & operator<<(std::ostream &os, const UT_StringView &sv)
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator find(const char *str) const
SYS_NO_DISCARD_RESULT exint findFirstNotOf(const char *s, exint pos) const
Returns the length of the string in bytes.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const char & back() const
constexpr SYS_FORCE_INLINE UT_StringView(const char *str)
Construct a string view from the entirety of a null-terminated string.
Definition: UT_StringView.h:52
SYS_NO_DISCARD_RESULT exint findLastNotOf(const char *s, exint pos) const
Returns the length of the string in bytes.
GLint left
Definition: glcorearb.h:2005
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator find(char c) const
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool operator<(const UT_StringView &other) const
GLuint start
Definition: glcorearb.h:475
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool operator<(const char *str) const
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE UT_StringView trimLeft(const char *c=" \t\n\r") const
const GLfloat * c
Definition: glew.h:16631
constexpr SYS_FORCE_INLINE UT_StringView()
Default constructor. Constructs an empty non-string.
Definition: UT_StringView.h:45
int64 exint
Definition: SYS_Types.h:125
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator end() const
Returns a constant iterator pointing to the end of the string.
#define UT_API
Definition: UT_API.h:14
#define SYS_EXINT_MAX
Definition: SYS_Types.h:181
SYS_NO_DISCARD_RESULT exint findLastOf(const char *s, exint pos, exint count) const
Returns the length of the string in bytes.
GLfloat right
Definition: glew.h:15525
size_t operator()(const UT_StringView &s) const
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool operator==(const char *str) const
Returns true if the two strings compare as being equal.
SYS_NO_DISCARD_RESULT unsigned hash() const
const char * const_iterator
The iterator type. UT_StringView only provides read-only iterators.
#define UT_ASSERT_MSG(ZZ,...)
Definition: UT_Assert.h:156
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const char * data() const noexcept
Returns a pointer to the first character of a view.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE int64 getMemoryUsage(bool inclusive) const
Returns the memory, in bytes, used by this object.
CompareResults OIIO_API compare(const ImageBuf &A, const ImageBuf &B, float failthresh, float warnthresh, ROI roi={}, int nthreads=0)
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:40
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool isEmpty() const
Returns true if the string is empty.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE exint length() const
Returns the length of the string in bytes.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool equal(const UT_StringView &other, bool case_sensitive=true) const
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator rfind(char c) const
#define SYS_SAFE_BOOL
Definition: SYS_Compiler.h:55
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const char & at(exint pos) const
Returns the character at index i.
GLenum GLsizei len
Definition: glew.h:7782
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const char & front() const
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:152
size_t OIIO_UTIL_API rfind(string_view a, string_view b)
GLuint GLuint end
Definition: glcorearb.h:475
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
static constexpr exint npos
Definition: UT_StringView.h:97
long long int64
Definition: SYS_Types.h:116
#define SYS_NO_DISCARD_RESULT
Definition: SYS_Compiler.h:93
GLint GLsizei count
Definition: glcorearb.h:405
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool operator==(const UT_StringView &other) const
constexpr SYS_FORCE_INLINE UT_StringView(const char *str, exint len)
Construct a string view on a string of a given length.
Definition: UT_StringView.h:62
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE char operator[](exint i) const
Returns the character at index i. No bounds checking is performed.
GLdouble n
Definition: glcorearb.h:2008
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:795
SYS_NO_DISCARD_RESULT exint findFirstNotOf(const char *s, exint pos, exint count) const
Returns the length of the string in bytes.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool equal(const char *str, bool case_sensitive=true) const
SYS_NO_DISCARD_RESULT exint findFirstOf(const char *s, exint pos, exint count) const
Returns the length of the string in bytes.
SYS_NO_DISCARD_RESULT exint findLastOf(const char *s, exint pos=npos) const
Returns the length of the string in bytes.
GLuint index
Definition: glcorearb.h:786
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator begin() const
Returns a constant iterator pointing to the beginning of the string.
SYS_FORCE_INLINE UT_StringView(const char *start, const char *end)
Definition: UT_StringView.h:75
Definition: core.h:982
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool isstring() const
Returns true if the view points to a valid string, even an empty one.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE UT_StringView trimRight(const char *c=" \t\n\r") const
#define UT_ASSERT(ZZ)
Definition: UT_Assert.h:153
UT_Array< UT_StringView > UT_StringViewArray
Definition: UT_StringView.h:32
#define const
Definition: zconf.h:214
void OIIO_UTIL_API split(string_view str, std::vector< string_view > &result, string_view sep=string_view(), int maxsplit=-1)
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool operator!=(const UT_StringView &other) const
SYS_NO_DISCARD_RESULT exint findLastNotOf(const char *s, exint pos, exint count) const
Returns the length of the string in bytes.
size_t hash_value(const CH_ChannelRef &ref)
GLdouble s
Definition: glew.h:1395
SYS_NO_DISCARD_RESULT exint findFirstOf(const char *s, exint pos) const
Returns the length of the string in bytes.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE UT_StringView trim(const char *c=" \t\n\r") const
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool operator!=(const char *str) const
Returns true if the two strings compare as being not equal.
FMT_CONSTEXPR auto find(Ptr first, Ptr last, T value, Ptr &out) -> bool
Definition: core.h:2089
PcpNodeRef_ChildrenIterator begin(const PcpNodeRef::child_const_range &r)
Support for range-based for loops for PcpNodeRef children ranges.
Definition: node.h:450