HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_StringUtils.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_StringUtils.h
7  *
8  * COMMENTS:
9  *
10  */
11 
12 #ifndef __UT_STRINGUTILS_H__
13 #define __UT_STRINGUTILS_H__
14 
15 #include "UT_API.h"
16 #include "UT_StringView.h"
17 
18 #include <SYS/SYS_Compiler.h>
19 #include <SYS/SYS_Inline.h>
20 #include <SYS/SYS_ParseNumber.h>
21 #include <SYS/SYS_String.h>
22 #include <SYS/SYS_Types.h>
23 
24 #include <tuple>
25 #include <string.h>
26 
27 class UT_StringHolder;
28 
29 SYS_FORCE_INLINE bool UTisstring(const char *s) { return s && *s; }
30 
31 template <typename T, bool HasFastLength = true>
34  const T& str,
35  const char* prefix,
36  bool case_sensitive = true,
37  exint len = -1)
38 {
39  if (!UTisstring(str.data()) || !(prefix && *prefix))
40  return false;
41 
42  if (len < 0)
43  len = strlen(prefix);
44 
45  if constexpr (HasFastLength)
46  {
47  if (len > str.length())
48  return false;
49  }
50 
51  const char* start = str.data();
52  if (case_sensitive)
53  return strncmp(start, prefix, len) == 0;
54  return SYSstrncasecmp(start, prefix, len) == 0;
55 }
56 
57 template <typename T>
60  const T& str,
61  const char* suffix,
62  bool case_sensitive = true,
63  exint len = -1)
64 {
65  if (!UTisstring(str.data()) || !(suffix && *suffix))
66  return false;
67 
68  if (len < 0)
69  len = strlen(suffix);
70 
71  if (len > str.length())
72  return false;
73 
74  const char* start = (str.data() + str.length()) - len;
75  if (case_sensitive)
76  return strncmp(start, suffix, len) == 0;
77  return SYSstrncasecmp(start, suffix, len) == 0;
78 }
79 
80 template <typename T>
81 SYS_NO_DISCARD_RESULT const char *
83  const T& str)
84 {
85  int i = str.length();
86  if (i <= 0)
87  return nullptr;
88 
89  const char* data = str.data();
90  while (i--)
91  {
92  if (!SYSisdigit(data[i]))
93  break;
94  }
95  return &data[i + 1];
96 }
97 
98 template <typename StringT>
100 UTstringFileName(const StringT& str)
101 {
102  // Convert the unknown string type to a string view.
103  UT_StringView view(str.data(), str.length());
104  return UTstringFileName(view);
105 }
106 
107 template <>
110 {
111  if (str.isEmpty())
112  return str;
113 
114  exint pos = str.findLastOf('/');
115  if (pos == UT_StringView::npos)
116  {
117  return str;
118  }
119 
120  // Make sure to move the pos past the '/'
121  return UT_StringView(str.begin() + pos + 1, str.end());
122 }
123 
124 /// Split the given path into the directory, filename, and file extension.
125 template <typename StringT>
126 SYS_NO_DISCARD_RESULT std::tuple<UT_StringView, UT_StringView, UT_StringView>
127 UTstringSplitPath(const StringT& str)
128 {
129  // Convert the unknown string type to a string view.
130  UT_StringView view(str.data(), str.length());
131  return UTstringSplitPath(view);
132 }
133 
134 /// Split the given path into the directory, filename, and file extension.
135 template <>
136 SYS_NO_DISCARD_RESULT inline std::tuple<UT_StringView, UT_StringView, UT_StringView>
138 {
139  if (str.isEmpty())
140  return std::make_tuple(str, str, str);
141 
142  // Parse for the dir
143  UT_StringView dir;
144  exint pos = str.findLastOf('/');
145  if (pos != UT_StringView::npos)
146  dir = UT_StringView(str.begin(), pos + 1);
147 
148  // Parse for the filename and file extension
149  UT_StringView file = UT_StringView(str.begin() + dir.length(), str.end());
150 
151  // If the file is `..` or `.`, or a dotfile, then there is no extension to parse
152  if (file.length() > 0 && file[0] == '.')
153  return std::make_tuple(dir, file, UT_StringView());
154 
155  exint dot = file.findLastOf('.');
156 
157  UT_StringView fname = file;
158  UT_StringView fext;
159  if (dot != UT_StringView::npos)
160  {
161  fname = UT_StringView(file.begin(), dot);
162  fext = UT_StringView(file.begin() + dot, file.end());
163  }
164 
165  return std::make_tuple(dir, fname, fext);
166 }
167 
168 template <typename StringT>
170 UTstringFileExtension(const StringT& str)
171 {
172  // Convert the unknown string type to a string view.
173  UT_StringView view(str.data(), str.length());
174  return UTstringFileExtension(view);
175 }
176 
177 template <>
180 {
181  if (str.isEmpty())
182  return str;
183 
184  exint dot = str.findLastOf('.');
185  if (dot == UT_StringView::npos)
186  {
187  return UT_StringView{};
188  }
189 
190  // Make sure the last dot that is found is after the last '/'.
191  exint slash = str.findLastOf('/');
192  if (slash != UT_StringView::npos && slash > dot)
193  return UT_StringView{};
194 
195  return UT_StringView(str.begin() + dot, str.end());
196 }
197 
198 template <typename StringT>
200 UTstringMatchFileExtension(const StringT& str, const char* extension)
201 {
202  UT_StringView str_extension = UTstringFileExtension(str);
203  return str_extension.equal(extension, false);
204 }
205 
206 template <typename StringT>
208 UTstringCountChar(const StringT& str, int c)
209 {
210  UT_StringView view(str.data(), str.length());
211  return UTstringCountChar(view, c);
212 }
213 
214 template <>
215 SYS_NO_DISCARD_RESULT inline int
217 {
218  if (str.isEmpty())
219  return 0;
220 
221  int count = 0;
222  exint pos = 0;
223  while (pos < str.length())
224  {
225  pos = str.findFirstOf(static_cast<char>(c), pos);
226  if (pos == UT_StringView::npos)
227  break;
228 
229  count++;
230  // Skip past the item that was just found.
231  pos++;
232  }
233 
234  return count;
235 }
236 
237 template <typename StringT>
240  const StringT& str,
241  bool skip_spaces = false,
242  bool loose = false,
243  bool allow_underscore = false)
244 {
245  const char* data = str.data();
246  if (!UTisstring(data))
247  return false;
248 
249  int i = 0;
250  // Skip leading spaces
251  if (skip_spaces)
252  for (; SYSisspace(data[i]); i++)
253  continue;
254 
255  for (; data[i] == '-' || data[i] == '+'; i++)
256  continue;
257 
258  int digit = 0;
259  int ecount = 0;
260  int dotcount = 0;
261  int epos = -10;
262 
263  for (; data[i]; i++)
264  {
265  if (SYSisdigit(data[i]))
266  {
267  digit = 1;
268  continue;
269  }
270  switch (data[i])
271  {
272  case 'e':
273  case 'E':
274  if (ecount)
275  return false;
276  epos = i + 1;
277  ecount = 1;
278  break;
279  case '-':
280  case '+':
281  if (epos != i)
282  return false;
283  epos++;
284  break;
285  case '.':
286  if (ecount || dotcount)
287  return false;
288  dotcount = 1;
289  break;
290  case '_':
291  if (allow_underscore)
292  {
293  // If we have not seen a digit, it is an error.
294  if (!digit)
295  return false;
296  // The actual requirements are tighter than this as
297  // I believe trailing underscores are prohibited.
298  }
299  else
300  {
301  return false;
302  }
303  break;
304  case ' ':
305  case '\t':
306  case '\n':
307  case '\r':
308  case '\f':
309  case '\v':
310  if (!skip_spaces)
311  {
312  return false;
313  }
314  else
315  {
316  // ignore trailing spaces
317  for (; data[i]; i++)
318  if (!SYSisspace(data[i]))
319  return false;
320 
321  if (digit || loose)
322  return true;
323  else
324  return false;
325  }
326 
327  default:
328  return false;
329  }
330  }
331 
332  return (digit || loose);
333 }
334 
335 template <>
336 SYS_NO_DISCARD_RESULT inline bool
338  const UT_StringView& str,
339  bool skip_spaces,
340  bool loose,
341  bool allow_underscore)
342 {
343  // Since UT_StringView may not be null terminated we need to use a
344  // specialized method.
345  return str.isFloat(skip_spaces, loose, allow_underscore);
346 }
347 
348 template <typename StringT>
350 UTstringIsInteger(const StringT& str, bool skip_spaces = false)
351 {
352  const char* cur = str.data();
353 
354  if (!UTisstring(cur))
355  return false;
356 
357  // Skip leading spaces
358  if (skip_spaces)
359  {
360  for (; SYSisspace(*cur); ++cur)
361  ;
362  }
363 
364  // Skip all +'s and -'s
365  for (; *cur == '-' || *cur == '+'; ++cur)
366  ;
367 
368  // Skip all digits
369  for (; SYSisdigit(*cur); ++cur)
370  ;
371 
372  // Skip trailing spaces
373  if (skip_spaces)
374  {
375  for (; SYSisspace(*cur); ++cur)
376  ;
377  }
378 
379  // if we have anything left, this is not an integer
380  if (*cur)
381  return false;
382  return true;
383 }
384 
385 template <>
386 SYS_NO_DISCARD_RESULT inline bool
387 UTstringIsInteger<UT_StringView>(const UT_StringView& str, bool skip_spaces)
388 {
389  // Since UT_StringView may not be null terminated we need to use a
390  // specialized method.
391  return str.isInteger(skip_spaces);
392 }
393 
394 /// Decompose a filename into various parts
395 ///
396 /// parseNumberedFileName will breakup a filename into its various
397 /// parts: file = prefix$Fsuffix (note: suffix is
398 /// not the same as file extension.) 0 is returned if there is
399 /// no frame number. 'negative' allows -[frame] to be interpreted as a
400 /// negative number. 'fractional' allows [frame].[number] to be interpreted
401 /// as a fractional frame.
402 ///
403 /// WARNING: Make sure to take care of lifetimes when the input string is
404 /// UT_StringView.
405 template <typename StringT>
406 bool
408  const StringT& str,
409  UT_StringView& prefix,
410  UT_StringView& frame,
411  UT_StringView& suffix,
412  bool negative, bool fractional)
413 {
414  if (str.length() == 0) return false;
415 
416  const char *data = str.data();
417 
418  if (data == nullptr)
419  return false;
420 
421  const char *itr;
422  const char *digend;
423 
424  // make sure we don't find numbers in the path part
425  const char* filename = strrchr(data, '/');
426  if (filename == nullptr)
427  filename = data;
428 
429  int len =strlen(filename);
430 
431  // make sure we don't find numbers in the file extension.
432  const char* fileend = filename + len -1;
433  while(*fileend != '.' && fileend > filename)
434  fileend --;
435 
436  if(fileend == filename)
437  fileend = filename + len -1;
438 
439  // search for the last digit.
440  for(itr = fileend; itr >= filename; itr-- )
441  if (SYSisdigit(*itr))
442  {
443  digend = itr;
444  break;
445  }
446 
447  if (itr < filename)
448  {
449  prefix = UT_StringView(data);
450  frame = UT_StringView();
451  suffix = UT_StringView();
452  return false;
453  }
454 
455  for (; itr >= filename; itr--)
456  if (!(SYSisdigit(*itr) ||
457  (negative && *itr == '-') ||
458  (fractional && *itr == '.')))
459  break;
460 
461  prefix = UT_StringView(data, (int)(itr-data+1));
462  frame = UT_StringView(itr+1, (int)(digend - itr));
463  suffix = UT_StringView(digend+1);
464 
465  return true;
466 }
467 
468 /// Decompose a filename into various parts
469 ///
470 /// parseNumberedFileName will breakup a filename into its various
471 /// parts: file = prefix$Fsuffix (note: suffix is
472 /// not the same as file extension.) 0 is returned if there is
473 /// no frame number. 'negative' allows -[frame] to be interpreted as a
474 /// negative number. 'fractional' allows [frame].[number] to be interpreted
475 /// as a fractional frame.
476 template <typename StringT>
478  const StringT& str,
479  UT_StringHolder& prefix,
480  UT_StringHolder& frame,
481  UT_StringHolder& suffix,
482  bool negative,
483  bool fractional);
484 
/// Returns true when @c str is at least two characters long, starts and ends
/// with @c delimiter, and the character just before the closing delimiter is
/// not a backslash (i.e. the closing delimiter is not escaped).
///
/// @param str       String-like object exposing @c data() and @c length().
/// @param delimiter Quote character to look for at both ends.
template <typename StringT>
bool
UTstringIsQuoted(const StringT& str, char delimiter)
{
    // Keep the native width of length(); narrowing it to int could wrap for
    // very long strings and lead to out-of-range indexing.
    const auto len = str.length();
    if (len < 2)
        return false;

    const char* data = str.data();
    return data[0] == delimiter
        && data[len - 1] == delimiter
        && data[len - 2] != '\\';
}
495 
496 template <typename StringT, typename IntT>
499  const StringT& str,
500  IntT& number,
501  int base = 0,
503 {
504  const char* b = str.data();
505  const char* e = b+str.length();
506  return SYSparseInteger(b, e, number, base, flags);
507 }
508 
509 template <typename IntT, typename StringT>
512  const StringT& str,
513  int base=0,
515 {
516  IntT number = IntT{};
517  const char* b = str.data();
518  const char* e = b+str.length();
519  SYSparseInteger(b, e, number, base, flags);
520  return number;
521 }
522 
523 #endif // __UT_STRINGUTILS_H__
524 
GLbitfield flags
Definition: glcorearb.h:1596
GT_API const UT_StringHolder filename
T negative(const T &val)
Return the unary negation of the given value.
Definition: Math.h:128
GLboolean * data
Definition: glcorearb.h:131
SYS_NO_DISCARD_RESULT bool UTstringEndsWith(const T &str, const char *suffix, bool case_sensitive=true, exint len=-1)
GLuint start
Definition: glcorearb.h:475
SYS_NO_DISCARD_RESULT bool UTstringStartsWith(const T &str, const char *prefix, bool case_sensitive=true, exint len=-1)
int64 exint
Definition: SYS_Types.h:125
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator end() const
Returns a constant iterator pointing to the end of the string.
GLdouble s
Definition: glad.h:3009
bool UTstringIsQuoted(const StringT &str, char delimiter)
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:40
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool isEmpty() const
Returns true if the string is empty.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE exint length() const
Returns the length of the string in bytes.
int SYSstrncasecmp(const char *a, const char *b, size_t n)
Definition: SYS_String.h:273
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileName(const StringT &str)
SYS_NO_DISCARD_RESULT bool UTstringIsFloat(const StringT &str, bool skip_spaces=false, bool loose=false, bool allow_underscore=false)
SYS_API SYS_ParseStatus SYSparseInteger(const char *begin, const char *&end, int8 &number, int base=0, SYS_ParseFlags flags=SYS_ParseFlags::None)
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
Definition: CE_Vector.h:137
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
SYS_NO_DISCARD_RESULT int UTstringCountChar(const StringT &str, int c)
static constexpr exint npos
Definition: UT_StringView.h:97
#define SYS_NO_DISCARD_RESULT
Definition: SYS_Compiler.h:93
SYS_ParseStatus UTstringToInt(const StringT &str, IntT &number, int base=0, SYS_ParseFlags flags=SYS_ParseFlags::None)
SYS_NO_DISCARD_RESULT std::tuple< UT_StringView, UT_StringView, UT_StringView > UTstringSplitPath(const StringT &str)
Split the given path into the directory, filename, and file extension.
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1222
SYS_NO_DISCARD_RESULT bool UTstringIsInteger(const StringT &str, bool skip_spaces=false)
SYS_NO_DISCARD_RESULT bool UTstringIsInteger< UT_StringView >(const UT_StringView &str, bool skip_spaces)
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileExtension(const StringT &str)
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool equal(const char *str, bool case_sensitive=true) const
SYS_ParseStatus
List of possible states the parsing ended in.
SYS_ParseFlags
SYS_NO_DISCARD_RESULT bool UTstringMatchFileExtension(const StringT &str, const char *extension)
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileExtension< UT_StringView >(const UT_StringView &str)
SYS_NO_DISCARD_RESULT const char * UTstringNumericSuffix(const T &str)
bool UTstringParseNumberedFilename(const StringT &str, UT_StringView &prefix, UT_StringView &frame, UT_StringView &suffix, bool negative, bool fractional)
SYS_FORCE_INLINE bool UTisstring(const char *s)
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator begin() const
Returns a constant iterator pointing to the beginning of the string.
SYS_NO_DISCARD_RESULT bool UTstringIsFloat< UT_StringView >(const UT_StringView &str, bool skip_spaces, bool loose, bool allow_underscore)
SYS_NO_DISCARD_RESULT exint findLastOf(UT_StringView view, exint pos=npos) const noexcept
SYS_NO_DISCARD_RESULT int UTstringCountChar< UT_StringView >(const UT_StringView &str, int c)
OIIO_UTIL_API std::string extension(string_view filepath, bool include_dot=true) noexcept
GLint GLsizei count
Definition: glcorearb.h:405
Definition: format.h:1821