HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_StringUtils.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_StringUtils.h
7  *
8  * COMMENTS:
9  *
10  */
11 
12 #ifndef __UT_STRINGUTILS_H__
13 #define __UT_STRINGUTILS_H__
14 
15 #include "UT_API.h"
16 #include "UT_StringView.h"
17 
18 #include <SYS/SYS_Compiler.h>
19 #include <SYS/SYS_Inline.h>
20 #include <SYS/SYS_ParseNumber.h>
21 #include <SYS/SYS_String.h>
22 #include <SYS/SYS_Types.h>
23 
24 #include <tuple>
25 #include <string.h>
26 
27 class UT_StringHolder;
28 
29 SYS_FORCE_INLINE bool UTisstring(const char *s) { return s && *s; }
30 
31 template <typename T>
34  const T& str,
35  const char* prefix,
36  bool case_sensitive = true,
37  exint len = -1)
38 {
39  if (!UTisstring(str.data()) || !(prefix && *prefix))
40  return false;
41 
42  if (len < 0)
43  len = strlen(prefix);
44 
45  if (len > str.length())
46  return false;
47 
48  const char* start = str.data();
49  if (case_sensitive)
50  return strncmp(start, prefix, len) == 0;
51  return SYSstrncasecmp(start, prefix, len) == 0;
52 }
53 
54 template <typename T>
57  const T& str,
58  const char* suffix,
59  bool case_sensitive = true,
60  exint len = -1)
61 {
62  if (!UTisstring(str.data()) || !(suffix && *suffix))
63  return false;
64 
65  if (len < 0)
66  len = strlen(suffix);
67 
68  if (len > str.length())
69  return false;
70 
71  const char* start = (str.data() + str.length()) - len;
72  if (case_sensitive)
73  return strncmp(start, suffix, len) == 0;
74  return SYSstrncasecmp(start, suffix, len) == 0;
75 }
76 
77 template <typename T>
78 SYS_NO_DISCARD_RESULT const char *
80  const T& str)
81 {
82  int i = str.length();
83  if (i <= 0)
84  return nullptr;
85 
86  const char* data = str.data();
87  while (i--)
88  {
89  if (!SYSisdigit(data[i]))
90  break;
91  }
92  return &data[i + 1];
93 }
94 
95 template <typename StringT>
97 UTstringFileName(const StringT& str)
98 {
99  // Convert the unknown string type to a string view.
100  UT_StringView view(str.data(), str.length());
101  return UTstringFileName(view);
102 }
103 
104 template <>
107 {
108  if (str.isEmpty())
109  return str;
110 
111  exint pos = str.findLastOf('/');
112  if (pos == UT_StringView::npos)
113  {
114  return str;
115  }
116 
117  // Make sure to move the pos past the '/'
118  return UT_StringView(str.begin() + pos + 1, str.end());
119 }
120 
121 /// Split the given path into the directory, filename, and file extension.
122 template <typename StringT>
123 SYS_NO_DISCARD_RESULT std::tuple<UT_StringView, UT_StringView, UT_StringView>
124 UTstringSplitPath(const StringT& str)
125 {
126  // Convert the unknown string type to a string view.
127  UT_StringView view(str.data(), str.length());
128  return UTstringSplitPath(view);
129 }
130 
131 /// Split the given path into the directory, filename, and file extension.
132 template <>
133 SYS_NO_DISCARD_RESULT inline std::tuple<UT_StringView, UT_StringView, UT_StringView>
135 {
136  if (str.isEmpty())
137  return std::make_tuple(str, str, str);
138 
139  // Parse for the dir
140  UT_StringView dir;
141  exint pos = str.findLastOf('/');
142  if (pos != UT_StringView::npos)
143  dir = UT_StringView(str.begin(), pos + 1);
144 
145  // Parse for the filename and file extension
146  UT_StringView file = UT_StringView(str.begin() + dir.length(), str.end());
147 
148  // If the file is `..` or `.`, or a dotfile, then there is no extension to parse
149  if (file.length() > 0 && file[0] == '.')
150  return std::make_tuple(dir, file, UT_StringView());
151 
152  exint dot = file.findLastOf('.');
153 
154  UT_StringView fname = file;
155  UT_StringView fext;
156  if (dot != UT_StringView::npos)
157  {
158  fname = UT_StringView(file.begin(), dot);
159  fext = UT_StringView(file.begin() + dot, file.end());
160  }
161 
162  return std::make_tuple(dir, fname, fext);
163 }
164 
165 template <typename StringT>
167 UTstringFileExtension(const StringT& str)
168 {
169  // Convert the unknown string type to a string view.
170  UT_StringView view(str.data(), str.length());
171  return UTstringFileExtension(view);
172 }
173 
174 template <>
177 {
178  if (str.isEmpty())
179  return str;
180 
181  exint dot = str.findLastOf('.');
182  if (dot == UT_StringView::npos)
183  {
184  return UT_StringView{};
185  }
186 
187  // Make sure the last dot that is found is after the last '/'.
188  exint slash = str.findLastOf('/');
189  if (slash != UT_StringView::npos && slash > dot)
190  return UT_StringView{};
191 
192  return UT_StringView(str.begin() + dot, str.end());
193 }
194 
195 template <typename StringT>
197 UTstringMatchFileExtension(const StringT& str, const char* extension)
198 {
199  UT_StringView str_extension = UTstringFileExtension(str);
200  return str_extension.equal(extension, false);
201 }
202 
203 template <typename StringT>
205 UTstringCountChar(const StringT& str, int c)
206 {
207  UT_StringView view(str.data(), str.length());
208  return UTstringCountChar(view, c);
209 }
210 
211 template <>
212 SYS_NO_DISCARD_RESULT inline int
214 {
215  if (str.isEmpty())
216  return 0;
217 
218  int count = 0;
219  exint pos = 0;
220  while (pos < str.length())
221  {
222  pos = str.findFirstOf(static_cast<char>(c), pos);
223  if (pos == UT_StringView::npos)
224  break;
225 
226  count++;
227  // Skip past the item that was just found.
228  pos++;
229  }
230 
231  return count;
232 }
233 
234 template <typename StringT>
237  const StringT& str,
238  bool skip_spaces = false,
239  bool loose = false,
240  bool allow_underscore = false)
241 {
242  const char* data = str.data();
243  if (!UTisstring(data))
244  return false;
245 
246  int i = 0;
247  // Skip leading spaces
248  if (skip_spaces)
249  for (; SYSisspace(data[i]); i++)
250  continue;
251 
252  for (; data[i] == '-' || data[i] == '+'; i++)
253  continue;
254 
255  int digit = 0;
256  int ecount = 0;
257  int dotcount = 0;
258  int epos = -10;
259 
260  for (; data[i]; i++)
261  {
262  if (SYSisdigit(data[i]))
263  {
264  digit = 1;
265  continue;
266  }
267  switch (data[i])
268  {
269  case 'e':
270  case 'E':
271  if (ecount)
272  return false;
273  epos = i + 1;
274  ecount = 1;
275  break;
276  case '-':
277  case '+':
278  if (epos != i)
279  return false;
280  epos++;
281  break;
282  case '.':
283  if (ecount || dotcount)
284  return false;
285  dotcount = 1;
286  break;
287  case '_':
288  if (allow_underscore)
289  {
290  // If we have not seen a digit, it is an error.
291  if (!digit)
292  return false;
293  // The actual requirements are tighter than this as
294  // I believe trailing underscores are prohibited.
295  }
296  else
297  {
298  return false;
299  }
300  break;
301  case ' ':
302  case '\t':
303  case '\n':
304  case '\r':
305  case '\f':
306  case '\v':
307  if (!skip_spaces)
308  {
309  return false;
310  }
311  else
312  {
313  // ignore trailing spaces
314  for (; data[i]; i++)
315  if (!SYSisspace(data[i]))
316  return false;
317 
318  if (digit || loose)
319  return true;
320  else
321  return false;
322  }
323 
324  default:
325  return false;
326  }
327  }
328 
329  return (digit || loose);
330 }
331 
332 template <>
333 SYS_NO_DISCARD_RESULT inline bool
335  const UT_StringView& str,
336  bool skip_spaces,
337  bool loose,
338  bool allow_underscore)
339 {
340  // Since UT_StringView may not be null terminated we need to use a
341  // specialized method.
342  return str.isFloat(skip_spaces, loose, allow_underscore);
343 }
344 
345 template <typename StringT>
347 UTstringIsInteger(const StringT& str, bool skip_spaces = false)
348 {
349  const char* cur = str.data();
350 
351  if (!UTisstring(cur))
352  return false;
353 
354  // Skip leading spaces
355  if (skip_spaces)
356  {
357  for (; SYSisspace(*cur); ++cur)
358  ;
359  }
360 
361  // Skip all +'s and -'s
362  for (; *cur == '-' || *cur == '+'; ++cur)
363  ;
364 
365  // Skip all digits
366  for (; SYSisdigit(*cur); ++cur)
367  ;
368 
369  // Skip trailing spaces
370  if (skip_spaces)
371  {
372  for (; SYSisspace(*cur); ++cur)
373  ;
374  }
375 
376  // if we have anything left, this is not an integer
377  if (*cur)
378  return false;
379  return true;
380 }
381 
382 template <>
383 SYS_NO_DISCARD_RESULT inline bool
384 UTstringIsInteger<UT_StringView>(const UT_StringView& str, bool skip_spaces)
385 {
386  // Since UT_StringView may not be null terminated we need to use a
387  // specialized method.
388  return str.isInteger(skip_spaces);
389 }
390 
391 /// Decompose a filename into various parts
392 ///
393 /// parseNumberedFileName will breakup a filename into its various
394 /// parts: file = prefix$Fsuffix (note: suffix is
395 /// not the same as file extension.) 0 is returned if there is
396 /// no frame number. 'negative' allows -[frame] to be interpreted as a
397 /// negative number. 'fractional' allows [frame].[number] to be interpreted
398 /// as a fractional frame.
399 ///
400 /// WARNING: Make sure to take care of lifetimes when the input string is
401 /// UT_StringView.
402 template <typename StringT>
403 bool
405  const StringT& str,
406  UT_StringView& prefix,
407  UT_StringView& frame,
408  UT_StringView& suffix,
409  bool negative, bool fractional)
410 {
411  if (str.length() == 0) return false;
412 
413  const char *data = str.data();
414 
415  if (data == nullptr)
416  return false;
417 
418  const char *itr;
419  const char *digend;
420 
421  // make sure we don't find numbers in the path part
422  const char* filename = strrchr(data, '/');
423  if (filename == nullptr)
424  filename = data;
425 
426  int len =strlen(filename);
427 
428  // make sure we don't find numbers in the file extension.
429  const char* fileend = filename + len -1;
430  while(*fileend != '.' && fileend > filename)
431  fileend --;
432 
433  if(fileend == filename)
434  fileend = filename + len -1;
435 
436  // search for the last digit.
437  for(itr = fileend; itr >= filename; itr-- )
438  if (SYSisdigit(*itr))
439  {
440  digend = itr;
441  break;
442  }
443 
444  if (itr < filename)
445  {
446  prefix = UT_StringView(data);
447  frame = UT_StringView();
448  suffix = UT_StringView();
449  return false;
450  }
451 
452  for (; itr >= filename; itr--)
453  if (!(SYSisdigit(*itr) ||
454  (negative && *itr == '-') ||
455  (fractional && *itr == '.')))
456  break;
457 
458  prefix = UT_StringView(data, (int)(itr-data+1));
459  frame = UT_StringView(itr+1, (int)(digend - itr));
460  suffix = UT_StringView(digend+1);
461 
462  return true;
463 }
464 
465 /// Decompose a filename into various parts
466 ///
467 /// parseNumberedFileName will breakup a filename into its various
468 /// parts: file = prefix$Fsuffix (note: suffix is
469 /// not the same as file extension.) 0 is returned if there is
470 /// no frame number. 'negative' allows -[frame] to be interpreted as a
471 /// negative number. 'fractional' allows [frame].[number] to be interpreted
472 /// as a fractional frame.
473 template <typename StringT>
475  const StringT& str,
476  UT_StringHolder& prefix,
477  UT_StringHolder& frame,
478  UT_StringHolder& suffix,
479  bool negative,
480  bool fractional);
481 
/// Tests whether @c str is wrapped in a pair of @c delimiter characters.
///
/// @param str        String-like object exposing data() and length().
/// @param delimiter  Quote character expected at both ends.
/// @return true when the string has at least two characters, starts and
///         ends with @c delimiter, and the closing delimiter is not
///         directly preceded by a backslash.
template <typename StringT>
bool
UTstringIsQuoted(const StringT& str, char delimiter)
{
    // Keep the string's native length type; narrowing to int would
    // misbehave on strings longer than INT_MAX.
    const auto len = str.length();
    if (len < 2)
        return false;

    const char* data = str.data();
    if (data[0] != delimiter || data[len - 1] != delimiter)
        return false;

    // A backslash right before the closing delimiter escapes it.
    return data[len - 2] != '\\';
}
492 
493 template <typename StringT, typename IntT>
496  const StringT& str,
497  IntT& number,
498  int base = 0,
500 {
501  const char* b = str.data();
502  const char* e = b+str.length();
503  return SYSparseInteger(b, e, number, base, flags);
504 }
505 
506 template <typename IntT, typename StringT>
509  const StringT& str,
510  int base=0,
512 {
513  IntT number = IntT{};
514  const char* b = str.data();
515  const char* e = b+str.length();
516  SYSparseInteger(b, e, number, base, flags);
517  return number;
518 }
519 
520 #endif // __UT_STRINGUTILS_H__
521 
GLbitfield flags
Definition: glcorearb.h:1596
GT_API const UT_StringHolder filename
T negative(const T &val)
Return the unary negation of the given value.
Definition: Math.h:128
GLboolean * data
Definition: glcorearb.h:131
SYS_NO_DISCARD_RESULT bool UTstringEndsWith(const T &str, const char *suffix, bool case_sensitive=true, exint len=-1)
GLuint start
Definition: glcorearb.h:475
int64 exint
Definition: SYS_Types.h:125
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator end() const
Returns a constant iterator pointing to the end of the string.
GLdouble s
Definition: glad.h:3009
bool UTstringIsQuoted(const StringT &str, char delimiter)
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:39
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool isEmpty() const
Returns true if the string is empty.
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE exint length() const
Returns the length of the string in bytes.
int SYSstrncasecmp(const char *a, const char *b, size_t n)
Definition: SYS_String.h:273
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileName(const StringT &str)
SYS_NO_DISCARD_RESULT bool UTstringIsFloat(const StringT &str, bool skip_spaces=false, bool loose=false, bool allow_underscore=false)
SYS_API SYS_ParseStatus SYSparseInteger(const char *begin, const char *&end, int8 &number, int base=0, SYS_ParseFlags flags=SYS_ParseFlags::None)
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
Definition: CE_Vector.h:130
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
SYS_NO_DISCARD_RESULT int UTstringCountChar(const StringT &str, int c)
SYS_NO_DISCARD_RESULT bool UTstringStartsWith(const T &str, const char *prefix, bool case_sensitive=true, exint len=-1)
static constexpr exint npos
Definition: UT_StringView.h:96
#define SYS_NO_DISCARD_RESULT
Definition: SYS_Compiler.h:93
SYS_ParseStatus UTstringToInt(const StringT &str, IntT &number, int base=0, SYS_ParseFlags flags=SYS_ParseFlags::None)
SYS_NO_DISCARD_RESULT std::tuple< UT_StringView, UT_StringView, UT_StringView > UTstringSplitPath(const StringT &str)
Split the given path into the directory, filename, and file extension.
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1222
SYS_NO_DISCARD_RESULT bool UTstringIsInteger(const StringT &str, bool skip_spaces=false)
SYS_NO_DISCARD_RESULT bool UTstringIsInteger< UT_StringView >(const UT_StringView &str, bool skip_spaces)
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileExtension(const StringT &str)
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool equal(const char *str, bool case_sensitive=true) const
SYS_ParseStatus
List of possible states the parsing ended in.
SYS_ParseFlags
SYS_NO_DISCARD_RESULT bool UTstringMatchFileExtension(const StringT &str, const char *extension)
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileExtension< UT_StringView >(const UT_StringView &str)
SYS_NO_DISCARD_RESULT const char * UTstringNumericSuffix(const T &str)
bool UTstringParseNumberedFilename(const StringT &str, UT_StringView &prefix, UT_StringView &frame, UT_StringView &suffix, bool negative, bool fractional)
SYS_FORCE_INLINE bool UTisstring(const char *s)
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator begin() const
Returns a constant iterator pointing to the beginning of the string.
Definition: core.h:982
SYS_NO_DISCARD_RESULT bool UTstringIsFloat< UT_StringView >(const UT_StringView &str, bool skip_spaces, bool loose, bool allow_underscore)
SYS_NO_DISCARD_RESULT exint findLastOf(UT_StringView view, exint pos=npos) const noexcept
SYS_NO_DISCARD_RESULT int UTstringCountChar< UT_StringView >(const UT_StringView &str, int c)
OIIO_UTIL_API std::string extension(string_view filepath, bool include_dot=true) noexcept
GLint GLsizei count
Definition: glcorearb.h:405
Definition: format.h:895