HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_String.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  *
7  * NAME: Utility Library (C++)
8  *
9  * COMMENTS: String class
10  *
11  */
12 
13 #ifndef __UT_String_h__
14 #define __UT_String_h__
15 
16 #include "UT_API.h"
17 
18 #include "UT_Assert.h"
19 #include "UT_VectorTypes.h"
20 #include "UT_StringView.h"
21 #include "UT_StringUtils.h"
22 
23 #include <SYS/SYS_Compiler.h>
24 #include <SYS/SYS_Inline.h>
25 #include <SYS/SYS_String.h>
26 #include <SYS/SYS_Types.h>
27 
28 #include <functional>
29 #include <iosfwd>
30 #include <string>
31 
32 #include <ctype.h>
33 #include <stdlib.h>
34 #include <string.h>
35 
36 #ifdef WIN32
37  #define strcasecmp stricmp
38  #define strncasecmp strnicmp
39 #endif
40 
41 class UT_OStream;
42 class UT_String;
43 class UT_StringCshIO;
44 class UT_WorkArgs;
45 class UT_IStream;
46 class ut_PatternRecord;
47 class UT_StringMMPattern;
48 class UT_StringArray;
49 class UT_StringHolder;
50 class UT_StringRef;
51 
52 // The following lookup functions are used by cshParse. By default,
53 // varLookup simply uses getenv, exprLookup opens the command as
54 // a pipe and uses the result.
55 UT_API extern void UTvarLookup(const char *name, UT_String &result);
56 UT_API extern void UTexprLookup(const char *name, UT_String &result);
57 
58 /// @file
59 /// @class UT_String
60 ///
61 /// UT_String is a string class that supports two different types of assignment
62 /// semantics:
63 /// - Shallow (default): Just reference the given string and NOT take
64 /// ownership.
65 /// - Deep: Make a copy of the given string, taking ownership in the
66 /// process (aka making it "hard").
67 ///
68 /// If UT_String::harden() is called, or any other UT_String method that
69 /// requires modifying the string, it will make a copy of its reference pointer
70 /// (and take ownership) first.
71 ///
73 {
74 public:
75 
76  /// UT_String can be constructed with UT_String::ALWAYS_DEEP to create an
77  /// object that will always perform deep copies when assigned to.
78  enum UT_AlwaysDeepType { ALWAYS_DEEP };
79 
80  /// @brief Construct UT_String from a C string, using shallow semantics
81  ///
82  /// @param str The initial string.
83  /// @param deepCopy If true, a copy of @em str will be used.
84  /// @param len Number of characters to use from @em str. Use -1 to
85  /// use the entire string. If len is non-negative, then
86  /// deepCopy will be implicitly set to true. If str is NULL
87  /// and len is non-negative, then it will be initialized
88  /// with "".
90  UT_String(const char *str = 0)
91  : myData(SYSconst_cast(str))
92  , myIsReference(true)
93  , myIsAlwaysDeep(false)
94  {}
95  UT_String(const char *str, bool deep_copy, int len = -1);
96 
97  /// @brief Construct UT_String from a std::string, always doing
98  /// a deep copy. The result will only be a UT_AlwaysDeep if the
99  /// appropriate version is used, however!
100  ///
101  /// NOTE: You cannot do:
102  /// UT_String foo;
103  /// std::string bar = "hello world";
104  /// foo = UT_String(bar.substr(2, 5));
105  ///
106  /// It provides a shortcut for constructing a UT_String from a function
107  /// that returns a std::string by value. For example, it lets you write
108  /// @code
109  /// UT_String str(func());
110  /// @endcode
111  /// instead of
112  /// @code
113  /// UT_String str(func().c_str(), /*harden=*/true);
114  /// @endcode
115  explicit UT_String(const std::string &str)
116  : myIsReference(false),
117  myIsAlwaysDeep(false)
118  { myData = strdup(str.c_str()); }
119 
120  /// @brief Construct UT_String from a UT_StringHolder.
121  /// This always duplicates and uses ALWAYS_DEEP semantics.
122  explicit UT_String(const UT_StringHolder &str);
123 
124 private:
125  /// This is intentionally not implemented - callers should choose between
126  /// the const char * and UT_StringHolder constructors, depending on whether
127  /// they want to make a deep copy.
128  /// @see UT_StringWrap.
129  UT_String(const UT_StringRef &);
130 
131 public:
132  /// @brief Construct UT_String from a UT_StringView.
133  /// This always duplicates and uses ALWAYS_DEEP semantics.
134  explicit UT_String(const UT_StringView &sv);
135 
136  /// @brief Construct UT_String from a C string, using ALWAYS_DEEP semantics
137  UT_String(UT_AlwaysDeepType, const char *str = 0)
138  : myIsReference(false),
139  myIsAlwaysDeep(true)
140  { myData = str ? strdup(str) : 0; }
141 
142  /// @brief Construct UT_String from a std::string, using ALWAYS_DEEP
143  /// semantics
145  : myIsReference(false),
146  myIsAlwaysDeep(true)
147  { myData = strdup(str.c_str()); }
148 
149  /// Copy constructor
150  ///
151  /// If the string we're copying from is ALWAYS_DEEP, then this object will
152  /// also become ALWAYS_DEEP. This way, you can pass/return a string by
153  /// value.
154  UT_String(const UT_String &str);
155 
156  ~UT_String();
157 
158  /// Move operators
159  /// @{
160  UT_String(UT_String &&str) noexcept
161  : myData(str.myData)
162  , myIsReference(str.myIsReference)
163  , myIsAlwaysDeep(str.myIsAlwaysDeep)
164  {
165  str.myData = nullptr;
166  str.myIsReference = !str.myIsAlwaysDeep;
167  }
169  {
170  freeData();
171  myData = str.myData;
172  myIsReference = str.myIsReference;
173  myIsAlwaysDeep = str.myIsAlwaysDeep;
174  str.myData = nullptr;
175  str.myIsReference = !str.myIsAlwaysDeep;
176  return *this;
177  }
178  /// @}
179 
180  /// Make a string always deep
181  void setAlwaysDeep(bool deep)
182  {
183  myIsAlwaysDeep = deep;
184  if (deep && myIsReference)
185  {
186  if (myData != NULL)
187  harden();
188  else
189  {
190  // This takes the same semantic as
191  // str = NULL;
192  // where str is an always deep string
193  myIsReference = false;
194  }
195  }
196  }
197  bool isAlwaysDeep() const
198  {
199  return myIsAlwaysDeep;
200  }
201 
202  void swap( UT_String &other );
203 
204  /// Take shallow copy and make it deep.
205  // @{
206  void harden()
207  {
208  if (!myIsReference && myData)
209  return;
210  myData = strdup(myData ? myData : "");
211  myIsReference = false;
212  }
213 
214  void harden(const char *s, int len = -1);
216  {
217  if (myIsReference)
218  {
219  if (isstring())
220  harden();
221  else
222  *this = "";
223  }
224  }
225  void hardenIfNeeded(const char *s)
226  {
227  if (s && *s)
228  harden(s);
229  else
230  *this = "";
231  }
232  // @}
233 
234  /// Returns whether this string is hardened already.
235  bool isHard() const { return !myIsReference; }
236 
237  /// Give up ownership of string
238  ///
239  /// Take a hard reference and make it shallow. This method makes sure
240  /// it gives back something you can delete, because this UT_String is
241  /// taking its hands off the data. Use it with care since it may lead
242  /// to memory leaks if, for example, you harden it again later.
243  ///
244  /// In the case of ALWAYS_DEEP strings, this is disallowed so it will
245  /// just return a copy of the data.
246  char * steal()
247  {
248  if (!myIsAlwaysDeep)
249  {
250  if (myIsReference)
251  myData = strdup(myData ? myData : ""); // harden
252  myIsReference = true; // but say it's soft
253  return myData;
254  }
255  else
256  {
257  // return a new copy of the data without releasing
258  // ownership for always deep strings
259  return strdup(myData ? myData : "");
260  }
261  }
262 
263  /// Take ownership of given string
264  ///
265  /// adopt() is the opposite of steal(). Basically, you're giving
266  /// the UT_String ownership of the string.
267  // @{
268  void adopt(char *s)
269  {
270  if (!myIsReference)
271  {
272  if (s != myData)
273  free(myData);
274  }
275  myData = s;
276  myIsReference = false;
277  }
278  void adopt(UT_String &str)
279  {
280  adopt(str.steal());
281  }
282  void adopt(UT_StringHolder &holder);
283 
284  // @}
285 
286  /// Save string to binary stream.
287  void saveBinary(std::ostream &os) const { save(os, true); }
288 
289  /// Save string to ASCII stream. This will add double quotes and escape to
290  /// the stream if necessary (empty string or contains spaces).
291  void saveAscii(std::ostream &os) const { save(os, false); }
292  void saveAscii(UT_OStream &os) const { save(os, false); }
293 
294  /// Save string to stream. Saves as binary if @em binary is true.
295  void save(std::ostream &os, bool binary) const;
296  void save(UT_OStream &os, bool binary) const;
297 
298  /// Load string from stream. Use is.eof() to check eof status
299  bool load(UT_IStream &is);
300 
301  /// Reset the string to the default constructor.
302  void clear()
303  { *this = (const char *)NULL; }
304 
305  /// Prepend a string (or character)
306  // @{
307  void prepend(const char *prefix);
308  void prepend(char ch);
309  // @}
310 
311  /// Append a character
312  void append(char ch);
313 
314  /// Append a string or a section of a string.
315  void append(const char *str, exint len = -1);
316 
317  /// Remove the last character
318  void removeLast() { truncate(length()-1); }
319  /// Truncate the string at the Nth character
320  void truncate(exint len);
321 
322  UT_String &operator=(const UT_String &str);
323  UT_String &operator=(const char *str);
324  UT_String &operator=(const std::string &str);
325  UT_String &operator=(const UT_StringHolder &str);
326  UT_String &operator=(const UT_StringView &str);
327 private:
328  /// Not implemented - see UT_String(const UT_StringRef &).
329  UT_String &operator=(const UT_StringRef);
330 
331 public:
332  UT_String &operator+=(const char *str)
333  {
334  if (!isstring())
335  {
336  // We are an empty string, so we merely copy
337  // the incoming string rather than trying to append
338  // to it.
339  harden(str);
340  }
341  else
342  {
343  bool same = (str == myData);
344  harden();
345  if (str)
346  {
347  int mylen = (int)strlen(myData);
348  myData = (char *)realloc(myData,
349  mylen+strlen(str)+1);
350  if (!same)
351  {
352  strcpy(&myData[mylen], str);
353  }
354  else
355  {
356  memcpy(myData + mylen, myData, mylen);
357  myData[mylen * 2] = '\0';
358  }
359  }
360  }
361  return *this;
362  }
363 
365  {
366  *this += (const char *)str.myData;
367  return *this;
368  }
369  UT_String &operator+=(const UT_StringRef &str);
370 
371  // Basic equality functions and operators
372  int compare(const char *str, bool case_sensitive=true) const
373  {
374  // Unlike std::string, UT_String treats NULL and
375  // the empty string as distinct (empty has precedence).
376  if (myData==0 || str==0)
377  {
378  if (myData) return 1;
379  if(str) return -1;
380  return 0;
381  }
382  if (case_sensitive)
383  return strcmp(myData, str);
384  return strcasecmp(myData, str);
385  }
386  int compare(const UT_String &str, bool case_sensitive=true) const
387  {
388  return compare(str.myData,case_sensitive);
389  }
390  int compare(const UT_StringRef &str, bool case_sensitive=true) const;
391 
392  bool equal(const char *str, bool case_sensitive=true) const
393  {
394  return compare(str,case_sensitive)==0;
395  }
396  bool equal(const UT_String &str, bool case_sensitive=true) const
397  {
398  return compare(str.myData,case_sensitive)==0;
399  }
400  bool equal(const UT_StringRef &str, bool case_sensitive=true) const
401  {
402  return compare(str,case_sensitive)==0;
403  }
404 
405  bool operator==(const char *str) const
406  {
407  return compare(str)==0;
408  }
409  bool operator==(const UT_String &str) const
410  {
411  return compare(str.myData)==0;
412  }
413  bool operator==(const UT_StringRef &str) const
414  {
415  return compare(str)==0;
416  }
417  bool operator!=(const char *str) const
418  {
419  return compare(str)!=0;
420  }
421  bool operator!=(const UT_String &str) const
422  {
423  return compare(str.myData)!=0;
424  }
425  bool operator!=(const UT_StringRef &str) const
426  {
427  return compare(str)!=0;
428  }
429  bool operator<(const char *str) const
430  {
431  return compare(str)<0;
432  }
433  bool operator<(const UT_String &str) const
434  {
435  return compare(str.myData)<0;
436  }
437  bool operator<(const UT_StringRef &str) const
438  {
439  return compare(str)<0;
440  }
441  bool operator<=(const char *str) const
442  {
443  return compare(str)<=0;
444  }
445  bool operator<=(const UT_String &str) const
446  {
447  return compare(str.myData)<=0;
448  }
449  bool operator<=(const UT_StringRef &str) const
450  {
451  return compare(str)<=0;
452  }
453  bool operator>(const char *str) const
454  {
455  return compare(str)>0;
456  }
457  bool operator>(const UT_String &str) const
458  {
459  return compare(str.myData)>0;
460  }
461  bool operator>(const UT_StringRef &str) const
462  {
463  return compare(str)>0;
464  }
465  bool operator>=(const char *str) const
466  {
467  return compare(str)>=0;
468  }
469  bool operator>=(const UT_String &str) const
470  {
471  return compare(str.myData)>=0;
472  }
473  bool operator>=(const UT_StringRef &str) const
474  {
475  return compare(str)>=0;
476  }
477 
478  /// Test whether the string is defined or not
479  SYS_SAFE_BOOL operator bool() const { return isstring(); }
480 
481  /// Return the edit distance between two strings.
482  /// See http://en.wikipedia.org/wiki/Levenshtein_distance for details.
483  /// allow_subst controls whether a substitution of a character with
484  /// another is a single operation, rather than two operations of
485  /// insert and delete.
486  int distance(const char *str,
487  bool case_sensitive = true,
488  bool allow_subst = true) const;
489 
490  operator const char *() const
491  { return (const char *)myData; }
492  operator char *()
493  { return myData; }
494 
495  operator UT_StringView() const
496  { return UT_StringView(myData); }
497 
498  const char *c_str() const { return buffer(); }
499  const char *buffer() const { return myData; }
500  const char *data() const { return buffer(); }
501  const char *nonNullBuffer() const { return myData ? myData : ""; }
502 
503  char operator()(unsigned i) const
504  {
505  UT_ASSERT_P( isstring() );
506  UT_ASSERT_SLOW(i <= strlen(myData));
507  return myData[i];
508  }
509 
510  char &operator()(unsigned i)
511  {
512  harden();
513  return myData[i];
514  }
515 
516  // Prefer using write() since ideally the non-const operator() is removed
517  inline void write(unsigned i, char c)
518  {
519  hardenIfNeeded();
520  myData[i] = c;
521  }
522 
523  int toInt() const;
524  fpreal toFloat() const;
525 
526  /// Converts the contents of this UT_String to a std::string. Note that
527  /// std::string can't be constructed with a null pointer, so you can't
528  /// just write std::string s = ut_string.buffer();
529  std::string toStdString() const;
530 
531  //
532  // Here, we're finished with operators
533  //
534 
535  /// Return length of string
536  unsigned length() const
537  { return (myData) ? (unsigned)strlen(myData) : 0; }
538 
539  /// Return memory usage in bytes
540  int64 getMemoryUsage(bool inclusive=true) const
541  {
542  return (inclusive ? sizeof(*this) : 0)
543  + (!myIsReference ? (length() + 1)*sizeof(char) : 0);
544  }
545 
546  /// Find first occurrence of character. Returns NULL upon failure.
547  /// @{
548  char *findChar(int c)
549  { return myData ? strchr(myData, c) : nullptr; }
550  const char *findChar(int c) const
551  { return SYSconst_cast(*this).findChar(c); }
552  /// @}
553 
554  /// Find first occurrence of any character in @em str
555  /// @{
556  char *findChar(const char *str)
557  { return myData ? strpbrk(myData, str) : nullptr; }
558  const char *findChar(const char *str) const
559  { return SYSconst_cast(*this).findChar(str); }
560  /// @}
561 
562  /// Find last occurrence of character
563  /// @{
564  char *lastChar(int c)
565  { return myData ? strrchr(myData, c) : nullptr; }
566  const char *lastChar(int c) const
567  { return SYSconst_cast(*this).lastChar(c); }
568  /// @}
569 
570  /// Return the number of occurrences of the specified character.
571  int countChar(int c) const;
572 
573  /// Count the occurrences of the string
574  int count(const char *str, bool case_sensitive = true) const;
575 
576  char *findNonSpace();
577  const char *findNonSpace() const;
578  const char *findWord(const char *word) const;
579  bool findString(const char *str, bool fullword,
580  bool usewildcards) const;
581  int changeWord(const char *from, const char *to, bool all = true);
582  int changeString(const char *from, const char *to, bool fullword);
583  int changeQuotedWord(const char *from, const char *to,
584  int quote = '`', bool all = true);
585 
586  int findLongestCommonSuffix( const char *with ) const;
587 
588  /// Perform deep copy of the substring starting from @em index
589  /// for @em len characters into the specified UT_String.
590  /// If @em len is too long, then a substring starting from @em index to
591  /// the end of the string is copied.
592  /// Returns the length of the copied substring.
593  int substr(UT_String &buf, int index, int len=0) const;
594 
595  /// Determine if string can be seen as a single floating point number
596  bool isFloat(bool skip_spaces = false,
597  bool loose = false,
598  bool allow_underscore = false) const;
599  /// Determine if string can be seen as a single integer number
600  bool isInteger(bool skip_spaces = false) const;
601 
602  void toUpper()
603  {
604  char *ptr;
605  harden();
606  for (ptr=myData; *ptr; ptr++)
607  *ptr = (char)toupper(*ptr);
608  }
609  void toLower()
610  {
611  char *ptr;
612  harden();
613  for (ptr=myData; *ptr; ptr++)
614  *ptr = (char)tolower(*ptr);
615  }
616 
617 
618  /// Return last component of forward slash separated path string
619  ///
620  /// If there is a slash in the string, fileName() returns the string
621  /// starting after the slash. Otherwise, it returns the contents of
622  /// this string. Note that it returns a pointer into this string.
623  const char *fileName() const
624  {
625  UT_StringView file_name = UTstringFileName(*this);
626  return file_name.begin();
627  }
628  /// Return the extension of a file path string
629  /// @{
631  {
633  if (extension.isEmpty())
634  return nullptr;
635  return myData + (extension.begin() - myData);
636  }
637  const char *fileExtension() const
638  {
639  return SYSconst_cast(*this).fileExtension();
640  }
641  /// @}
642 
643  /// Return whether the file extension matches. The extension passed in
644  /// should include the '.' separator. For example: @code
645  /// matchFileExtension(".jpg")
646  /// @endcode
647  bool matchFileExtension(const char *match_extension) const
648  {
649  return UTstringMatchFileExtension(*this, match_extension);
650  }
651  /// Return path terminated just before the extension.
652  /// If the filename starts with '.' and no path is provided,
653  /// returns NULL
654  UT_String pathUpToExtension() const;
655 
656  /// Replace the file extension and return the new string
657  UT_String replaceExtension(const UT_String &new_ext) const;
658 
659  /// Split a path into @em dir_name and @em file_name, where @em file_name
660  /// is everything after the final slash (i.e. the same as fileName()).
661  /// Either part may be empty. Note that if the string starts with / and
662  /// only contains that one slash, the @em dir_name will be / and not blank.
663  /// @em dir_name and @em file_name will either be set to hardened strings
664  /// or an empty string.
665  void splitPath(UT_String &dir_name, UT_String &file_name) const;
666 
667  /// Decompose a filename into various parts
668  ///
669  /// parseNumberedFileName will breakup a filename into its various
670  /// parts: file = prefix$Fsuffix (note: suffix is
671  /// not the same as file extension.) 0 is returned if there is
672  /// no frame number. 'negative' allows -[frame] to be interpreted as a
673  /// negative number. 'fractional' allows [frame].[number] to be interpreted
674  /// as a fractional frame.
675  int parseNumberedFilename(UT_String &prefix,
676  UT_String &frame,
677  UT_String &suff,
678  bool negative = true,
679  bool fractional = false) const;
680 
681  bool isstring() const
682  { return (myData && *myData); }
683 
684  /// trimSpace() will remove all space characters (leading and following)
685  /// from a string. If the string consists of multiple words, the words will
686  /// be collapsed. The function returns 1 if space was trimmed.
687  int trimSpace(bool leave_single_space_between_words = false);
688 
689  /// A version of trimSpace() that only removes leading and following spaces
690  /// from a string, leaving any between words intact.
691  int trimBoundingSpace();
692 
693  /// strips out all characters found in 'chars'. The string length will be
694  /// reduced by the number of characters removed. The number of characters
695  /// removed is returned.
696  int strip(const char *chars);
697 
698  /// protectString() will modify the existing string to escape double quotes
699  /// and backslashes. It will only wrap the string in double quotes if
700  /// it has spaces in it. If 'protect_empty' is true, the string will
701  /// become '""', otherwise it will stay empty.
702  void protectString(bool protect_empty=false);
703 
704  /// If the char is a quote character `"` or `'` then make sure to protect
705  /// it by adding '\' before the quote character. If the character is not
706  /// a quote character then the character is simply added to the ostream.
707  static void protectString(std::ostream& os, char c);
708 
709  /// protectPreQuotePythonStringLiteral() will modify the existing string
710  /// to escape any non-printing characters, backslashes, and instances of the
711  /// specified delimiter. Unlike protectString(), it will not wrap the
712  /// string in quotes.
713  void protectPreQuotePythonStringLiteral(char delimiter='\'');
714 
715  /// returns true if the string begins and ends with a (non-escaped) quote
716  /// 'delimiter'.
717  bool isQuotedString(char delimiter='\'') const;
718 
719  /// makeQuotedString() is similar to protectString() except it returns a
720  /// new string instead of changing this string, it does wrap the string
721  /// in quotes, and it lets you use either ' or " as the delimiter.
722  /// The quoted string can also optionally be made to escape non-printing
723  /// characters. The string that's returned is UT_String::ALWAYS_DEEP.
724  UT_String makeQuotedString(char delimiter='\'',
725  bool escape_nonprinting=false) const;
726 
727  /// makeSmartQuotedString() will use either ' or " as the delimiter to
728  /// avoid escaped quotes, using the default delimiter if it doesn't
729  /// matter. The quoted string can also optionally be made to escape
730  /// non-printing characters. The string that's returned is
731  /// UT_String::ALWAYS_DEEP.
732  UT_String makeSmartQuotedString(char default_delimiter='\'',
733  bool escape_nonprinting=false) const;
734 
735  /// Expands standard control sequences ('\\n', '\\r', '\\t', '\\0') to their
736  /// corresponding ASCII values (10, 13, 9, 0, respectively).
737  /// If the expand_extended flag is enabled, an extended expansion is enabled
738  /// which adds hexadecimal, decimal and Unicode control sequence expansion.
739  /// Any values resulting from that expansion, which are outside the standard
740  /// ASCII range, will be encoded as UTF8-encoded control points.
741  void expandControlSequences(bool expand_extended = false);
742 
743  bool hasWhiteSpace() const;
744 
745  void removeTrailingSpace();
746  void removeTrailingChars(char chr);
747 
748  void removeTrailingDigits();
749 
750  // cshParse() does not need to harden the string. It does very robust
751  // parsing in the style of csh. It actually does better parsing than
752  // csh. Variable expansion & backquote expansion are done in the
753  // correct order for the correct arguments. One caveat is that the
754  // string cannot have \0377 (0xff) as a character in it.
755  //
756  // If there is an error in parsing, the error flag (if passed in) will be
757  // set to:
758  // 0 = no error
759  // 1 = line too long
760  int cshParse(char *argv[], int max_args,
761  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
762  void (*elookup)(const char *, UT_String&)=UTexprLookup,
763  int *error = 0,
764  UT_StringCshIO *io=0);
765 
766  int cshParse(UT_WorkArgs &argv,
767  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
768  void (*elookup)(const char *, UT_String&)=UTexprLookup,
769  int *error = 0,
770  UT_StringCshIO *io=0);
771 
772  // dosParse() uses the semi-braindead approach of ms-dos to argument
773  // parsing. That is, arguments are separated by a double quote or space
774  // (being a space or a tab). If 'preserve_backslashes' is set to
775  // false (the default), back-slashes are passed through verbatim, unless
776  // the following character is a double quote. Likewise, any pairs of
777  // back-slashes preceding a double quote are turned into single
778  // back-slashes.
779  int dosParse(UT_WorkArgs &argv, bool preserve_backslashes=false);
780  int dosParse(char *argv[], int max_args,
781  bool preserve_backslashes=false);
782 
783  /// Perform dos parsing modifying the buffer passed in. The args will be
784  /// stored as raw pointers into the given buffer
785  static int dosParse(char *buffer, UT_WorkArgs &args,
786  bool preserve_backslashes);
787 
788  // parse will insert nulls into the string.
789  // NB: The argv array is null terminated, thus the effective
790  // maximum number of arguments is one less than maxArgs.
791  // NB: The maxArgs variants are all deprecated, use UT_WorkArgs
792  // instead.
793  int parse(char *argv[], int max_args,
794  const char *quotes = "\"'", bool keep_quotes = false)
795  {
796  harden();
797  return parseInPlace(argv, max_args, quotes, keep_quotes);
798  }
799  int parse(UT_WorkArgs &argv, int start_arg = 0,
800  const char *quotes = "\"'", bool keep_quotes = false)
801  {
802  harden();
803  return parseInPlace(argv, start_arg, quotes, keep_quotes);
804  }
805  int parse(UT_StringArray &argv, int start_arg = 0,
806  const char *quotes = "\"'", bool keep_quotes = false)
807  {
808  harden();
809  return parseInPlace(argv, start_arg, quotes, keep_quotes);
810  }
811  // Warning: the following methods insert nulls into the string without
812  // hardening.
813  int parseInPlace(char *argv[], int max_args,
814  const char *quotes = "\"'", bool keep_quotes = false);
815  int parseInPlace(UT_WorkArgs &argv, int start_arg = 0,
816  const char *quotes = "\"'", bool keep_quotes = false);
817  int parseInPlace(UT_StringArray &argv, int start_arg = 0,
818  const char *quotes = "\"'", bool keep_quotes = false);
819 
820  // Splits the string at specific separator characters. Unlike the parse
821  // methods, the tokenize methods ignore quoting completely.
822  int tokenize(char *argv[], int max_args, char separator)
823  {
824  harden();
825  return tokenizeInPlace(argv, max_args, separator);
826  }
827  int tokenizeInPlace(char *argv[], int max_args, char separator);
828  int tokenize(UT_WorkArgs &argv, char separator)
829  {
830  harden();
831  return tokenizeInPlace(argv, separator);
832  }
833  int tokenizeInPlace(UT_WorkArgs &argv, char separator);
834  int tokenize(char *argv[], int max_args,
835  const char *separators = " \t\n")
836  {
837  harden();
838  return tokenizeInPlace(argv, max_args, separators);
839  }
840  int tokenizeInPlace(char *argv[], int max_args,
841  const char *separators = " \t\n");
842  int tokenize(UT_WorkArgs &argv, const char *separators = " \t\n")
843  {
844  harden();
845  return tokenizeInPlace(argv, separators);
846  }
847  int tokenizeInPlace(UT_WorkArgs &argv,
848  const char *separators = " \t\n");
849 
850  template<typename T>
851  int tokenize(T &list, const char *separators = " \t\n")
852  {
853  harden();
854  return tokenizeInPlace(list, separators);
855  }
856 
857  template<typename T>
858  int tokenizeInPlace(T &list,
859  const char *separators = " \t\n")
860  {
861  char *token;
862  char *context;
863 
864  if (!isstring())
865  return 0;
866  if (!(token = SYSstrtok(myData, separators, &context)))
867  return 0;
868 
869  list.append(token);
870 
871  while ((token = SYSstrtok(0, separators, &context)) != NULL)
872  list.append(token);
873 
874  return list.entries();
875  }
876 
877 
878  // Replaces the contents with variables expanded.
879  void expandVariables();
880 
881  // Functions to hash a string
883  {
884  return hash(myData);
885  }
886 
887  // The code can be used for rudimentary hash chaining, but it is NOT
888  // the case that hash("def", hash("abc")) == hash("abcdef"), so there
889  // is little reason to use this rather than normal hash combiners.
890  static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code = 0)
891  {
892  return SYSstring_hashseed(
893  str, SYS_EXINT_MAX, code, /*allow_nulls*/ false);
894  }
895 
896  // This does pattern matching on a string. The pattern may include
897  // the following syntax:
898  // ? = match a single character
899  // * = match any number of characters
900  // [char_set] = matches any character in the set
901  bool match(const char *pattern, bool case_sensitive = true) const;
902 
903  // Similar to match() except it assumes that we're dealing with file paths
904  // so that it determines whether to do a case-sensitive match depending on
905  // the platform.
906  bool matchFile(const char *pattern) const;
907 
908  // Similar to match() but uses rsync style matching:
909  // * = match any number of characters up to a slash
910  // ** = match any number of characters, including a slash
911  bool matchPath(const char *pattern, bool case_sensitive = true,
912  bool *excludes_branch = nullptr) const;
913 
914  // multiMatch will actually check multiple patterns all separated
915  // by the separator character: i.e. geo1,geo2,foot*
916  //
917  // NOTE: No pattern may contain the separator
918  bool multiMatch(const char *pattern,
919  bool case_sensitive, char separator) const;
920  bool multiMatch(const char *pattern, bool case_sensitive = true,
921  const char *separators = ", ",
922  bool *explicitly_excluded = 0,
923  int *match_index = 0,
924  ut_PatternRecord *pattern_record=NULL) const;
925  bool multiMatch(const UT_StringMMPattern &pattern,
926  bool *explicitly_excluded = 0,
927  int *match_index = 0,
928  ut_PatternRecord *pattern_record=NULL) const;
929 
930  // this method matches a pattern while recording any wildcard
931  // patterns used.
932  bool multiMatchRecord(const char *pattern, int maxpatterns,
933  char *singles, int &nsingles,
934  char **words, int &nwords,
935  bool case_sensitive = true,
936  const char *separators = ", ") const;
937  bool multiMatchRecord(const UT_StringMMPattern &pattern,
938  int maxpatterns,
939  char *singles, int &nsingles,
940  char **words, int &nwords) const;
941  bool multiMatchRecord(const char *pattern,
942  UT_StringHolder &singles,
943  UT_StringArray &words,
944  bool case_sensitive = true,
945  const char *separators = ", ") const;
946 
947  /// matchPattern(UT_WorkArgs &) assumes that the arguments contain the
948  /// components of a pattern to be matched against. The method returns
949  /// true if the pattern matches, false if it doesn't. This matching
950  /// process handles ^ expansion properly (and efficiently).
951  /// If the string doesn't match any components of the pattern, then the
952  /// assumed value is returned.
953  bool matchPattern(const UT_WorkArgs &pattern_args,
954  bool assume_match=false) const;
955 
956  static bool multiMatchCheck(const char *pattern);
957  static bool wildcardMatchCheck(const char *pattern);
958 
959  // Same as match but equivalent to "*pattern*"
960  bool contains(const char *pattern, bool case_sensitive=true) const;
961 
962  // Returns true if our string starts with the specified prefix.
963  bool startsWith(const UT_StringView &prefix,
964  bool case_sensitive = true) const;
965 
966  // Returns true if our string ends with the specified suffix.
967  bool endsWith(const UT_StringView &suffix,
968  bool case_sensitive = true) const;
969 
970  /// Pluralize an English noun ending (i.e. box->boxes or tube->tubes). The
971  /// ending must be lower case to be processed properly.
972  void pluralize();
973 
974  // Will parse strings like 1-10:2,3 and call func for every element
975  // implied. It will stop when the func returns 0 or the parsing
976  // is complete, in which case it returns 1.
977  // Parsing also allows secondary elements to be specified eg 3.4 0.12
978  // The secfunc is used to find the maximum index of secondary elements
979  // for each compound num. The elements are assumed to be
980  // non-negative integers.
981  int traversePattern(int max, void *data,
982  int (*func)(int num, int sec, void *data),
983  unsigned int (*secfunc)(int num,void *data)=0,
984  int offset=0) const;
985 
986  // Fast containment, assumes no special characters
987  const char *fcontain(const char *pattern, bool case_sensitive=true) const
988  {
989  if (!myData) return NULL;
990  return case_sensitive ? strstr(myData, pattern)
991  : SYSstrcasestr(myData, pattern);
992  }
993 
994  // Given the match pattern which fits our contents, any assigned wildcards
995  // are substituted. The wildcards may also be indexed.
996  // Returns true if rename was successful.
997  //
998  // @note This code was adapted from CHOP_Rename::subPatterns() and
999  // works the same way.
1000  //
1001  // eg. this = apple, match = a*le, replace = b* ---> bpp
1002  // this = a_to_b, match = *_to_*, replace = *(1)_to_*(0) ---> b_to_a
1003  bool patternRename(const char *match_pattern, const char *replace);
1004 
1005  // Given the name rule according to which a name consists of a base name
1006  // (char sequence ending in a non-digit) and a numerical suffix, the
1007  // following two methods return the base and the suffix respectively.
1008  // base() needs a string buffer and will return a const char* pointing to it.
1009  // base() always returns a non-zero pointer,
1010  // while suffix() returns 0 if no suffix is found.
1011  const char *base(UT_String &buf) const;
1012  const char *suffix() const;
1013 
1014  // incrementNumberedName will increment a name. If it has a numerical
1015  // suffix, that suffix is incremented. If not, "2" is appended to the
1016  // name. The preserve_padding parameter can be set to true so that zero
1017  // padding is preserved. Incrementing foo0009 will produce foo10 with
1018  // this parameter set to false, or foo0010 if it is set to true.
1019  void incrementNumberedName(bool preserve_padding = false);
1020 
1021  // setFormat is used to set how an outstream formats its ascii output.
1022  // So you can use printf style formatting. eg:
1023  // UT_String::setFormat(cout, "%08d") << 100;
1024  //
1025  // Note: Don't do:
1026  // cout << UT_String::setFormat(cout, "%08d") << 100;
1027  // ^^^^
1028  // Also: The formatting changes (except for field width) are permanent,
1029  // so you'll have to reset them manually.
1030  //
1031  // TODO: A resetFormat, and a push/pop format pair.
1032  static std::ostream &setFormat(std::ostream &os, const char *fmt);
1033  std::ostream &setFormat(std::ostream &os);
1034 
1035  int replacePrefix(const char *oldpref,
1036  const char *newpref);
1037  int replaceSuffix(const char *oldsuffix,
1038  const char *newsuffix);
1039 
1040  // expandArrays will expand a series of tokens of the
1041  // form prefix[pattern]suffix into the names array
1042  //
1043  // Note: Each names[i] must be free'd after use
1044  // and label is used on the non-const parse method
1045  // NB: The max variants are all deprecated, use UT_WorkArgs
1046  // instead.
1047  int expandArrays(char *names[], int max);
1048 
1049  // This routine will ensure no line is over the specified
1050  // number of columns. Offending lines will be wrapped at
1051  // the first spaceChar or cut at exactly cols if spaceChar
1052  // is not found.
1053  // It returns one if any changes were done.
1054  // It currently treats tabs as single characters which should be
1055  // changed.
1056  // It will break words at hyphens if possible.
1057  int format(int cols);
1058 
1059  /// Replaces up to 'count' occurrences of 'find' with 'replacement',
1060  /// and returns the number of substitutions that occurred.
1061  /// If 'count' <= 0, all occurrences will be replaced.
1062  int substitute( const char *find, const char *replacement,
1063  int count = -1);
1064  /// Convenience version of substitute() for all or single occurrence.
1065  SYS_DEPRECATED_REPLACE(19.5, "Use 'count' variant")
1066  int substitute( const char *find, const char *replacement,
1067  bool all )
1068  { return substitute(find, replacement, !all ? 1 : -1); }
1069 
1070  // This function replaces the character found with another character.
1071  int substitute( char find, char replacement, bool all = true );
1072 
1073  // this function removes the substring at pos and len, and inserts str
1074  // at pos. it returns the difference (new_length - old_length)
1075  int replace( int pos, int len, const char *str );
1076 
1077  // remove the first len characters of this string
1078  int eraseHead(int len)
1079  { return replace(0, len, ""); }
1080 
1081  // remove the last len characters of this string
1082  int eraseTail(int len)
1083  { return replace(length() - len, len, ""); }
1084 
1085  // remove the substring start at pos for len characters
1086  int erase(int pos = 0, int len = -1)
1087  {
1088  if (len < 0)
1089  len = length() - pos;
1090  return replace(pos, len, "");
1091  }
1092 
1093  // insert the given string at pos into this string
1094  int insert(int pos, const char *str)
1095  { return replace(pos, 0, str); }
1096 
1097  // Does a "smart" string compare which will sort based on numbered names.
1098  // That is "text20" is bigger than "text3". In a strictly alphanumeric
1099  // comparison, this would not be the case. Zero is only returned if both
1100  // strings are identical.
1101  static int compareNumberedString(const char *s1,
1102  const char *s2,
1103  bool case_sensitive=true,
1104  bool allow_negatives=false);
1105  static int qsortCmpNumberedString(const char *const*v1,
1106  const char *const*v2);
1107 
1108  // Like compare numbered strings, but it sorts better when there are
1109  // .ext extensions (i.e. it handles '.' as a special case)
1110  static int compareNumberedFilename(const char *s1,
1111  const char *s2,
1112  bool case_sensitive=false);
1113  static int qsortCmpNumberedFilename(const char *const*v1,
1114  const char *const*v2);
1115 
1116  // Like compare numbered strings, but allows special ordering of certain
1117  // characters that should always come first or last.
1118  static int compareNumberedStringWithExceptions(const char *s1,
1119  const char *s2,
1120  bool case_sensitive=false,
1121  bool allow_negatives=false,
1122  const char *sorted_first=nullptr,
1123  const char *sorted_last=nullptr);
1124 
1125  /// Compare two version strings which have numbered components separated by
1126  /// dots. eg. "X.Y.Z". Assumes the components go from most to least
1127  /// significant in left to right order.
1128  static int compareVersionString(const char *s1, const char *s2);
1129 
1130  /// Given a path, set the value of the string to the program name. For
1131  /// example: @code
1132  /// str.extractProgramName(argv[0]);
1133  /// str.extractProgramName("c:/Path/program.exe");
1134  /// str.extractProgramName("/usr/bin/program");
1135  /// @endcode
1136  /// This will extract the last path component. Program names may also have
1137  /// their extensions stripped. For example ".exe" on Windows and "-bin" to
1138  /// strip the Houdini wrappers on other platforms.
1139  ///
1140  /// @note The path should be normalized to have forward slashes as the path
1141  /// separator.
1142  void extractProgramName(const char *path,
1143  bool strip_extension=true,
1144  bool normalize_path=true);
1145 
1146  /// Given a path, check to see whether the program name matches the
1147  /// expected. For example: @code
1148  /// if (UT_String::matchProgramName(argv[0], "houdini"))
1149  /// if (UT_String::matchProgramName("c:/Path/houdini.exe", "houdini"))
1150  /// if (UT_String::matchProgramName("/usr/bin/houdini", "houdini"))
1151  /// @endcode
1152  /// The matching is always case-insensitive.
1153  ///
1154  /// @note The path should be normalized to have forward slashes as the path
1155  /// separator.
1156  static bool matchProgramName(const char *path, const char *expected,
1157  bool normalize_path=false);
1158 
1159  /// Convert a path to a "normalized" path. That is, all back-slashes will
1160  /// be converted to forward slashes. On some operating systems, this will
1161  /// leave the string unchanged.
1162  void normalizePath();
1163 
1164  // A very fast integer to string converter. This is faster (at least on
1165  // SGI) than using sprintf("%d"). About two to three times as fast. Both
1166  // of these methods return the length of the string generated.
1167  static int itoa(char *str, int64 i);
1168  static int utoa(char *str, uint64 i);
1169 
1170  // Versions of the above functions which set into this string object
1171  void itoa(int64 i);
1172  void utoa(uint64 i);
1173 
1174  // A reader-friendly version of itoa. This places commas appropriately
1175  // to ensure the person can pick out the kilo points easily.
1176  // This can handle numbers up to 999,999,999,999,999,999.
1177  void itoaPretty(int64 val);
1178 
1179  /// Convert the given time delta (in milliseconds)
1180  /// to a reader-friendly string in days, hours, minutes, and seconds.
1181  void timeDeltaToPrettyString(double time_ms);
1182 
1183  /// Convert the given time delta (in milliseconds)
1184  /// to a reader-friendly string in milliseconds.
1185  void timeDeltaToPrettyStringMS(double time_ms);
1186 
1187  // Do an sprintf into this string. This method will allocate exactly the
1188  // number of bytes required for the final string. If the format string is
1189  // bad, isstring() will return false afterwards.
1190  int sprintf(const char *fmt, ...) SYS_PRINTF_CHECK_ATTRIBUTE(2, 3);
1191 
1192  // This will change the string into a valid C style variable name.
1193  // All non-alpha numerics will be converted to _.
1194  // If the first letter is a digit, it is prefixed with an _.
1195  // This returns 0 if no changes occurred, 1 if something had to
1196  // be adjusted.
1197  // Note that this does NOT force the name to be non-zero in length.
1198  // The safechars parameter is a string containing extra characters
1199  // that should be considered safe. These characters are not
1200  // converted to underscores.
1201  int forceValidVariableName(const char *safechars = NULL);
1202  // Returns true if the string matches a C-style variable name.
1203  // The safechars are not allowed to be the start.
1204  // Matching forceValid, empty strings are considered valid!
1205  bool isValidVariableName(const char *safechars = NULL) const;
1206 
1207  // This will force all non-alphanumeric characters to be underscores.
1208  // Returns true if any changes were required.
1209  bool forceAlphaNumeric();
1210 
1211  // This function will calculate the relative path to get from src to dest.
1212  // If file_path is false, this method assumes it is dealing with node paths.
1213  // If file_path is true, it will also deal with Windows drive letters and
1214  // UNC paths.
1215  void getRelativePath(const char *src_fullpath,
1216  const char *dest_fullpath,
1217  bool file_path = false);
1218 
1219  // This function takes two absolute paths and returns the length of the
1220  // longest common path prefix, up to and including the last '/'. This
1221  // means, for instance, that if fullpath1[len1-1] == '/' then all of
1222  // fullpath1 is eligible as a common prefix.
1223  // NB: This function DOES NOT handle NT style drive names! It is currently
1224  // only used for op paths. If you want to add support for this, you
1225  // should add another default parameter to do this.
1226  static int findLongestCommonPathPrefix(const char *fullpath1, int len1,
1227  const char *fullpath2, int len2);
1228 
1229  // This function tests whether we are an absolute path, and returns true or
1230  // false depending on whether we are.
1231  bool isAbsolutePath(bool file_path=false) const;
1232 
1233  // This function assumes that we are an absolute path and will remove all
1234  // un-necessary components from it as long as we remain an absolute path.
1235  // We return false if an error was encountered, in which case the results
1236  // are unpredictable.
1237  bool collapseAbsolutePath(bool file_path=false);
1238 
1239  // This function will make sure that the string is at most max_length
1240  // characters long. If the string is longer than that, it will
1241  // replace the middle of the string by "...". Returns true if the string
1242  // has changed and false otherwise. max_length must be greater than 3.
1243  bool truncateMiddle(int max_length);
1244 
1245  // This function is an abomination when you can just write:
1246  // UT_String foo("");
1247  // ...
1248  // if (foo.isstring())
1249  // ...
1250  // Avoid using it and do not write functions that return "const UT_String&"
1251  static const UT_String &getEmptyString();
1252 
1253  /// Count the number of valid characters in the : modifier for variable
1254  /// expansion. For example, the string ":r" will return 2, the string
1255  /// ":r:t" will return 4, the string ":z" will return 0. These use the csh
1256  /// expansion modifiers.
1257  ///
1258  /// If the string doesn't start with a ':', the method will return 0.
1259  static int countCshModifiers(const char *src);
1260 
1261  /// Applies a "csh" style modifier string to this string. For example, a
1262  /// modifier string of ":e" would replace the string with the file
1263  /// extension of the string.
1264  ///
1265  /// Returns true if any modifications were performed
1266  bool applyCshModifiers(const char *modifiers);
1267 
1268 
1269  /// This will remove the range from a string of the form foo$Fbar.ext (#-#)
1270  /// and return the first number from the range. If there is only 1 range
1271  /// number, it will be returned. If there is no range, 0 is returned.
1272  /// The returned string is hardened.
1273  UT_String removeRange ();
1274 
1275  /// This will format a value to represent a given size in bytes, kilobytes,
1276  /// megabytes, etc.
1277  void formatByteSize(exint size, int digits=2);
1278 
1279  // UTF-8 helpers
1280 
1281  /// Returns the number of Unicode codepoints in the string, assuming it's
1282  /// encoded as UTF-8.
1283  int getCodePointCount() const;
1284 
1285  /// Returns a list of Unicode code points from this string.
1286  void getAsCodePoints(UT_Int32Array &cp_list) const;
1287 
1288  /// Friend specialization of std::swap() to use UT_String::swap()
1289  /// @internal This is needed because standard std::swap() implementations
1290  /// will try to copy the UT_String objects, causing hardened strings to
1291  /// become weak.
1292  friend void swap(UT_String& a, UT_String& b) { a.swap(b); }
1293 
1294  /// expandArrays will expand a series of tokens of the
1295  /// form prefix[pattern]suffix into the names UT_StringArray
1296  /// @param tokens will store the parsed tokens without expansion
1297  /// @param names will store the parsed tokens with expansion
1298  /// This doesn't need a max argument like:
1299  /// int expandArrays(char *names[], int max)
1300  int expandArrays(UT_StringArray &tokens, UT_StringArray &names);
1301 
1302 private:
1303  template <typename OSTREAM>
1304  void saveInternal(OSTREAM &os, bool binary) const;
1305 
1306  void freeData();
1307 
1308  /// implements a few csh-style modifiers.
1309  /// @param mod pointer to a string starting with the modifier to apply.
1310  /// so, to apply a global substitute modifier :gs/l/r/
1311  /// mod should be: s/l/r
1312  /// @param all True if all possible modifications should be
1313  /// (recursively) performed.
1314  /// Otherwise, at most one modification is applied.
1315  /// @return whether any modification was performed
1316  bool applyNextModifier(const char *mod, bool all);
1317 
1318 
1319  /// Sets myIsReference to false and copies the other_string into myData,
1320  /// but attempts to avoid unnecessary memory reallocations. Frees up
1321  /// any previous data, if necessary. If other_string is NULL, the call
1322  /// is equivalent to freeData().
1323  void doSmartCopyFrom(const char* other_string);
1324 
1325  static int compareNumberedStringInternal(const char *s1, const char *s2,
1326  bool case_sensitive,
1327  bool allow_negatives,
1328  const char *sorted_first,
1329  const char *sorted_last);
1330 
1331  static SYS_FORCE_INLINE void utStrFree(char *str)
1332  {
1333 #if defined(UT_DEBUG) && !defined(_WIN32)
1334  if (str)
1335  ::memset((void *)str, 0xDD, ::strlen(str) + 1);
1336 #endif
1337  ::free((void *)str);
1338  }
1339 
1340  char *myData;
1341  bool myIsReference:1,
1342  myIsAlwaysDeep:1;
1343 
1344  /// This operator saves the string to the stream via the string's
1345  /// saveAscii() method, protecting any whitespace (by adding quotes),
1346  /// backslashes or quotes in the string.
1347  friend UT_API std::ostream &operator<<(std::ostream &os, const UT_String &d);
1348  friend UT_API UT_OStream &operator<<(UT_OStream &os, const UT_String &d);
1349 
1350  friend class UT_API UT_StringRef;
1351 };
1352 
1353 /// Creates a shallow wrapper around a string for calling UT_String's many
1354 /// const algorithms.
// NOTE(review): the class head that belongs before the opening brace is not
// present in the visible text; restore it from the original header.
1356 {
1357 public:
1358  // We only have a single constructor which is always shallow.
 // NOTE(review): the line that preceded this constructor is not present in
 // the visible text.
1360  UT_StringWrap(const char *str)
1361  : UT_String(str)
1362  {}
1363  // It seems necessary on MSVC to forceinline the empty constructor in order
1364  // to have it inlined.
 // NOTE(review): the declarator for the empty body below is not present in
 // the visible text; presumably it is ~UT_StringWrap() - confirm.
1367  {}
1368 
1369  // Manually wrap methods that have non-const overloads or return non-const
1370  // pointers.
 // Each wrapper is const and forwards to the UT_String method of the same
 // name, returning a value / pointer-to-const.
1371  char operator()(unsigned i) const { return UT_String::operator()(i); }
1372  const char *findChar(int c) const { return UT_String::findChar(c); }
1373  const char *findChar(const char *str) const { return UT_String::findChar(str); }
1374  const char *findNonSpace() const { return UT_String::findNonSpace(); }
1375  const char *lastChar(int c) const { return UT_String::lastChar(c); }
1376 
 // The using-declarations below make the named UT_String members available
 // in this class's public section.
 // NOTE(review): gaps in the listing numbers suggest some using-declarations
 // are missing from the visible text - compare with the original header.
1377  using UT_String::operator==;
1378  using UT_String::operator!=;
1379  using UT_String::c_str;
1380  using UT_String::length;
1381 
1382  using UT_String::base;
1383  using UT_String::compare;
1384  using UT_String::contains;
1385  using UT_String::count;
1386  using UT_String::countChar;
1387  using UT_String::distance;
1388  using UT_String::endsWith;
1389  using UT_String::equal;
1390  using UT_String::fcontain;
1392  using UT_String::fileName;
1393  using UT_String::findWord;
1394  using UT_String::findString;
1397  using UT_String::isFloat;
1398  using UT_String::isInteger;
1400  using UT_String::isstring;
1401  using UT_String::match;
1402  using UT_String::matchFile;
1404  using UT_String::matchPath;
1406  using UT_String::multiMatch;
1410  using UT_String::save;
1411  using UT_String::saveAscii;
1412  using UT_String::saveBinary;
1413  using UT_String::splitPath;
1414  using UT_String::startsWith;
1415  using UT_String::substr;
1416  using UT_String::suffix;
1417  using UT_String::toFloat;
1418  using UT_String::toInt;
1419 };
1420 
// NOTE(review): the declarator that introduces this body is not present in
// the visible text. The body hands myData to utStrFree() when the string is
// not a reference and holds data, so it is presumably UT_String's
// destructor - confirm against the original header.
1423 {
1424  if (!myIsReference && myData)
1425  utStrFree(myData);
1426 }
1427 
1429 void
1430 UT_String::freeData()
1431 {
1432  if (myData)
1433  {
1434  if (!myIsReference)
1435  utStrFree(myData);
1436  myData = 0;
1437  }
1438 }
1439 
1440 inline void
1442 {
1443  // We can't use UTswap because it doesn't work with bit fields.
1444  bool temp = myIsReference;
1445  myIsReference = other.myIsReference;
1446  other.myIsReference = temp;
1447 
1448  char *tmp_data = myData;
1449  myData = other.myData;
1450  other.myData = tmp_data;
1451 
1452  if (myIsAlwaysDeep)
1453  harden();
1454 
1455  if (other.myIsAlwaysDeep)
1456  other.harden();
1457 }
1458 
// NOTE(review): the class head that opens this body is not present in the
// visible text. Given the forward declaration `class UT_StringCshIO;` near
// the top of the file, this is presumably that class - confirm against the
// original header.
// The members record the redirection arguments ('>', '>&', '<') and whether
// the doubled forms ('>>', '>>&', '<<') were used.
1460 public:
1461  UT_String myOut; // Points to argument following '>'
1462  UT_String myErr; // Points to argument following '>&'
1463  UT_String myIn; // Points to argument following '<'
1464  short myDoubleOut; // If the argument is '>>' or '>>&'
1465  short myDoubleIn; // If the argument is '<<'
1466 };
1467 
1468 UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[]);
1469 
1470 /// Does a "smart" string compare which will sort based on numbered names.
1471 /// That is "text20" is bigger than "text3". In a strictly alphanumeric
1472 /// comparison, this would not be the case.
// NOTE(review): the struct/class head that opens this body is not present in
// the visible text; restore it from the original header.
1474 {
 // Returns true when UT_String::compareNumberedString(s1, s2) is negative,
 // i.e. s1 orders before s2. The default arguments of
 // compareNumberedString() apply.
1475  bool operator()(const char *s1, const char *s2) const
1476  {
1477  return UT_String::compareNumberedString(s1, s2) < 0;
1478  }
1479 
 // std::string overload: forwards the c_str() of each argument to the
 // const char * overload above.
1480  bool operator()(const std::string &s1, const std::string &s2) const
1481  {
1482  return operator()(s1.c_str(), s2.c_str());
1483  }
1484 };
1485 
1486 #endif
bool match(const char *pattern, bool case_sensitive=true) const
int tokenize(char *argv[], int max_args, const char *separators=" \t\n")
Definition: UT_String.h:834
UT_String & operator+=(const char *str)
Definition: UT_String.h:332
static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code=0)
Definition: UT_String.h:890
int distance(const char *str, bool case_sensitive=true, bool allow_subst=true) const
char * lastChar(int c)
Definition: UT_String.h:564
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1222
bool operator!=(const char *str) const
Definition: UT_String.h:417
UT_String & operator+=(const UT_String &str)
Definition: UT_String.h:364
UT_API void normalizePath(UT_String &file_path, bool want_marker=false, bool always_want_expanded_path=false)
bool operator>=(const UT_StringRef &str) const
Definition: UT_String.h:473
int count(const char *str, bool case_sensitive=true) const
Count the occurrences of the string.
bool matchFileExtension(const char *match_extension) const
Definition: UT_String.h:647
void swap(UT_String &other)
Definition: UT_String.h:1441
void saveAscii(UT_OStream &os) const
Definition: UT_String.h:292
bool operator()(const char *s1, const char *s2) const
Definition: UT_String.h:1475
T negative(const T &val)
Return the unary negation of the given value.
Definition: Math.h:127
const char * lastChar(int c) const
Definition: UT_String.h:1375
bool isInteger(bool skip_spaces=false) const
Determine if string can be seen as a single integer number.
bool operator<=(const char *str) const
Definition: UT_String.h:441
UT_String myIn
Definition: UT_String.h:1463
fpreal toFloat() const
bool operator==(const char *str) const
Definition: UT_String.h:405
bool operator<=(const UT_String &str) const
Definition: UT_String.h:445
int toInt() const
char * fileExtension()
Definition: UT_String.h:630
const GLuint GLenum const void * binary
Definition: glcorearb.h:1924
const GLfloat * c
Definition: glew.h:16631
bool isHard() const
Returns whether this string is hardened already.
Definition: UT_String.h:235
GLsizei const GLchar *const * path
Definition: glcorearb.h:3341
SYS_FORCE_INLINE T * SYSconst_cast(const T *foo)
Definition: SYS_Types.h:136
UT_String makeQuotedString(char delimiter='\'', bool escape_nonprinting=false) const
const char * findChar(const char *str) const
Definition: UT_String.h:558
int64 exint
Definition: SYS_Types.h:125
void swap(T &lhs, T &rhs)
Definition: pugixml.cpp:7172
void write(unsigned i, char c)
Definition: UT_String.h:517
bool operator==(const UT_String &str) const
Definition: UT_String.h:409
#define UT_API
Definition: UT_API.h:14
const char * fileExtension() const
Definition: UT_String.h:637
const char * data() const
Definition: UT_String.h:500
bool isAbsolutePath(bool file_path=false) const
bool findString(const char *str, bool fullword, bool usewildcards) const
GLuint const GLchar * name
Definition: glcorearb.h:786
char * findChar(int c)
Definition: UT_String.h:548
#define SYS_EXINT_MAX
Definition: SYS_Types.h:181
char & operator()(unsigned i)
Definition: UT_String.h:510
bool equal(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:392
const char * findNonSpace() const
Definition: UT_String.h:1374
GLenum src
Definition: glcorearb.h:1793
unsigned long long uint64
Definition: SYS_Types.h:117
int compare(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:372
GLuint buffer
Definition: glcorearb.h:660
void clear()
Reset the string to the default constructor.
Definition: UT_String.h:302
bool isAlwaysDeep() const
Definition: UT_String.h:197
const char * c_str() const
Definition: UT_String.h:498
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
Definition: glew.h:12900
bool matchPath(const char *pattern, bool case_sensitive=true, bool *excludes_branch=nullptr) const
unsigned length() const
Return length of string.
Definition: UT_String.h:536
int compare(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:386
const char * suffix() const
bool operator<(const char *str) const
Definition: UT_String.h:429
GLsizeiptr size
Definition: glcorearb.h:664
bool operator<(const UT_StringRef &str) const
Definition: UT_String.h:437
UT_API void UTexprLookup(const char *name, UT_String &result)
bool contains(const char *pattern, bool case_sensitive=true) const
int tokenize(UT_WorkArgs &argv, const char *separators=" \t\n")
Definition: UT_String.h:842
#define SYS_DEPRECATED_REPLACE(__V__, __R__)
std::ostream & operator<<(std::ostream &ostr, const DataType &a)
Definition: DataType.h:133
UT_String(UT_AlwaysDeepType, const std::string &str)
Construct UT_String from a std::string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:144
void hardenIfNeeded(const char *s)
Take shallow copy and make it deep.
Definition: UT_String.h:225
const char * buffer() const
Definition: UT_String.h:499
CompareResults OIIO_API compare(const ImageBuf &A, const ImageBuf &B, float failthresh, float warnthresh, ROI roi={}, int nthreads=0)
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:40
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool isEmpty() const
Returns true if the string is empty.
SYS_FORCE_INLINE uint32 hash() const
Definition: UT_String.h:882
GLuint64EXT * result
Definition: glew.h:14311
bool operator==(const UT_StringRef &str) const
Definition: UT_String.h:413
Definition: core.h:760
char operator()(unsigned i) const
Definition: UT_String.h:1371
int tokenize(char *argv[], int max_args, char separator)
Definition: UT_String.h:822
bool operator>=(const char *str) const
Definition: UT_String.h:465
UT_String & operator=(UT_String &&str)
Definition: UT_String.h:168
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileName(const StringT &str)
int tokenizeInPlace(T &list, const char *separators=" \t\n")
Definition: UT_String.h:858
OIIO_FORCEINLINE const vint4 & operator+=(vint4 &a, const vint4 &b)
Definition: simd.h:4369
GLfloat GLfloat GLfloat v2
Definition: glcorearb.h:818
#define SYS_SAFE_BOOL
Definition: SYS_Compiler.h:55
GLsizei GLsizei GLfloat distance
Definition: glew.h:13923
bool operator!=(const UT_String &str) const
Definition: UT_String.h:421
GLenum GLsizei len
Definition: glew.h:7782
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:152
bool operator>=(const UT_String &str) const
Definition: UT_String.h:469
#define SYS_PRINTF_CHECK_ATTRIBUTE(string_index, first_to_check)
Definition: SYS_Types.h:447
char * findNonSpace()
std::string OIIO_UTIL_API replace(string_view str, string_view pattern, string_view replacement, bool global=false)
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1222
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
GLsizei const GLchar *const * string
Definition: glcorearb.h:814
GLuint num
Definition: glew.h:2695
bool operator>(const UT_String &str) const
Definition: UT_String.h:457
char * SYSstrtok(char *string, const char *delimit, char **context)
Definition: SYS_String.h:106
char * findChar(const char *str)
Definition: UT_String.h:556
#define UT_ASSERT_SLOW(ZZ)
Definition: UT_Assert.h:151
const char * findChar(int c) const
Definition: UT_String.h:550
SYS_FORCE_INLINE UT_String(const char *str=0)
Construct UT_String from a C string, using shallow semantics.
Definition: UT_String.h:90
void harden()
Take shallow copy and make it deep.
Definition: UT_String.h:206
void saveAscii(std::ostream &os) const
Definition: UT_String.h:291
bool equal(const UT_StringRef &str, bool case_sensitive=true) const
Definition: UT_String.h:400
UT_String(UT_String &&str) noexcept
Definition: UT_String.h:160
long long int64
Definition: SYS_Types.h:116
bool equal(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:396
typedef int(WINAPI *PFNWGLRELEASEPBUFFERDCARBPROC)(HPBUFFERARB hPbuffer
void setAlwaysDeep(bool deep)
Make a string always deep.
Definition: UT_String.h:181
bool operator>(const UT_StringRef &str) const
Definition: UT_String.h:461
GLuint const GLuint * names
Definition: glew.h:2695
GLint GLsizei count
Definition: glcorearb.h:405
const char * findChar(const char *str) const
Definition: UT_String.h:1373
bool matchFile(const char *pattern) const
bool operator()(const std::string &s1, const std::string &s2) const
Definition: UT_String.h:1480
int eraseHead(int len)
Definition: UT_String.h:1078
void toUpper()
Definition: UT_String.h:602
void adopt(UT_String &str)
Definition: UT_String.h:278
SYS_FORCE_INLINE ~UT_StringWrap()
Definition: UT_String.h:1366
const char * findWord(const char *word) const
bool operator>(const char *str) const
Definition: UT_String.h:453
int64 getMemoryUsage(bool inclusive=true) const
Return memory usage in bytes.
Definition: UT_String.h:540
void saveBinary(std::ostream &os) const
Save string to binary stream.
Definition: UT_String.h:287
bool isFloat(bool skip_spaces=false, bool loose=false, bool allow_underscore=false) const
Determine if string can be seen as a single floating point number.
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glcorearb.h:2540
GLubyte * pattern
Definition: glew.h:5741
bool isValidVariableName(const char *safechars=NULL) const
static int compareNumberedString(const char *s1, const char *s2, bool case_sensitive=true, bool allow_negatives=false)
short myDoubleIn
Definition: UT_String.h:1465
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:795
GLint GLint GLsizei GLint GLenum format
Definition: glcorearb.h:108
void adopt(char *s)
Definition: UT_String.h:268
GLuint GLfloat * val
Definition: glcorearb.h:1608
UT_String pathUpToExtension() const
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileExtension(const StringT &str)
GLenum func
Definition: glcorearb.h:783
int substr(UT_String &buf, int index, int len=0) const
SYS_NO_DISCARD_RESULT bool UTstringMatchFileExtension(const StringT &str, const char *extension)
void save(std::ostream &os, bool binary) const
Save string to stream. Saves as binary if binary is true.
short myDoubleOut
Definition: UT_String.h:1464
fpreal64 fpreal
Definition: SYS_Types.h:277
int parse(UT_StringArray &argv, int start_arg=0, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:805
bool multiMatch(const char *pattern, bool case_sensitive, char separator) const
char * steal()
Definition: UT_String.h:246
char * SYSstrcasestr(const char *haystack, const char *needle)
Replacement for strcasestr, since no equivalent exists on Win32.
Definition: SYS_String.h:290
GLuint index
Definition: glcorearb.h:786
bool multiMatchRecord(const char *pattern, int maxpatterns, char *singles, int &nsingles, char **words, int &nwords, bool case_sensitive=true, const char *separators=", ") const
int parseNumberedFilename(UT_String &prefix, UT_String &frame, UT_String &suff, bool negative=true, bool fractional=false) const
UT_AlwaysDeepType
Definition: UT_String.h:78
auto ptr(T p) -> const void *
Definition: format.h:2448
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
**If you just want to fire and args
Definition: thread.h:609
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator begin() const
Returns a constant iterator pointing to the beginning of the string.
unsigned int uint32
Definition: SYS_Types.h:40
const char * lastChar(int c) const
Definition: UT_String.h:566
UT_String myOut
Definition: UT_String.h:1461
UT_String myErr
Definition: UT_String.h:1462
bool isstring() const
Definition: UT_String.h:681
int findLongestCommonSuffix(const char *with) const
void hardenIfNeeded()
Take a shallow copy and make it deep.
Definition: UT_String.h:215
const char * findChar(int c) const
Definition: UT_String.h:1372
int parse(char *argv[], int max_args, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:793
UT_String(UT_AlwaysDeepType, const char *str=0)
Construct UT_String from a C string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:137
GLintptr offset
Definition: glcorearb.h:665
bool operator<(const UT_String &str) const
Definition: UT_String.h:433
int erase(int pos=0, int len=-1)
Definition: UT_String.h:1086
#define const
Definition: zconf.h:214
GLfloat GLfloat v1
Definition: glcorearb.h:817
int tokenize(UT_WorkArgs &argv, char separator)
Definition: UT_String.h:828
auto sprintf(const S &fmt, const T &...args) -> std::basic_string< Char >
Definition: printf.h:574
string_view OIIO_UTIL_API strip(string_view str, string_view chars=string_view())
bool operator<=(const UT_StringRef &str) const
Definition: UT_String.h:449
bool startsWith(const UT_StringView &prefix, bool case_sensitive=true) const
void splitPath(UT_String &dir_name, UT_String &file_name) const
char operator()(unsigned i) const
Definition: UT_String.h:503
bool OIIO_UTIL_API contains(string_view a, string_view b)
Does 'a' contain the string 'b' within it?
int parse(UT_WorkArgs &argv, int start_arg=0, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:799
const char * base(UT_String &buf) const
GLdouble s
Definition: glew.h:1395
void removeLast()
Remove the last character.
Definition: UT_String.h:318
bool all(const vbool4 &v)
Definition: simd.h:3467
UT_API void UTvarLookup(const char *name, UT_String &result)
SYS_FORCE_INLINE UT_StringWrap(const char *str)
Definition: UT_String.h:1360
bool endsWith(const UT_StringView &suffix, bool case_sensitive=true) const
UT_String(const std::string &str)
Construct UT_String from a std::string, always doing a deep copy. The result will only be a UT_Always...
Definition: UT_String.h:115
int eraseTail(int len)
Definition: UT_String.h:1082
const char * fileName() const
Definition: UT_String.h:623
OIIO_UTIL_API std::string extension(string_view filepath, bool include_dot=true) noexcept
Definition: format.h:895
int countChar(int c) const
Return the number of occurrences of the specified character.
UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[])
int tokenize(T &list, const char *separators=" \t\n")
Definition: UT_String.h:851
FMT_CONSTEXPR auto find(Ptr first, Ptr last, T value, Ptr &out) -> bool
Definition: core.h:2089
const char * nonNullBuffer() const
Definition: UT_String.h:501
void toLower()
Definition: UT_String.h:609
int insert(int pos, const char *str)
Definition: UT_String.h:1094
const char * fcontain(const char *pattern, bool case_sensitive=true) const
Definition: UT_String.h:987
bool operator!=(const UT_StringRef &str) const
Definition: UT_String.h:425