HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_String.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  *
7  * NAME: Utility Library (C++)
8  *
9  * COMMENTS: String class
10  *
11  */
12 
13 #ifndef __UT_String_h__
14 #define __UT_String_h__
15 
16 #include "UT_API.h"
17 
18 #include "UT_Assert.h"
19 #include "UT_VectorTypes.h"
20 #include "UT_StringView.h"
21 #include "UT_StringUtils.h"
22 
23 #include <SYS/SYS_Compiler.h>
24 #include <SYS/SYS_Deprecated.h>
25 #include <SYS/SYS_Inline.h>
26 #include <SYS/SYS_String.h>
27 #include <SYS/SYS_Types.h>
28 
29 #include <iosfwd>
30 #include <string>
31 #include <utility>
32 
33 #include <ctype.h>
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #ifdef WIN32
38  #define strcasecmp stricmp
39  #define strncasecmp strnicmp
40 #endif
41 
42 class UT_OStream;
43 class UT_String;
44 class UT_StringCshIO;
45 class UT_WorkArgs;
46 class UT_IStream;
47 class ut_PatternRecord;
48 class UT_StringMMPattern;
49 class UT_StringArray;
50 class UT_StringHolder;
51 class UT_StringRef;
52 
53 // The following lookup functions are used by cshParse. By default,
54 // varLookup simply uses getenv, exprLookup opens the command as
55 // a pipe and uses the result.
56 UT_API extern void UTvarLookup(const char *name, UT_String &result);
57 UT_API extern void UTexprLookup(const char *name, UT_String &result);
58 
59 /// @file
60 /// @class UT_String
61 ///
62 /// UT_String is a string class that supports two different types of assignment
63 /// semantics:
64 /// - Shallow (default): Just reference the given string and do NOT take
65 /// ownership.
66 /// - Deep: Make a copy of the given string, taking ownership in the
67 /// process (i.e. making it "hard").
68 ///
69 /// If UT_String::harden() is called, or any other UT_String method that
70 /// requires modifying the string, it will make a copy of its reference pointer
71 /// (and take ownership) first.
72 ///
74 {
75 public:
76 
77  /// UT_String can be constructed with UT_String::ALWAYS_DEEP to create an
78  /// object that will always perform deep copies when assigned to.
79  enum UT_AlwaysDeepType { ALWAYS_DEEP };
80 
81  /// @brief Construct UT_String from a C string, using shallow semantics
82  ///
83  /// @param str The initial string.
85  UT_String(const char *str = nullptr)
86  : myData(SYSconst_cast(str))
87  , myIsReference(true)
88  , myIsAlwaysDeep(false)
89  {}
90 
91  /// @brief Construct UT_String from a C string, optionally making a deep copy
92  ///
93  /// @param str The initial string.
94  /// @param deep_copy If true, a copy of @em str will be used.
95  /// @param len Number of characters to use from @em str. Use -1 to
96  /// use the entire string. If len is non-negative, then
97  /// deep_copy will be implicitly set to true. If str is NULL
98  /// and len is non-negative, then it will be initialized
99  /// with "".
100  UT_String(const char *str, bool deep_copy, int len = -1);
101  // Prohibit accidents when converting to UT_StringHolder like:
102  // myString(buffer, /*deep*/1)
103  // where the 1 becomes a length. Note this also catches anyone who
104  // tried
105  // myString(buffer, UT_String::ALWAYS_DEEP), as that argument should come first.
106  UT_String(const char *data, int bad) = delete;
107 
108 
109  /// @brief Construct UT_String from a std::string, always doing
110  /// a deep copy. The result will only be a UT_AlwaysDeep if the
111  /// appropriate version is used, however!
112  ///
113  /// NOTE: You cannot do:
114  /// UT_String foo;
115  /// std::string bar = "hello world";
116  /// foo = UT_String(bar.substr(2, 5));
117  ///
118  /// This constructor provides a shortcut for constructing a UT_String from a function
119  /// that returns a std::string by value. For example, it lets you write
120  /// @code
121  /// UT_String str(func());
122  /// @endcode
123  /// instead of
124  /// @code
125  /// UT_String str(func().c_str(), /*harden=*/true);
126  /// @endcode
127  explicit UT_String(const std::string &str)
128  : myIsReference(false),
129  myIsAlwaysDeep(false)
130  { myData = strdup(str.c_str()); }
131 
132  /// @brief Construct UT_String from a UT_StringHolder.
133  /// This always duplicates and uses ALWAYS_DEEP semantics.
134  explicit UT_String(const UT_StringHolder &str);
135 
136  /// @brief Construct UT_String from a UT_StringHolder rvalue with
137  /// ALWAYS_DEEP semantics.
138  explicit UT_String(UT_StringHolder &&str);
139 
140 private:
141  /// This is intentionally not implemented - callers should choose between
142  /// the const char * and UT_StringHolder constructors, depending on whether
143  /// they want to make a deep copy.
144  /// @see UT_StringWrap.
145  UT_String(const UT_StringRef &);
146 
147 public:
148  /// @brief Construct UT_String from a UT_StringView.
149  /// This always duplicates and uses ALWAYS_DEEP semantics.
150  explicit UT_String(const UT_StringView &sv);
151 
152  /// @brief Construct UT_String from a C string, using ALWAYS_DEEP semantics
153  UT_String(UT_AlwaysDeepType, const char *str = nullptr)
154  : myIsReference(false),
155  myIsAlwaysDeep(true)
156  { myData = str ? strdup(str) : nullptr; }
157 
158  /// @brief Construct UT_String from a std::string, using ALWAYS_DEEP
159  /// semantics
160  UT_String(UT_AlwaysDeepType, const std::string &str)
161  : myIsReference(false),
162  myIsAlwaysDeep(true)
163  { myData = strdup(str.c_str()); }
164 
165  /// Copy constructor
166  ///
167  /// If the string we're copying from is ALWAYS_DEEP, then this object will
168  /// also become ALWAYS_DEEP. This way, you can pass/return a string by
169  /// value.
170  UT_String(const UT_String &str);
171 
172  ~UT_String();
173 
174  /// Move operators
175  /// @{
176  UT_String(UT_String &&str) noexcept
177  : myData(str.myData)
178  , myIsReference(str.myIsReference)
179  , myIsAlwaysDeep(str.myIsAlwaysDeep)
180  {
181  str.myData = nullptr;
182  str.myIsReference = !str.myIsAlwaysDeep;
183  }
184  UT_String &operator=(UT_String &&str) noexcept
185  {
186  freeData();
187  myData = str.myData;
188  myIsReference = str.myIsReference;
189  myIsAlwaysDeep = str.myIsAlwaysDeep;
190  str.myData = nullptr;
191  str.myIsReference = !str.myIsAlwaysDeep;
192  return *this;
193  }
194  /// @}
195 
196  /// Make a string always deep
197  void setAlwaysDeep(bool deep)
198  {
199  myIsAlwaysDeep = deep;
200  if (deep && myIsReference)
201  {
202  if (myData != nullptr)
203  harden();
204  else
205  {
206  // This takes the same semantic as
207  // str = NULL;
208  // where str is an always deep string
209  myIsReference = false;
210  }
211  }
212  }
/// Returns the current value of the always-deep flag, as set by
/// setAlwaysDeep() or by constructing with UT_String::ALWAYS_DEEP.
213  bool isAlwaysDeep() const
214  {
215  return myIsAlwaysDeep;
216  }
217 
218  void swap( UT_String &other );
219 
220  /// Take shallow copy and make it deep.
221  // @{
222  void harden()
223  {
224  if (!myIsReference && myData)
225  return;
226  myData = strdup(myData ? myData : "");
227  myIsReference = false;
228  }
229 
230  void harden(const char *s, int len = -1);
232  {
233  if (myIsReference)
234  {
235  if (isstring())
236  harden();
237  else
238  *this = "";
239  }
240  }
241  void hardenIfNeeded(const char *s)
242  {
243  if (s && *s)
244  harden(s);
245  else
246  *this = "";
247  }
248  // @}
249 
250  /// Returns whether this string is hardened already.
251  bool isHard() const { return !myIsReference; }
252 
253  /// Give up ownership of string
254  ///
255  /// Take a hard reference and make it shallow. This method makes sure
256  /// it gives back something you can delete, because this UT_String is
257  /// taking its hands off the data. Use it with care since it may lead
258  /// to memory leaks if, for example, you harden it again later.
259  ///
260  /// In the case of ALWAYS_DEEP strings, this is disallowed so it will
261  /// just return a copy of the data.
262  char * steal()
263  {
264  if (!myIsAlwaysDeep)
265  {
266  if (myIsReference)
267  myData = strdup(myData ? myData : ""); // harden
268  myIsReference = true; // but say it's soft
269  return myData;
270  }
271  else
272  {
273  // return a new copy of the data without releasing
274  // ownership for always deep strings
275  return strdup(myData ? myData : "");
276  }
277  }
278 
279  /// Take ownership of given string
280  ///
281  /// adopt() is the opposite of steal(). Basically, you're giving
282  /// the UT_String ownership of the string.
283  // @{
284  void adopt(char *s)
285  {
286  if (!myIsReference)
287  {
288  if (s != myData)
289  utStrFree(myData);
290  }
291  myData = s;
292  myIsReference = false;
293  }
294  void adopt(UT_String &str)
295  {
296  adopt(str.steal());
297  }
298  void adopt(UT_StringHolder &holder);
299 
300  // @}
301 
302  /// Save string to binary stream.
303  void saveBinary(std::ostream &os) const { save(os, true); }
304 
305  /// Save string to ASCII stream. This will add double quotes and escape to
306  /// the stream if necessary (empty string or contains spaces).
307  void saveAscii(std::ostream &os) const { save(os, false); }
308  void saveAscii(UT_OStream &os) const { save(os, false); }
309 
310  /// Save string to stream. Saves as binary if @em binary is true.
311  void save(std::ostream &os, bool binary) const;
312  void save(UT_OStream &os, bool binary) const;
313 
314  /// Load string from stream. Use is.eof() to check eof status
315  bool load(UT_IStream &is);
316 
317  /// Reset the string to the default constructor.
318  void clear()
319  { *this = (const char *)nullptr; }
320 
321  /// Prepend a string (or character)
322  // @{
323  void prepend(const char *prefix);
324  void prepend(char ch);
325  // @}
326 
327  /// Append a character
328  void append(char ch);
329 
330  /// Append a string or a section of a string.
331  void append(const char *str, exint len = -1);
332 
333  /// Remove the last character
334  void removeLast() { truncate(length()-1); }
335  /// Truncate the string at the Nth character
336  void truncate(exint len);
337 
338  UT_String &operator=(const UT_String &str);
339  UT_String &operator=(const char *str);
340  UT_String &operator=(const std::string &str);
341  UT_String &operator=(const UT_StringHolder &str);
343  UT_String &operator=(const UT_StringView &str);
344 private:
345  /// Not implemented - see UT_String(const UT_StringRef &).
347 
348 public:
349  UT_String &operator+=(const char *str)
350  {
351  if (!isstring())
352  {
353  // We are an empty string, so we merely copy
354  // the incoming string rather than trying to append
355  // to it.
356  harden(str);
357  }
358  else
359  {
360  bool same = (str == myData);
361  harden();
362  if (str)
363  {
364  int mylen = (int)strlen(myData);
365  myData = (char *)realloc(myData,
366  mylen+strlen(str)+1);
367  if (!same)
368  {
369  strcpy(&myData[mylen], str);
370  }
371  else
372  {
373  memcpy(myData + mylen, myData, mylen);
374  myData[mylen * 2] = '\0';
375  }
376  }
377  }
378  return *this;
379  }
380 
382  {
383  *this += (const char *)str.myData;
384  return *this;
385  }
386  UT_String &operator+=(const UT_StringRef &str);
387 
388  // Basic equality functions and operators
389  int compare(const char *str, bool case_sensitive=true) const
390  {
391  // Unlike std::string, UT_String treats NULL and
392  // the empty string as distinct (empty has precedence).
393  if (myData == nullptr || str == nullptr)
394  {
395  if (myData) return 1;
396  if(str) return -1;
397  return 0;
398  }
399  if (case_sensitive)
400  return strcmp(myData, str);
401  return strcasecmp(myData, str);
402  }
403  int compare(const UT_String &str, bool case_sensitive=true) const
404  {
405  return compare(str.myData,case_sensitive);
406  }
407  int compare(const UT_StringRef &str, bool case_sensitive=true) const;
408 
409  bool equal(const char *str, bool case_sensitive=true) const
410  {
411  return compare(str,case_sensitive)==0;
412  }
413  bool equal(const UT_String &str, bool case_sensitive=true) const
414  {
415  return compare(str.myData,case_sensitive)==0;
416  }
417  bool equal(const UT_StringRef &str, bool case_sensitive=true) const
418  {
419  return compare(str,case_sensitive)==0;
420  }
421 
422  bool operator==(const char *str) const
423  {
424  return compare(str)==0;
425  }
426  bool operator==(const UT_String &str) const
427  {
428  return compare(str.myData)==0;
429  }
430  bool operator==(const UT_StringRef &str) const
431  {
432  return compare(str)==0;
433  }
434  bool operator!=(const char *str) const
435  {
436  return compare(str)!=0;
437  }
438  bool operator!=(const UT_String &str) const
439  {
440  return compare(str.myData)!=0;
441  }
442  bool operator!=(const UT_StringRef &str) const
443  {
444  return compare(str)!=0;
445  }
446  bool operator<(const char *str) const
447  {
448  return compare(str)<0;
449  }
450  bool operator<(const UT_String &str) const
451  {
452  return compare(str.myData)<0;
453  }
454  bool operator<(const UT_StringRef &str) const
455  {
456  return compare(str)<0;
457  }
458  bool operator<=(const char *str) const
459  {
460  return compare(str)<=0;
461  }
462  bool operator<=(const UT_String &str) const
463  {
464  return compare(str.myData)<=0;
465  }
466  bool operator<=(const UT_StringRef &str) const
467  {
468  return compare(str)<=0;
469  }
470  bool operator>(const char *str) const
471  {
472  return compare(str)>0;
473  }
474  bool operator>(const UT_String &str) const
475  {
476  return compare(str.myData)>0;
477  }
478  bool operator>(const UT_StringRef &str) const
479  {
480  return compare(str)>0;
481  }
482  bool operator>=(const char *str) const
483  {
484  return compare(str)>=0;
485  }
486  bool operator>=(const UT_String &str) const
487  {
488  return compare(str.myData)>=0;
489  }
490  bool operator>=(const UT_StringRef &str) const
491  {
492  return compare(str)>=0;
493  }
494 
495  /// Test whether the string is defined or not
496  SYS_SAFE_BOOL operator bool() const { return isstring(); }
497 
498  /// Return the edit distance between two strings.
499  /// See http://en.wikipedia.org/wiki/Levenshtein_distance for details.
500  /// allow_subst controls whether a substitution of a character with
501  /// another is a single operation, rather than two operations of
502  /// insert and delete.
503  int distance(const char *str,
504  bool case_sensitive = true,
505  bool allow_subst = true) const;
506 
507  operator const char *() const
508  { return (const char *)myData; }
509  operator char *()
510  { return myData; }
511 
512  operator UT_StringView() const
513  { return UT_StringView(myData); }
514 
515  const char *c_str() const { return buffer(); }
516  const char *buffer() const { return myData; }
517  const char *data() const { return buffer(); }
518  const char *nonNullBuffer() const { return myData ? myData : ""; }
519 
/// Read-only access to the character at index @em i.
/// The string is not hardened or otherwise modified. The two checks below
/// are assertion macros: the string is expected to be non-empty, and
/// @em i may be at most strlen(), i.e. the terminating NUL may be read.
/// NOTE(review): presumably the assertions are compiled out in some build
/// configurations, leaving the access unchecked - confirm in UT_Assert.h.
520  char operator()(unsigned i) const
521  {
522  UT_ASSERT_P( isstring() );
523  UT_ASSERT_SLOW(i <= strlen(myData));
524  return myData[i];
525  }
526 
/// Mutable access to the character at index @em i.
/// harden() is called first, so the returned reference points into a
/// buffer owned by this string rather than into referenced data. The
/// index is not range-checked here.
527  char &operator()(unsigned i)
528  {
529  harden();
530  return myData[i];
531  }
532 
533  // Prefer using write() since ideally the non-const operator() is removed
/// Store character @em c at index @em i.
/// hardenIfNeeded() is called before the write; the index is not
/// range-checked here.
/// NOTE(review): a hard string whose buffer is null looks like it would
/// be dereferenced as-is - confirm against hardenIfNeeded().
534  inline void write(unsigned i, char c)
535  {
536  hardenIfNeeded();
537  myData[i] = c;
538  }
539 
540  int toInt() const;
541  fpreal toFloat() const;
542 
543  /// Converts the contents of this UT_String to a std::string. Note that
544  /// std::string can't be constructed with a null pointer, so you can't
545  /// just write std::string s = ut_string.buffer();
546  std::string toStdString() const;
547 
548  //
549  // Here, we're finished with operators
550  //
551 
552  /// Return length of string
553  unsigned length() const
554  { return (myData) ? (unsigned)strlen(myData) : 0; }
555 
556  /// Return memory usage in bytes
557  int64 getMemoryUsage(bool inclusive=true) const
558  {
559  return (inclusive ? sizeof(*this) : 0)
560  + (!myIsReference ? (length() + 1)*sizeof(char) : 0);
561  }
562 
563  /// Find first occurrence of character. Returns NULL upon failure.
564  /// @{
565  char *findChar(int c)
566  { return myData ? strchr(myData, c) : nullptr; }
567  const char *findChar(int c) const
568  { return SYSconst_cast(*this).findChar(c); }
569  /// @}
570 
571  /// Find first occurrence of any character in @em str
572  /// @{
573  char *findChar(const char *str)
574  { return myData ? strpbrk(myData, str) : nullptr; }
575  const char *findChar(const char *str) const
576  { return SYSconst_cast(*this).findChar(str); }
577  /// @}
578 
579  /// Find last occurrence of character
580  /// @{
581  char *lastChar(int c)
582  { return myData ? strrchr(myData, c) : nullptr; }
583  const char *lastChar(int c) const
584  { return SYSconst_cast(*this).lastChar(c); }
585  /// @}
586 
587  /// Return the number of occurrences of the specified character.
588  int countChar(int c) const;
589 
590  /// Count the occurrences of the string
591  int count(const char *str, bool case_sensitive = true) const;
592 
593  char *findNonSpace();
594  const char *findNonSpace() const;
595  const char *findWord(const char *word) const;
596  bool findString(const char *str, bool fullword,
597  bool usewildcards) const;
598  int changeWord(const char *from, const char *to, bool all = true);
599  int changeString(const char *from, const char *to, bool fullword);
600  int changeQuotedWord(const char *from, const char *to,
601  int quote = '`', bool all = true);
602 
603  int findLongestCommonSuffix( const char *with ) const;
604 
605  /// Perform deep copy of the substring starting from @em index
606  /// for @em len characters into the specified UT_String.
607  /// If @em len is too long, then a substring starting from @em index to
608  /// the end of the string is copied.
609  /// Returns the length of the copied substring.
610  int substr(UT_String &buf, int index, int len=0) const;
611 
612  /// Determine if string can be seen as a single floating point number
613  bool isFloat(bool skip_spaces = false,
614  bool loose = false,
615  bool allow_underscore = false) const;
616  /// Determine if string can be seen as a single integer number
617  bool isInteger(bool skip_spaces = false) const;
618 
619  void toUpper()
620  {
621  char *ptr;
622  harden();
623  for (ptr=myData; *ptr; ptr++)
624  *ptr = (char)toupper(*ptr);
625  }
626  void toLower()
627  {
628  char *ptr;
629  harden();
630  for (ptr=myData; *ptr; ptr++)
631  *ptr = (char)tolower(*ptr);
632  }
633 
634 
635  /// Return last component of forward slash separated path string
636  ///
637  /// If there is a slash in the string, fileName() returns the string
638  /// starting after the slash. Otherwise, it returns the contents of
639  /// this string. Note that it returns a pointer into this string.
640  const char *fileName() const
641  {
642  UT_StringView file_name = UTstringFileName(*this);
643  return file_name.begin();
644  }
645  /// Return the extension of a file path string
646  /// @{
648  {
650  if (extension.isEmpty())
651  return nullptr;
652  return myData + (extension.begin() - myData);
653  }
654  const char *fileExtension() const
655  {
656  return SYSconst_cast(*this).fileExtension();
657  }
658  /// @}
659 
660  /// Return whether the file extension matches. The extension passed in
661  /// should include the '.' separator. For example: @code
662  /// matchFileExtension(".jpg")
663  /// @endcode
664  bool matchFileExtension(const char *match_extension) const
665  {
666  return UTstringMatchFileExtension(*this, match_extension);
667  }
668  /// Return path terminated just before the extension.
669  /// If the filename starts with '.' and no path is provided,
670  /// returns NULL
671  UT_String pathUpToExtension() const;
672 
673  /// Replace the file extension and return the new string
674  UT_String replaceExtension(const UT_String &new_ext) const;
675 
676  /// Split a path into @em dir_name and @em file_name, where @em file_name
677  /// is everything after the final slash (i.e. the same as fileName()).
678  /// Either part may be empty. Note that if the string starts with / and
679  /// only contains that one slash, the @em dir_name will be / and not blank.
680  /// @em dir_name and @em file_name will either be set to hardened strings
681  /// or an empty string.
682  void splitPath(UT_String &dir_name, UT_String &file_name) const;
683 
684  /// Decompose a filename into various parts
685  ///
686  /// parseNumberedFilename() will break up a filename into its various
687  /// parts: file = prefix$Fsuffix (note: suffix is
688  /// not the same as file extension.) 0 is returned if there is
689  /// no frame number. 'negative' allows -[frame] to be interpreted as a
690  /// negative number. 'fractional' allows [frame].[number] to be interpreted
691  /// as a fractional frame.
692  int parseNumberedFilename(UT_String &prefix,
693  UT_String &frame,
694  UT_String &suff,
695  bool negative = true,
696  bool fractional = false) const;
697 
698  bool isstring() const
699  { return (myData && *myData); }
700 
701  /// trimSpace() will remove all space characters (leading and following)
702  /// from a string. If the string consists of multiple words, the words will
703  /// be collapsed. The function returns 1 if space was trimmed.
704  int trimSpace(bool leave_single_space_between_words = false);
705 
706  /// A version of trimSpace() that only removes leading and following spaces
707  /// from a string, leaving any between words intact.
708  int trimBoundingSpace();
709 
710  /// strips out all characters found in 'chars'. The string length will be
711  /// reduced by the number of characters removed. The number of characters
712  /// removed is returned.
713  int strip(const char *chars);
714 
715  /// protectString() will modify the existing string to escape double quotes
716  /// and backslashes. It will only wrap the string in double quotes if
717  /// it has spaces in it. If 'protect_empty' is true, the string will
718  /// become '""', otherwise it will stay empty.
719  void protectString(bool protect_empty=false);
720 
721  /// If the char is a quote character `"` or `'` then make sure to protect
722  /// it by adding '\' before the quote character. If the character is not
723  /// a quote character then the character is simply added to the ostream.
724  static void protectString(std::ostream& os, char c);
725 
726  /// protectPreQuotePythonStringLiteral() will modify the existing string
727  /// to escape any non-printing characters, backslashes, and instances of the
728  /// specified delimiter. Unlike protectString(), it will not wrap the
729  /// string in quotes.
730  void protectPreQuotePythonStringLiteral(char delimiter='\'');
731 
732  /// returns true if the string begins and ends with a (non-escaped) quote
733  /// 'delimiter'.
734  bool isQuotedString(char delimiter='\'') const;
735 
736  /// makeQuotedString() is similar to protectString() except it returns a
737  /// new string instead of changing this string, it does wrap the string
738  /// in quotes, and it lets you use either ' or " as the delimiter.
739  /// The quoted string can also optionally be made to escape non-printing
740  /// characters. The string that's returned is UT_String::ALWAYS_DEEP.
741  UT_String makeQuotedString(char delimiter='\'',
742  bool escape_nonprinting=false) const;
743 
744  /// makeSmartQuotedString() will use either ' or " as the delimiter to
745  /// avoid escaped quotes, using the default delimiter if it doesn't
746  /// matter. The quoted string can also optionally be made to escape
747  /// non-printing characters. The string that's returned is
748  /// UT_String::ALWAYS_DEEP.
749  UT_String makeSmartQuotedString(char default_delimiter='\'',
750  bool escape_nonprinting=false) const;
751 
752  /// Expands standard control sequences ('\\n', '\\r', '\\t', '\\0') to their
753  /// corresponding ASCII values (10, 13, 9, 0, respectively).
754  /// If the expand_extended flag is enabled, an extended expansion is enabled
755  /// which adds hexadecimal, decimal and Unicode control sequence expansion.
756  /// Any values resulting from that expansion, which are outside the standard
757  /// ASCII range, will be encoded as UTF8-encoded control points.
758  void expandControlSequences(bool expand_extended = false);
759 
760  bool hasWhiteSpace() const;
761 
762  void removeTrailingSpace();
763  void removeTrailingChars(char chr);
764 
765  void removeTrailingDigits();
766 
767  /// Parse string into array of arguments similar to csh.
768  ///
769  /// cshParse() does not need to harden the string. It does very robust
770  /// parsing in the style of csh. It actually does better parsing than
771  /// csh. Variable expansion & backquote expansion are done in the
772  /// correct order for the correct arguments. One caveat is that the
773  /// string cannot have \0377 (0xff) as a character in it.
774  ///
775  /// If there is an error in parsing, the error flag (if passed in) will be
776  /// set to:
777  /// 0 = no error
778  /// 1 = line too long
779  ///
780  /// To reconstruct the command line, use UT_Args::fillCommandLine().
781  ///
782  /// @{
783  int cshParse(char *argv[], int max_args,
784  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
785  void (*elookup)(const char *, UT_String&)=UTexprLookup,
786  int *error = nullptr,
787  UT_StringCshIO *io = nullptr);
788  int cshParse(UT_WorkArgs &argv,
789  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
790  void (*elookup)(const char *, UT_String&)=UTexprLookup,
791  int *error = nullptr,
792  UT_StringCshIO *io = nullptr);
793  /// @}
794 
795  /// dosParse() uses the semi-braindead approach of ms-dos to argument
796  /// parsing. That is, arguments are separated by a double quote or space
797  /// (being a space or a tab). If 'preserve_backslashes' is set to
798  /// false (the default), back-slashes are passed through verbatim, unless
799  /// the following character is a double quote. Likewise, any pairs of
800  /// back-slashes preceding a double quote are turned into single
801  /// back-slashes.
802  ///
803  /// See also UTUTbuildDOSCommandLine() for reconstructing from arguments.
804  ///
805  /// @{
806  int dosParse(UT_WorkArgs &argv, bool preserve_backslashes=false);
807  int dosParse(char *argv[], int max_args,
808  bool preserve_backslashes=false);
809  /// Perform dos parsing modifying the buffer passed in. The args will be
810  /// stored as raw pointers into the given buffer
811  static int dosParse(char *buffer, UT_WorkArgs &args,
812  bool preserve_backslashes);
813  /// @}
814 
815  // parse will insert nulls into the string.
816  // NB: The argv array is null terminated, thus the effective
817  // maximum number of arguments is one less than maxArgs.
818  // NB: The maxArgs variants are all deprecated, use UT_WorkArgs
819  // instead.
820  int parse(char *argv[], int max_args,
821  const char *quotes = "\"'", bool keep_quotes = false)
822  {
823  harden();
824  return parseInPlace(argv, max_args, quotes, keep_quotes);
825  }
826  int parse(UT_WorkArgs &argv, int start_arg = 0,
827  const char *quotes = "\"'", bool keep_quotes = false)
828  {
829  harden();
830  return parseInPlace(argv, start_arg, quotes, keep_quotes);
831  }
832  int parse(UT_StringArray &argv, int start_arg = 0,
833  const char *quotes = "\"'", bool keep_quotes = false)
834  {
835  harden();
836  return parseInPlace(argv, start_arg, quotes, keep_quotes);
837  }
838  // Warning: the following methods insert nulls into the string without
839  // hardening.
840  int parseInPlace(char *argv[], int max_args,
841  const char *quotes = "\"'", bool keep_quotes = false);
842  int parseInPlace(UT_WorkArgs &argv, int start_arg = 0,
843  const char *quotes = "\"'", bool keep_quotes = false);
844  int parseInPlace(UT_StringArray &argv, int start_arg = 0,
845  const char *quotes = "\"'", bool keep_quotes = false);
846 
847  // Splits the string at specific separator characters. Unlike the parse
848  // methods, the tokenize methods ignore quoting completely.
849  int tokenize(char *argv[], int max_args, char separator)
850  {
851  harden();
852  return tokenizeInPlace(argv, max_args, separator);
853  }
854  int tokenizeInPlace(char *argv[], int max_args, char separator);
855  int tokenize(UT_WorkArgs &argv, char separator)
856  {
857  harden();
858  return tokenizeInPlace(argv, separator);
859  }
860  int tokenizeInPlace(UT_WorkArgs &argv, char separator);
861  int tokenize(char *argv[], int max_args,
862  const char *separators = " \t\n")
863  {
864  harden();
865  return tokenizeInPlace(argv, max_args, separators);
866  }
867  int tokenizeInPlace(char *argv[], int max_args,
868  const char *separators = " \t\n");
869  int tokenize(UT_WorkArgs &argv, const char *separators = " \t\n")
870  {
871  harden();
872  return tokenizeInPlace(argv, separators);
873  }
874  int tokenizeInPlace(UT_WorkArgs &argv,
875  const char *separators = " \t\n");
876 
877  template<typename T>
878  int tokenize(T &list, const char *separators = " \t\n")
879  {
880  harden();
881  return tokenizeInPlace(list, separators);
882  }
883 
884  template<typename T>
885  int tokenizeInPlace(T &list,
886  const char *separators = " \t\n")
887  {
888  char *token;
889  char *context;
890 
891  if (!isstring())
892  return 0;
893  if (!(token = SYSstrtok(myData, separators, &context)))
894  return 0;
895 
896  list.append(token);
897 
898  while ((token = SYSstrtok(nullptr, separators, &context))
899  != nullptr)
900  list.append(token);
901 
902  return list.entries();
903  }
904 
905 
906  // Replaces the contents with variables expanded.
907  void expandVariables();
908 
909  // Functions to hash a string
911  {
912  return hash(myData);
913  }
914 
915  // The code can be used for rudimentary hash chaining, but it is NOT
916  // the case that hash("def", hash("abc")) == hash("abcdef"), so there
917  // is little reason to use this rather than normal hash combiners.
918  static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code = 0)
919  {
920  return SYSstring_hashseed(
921  str, SYS_EXINT_MAX, code, /*allow_nulls*/ false);
922  }
923 
924  // This does pattern matching on a string. The pattern may include
925  // the following syntax:
926  // ? = match a single character
927  // * = match any number of characters
928  // [char_set] = matches any character in the set
929  bool match(const char *pattern, bool case_sensitive = true) const;
930 
931  // Similar to match() except it assumes that we're dealing with file paths
932  // so that it determines whether to do a case-sensitive match depending on
933  // the platform.
934  bool matchFile(const char *pattern) const;
935 
936  // Similar to match() but uses rsync style matching:
937  // * = match any number of characters up to a slash
938  // ** = match any number of characters, including a slash
939  bool matchPath(const char *pattern, bool case_sensitive = true,
940  bool *excludes_branch = nullptr) const;
941 
942  // multiMatch will actually check multiple patterns all separated
943  // by the separator character: i.e. geo1,geo2,foot*
944  //
945  // NOTE: No pattern may contain the separator
946  bool multiMatch(const char *pattern,
947  bool case_sensitive, char separator) const;
948  bool multiMatch(const char *pattern, bool case_sensitive = true,
949  const char *separators = ", ",
950  bool *explicitly_excluded = nullptr,
951  int *match_index = nullptr,
952  ut_PatternRecord *pattern_record = nullptr) const;
953  bool multiMatch(const UT_StringMMPattern &pattern,
954  bool *explicitly_excluded = nullptr,
955  int *match_index = nullptr,
956  ut_PatternRecord *pattern_record = nullptr) const;
957 
958  // this method matches a pattern while recording any wildcard
959  // patterns used.
960  bool multiMatchRecord(const char *pattern, int maxpatterns,
961  char *singles, int &nsingles,
962  char **words, int &nwords,
963  bool case_sensitive = true,
964  const char *separators = ", ") const;
965  bool multiMatchRecord(const UT_StringMMPattern &pattern,
966  int maxpatterns,
967  char *singles, int &nsingles,
968  char **words, int &nwords) const;
969  bool multiMatchRecord(const char *pattern,
970  UT_StringHolder &singles,
971  UT_StringArray &words,
972  bool case_sensitive = true,
973  const char *separators = ", ") const;
974 
975  /// matchPattern(UT_WorkArgs &) assumes that the arguments contain the
976  /// components of a pattern to be matched against. The method returns
977  /// true if the pattern matches, false if it doesn't. This matching
978  /// process handles ^ expansion properly (and efficiently).
979  /// If the string doesn't match any components of the pattern, then the
980  /// assumed value is returned.
981  bool matchPattern(const UT_WorkArgs &pattern_args,
982  bool assume_match=false) const;
983 
984  static bool multiMatchCheck(const char *pattern);
985  static bool wildcardMatchCheck(const char *pattern);
986 
987  // Same as match but equivalent to "*pattern*"
988  bool contains(const char *pattern, bool case_sensitive=true) const;
989 
990  // Returns true if our string starts with the specified prefix.
991  bool startsWith(const UT_StringView &prefix,
992  bool case_sensitive = true) const;
993 
994  // Returns true if our string ends with the specified suffix.
995  bool endsWith(const UT_StringView &suffix,
996  bool case_sensitive = true) const;
997 
998  /// Pluralize an English noun ending (i.e. box->boxes or tube->tubes). The
999  /// ending must be lower case to be processed properly.
1000  void pluralize();
1001 
1002  // Will parse strings like 1-10:2,3 and call func for every element
1003  // implied. It will stop when the func returns 0 or the parsing
1004  // is complete, in which case it returns 1.
1005  // Parsing also allows secondary elements to be specified eg 3.4 0.12
1006  // The secfunc is used to find the maximum index of secondary elements
1007  // for each compound num. The elements are assumed to be
1008  // non-negative integers.
1009  int traversePattern(int max, void *data,
1010  int (*func)(int num, int sec, void *data),
1011  unsigned int (*secfunc)(int num,void *data)
1012  = nullptr,
1013  int offset=0) const;
1014 
1015  // Fast containment, assumes no special characters
1016  const char *fcontain(const char *pattern, bool case_sensitive=true) const
1017  {
1018  if (!myData)
1019  return nullptr;
1020  return case_sensitive ? strstr(myData, pattern)
1021  : SYSstrcasestr(myData, pattern);
1022  }
1023 
1024  // Given the match pattern which fits our contents, any assigned wildcards
1025  // are substituted. The wildcards may also be indexed.
1026  // Returns true if rename was successful.
1027  //
1028  // @note This code was adapted from CHOP_Rename::subPatterns() and
1029  // works the same way.
1030  //
1031  // eg. this = apple, match = a*le, replace = b* ---> bpp
1032  // this = a_to_b, match = *_to_*, replace = *(1)_to_*(0) ---> b_to_a
1033  bool patternRename(const char *match_pattern, const char *replace);
1034 
1035  // Given the name rule according to which a name consists of a base name
1036  // (char sequence ending in a non-digit) and a numerical suffix, the
1037  // following two methods return the base and the suffix respectively.
1038  // base() needs a string buffer and will return a const char* pointing to it.
1039  // base() always returns a non-zero pointer,
1040  // while suffix() returns 0 if no suffix is found.
1041  const char *base(UT_String &buf) const;
1042  const char *suffix() const;
1043 
1044  // incrementNumberedName will increment a name. If it has a numerical
1045  // suffix, that suffix is incremented. If not, "2" is appended to the
1046  // name. The preserve_padding parameter can be set to true so that zero
1047  // padding is preserved. Incrementing foo0009 will produce foo10 with
1048  // this parameter set to false, or foo0010 if it is set to true.
1049  void incrementNumberedName(bool preserve_padding = false);
1050 
1051  // setFormat is used to set how an outstream formats its ascii output.
1052  // So you can use printf style formatting. eg:
1053  // UT_String::setFormat(cout, "%08d") << 100;
1054  //
1055  // Note: Don't do:
1056  // cout << UT_String::setFormat(cout, "%08d") << 100;
1057  // ^^^^
1058  // Also: The formatting changes (except for field width) are permanent,
1059  // so you'll have to reset them manually.
1060  //
1061  // TODO: A resetFormat, and a push/pop format pair.
1062  static std::ostream &setFormat(std::ostream &os, const char *fmt);
1063  std::ostream &setFormat(std::ostream &os);
1064 
1065  int replacePrefix(const char *oldpref,
1066  const char *newpref);
1067  int replaceSuffix(const char *oldsuffix,
1068  const char *newsuffix);
1069 
1070  // expandArrays will expand a series of tokens of the
1071  // form prefix[pattern]suffix into the names array
1072  //
1073  // Note: Each names[i] must be free'd after use
1074  // and label is used on the non-const parse method
1075  // NB: The max variants are all deprecated, use UT_WorkArgs
1076  // instead.
1077  int expandArrays(char *names[], int max);
1078 
1079  // This routine will ensure no line is over the specified
1080  // number of columns. Offending lines will be wrapped at
1081  // the first spaceChar or cut at exactly cols if spaceChar
1082  // is not found.
1083  // It returns one if any changes were done.
1084  // It currently treats tabs as single characters which should be
1085  // changed.
1086  // It will break words at hyphens if possible.
1087  int format(int cols);
1088 
1089  /// Replaces up to 'count' occurrences of 'find' with 'replacement',
1090  /// and returns the number of substitutions that occurred.
1091  /// If 'count' <= 0, all occurrences will be replaced.
1092  int substitute( const char *find, const char *replacement,
1093  exint count = -1);
1094 
1095  // This function replaces the character found with another character.
1096  int substitute( char find, char replacement, bool all = true );
1097 
1098  // this function removes the substring at pos and len, and inserts str
1099  // at pos. it returns the difference (new_length - old_length)
1100  int replace( int pos, int len, const char *str );
1101 
1102  // remove the first len characters of this string
1103  int eraseHead(int len)
1104  { return replace(0, len, ""); }
1105 
1106  // remove the last len characters of this string
1107  int eraseTail(int len)
1108  { return replace(length() - len, len, ""); }
1109 
1110  // remove the substring start at pos for len characters
1111  int erase(int pos = 0, int len = -1)
1112  {
1113  if (len < 0)
1114  len = length() - pos;
1115  return replace(pos, len, "");
1116  }
1117 
1118  // insert the given string at pos into this string
1119  int insert(int pos, const char *str)
1120  { return replace(pos, 0, str); }
1121 
1122  // Does a "smart" string compare which will sort based on numbered names.
1123  // That is "text20" is bigger than "text3". In a strictly alphanumeric
1124  // comparison, this would not be the case. Zero is only returned if both
1125  // strings are identical.
1126  static int compareNumberedString(const char *s1,
1127  const char *s2,
1128  bool case_sensitive=true,
1129  bool allow_negatives=false);
1130  static int qsortCmpNumberedString(const char *const*v1,
1131  const char *const*v2);
1132 
1133  // Like compare numbered strings, but it sorts better when there are
1134  // .ext extensions (i.e. it handles '.' as a special case)
1135  static int compareNumberedFilename(const char *s1,
1136  const char *s2,
1137  bool case_sensitive=false);
1138  static int qsortCmpNumberedFilename(const char *const*v1,
1139  const char *const*v2);
1140 
1141  // Like compare numbered strings, but allows special ordering of certain
1142  // characters that should always come first or last.
1143  static int compareNumberedStringWithExceptions(const char *s1,
1144  const char *s2,
1145  bool case_sensitive=false,
1146  bool allow_negatives=false,
1147  const char *sorted_first=nullptr,
1148  const char *sorted_last=nullptr);
1149 
1150  /// Compare two version strings which have numbered components separated by
1151  /// dots. eg. "X.Y.Z". Assumes the components go from most to least
1152  /// significant in left to right order.
1153  static int compareVersionString(const char *s1, const char *s2);
1154 
1155  /// Given a path, set the value of the string to the program name. For
1156  /// example: @code
1157  /// str.extractProgramName(argv[0]);
1158  /// str.extractProgramName("c:/Path/program.exe");
1159  /// str.extractProgramName("/usr/bin/program");
1160  /// @endcode
1161  /// This will extract the last path component. Program names may also have
1162  /// their extensions stripped. For example ".exe" on Windows and "-bin" to
1163  /// strip the Houdini wrappers on other platforms.
1164  ///
1165  /// @note The path should be normalized to have forward slashes as the path
1166  /// separator.
1167  void extractProgramName(const char *path,
1168  bool strip_extension=true,
1169  bool normalize_path=true);
1170 
1171  /// Given a path, check to see whether the program name matches the
1172  /// expected. For example: @code
1173  /// if (UT_String::matchProgramName(argv[0], "houdini"))
1174  /// if (UT_String::matchProgramName("c:/Path/houdini.exe", "houdini"))
1175  /// if (UT_String::matchProgramName("/usr/bin/houdini", "houdini"))
1176  /// @endcode
1177  /// The matching is always case-insensitive.
1178  ///
1179  /// @note The path should be normalized to have forward slashes as the path
1180  /// separator.
1181  static bool matchProgramName(const char *path, const char *expected,
1182  bool normalize_path=false);
1183 
1184  /// Convert a path to a "normalized" path. That is, all back-slashes will
1185  /// be converted to forward slashes. On some operating systems, this will
1186  /// leave the string unchanged.
1187  void normalizePath();
1188 
1189  // A very fast integer to string converter. This is faster (at least on
1190  // SGI) than using sprintf("%d"). About two to three times as fast. Both
1191  // of these methods return the length of the string generated.
1192  static int itoa(char *str, int64 i);
1193  static int utoa(char *str, uint64 i);
1194 
1195  // Versions of the above functions which set into this string object
1196  void itoa(int64 i);
1197  void utoa(uint64 i);
1198 
1199  // A reader-friendly version of itoa. This places commas appropriately
1200  // to ensure the person can pick out the kilo points easily.
1201  // This can handle numbers up to 999,999,999,999,999,999.
1202  void itoaPretty(int64 val);
1203 
1204  /// Convert the given time delta (in milliseconds)
1205  /// to a reader-friendly string in days, hours, minutes, and seconds.
1206  void timeDeltaToPrettyString(double time_ms);
1207 
1208  /// Convert the given time delta (in milliseconds)
1209  /// to a reader-friendly string in milliseconds.
1210  void timeDeltaToPrettyStringMS(double time_ms);
1211 
1212  // Do an sprintf into this string. This method will allocate exactly the
1213  // number of bytes required for the final string. If the format string is
1214  // bad, isstring() will return false afterwards.
1215  int sprintf(const char *fmt, ...) SYS_PRINTF_CHECK_ATTRIBUTE(2, 3);
1216 
1217  // This will change the string into a valid C style variable name.
1218  // All non-alpha numerics will be converted to _.
1219  // If the first letter is a digit, it is prefixed with an _.
1220  // This returns 0 if no changes occurred, 1 if something had to
1221  // be adjusted.
1222  // Note that this does NOT force the name to be non-zero in length.
1223  // The safechars parameter is a string containing extra characters
1224  // that should be considered safe. These characters are not
1225  // converted to underscores.
1226  int forceValidVariableName(const char *safechars = nullptr);
1227  // Returns true if the string matches a C-style variable name.
1228  // The safechars are not allowed to be the start.
1229  // Matching forceValid, empty strings are considered valid!
1230  bool isValidVariableName(const char *safechars = nullptr) const;
1231 
1232  // This will force all non-alphanumeric characters to be underscores.
1233  // Returns true if any changes were required.
1234  bool forceAlphaNumeric();
1235 
1236  // This function will calculate the relative path to get from src to dest.
1237  // If file_path is false, this method assumes it is dealing with node paths.
1238  // If file_path is true, it will also deal with Windows drive letters and
1239  // UNC paths.
1240  void getRelativePath(const char *src_fullpath,
1241  const char *dest_fullpath,
1242  bool file_path = false,
1243  bool allow_relative_path_from_root = true);
1244 
1245  // This function takes two absolute paths and returns the length of the
1246  // longest common path prefix, up to and including the last '/'. This
1247  // means, for instance, that if fullpath1[len1-1] == '/' then all of
1248  // fullpath1 is eligible as a common prefix.
1249  // NB: This function DOES NOT handle NT style drive names! It is currently
1250  // only used for op paths. If you want to add support for this, you
1251  // should add another default parameter to do this.
1252  static int findLongestCommonPathPrefix(const char *fullpath1, int len1,
1253  const char *fullpath2, int len2);
1254 
1255  // This function tests whether we are an absolute path, and returns true or
1256  // false depending on whether we are.
1257  bool isAbsolutePath(bool file_path=false) const;
1258 
1259  // This function assumes that we are an absolute path and will remove all
1260  // un-necessary components from it as long as we remain an absolute path.
1261  // We return false if an error was encountered, in which case the results
1262  // are unpredictable.
1263  bool collapseAbsolutePath(bool file_path=false);
1264 
1265  // This function will make sure that the string is at most max_length
1266  // characters long. If the string is longer than that, it will
1267  // replace the middle of the string by "...". Returns true if the string
1268  // has changed and false otherwise. max_length must be greater than 3.
1269  bool truncateMiddle(int max_length);
1270 
1271  // This function is an abomination when you can just write:
1272  // UT_String foo("");
1273  // ...
1274  // if (foo.isstring())
1275  // ...
1276  // Avoid using it and do not write functions that return "const UT_String&"
1277  static const UT_String &getEmptyString();
1278 
1279  /// Count the number of valid characters in the : modifier for variable
1280  /// expansion. For example, the string ":r" will return 2, the string
1281  /// ":r:t" will return 4, the string ":z" will return 0. These use the csh
1282  /// expansion modifiers.
1283  ///
1284  /// If the string doesn't start with a ':', the method will return 0.
1285  static int countCshModifiers(const char *src);
1286 
1287  /// Applies a "csh" style modifier string to this string. For example, a
1288  /// modifier string of ":e" would replace the string with the file
1289  /// extension of the string.
1290  ///
1291  /// Returns true if any modifications were performed
1292  bool applyCshModifiers(const char *modifiers);
1293 
1294 
1295  /// This will remove the range from a string of the form foo$Fbar.ext (#-#)
1296  /// and return the first number from the range. If there is only 1 range
1297  /// number, it will be returned. If there is no range, 0 is returned.
1298  /// The returned string is hardened.
1299  UT_String removeRange ();
1300 
1301  /// This will format a value to represent a given size in bytes, kilobytes,
1302  /// megabytes, etc.
1303  void formatByteSize(exint size, int digits=2);
1304 
1305  // UTF-8 helpers
1306 
1307  /// Returns the number of Unicode codepoints in the string, assuming it's
1308  /// encoded as UTF-8.
1309  int getCodePointCount() const;
1310 
1311  /// Returns a list of Unicode code points from this string.
1312  void getAsCodePoints(UT_Int32Array &cp_list) const;
1313 
1314  /// Friend specialization of std::swap() to use UT_String::swap()
1315  /// @internal This is needed because standard std::swap() implementations
1316  /// will try to copy the UT_String objects, causing hardened strings to
1317  /// become weak.
1318  friend void swap(UT_String& a, UT_String& b) { a.swap(b); }
1319 
1320  /// expandArrays will expand a series of tokens of the
1321  /// form prefix[pattern]suffix into the names UT_StringArray
1322  /// @param tokens will store the parsed tokens without expansion
1323  /// @param names will store the parsed tokens with expansion
1324  /// This doesn't need a max argument like:
1325  /// int expandArrays(char *names[], int max)
1326  int expandArrays(UT_StringArray &tokens, UT_StringArray &names);
1327 
1328 private:
1329  template <typename OSTREAM>
1330  void saveInternal(OSTREAM &os, bool binary) const;
1331 
1332  void freeData();
1333 
1334  /// implements a few csh-style modifiers.
1335  /// @param mod pointer to a string starting with the modifier to apply.
1336  /// so, to apply a global substitute modifier :gs/l/r/
1337  /// mod should be: s/l/r
1338  /// @param all True if all possible modifications should be
1339  /// (recursively) performed.
1340  /// Otherwise, at most one modification is applied.
1341  /// @return whether any modification was performed
1342  bool applyNextModifier(const char *mod, bool all);
1343 
1344 
1345  /// Sets myIsReference to false and copies the other_string into myData,
1346  /// but attempts to avoid unnecessary memory reallocations. Frees up
1347  /// any previous data, if necessary. If other_string is NULL, the call
1348  /// is equivalent to freeData().
1349  void doSmartCopyFrom(const char* other_string);
1350 
1351  static int compareNumberedStringInternal(const char *s1, const char *s2,
1352  bool case_sensitive,
1353  bool allow_negatives,
1354  const char *sorted_first,
1355  const char *sorted_last);
1356 
1357  static SYS_FORCE_INLINE void utStrFree(char *str)
1358  {
1359 #if defined(UT_DEBUG) && !defined(_WIN32)
1360  if (str)
1361  ::memset((void *)str, 0xDD, ::strlen(str) + 1);
1362 #endif
1363  ::free((void *)str);
1364  }
1365 
1366  char *myData;
1367  bool myIsReference:1,
1368  myIsAlwaysDeep:1;
1369 
1370  /// This operator saves the string to the stream via the string's
1371  /// saveAscii() method, protecting any whitespace (by adding quotes),
1372  /// backslashes or quotes in the string.
1373  friend UT_API std::ostream &operator<<(std::ostream &os, const UT_String &d);
1374  friend UT_API UT_OStream &operator<<(UT_OStream &os, const UT_String &d);
1375 
1376  friend class UT_API UT_StringRef;
1377 };
1378 
1379 /// Creates a shallow wrapper around a string for calling UT_String's many
1380 /// const algorithms.
1382 {
1383 public:
1384  // We only have a single constructor which is always shallow.
1386  UT_StringWrap(const char *str)
1387  : UT_String(str)
1388  {}
1389  // It seems necessary on MSVC to forceinline the empty constructor in order
1390  // to have it inlined.
1393  {}
1394 
1395  UT_StringWrap(const UT_StringWrap &) = delete;
1396  UT_StringWrap &operator=(const UT_StringWrap &) = delete;
1397 
1398  // Manually wrap methods that have non-const overloads or return non-const
1399  // pointers.
1400  char operator()(unsigned i) const { return UT_String::operator()(i); }
1401  const char *findChar(int c) const { return UT_String::findChar(c); }
1402  const char *findChar(const char *str) const { return UT_String::findChar(str); }
1403  const char *findNonSpace() const { return UT_String::findNonSpace(); }
1404  const char *lastChar(int c) const { return UT_String::lastChar(c); }
1405 
1406  using UT_String::operator==;
1407  using UT_String::operator!=;
1408  using UT_String::c_str;
1409  using UT_String::length;
1410 
1411  using UT_String::base;
1412  using UT_String::compare;
1413  using UT_String::contains;
1414  using UT_String::count;
1415  using UT_String::countChar;
1416  using UT_String::distance;
1417  using UT_String::endsWith;
1418  using UT_String::equal;
1419  using UT_String::fcontain;
1421  using UT_String::fileName;
1422  using UT_String::findWord;
1423  using UT_String::findString;
1426  using UT_String::isFloat;
1427  using UT_String::isInteger;
1429  using UT_String::isstring;
1430  using UT_String::match;
1431  using UT_String::matchFile;
1433  using UT_String::matchPath;
1435  using UT_String::multiMatch;
1440  using UT_String::save;
1441  using UT_String::saveAscii;
1442  using UT_String::saveBinary;
1443  using UT_String::splitPath;
1444  using UT_String::startsWith;
1445  using UT_String::substr;
1446  using UT_String::suffix;
1447  using UT_String::toFloat;
1448  using UT_String::toInt;
1449 };
1450 
1451 inline
1453  : myIsReference(false)
1454  , myIsAlwaysDeep(true)
1455  , myData(nullptr)
1456 {
1457  *this = str;
1458 }
1459 
1460 inline
1462  : myIsReference(false)
1463  , myIsAlwaysDeep(true)
1464  , myData(nullptr)
1465 {
1466  *this = std::move(str);
1467 }
1468 
1469 inline UT_String &
1471 {
1472  adopt(str);
1473  myIsAlwaysDeep = true; // matches copy constructor behaviour
1474  return *this;
1475 }
1476 
1479 {
1480  if (!myIsReference && myData)
1481  utStrFree(myData);
1482 }
1483 
1485 void
1486 UT_String::freeData()
1487 {
1488  if (myData)
1489  {
1490  if (!myIsReference)
1491  utStrFree(myData);
1492  myData = nullptr;
1493  }
1494 }
1495 
1496 inline void
1498 {
1499  // We can't use UTswap because it doesn't work with bit fields.
1500  bool temp = myIsReference;
1501  myIsReference = other.myIsReference;
1502  other.myIsReference = temp;
1503 
1504  char *tmp_data = myData;
1505  myData = other.myData;
1506  other.myData = tmp_data;
1507 
1508  if (myIsAlwaysDeep)
1509  harden();
1510 
1511  if (other.myIsAlwaysDeep)
1512  other.harden();
1513 }
1514 
1516 {
1517 public:
1518  UT_String myOut; // Points to argument following '>'
1519  UT_String myErr; // Points to argument following '>&'
1520  UT_String myIn; // Points to argument following '<'
1521  short myDoubleOut; // If the argument is '>>' or '>>&'
1522  short myDoubleIn; // If the argument is '<<'
1523 };
1524 
1525 UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[]);
1526 
1527 /// Does a "smart" string compare which will sort based on numbered names.
1528 /// That is "text20" is bigger than "text3". In a strictly alphanumeric
1529 /// comparison, this would not be the case.
1531 {
1532  bool operator()(const char *s1, const char *s2) const
1533  {
1534  return UT_String::compareNumberedString(s1, s2) < 0;
1535  }
1536 
1537  bool operator()(const std::string &s1, const std::string &s2) const
1538  {
1539  return operator()(s1.c_str(), s2.c_str());
1540  }
1541 };
1542 
1543 #endif
bool match(const char *pattern, bool case_sensitive=true) const
int tokenize(char *argv[], int max_args, const char *separators=" \t\n")
Definition: UT_String.h:861
UT_String & operator+=(const char *str)
Definition: UT_String.h:349
static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code=0)
Definition: UT_String.h:918
int distance(const char *str, bool case_sensitive=true, bool allow_subst=true) const
char * lastChar(int c)
Definition: UT_String.h:581
typedef int(APIENTRYP RE_PFNGLXSWAPINTERVALSGIPROC)(int)
bool isValidVariableName(const char *safechars=nullptr) const
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glcorearb.h:2540
bool operator!=(const char *str) const
Definition: UT_String.h:434
UT_String & operator+=(const UT_String &str)
Definition: UT_String.h:381
UT_API void normalizePath(UT_String &file_path, bool want_marker=false, bool always_want_expanded_path=false)
bool operator>=(const UT_StringRef &str) const
Definition: UT_String.h:490
int count(const char *str, bool case_sensitive=true) const
Count the occurrences of the string.
T mod(T x, int y)
Definition: chrono.h:1648
bool matchFileExtension(const char *match_extension) const
Definition: UT_String.h:664
void swap(UT_String &other)
Definition: UT_String.h:1497
void saveAscii(UT_OStream &os) const
Definition: UT_String.h:308
bool operator()(const char *s1, const char *s2) const
Definition: UT_String.h:1532
T negative(const T &val)
Return the unary negation of the given value.
Definition: Math.h:128
const char * lastChar(int c) const
Definition: UT_String.h:1404
bool isInteger(bool skip_spaces=false) const
Determine if string can be seen as a single integer number.
that also have some descendant prim *whose name begins with which in turn has a child named baz where *the predicate and *a name There is also one special expression _ which means *the weaker expression when composing expressions together See with
bool operator<=(const char *str) const
Definition: UT_String.h:458
UT_String myIn
Definition: UT_String.h:1520
fpreal toFloat() const
bool operator==(const char *str) const
Definition: UT_String.h:422
bool operator<=(const UT_String &str) const
Definition: UT_String.h:462
int toInt() const
char * fileExtension()
Definition: UT_String.h:647
CompareResults OIIO_API compare(const ImageBuf &A, const ImageBuf &B, float failthresh, float warnthresh, float failrelative, float warnrelative, ROI roi={}, int nthreads=0)
const GLuint GLenum const void * binary
Definition: glcorearb.h:1924
bool isHard() const
Returns whether this string is hardened already.
Definition: UT_String.h:251
GLsizei const GLchar *const * path
Definition: glcorearb.h:3341
SYS_FORCE_INLINE T * SYSconst_cast(const T *foo)
Definition: SYS_Types.h:136
UT_String makeQuotedString(char delimiter='\'', bool escape_nonprinting=false) const
const char * findChar(const char *str) const
Definition: UT_String.h:575
int64 exint
Definition: SYS_Types.h:125
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1222
GLdouble s
Definition: glad.h:3009
void swap(T &lhs, T &rhs)
Definition: pugixml.cpp:7440
void write(unsigned i, char c)
Definition: UT_String.h:534
bool operator==(const UT_String &str) const
Definition: UT_String.h:426
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:795
#define UT_API
Definition: UT_API.h:14
const char * fileExtension() const
Definition: UT_String.h:654
const char * data() const
Definition: UT_String.h:517
bool isAbsolutePath(bool file_path=false) const
bool findString(const char *str, bool fullword, bool usewildcards) const
**But if you need a result
Definition: thread.h:622
char * findChar(int c)
Definition: UT_String.h:565
#define SYS_EXINT_MAX
Definition: SYS_Types.h:181
FMT_CONSTEXPR auto find(Ptr first, Ptr last, T value, Ptr &out) -> bool
Definition: core.h:2138
char & operator()(unsigned i)
Definition: UT_String.h:527
bool equal(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:409
GLfloat GLfloat GLfloat v2
Definition: glcorearb.h:818
const char * findNonSpace() const
Definition: UT_String.h:1403
unsigned long long uint64
Definition: SYS_Types.h:117
int compare(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:389
GLuint buffer
Definition: glcorearb.h:660
void clear()
Reset the string to the default constructor.
Definition: UT_String.h:318
bool isAlwaysDeep() const
Definition: UT_String.h:213
const char * c_str() const
Definition: UT_String.h:515
OutGridT const XformOp bool bool
SYS_FORCE_INLINE UT_String(const char *str=nullptr)
Construct UT_String from a C string, using shallow semantics.
Definition: UT_String.h:85
bool matchPath(const char *pattern, bool case_sensitive=true, bool *excludes_branch=nullptr) const
SIM_API const UT_StringHolder all
unsigned length() const
Return length of string.
Definition: UT_String.h:553
int compare(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:403
< returns > If no error
Definition: snippets.dox:2
const char * suffix() const
bool operator<(const char *str) const
Definition: UT_String.h:446
bool operator<(const UT_StringRef &str) const
Definition: UT_String.h:454
UT_API void UTexprLookup(const char *name, UT_String &result)
bool contains(const char *pattern, bool case_sensitive=true) const
int tokenize(UT_WorkArgs &argv, const char *separators=" \t\n")
Definition: UT_String.h:869
std::ostream & operator<<(std::ostream &ostr, const DataType &a)
Definition: DataType.h:133
UT_String(UT_AlwaysDeepType, const std::string &str)
Construct UT_String from a std::string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:160
void hardenIfNeeded(const char *s)
Take shallow copy and make it deep.
Definition: UT_String.h:241
const char * buffer() const
Definition: UT_String.h:516
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:40
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool isEmpty() const
Returns true if the string is empty.
SYS_FORCE_INLINE uint32 hash() const
Definition: UT_String.h:910
bool operator==(const UT_StringRef &str) const
Definition: UT_String.h:430
GLintptr offset
Definition: glcorearb.h:665
char operator()(unsigned i) const
Definition: UT_String.h:1400
int tokenize(char *argv[], int max_args, char separator)
Definition: UT_String.h:849
bool operator>=(const char *str) const
Definition: UT_String.h:482
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileName(const StringT &str)
int tokenizeInPlace(T &list, const char *separators=" \t\n")
Definition: UT_String.h:885
OIIO_FORCEINLINE const vint4 & operator+=(vint4 &a, const vint4 &b)
Definition: simd.h:4512
#define SYS_SAFE_BOOL
Definition: SYS_Compiler.h:55
bool operator!=(const UT_String &str) const
Definition: UT_String.h:438
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:155
bool operator>=(const UT_String &str) const
Definition: UT_String.h:486
#define SYS_PRINTF_CHECK_ATTRIBUTE(string_index, first_to_check)
Definition: SYS_Types.h:448
char * findNonSpace()
std::string OIIO_UTIL_API replace(string_view str, string_view pattern, string_view replacement, bool global=false)
UT_String(UT_AlwaysDeepType, const char *str=nullptr)
Construct UT_String from a C string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:153
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
GLint GLint GLsizei GLint GLenum format
Definition: glcorearb.h:108
bool matchPattern(const UT_WorkArgs &pattern_args, bool assume_match=false) const
bool operator>(const UT_String &str) const
Definition: UT_String.h:474
char * SYSstrtok(char *string, const char *delimit, char **context)
Definition: SYS_String.h:106
char * findChar(const char *str)
Definition: UT_String.h:573
#define UT_ASSERT_SLOW(ZZ)
Definition: UT_Assert.h:154
const char * findChar(int c) const
Definition: UT_String.h:567
void harden()
Take shallow copy and make it deep.
Definition: UT_String.h:222
void saveAscii(std::ostream &os) const
Definition: UT_String.h:307
bool equal(const UT_StringRef &str, bool case_sensitive=true) const
Definition: UT_String.h:417
UT_String(UT_String &&str) noexcept
Definition: UT_String.h:176
long long int64
Definition: SYS_Types.h:116
bool equal(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:413
void setAlwaysDeep(bool deep)
Make a string always deep.
Definition: UT_String.h:197
bool operator>(const UT_StringRef &str) const
Definition: UT_String.h:478
const char * findChar(const char *str) const
Definition: UT_String.h:1402
bool matchFile(const char *pattern) const
bool operator()(const std::string &s1, const std::string &s2) const
Definition: UT_String.h:1537
GLuint const GLchar * name
Definition: glcorearb.h:786
int eraseHead(int len)
Definition: UT_String.h:1103
GLushort pattern
Definition: glad.h:2583
void toUpper()
Definition: UT_String.h:619
void adopt(UT_String &str)
Definition: UT_String.h:294
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1222
SYS_FORCE_INLINE ~UT_StringWrap()
Definition: UT_String.h:1392
const char * findWord(const char *word) const
bool operator>(const char *str) const
Definition: UT_String.h:470
int64 getMemoryUsage(bool inclusive=true) const
Return memory usage in bytes.
Definition: UT_String.h:557
void saveBinary(std::ostream &os) const
Save string to binary stream.
Definition: UT_String.h:303
bool isFloat(bool skip_spaces=false, bool loose=false, bool allow_underscore=false) const
Determine if string can be seen as a single floating point number.
static int compareNumberedString(const char *s1, const char *s2, bool case_sensitive=true, bool allow_negatives=false)
short myDoubleIn
Definition: UT_String.h:1522
void adopt(char *s)
Definition: UT_String.h:284
GLsizeiptr size
Definition: glcorearb.h:664
UT_String pathUpToExtension() const
__hostdev__ bool isInteger(GridType gridType)
Return true if the GridType maps to a POD integer type.
Definition: NanoVDB.h:820
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileExtension(const StringT &str)
GLenum func
Definition: glcorearb.h:783
int substr(UT_String &buf, int index, int len=0) const
SYS_NO_DISCARD_RESULT bool UTstringMatchFileExtension(const StringT &str, const char *extension)
void save(std::ostream &os, bool binary) const
Save string to stream. Saves as binary if binary is true.
short myDoubleOut
Definition: UT_String.h:1521
fpreal64 fpreal
Definition: SYS_Types.h:278
int parse(UT_StringArray &argv, int start_arg=0, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:832
bool multiMatch(const char *pattern, bool case_sensitive, char separator) const
LeafData & operator=(const LeafData &)=delete
char * steal()
Definition: UT_String.h:262
char * SYSstrcasestr(const char *haystack, const char *needle)
Replacement for strcasestr, since no equivalent exists on Win32.
Definition: SYS_String.h:290
GLuint index
Definition: glcorearb.h:786
bool multiMatchRecord(const char *pattern, int maxpatterns, char *singles, int &nsingles, char **words, int &nwords, bool case_sensitive=true, const char *separators=", ") const
int parseNumberedFilename(UT_String &prefix, UT_String &frame, UT_String &suff, bool negative=true, bool fractional=false) const
UT_AlwaysDeepType
Definition: UT_String.h:79
GLfloat GLfloat v1
Definition: glcorearb.h:817
auto ptr(T p) -> const void *
Definition: format.h:4331
GLuint GLfloat * val
Definition: glcorearb.h:1608
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
**If you just want to fire and args
Definition: thread.h:618
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator begin() const
Returns a constant iterator pointing to the beginning of the string.
unsigned int uint32
Definition: SYS_Types.h:40
const char * lastChar(int c) const
Definition: UT_String.h:583
UT_String myOut
Definition: UT_String.h:1518
UT_String myErr
Definition: UT_String.h:1519
bool isstring() const
Definition: UT_String.h:698
int findLongestCommonSuffix(const char *with) const
void hardenIfNeeded()
Take shallow copy and make it deep.
Definition: UT_String.h:231
const char * findChar(int c) const
Definition: UT_String.h:1401
int parse(char *argv[], int max_args, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:820
bool operator<(const UT_String &str) const
Definition: UT_String.h:450
int erase(int pos=0, int len=-1)
Definition: UT_String.h:1111
int tokenize(UT_WorkArgs &argv, char separator)
Definition: UT_String.h:855
auto sprintf(const S &fmt, const T &...args) -> std::basic_string< Char >
Definition: printf.h:617
string_view OIIO_UTIL_API strip(string_view str, string_view chars=string_view())
SIM_API const UT_StringHolder distance
bool operator<=(const UT_StringRef &str) const
Definition: UT_String.h:466
bool startsWith(const UT_StringView &prefix, bool case_sensitive=true) const
void splitPath(UT_String &dir_name, UT_String &file_name) const
char operator()(unsigned i) const
Definition: UT_String.h:520
bool OIIO_UTIL_API contains(string_view a, string_view b)
Does 'a' contain the string 'b' within it?
int parse(UT_WorkArgs &argv, int start_arg=0, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:826
const char * base(UT_String &buf) const
UT_String & operator=(UT_String &&str) noexcept
Definition: UT_String.h:184
void removeLast()
Remove the last character.
Definition: UT_String.h:334
UT_API void UTvarLookup(const char *name, UT_String &result)
SYS_FORCE_INLINE UT_StringWrap(const char *str)
Definition: UT_String.h:1386
bool endsWith(const UT_StringView &suffix, bool case_sensitive=true) const
UT_String(const std::string &str)
Construct UT_String from a std::string, always doing a deep copy. The result will only be a UT_Always...
Definition: UT_String.h:127
int eraseTail(int len)
Definition: UT_String.h:1107
const char * fileName() const
Definition: UT_String.h:640
OIIO_UTIL_API std::string extension(string_view filepath, bool include_dot=true) noexcept
GLint GLsizei count
Definition: glcorearb.h:405
Definition: format.h:1821
int countChar(int c) const
Return the number of occurrences of the specified character.
UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[])
int tokenize(T &list, const char *separators=" \t\n")
Definition: UT_String.h:878
const char * nonNullBuffer() const
Definition: UT_String.h:518
void toLower()
Definition: UT_String.h:626
GLenum src
Definition: glcorearb.h:1793
int insert(int pos, const char *str)
Definition: UT_String.h:1119
const char * fcontain(const char *pattern, bool case_sensitive=true) const
Definition: UT_String.h:1016
bool operator!=(const UT_StringRef &str) const
Definition: UT_String.h:442