HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_String.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  *
7  * NAME: Utility Library (C++)
8  *
9  * COMMENTS: String class
10  *
11  */
12 
13 #ifndef __UT_String_h__
14 #define __UT_String_h__
15 
16 #include "UT_API.h"
17 
18 #include "UT_Assert.h"
19 #include "UT_VectorTypes.h"
20 #include "UT_StringView.h"
21 #include "UT_StringUtils.h"
22 
23 #include <SYS/SYS_Compiler.h>
24 #include <SYS/SYS_Deprecated.h>
25 #include <SYS/SYS_Inline.h>
26 #include <SYS/SYS_String.h>
27 #include <SYS/SYS_Types.h>
28 
29 #include <iosfwd>
30 #include <string>
31 #include <utility>
32 
33 #include <ctype.h>
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #ifdef WIN32
38  #define strcasecmp stricmp
39  #define strncasecmp strnicmp
40 #endif
41 
42 class UT_OStream;
43 class UT_String;
44 class UT_StringCshIO;
45 class UT_WorkArgs;
46 class UT_IStream;
47 class ut_PatternRecord;
48 class UT_StringMMPattern;
49 class UT_StringArray;
50 class UT_StringHolder;
51 class UT_StringRef;
52 
53 // The following lookup functions are used by cshParse. By default,
54 // varLookup simply uses getenv, exprLookup opens the command as
55 // a pipe and uses the result.
56 UT_API extern void UTvarLookup(const char *name, UT_String &result);
57 UT_API extern void UTexprLookup(const char *name, UT_String &result);
58 
59 /// @file
60 /// @class UT_String
61 ///
62 /// UT_String is a string class that supports two different types of assignment
63 /// semantics:
64 /// - Shallow (default): Just reference the given string and NOT take
65 /// ownership.
66 /// - Deep: Make a copy of the given string, taking ownership in the
67 /// process (a.k.a. making it "hard").
68 ///
69 /// If UT_String::harden() is called, or any other UT_String method that
70 /// requires modifying the string, it will make a copy of its reference pointer
71 /// (and take ownership) first.
72 ///
74 {
75 public:
76 
77  /// UT_String can be constructed with UT_String::ALWAYS_DEEP to create an
78  /// object that will always perform deep copies when assigned to.
79  enum UT_AlwaysDeepType { ALWAYS_DEEP };
80 
81  /// @brief Construct UT_String from a C string, using shallow semantics
82  ///
83  /// @param str The initial string.
85  UT_String(const char *str = 0)
86  : myData(SYSconst_cast(str))
87  , myIsReference(true)
88  , myIsAlwaysDeep(false)
89  {}
90 
91  /// @brief Construct UT_String from a C string, using shallow semantics
92  ///
93  /// @param str The initial string.
94  /// @param deep_copy If true, a copy of @em str will be used.
95  /// @param len Number of characters to use from @em str. Use -1 to
96  /// use the entire string. If len is non-negative, then
97  /// deepCopy will be implicitly set to true. If str is NULL
98  /// and len is non-negative, then it will be initialized
99  /// with "".
100  UT_String(const char *str, bool deep_copy, int len = -1);
101 
102  /// @brief Construct UT_String from a std::string, always doing
103  /// a deep copy. The result will only be a UT_AlwaysDeep if the
104  /// appropriate version is used, however!
105  ///
106  /// NOTE: You cannot do:
107  /// UT_String foo;
108  /// std::string bar = "hello world";
109  /// foo = UT_String(bar.substr(2, 5));
110  ///
111  /// It provides a shortcut for constructing a UT_String from a function
112  /// that returns a std::string by value. For example, it lets you write
113  /// @code
114  /// UT_String str(func());
115  /// @endcode
116  /// instead of
117  /// @code
118  /// UT_String str(func().c_str(), /*harden=*/true);
119  /// @endcode
120  explicit UT_String(const std::string &str)
121  : myIsReference(false),
122  myIsAlwaysDeep(false)
123  { myData = strdup(str.c_str()); }
124 
125  /// @brief Construct UT_String from a UT_StringHolder.
126  /// This always duplicates and uses ALWAYS_DEEP semantics.
127  explicit UT_String(const UT_StringHolder &str);
128 
129  /// @brief Construct UT_String from a UT_StringHolder rvalue with
130  /// ALWAYS_DEEP semantics.
131  explicit UT_String(UT_StringHolder &&str);
132 
133 private:
134  /// This is intentionally not implemented - callers should choose between
135  /// the const char * and UT_StringHolder constructors, depending on whether
136  /// they want to make a deep copy.
137  /// @see UT_StringWrap.
138  UT_String(const UT_StringRef &);
139 
140 public:
141  /// @brief Construct UT_String from a UT_StringView.
142  /// This always duplicates and uses ALWAYS_DEEP semantics.
143  explicit UT_String(const UT_StringView &sv);
144 
145  /// @brief Construct UT_String from a C string, using ALWAYS_DEEP semantics
146  UT_String(UT_AlwaysDeepType, const char *str = 0)
147  : myIsReference(false),
148  myIsAlwaysDeep(true)
149  { myData = str ? strdup(str) : 0; }
150 
151  /// @brief Construct UT_String from a std::string, using ALWAYS_DEEP
152  /// semantics
154  : myIsReference(false),
155  myIsAlwaysDeep(true)
156  { myData = strdup(str.c_str()); }
157 
158  /// Copy constructor
159  ///
160  /// If the string we're copying from is ALWAYS_DEEP, then this object will
161  /// also become ALWAYS_DEEP. This way, you can pass/return a string by
162  /// value.
163  UT_String(const UT_String &str);
164 
165  ~UT_String();
166 
167  /// Move operators
168  /// @{
169  UT_String(UT_String &&str) noexcept
170  : myData(str.myData)
171  , myIsReference(str.myIsReference)
172  , myIsAlwaysDeep(str.myIsAlwaysDeep)
173  {
174  str.myData = nullptr;
175  str.myIsReference = !str.myIsAlwaysDeep;
176  }
178  {
179  freeData();
180  myData = str.myData;
181  myIsReference = str.myIsReference;
182  myIsAlwaysDeep = str.myIsAlwaysDeep;
183  str.myData = nullptr;
184  str.myIsReference = !str.myIsAlwaysDeep;
185  return *this;
186  }
187  /// @}
188 
189  /// Make a string always deep
190  void setAlwaysDeep(bool deep)
191  {
192  myIsAlwaysDeep = deep;
193  if (deep && myIsReference)
194  {
195  if (myData != NULL)
196  harden();
197  else
198  {
199  // This takes the same semantic as
200  // str = NULL;
201  // where str is an always deep string
202  myIsReference = false;
203  }
204  }
205  }
206  bool isAlwaysDeep() const
207  {
208  return myIsAlwaysDeep;
209  }
210 
211  void swap( UT_String &other );
212 
213  /// Take shallow copy and make it deep.
214  // @{
215  void harden()
216  {
217  if (!myIsReference && myData)
218  return;
219  myData = strdup(myData ? myData : "");
220  myIsReference = false;
221  }
222 
223  void harden(const char *s, int len = -1);
225  {
226  if (myIsReference)
227  {
228  if (isstring())
229  harden();
230  else
231  *this = "";
232  }
233  }
234  void hardenIfNeeded(const char *s)
235  {
236  if (s && *s)
237  harden(s);
238  else
239  *this = "";
240  }
241  // @}
242 
243  /// Returns whether this string is hardened already.
244  bool isHard() const { return !myIsReference; }
245 
246  /// Give up ownership of string
247  ///
248  /// Take a hard reference and make it shallow. This method makes sure
249  /// it gives back something you can delete, because this UT_String is
250  /// taking its hands off the data. Use it with care since it may lead
251  /// to memory leaks if, for example, you harden it again later.
252  ///
253  /// In the case of ALWAYS_DEEP strings, this is disallowed so it will
254  /// just return a copy of the data.
255  char * steal()
256  {
257  if (!myIsAlwaysDeep)
258  {
259  if (myIsReference)
260  myData = strdup(myData ? myData : ""); // harden
261  myIsReference = true; // but say it's soft
262  return myData;
263  }
264  else
265  {
266  // return a new copy of the data without releasing
267  // ownership for always deep strings
268  return strdup(myData ? myData : "");
269  }
270  }
271 
272  /// Take ownership of given string
273  ///
274  /// adopt() is the opposite of steal(). Basically, you're giving
275  /// the UT_String ownership of the string.
276  // @{
277  void adopt(char *s)
278  {
279  if (!myIsReference)
280  {
281  if (s != myData)
282  utStrFree(myData);
283  }
284  myData = s;
285  myIsReference = false;
286  }
287  void adopt(UT_String &str)
288  {
289  adopt(str.steal());
290  }
291  void adopt(UT_StringHolder &holder);
292 
293  // @}
294 
295  /// Save string to binary stream.
296  void saveBinary(std::ostream &os) const { save(os, true); }
297 
298  /// Save string to ASCII stream. This will add double quotes and escape to
299  /// the stream if necessary (empty string or contains spaces).
300  void saveAscii(std::ostream &os) const { save(os, false); }
301  void saveAscii(UT_OStream &os) const { save(os, false); }
302 
303  /// Save string to stream. Saves as binary if @em binary is true.
304  void save(std::ostream &os, bool binary) const;
305  void save(UT_OStream &os, bool binary) const;
306 
307  /// Load string from stream. Use is.eof() to check eof status
308  bool load(UT_IStream &is);
309 
310  /// Reset the string to the default constructor.
311  void clear()
312  { *this = (const char *)NULL; }
313 
314  /// Prepend a string (or character)
315  // @{
316  void prepend(const char *prefix);
317  void prepend(char ch);
318  // @}
319 
320  /// Append a character
321  void append(char ch);
322 
323  /// Append a string or a section of a string.
324  void append(const char *str, exint len = -1);
325 
326  /// Remove the last character
327  void removeLast() { truncate(length()-1); }
328  /// Truncate the string at the Nth character
329  void truncate(exint len);
330 
331  UT_String &operator=(const UT_String &str);
332  UT_String &operator=(const char *str);
333  UT_String &operator=(const std::string &str);
334  UT_String &operator=(const UT_StringHolder &str);
336  UT_String &operator=(const UT_StringView &str);
337 private:
338  /// Not implemented - see UT_String(const UT_StringRef &).
340 
341 public:
342  UT_String &operator+=(const char *str)
343  {
344  if (!isstring())
345  {
346  // We are an empty string, so we merely copy
347  // the incoming string rather than trying to append
348  // to it.
349  harden(str);
350  }
351  else
352  {
353  bool same = (str == myData);
354  harden();
355  if (str)
356  {
357  int mylen = (int)strlen(myData);
358  myData = (char *)realloc(myData,
359  mylen+strlen(str)+1);
360  if (!same)
361  {
362  strcpy(&myData[mylen], str);
363  }
364  else
365  {
366  memcpy(myData + mylen, myData, mylen);
367  myData[mylen * 2] = '\0';
368  }
369  }
370  }
371  return *this;
372  }
373 
375  {
376  *this += (const char *)str.myData;
377  return *this;
378  }
379  UT_String &operator+=(const UT_StringRef &str);
380 
381  // Basic equality functions and operators
382  int compare(const char *str, bool case_sensitive=true) const
383  {
384  // Unlike std::string, UT_String treats NULL and
385  // the empty string as distinct (empty has precedence).
386  if (myData==0 || str==0)
387  {
388  if (myData) return 1;
389  if(str) return -1;
390  return 0;
391  }
392  if (case_sensitive)
393  return strcmp(myData, str);
394  return strcasecmp(myData, str);
395  }
396  int compare(const UT_String &str, bool case_sensitive=true) const
397  {
398  return compare(str.myData,case_sensitive);
399  }
400  int compare(const UT_StringRef &str, bool case_sensitive=true) const;
401 
402  bool equal(const char *str, bool case_sensitive=true) const
403  {
404  return compare(str,case_sensitive)==0;
405  }
406  bool equal(const UT_String &str, bool case_sensitive=true) const
407  {
408  return compare(str.myData,case_sensitive)==0;
409  }
410  bool equal(const UT_StringRef &str, bool case_sensitive=true) const
411  {
412  return compare(str,case_sensitive)==0;
413  }
414 
415  bool operator==(const char *str) const
416  {
417  return compare(str)==0;
418  }
419  bool operator==(const UT_String &str) const
420  {
421  return compare(str.myData)==0;
422  }
423  bool operator==(const UT_StringRef &str) const
424  {
425  return compare(str)==0;
426  }
427  bool operator!=(const char *str) const
428  {
429  return compare(str)!=0;
430  }
431  bool operator!=(const UT_String &str) const
432  {
433  return compare(str.myData)!=0;
434  }
435  bool operator!=(const UT_StringRef &str) const
436  {
437  return compare(str)!=0;
438  }
439  bool operator<(const char *str) const
440  {
441  return compare(str)<0;
442  }
443  bool operator<(const UT_String &str) const
444  {
445  return compare(str.myData)<0;
446  }
447  bool operator<(const UT_StringRef &str) const
448  {
449  return compare(str)<0;
450  }
451  bool operator<=(const char *str) const
452  {
453  return compare(str)<=0;
454  }
455  bool operator<=(const UT_String &str) const
456  {
457  return compare(str.myData)<=0;
458  }
459  bool operator<=(const UT_StringRef &str) const
460  {
461  return compare(str)<=0;
462  }
463  bool operator>(const char *str) const
464  {
465  return compare(str)>0;
466  }
467  bool operator>(const UT_String &str) const
468  {
469  return compare(str.myData)>0;
470  }
471  bool operator>(const UT_StringRef &str) const
472  {
473  return compare(str)>0;
474  }
475  bool operator>=(const char *str) const
476  {
477  return compare(str)>=0;
478  }
479  bool operator>=(const UT_String &str) const
480  {
481  return compare(str.myData)>=0;
482  }
483  bool operator>=(const UT_StringRef &str) const
484  {
485  return compare(str)>=0;
486  }
487 
488  /// Test whether the string is defined or not
489  SYS_SAFE_BOOL operator bool() const { return isstring(); }
490 
491  /// Return the edit distance between two strings.
492  /// See http://en.wikipedia.org/wiki/Levenshtein_distance for details.
493  /// allow_subst controls whether a substitution of a character with
494  /// another is a single operation, rather than two operations of
495  /// insert and delete.
496  int distance(const char *str,
497  bool case_sensitive = true,
498  bool allow_subst = true) const;
499 
500  operator const char *() const
501  { return (const char *)myData; }
502  operator char *()
503  { return myData; }
504 
505  operator UT_StringView() const
506  { return UT_StringView(myData); }
507 
508  const char *c_str() const { return buffer(); }
509  const char *buffer() const { return myData; }
510  const char *data() const { return buffer(); }
511  const char *nonNullBuffer() const { return myData ? myData : ""; }
512 
513  char operator()(unsigned i) const
514  {
515  UT_ASSERT_P( isstring() );
516  UT_ASSERT_SLOW(i <= strlen(myData));
517  return myData[i];
518  }
519 
520  char &operator()(unsigned i)
521  {
522  harden();
523  return myData[i];
524  }
525 
526  // Prefer using write() since ideally the non-const operator() is removed
527  inline void write(unsigned i, char c)
528  {
529  hardenIfNeeded();
530  myData[i] = c;
531  }
532 
533  int toInt() const;
534  fpreal toFloat() const;
535 
536  /// Converts the contents of this UT_String to a std::string. Note that
537  /// std::string can't be constructed with a null pointer, so you can't
538  /// just write std::string s = ut_string.buffer();
539  std::string toStdString() const;
540 
541  //
542  // Here, we're finished with operators
543  //
544 
545  /// Return length of string
546  unsigned length() const
547  { return (myData) ? (unsigned)strlen(myData) : 0; }
548 
549  /// Return memory usage in bytes
550  int64 getMemoryUsage(bool inclusive=true) const
551  {
552  return (inclusive ? sizeof(*this) : 0)
553  + (!myIsReference ? (length() + 1)*sizeof(char) : 0);
554  }
555 
556  /// Find first occurrance of character. Returns NULL upon failure.
557  /// @{
558  char *findChar(int c)
559  { return myData ? strchr(myData, c) : nullptr; }
560  const char *findChar(int c) const
561  { return SYSconst_cast(*this).findChar(c); }
562  /// @}
563 
564  /// Find first occurrance of any character in @em str
565  /// @{
566  char *findChar(const char *str)
567  { return myData ? strpbrk(myData, str) : nullptr; }
568  const char *findChar(const char *str) const
569  { return SYSconst_cast(*this).findChar(str); }
570  /// @}
571 
572  /// Find last occurance of character
573  /// @{
574  char *lastChar(int c)
575  { return myData ? strrchr(myData, c) : nullptr; }
576  const char *lastChar(int c) const
577  { return SYSconst_cast(*this).lastChar(c); }
578  /// @}
579 
580  /// Return the number of occurrences of the specified character.
581  int countChar(int c) const;
582 
583  /// Count the occurrences of the string
584  int count(const char *str, bool case_sensitive = true) const;
585 
586  char *findNonSpace();
587  const char *findNonSpace() const;
588  const char *findWord(const char *word) const;
589  bool findString(const char *str, bool fullword,
590  bool usewildcards) const;
591  int changeWord(const char *from, const char *to, bool all = true);
592  int changeString(const char *from, const char *to, bool fullword);
593  int changeQuotedWord(const char *from, const char *to,
594  int quote = '`', bool all = true);
595 
596  int findLongestCommonSuffix( const char *with ) const;
597 
598  /// Perform deep copy of the substring starting from @em index
599  /// for @em len characters into the specified UT_String.
600  /// If @em len is too long, then a substring starting from @em index to
601  /// the end of the string is copied.
602  /// Returns the length of the copied substring.
603  int substr(UT_String &buf, int index, int len=0) const;
604 
605  /// Determine if string can be seen as a single floating point number
606  bool isFloat(bool skip_spaces = false,
607  bool loose = false,
608  bool allow_underscore = false) const;
609  /// Determine if string can be seen as a single integer number
610  bool isInteger(bool skip_spaces = false) const;
611 
612  void toUpper()
613  {
614  char *ptr;
615  harden();
616  for (ptr=myData; *ptr; ptr++)
617  *ptr = (char)toupper(*ptr);
618  }
619  void toLower()
620  {
621  char *ptr;
622  harden();
623  for (ptr=myData; *ptr; ptr++)
624  *ptr = (char)tolower(*ptr);
625  }
626 
627 
628  /// Return last component of forward slash separated path string
629  ///
630  /// If there is a slash in the string, fileName() returns the string
631  /// starting after the slash. Otherwise, it returns the contents of
632  /// this string. Note that it returns a pointer into this string.
633  const char *fileName() const
634  {
635  UT_StringView file_name = UTstringFileName(*this);
636  return file_name.begin();
637  }
638  /// Return the extension of a file path string
639  /// @{
641  {
643  if (extension.isEmpty())
644  return nullptr;
645  return myData + (extension.begin() - myData);
646  }
647  const char *fileExtension() const
648  {
649  return SYSconst_cast(*this).fileExtension();
650  }
651  /// @}
652 
653  /// Return whether the file extension matches. The extension passed in
654  /// should include the '.' separator. For example: @code
655  /// matchFileExtension(".jpg")
656  /// @endcode
657  bool matchFileExtension(const char *match_extension) const
658  {
659  return UTstringMatchFileExtension(*this, match_extension);
660  }
661  /// Return path terminated just before the extension.
662  /// If the filename starts with '.' and no path is provided,
663  /// returns NULL
664  UT_String pathUpToExtension() const;
665 
666  /// Replace the file extension and return the new string
667  UT_String replaceExtension(const UT_String &new_ext) const;
668 
669  /// Split a path into @em dir_name and @em file_name, where @em file_name
670  /// is everything after the final slash (i.e. the same as fileName()).
671  /// Either part may be empty. Note that if the string starts with / and
672  /// only contains that one slash, the @em dir_name will be / and not blank.
673  /// @em dir_name and @em file_name will either be set to hardened strings
674  /// or an empty string.
675  void splitPath(UT_String &dir_name, UT_String &file_name) const;
676 
677  /// Decompose a filename into various parts
678  ///
679  /// parseNumberedFileName will breakup a filename into its various
680  /// parts: file = prefix$Fsuffix (note: suffix is
681  /// not the same as file extension.) 0 is returned if there is
682  /// no frame number. 'negative' allows -[frame] to be interpreted as a
683  /// negative number. 'fractional' allows [frame].[number] to be interpreted
684  /// as a fractional frame.
685  int parseNumberedFilename(UT_String &prefix,
686  UT_String &frame,
687  UT_String &suff,
688  bool negative = true,
689  bool fractional = false) const;
690 
691  bool isstring() const
692  { return (myData && *myData); }
693 
694  /// trimSpace() will remove all space characters (leading and following)
695  /// from a string. If the string consists of multiple words, the words will
696  /// be collapsed. The function returns 1 if space was trimmed.
697  int trimSpace(bool leave_single_space_between_words = false);
698 
699  /// A version of trimSpace() that only removes leading and following spaces
700  /// from a string, leaving any between words intact.
701  int trimBoundingSpace();
702 
703  /// strips out all characters found in 'chars'. The string length will be
704  /// reduced by the number of characters removed. The number of characters
705  /// removed is returned.
706  int strip(const char *chars);
707 
708  /// protectString() will modify the existing string to escape double quotes
709  /// and backslashes. It will only wrap the string in double quotes if
710  /// it has spaces in it. If 'protect_empty' is true, the string will
711  /// become '""', otherwise it will stay empty.
712  void protectString(bool protect_empty=false);
713 
714  /// If the char is a quote character `"` or `'` then make sure to protect
715  /// it by adding '\' before the quote character. If the character is not
716  /// a quote character then the character is simply added to the ostream.
717  static void protectString(std::ostream& os, char c);
718 
719  /// protectPreQuotePythonStringLiteral() will modify the existing string
720  /// to escape any non-printing characters, backslashes, and instances of the
721  /// specified delimiter. Unlike protectString(), it will not wrap the
722  /// string in quotes.
723  void protectPreQuotePythonStringLiteral(char delimiter='\'');
724 
725  /// returns true if the string begins and ends with a (non-escaped) quote
726  /// 'delimiter'.
727  bool isQuotedString(char delimiter='\'') const;
728 
729  /// makeQuotedString() is similar to protectString() except it returns a
730  /// new string instead of changing this string, it does wrap the string
731  /// in quotes, and it lets you use either ' or " as the delimiter.
732  /// The quoted string can also optionally be made to escape non-printing
733  /// characters. The string that's returned is UT_String::ALWAYS_DEEP.
734  UT_String makeQuotedString(char delimiter='\'',
735  bool escape_nonprinting=false) const;
736 
737  /// makeSmartQuotedString() will use either ' or " as the delimiter to
738  /// avoid escaped quotes, using the default delimiter if it doesn't
739  /// matter. The quoted string can also optionally be made to escape
740  /// non-printing characters. The string that's returned is
741  /// UT_String::ALWAYS_DEEP.
742  UT_String makeSmartQuotedString(char default_delimiter='\'',
743  bool escape_nonprinting=false) const;
744 
745  /// Expands standard control sequences ('\\n', '\\r', '\\t', '\\0') to their
746  /// corresponding ASCII values (10, 13, 9, 0, respectively).
747  /// If the expand_extended flag is enabled, an extended expansion is enabled
748  /// which adds hexadecimal, decimal and Unicode control sequence expansion.
749  /// Any values resulting from that expansion, which are outside the standard
750  /// ASCII range, will be encoded as UTF8-encoded control points.
751  void expandControlSequences(bool expand_extended = false);
752 
753  bool hasWhiteSpace() const;
754 
755  void removeTrailingSpace();
756  void removeTrailingChars(char chr);
757 
758  void removeTrailingDigits();
759 
760  /// Parse string into array of arguments similar to csh.
761  ///
762  /// cshParse() does not need to harden the string. It does very robust
763  /// parsing in the style of csh. It actually does better parsing than
764  /// csh. Variable expansion & backquote expansion are done in the
765  /// correct order for the correct arguments. One caveat is that the
766  /// string cannot have \0377 (0xff) as a character in it.
767  ///
768  /// If there is an error in parsing, the error flag (if passed in) will be
769  /// set to:
770  /// 0 = no error
771  /// 1 = line too long
772  ///
773  /// To reconstruct the command line, use UT_Args::fillCommandLine().
774  ///
775  /// @{
776  int cshParse(char *argv[], int max_args,
777  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
778  void (*elookup)(const char *, UT_String&)=UTexprLookup,
779  int *error = 0,
780  UT_StringCshIO *io=0);
781  int cshParse(UT_WorkArgs &argv,
782  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
783  void (*elookup)(const char *, UT_String&)=UTexprLookup,
784  int *error = 0,
785  UT_StringCshIO *io=0);
786  /// @}
787 
788  /// dosParse() uses the semi-braindead approach of ms-dos to argument
789  /// parsing. That is, arguments are separated by a double quote or space
790  /// (being a space or a tab). If 'preserve_backslashes' is set to
791  /// false (the default), back-slashes are passed through verbatim, unless
792  /// the following character is a double quote. Likewise, any pairs of
793  /// back-slashes preceding a double quote are turned into single
794  /// back-slashes.
795  ///
796  /// See also UTUTbuildDOSCommandLine() for reconstructing from arguments.
797  ///
798  /// @{
799  int dosParse(UT_WorkArgs &argv, bool preserve_backslashes=false);
800  int dosParse(char *argv[], int max_args,
801  bool preserve_backslashes=false);
802  /// Perform dos parsing modifying the buffer passed in. The args will be
803  /// stored as raw pointers into the given buffer
804  static int dosParse(char *buffer, UT_WorkArgs &args,
805  bool preserve_backslashes);
806  /// @}
807 
808  // parse will insert nulls into the string.
809  // NB: The argv array is null terminated, thus the effective
810  // maximum number of arguments is one less than maxArgs.
811  // NB: The maxArgs variants are all deprecated, use UT_WorkArgs
812  // instead.
813  int parse(char *argv[], int max_args,
814  const char *quotes = "\"'", bool keep_quotes = false)
815  {
816  harden();
817  return parseInPlace(argv, max_args, quotes, keep_quotes);
818  }
819  int parse(UT_WorkArgs &argv, int start_arg = 0,
820  const char *quotes = "\"'", bool keep_quotes = false)
821  {
822  harden();
823  return parseInPlace(argv, start_arg, quotes, keep_quotes);
824  }
825  int parse(UT_StringArray &argv, int start_arg = 0,
826  const char *quotes = "\"'", bool keep_quotes = false)
827  {
828  harden();
829  return parseInPlace(argv, start_arg, quotes, keep_quotes);
830  }
831  // Warning: the following methods insert nulls into the string without
832  // hardening.
833  int parseInPlace(char *argv[], int max_args,
834  const char *quotes = "\"'", bool keep_quotes = false);
835  int parseInPlace(UT_WorkArgs &argv, int start_arg = 0,
836  const char *quotes = "\"'", bool keep_quotes = false);
837  int parseInPlace(UT_StringArray &argv, int start_arg = 0,
838  const char *quotes = "\"'", bool keep_quotes = false);
839 
840  // Splits the string at specific separator characters. Unlike the parse
841  // methods, the tokenize methods ignore quoting completely.
842  int tokenize(char *argv[], int max_args, char separator)
843  {
844  harden();
845  return tokenizeInPlace(argv, max_args, separator);
846  }
847  int tokenizeInPlace(char *argv[], int max_args, char separator);
848  int tokenize(UT_WorkArgs &argv, char separator)
849  {
850  harden();
851  return tokenizeInPlace(argv, separator);
852  }
853  int tokenizeInPlace(UT_WorkArgs &argv, char separator);
854  int tokenize(char *argv[], int max_args,
855  const char *separators = " \t\n")
856  {
857  harden();
858  return tokenizeInPlace(argv, max_args, separators);
859  }
860  int tokenizeInPlace(char *argv[], int max_args,
861  const char *separators = " \t\n");
862  int tokenize(UT_WorkArgs &argv, const char *separators = " \t\n")
863  {
864  harden();
865  return tokenizeInPlace(argv, separators);
866  }
867  int tokenizeInPlace(UT_WorkArgs &argv,
868  const char *separators = " \t\n");
869 
870  template<typename T>
871  int tokenize(T &list, const char *separators = " \t\n")
872  {
873  harden();
874  return tokenizeInPlace(list, separators);
875  }
876 
877  template<typename T>
878  int tokenizeInPlace(T &list,
879  const char *separators = " \t\n")
880  {
881  char *token;
882  char *context;
883 
884  if (!isstring())
885  return 0;
886  if (!(token = SYSstrtok(myData, separators, &context)))
887  return 0;
888 
889  list.append(token);
890 
891  while ((token = SYSstrtok(0, separators, &context)) != NULL)
892  list.append(token);
893 
894  return list.entries();
895  }
896 
897 
898  // Replaces the contents with variables expanded.
899  void expandVariables();
900 
901  // Functions to hash a string
903  {
904  return hash(myData);
905  }
906 
907  // The code can be used for rudimentary hash chaining, but it is NOT
908  // the case that hash("def", hash("abc")) == hash("abcdef"), so there
909  // is little reason to use this rather than normal hash combiners.
910  static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code = 0)
911  {
912  return SYSstring_hashseed(
913  str, SYS_EXINT_MAX, code, /*allow_nulls*/ false);
914  }
915 
916  // This does pattern matching on a string. The pattern may include
917  // the following syntax:
918  // ? = match a single character
919  // * = match any number of characters
920  // [char_set] = matches any character in the set
921  bool match(const char *pattern, bool case_sensitive = true) const;
922 
923  // Similar to match() except it assumes that we're dealing with file paths
924  // so that it determines whether to do a case-sensitive match depending on
925  // the platform.
926  bool matchFile(const char *pattern) const;
927 
928  // Similar to match() but uses rsync style matching:
929  // * = match any number of characters up to a slash
930  // ** = match any number of characters, including a slash
931  bool matchPath(const char *pattern, bool case_sensitive = true,
932  bool *excludes_branch = nullptr) const;
933 
934  // multiMatch will actually check multiple patterns all separated
935  // by the separator character: i.e. geo1,geo2,foot*
936  //
937  // NOTE: No pattern or may contain the separator
938  bool multiMatch(const char *pattern,
939  bool case_sensitive, char separator) const;
940  bool multiMatch(const char *pattern, bool case_sensitive = true,
941  const char *separators = ", ",
942  bool *explicitly_excluded = 0,
943  int *match_index = 0,
944  ut_PatternRecord *pattern_record=NULL) const;
945  bool multiMatch(const UT_StringMMPattern &pattern,
946  bool *explicitly_excluded = 0,
947  int *match_index = 0,
948  ut_PatternRecord *pattern_record=NULL) const;
949 
950  // this method matches a pattern while recording any wildcard
951  // patterns used.
952  bool multiMatchRecord(const char *pattern, int maxpatterns,
953  char *singles, int &nsingles,
954  char **words, int &nwords,
955  bool case_sensitive = true,
956  const char *separators = ", ") const;
957  bool multiMatchRecord(const UT_StringMMPattern &pattern,
958  int maxpatterns,
959  char *singles, int &nsingles,
960  char **words, int &nwords) const;
961  bool multiMatchRecord(const char *pattern,
962  UT_StringHolder &singles,
963  UT_StringArray &words,
964  bool case_sensitive = true,
965  const char *separators = ", ") const;
966 
967  /// matchPattern(UT_WorkArgs &) assumes that the arguments contain the
968  /// components of a pattern to be matched against. The method returns
969  /// true if the pattern matches, false if it doesn't. This matching
970  /// process handles ^ expansion properly (and efficiently).
971  /// If the string doesn't match any components of the pattern, then the
972  /// assumed value is returned.
973  bool matchPattern(const UT_WorkArgs &pattern_args,
974  bool assume_match=false) const;
975 
976  static bool multiMatchCheck(const char *pattern);
977  static bool wildcardMatchCheck(const char *pattern);
978 
979  // Same as match but equivalent to "*pattern*"
980  bool contains(const char *pattern, bool case_sensitive=true) const;
981 
982  // Returns true if our string starts with the specified prefix.
983  bool startsWith(const UT_StringView &prefix,
984  bool case_sensitive = true) const;
985 
986  // Returns true if our string ends with the specified suffix.
987  bool endsWith(const UT_StringView &suffix,
988  bool case_sensitive = true) const;
989 
990  /// Pluralize an English noun ending (i.e. box->boxes or tube->tubes). The
991  /// ending must be lower case to be processed properly.
992  void pluralize();
993 
994  // Will parse strings like 1-10:2,3 and call func for every element
995  // implied. It will stop when the func returns 0 or the parsing
996  // is complete, in which case it returns 1.
997  // Parsing also allows secondary elements to be specified eg 3.4 0.12
998  // The secfunc is used to find the maximum index of secondary elements
999  // for each compound num. The elements are assumed to be
1000  // non-negative integers.
1001  int traversePattern(int max, void *data,
1002  int (*func)(int num, int sec, void *data),
1003  unsigned int (*secfunc)(int num,void *data)=0,
1004  int offset=0) const;
1005 
1006  // Fast containment, assumes no special characters
1007  const char *fcontain(const char *pattern, bool case_sensitive=true) const
1008  {
1009  if (!myData) return NULL;
1010  return case_sensitive ? strstr(myData, pattern)
1011  : SYSstrcasestr(myData, pattern);
1012  }
1013 
1014  // Given the match pattern which fits our contents, any assigned wildcards
1015  // are substituted. The wildcards may also be indexed.
1016  // Returns true if rename was successful.
1017  //
1018  // @note This code was adapted from CHOP_Rename::subPatterns() and
1019  // works the same way.
1020  //
1021  // eg. this = apple, match = a*le, replace = b* ---> bpp
1022  // this = a_to_b, match = *_to_*, replace = *(1)_to_*(0) ---> b_to_a
1023  bool patternRename(const char *match_pattern, const char *replace);
1024 
1025  // Given the name rule according to which a name consists of a base name
1026  // (char sequence ending in a non-digit) and a numerical suffix, the
1027  // following two methods return the base and the suffix respectively.
1028  // base() needs a string buffer and will return a const char* pointing to it.
1029  // base() always returns a non-zero pointer,
1030  // while suffix() returns 0 if no suffix is found.
1031  const char *base(UT_String &buf) const;
1032  const char *suffix() const;
1033 
1034  // incrementNumberedName will increment a name. If it has a numerical
1035  // suffix, that suffix is incremented. If not, "2" is appended to the
1036  // name. The preserve_padding parameter can be set to true so that zero
1037  // padding is preserved. Incrementing foo0009 will produce foo10 with
1038  // this parameter set to false, or foo0010 if it is set to true.
1039  void incrementNumberedName(bool preserve_padding = false);
1040 
1041  // setFormat is used to set how an outstream formats its ascii output.
1042  // So you can use printf style formatting. eg:
1043  // UT_String::setFormat(cout, "%08d") << 100;
1044  //
1045  // Note: Don't do:
1046  // cout << UT_String::setFormat(cout, "%08d") << 100;
1047  // ^^^^
1048  // Also: The formatting changes (except for field width) are permanent,
1049  // so you'll have to reset them manually.
1050  //
1051  // TODO: A resetFormat, and a push/pop format pair.
1052  static std::ostream &setFormat(std::ostream &os, const char *fmt);
1053  std::ostream &setFormat(std::ostream &os);
1054 
1055  int replacePrefix(const char *oldpref,
1056  const char *newpref);
1057  int replaceSuffix(const char *oldsuffix,
1058  const char *newsuffix);
1059 
1060  // expandArrays will expand a series of tokens of the
1061  // form prefix[pattern]suffix into the names array
1062  //
1063  // Note: Each names[i] must be free'd after use
1064  // and label is used on the non-const parse method
1065  // NB: The max variants are all deprecated, use UT_WorkArgs
1066  // instead.
1067  int expandArrays(char *names[], int max);
1068 
1069  // This routine will ensure no line is over the specified
1070  // number of columns. Offending lines will be wrapped at
1071  // the first spaceChar or cut at exactly cols if spaceChar
1072  // is not found.
1073  // It returns one if any changes were done.
1074  // It currently treats tabs as single characters which should be
1075  // changed.
1076  // It will break words at hyphens if possible.
1077  int format(int cols);
1078 
1079  /// Replaces up to 'count' occurrences of 'find' with 'replacement',
1080  /// and returns the number of substitutions that occurred.
1081  /// If 'count' <= 0, all occurrences will be replaced.
1082  int substitute( const char *find, const char *replacement,
1083  int count = -1);
1084  /// Convenience version of substitute() for all or single occurrence.
1085  SYS_DEPRECATED_REPLACE(19.5, "Use 'count' variant")
1086  int substitute( const char *find, const char *replacement,
1087  bool all )
1088  { return substitute(find, replacement, !all ? 1 : -1); }
1089 
1090  // This function replaces the character found with another character.
1091  int substitute( char find, char replacement, bool all = true );
1092 
1093  // this function removes the substring at pos and len, and inserts str
1094  // at pos. it returns the difference (new_length - old_length)
1095  int replace( int pos, int len, const char *str );
1096 
1097  // remove the first len characters of this string
1098  int eraseHead(int len)
1099  { return replace(0, len, ""); }
1100 
1101  // remove the last len characters of this string
1102  int eraseTail(int len)
1103  { return replace(length() - len, len, ""); }
1104 
1105  // remove the substring start at pos for len characters
1106  int erase(int pos = 0, int len = -1)
1107  {
1108  if (len < 0)
1109  len = length() - pos;
1110  return replace(pos, len, "");
1111  }
1112 
1113  // insert the given string at pos into this string
1114  int insert(int pos, const char *str)
1115  { return replace(pos, 0, str); }
1116 
1117  // Does a "smart" string compare which will sort based on numbered names.
1118  // That is "text20" is bigger than "text3". In a strictly alphanumeric
1119  // comparison, this would not be the case. Zero is only returned if both
1120  // strings are identical.
1121  static int compareNumberedString(const char *s1,
1122  const char *s2,
1123  bool case_sensitive=true,
1124  bool allow_negatives=false);
1125  static int qsortCmpNumberedString(const char *const*v1,
1126  const char *const*v2);
1127 
1128  // Like compare numbered strings, but it sorts better when there are
1129  // .ext extensions (i.e. it handles '.' as a special case)
1130  static int compareNumberedFilename(const char *s1,
1131  const char *s2,
1132  bool case_sensitive=false);
1133  static int qsortCmpNumberedFilename(const char *const*v1,
1134  const char *const*v2);
1135 
1136  // Like compare numbered strings, but allows special ordering of certain
1137  // characters that should always come first or last.
1138  static int compareNumberedStringWithExceptions(const char *s1,
1139  const char *s2,
1140  bool case_sensitive=false,
1141  bool allow_negatives=false,
1142  const char *sorted_first=nullptr,
1143  const char *sorted_last=nullptr);
1144 
1145  /// Compare two version strings which have numbered components separated by
1146  /// dots. eg. "X.Y.Z". Assumes the components go from most to least
1147  /// significant in left to right order.
1148  static int compareVersionString(const char *s1, const char *s2);
1149 
1150  /// Given a path, set the value of the string to the program name. For
1151  /// example: @code
1152  /// str.extractProgramName(argv[0]);
1153  /// str.extractProgramName("c:/Path/program.exe");
1154  /// str.extractProgramName("/usr/bin/program");
1155  /// @endcode
1156  /// This will extract the last path component. Program names may also have
1157  /// their extensions stripped. For example ".exe" on Windows and "-bin" to
1158  /// strip the Houdini wrappers on other platforms.
1159  ///
1160  /// @note The path should be normalized to have forward slashes as the path
1161  /// separator.
1162  void extractProgramName(const char *path,
1163  bool strip_extension=true,
1164  bool normalize_path=true);
1165 
1166  /// Given a path, check to see whether the program name matches the
1167  /// expected. For example: @code
1168  /// if (UT_String::matchProgramName(argv[0], "houdini"))
1169  /// if (UT_String::matchProgramName("c:/Path/houdini.exe", "houdini"))
1170  /// if (UT_String::matchProgramName("/usr/bin/houdini", "houdini"))
1171  /// @endcode
1172  /// The matching is always case-insensitive.
1173  ///
1174  /// @note The path should be normalized to have forward slashes as the path
1175  /// separator.
1176  static bool matchProgramName(const char *path, const char *expected,
1177  bool normalize_path=false);
1178 
1179  /// Convert a path to a "normalized" path. That is, all back-slashes will
1180  /// be converted to forward slashes. On some operating systems, this will
1181  /// leave the string unchanged.
1182  void normalizePath();
1183 
1184  // A very fast integer to string converter. This is faster (at least on
1185  // SGI) than using sprintf("%d"). About two to three times as fast. Both
1186  // of these methods return the length of the string generated.
1187  static int itoa(char *str, int64 i);
1188  static int utoa(char *str, uint64 i);
1189 
1190  // Versions of the above functions which set into this string object
1191  void itoa(int64 i);
1192  void utoa(uint64 i);
1193 
1194  // A reader-friendly version of itoa. This places commas appropriately
1195  // to ensure the person can pick out the kilo points easily.
1196  // This can handle numbers up to 999,999,999,999,999,999.
1197  void itoaPretty(int64 val);
1198 
1199  /// Convert the given time delta (in milliseconds)
1200  /// to a reader-friendly string in days, hours, minutes, and seconds.
1201  void timeDeltaToPrettyString(double time_ms);
1202 
1203  /// Convert the given time delta (in milliseconds)
1204  /// to a reader-friendly string in milliseconds.
1205  void timeDeltaToPrettyStringMS(double time_ms);
1206 
1207  // Do an sprintf into this string. This method will allocate exactly the
1208  // number of bytes required for the final string. If the format string is
1209  // bad, isstring() will return false afterwards.
1210  int sprintf(const char *fmt, ...) SYS_PRINTF_CHECK_ATTRIBUTE(2, 3);
1211 
1212  // This will change the string into a valid C style variable name.
1213  // All non-alpha numerics will be converted to _.
1214  // If the first letter is a digit, it is prefixed with an _.
1215  // This returns 0 if no changes occurred, 1 if something had to
1216  // be adjusted.
1217  // Note that this does NOT force the name to be non-zero in length.
1218  // The safechars parameter is a string containing extra characters
1219  // that should be considered safe. These characters are not
1220  // converted to underscores.
1221  int forceValidVariableName(const char *safechars = NULL);
1222  // Returns true if the string matches a C-style variable name.
1223  // The safechars are not allowed to be the start.
1224  // Matching forceValid, empty strings are considered valid!
1225  bool isValidVariableName(const char *safechars = NULL) const;
1226 
1227  // This will force all non-alphanumeric characters to be underscores.
1228  // Returns true if any changes were required.
1229  bool forceAlphaNumeric();
1230 
1231  // This function will calculate the relative path to get from src to dest.
1232  // If file_path is false, this method assumes it is dealing with node paths.
1233  // If file_path is true, it will also deal with Windows drive letters and
1234  // UNC paths.
1235  void getRelativePath(const char *src_fullpath,
1236  const char *dest_fullpath,
1237  bool file_path = false);
1238 
1239  // This function takes two absolute paths and returns the length of the
1240  // longest common path prefix, up to and including the last '/'. This
1241  // means, for instance, that if fullpath1[len1-1] == '/' then all of
1242  // fullpath1 is eligible as a common prefix.
1243  // NB: This function DOES NOT handle NT style drive names! It is currently
1244  // only used for op paths. If you want to add support for this, you
1245  // should add another default parameter to do this.
1246  static int findLongestCommonPathPrefix(const char *fullpath1, int len1,
1247  const char *fullpath2, int len2);
1248 
1249  // This function tests whether we are an absolute path, and returns true or
1250  // false depending on whether we are.
1251  bool isAbsolutePath(bool file_path=false) const;
1252 
1253  // This function assumes that we are an absolute path and will remove all
1254  // un-necessary components from it as long as we remain an absolute path.
1255  // We return false if an error was encountered, in which case the results
1256  // are unpredictable.
1257  bool collapseAbsolutePath(bool file_path=false);
1258 
1259  // This function will make sure that the string is at most max_length
1260  // characters long. If the string is longer than that, it will
1261  // replace the middle of the string by "...". Returns true if the string
1262  // has changed and false otherwise. max_length must be greater than 3.
1263  bool truncateMiddle(int max_length);
1264 
1265  // This function is an abomination when you can just write:
1266  // UT_String foo("");
1267  // ...
1268  // if (foo.isstring())
1269  // ...
1270  // Avoid using it and do not write functions that return "const UT_String&"
1271  static const UT_String &getEmptyString();
1272 
1273  /// Count the number of valid characters in the : modifier for variable
1274  /// expansion. For example, the string ":r" will return 2, the string
1275  /// ":r:t" will return 4, the string ":z" will return 0. These use the csh
1276  /// expansion modifiers.
1277  ///
1278  /// If the string doesn't start with a ':', the method will return 0.
1279  static int countCshModifiers(const char *src);
1280 
1281  /// Applies a "csh" style modifier string to this string. For example, a
1282  /// modifier string of ":e" would replace the string with the file
1283  /// extension of the string.
1284  ///
1285  /// Returns true if any modifications were performed
1286  bool applyCshModifiers(const char *modifiers);
1287 
1288 
1289  /// This will remove the range from a string of the form foo$Fbar.ext (#-#)
1290  /// and return the first number from the range. If there is only 1 range
1291  /// number, it will be returned. If there is no range, 0 is returned.
1292  /// The returned string is hardened.
1293  UT_String removeRange ();
1294 
1295  /// This will format a value to represent a given size in bytes, kilobytes,
1296  /// megabytes, etc.
1297  void formatByteSize(exint size, int digits=2);
1298 
1299  // UTF-8 helpers
1300 
1301  /// Returns the number of Unicode codepoints in the string, assuming it's
1302  /// encoded as UTF-8.
1303  int getCodePointCount() const;
1304 
1305  /// Returns a list of Unicode code points from this string.
1306  void getAsCodePoints(UT_Int32Array &cp_list) const;
1307 
1308  /// Friend specialization of std::swap() to use UT_String::swap()
1309  /// @internal This is needed because standard std::swap() implementations
1310  /// will try to copy the UT_String objects, causing hardened strings to
1311  /// become weak.
1312  friend void swap(UT_String& a, UT_String& b) { a.swap(b); }
1313 
1314  /// expandArrays will expand a series of tokens of the
1315  /// form prefix[pattern]suffix into the names UT_StringArray
1316  /// @param tokens will store the parsed tokens without expansion
1317  /// @param names will store the parsed tokens with expansion
1318  /// This doesn't need a max argument like:
1319  /// int expandArrays(char *names[], int max)
1320  int expandArrays(UT_StringArray &tokens, UT_StringArray &names);
1321 
1322 private:
1323  template <typename OSTREAM>
1324  void saveInternal(OSTREAM &os, bool binary) const;
1325 
1326  void freeData();
1327 
1328  /// implements a few csh-style modifiers.
1329  /// @param mod pointer to a string starting with the modifier to apply.
1330  /// so, to apply a global substitute modifier :gs/l/r/
1331  /// mod should be: s/l/r
1332  /// @param all True if all possible modifications should be
1333  /// (recursively) performed.
1334  /// Otherwise, at most one modification is applied.
1335  /// @return whether any modification was performed
1336  bool applyNextModifier(const char *mod, bool all);
1337 
1338 
1339  /// Sets myIsReference to false and copies the other_string into myData,
1340  /// but attempts to avoid unnecessary memory reallocations. Frees up
1341  /// any previous data, if necessary. If other_string is NULL, the call
1342  /// is equivalent to freeData().
1343  void doSmartCopyFrom(const char* other_string);
1344 
1345  static int compareNumberedStringInternal(const char *s1, const char *s2,
1346  bool case_sensitive,
1347  bool allow_negatives,
1348  const char *sorted_first,
1349  const char *sorted_last);
1350 
1351  static SYS_FORCE_INLINE void utStrFree(char *str)
1352  {
1353 #if defined(UT_DEBUG) && !defined(_WIN32)
1354  if (str)
1355  ::memset((void *)str, 0xDD, ::strlen(str) + 1);
1356 #endif
1357  ::free((void *)str);
1358  }
1359 
1360  char *myData;
1361  bool myIsReference:1,
1362  myIsAlwaysDeep:1;
1363 
1364  /// This operator saves the string to the stream via the string's
1365  /// saveAscii() method, protecting any whitespace (by adding quotes),
1366  /// backslashes or quotes in the string.
1367  friend UT_API std::ostream &operator<<(std::ostream &os, const UT_String &d);
1368  friend UT_API UT_OStream &operator<<(UT_OStream &os, const UT_String &d);
1369 
1370  friend class UT_API UT_StringRef;
1371 };
1372 
1373 /// Creates a shallow wrapper around a string for calling UT_String's many
1374 /// const algorithms.
1376 {
1377 public:
1378  // We only have a single constructor which is always shallow.
1380  UT_StringWrap(const char *str)
1381  : UT_String(str)
1382  {}
1383  // It seems necessary on MSVC to forceinline the empty constructor in order
1384  // to have it inlined.
1387  {}
1388 
1389  UT_StringWrap(const UT_StringWrap &) = delete;
1390  UT_StringWrap &operator=(const UT_StringWrap &) = delete;
1391 
1392  // Manually wrap methods that have non-const overloads or return non-const
1393  // pointers.
1394  char operator()(unsigned i) const { return UT_String::operator()(i); }
1395  const char *findChar(int c) const { return UT_String::findChar(c); }
1396  const char *findChar(const char *str) const { return UT_String::findChar(str); }
1397  const char *findNonSpace() const { return UT_String::findNonSpace(); }
1398  const char *lastChar(int c) const { return UT_String::lastChar(c); }
1399 
1400  using UT_String::operator==;
1401  using UT_String::operator!=;
1402  using UT_String::c_str;
1403  using UT_String::length;
1404 
1405  using UT_String::base;
1406  using UT_String::compare;
1407  using UT_String::contains;
1408  using UT_String::count;
1409  using UT_String::countChar;
1410  using UT_String::distance;
1411  using UT_String::endsWith;
1412  using UT_String::equal;
1413  using UT_String::fcontain;
1415  using UT_String::fileName;
1416  using UT_String::findWord;
1417  using UT_String::findString;
1420  using UT_String::isFloat;
1421  using UT_String::isInteger;
1423  using UT_String::isstring;
1424  using UT_String::match;
1425  using UT_String::matchFile;
1427  using UT_String::matchPath;
1429  using UT_String::multiMatch;
1433  using UT_String::save;
1434  using UT_String::saveAscii;
1435  using UT_String::saveBinary;
1436  using UT_String::splitPath;
1437  using UT_String::startsWith;
1438  using UT_String::substr;
1439  using UT_String::suffix;
1440  using UT_String::toFloat;
1441  using UT_String::toInt;
1442 };
1443 
1444 inline
1446  : myIsReference(false)
1447  , myIsAlwaysDeep(true)
1448  , myData(nullptr)
1449 {
1450  *this = str;
1451 }
1452 
1453 inline
1455  : myIsReference(false)
1456  , myIsAlwaysDeep(true)
1457  , myData(nullptr)
1458 {
1459  *this = std::move(str);
1460 }
1461 
1462 inline UT_String &
1464 {
1465  adopt(str);
1466  myIsAlwaysDeep = true; // matches copy constructor behaviour
1467  return *this;
1468 }
1469 
1472 {
1473  if (!myIsReference && myData)
1474  utStrFree(myData);
1475 }
1476 
1478 void
1479 UT_String::freeData()
1480 {
1481  if (myData)
1482  {
1483  if (!myIsReference)
1484  utStrFree(myData);
1485  myData = 0;
1486  }
1487 }
1488 
1489 inline void
1491 {
1492  // We can't use UTswap because it doesn't work with bit fields.
1493  bool temp = myIsReference;
1494  myIsReference = other.myIsReference;
1495  other.myIsReference = temp;
1496 
1497  char *tmp_data = myData;
1498  myData = other.myData;
1499  other.myData = tmp_data;
1500 
1501  if (myIsAlwaysDeep)
1502  harden();
1503 
1504  if (other.myIsAlwaysDeep)
1505  other.harden();
1506 }
1507 
1509 public:
1510  UT_String myOut; // Points to argument following '>'
1511  UT_String myErr; // Points to argument following '>&'
1512  UT_String myIn; // Points to argument following '<'
1513  short myDoubleOut; // If the argument is '>>' or '>>&'
1514  short myDoubleIn; // If the argument is '<<'
1515 };
1516 
1517 UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[]);
1518 
1519 /// Does a "smart" string compare which will sort based on numbered names.
1520 /// That is "text20" is bigger than "text3". In a strictly alphanumeric
1521 /// comparison, this would not be the case.
1523 {
1524  bool operator()(const char *s1, const char *s2) const
1525  {
1526  return UT_String::compareNumberedString(s1, s2) < 0;
1527  }
1528 
1529  bool operator()(const std::string &s1, const std::string &s2) const
1530  {
1531  return operator()(s1.c_str(), s2.c_str());
1532  }
1533 };
1534 
1535 #endif
bool match(const char *pattern, bool case_sensitive=true) const
int tokenize(char *argv[], int max_args, const char *separators=" \t\n")
Definition: UT_String.h:854
UT_String & operator+=(const char *str)
Definition: UT_String.h:342
static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code=0)
Definition: UT_String.h:910
int distance(const char *str, bool case_sensitive=true, bool allow_subst=true) const
char * lastChar(int c)
Definition: UT_String.h:574
typedef int(APIENTRYP RE_PFNGLXSWAPINTERVALSGIPROC)(int)
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glcorearb.h:2540
bool operator!=(const char *str) const
Definition: UT_String.h:427
UT_String & operator+=(const UT_String &str)
Definition: UT_String.h:374
UT_API void normalizePath(UT_String &file_path, bool want_marker=false, bool always_want_expanded_path=false)
bool operator>=(const UT_StringRef &str) const
Definition: UT_String.h:483
int count(const char *str, bool case_sensitive=true) const
Count the occurrences of the string.
bool matchFileExtension(const char *match_extension) const
Definition: UT_String.h:657
void swap(UT_String &other)
Definition: UT_String.h:1490
void saveAscii(UT_OStream &os) const
Definition: UT_String.h:301
bool operator()(const char *s1, const char *s2) const
Definition: UT_String.h:1524
T negative(const T &val)
Return the unary negation of the given value.
Definition: Math.h:128
const char * lastChar(int c) const
Definition: UT_String.h:1398
bool isInteger(bool skip_spaces=false) const
Determine if string can be seen as a single integer number.
that also have some descendant prim *whose name begins with which in turn has a child named baz where *the predicate and *a name There is also one special expression _ which means *the weaker expression when composing expressions together See with
bool operator<=(const char *str) const
Definition: UT_String.h:451
UT_String myIn
Definition: UT_String.h:1512
GLsizei const GLchar *const * string
Definition: glcorearb.h:814
fpreal toFloat() const
bool operator==(const char *str) const
Definition: UT_String.h:415
bool operator<=(const UT_String &str) const
Definition: UT_String.h:455
int toInt() const
char * fileExtension()
Definition: UT_String.h:640
const GLuint GLenum const void * binary
Definition: glcorearb.h:1924
bool isHard() const
Returns whether this string is hardened already.
Definition: UT_String.h:244
GLsizei const GLchar *const * path
Definition: glcorearb.h:3341
SYS_FORCE_INLINE T * SYSconst_cast(const T *foo)
Definition: SYS_Types.h:136
UT_String makeQuotedString(char delimiter='\'', bool escape_nonprinting=false) const
const char * findChar(const char *str) const
Definition: UT_String.h:568
int64 exint
Definition: SYS_Types.h:125
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1222
GLdouble s
Definition: glad.h:3009
void swap(T &lhs, T &rhs)
Definition: pugixml.cpp:7172
void write(unsigned i, char c)
Definition: UT_String.h:527
bool operator==(const UT_String &str) const
Definition: UT_String.h:419
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:795
#define UT_API
Definition: UT_API.h:14
const char * fileExtension() const
Definition: UT_String.h:647
const char * data() const
Definition: UT_String.h:510
bool isAbsolutePath(bool file_path=false) const
bool findString(const char *str, bool fullword, bool usewildcards) const
**But if you need a result
Definition: thread.h:613
char * findChar(int c)
Definition: UT_String.h:558
#define SYS_EXINT_MAX
Definition: SYS_Types.h:181
char & operator()(unsigned i)
Definition: UT_String.h:520
bool equal(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:402
GLfloat GLfloat GLfloat v2
Definition: glcorearb.h:818
const char * findNonSpace() const
Definition: UT_String.h:1397
unsigned long long uint64
Definition: SYS_Types.h:117
int compare(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:382
GLuint buffer
Definition: glcorearb.h:660
void clear()
Reset the string to the default constructor.
Definition: UT_String.h:311
bool isAlwaysDeep() const
Definition: UT_String.h:206
const char * c_str() const
Definition: UT_String.h:508
bool matchPath(const char *pattern, bool case_sensitive=true, bool *excludes_branch=nullptr) const
SIM_API const UT_StringHolder all
unsigned length() const
Return length of string.
Definition: UT_String.h:546
int compare(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:396
< returns > If no error
Definition: snippets.dox:2
const char * suffix() const
bool operator<(const char *str) const
Definition: UT_String.h:439
bool operator<(const UT_StringRef &str) const
Definition: UT_String.h:447
UT_API void UTexprLookup(const char *name, UT_String &result)
bool contains(const char *pattern, bool case_sensitive=true) const
int tokenize(UT_WorkArgs &argv, const char *separators=" \t\n")
Definition: UT_String.h:862
#define SYS_DEPRECATED_REPLACE(__V__, __R__)
std::ostream & operator<<(std::ostream &ostr, const DataType &a)
Definition: DataType.h:133
UT_String(UT_AlwaysDeepType, const std::string &str)
Construct UT_String from a std::string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:153
void hardenIfNeeded(const char *s)
Take shallow copy and make it deep.
Definition: UT_String.h:234
const char * buffer() const
Definition: UT_String.h:509
CompareResults OIIO_API compare(const ImageBuf &A, const ImageBuf &B, float failthresh, float warnthresh, ROI roi={}, int nthreads=0)
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:39
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool isEmpty() const
Returns true if the string is empty.
SYS_FORCE_INLINE uint32 hash() const
Definition: UT_String.h:902
bool operator==(const UT_StringRef &str) const
Definition: UT_String.h:423
GLintptr offset
Definition: glcorearb.h:665
Definition: core.h:760
char operator()(unsigned i) const
Definition: UT_String.h:1394
int tokenize(char *argv[], int max_args, char separator)
Definition: UT_String.h:842
bool operator>=(const char *str) const
Definition: UT_String.h:475
UT_String & operator=(UT_String &&str)
Definition: UT_String.h:177
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileName(const StringT &str)
int tokenizeInPlace(T &list, const char *separators=" \t\n")
Definition: UT_String.h:878
OIIO_FORCEINLINE const vint4 & operator+=(vint4 &a, const vint4 &b)
Definition: simd.h:4369
#define SYS_SAFE_BOOL
Definition: SYS_Compiler.h:55
bool operator!=(const UT_String &str) const
Definition: UT_String.h:431
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:155
bool operator>=(const UT_String &str) const
Definition: UT_String.h:479
#define SYS_PRINTF_CHECK_ATTRIBUTE(string_index, first_to_check)
Definition: SYS_Types.h:447
char * findNonSpace()
std::string OIIO_UTIL_API replace(string_view str, string_view pattern, string_view replacement, bool global=false)
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
GLint GLint GLsizei GLint GLenum format
Definition: glcorearb.h:108
bool operator>(const UT_String &str) const
Definition: UT_String.h:467
char * SYSstrtok(char *string, const char *delimit, char **context)
Definition: SYS_String.h:106
char * findChar(const char *str)
Definition: UT_String.h:566
#define UT_ASSERT_SLOW(ZZ)
Definition: UT_Assert.h:154
const char * findChar(int c) const
Definition: UT_String.h:560
SYS_FORCE_INLINE UT_String(const char *str=0)
Construct UT_String from a C string, using shallow semantics.
Definition: UT_String.h:85
void harden()
Take shallow copy and make it deep.
Definition: UT_String.h:215
void saveAscii(std::ostream &os) const
Definition: UT_String.h:300
bool equal(const UT_StringRef &str, bool case_sensitive=true) const
Definition: UT_String.h:410
UT_String(UT_String &&str) noexcept
Definition: UT_String.h:169
long long int64
Definition: SYS_Types.h:116
bool equal(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:406
void setAlwaysDeep(bool deep)
Make a string always deep.
Definition: UT_String.h:190
bool operator>(const UT_StringRef &str) const
Definition: UT_String.h:471
const char * findChar(const char *str) const
Definition: UT_String.h:1396
bool matchFile(const char *pattern) const
bool operator()(const std::string &s1, const std::string &s2) const
Definition: UT_String.h:1529
GLuint const GLchar * name
Definition: glcorearb.h:786
int eraseHead(int len)
Definition: UT_String.h:1098
GLushort pattern
Definition: glad.h:2583
void toUpper()
Definition: UT_String.h:612
void adopt(UT_String &str)
Definition: UT_String.h:287
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1222
SYS_FORCE_INLINE ~UT_StringWrap()
Definition: UT_String.h:1386
const char * findWord(const char *word) const
bool operator>(const char *str) const
Definition: UT_String.h:463
int64 getMemoryUsage(bool inclusive=true) const
Return memory usage in bytes.
Definition: UT_String.h:550
void saveBinary(std::ostream &os) const
Save string to binary stream.
Definition: UT_String.h:296
bool isFloat(bool skip_spaces=false, bool loose=false, bool allow_underscore=false) const
Determine if string can be seen as a single floating point number.
bool isValidVariableName(const char *safechars=NULL) const
static int compareNumberedString(const char *s1, const char *s2, bool case_sensitive=true, bool allow_negatives=false)
short myDoubleIn
Definition: UT_String.h:1514
void adopt(char *s)
Definition: UT_String.h:277
GLsizeiptr size
Definition: glcorearb.h:664
UT_String pathUpToExtension() const
__hostdev__ bool isInteger(GridType gridType)
Return true if the GridType maps to a POD integer type.
Definition: NanoVDB.h:820
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileExtension(const StringT &str)
GLenum func
Definition: glcorearb.h:783
int substr(UT_String &buf, int index, int len=0) const
SYS_NO_DISCARD_RESULT bool UTstringMatchFileExtension(const StringT &str, const char *extension)
void save(std::ostream &os, bool binary) const
Save string to stream. Saves as binary if binary is true.
short myDoubleOut
Definition: UT_String.h:1513
fpreal64 fpreal
Definition: SYS_Types.h:277
int parse(UT_StringArray &argv, int start_arg=0, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:825
bool multiMatch(const char *pattern, bool case_sensitive, char separator) const
LeafData & operator=(const LeafData &)=delete
char * steal()
Definition: UT_String.h:255
char * SYSstrcasestr(const char *haystack, const char *needle)
Replacement for strcasestr, since no equivalent exists on Win32.
Definition: SYS_String.h:290
GLuint index
Definition: glcorearb.h:786
bool multiMatchRecord(const char *pattern, int maxpatterns, char *singles, int &nsingles, char **words, int &nwords, bool case_sensitive=true, const char *separators=", ") const
int parseNumberedFilename(UT_String &prefix, UT_String &frame, UT_String &suff, bool negative=true, bool fractional=false) const
UT_AlwaysDeepType
Definition: UT_String.h:79
GLfloat GLfloat v1
Definition: glcorearb.h:817
auto ptr(T p) -> const void *
Definition: format.h:2448
GLuint GLfloat * val
Definition: glcorearb.h:1608
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
**If you just want to fire and args
Definition: thread.h:609
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator begin() const
Returns a constant iterator pointing to the beginning of the string.
unsigned int uint32
Definition: SYS_Types.h:40
const char * lastChar(int c) const
Definition: UT_String.h:576
UT_String myOut
Definition: UT_String.h:1510
UT_String myErr
Definition: UT_String.h:1511
bool isstring() const
Definition: UT_String.h:691
int findLongestCommonSuffix(const char *with) const
void hardenIfNeeded()
Take a shallow copy and make it deep.
Definition: UT_String.h:224
const char * findChar(int c) const
Definition: UT_String.h:1395
int parse(char *argv[], int max_args, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:813
UT_String(UT_AlwaysDeepType, const char *str=0)
Construct UT_String from a C string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:146
bool operator<(const UT_String &str) const
Definition: UT_String.h:443
int erase(int pos=0, int len=-1)
Definition: UT_String.h:1106
int tokenize(UT_WorkArgs &argv, char separator)
Definition: UT_String.h:848
auto sprintf(const S &fmt, const T &...args) -> std::basic_string< Char >
Definition: printf.h:574
string_view OIIO_UTIL_API strip(string_view str, string_view chars=string_view())
SIM_API const UT_StringHolder distance
bool operator<=(const UT_StringRef &str) const
Definition: UT_String.h:459
bool startsWith(const UT_StringView &prefix, bool case_sensitive=true) const
void splitPath(UT_String &dir_name, UT_String &file_name) const
char operator()(unsigned i) const
Definition: UT_String.h:513
bool OIIO_UTIL_API contains(string_view a, string_view b)
Does 'a' contain the string 'b' within it?
int parse(UT_WorkArgs &argv, int start_arg=0, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:819
const char * base(UT_String &buf) const
void removeLast()
Remove the last character.
Definition: UT_String.h:327
UT_API void UTvarLookup(const char *name, UT_String &result)
SYS_FORCE_INLINE UT_StringWrap(const char *str)
Definition: UT_String.h:1380
bool endsWith(const UT_StringView &suffix, bool case_sensitive=true) const
UT_String(const std::string &str)
Construct UT_String from a std::string, always doing a deep copy. The result will only be a UT_Always...
Definition: UT_String.h:120
int eraseTail(int len)
Definition: UT_String.h:1102
const char * fileName() const
Definition: UT_String.h:633
OIIO_UTIL_API std::string extension(string_view filepath, bool include_dot=true) noexcept
GLint GLsizei count
Definition: glcorearb.h:405
Definition: format.h:895
int countChar(int c) const
Return the number of occurrences of the specified character.
UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[])
int tokenize(T &list, const char *separators=" \t\n")
Definition: UT_String.h:871
FMT_CONSTEXPR auto find(Ptr first, Ptr last, T value, Ptr &out) -> bool
Definition: core.h:2089
const char * nonNullBuffer() const
Definition: UT_String.h:511
void toLower()
Definition: UT_String.h:619
GLenum src
Definition: glcorearb.h:1793
int insert(int pos, const char *str)
Definition: UT_String.h:1114
const char * fcontain(const char *pattern, bool case_sensitive=true) const
Definition: UT_String.h:1007
bool operator!=(const UT_StringRef &str) const
Definition: UT_String.h:435