HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_String.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  *
7  * NAME: Utility Library (C++)
8  *
9  * COMMENTS: String class
10  *
11  */
12 
13 #ifndef __UT_String_h__
14 #define __UT_String_h__
15 
16 #include "UT_API.h"
17 
18 #include "UT_Assert.h"
19 #include "UT_VectorTypes.h"
20 #include "UT_StringView.h"
21 #include "UT_StringUtils.h"
22 
23 #include <SYS/SYS_Compiler.h>
24 #include <SYS/SYS_Deprecated.h>
25 #include <SYS/SYS_Inline.h>
26 #include <SYS/SYS_String.h>
27 #include <SYS/SYS_Types.h>
28 
#include <functional>
#include <iosfwd>
#include <string>
#include <utility>
32 
33 #include <ctype.h>
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #ifdef WIN32
38  #define strcasecmp stricmp
39  #define strncasecmp strnicmp
40 #endif
41 
42 class UT_OStream;
43 class UT_String;
44 class UT_StringCshIO;
45 class UT_WorkArgs;
46 class UT_IStream;
47 class ut_PatternRecord;
48 class UT_StringMMPattern;
49 class UT_StringArray;
50 class UT_StringHolder;
51 class UT_StringRef;
52 
53 // The following lookup functions are used by cshParse. By default,
54 // varLookup simply uses getenv, exprLookup opens the command as
55 // a pipe and uses the result.
56 UT_API extern void UTvarLookup(const char *name, UT_String &result);
57 UT_API extern void UTexprLookup(const char *name, UT_String &result);
58 
59 /// @file
60 /// @class UT_String
61 ///
62 /// UT_String is a string class that supports two different types of assignment
63 /// semantics:
64 /// - Shallow (default): Just reference the given string and do NOT take
65 /// ownership.
66 /// - Deep: Make a copy of the given string, taking ownership in the
67 /// process (aka making it "hard").
68 ///
69 /// If UT_String::harden() is called, or any other UT_String method that
70 /// requires modifying the string, it will make a copy of its reference pointer
71 /// (and take ownership) first.
72 ///
74 {
75 public:
76 
77  /// UT_String can be constructed with UT_String::ALWAYS_DEEP to create an
78  /// object that will always perform deep copies when assigned to.
79  enum UT_AlwaysDeepType { ALWAYS_DEEP };
80 
81  /// @brief Construct UT_String from a C string, using shallow semantics
82  ///
83  /// @param str The initial string.
85  UT_String(const char *str = 0)
86  : myData(SYSconst_cast(str))
87  , myIsReference(true)
88  , myIsAlwaysDeep(false)
89  {}
90 
91  /// @brief Construct UT_String from a C string, optionally making a deep copy
92  ///
93  /// @param str The initial string.
94  /// @param deep_copy If true, a copy of @em str will be used.
95  /// @param len Number of characters to use from @em str. Use -1 to
96  /// use the entire string. If len is non-negative, then
97  /// @em deep_copy will be implicitly set to true. If str is NULL
98  /// and len is non-negative, then it will be initialized
99  /// with "".
100  UT_String(const char *str, bool deep_copy, int len = -1);
101 
102  /// @brief Construct UT_String from a std::string, always doing
103  /// a deep copy. The result will only be a UT_AlwaysDeep if the
104  /// appropriate version is used, however!
105  ///
106  /// NOTE: You cannot do:
107  /// UT_String foo;
108  /// std::string bar = "hello world";
109  /// foo = UT_String(bar.substr(2, 5));
110  ///
111  /// It provides a shortcut for constructing a UT_String from a function
112  /// that returns a std::string by value. For example, it lets you write
113  /// @code
114  /// UT_String str(func());
115  /// @endcode
116  /// instead of
117  /// @code
118  /// UT_String str(func().c_str(), /*harden=*/true);
119  /// @endcode
120  explicit UT_String(const std::string &str)
121  : myIsReference(false),
122  myIsAlwaysDeep(false)
123  { myData = strdup(str.c_str()); }
124 
125  /// @brief Construct UT_String from a UT_StringHolder.
126  /// This always duplicates and uses ALWAYS_DEEP semantics.
127  explicit UT_String(const UT_StringHolder &str);
128 
129  /// @brief Construct UT_String from a UT_StringHolder rvalue with
130  /// ALWAYS_DEEP semantics.
131  explicit UT_String(UT_StringHolder &&str);
132 
133 private:
134  /// This is intentionally not implemented - callers should choose between
135  /// the const char * and UT_StringHolder constructors, depending on whether
136  /// they want to make a deep copy.
137  /// @see UT_StringWrap.
138  UT_String(const UT_StringRef &);
139 
140 public:
141  /// @brief Construct UT_String from a UT_StringView.
142  /// This always duplicates and uses ALWAYS_DEEP semantics.
143  explicit UT_String(const UT_StringView &sv);
144 
145  /// @brief Construct UT_String from a C string, using ALWAYS_DEEP semantics
146  UT_String(UT_AlwaysDeepType, const char *str = 0)
147  : myIsReference(false),
148  myIsAlwaysDeep(true)
149  { myData = str ? strdup(str) : 0; }
150 
151  /// @brief Construct UT_String from a std::string, using ALWAYS_DEEP
152  /// semantics
154  : myIsReference(false),
155  myIsAlwaysDeep(true)
156  { myData = strdup(str.c_str()); }
157 
158  /// Copy constructor
159  ///
160  /// If the string we're copying from is ALWAYS_DEEP, then this object will
161  /// also become ALWAYS_DEEP. This way, you can pass/return a string by
162  /// value.
163  UT_String(const UT_String &str);
164 
165  ~UT_String();
166 
167  /// Move operators
168  /// @{
169  UT_String(UT_String &&str) noexcept
170  : myData(str.myData)
171  , myIsReference(str.myIsReference)
172  , myIsAlwaysDeep(str.myIsAlwaysDeep)
173  {
174  str.myData = nullptr;
175  str.myIsReference = !str.myIsAlwaysDeep;
176  }
178  {
179  freeData();
180  myData = str.myData;
181  myIsReference = str.myIsReference;
182  myIsAlwaysDeep = str.myIsAlwaysDeep;
183  str.myData = nullptr;
184  str.myIsReference = !str.myIsAlwaysDeep;
185  return *this;
186  }
187  /// @}
188 
189  /// Make a string always deep
190  void setAlwaysDeep(bool deep)
191  {
192  myIsAlwaysDeep = deep;
193  if (deep && myIsReference)
194  {
195  if (myData != NULL)
196  harden();
197  else
198  {
199  // This takes the same semantic as
200  // str = NULL;
201  // where str is an always deep string
202  myIsReference = false;
203  }
204  }
205  }
206  bool isAlwaysDeep() const
207  {
208  return myIsAlwaysDeep;
209  }
210 
211  void swap( UT_String &other );
212 
213  /// Take shallow copy and make it deep.
214  // @{
215  void harden()
216  {
217  if (!myIsReference && myData)
218  return;
219  myData = strdup(myData ? myData : "");
220  myIsReference = false;
221  }
222 
223  void harden(const char *s, int len = -1);
225  {
226  if (myIsReference)
227  {
228  if (isstring())
229  harden();
230  else
231  *this = "";
232  }
233  }
234  void hardenIfNeeded(const char *s)
235  {
236  if (s && *s)
237  harden(s);
238  else
239  *this = "";
240  }
241  // @}
242 
243  /// Returns whether this string is hardened already.
244  bool isHard() const { return !myIsReference; }
245 
246  /// Give up ownership of string
247  ///
248  /// Take a hard reference and make it shallow. This method makes sure
249  /// it gives back something you can delete, because this UT_String is
250  /// taking its hands off the data. Use it with care since it may lead
251  /// to memory leaks if, for example, you harden it again later.
252  ///
253  /// In the case of ALWAYS_DEEP strings, this is disallowed so it will
254  /// just return a copy of the data.
255  char * steal()
256  {
257  if (!myIsAlwaysDeep)
258  {
259  if (myIsReference)
260  myData = strdup(myData ? myData : ""); // harden
261  myIsReference = true; // but say it's soft
262  return myData;
263  }
264  else
265  {
266  // return a new copy of the data without releasing
267  // ownership for always deep strings
268  return strdup(myData ? myData : "");
269  }
270  }
271 
272  /// Take ownership of given string
273  ///
274  /// adopt() is the opposite of steal(). Basically, you're giving
275  /// the UT_String ownership of the string.
276  // @{
277  void adopt(char *s)
278  {
279  if (!myIsReference)
280  {
281  if (s != myData)
282  free(myData);
283  }
284  myData = s;
285  myIsReference = false;
286  }
287  void adopt(UT_String &str)
288  {
289  adopt(str.steal());
290  }
291  void adopt(UT_StringHolder &holder);
292 
293  // @}
294 
295  /// Save string to binary stream.
296  void saveBinary(std::ostream &os) const { save(os, true); }
297 
298  /// Save string to ASCII stream. This will add double quotes and escape to
299  /// the stream if necessary (empty string or contains spaces).
300  void saveAscii(std::ostream &os) const { save(os, false); }
301  void saveAscii(UT_OStream &os) const { save(os, false); }
302 
303  /// Save string to stream. Saves as binary if @em binary is true.
304  void save(std::ostream &os, bool binary) const;
305  void save(UT_OStream &os, bool binary) const;
306 
307  /// Load string from stream. Use is.eof() to check eof status
308  bool load(UT_IStream &is);
309 
310  /// Reset the string to the default constructor.
311  void clear()
312  { *this = (const char *)NULL; }
313 
314  /// Prepend a string (or character)
315  // @{
316  void prepend(const char *prefix);
317  void prepend(char ch);
318  // @}
319 
320  /// Append a character
321  void append(char ch);
322 
323  /// Append a string or a section of a string.
324  void append(const char *str, exint len = -1);
325 
326  /// Remove the last character
327  void removeLast() { truncate(length()-1); }
328  /// Truncate the string at the Nth character
329  void truncate(exint len);
330 
331  UT_String &operator=(const UT_String &str);
332  UT_String &operator=(const char *str);
333  UT_String &operator=(const std::string &str);
334  UT_String &operator=(const UT_StringHolder &str);
335  UT_String &operator=(UT_StringHolder &&str);
336  UT_String &operator=(const UT_StringView &str);
337 private:
338  /// Not implemented - see UT_String(const UT_StringRef &).
339  UT_String &operator=(const UT_StringRef);
340 
341 public:
342  UT_String &operator+=(const char *str)
343  {
344  if (!isstring())
345  {
346  // We are an empty string, so we merely copy
347  // the incoming string rather than trying to append
348  // to it.
349  harden(str);
350  }
351  else
352  {
353  bool same = (str == myData);
354  harden();
355  if (str)
356  {
357  int mylen = (int)strlen(myData);
358  myData = (char *)realloc(myData,
359  mylen+strlen(str)+1);
360  if (!same)
361  {
362  strcpy(&myData[mylen], str);
363  }
364  else
365  {
366  memcpy(myData + mylen, myData, mylen);
367  myData[mylen * 2] = '\0';
368  }
369  }
370  }
371  return *this;
372  }
373 
375  {
376  *this += (const char *)str.myData;
377  return *this;
378  }
379  UT_String &operator+=(const UT_StringRef &str);
380 
381  // Basic equality functions and operators
382  int compare(const char *str, bool case_sensitive=true) const
383  {
384  // Unlike std::string, UT_String treats NULL and
385  // the empty string as distinct (empty has precedence).
386  if (myData==0 || str==0)
387  {
388  if (myData) return 1;
389  if(str) return -1;
390  return 0;
391  }
392  if (case_sensitive)
393  return strcmp(myData, str);
394  return strcasecmp(myData, str);
395  }
396  int compare(const UT_String &str, bool case_sensitive=true) const
397  {
398  return compare(str.myData,case_sensitive);
399  }
400  int compare(const UT_StringRef &str, bool case_sensitive=true) const;
401 
402  bool equal(const char *str, bool case_sensitive=true) const
403  {
404  return compare(str,case_sensitive)==0;
405  }
406  bool equal(const UT_String &str, bool case_sensitive=true) const
407  {
408  return compare(str.myData,case_sensitive)==0;
409  }
410  bool equal(const UT_StringRef &str, bool case_sensitive=true) const
411  {
412  return compare(str,case_sensitive)==0;
413  }
414 
415  bool operator==(const char *str) const
416  {
417  return compare(str)==0;
418  }
419  bool operator==(const UT_String &str) const
420  {
421  return compare(str.myData)==0;
422  }
423  bool operator==(const UT_StringRef &str) const
424  {
425  return compare(str)==0;
426  }
427  bool operator!=(const char *str) const
428  {
429  return compare(str)!=0;
430  }
431  bool operator!=(const UT_String &str) const
432  {
433  return compare(str.myData)!=0;
434  }
435  bool operator!=(const UT_StringRef &str) const
436  {
437  return compare(str)!=0;
438  }
439  bool operator<(const char *str) const
440  {
441  return compare(str)<0;
442  }
443  bool operator<(const UT_String &str) const
444  {
445  return compare(str.myData)<0;
446  }
447  bool operator<(const UT_StringRef &str) const
448  {
449  return compare(str)<0;
450  }
451  bool operator<=(const char *str) const
452  {
453  return compare(str)<=0;
454  }
455  bool operator<=(const UT_String &str) const
456  {
457  return compare(str.myData)<=0;
458  }
459  bool operator<=(const UT_StringRef &str) const
460  {
461  return compare(str)<=0;
462  }
463  bool operator>(const char *str) const
464  {
465  return compare(str)>0;
466  }
467  bool operator>(const UT_String &str) const
468  {
469  return compare(str.myData)>0;
470  }
471  bool operator>(const UT_StringRef &str) const
472  {
473  return compare(str)>0;
474  }
475  bool operator>=(const char *str) const
476  {
477  return compare(str)>=0;
478  }
479  bool operator>=(const UT_String &str) const
480  {
481  return compare(str.myData)>=0;
482  }
483  bool operator>=(const UT_StringRef &str) const
484  {
485  return compare(str)>=0;
486  }
487 
488  /// Test whether the string is defined or not
489  SYS_SAFE_BOOL operator bool() const { return isstring(); }
490 
491  /// Return the edit distance between two strings.
492  /// See http://en.wikipedia.org/wiki/Levenshtein_distance for details.
493  /// allow_subst controls whether a substitution of a character with
494  /// another is a single operation, rather than two operations of
495  /// insert and delete.
496  int distance(const char *str,
497  bool case_sensitive = true,
498  bool allow_subst = true) const;
499 
500  operator const char *() const
501  { return (const char *)myData; }
502  operator char *()
503  { return myData; }
504 
505  operator UT_StringView() const
506  { return UT_StringView(myData); }
507 
508  const char *c_str() const { return buffer(); }
509  const char *buffer() const { return myData; }
510  const char *data() const { return buffer(); }
511  const char *nonNullBuffer() const { return myData ? myData : ""; }
512 
513  char operator()(unsigned i) const
514  {
515  UT_ASSERT_P( isstring() );
516  UT_ASSERT_SLOW(i <= strlen(myData));
517  return myData[i];
518  }
519 
520  char &operator()(unsigned i)
521  {
522  harden();
523  return myData[i];
524  }
525 
526  // Prefer using write() since ideally the non-const operator() is removed
527  inline void write(unsigned i, char c)
528  {
529  hardenIfNeeded();
530  myData[i] = c;
531  }
532 
533  int toInt() const;
534  fpreal toFloat() const;
535 
536  /// Converts the contents of this UT_String to a std::string. Note that
537  /// std::string can't be constructed with a null pointer, so you can't
538  /// just write std::string s = ut_string.buffer();
539  std::string toStdString() const;
540 
541  //
542  // Here, we're finished with operators
543  //
544 
545  /// Return length of string
546  unsigned length() const
547  { return (myData) ? (unsigned)strlen(myData) : 0; }
548 
549  /// Return memory usage in bytes
550  int64 getMemoryUsage(bool inclusive=true) const
551  {
552  return (inclusive ? sizeof(*this) : 0)
553  + (!myIsReference ? (length() + 1)*sizeof(char) : 0);
554  }
555 
556  /// Find first occurrence of character. Returns NULL upon failure.
557  /// @{
558  char *findChar(int c)
559  { return myData ? strchr(myData, c) : nullptr; }
560  const char *findChar(int c) const
561  { return SYSconst_cast(*this).findChar(c); }
562  /// @}
563 
564  /// Find first occurrence of any character in @em str
565  /// @{
566  char *findChar(const char *str)
567  { return myData ? strpbrk(myData, str) : nullptr; }
568  const char *findChar(const char *str) const
569  { return SYSconst_cast(*this).findChar(str); }
570  /// @}
571 
572  /// Find last occurrence of character
573  /// @{
574  char *lastChar(int c)
575  { return myData ? strrchr(myData, c) : nullptr; }
576  const char *lastChar(int c) const
577  { return SYSconst_cast(*this).lastChar(c); }
578  /// @}
579 
580  /// Return the number of occurrences of the specified character.
581  int countChar(int c) const;
582 
583  /// Count the occurrences of the string
584  int count(const char *str, bool case_sensitive = true) const;
585 
586  char *findNonSpace();
587  const char *findNonSpace() const;
588  const char *findWord(const char *word) const;
589  bool findString(const char *str, bool fullword,
590  bool usewildcards) const;
591  int changeWord(const char *from, const char *to, bool all = true);
592  int changeString(const char *from, const char *to, bool fullword);
593  int changeQuotedWord(const char *from, const char *to,
594  int quote = '`', bool all = true);
595 
596  int findLongestCommonSuffix( const char *with ) const;
597 
598  /// Perform deep copy of the substring starting from @em index
599  /// for @em len characters into the specified UT_String.
600  /// If @em len is too long, then a substring starting from @em index to
601  /// the end of the string is copied.
602  /// Returns the length of the copied substring.
603  int substr(UT_String &buf, int index, int len=0) const;
604 
605  /// Determine if string can be seen as a single floating point number
606  bool isFloat(bool skip_spaces = false,
607  bool loose = false,
608  bool allow_underscore = false) const;
609  /// Determine if string can be seen as a single integer number
610  bool isInteger(bool skip_spaces = false) const;
611 
612  void toUpper()
613  {
614  char *ptr;
615  harden();
616  for (ptr=myData; *ptr; ptr++)
617  *ptr = (char)toupper(*ptr);
618  }
619  void toLower()
620  {
621  char *ptr;
622  harden();
623  for (ptr=myData; *ptr; ptr++)
624  *ptr = (char)tolower(*ptr);
625  }
626 
627 
628  /// Return last component of forward slash separated path string
629  ///
630  /// If there is a slash in the string, fileName() returns the string
631  /// starting after the slash. Otherwise, it returns the contents of
632  /// this string. Note that it returns a pointer into this string.
633  const char *fileName() const
634  {
635  UT_StringView file_name = UTstringFileName(*this);
636  return file_name.begin();
637  }
638  /// Return the extension of a file path string
639  /// @{
641  {
643  if (extension.isEmpty())
644  return nullptr;
645  return myData + (extension.begin() - myData);
646  }
647  const char *fileExtension() const
648  {
649  return SYSconst_cast(*this).fileExtension();
650  }
651  /// @}
652 
653  /// Return whether the file extension matches. The extension passed in
654  /// should include the '.' separator. For example: @code
655  /// matchFileExtension(".jpg")
656  /// @endcode
657  bool matchFileExtension(const char *match_extension) const
658  {
659  return UTstringMatchFileExtension(*this, match_extension);
660  }
661  /// Return path terminated just before the extension.
662  /// If the filename starts with '.' and no path is provided,
663  /// returns NULL
664  UT_String pathUpToExtension() const;
665 
666  /// Replace the file extension and return the new string
667  UT_String replaceExtension(const UT_String &new_ext) const;
668 
669  /// Split a path into @em dir_name and @em file_name, where @em file_name
670  /// is everything after the final slash (i.e. the same as fileName()).
671  /// Either part may be empty. Note that if the string starts with / and
672  /// only contains that one slash, the @em dir_name will be / and not blank.
673  /// @em dir_name and @em file_name will either be set to hardened strings
674  /// or an empty string.
675  void splitPath(UT_String &dir_name, UT_String &file_name) const;
676 
677  /// Decompose a filename into various parts
678  ///
679  /// parseNumberedFilename will break up a filename into its various
680  /// parts: file = prefix$Fsuffix (note: suffix is
681  /// not the same as file extension.) 0 is returned if there is
682  /// no frame number. 'negative' allows -[frame] to be interpreted as a
683  /// negative number. 'fractional' allows [frame].[number] to be interpreted
684  /// as a fractional frame.
685  int parseNumberedFilename(UT_String &prefix,
686  UT_String &frame,
687  UT_String &suff,
688  bool negative = true,
689  bool fractional = false) const;
690 
691  bool isstring() const
692  { return (myData && *myData); }
693 
694  /// trimSpace() will remove all space characters (leading and following)
695  /// from a string. If the string consists of multiple words, the words will
696  /// be collapsed. The function returns 1 if space was trimmed.
697  int trimSpace(bool leave_single_space_between_words = false);
698 
699  /// A version of trimSpace() that only removes leading and following spaces
700  /// from a string, leaving any between words intact.
701  int trimBoundingSpace();
702 
703  /// strips out all characters found in 'chars'. The string length will be
704  /// reduced by the number of characters removed. The number of characters
705  /// removed is returned.
706  int strip(const char *chars);
707 
708  /// protectString() will modify the existing string to escape double quotes
709  /// and backslashes. It will only wrap the string in double quotes if
710  /// it has spaces in it. If 'protect_empty' is true, the string will
711  /// become '""', otherwise it will stay empty.
712  void protectString(bool protect_empty=false);
713 
714  /// If the char is a quote character `"` or `'` then make sure to protect
715  /// it by adding '\' before the quote character. If the character is not
716  /// a quote character then the character is simply added to the ostream.
717  static void protectString(std::ostream& os, char c);
718 
719  /// protectPreQuotePythonStringLiteral() will modify the existing string
720  /// to escape any non-printing characters, backslashes, and instances of the
721  /// specified delimiter. Unlike protectString(), it will not wrap the
722  /// string in quotes.
723  void protectPreQuotePythonStringLiteral(char delimiter='\'');
724 
725  /// returns true if the string begins and ends with a (non-escaped) quote
726  /// 'delimiter'.
727  bool isQuotedString(char delimiter='\'') const;
728 
729  /// makeQuotedString() is similar to protectString() except it returns a
730  /// new string instead of changing this string, it does wrap the string
731  /// in quotes, and it lets you use either ' or " as the delimiter.
732  /// The quoted string can also optionally be made to escape non-printing
733  /// characters. The string that's returned is UT_String::ALWAYS_DEEP.
734  UT_String makeQuotedString(char delimiter='\'',
735  bool escape_nonprinting=false) const;
736 
737  /// makeSmartQuotedString() will use either ' or " as the delimiter to
738  /// avoid escaped quotes, using the default delimiter if it doesn't
739  /// matter. The quoted string can also optionally be made to escape
740  /// non-printing characters. The string that's returned is
741  /// UT_String::ALWAYS_DEEP.
742  UT_String makeSmartQuotedString(char default_delimiter='\'',
743  bool escape_nonprinting=false) const;
744 
745  /// Expands standard control sequences ('\\n', '\\r', '\\t', '\\0') to their
746  /// corresponding ASCII values (10, 13, 9, 0, respectively).
747  /// If the expand_extended flag is enabled, an extended expansion is enabled
748  /// which adds hexadecimal, decimal and Unicode control sequence expansion.
749  /// Any values resulting from that expansion, which are outside the standard
750  /// ASCII range, will be encoded as UTF8-encoded control points.
751  void expandControlSequences(bool expand_extended = false);
752 
753  bool hasWhiteSpace() const;
754 
755  void removeTrailingSpace();
756  void removeTrailingChars(char chr);
757 
758  void removeTrailingDigits();
759 
760  // cshParse() does not need to harden the string. It does very robust
761  // parsing in the style of csh. It actually does better parsing than
762  // csh. Variable expansion & backquote expansion are done in the
763  // correct order for the correct arguments. One caveat is that the
764  // string cannot have \0377 (0xff) as a character in it.
765  //
766  // If there is an error in parsing, the error flag (if passed in) will be
767  // set to:
768  // 0 = no error
769  // 1 = line too long
770  int cshParse(char *argv[], int max_args,
771  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
772  void (*elookup)(const char *, UT_String&)=UTexprLookup,
773  int *error = 0,
774  UT_StringCshIO *io=0);
775 
776  int cshParse(UT_WorkArgs &argv,
777  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
778  void (*elookup)(const char *, UT_String&)=UTexprLookup,
779  int *error = 0,
780  UT_StringCshIO *io=0);
781 
782  // dosParse() uses the semi-braindead approach of ms-dos to argument
783  // parsing. That is, arguments are separated by a double quote or space
784  // (being a space or a tab). If 'preserve_backslashes' is set to
785  // false (the default), back-slashes are passed through verbatim, unless
786  // the following character is a double quote. Likewise, any pairs of
787  // back-slashes preceding a double quote are turned into single
788  // back-slashes.
789  int dosParse(UT_WorkArgs &argv, bool preserve_backslashes=false);
790  int dosParse(char *argv[], int max_args,
791  bool preserve_backslashes=false);
792 
793  /// Perform dos parsing modifying the buffer passed in. The args will be
794  /// stored as raw pointers into the given buffer
795  static int dosParse(char *buffer, UT_WorkArgs &args,
796  bool preserve_backslashes);
797 
798  // parse will insert nulls into the string.
799  // NB: The argv array is null terminated, thus the effective
800  // maximum number of arguments is one less than maxArgs.
801  // NB: The maxArgs variants are all deprecated, use UT_WorkArgs
802  // instead.
803  int parse(char *argv[], int max_args,
804  const char *quotes = "\"'", bool keep_quotes = false)
805  {
806  harden();
807  return parseInPlace(argv, max_args, quotes, keep_quotes);
808  }
809  int parse(UT_WorkArgs &argv, int start_arg = 0,
810  const char *quotes = "\"'", bool keep_quotes = false)
811  {
812  harden();
813  return parseInPlace(argv, start_arg, quotes, keep_quotes);
814  }
815  int parse(UT_StringArray &argv, int start_arg = 0,
816  const char *quotes = "\"'", bool keep_quotes = false)
817  {
818  harden();
819  return parseInPlace(argv, start_arg, quotes, keep_quotes);
820  }
821  // Warning: the following methods insert nulls into the string without
822  // hardening.
823  int parseInPlace(char *argv[], int max_args,
824  const char *quotes = "\"'", bool keep_quotes = false);
825  int parseInPlace(UT_WorkArgs &argv, int start_arg = 0,
826  const char *quotes = "\"'", bool keep_quotes = false);
827  int parseInPlace(UT_StringArray &argv, int start_arg = 0,
828  const char *quotes = "\"'", bool keep_quotes = false);
829 
830  // Splits the string at specific separator characters. Unlike the parse
831  // methods, the tokenize methods ignore quoting completely.
832  int tokenize(char *argv[], int max_args, char separator)
833  {
834  harden();
835  return tokenizeInPlace(argv, max_args, separator);
836  }
837  int tokenizeInPlace(char *argv[], int max_args, char separator);
838  int tokenize(UT_WorkArgs &argv, char separator)
839  {
840  harden();
841  return tokenizeInPlace(argv, separator);
842  }
843  int tokenizeInPlace(UT_WorkArgs &argv, char separator);
844  int tokenize(char *argv[], int max_args,
845  const char *separators = " \t\n")
846  {
847  harden();
848  return tokenizeInPlace(argv, max_args, separators);
849  }
850  int tokenizeInPlace(char *argv[], int max_args,
851  const char *separators = " \t\n");
852  int tokenize(UT_WorkArgs &argv, const char *separators = " \t\n")
853  {
854  harden();
855  return tokenizeInPlace(argv, separators);
856  }
857  int tokenizeInPlace(UT_WorkArgs &argv,
858  const char *separators = " \t\n");
859 
860  template<typename T>
861  int tokenize(T &list, const char *separators = " \t\n")
862  {
863  harden();
864  return tokenizeInPlace(list, separators);
865  }
866 
867  template<typename T>
868  int tokenizeInPlace(T &list,
869  const char *separators = " \t\n")
870  {
871  char *token;
872  char *context;
873 
874  if (!isstring())
875  return 0;
876  if (!(token = SYSstrtok(myData, separators, &context)))
877  return 0;
878 
879  list.append(token);
880 
881  while ((token = SYSstrtok(0, separators, &context)) != NULL)
882  list.append(token);
883 
884  return list.entries();
885  }
886 
887 
888  // Replaces the contents with variables expanded.
889  void expandVariables();
890 
891  // Functions to hash a string
893  {
894  return hash(myData);
895  }
896 
897  // The code can be used for rudimentary hash chaining, but it is NOT
898  // the case that hash("def", hash("abc")) == hash("abcdef"), so there
899  // is little reason to use this rather than normal hash combiners.
900  static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code = 0)
901  {
902  return SYSstring_hashseed(
903  str, SYS_EXINT_MAX, code, /*allow_nulls*/ false);
904  }
905 
906  // This does pattern matching on a string. The pattern may include
907  // the following syntax:
908  // ? = match a single character
909  // * = match any number of characters
910  // [char_set] = matches any character in the set
911  bool match(const char *pattern, bool case_sensitive = true) const;
912 
913  // Similar to match() except it assumes that we're dealing with file paths
914  // so that it determines whether to do a case-sensitive match depending on
915  // the platform.
916  bool matchFile(const char *pattern) const;
917 
918  // Similar to match() but uses rsync style matching:
919  // * = match any number of characters up to a slash
920  // ** = match any number of characters, including a slash
921  bool matchPath(const char *pattern, bool case_sensitive = true,
922  bool *excludes_branch = nullptr) const;
923 
924  // multiMatch will actually check multiple patterns all separated
925  // by the separator character: i.e. geo1,geo2,foot*
926  //
927  // NOTE: No pattern may contain the separator
928  bool multiMatch(const char *pattern,
929  bool case_sensitive, char separator) const;
930  bool multiMatch(const char *pattern, bool case_sensitive = true,
931  const char *separators = ", ",
932  bool *explicitly_excluded = 0,
933  int *match_index = 0,
934  ut_PatternRecord *pattern_record=NULL) const;
935  bool multiMatch(const UT_StringMMPattern &pattern,
936  bool *explicitly_excluded = 0,
937  int *match_index = 0,
938  ut_PatternRecord *pattern_record=NULL) const;
939 
940  // this method matches a pattern while recording any wildcard
941  // patterns used.
942  bool multiMatchRecord(const char *pattern, int maxpatterns,
943  char *singles, int &nsingles,
944  char **words, int &nwords,
945  bool case_sensitive = true,
946  const char *separators = ", ") const;
947  bool multiMatchRecord(const UT_StringMMPattern &pattern,
948  int maxpatterns,
949  char *singles, int &nsingles,
950  char **words, int &nwords) const;
951  bool multiMatchRecord(const char *pattern,
952  UT_StringHolder &singles,
953  UT_StringArray &words,
954  bool case_sensitive = true,
955  const char *separators = ", ") const;
956 
957  /// matchPattern(UT_WorkArgs &) assumes that the arguments contain the
958  /// components of a pattern to be matched against. The method returns
959  /// true if the pattern matches, false if it doesn't. This matching
960  /// process handles ^ expansion properly (and efficiently).
961  /// If the string doesn't match any components of the pattern, then the
962  /// assumed value is returned.
963  bool matchPattern(const UT_WorkArgs &pattern_args,
964  bool assume_match=false) const;
965 
966  static bool multiMatchCheck(const char *pattern);
967  static bool wildcardMatchCheck(const char *pattern);
968 
969  // Same as match but equivalent to "*pattern*"
970  bool contains(const char *pattern, bool case_sensitive=true) const;
971 
972  // Returns true if our string starts with the specified prefix.
973  bool startsWith(const UT_StringView &prefix,
974  bool case_sensitive = true) const;
975 
976  // Returns true if our string ends with the specified suffix.
977  bool endsWith(const UT_StringView &suffix,
978  bool case_sensitive = true) const;
979 
980  /// Pluralize an English noun ending (i.e. box->boxes or tube->tubes). The
981  /// ending must be lower case to be processed properly.
982  void pluralize();
983 
984  // Will parse strings like 1-10:2,3 and call func for every element
985  // implied. It will stop when the func returns 0 or the parsing
986  // is complete, in which case it returns 1.
987  // Parsing also allows secondary elements to be specified eg 3.4 0.12
988  // The secfunc is used to find the maximum index of secondary elements
989  // for each compound num. The elements are assumed to be
990  // non-negative integers.
991  int traversePattern(int max, void *data,
992  int (*func)(int num, int sec, void *data),
993  unsigned int (*secfunc)(int num,void *data)=0,
994  int offset=0) const;
995 
996  // Fast containment, assumes no special characters
997  const char *fcontain(const char *pattern, bool case_sensitive=true) const
998  {
999  if (!myData) return NULL;
1000  return case_sensitive ? strstr(myData, pattern)
1001  : SYSstrcasestr(myData, pattern);
1002  }
1003 
1004  // Given the match pattern which fits our contents, any assigned wildcards
1005  // are substituted. The wildcards may also be indexed.
1006  // Returns true if rename was successful.
1007  //
1008  // @note This code was adapted from CHOP_Rename::subPatterns() and
1009  // works the same way.
1010  //
1011  // eg. this = apple, match = a*le, replace = b* ---> bpp
1012  // this = a_to_b, match = *_to_*, replace = *(1)_to_*(0) ---> b_to_a
1013  bool patternRename(const char *match_pattern, const char *replace);
1014 
1015  // Given the name rule according to which a name consists of a base name
1016  // (char sequence ending in a non-digit) and a numerical suffix, the
1017  // following two methods return the base and the suffix respectively.
1018  // base() needs a string buffer and will return a const char* pointing to it.
1019  // base() always returns a non-zero pointer,
1020  // while suffix() returns 0 if no suffix is found.
1021  const char *base(UT_String &buf) const;
1022  const char *suffix() const;
1023 
1024  // incrementNumberedName will increment a name. If it has a numerical
1025  // suffix, that suffix is incremented. If not, "2" is appended to the
1026  // name. The preserve_padding parameter can be set to true so that zero
1027  // padding is preserved. Incrementing foo0009 will produce foo10 with
1028  // this parameter set to false, or foo0010 if it is set to true.
1029  void incrementNumberedName(bool preserve_padding = false);
1030 
1031  // setFormat is used to set how an outstream formats its ascii output.
1032  // So you can use printf style formatting. eg:
1033  // UT_String::setFormat(cout, "%08d") << 100;
1034  //
1035  // Note: Don't do:
1036  // cout << UT_String::setFormat(cout, "%08d") << 100;
1037  // ^^^^
1038  // Also: The formatting changes (except for field width) are permanent,
1039  // so you'll have to reset them manually.
1040  //
1041  // TODO: A resetFormat, and a push/pop format pair.
1042  static std::ostream &setFormat(std::ostream &os, const char *fmt);
1043  std::ostream &setFormat(std::ostream &os);
1044 
1045  int replacePrefix(const char *oldpref,
1046  const char *newpref);
1047  int replaceSuffix(const char *oldsuffix,
1048  const char *newsuffix);
1049 
1050  // expandArrays will expand a series of tokens of the
1051  // form prefix[pattern]suffix into the names array
1052  //
1053  // Note: Each names[i] must be free'd after use
1054  // and label is used on the non-const parse method
1055  // NB: The max variants are all deprecated, use UT_WorkArgs
1056  // instead.
1057  int expandArrays(char *names[], int max);
1058 
1059  // This routine will ensure no line is over the specified
1060  // number of columns. Offending lines will be wrapped at
1061  // the first spaceChar or cut at exactly cols if spaceChar
1062  // is not found.
1063  // It returns one if any changes were done.
1064  // It currently treats tabs as single characters which should be
1065  // changed.
1066  // It will break words at hyphens if possible.
1067  int format(int cols);
1068 
1069  /// Replaces up to 'count' occurrences of 'find' with 'replacement',
1070  /// and returns the number of substitutions that occurred.
1071  /// If 'count' <= 0, all occurrences will be replaced.
1072  int substitute( const char *find, const char *replacement,
1073  int count = -1);
1074  /// Convenience version of substitute() for all or single occurrence.
1075  SYS_DEPRECATED_REPLACE(19.5, "Use 'count' variant")
1076  int substitute( const char *find, const char *replacement,
1077  bool all )
1078  { return substitute(find, replacement, !all ? 1 : -1); }
1079 
1080  // This function replaces the character found with another character.
1081  int substitute( char find, char replacement, bool all = true );
1082 
1083  // this function removes the substring at pos and len, and inserts str
1084  // at pos. it returns the difference (new_length - old_length)
1085  int replace( int pos, int len, const char *str );
1086 
1087  // remove the first len characters of this string
1088  int eraseHead(int len)
1089  { return replace(0, len, ""); }
1090 
1091  // remove the last len characters of this string
1092  int eraseTail(int len)
1093  { return replace(length() - len, len, ""); }
1094 
1095  // remove the substring start at pos for len characters
1096  int erase(int pos = 0, int len = -1)
1097  {
1098  if (len < 0)
1099  len = length() - pos;
1100  return replace(pos, len, "");
1101  }
1102 
1103  // insert the given string at pos into this string
1104  int insert(int pos, const char *str)
1105  { return replace(pos, 0, str); }
1106 
1107  // Does a "smart" string compare which will sort based on numbered names.
1108  // That is "text20" is bigger than "text3". In a strictly alphanumeric
1109  // comparison, this would not be the case. Zero is only returned if both
1110  // strings are identical.
1111  static int compareNumberedString(const char *s1,
1112  const char *s2,
1113  bool case_sensitive=true,
1114  bool allow_negatives=false);
1115  static int qsortCmpNumberedString(const char *const*v1,
1116  const char *const*v2);
1117 
1118  // Like compare numbered strings, but it sorts better when there are
1119  // .ext extensions (i.e. it handles '.' as a special case)
1120  static int compareNumberedFilename(const char *s1,
1121  const char *s2,
1122  bool case_sensitive=false);
1123  static int qsortCmpNumberedFilename(const char *const*v1,
1124  const char *const*v2);
1125 
1126  // Like compare numbered strings, but allows special ordering of certain
1127  // characters that should always come first or last.
1128  static int compareNumberedStringWithExceptions(const char *s1,
1129  const char *s2,
1130  bool case_sensitive=false,
1131  bool allow_negatives=false,
1132  const char *sorted_first=nullptr,
1133  const char *sorted_last=nullptr);
1134 
1135  /// Compare two version strings which have numbered components separated by
1136  /// dots. eg. "X.Y.Z". Assumes the components go from most to least
1137  /// significant in left to right order.
1138  static int compareVersionString(const char *s1, const char *s2);
1139 
1140  /// Given a path, set the value of the string to the program name. For
1141  /// example: @code
1142  /// str.extractProgramName(argv[0]);
1143  /// str.extractProgramName("c:/Path/program.exe");
1144  /// str.extractProgramName("/usr/bin/program");
1145  /// @endcode
1146  /// This will extract the last path component. Program names may also have
1147  /// their extensions stripped. For example ".exe" on Windows and "-bin" to
1148  /// strip the Houdini wrappers on other platforms.
1149  ///
1150  /// @note The path should be normalized to have forward slashes as the path
1151  /// separator.
1152  void extractProgramName(const char *path,
1153  bool strip_extension=true,
1154  bool normalize_path=true);
1155 
1156  /// Given a path, check to see whether the program name matches the
1157  /// expected. For example: @code
1158  /// if (UT_String::matchProgramName(argv[0], "houdini"))
1159  /// if (UT_String::matchProgramName("c:/Path/houdini.exe", "houdini"))
1160  /// if (UT_String::matchProgramName("/usr/bin/houdini", "houdini"))
1161  /// @endcode
1162  /// The matching is always case-insensitive.
1163  ///
1164  /// @note The path should be normalized to have forward slashes as the path
1165  /// separator.
1166  static bool matchProgramName(const char *path, const char *expected,
1167  bool normalize_path=false);
1168 
1169  /// Convert a path to a "normalized" path. That is, all back-slashes will
1170  /// be converted to forward slashes. On some operating systems, this will
1171  /// leave the string unchanged.
1172  void normalizePath();
1173 
1174  // A very fast integer to string converter. This is faster (at least on
1175  // SGI) than using sprintf("%d"). About two to three times as fast. Both
1176  // of these methods return the length of the string generated.
1177  static int itoa(char *str, int64 i);
1178  static int utoa(char *str, uint64 i);
1179 
1180  // Versions of the above functions which set into this string object
1181  void itoa(int64 i);
1182  void utoa(uint64 i);
1183 
1184  // A reader-friendly version of itoa. This places commas appropriately
1185  // to ensure the person can pick out the kilo points easily.
1186  // This can handle numbers up to 999,999,999,999,999,999.
1187  void itoaPretty(int64 val);
1188 
1189  /// Convert the given time delta (in milliseconds)
1190  /// to a reader-friendly string in days, hours, minutes, and seconds.
1191  void timeDeltaToPrettyString(double time_ms);
1192 
1193  /// Convert the given time delta (in milliseconds)
1194  /// to a reader-friendly string in milliseconds.
1195  void timeDeltaToPrettyStringMS(double time_ms);
1196 
1197  // Do an sprintf into this string. This method will allocate exactly the
1198  // number of bytes required for the final string. If the format string is
1199  // bad, isstring() will return false afterwards.
1200  int sprintf(const char *fmt, ...) SYS_PRINTF_CHECK_ATTRIBUTE(2, 3);
1201 
1202  // This will change the string into a valid C style variable name.
1203  // All non-alpha numerics will be converted to _.
1204  // If the first letter is a digit, it is prefixed with an _.
1205  // This returns 0 if no changes occurred, 1 if something had to
1206  // be adjusted.
1207  // Note that this does NOT force the name to be non-zero in length.
1208  // The safechars parameter is a string containing extra characters
1209  // that should be considered safe. These characters are not
1210  // converted to underscores.
1211  int forceValidVariableName(const char *safechars = NULL);
1212  // Returns true if the string matches a C-style variable name.
1213  // The safechars are not allowed to be the start.
1214  // Matching forceValid, empty strings are considered valid!
1215  bool isValidVariableName(const char *safechars = NULL) const;
1216 
1217  // This will force all non-alphanumeric characters to be underscores.
1218  // Returns true if any changes were required.
1219  bool forceAlphaNumeric();
1220 
1221  // This function will calculate the relative path to get from src to dest.
1222  // If file_path is false, this method assume it is dealing with node paths.
1223  // If file_path is true, it will also deal with Windows drive letters and
1224  // UNC paths.
1225  void getRelativePath(const char *src_fullpath,
1226  const char *dest_fullpath,
1227  bool file_path = false);
1228 
1229  // This function takes two absolute paths and returns the length of the
1230  // longest common path prefix, up to and including the last '/'. This
1231  // means, for instance, that if fullpath1[len1-1] == '/' then all of
1232  // fullpath1 is eligible as a common prefix.
1233  // NB: This function DOES NOT handle NT style drive names! It is currently
1234  // only used for op paths. If you want to add support for this, you
1235  // should add another default parameter to do this.
1236  static int findLongestCommonPathPrefix(const char *fullpath1, int len1,
1237  const char *fullpath2, int len2);
1238 
1239  // This function tests whether we are an absolute path, and returns true or
1240  // false depending on whether we are.
1241  bool isAbsolutePath(bool file_path=false) const;
1242 
1243  // This function assumes that we are an absolute path and will remove all
1244  // un-necessary components from it as long as we remain an absolute path.
1245  // We return false if an error was encountered, in which case the results
1246  // are unpredictable.
1247  bool collapseAbsolutePath(bool file_path=false);
1248 
1249  // This function will make sure that the string is at most max_length
1250  // characters long. If the string is longer than that, it will
1251  // replace the middle of the string by "...". Returns true if the string
1252  // has changed and false otherwise. max_length must be greater than 3.
1253  bool truncateMiddle(int max_length);
1254 
1255  // This function is an abomination when you can just write:
1256  // UT_String foo("");
1257  // ...
1258  // if (foo.isstring())
1259  // ...
1260  // Avoid using it and do not write functions that return "const UT_String&"
1261  static const UT_String &getEmptyString();
1262 
1263  /// Count the number of valid characters in the : modifier for variable
1264  /// expansion. For example, the string ":r" will return 2, the string
1265  /// ":r:t" will return 4, the string ":z" will return 0. These use the csh
1266  /// expansion modifiers.
1267  ///
1268  /// If the string doesn't start with a ':', the method will return 0.
1269  static int countCshModifiers(const char *src);
1270 
1271  /// Applies a "csh" style modifier string to this string. For example, a
1272  /// modifier string of ":e" would replace the string with the file
1273  /// extension of the string.
1274  ///
1275  /// Returns true if any modifications were performed
1276  bool applyCshModifiers(const char *modifiers);
1277 
1278 
1279  /// This will remove the range from a string of the form foo$Fbar.ext (#-#)
1280  /// and return the first number from the range. If there is only 1 range
1281  /// number, it will be returned. If there is no range, 0 is returned.
1282  /// The returned string is hardened.
1283  UT_String removeRange ();
1284 
1285  /// This will format a value to represent a given size in bytes, kilobytes,
1286  /// megabytes, etc.
1287  void formatByteSize(exint size, int digits=2);
1288 
1289  // UTF-8 helpers
1290 
1291  /// Returns the number of Unicode codepoints in the string, assuming it's
1292  /// encoded as UTF-8.
1293  int getCodePointCount() const;
1294 
1295  /// Returns a list of Unicode code points from this string.
1296  void getAsCodePoints(UT_Int32Array &cp_list) const;
1297 
1298  /// Friend specialization of std::swap() to use UT_String::swap()
1299  /// @internal This is needed because standard std::swap() implementations
1300  /// will try to copy the UT_String objects, causing hardened strings to
1301  /// become weak.
1302  friend void swap(UT_String& a, UT_String& b) { a.swap(b); }
1303 
1304  /// expandArrays will expand a series of tokens of the
1305  /// form prefix[pattern]suffix into the names UT_StringArray
1306  /// @param tokens will store the parsed tokens without expansion
1307  /// @param names will store the parsed tokens with expansion
1308  /// This doesn't need a max argument like:
1309  /// int expandArrays(char *names[], int max)
1310  int expandArrays(UT_StringArray &tokens, UT_StringArray &names);
1311 
1312 private:
1313  template <typename OSTREAM>
1314  void saveInternal(OSTREAM &os, bool binary) const;
1315 
1316  void freeData();
1317 
1318  /// implements a few csh-style modifiers.
1319  /// @param mod pointer to a string starting with the modifier to apply.
1320  /// so, to apply a global substitute modifier :gs/l/r/
1321  /// mod should be: s/l/r
1322  /// @param all True if all possible modifications should be
1323  /// (recursively) performed.
1324  /// Otherwise, at most one modification is applied.
1325  /// @return whether any modification was performed
1326  bool applyNextModifier(const char *mod, bool all);
1327 
1328 
1329  /// Sets myIsReference to false and copies the other_string into myData,
1330  /// but attempts to avoid unnecessary memory reallocations. Frees up
1331  /// any previous data, if necessary. If other_string is NULL, the call
1332  /// is equivalent to freeData().
1333  void doSmartCopyFrom(const char* other_string);
1334 
1335  static int compareNumberedStringInternal(const char *s1, const char *s2,
1336  bool case_sensitive,
1337  bool allow_negatives,
1338  const char *sorted_first,
1339  const char *sorted_last);
1340 
1341  static SYS_FORCE_INLINE void utStrFree(char *str)
1342  {
1343 #if defined(UT_DEBUG) && !defined(_WIN32)
1344  if (str)
1345  ::memset((void *)str, 0xDD, ::strlen(str) + 1);
1346 #endif
1347  ::free((void *)str);
1348  }
1349 
1350  char *myData;
1351  bool myIsReference:1,
1352  myIsAlwaysDeep:1;
1353 
1354  /// This operator saves the string to the stream via the string's
1355  /// saveAscii() method, protecting any whitespace (by adding quotes),
1356  /// backslashes or quotes in the string.
1357  friend UT_API std::ostream &operator<<(std::ostream &os, const UT_String &d);
1358  friend UT_API UT_OStream &operator<<(UT_OStream &os, const UT_String &d);
1359 
1360  friend class UT_API UT_StringRef;
1361 };
1362 
1363 /// Creates a shallow wrapper around a string for calling UT_String's many
1364 /// const algorithms.
// NOTE(review): the class header line is missing from this listing; the
// constructor below shows the type is UT_StringWrap with a UT_String base.
1366 {
1367 public:
1368  // We only have a single constructor which is always shallow.
1370  UT_StringWrap(const char *str)
1371  : UT_String(str)
1372  {}
1373  // It seems necessary on MSVC to forceinline the empty constructor in order
1374  // to have it inlined.
// NOTE(review): the declarator for the empty body below is missing from this
// listing. The symbol index further down the page lists
// "SYS_FORCE_INLINE ~UT_StringWrap()" at this position, so this is presumably
// the destructor even though the comment above says "constructor" - confirm.
1377  {}
1378 
// Wrappers can be neither copy-constructed nor copy-assigned.
1379  UT_StringWrap(const UT_StringWrap &) = delete;
1380  UT_StringWrap &operator=(const UT_StringWrap &) = delete;
1381 
1382  // Manually wrap methods that have non-const overloads or return non-const
1383  // pointers.
// Each wrapper is const and forwards to the UT_String member of the same name.
1384  char operator()(unsigned i) const { return UT_String::operator()(i); }
1385  const char *findChar(int c) const { return UT_String::findChar(c); }
1386  const char *findChar(const char *str) const { return UT_String::findChar(str); }
1387  const char *findNonSpace() const { return UT_String::findNonSpace(); }
1388  const char *lastChar(int c) const { return UT_String::lastChar(c); }
1389 
// Using-declarations that make the named UT_String members available on the
// wrapper. NOTE(review): presumably needed because the base is not inherited
// publicly; the class header is not visible here - confirm.
1390  using UT_String::operator==;
1391  using UT_String::operator!=;
1392  using UT_String::c_str;
1393  using UT_String::length;
1394 
1395  using UT_String::base;
1396  using UT_String::compare;
1397  using UT_String::contains;
1398  using UT_String::count;
1399  using UT_String::countChar;
1400  using UT_String::distance;
1401  using UT_String::endsWith;
1402  using UT_String::equal;
1403  using UT_String::fcontain;
1405  using UT_String::fileName;
1406  using UT_String::findWord;
1407  using UT_String::findString;
1410  using UT_String::isFloat;
1411  using UT_String::isInteger;
1413  using UT_String::isstring;
1414  using UT_String::match;
1415  using UT_String::matchFile;
1417  using UT_String::matchPath;
1419  using UT_String::multiMatch;
1423  using UT_String::save;
1424  using UT_String::saveAscii;
1425  using UT_String::saveBinary;
1426  using UT_String::splitPath;
1427  using UT_String::startsWith;
1428  using UT_String::substr;
1429  using UT_String::suffix;
1430  using UT_String::toFloat;
1431  using UT_String::toInt;
1432 };
1433 
// Inline UT_String constructor (its declarator line is missing from this
// listing). The object starts with myIsReference false, myIsAlwaysDeep true
// and no data, then takes its value by assigning 'str' through operator=.
// NOTE(review): the initializers are listed in a different order than the
// members are declared (myData is declared first); all of them are constants
// here, but confirm the ordering is intentional.
1434 inline
1436  : myIsReference(false)
1437  , myIsAlwaysDeep(true)
1438  , myData(nullptr)
1439 {
1440  *this = str;
1441 }
1442 
// Inline UT_String constructor (its declarator line is missing from this
// listing; presumably the move constructor, since 'str' is passed on with
// std::move - confirm). The object starts with myIsReference false,
// myIsAlwaysDeep true and no data, then assigns from the moved 'str'.
// NOTE(review): the initializers are listed in a different order than the
// members are declared (myData is declared first).
1443 inline
1445  : myIsReference(false)
1446  , myIsAlwaysDeep(true)
1447  , myData(nullptr)
1448 {
1449  *this = std::move(str);
1450 }
1451 
// Inline assignment returning *this (its declarator line is missing from
// this listing; presumably the move assignment operator - confirm).
// Takes over the contents of 'str' via adopt() and then forces the
// always-deep flag on.
1452 inline UT_String &
1454 {
1455  adopt(str);
1456  myIsAlwaysDeep = true; // matches copy constructor behaviour
1457  return *this;
1458 }
1459 
// Body of a function whose declarator lines are missing from this listing
// (presumably the UT_String destructor - confirm). The buffer is released
// through utStrFree() only when this string is not a reference and actually
// has data; myData is not reset afterwards.
1462 {
1463  if (!myIsReference && myData)
1464  utStrFree(myData);
1465 }
1466 
1468 void
1469 UT_String::freeData()
1470 {
1471  if (myData)
1472  {
1473  if (!myIsReference)
1474  utStrFree(myData);
1475  myData = 0;
1476  }
1477 }
1478 
// Exchanges the data pointer and the reference flag of this string with
// those of 'other' (the declarator line is missing from this listing; the
// symbol index lists it as "void swap(UT_String &other)").
// The always-deep flag is NOT exchanged: it stays with each object, and any
// side that has it set is hardened after the exchange.
1479 inline void
1481 {
1482  // We can't use UTswap because it doesn't work with bit fields.
1483  bool temp = myIsReference;
1484  myIsReference = other.myIsReference;
1485  other.myIsReference = temp;
1486 
1487  char *tmp_data = myData;
1488  myData = other.myData;
1489  other.myData = tmp_data;
1490 
// Hardening happens only after both fields have been exchanged, so each
// harden() call acts on the data the object now holds.
1491  if (myIsAlwaysDeep)
1492  harden();
1493 
1494  if (other.myIsAlwaysDeep)
1495  other.harden();
1496 }
1497 
// Plain holder for I/O redirection arguments, as described by the member
// comments below. NOTE(review): the class header line is missing from this
// listing; presumably this is UT_StringCshIO, which is forward-declared at
// the top of the file - confirm.
1499 public:
1500  UT_String myOut; // Points to argument following '>'
1501  UT_String myErr; // Points to argument following '>&'
1502  UT_String myIn; // Points to argument following '<'
1503  short myDoubleOut; // If the argument is '>>' or '>>&'
1504  short myDoubleIn; // If the argument is '<<'
1505 };
1506 
1507 UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[]);
1508 
1509 /// Does a "smart" string compare which will sort based on numbered names.
1510 /// That is "text20" is bigger than "text3". In a strictly alphanumeric
1511 /// comparison, this would not be the case.
// NOTE(review): the struct header line is missing from this listing.
1513 {
// Returns true when UT_String::compareNumberedString(), called with its
// default options, ranks s1 before s2 (i.e. yields a negative result).
1514  bool operator()(const char *s1, const char *s2) const
1515  {
1516  return UT_String::compareNumberedString(s1, s2) < 0;
1517  }
1518 
// std::string overload: forwards to the C-string overload above.
1519  bool operator()(const std::string &s1, const std::string &s2) const
1520  {
1521  return operator()(s1.c_str(), s2.c_str());
1522  }
1523 };
1524 
1525 #endif
bool match(const char *pattern, bool case_sensitive=true) const
int tokenize(char *argv[], int max_args, const char *separators=" \t\n")
Definition: UT_String.h:844
UT_String & operator+=(const char *str)
Definition: UT_String.h:342
static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code=0)
Definition: UT_String.h:900
int distance(const char *str, bool case_sensitive=true, bool allow_subst=true) const
char * lastChar(int c)
Definition: UT_String.h:574
typedef int(APIENTRYP RE_PFNGLXSWAPINTERVALSGIPROC)(int)
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glcorearb.h:2540
bool operator!=(const char *str) const
Definition: UT_String.h:427
UT_String & operator+=(const UT_String &str)
Definition: UT_String.h:374
UT_API void normalizePath(UT_String &file_path, bool want_marker=false, bool always_want_expanded_path=false)
bool operator>=(const UT_StringRef &str) const
Definition: UT_String.h:483
int count(const char *str, bool case_sensitive=true) const
Count the occurrences of the string.
bool matchFileExtension(const char *match_extension) const
Definition: UT_String.h:657
void swap(UT_String &other)
Definition: UT_String.h:1480
void saveAscii(UT_OStream &os) const
Definition: UT_String.h:301
bool operator()(const char *s1, const char *s2) const
Definition: UT_String.h:1514
T negative(const T &val)
Return the unary negation of the given value.
Definition: Math.h:128
const char * lastChar(int c) const
Definition: UT_String.h:1388
bool isInteger(bool skip_spaces=false) const
Determine if string can be seen as a single integer number.
bool operator<=(const char *str) const
Definition: UT_String.h:451
UT_String myIn
Definition: UT_String.h:1502
GLsizei const GLchar *const * string
Definition: glcorearb.h:814
fpreal toFloat() const
bool operator==(const char *str) const
Definition: UT_String.h:415
bool operator<=(const UT_String &str) const
Definition: UT_String.h:455
int toInt() const
char * fileExtension()
Definition: UT_String.h:640
const GLuint GLenum const void * binary
Definition: glcorearb.h:1924
bool isHard() const
Returns whether this string is hardened already.
Definition: UT_String.h:244
GLsizei const GLchar *const * path
Definition: glcorearb.h:3341
SYS_FORCE_INLINE T * SYSconst_cast(const T *foo)
Definition: SYS_Types.h:136
UT_String makeQuotedString(char delimiter='\'', bool escape_nonprinting=false) const
const char * findChar(const char *str) const
Definition: UT_String.h:568
int64 exint
Definition: SYS_Types.h:125
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:1222
GLdouble s
Definition: glad.h:3009
void swap(T &lhs, T &rhs)
Definition: pugixml.cpp:7172
void write(unsigned i, char c)
Definition: UT_String.h:527
bool operator==(const UT_String &str) const
Definition: UT_String.h:419
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:795
#define UT_API
Definition: UT_API.h:14
const char * fileExtension() const
Definition: UT_String.h:647
const char * data() const
Definition: UT_String.h:510
bool isAbsolutePath(bool file_path=false) const
bool findString(const char *str, bool fullword, bool usewildcards) const
**But if you need a result
Definition: thread.h:613
char * findChar(int c)
Definition: UT_String.h:558
#define SYS_EXINT_MAX
Definition: SYS_Types.h:181
char & operator()(unsigned i)
Definition: UT_String.h:520
bool equal(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:402
GLfloat GLfloat GLfloat v2
Definition: glcorearb.h:818
const char * findNonSpace() const
Definition: UT_String.h:1387
unsigned long long uint64
Definition: SYS_Types.h:117
int compare(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:382
GLuint buffer
Definition: glcorearb.h:660
void clear()
Reset the string to the default constructor.
Definition: UT_String.h:311
bool isAlwaysDeep() const
Definition: UT_String.h:206
const char * c_str() const
Definition: UT_String.h:508
bool matchPath(const char *pattern, bool case_sensitive=true, bool *excludes_branch=nullptr) const
SIM_API const UT_StringHolder all
unsigned length() const
Return length of string.
Definition: UT_String.h:546
int compare(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:396
< returns > If no error
Definition: snippets.dox:2
const char * suffix() const
bool operator<(const char *str) const
Definition: UT_String.h:439
bool operator<(const UT_StringRef &str) const
Definition: UT_String.h:447
UT_API void UTexprLookup(const char *name, UT_String &result)
bool contains(const char *pattern, bool case_sensitive=true) const
int tokenize(UT_WorkArgs &argv, const char *separators=" \t\n")
Definition: UT_String.h:852
#define SYS_DEPRECATED_REPLACE(__V__, __R__)
std::ostream & operator<<(std::ostream &ostr, const DataType &a)
Definition: DataType.h:133
UT_String(UT_AlwaysDeepType, const std::string &str)
Construct UT_String from a std::string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:153
void hardenIfNeeded(const char *s)
Take shallow copy and make it deep.
Definition: UT_String.h:234
const char * buffer() const
Definition: UT_String.h:509
CompareResults OIIO_API compare(const ImageBuf &A, const ImageBuf &B, float failthresh, float warnthresh, ROI roi={}, int nthreads=0)
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:39
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE bool isEmpty() const
Returns true if the string is empty.
SYS_FORCE_INLINE uint32 hash() const
Definition: UT_String.h:892
bool operator==(const UT_StringRef &str) const
Definition: UT_String.h:423
GLintptr offset
Definition: glcorearb.h:665
Definition: core.h:760
char operator()(unsigned i) const
Definition: UT_String.h:1384
int tokenize(char *argv[], int max_args, char separator)
Definition: UT_String.h:832
bool operator>=(const char *str) const
Definition: UT_String.h:475
UT_String & operator=(UT_String &&str)
Definition: UT_String.h:177
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileName(const StringT &str)
int tokenizeInPlace(T &list, const char *separators=" \t\n")
Definition: UT_String.h:868
OIIO_FORCEINLINE const vint4 & operator+=(vint4 &a, const vint4 &b)
Definition: simd.h:4369
#define SYS_SAFE_BOOL
Definition: SYS_Compiler.h:55
bool operator!=(const UT_String &str) const
Definition: UT_String.h:431
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:155
bool operator>=(const UT_String &str) const
Definition: UT_String.h:479
#define SYS_PRINTF_CHECK_ATTRIBUTE(string_index, first_to_check)
Definition: SYS_Types.h:447
char * findNonSpace()
std::string OIIO_UTIL_API replace(string_view str, string_view pattern, string_view replacement, bool global=false)
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
GLint GLint GLsizei GLint GLenum format
Definition: glcorearb.h:108
bool operator>(const UT_String &str) const
Definition: UT_String.h:467
char * SYSstrtok(char *string, const char *delimit, char **context)
Definition: SYS_String.h:106
char * findChar(const char *str)
Definition: UT_String.h:566
#define UT_ASSERT_SLOW(ZZ)
Definition: UT_Assert.h:154
const char * findChar(int c) const
Definition: UT_String.h:560
SYS_FORCE_INLINE UT_String(const char *str=0)
Construct UT_String from a C string, using shallow semantics.
Definition: UT_String.h:85
void harden()
Take shallow copy and make it deep.
Definition: UT_String.h:215
void saveAscii(std::ostream &os) const
Definition: UT_String.h:300
bool equal(const UT_StringRef &str, bool case_sensitive=true) const
Definition: UT_String.h:410
UT_String(UT_String &&str) noexcept
Definition: UT_String.h:169
long long int64
Definition: SYS_Types.h:116
bool equal(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:406
void setAlwaysDeep(bool deep)
Make a string always deep.
Definition: UT_String.h:190
bool operator>(const UT_StringRef &str) const
Definition: UT_String.h:471
const char * findChar(const char *str) const
Definition: UT_String.h:1386
bool matchFile(const char *pattern) const
bool operator()(const std::string &s1, const std::string &s2) const
Definition: UT_String.h:1519
GLuint const GLchar * name
Definition: glcorearb.h:786
int eraseHead(int len)
Definition: UT_String.h:1088
GLushort pattern
Definition: glad.h:2583
void toUpper()
Definition: UT_String.h:612
void adopt(UT_String &str)
Definition: UT_String.h:287
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:1222
SYS_FORCE_INLINE ~UT_StringWrap()
Definition: UT_String.h:1376
const char * findWord(const char *word) const
bool operator>(const char *str) const
Definition: UT_String.h:463
int64 getMemoryUsage(bool inclusive=true) const
Return memory usage in bytes.
Definition: UT_String.h:550
void saveBinary(std::ostream &os) const
Save string to binary stream.
Definition: UT_String.h:296
bool isFloat(bool skip_spaces=false, bool loose=false, bool allow_underscore=false) const
Determine if string can be seen as a single floating point number.
bool isValidVariableName(const char *safechars=NULL) const
static int compareNumberedString(const char *s1, const char *s2, bool case_sensitive=true, bool allow_negatives=false)
short myDoubleIn
Definition: UT_String.h:1504
void adopt(char *s)
Definition: UT_String.h:277
GLsizeiptr size
Definition: glcorearb.h:664
UT_String pathUpToExtension() const
SYS_NO_DISCARD_RESULT UT_StringView UTstringFileExtension(const StringT &str)
GLenum func
Definition: glcorearb.h:783
int substr(UT_String &buf, int index, int len=0) const
SYS_NO_DISCARD_RESULT bool UTstringMatchFileExtension(const StringT &str, const char *extension)
void save(std::ostream &os, bool binary) const
Save string to stream. Saves as binary if binary is true.
short myDoubleOut
Definition: UT_String.h:1503
fpreal64 fpreal
Definition: SYS_Types.h:277
int parse(UT_StringArray &argv, int start_arg=0, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:815
bool multiMatch(const char *pattern, bool case_sensitive, char separator) const
char * steal()
Definition: UT_String.h:255
char * SYSstrcasestr(const char *haystack, const char *needle)
Replacement for strcasestr, since no equivalent exists on Win32.
Definition: SYS_String.h:290
GLuint index
Definition: glcorearb.h:786
bool multiMatchRecord(const char *pattern, int maxpatterns, char *singles, int &nsingles, char **words, int &nwords, bool case_sensitive=true, const char *separators=", ") const
int parseNumberedFilename(UT_String &prefix, UT_String &frame, UT_String &suff, bool negative=true, bool fractional=false) const
UT_AlwaysDeepType
Definition: UT_String.h:79
GLfloat GLfloat v1
Definition: glcorearb.h:817
auto ptr(T p) -> const void *
Definition: format.h:2448
GLuint GLfloat * val
Definition: glcorearb.h:1608
ImageBuf OIIO_API max(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
**If you just want to fire and args
Definition: thread.h:609
SYS_NO_DISCARD_RESULT SYS_FORCE_INLINE const_iterator begin() const
Returns a constant iterator pointing to the beginning of the string.
unsigned int uint32
Definition: SYS_Types.h:40
const char * lastChar(int c) const
Definition: UT_String.h:576
UT_String myOut
Definition: UT_String.h:1500
UT_String myErr
Definition: UT_String.h:1501
bool isstring() const
Definition: UT_String.h:691
int findLongestCommonSuffix(const char *with) const
void hardenIfNeeded()
Take a shallow copy and make it deep.
Definition: UT_String.h:224
const char * findChar(int c) const
Definition: UT_String.h:1385
int parse(char *argv[], int max_args, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:803
UT_String(UT_AlwaysDeepType, const char *str=0)
Construct UT_String from a C string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:146
bool operator<(const UT_String &str) const
Definition: UT_String.h:443
int erase(int pos=0, int len=-1)
Definition: UT_String.h:1096
#define const
Definition: zconf.h:214
int tokenize(UT_WorkArgs &argv, char separator)
Definition: UT_String.h:838
auto sprintf(const S &fmt, const T &...args) -> std::basic_string< Char >
Definition: printf.h:574
string_view OIIO_UTIL_API strip(string_view str, string_view chars=string_view())
SIM_API const UT_StringHolder distance
bool operator<=(const UT_StringRef &str) const
Definition: UT_String.h:459
bool startsWith(const UT_StringView &prefix, bool case_sensitive=true) const
void splitPath(UT_String &dir_name, UT_String &file_name) const
char operator()(unsigned i) const
Definition: UT_String.h:513
bool OIIO_UTIL_API contains(string_view a, string_view b)
Does 'a' contain the string 'b' within it?
int parse(UT_WorkArgs &argv, int start_arg=0, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:809
const char * base(UT_String &buf) const
void removeLast()
Remove the last character.
Definition: UT_String.h:327
UT_API void UTvarLookup(const char *name, UT_String &result)
SYS_FORCE_INLINE UT_StringWrap(const char *str)
Definition: UT_String.h:1370
bool endsWith(const UT_StringView &suffix, bool case_sensitive=true) const
UT_String(const std::string &str)
Construct UT_String from a std::string, always doing a deep copy. The result will only be a UT_Always...
Definition: UT_String.h:120
int eraseTail(int len)
Definition: UT_String.h:1092
const char * fileName() const
Definition: UT_String.h:633
OIIO_UTIL_API std::string extension(string_view filepath, bool include_dot=true) noexcept
GLint GLsizei count
Definition: glcorearb.h:405
Definition: format.h:895
int countChar(int c) const
Return the number of occurrences of the specified character.
UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[])
int tokenize(T &list, const char *separators=" \t\n")
Definition: UT_String.h:861
FMT_CONSTEXPR auto find(Ptr first, Ptr last, T value, Ptr &out) -> bool
Definition: core.h:2089
const char * nonNullBuffer() const
Definition: UT_String.h:511
void toLower()
Definition: UT_String.h:619
GLenum src
Definition: glcorearb.h:1793
int insert(int pos, const char *str)
Definition: UT_String.h:1104
const char * fcontain(const char *pattern, bool case_sensitive=true) const
Definition: UT_String.h:997
bool operator!=(const UT_StringRef &str) const
Definition: UT_String.h:435