HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_String.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  *
7  * NAME: Utility Library (C++)
8  *
9  * COMMENTS: String class
10  *
11  */
12 
13 #ifndef __UT_String_h__
14 #define __UT_String_h__
15 
16 #include "UT_API.h"
17 
18 #include "UT_Assert.h"
19 #include "UT_VectorTypes.h"
20 #include "UT_StringView.h"
21 
22 #include <SYS/SYS_Compiler.h>
23 #include <SYS/SYS_Inline.h>
24 #include <SYS/SYS_String.h>
25 #include <SYS/SYS_Types.h>
26 
27 #include <functional>
28 #include <iosfwd>
29 #include <string>
30 
31 #include <ctype.h>
32 #include <stdlib.h>
33 #include <string.h>
34 
35 #ifdef WIN32
36  #define strcasecmp stricmp
37  #define strncasecmp strnicmp
38 #endif
39 
40 class UT_OStream;
41 class UT_String;
42 class UT_StringCshIO;
43 class UT_WorkArgs;
44 class UT_IStream;
45 class ut_PatternRecord;
46 class UT_StringMMPattern;
47 class UT_StringArray;
48 class UT_StringHolder;
49 class UT_StringRef;
50 
51 // The following lookup functions are used by cshParse. By default,
52 // varLookup simply uses getenv, exprLookup opens the command as
53 // a pipe and uses the result.
54 UT_API extern void UTvarLookup(const char *name, UT_String &result);
55 UT_API extern void UTexprLookup(const char *name, UT_String &result);
56 
57 SYS_FORCE_INLINE bool UTisstring(const char *s) { return s && *s; }
58 
59 // Because invoking isdigit with a negative value is undefined,
60 // some MSVC compilers decide to crash. Thus we cast explicitly
61 // to unsigned.
63  { return isdigit((unsigned char) c); }
64 
65 /// @file
66 /// @class UT_String
67 ///
68 /// UT_String is a string class that supports two different types of assignment
69 /// semantics:
70 /// - Shallow (default): Just reference the given string and NOT take
71 /// ownership.
72 /// - Deep: Make a copy of the given string, taking ownership in the
73 /// process (aka making it "hard").
74 ///
75 /// If UT_String::harden() is called, or any other UT_String method that
76 /// requires modifying the string, it will make a copy of its reference pointer
77 /// (and take ownership) first.
78 ///
80 {
81 public:
82 
83  /// UT_String can be constructed with UT_String::ALWAYS_DEEP to create an
84  /// object that will always perform deep copies when assigned to.
85  enum UT_AlwaysDeepType { ALWAYS_DEEP };
86 
87  /// @brief Construct UT_String from a C string, using shallow semantics
88  ///
89  /// @param str The initial string.
90  /// @param deep_copy If true, a copy of @em str will be used.
91  /// @param len Number of characters to use from @em str. Use -1 to
92  /// use the entire string. If len is non-negative, then
93  /// deepCopy will be implicitly set to true. If str is NULL
94  /// and len is non-negative, then it will be initialized
95  /// with "".
97  UT_String(const char *str = 0)
98  : myData(SYSconst_cast(str))
99  , myIsReference(true)
100  , myIsAlwaysDeep(false)
101  {}
102  UT_String(const char *str, bool deep_copy, int len = -1);
103 
104  /// @brief Construct UT_String from a std::string, always doing
105  /// a deep copy. The result will only be a UT_AlwaysDeep if the
106  /// appropriate version is used, however!
107  ///
108  /// NOTE: You cannot do:
109  /// UT_String foo;
110  /// std::string bar = "hello world";
111  /// foo = UT_String(bar.substr(2, 5));
112  ///
113  /// It provides a shortcut for constructing a UT_String from a function
114  /// that returns a std::string by value. For example, it lets you write
115  /// @code
116  /// UT_String str(func());
117  /// @endcode
118  /// instead of
119  /// @code
120  /// UT_String str(func().c_str(), /*harden=*/true);
121  /// @endcode
122  explicit UT_String(const std::string &str)
123  : myIsReference(false),
124  myIsAlwaysDeep(false)
125  { myData = strdup(str.c_str()); }
126 
127  /// @brief Construct UT_String from a UT_StringHolder.
128  /// This always duplicates and uses ALWAYS_DEEP semantics.
129  explicit UT_String(const UT_StringHolder &str);
130 
131 private:
132  /// This is intentionally not implemented - callers should choose between
133  /// the const char * and UT_StringHolder constructors, depending on whether
134  /// they want to make a deep copy.
135  /// @see UT_StringWrap.
136  UT_String(const UT_StringRef &);
137 
138 public:
139  /// @brief Construct UT_String from a UT_StringView.
140  /// This always duplicates and uses ALWAYS_DEEP semantics.
141  explicit UT_String(const UT_StringView &sv);
142 
143  /// @brief Construct UT_String from a C string, using ALWAYS_DEEP semantics
144  UT_String(UT_AlwaysDeepType, const char *str = 0)
145  : myIsReference(false),
146  myIsAlwaysDeep(true)
147  { myData = str ? strdup(str) : 0; }
148 
149  /// @brief Construct UT_String from a std::string, using ALWAYS_DEEP
150  /// semantics
152  : myIsReference(false),
153  myIsAlwaysDeep(true)
154  { myData = strdup(str.c_str()); }
155 
156  /// Copy constructor
157  ///
158  /// If the string we're copying from is ALWAYS_DEEP, then this object will
159  /// also become ALWAYS_DEEP. This way, you can pass/return a string by
160  /// value.
161  UT_String(const UT_String &str);
162 
163  ~UT_String();
164 
165  /// Move operators
166  /// @{
167  UT_String(UT_String &&str) noexcept
168  : myData(str.myData)
169  , myIsReference(str.myIsReference)
170  , myIsAlwaysDeep(str.myIsAlwaysDeep)
171  {
172  str.myData = nullptr;
173  str.myIsReference = !str.myIsAlwaysDeep;
174  }
176  {
177  freeData();
178  myData = str.myData;
179  myIsReference = str.myIsReference;
180  myIsAlwaysDeep = str.myIsAlwaysDeep;
181  str.myData = nullptr;
182  str.myIsReference = !str.myIsAlwaysDeep;
183  return *this;
184  }
185  /// @}
186 
187  /// Make a string always deep
188  void setAlwaysDeep(bool deep)
189  {
190  myIsAlwaysDeep = deep;
191  if (deep && myIsReference)
192  {
193  if (myData != NULL)
194  harden();
195  else
196  {
197  // This takes the same semantic as
198  // str = NULL;
199  // where str is an always deep string
200  myIsReference = false;
201  }
202  }
203  }
204  bool isAlwaysDeep() const
205  {
206  return myIsAlwaysDeep;
207  }
208 
209  void swap( UT_String &other );
210 
211  /// Take shallow copy and make it deep.
212  // @{
213  void harden()
214  {
215  if (!myIsReference && myData)
216  return;
217  myData = strdup(myData ? myData : "");
218  myIsReference = false;
219  }
220 
221  void harden(const char *s, int len = -1);
223  {
224  if (myIsReference)
225  {
226  if (isstring())
227  harden();
228  else
229  *this = "";
230  }
231  }
232  void hardenIfNeeded(const char *s)
233  {
234  if (s && *s)
235  harden(s);
236  else
237  *this = "";
238  }
239  // @}
240 
241  /// Returns whether this string is hardened already.
242  bool isHard() const { return !myIsReference; }
243 
244  /// Give up ownership of string
245  ///
246  /// Take a hard reference and make it shallow. This method makes sure
247  /// it gives back something you can delete, because this UT_String is
248  /// taking its hands off the data. Use it with care since it may lead
249  /// to memory leaks if, for example, you harden it again later.
250  ///
251  /// In the case of ALWAYS_DEEP strings, this is disallowed so it will
252  /// just return a copy of the data.
253  char * steal()
254  {
255  if (!myIsAlwaysDeep)
256  {
257  if (myIsReference)
258  myData = strdup(myData ? myData : ""); // harden
259  myIsReference = true; // but say it's soft
260  return myData;
261  }
262  else
263  {
264  // return a new copy of the data without releasing
265  // ownership for always deep strings
266  return strdup(myData ? myData : "");
267  }
268  }
269 
270  /// Take ownership of given string
271  ///
272  /// adopt() is the opposite of steal(). Basically, you're giving
273  /// the UT_String ownership of the string.
274  // @{
275  void adopt(char *s)
276  {
277  if (!myIsReference)
278  {
279  if (s != myData)
280  free(myData);
281  }
282  myData = s;
283  myIsReference = false;
284  }
285  void adopt(UT_String &str)
286  {
287  adopt(str.steal());
288  }
289  void adopt(UT_StringHolder &holder);
290 
291  // @}
292 
293  /// Save string to binary stream.
294  void saveBinary(std::ostream &os) const { save(os, true); }
295 
296  /// Save string to ASCII stream. This will add double quotes and escape to
297  /// the stream if necessary (empty string or contains spaces).
298  void saveAscii(std::ostream &os) const { save(os, false); }
299  void saveAscii(UT_OStream &os) const { save(os, false); }
300 
301  /// Save string to stream. Saves as binary if @em binary is true.
302  void save(std::ostream &os, bool binary) const;
303  void save(UT_OStream &os, bool binary) const;
304 
305  /// Load string from stream. Use is.eof() to check eof status
306  bool load(UT_IStream &is);
307 
308  /// Reset the string to the default constructor.
309  void clear()
310  { *this = (const char *)NULL; }
311 
312  /// Prepend a string (or character)
313  // @{
314  void prepend(const char *prefix);
315  void prepend(char ch);
316  // @}
317 
318  /// Append a character
319  void append(char ch);
320 
321  /// Append a string or a section of a string.
322  void append(const char *str, exint len = -1);
323 
324  /// Remove the last character
325  void removeLast() { truncate(length()-1); }
326  /// Truncate the string at the Nth character
327  void truncate(exint len);
328 
329  UT_String &operator=(const UT_String &str);
330  UT_String &operator=(const char *str);
331  UT_String &operator=(const std::string &str);
332  UT_String &operator=(const UT_StringHolder &str);
333  UT_String &operator=(const UT_StringView &str);
334 private:
335  /// Not implemented - see UT_String(const UT_StringRef &).
336  UT_String &operator=(const UT_StringRef);
337 
338 public:
339  UT_String &operator+=(const char *str)
340  {
341  if (!isstring())
342  {
343  // We are an empty string, so we merely copy
344  // the incoming string rather than trying to append
345  // to it.
346  harden(str);
347  }
348  else
349  {
350  bool same = (str == myData);
351  harden();
352  if (str)
353  {
354  int mylen = (int)strlen(myData);
355  myData = (char *)realloc(myData,
356  mylen+strlen(str)+1);
357  if (!same)
358  {
359  strcpy(&myData[mylen], str);
360  }
361  else
362  {
363  memcpy(myData + mylen, myData, mylen);
364  myData[mylen * 2] = '\0';
365  }
366  }
367  }
368  return *this;
369  }
370 
372  {
373  *this += (const char *)str.myData;
374  return *this;
375  }
376  UT_String &operator+=(const UT_StringRef &str);
377 
378  // Basic equality functions and operators
379  int compare(const char *str, bool case_sensitive=true) const
380  {
381  // Unlike std::string, UT_String treats NULL and
382  // the empty string as distinct (empty has precedence).
383  if (myData==0 || str==0)
384  {
385  if (myData) return 1;
386  if(str) return -1;
387  return 0;
388  }
389  if (case_sensitive)
390  return strcmp(myData, str);
391  return strcasecmp(myData, str);
392  }
393  int compare(const UT_String &str, bool case_sensitive=true) const
394  {
395  return compare(str.myData,case_sensitive);
396  }
397  int compare(const UT_StringRef &str, bool case_sensitive=true) const;
398 
399  bool equal(const char *str, bool case_sensitive=true) const
400  {
401  return compare(str,case_sensitive)==0;
402  }
403  bool equal(const UT_String &str, bool case_sensitive=true) const
404  {
405  return compare(str.myData,case_sensitive)==0;
406  }
407  bool equal(const UT_StringRef &str, bool case_sensitive=true) const
408  {
409  return compare(str,case_sensitive)==0;
410  }
411 
412  bool operator==(const char *str) const
413  {
414  return compare(str)==0;
415  }
416  bool operator==(const UT_String &str) const
417  {
418  return compare(str.myData)==0;
419  }
420  bool operator==(const UT_StringRef &str) const
421  {
422  return compare(str)==0;
423  }
424  bool operator!=(const char *str) const
425  {
426  return compare(str)!=0;
427  }
428  bool operator!=(const UT_String &str) const
429  {
430  return compare(str.myData)!=0;
431  }
432  bool operator!=(const UT_StringRef &str) const
433  {
434  return compare(str)!=0;
435  }
436  bool operator<(const char *str) const
437  {
438  return compare(str)<0;
439  }
440  bool operator<(const UT_String &str) const
441  {
442  return compare(str.myData)<0;
443  }
444  bool operator<(const UT_StringRef &str) const
445  {
446  return compare(str)<0;
447  }
448  bool operator<=(const char *str) const
449  {
450  return compare(str)<=0;
451  }
452  bool operator<=(const UT_String &str) const
453  {
454  return compare(str.myData)<=0;
455  }
456  bool operator<=(const UT_StringRef &str) const
457  {
458  return compare(str)<=0;
459  }
460  bool operator>(const char *str) const
461  {
462  return compare(str)>0;
463  }
464  bool operator>(const UT_String &str) const
465  {
466  return compare(str.myData)>0;
467  }
468  bool operator>(const UT_StringRef &str) const
469  {
470  return compare(str)>0;
471  }
472  bool operator>=(const char *str) const
473  {
474  return compare(str)>=0;
475  }
476  bool operator>=(const UT_String &str) const
477  {
478  return compare(str.myData)>=0;
479  }
480  bool operator>=(const UT_StringRef &str) const
481  {
482  return compare(str)>=0;
483  }
484 
485  /// Test whether the string is defined or not
486  SYS_SAFE_BOOL operator bool() const { return isstring(); }
487 
488  /// Return the edit distance between two strings.
489  /// See http://en.wikipedia.org/wiki/Levenshtein_distance for details.
490  /// allow_subst controls whether a substitution of a character with
491  /// another is a single operation, rather than two operations of
492  /// insert and delete.
493  int distance(const char *str,
494  bool case_sensitive = true,
495  bool allow_subst = true) const;
496 
497  operator const char *() const
498  { return (const char *)myData; }
499  operator char *()
500  { return myData; }
501 
502  operator UT_StringView() const
503  { return UT_StringView(myData); }
504 
505  const char *c_str() const { return buffer(); }
506  const char *buffer() const { return myData; }
507  const char *data() const { return buffer(); }
508  const char *nonNullBuffer() const { return myData ? myData : ""; }
509 
510  char operator()(unsigned i) const
511  {
512  UT_ASSERT_P( isstring() );
513  UT_ASSERT_SLOW(i <= strlen(myData));
514  return myData[i];
515  }
516 
517  char &operator()(unsigned i)
518  {
519  harden();
520  return myData[i];
521  }
522 
523  // Prefer using write() since ideally the non-const operator() is removed
524  inline void write(unsigned i, char c)
525  {
526  hardenIfNeeded();
527  myData[i] = c;
528  }
529 
530  int toInt() const;
531  fpreal toFloat() const;
532 
533  /// Converts the contents of this UT_String to a std::string. Note that
534  /// std::string can't be constructed with a null pointer, so you can't
535  /// just write std::string s = ut_string.buffer();
536  std::string toStdString() const;
537 
538  //
539  // Here, we're finished with operators
540  //
541 
542  /// Return length of string
543  unsigned length() const
544  { return (myData) ? (unsigned)strlen(myData) : 0; }
545 
546  /// Return memory usage in bytes
547  int64 getMemoryUsage(bool inclusive=true) const
548  {
549  return (inclusive ? sizeof(*this) : 0)
550  + (!myIsReference ? (length() + 1)*sizeof(char) : 0);
551  }
552 
553  /// Find first occurrence of character. Returns NULL upon failure.
554  /// @{
555  char *findChar(int c)
556  { return myData ? strchr(myData, c) : nullptr; }
557  const char *findChar(int c) const
558  { return SYSconst_cast(*this).findChar(c); }
559  /// @}
560 
561  /// Find first occurrence of any character in @em str
562  /// @{
563  char *findChar(const char *str)
564  { return myData ? strpbrk(myData, str) : nullptr; }
565  const char *findChar(const char *str) const
566  { return SYSconst_cast(*this).findChar(str); }
567  /// @}
568 
569  /// Find last occurrence of character
570  /// @{
571  char *lastChar(int c)
572  { return myData ? strrchr(myData, c) : nullptr; }
573  const char *lastChar(int c) const
574  { return SYSconst_cast(*this).lastChar(c); }
575  /// @}
576 
577  /// Return the number of occurrences of the specified character.
578  int countChar(int c) const;
579 
580  /// Count the occurrences of the string
581  int count(const char *str, bool case_sensitive = true) const;
582 
583  char *findNonSpace();
584  const char *findNonSpace() const;
585  const char *findWord(const char *word) const;
586  bool findString(const char *str, bool fullword,
587  bool usewildcards) const;
588  int changeWord(const char *from, const char *to, bool all = true);
589  int changeString(const char *from, const char *to, bool fullword);
590  int changeQuotedWord(const char *from, const char *to,
591  int quote = '`', bool all = true);
592 
593  int findLongestCommonSuffix( const char *with ) const;
594 
595  /// Perform deep copy of the substring starting from @em index
596  /// for @em len characters into the specified UT_String.
597  /// If @em len is too long, then a substring starting from @em index to
598  /// the end of the string is copied.
599  /// Returns the length of the copied substring.
600  int substr(UT_String &buf, int index, int len=0) const;
601 
602  /// Determine if string can be seen as a single floating point number
603  bool isFloat(bool skip_spaces = false,
604  bool loose = false,
605  bool allow_underscore = false) const;
606  /// Determine if string can be seen as a single integer number
607  bool isInteger(bool skip_spaces = false) const;
608 
609  void toUpper()
610  {
611  char *ptr;
612  harden();
613  for (ptr=myData; *ptr; ptr++)
614  *ptr = (char)toupper(*ptr);
615  }
616  void toLower()
617  {
618  char *ptr;
619  harden();
620  for (ptr=myData; *ptr; ptr++)
621  *ptr = (char)tolower(*ptr);
622  }
623 
624 
625  /// Return last component of forward slash separated path string
626  ///
627  /// If there is a slash in the string, fileName() returns the string
628  /// starting after the slash. Otherwise, it returns the contents of
629  /// this string. Note that it returns a pointer into this string.
630  const char *fileName() const
631  {
632  const char *fname;
633 
634  if (!myData)
635  return 0;
636 
637  fname = lastChar('/');
638 
639  if (!fname)
640  {
641  fname = myData;
642  }
643  else
644  {
645  fname++; // Get past the /
646  }
647  return fname;
648  }
649  /// Return the extension of a file path string
650  /// @{
652  {
653  if( !isstring() )
654  return 0;
655 
656  char *dot = lastChar('.');
657  if (dot)
658  {
659  const char *slash = lastChar('/');
660 
661  if (slash && slash > dot)
662  dot = NULL;
663  }
664  return dot;
665  }
666  const char *fileExtension() const
667  {
668  return SYSconst_cast(*this).fileExtension();
669  }
670  /// @}
671 
672  /// Return whether the file extension matches. The extension passed in
673  /// should include the '.' separator. For example: @code
674  /// matchFileExtension(".jpg")
675  /// @endcode
676  bool matchFileExtension(const char *match_extension) const
677  {
678  const char *ext = fileExtension();
679  return ext && !SYSstrcasecmp(ext, match_extension);
680  }
681  /// Return path terminated just before the extension.
682  /// If the filename starts with '.' and no path is provided,
683  /// returns NULL
684  UT_String pathUpToExtension() const;
685 
686  /// Replace the file extension and return the new string
687  UT_String replaceExtension(const UT_String &new_ext) const;
688 
689  /// Split a path into @em dir_name and @em file_name, where @em file_name
690  /// is everything after the final slash (i.e. the same as fileName()).
691  /// Either part may be empty. Note that if the string starts with / and
692  /// only contains that one slash, the @em dir_name will be / and not blank.
693  /// @em dir_name and @em file_name will either be set to hardened strings
694  /// or an empty string.
695  void splitPath(UT_String &dir_name, UT_String &file_name) const;
696 
697  /// Decompose a filename into various parts
698  ///
699  /// parseNumberedFilename will break up a filename into its various
700  /// parts: file = prefix$Fsuffix (note: suffix is
701  /// not the same as file extension.) 0 is returned if there is
702  /// no frame number. 'negative' allows -[frame] to be interpreted as a
703  /// negative number. 'fractional' allows [frame].[number] to be interpreted
704  /// as a fractional frame.
705  int parseNumberedFilename(UT_String &prefix,
706  UT_String &frame,
707  UT_String &suff,
708  bool negative = true,
709  bool fractional = false) const;
710 
711  bool isstring() const
712  { return (myData && *myData); }
713 
714  /// trimSpace() will remove all space characters (leading and following)
715  /// from a string. If the string consists of multiple words, the words will
716  /// be collapsed. The function returns 1 if space was trimmed.
717  int trimSpace(bool leave_single_space_between_words = false);
718 
719  /// A version of trimSpace() that only removes leading and following spaces
720  /// from a string, leaving any between words intact.
721  int trimBoundingSpace();
722 
723  /// strips out all characters found in 'chars'. The string length will be
724  /// reduced by the number of characters removed. The number of characters
725  /// removed is returned.
726  int strip(const char *chars);
727 
728  /// protectString() will modify the existing string to escape double quotes
729  /// and backslashes. It will only wrap the string in double quotes if
730  /// it has spaces in it. If 'protect_empty' is true, the string will
731  /// become '""', otherwise it will stay empty.
732  void protectString(bool protect_empty=false);
733 
734  /// protectPreQuotePythonStringLiteral() will modify the existing string
735  /// to escape any non-printing characters, backslashes, and instances of the
736  /// specified delimiter. Unlike protectString(), it will not wrap the
737  /// string in quotes.
738  void protectPreQuotePythonStringLiteral(char delimiter='\'');
739 
740  /// returns true if the string begins and ends with a (non-escaped) quote
741  /// 'delimiter'.
742  bool isQuotedString(char delimiter='\'') const;
743 
744  /// makeQuotedString() is similar to protectString() except it returns a
745  /// new string instead of changing this string, it does wrap the string
746  /// in quotes, and it lets you use either ' or " as the delimiter.
747  /// The quoted string can also optionally be made to escape non-printing
748  /// characters. The string that's returned is UT_String::ALWAYS_DEEP.
749  UT_String makeQuotedString(char delimiter='\'',
750  bool escape_nonprinting=false) const;
751 
752  /// makeSmartQuotedString() will use either ' or " as the delimiter to
753  /// avoid escaped quotes, using the default delimiter if it doesn't
754  /// matter. The quoted string can also optionally be made to escape
755  /// non-printing characters. The string that's returned is
756  /// UT_String::ALWAYS_DEEP.
757  UT_String makeSmartQuotedString(char default_delimiter='\'',
758  bool escape_nonprinting=false) const;
759 
760  /// Expands standard control sequences ('\\n', '\\r', '\\t', '\\0') to their
761  /// corresponding ASCII values (10, 13, 9, 0, respectively).
762  /// If the expand_extended flag is enabled, an extended expansion is enabled
763  /// which adds hexadecimal, decimal and Unicode control sequence expansion.
764  /// Any values resulting from that expansion, which are outside the standard
765  /// ASCII range, will be encoded as UTF8-encoded control points.
766  void expandControlSequences(bool expand_extended = false);
767 
768  bool hasWhiteSpace() const;
769 
770  void removeTrailingSpace();
771  void removeTrailingChars(char chr);
772 
773  void removeTrailingDigits();
774 
775  // cshParse() does not need to harden the string. It does very robust
776  // parsing in the style of csh. It actually does better parsing than
777  // csh. Variable expansion & backquote expansion are done in the
778  // correct order for the correct arguments. One caveat is that the
779  // string cannot have \0377 (0xff) as a character in it.
780  //
781  // If there is an error in parsing, the error flag (if passed in) will be
782  // set to:
783  // 0 = no error
784  // 1 = line too long
785  int cshParse(char *argv[], int max_args,
786  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
787  void (*elookup)(const char *, UT_String&)=UTexprLookup,
788  int *error = 0,
789  UT_StringCshIO *io=0);
790 
791  int cshParse(UT_WorkArgs &argv,
792  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
793  void (*elookup)(const char *, UT_String&)=UTexprLookup,
794  int *error = 0,
795  UT_StringCshIO *io=0);
796 
797  // dosParse() uses the semi-braindead approach of ms-dos to argument
798  // parsing. That is, arguments are separated by a double quote or space
799  // (being a space or a tab). If 'preserve_backslashes' is set to
800  // false (the default), back-slashes are passed through verbatim, unless
801  // the following character is a double quote. Likewise, any pairs of
802  // back-slashes preceding a double quote are turned into single
803  // back-slashes.
804  int dosParse(UT_WorkArgs &argv, bool preserve_backslashes=false);
805  int dosParse(char *argv[], int max_args,
806  bool preserve_backslashes=false);
807 
808  /// Perform dos parsing modifying the buffer passed in. The args will be
809  /// stored as raw pointers into the given buffer
810  static int dosParse(char *buffer, UT_WorkArgs &args,
811  bool preserve_backslashes);
812 
813  // parse will insert nulls into the string.
814  // NB: The argv array is null terminated, thus the effective
815  // maximum number of arguments is one less than maxArgs.
816  // NB: The maxArgs variants are all deprecated, use UT_WorkArgs
817  // instead.
818  int parse(char *argv[], int max_args,
819  const char *quotes = "\"'", bool keep_quotes = false)
820  {
821  harden();
822  return parseInPlace(argv, max_args, quotes, keep_quotes);
823  }
824  int parse(UT_WorkArgs &argv, int start_arg = 0,
825  const char *quotes = "\"'", bool keep_quotes = false)
826  {
827  harden();
828  return parseInPlace(argv, start_arg, quotes, keep_quotes);
829  }
830  // Warning: the following methods insert nulls into the string without
831  // hardening.
832  int parseInPlace(char *argv[], int max_args,
833  const char *quotes = "\"'", bool keep_quotes = false);
834  int parseInPlace(UT_WorkArgs &argv, int start_arg = 0,
835  const char *quotes = "\"'", bool keep_quotes = false);
836 
837  // Splits the string at specific separator characters. Unlike the parse
838  // methods, the tokenize methods ignore quoting completely.
839  int tokenize(char *argv[], int max_args, char separator)
840  {
841  harden();
842  return tokenizeInPlace(argv, max_args, separator);
843  }
844  int tokenizeInPlace(char *argv[], int max_args, char separator);
845  int tokenize(UT_WorkArgs &argv, char separator)
846  {
847  harden();
848  return tokenizeInPlace(argv, separator);
849  }
850  int tokenizeInPlace(UT_WorkArgs &argv, char separator);
851  int tokenize(char *argv[], int max_args,
852  const char *separators = " \t\n")
853  {
854  harden();
855  return tokenizeInPlace(argv, max_args, separators);
856  }
857  int tokenizeInPlace(char *argv[], int max_args,
858  const char *separators = " \t\n");
859  int tokenize(UT_WorkArgs &argv, const char *separators = " \t\n")
860  {
861  harden();
862  return tokenizeInPlace(argv, separators);
863  }
864  int tokenizeInPlace(UT_WorkArgs &argv,
865  const char *separators = " \t\n");
866 
867  template<typename T>
868  int tokenize(T &list, const char *separators = " \t\n")
869  {
870  harden();
871  return tokenizeInPlace(list, separators);
872  }
873 
874  template<typename T>
875  int tokenizeInPlace(T &list,
876  const char *separators = " \t\n")
877  {
878  char *token;
879  char *context;
880 
881  if (!isstring())
882  return 0;
883  if (!(token = SYSstrtok(myData, separators, &context)))
884  return 0;
885 
886  list.append(token);
887 
888  while ((token = SYSstrtok(0, separators, &context)) != NULL)
889  list.append(token);
890 
891  return list.entries();
892  }
893 
894 
895  // Replaces the contents with variables expanded.
896  void expandVariables();
897 
898  // Functions to hash a string
900  {
901  return hash(myData);
902  }
903 
904  static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code = 0)
905  {
906  return SYSstring_hashseed(str, SYS_EXINT_MAX, code);
907  }
908 
909  // This does pattern matching on a string. The pattern may include
910  // the following syntax:
911  // ? = match a single character
912  // * = match any number of characters
913  // [char_set] = matches any character in the set
914  bool match(const char *pattern, bool case_sensitive = true) const;
915 
916  // Similar to match() except it assumes that we're dealing with file paths
917  // so that it determines whether to do a case-sensitive match depending on
918  // the platform.
919  bool matchFile(const char *pattern) const;
920 
921  // Similar to match() but uses rsync style matching:
922  // * = match any number of characters up to a slash
923  // ** = match any number of characters, including a slash
924  bool matchPath(const char *pattern, bool case_sensitive = true,
925  bool *excludes_branch = nullptr) const;
926 
927  // multiMatch will actually check multiple patterns all separated
928  // by the separator character: i.e. geo1,geo2,foot*
929  //
930  // NOTE: No pattern may contain the separator
931  bool multiMatch(const char *pattern,
932  bool case_sensitive, char separator) const;
933  bool multiMatch(const char *pattern, bool case_sensitive = true,
934  const char *separators = ", ",
935  bool *explicitly_excluded = 0,
936  int *match_index = 0,
937  ut_PatternRecord *pattern_record=NULL) const;
938  bool multiMatch(const UT_StringMMPattern &pattern,
939  bool *explicitly_excluded = 0,
940  int *match_index = 0,
941  ut_PatternRecord *pattern_record=NULL) const;
942 
943  // this method matches a pattern while recording any wildcard
944  // patterns used.
945  bool multiMatchRecord(const char *pattern, int maxpatterns,
946  char *singles, int &nsingles,
947  char **words, int &nwords,
948  bool case_sensitive = true,
949  const char *separators = ", ") const;
950  bool multiMatchRecord(const UT_StringMMPattern &pattern,
951  int maxpatterns,
952  char *singles, int &nsingles,
953  char **words, int &nwords) const;
954  bool multiMatchRecord(const char *pattern,
955  UT_StringHolder &singles,
956  UT_StringArray &words,
957  bool case_sensitive = true,
958  const char *separators = ", ") const;
959 
960  /// matchPattern(UT_WorkArgs &) assumes that the arguments contain the
961  /// components of a pattern to be matched against. The method returns
962  /// true if the pattern matches, false if it doesn't. This matching
963  /// process handles ^ expansion properly (and efficiently).
964  /// If the string doesn't match any components of the pattern, then the
965  /// assumed value is returned.
966  bool matchPattern(const UT_WorkArgs &pattern_args,
967  bool assume_match=false) const;
968 
969  static bool multiMatchCheck(const char *pattern);
970  static bool wildcardMatchCheck(const char *pattern);
971 
972  // Same as match but equivalent to "*pattern*"
973  bool contains(const char *pattern, bool case_sensitive=true) const;
974 
975  // Returns true if our string starts with the specified prefix.
976  bool startsWith(const UT_StringView &prefix,
977  bool case_sensitive = true) const;
978 
979  // Returns true if our string ends with the specified suffix.
980  bool endsWith(const UT_StringView &suffix,
981  bool case_sensitive = true) const;
982 
983  /// Pluralize an English noun ending (i.e. box->boxes or tube->tubes). The
984  /// ending must be lower case to be processed properly.
985  void pluralize();
986 
987  // Will parse strings like 1-10:2,3 and call func for every element
988  // implied. It will stop when the func returns 0 or the parsing
989  // is complete, in which case it returns 1.
990  // Parsing also allows secondary elements to be specified eg 3.4 0.12
991  // The secfunc is used to find the maximum index of secondary elements
992  // for each compound num. The elements are assumed to be
993  // non-negative integers.
994  int traversePattern(int max, void *data,
995  int (*func)(int num, int sec, void *data),
996  unsigned int (*secfunc)(int num,void *data)=0,
997  int offset=0) const;
998 
999  // Fast containment, assumes no special characters
1000  const char *fcontain(const char *pattern, bool case_sensitive=true) const
1001  {
1002  if (!myData) return NULL;
1003  return case_sensitive ? strstr(myData, pattern)
1004  : SYSstrcasestr(myData, pattern);
1005  }
1006 
1007  // Given the match pattern which fits our contents, any assigned wildcards
1008  // are substituted. The wildcards may also be indexed.
1009  // Returns true if rename was successful.
1010  //
1011  // @note This code was adapted from CHOP_Rename::subPatterns() and
1012  // works the same way.
1013  //
1014  // eg. this = apple, match = a*le, replace = b* ---> bpp
1015  // this = a_to_b, match = *_to_*, replace = *(1)_to_*(0) ---> b_to_a
1016  bool patternRename(const char *match_pattern, const char *replace);
1017 
1018  // Given the name rule according to which a name consists of a base name
1019  // (char sequence ending in a non-digit) and a numerical suffix, the
1020  // following two methods return the base and the suffix respectively.
1021  // base() needs a string buffer and will return a const char* pointing to it.
1022  // base() always returns a non-zero pointer,
1023  // while suffix() returns 0 if no suffix is found.
1024  const char *base(UT_String &buf) const;
1025  const char *suffix() const;
1026 
1027  // incrementNumberedName will increment a name. If it has a numerical
1028  // suffix, that suffix is incremented. If not, "2" is appended to the
1029  // name. The preserve_padding parameter can be set to true so that zero
1030  // padding is preserved. Incrementing foo0009 will produce foo10 with
1031  // this parameter set to false, or foo0010 if it is set to true.
1032  void incrementNumberedName(bool preserve_padding = false);
1033 
1034  // setFormat is used to set how an outstream formats its ascii output.
1035  // So you can use printf style formatting. eg:
1036  // UT_String::setFormat(cout, "%08d") << 100;
1037  //
1038  // Note: Don't do:
1039  // cout << UT_String::setFormat(cout, "%08d") << 100;
1040  // ^^^^
1041  // Also: The formatting changes (except for field width) are permanent,
1042  // so you'll have to reset them manually.
1043  //
1044  // TODO: A resetFormat, and a push/pop format pair.
1045  static std::ostream &setFormat(std::ostream &os, const char *fmt);
1046  std::ostream &setFormat(std::ostream &os);
1047 
1048  int replacePrefix(const char *oldpref,
1049  const char *newpref);
1050  int replaceSuffix(const char *oldsuffix,
1051  const char *newsuffix);
1052 
1053  // expandArrays will expand a series of tokens of the
1054  // form prefix[pattern]suffix into the names array
1055  //
1056  // Note: Each names[i] must be free'd after use
1057  // and label is used on the non-const parse method
1058  // NB: The max variants are all deprecated, use UT_WorkArgs
1059  // instead.
1060  int expandArrays(char *names[], int max);
1061 
1062  // This routine will ensure no line is over the specified
1063  // number of columns. Offending lines will be wrapped at
1064  // the first spaceChar or cut at exactly cols if spaceChar
1065  // is not found.
1066  // It returns one if any changes were done.
1067  // It currently treats tabs as single characters which should be
1068  // changed.
1069  // It will break words at hyphens if possible.
1070  int format(int cols);
1071 
1072  // this method is similar to changeWord.. This method performs
1073  // a "dumb" substitution. Returns the number of substitutions.
1074  int substitute( const char *find, const char *replacement,
1075  bool all = true );
1076 
1077  // This function replaces the character found with another character.
1078  int substitute( char find, char replacement, bool all = true );
1079 
1080  // this function removes the substring at pos and len, and inserts str
1081  // at pos. it returns the difference (new_length - old_length)
1082  int replace( int pos, int len, const char *str );
1083 
1084  // remove the first len characters of this string
1085  int eraseHead(int len)
1086  { return replace(0, len, ""); }
1087 
1088  // remove the last len characters of this string
1089  int eraseTail(int len)
1090  { return replace(length() - len, len, ""); }
1091 
1092  // remove the substring start at pos for len characters
1093  int erase(int pos = 0, int len = -1)
1094  {
1095  if (len < 0)
1096  len = length() - pos;
1097  return replace(pos, len, "");
1098  }
1099 
1100  // insert the given string at pos into this string
1101  int insert(int pos, const char *str)
1102  { return replace(pos, 0, str); }
1103 
1104  // Does a "smart" string compare which will sort based on numbered names.
1105  // That is "text20" is bigger than "text3". In a strictly alphanumeric
1106  // comparison, this would not be the case. Zero is only returned if both
1107  // strings are identical.
1108  static int compareNumberedString(const char *s1, const char *s2,
1109  bool case_sensitive=true,
1110  bool allow_negatives=false);
1111  static int qsortCmpNumberedString(const char *const*v1, const char *const*v2);
1112 
1113  // Like compare numbered strings, but it sorts better when there are
1114  // .ext extensions (i.e. it handles '.' as a special case)
1115  static int compareNumberedFilename(const char *s1, const char *s2,
1116  bool case_sensitive=false);
1117  static int qsortCmpNumberedFilename(const char *const*v1, const char *const*v2);
1118 
1119  /// Compare two version strings which have numbered components separated by
1120  /// dots. eg. "X.Y.Z". Assumes the components go from most to least
1121  /// significant in left to right order.
1122  static int compareVersionString(const char *s1, const char *s2);
1123 
1124  /// Given a path, set the value of the string to the program name. For
1125  /// example: @code
1126  /// str.extractProgramName(argv[0]);
1127  /// str.extractProgramName("c:/Path/program.exe");
1128  /// str.extractProgramName("/usr/bin/program");
1129  /// @endcode
1130  /// This will extract the last path component. Program names may also have
1131  /// their extensions stripped. For example ".exe" on Windows and "-bin" to
1132  /// strip the Houdini wrappers on other platforms.
1133  ///
1134  /// @note The path should be normalized to have forward slashes as the path
1135  /// separator.
1136  void extractProgramName(const char *path,
1137  bool strip_extension=true,
1138  bool normalize_path=true);
1139 
1140  /// Given a path, check to see whether the program name matches the
1141  /// expected. For example: @code
1142  /// if (UT_String::matchProgramname(argv[0], "houdini"))
1143  /// if (UT_String::matchProgramname("c:/Path/houdini.exe", "houdini"))
1144  /// if (UT_String::matchProgramname("/usr/bin/houdini", "houdini"))
1145  /// @endcode
1146  /// The matching is always case-insensitive.
1147  ///
1148  /// @note The path should be normalized to have forward slashes as the path
1149  /// separator.
1150  static bool matchProgramName(const char *path, const char *expected,
1151  bool normalize_path=false);
1152 
1153  /// Convert a path to a "normalized" path. That is, all back-slashes will
1154  /// be converted to forward slashes. On some operating systems, this will
1155  /// leave the string unchanged.
1156  void normalizePath();
1157 
1158  // A very fast integer to string converter. This is faster (at least on
1159  // SGI) than using sprintf("%d"). About two to three times as fast. Both
1160  // of these methods return the length of the string generated.
1161  static int itoa(char *str, int64 i);
1162  static int utoa(char *str, uint64 i);
1163 
1164  // Versions of the above functions which set into this string object
1165  void itoa(int64 i);
1166  void utoa(uint64 i);
1167 
1168  // A reader-friendly version of itoa. This places commas appropriately
1169  // to ensure the person can pick out the kilo points easily.
1170  // This can handle numbers up to 999,999,999,999,999,999.
1171  void itoaPretty(int64 val);
1172 
1173  /// Convert the given time delta (in milliseconds)
1174  /// to a reader-friendly string in days, hours, minutes, and seconds.
1175  void timeDeltaToPrettyString(double time_ms);
1176 
1177  /// Convert the given time delta (in milliseconds)
1178  /// to a reader-friendly string in milliseconds.
1179  void timeDeltaToPrettyStringMS(double time_ms);
1180 
1181  // Do an sprintf into this string. This method will allocate exactly the
1182  // number of bytes required for the final string. If the format string is
1183  // bad, isstring() will return false afterwards.
1184  int sprintf(const char *fmt, ...) SYS_PRINTF_CHECK_ATTRIBUTE(2, 3);
1185 
1186  // This will change the string into a valid C style variable name.
1187  // All non-alpha numerics will be converted to _.
1188  // If the first letter is a digit, it is prefixed with an _.
1189  // This returns 0 if no changes occurred, 1 if something had to
1190  // be adjusted.
1191  // Note that this does NOT force the name to be non-zero in length.
1192  // The safechars parameter is a string containing extra characters
1193  // that should be considered safe. These characters are not
1194  // converted to underscores.
1195  int forceValidVariableName(const char *safechars = NULL);
1196  // Returns true if the string matches a C-style variable name.
1197  // The safechars are not allowed to be the start.
1198  // Matching forceValid, empty strings are considered valid!
1199  bool isValidVariableName(const char *safechars = NULL) const;
1200 
1201  // This will force all non-alphanumeric characters to be underscores.
1202  // Returns true if any changes were required.
1203  bool forceAlphaNumeric();
1204 
1205  // This function will calculate the relative path to get from src to dest.
1206  // If file_path is false, this method assumes it is dealing with node paths.
1207  // If file_path is true, it will also deal with Windows drive letters and
1208  // UNC paths.
1209  void getRelativePath(const char *src_fullpath,
1210  const char *dest_fullpath,
1211  bool file_path = false);
1212 
1213  // This function takes two absolute paths and returns the length of the
1214  // longest common path prefix, up to and including the last '/'. This
1215  // means, for instance, that if fullpath1[len1-1] == '/' then all of
1216  // fullpath1 is eligible as a common prefix.
1217  // NB: This function DOES NOT handle NT style drive names! It is currently
1218  // only used for op paths. If you want to add support for this, you
1219  // should add another default parameter to do this.
1220  static int findLongestCommonPathPrefix(const char *fullpath1, int len1,
1221  const char *fullpath2, int len2);
1222 
1223  // This function tests whether we are an absolute path, and returns true or
1224  // false depending on whether we are.
1225  bool isAbsolutePath(bool file_path=false) const;
1226 
1227  // This function assumes that we are an absolute path and will remove all
1228  // un-necessary components from it as long as we remain an absolute path.
1229  // We return false if an error was encountered, in which case the results
1230  // are unpredictable.
1231  bool collapseAbsolutePath(bool file_path=false);
1232 
1233  // This function will make sure that the string is at most max_length
1234  // characters long. If the string is longer than that, it will
1235  // replace the middle of the string by "...". Returns true if the string
1236  // has changed and false otherwise. max_length must be greater than 3.
1237  bool truncateMiddle(int max_length);
1238 
1239  // This function is an abomination when you can just write:
1240  // UT_String foo("");
1241  // ...
1242  // if (foo.isstring())
1243  // ...
1244  // Avoid using it and do not write functions that return "const UT_String&"
1245  static const UT_String &getEmptyString();
1246 
1247  /// Count the number of valid characters in the : modifier for variable
1248  /// expansion. For example, the string ":r" will return 2, the string
1249  /// ":r:t" will return 4, the string ":z" will return 0. These use the csh
1250  /// expansion modifiers.
1251  ///
1252  /// If the string doesn't start with a ':', the method will return 0.
1253  static int countCshModifiers(const char *src);
1254 
1255  /// Applies a "csh" style modifier string to this string. For example, a
1256  /// modifier string of ":e" would replace the string with the file
1257  /// extension of the string.
1258  ///
1259  /// Returns true if any modifications were performed
1260  bool applyCshModifiers(const char *modifiers);
1261 
1262 
1263  /// This will remove the range from a string of the form foo$Fbar.ext (#-#)
1264  /// and return the first number from the range. If there is only 1 range
1265  /// number, it will be returned. If there is no range, 0 is returned.
1266  /// The returned string is hardened.
1267  UT_String removeRange ();
1268 
1269  /// This will format a value to represent a given size in bytes, kilobytes,
1270  /// megabytes, etc.
1271  void formatByteSize(exint size, int digits=2);
1272 
1273  // UTF-8 helpers
1274 
1275  /// Returns the number of Unicode codepoints in the string, assuming it's
1276  /// encoded as UTF-8.
1277  int getCodePointCount() const;
1278 
1279  /// Returns a list of Unicode code points from this string.
1280  void getAsCodePoints(UT_Int32Array &cp_list) const;
1281 
1282  /// Friend specialization of std::swap() to use UT_String::swap()
1283  /// @internal This is needed because standard std::swap() implementations
1284  /// will try to copy the UT_String objects, causing hardened strings to
1285  /// become weak.
1286  friend void swap(UT_String& a, UT_String& b) { a.swap(b); }
1287 
1288  /// expandArrays will expand a series of tokens of the
1289  /// form prefix[pattern]suffix into the names UT_WorkArgs
1290  /// @param tokens will store the parsed tokens without expansion
1291  /// @param names will store the parsed tokens with expansion
1292  /// This doesn't need a max argument like:
1293  /// int expandArrays(char *names[], int max)
1294  ///
1295  // Note: Each names[i] must be free'd after use
1296  // and label is used on the non-const parse method
1297  int expandArrays(UT_WorkArgs &tokens, UT_WorkArgs &names);
1298 
1299 private:
1300  template <typename OSTREAM>
1301  void saveInternal(OSTREAM &os, bool binary) const;
1302 
1303  void freeData();
1304 
1305  /// implements a few csh-style modifiers.
1306  /// @param mod pointer to a string starting with the modifier to apply.
1307  /// so, to apply a global substitute modifier :gs/l/r/
1308  /// mod should be: s/l/r
1309  /// @param all True if all possible modifications should be
1310  /// (recursively) performed.
1311  /// Otherwise, at most one modification is applied.
1312  /// @return whether any modification was performed
1313  bool applyNextModifier(const char *mod, bool all);
1314 
1315 
1316  /// Sets myIsReference to false and copies the other_string into myData,
1317  /// but attempts to avoid unnecessary memory reallocations. Frees up
1318  /// any previous data, if necessary. If other_string is NULL, the call
1319  /// is equivalent to freeData().
1320  void doSmartCopyFrom(const char* other_string);
1321 
1322  static int compareNumberedStringInternal(const char *s1, const char *s2,
1323  bool case_sensitive,
1324  bool allow_negatives,
1325  bool dot_first);
1326 
1327  static SYS_FORCE_INLINE void utStrFree(char *str)
1328  {
1329 #if defined(UT_DEBUG) && !defined(_WIN32)
1330  if (str)
1331  ::memset((void *)str, 0xDD, ::strlen(str) + 1);
1332 #endif
1333  ::free((void *)str);
1334  }
1335 
1336  char *myData;
1337  bool myIsReference:1,
1338  myIsAlwaysDeep:1;
1339 
1340  /// This operator saves the string to the stream via the string's
1341  /// saveAscii() method, protecting any whitespace (by adding quotes),
1342  /// backslashes or quotes in the string.
1343  friend UT_API std::ostream &operator<<(std::ostream &os, const UT_String &d);
1344  friend UT_API UT_OStream &operator<<(UT_OStream &os, const UT_String &d);
1345 
1346  friend class UT_API UT_StringRef;
1347 };
1348 
1349 /// Creates a shallow wrapper around a string for calling UT_String's many
1350 /// const algorithms.
1352 {
1353 public:
1354  // We only have a single constructor which is always shallow.
1356  UT_StringWrap(const char *str)
1357  : UT_String(str)
1358  {}
1359  // It seems necessary on MSVC to forceinline the empty constructor in order
1360  // to have it inlined.
1363  {}
1364 
1365  // Manually wrap methods that have non-const overloads or return non-const
1366  // pointers.
1367  char operator()(unsigned i) const { return UT_String::operator()(i); }
1368  const char *findChar(int c) const { return UT_String::findChar(c); }
1369  const char *findChar(const char *str) const { return UT_String::findChar(str); }
1370  const char *findNonSpace() const { return UT_String::findNonSpace(); }
1371  const char *lastChar(int c) const { return UT_String::lastChar(c); }
1372 
1373  using UT_String::operator==;
1374  using UT_String::operator!=;
1375  using UT_String::c_str;
1376  using UT_String::length;
1377 
1378  using UT_String::base;
1379  using UT_String::compare;
1380  using UT_String::contains;
1381  using UT_String::count;
1382  using UT_String::countChar;
1383  using UT_String::distance;
1384  using UT_String::endsWith;
1385  using UT_String::equal;
1386  using UT_String::fcontain;
1388  using UT_String::fileName;
1389  using UT_String::findWord;
1390  using UT_String::findString;
1393  using UT_String::isFloat;
1394  using UT_String::isInteger;
1396  using UT_String::isstring;
1397  using UT_String::match;
1398  using UT_String::matchFile;
1400  using UT_String::matchPath;
1402  using UT_String::multiMatch;
1406  using UT_String::save;
1407  using UT_String::saveAscii;
1408  using UT_String::saveBinary;
1409  using UT_String::splitPath;
1410  using UT_String::startsWith;
1411  using UT_String::substr;
1412  using UT_String::suffix;
1413  using UT_String::toFloat;
1414  using UT_String::toInt;
1415 };
1416 
1419 {
1420  if (!myIsReference && myData)
1421  utStrFree(myData);
1422 }
1423 
1425 void
1426 UT_String::freeData()
1427 {
1428  if (myData)
1429  {
1430  if (!myIsReference)
1431  utStrFree(myData);
1432  myData = 0;
1433  }
1434 }
1435 
1436 inline void
1438 {
1439  // We can't use UTswap because it doesn't work with bit fields.
1440  bool temp = myIsReference;
1441  myIsReference = other.myIsReference;
1442  other.myIsReference = temp;
1443 
1444  char *tmp_data = myData;
1445  myData = other.myData;
1446  other.myData = tmp_data;
1447 
1448  if (myIsAlwaysDeep)
1449  harden();
1450 
1451  if (other.myIsAlwaysDeep)
1452  other.harden();
1453 }
1454 
1456 public:
1457  UT_String myOut; // Points to argument following '>'
1458  UT_String myErr; // Points to argument following '>&'
1459  UT_String myIn; // Points to argument following '<'
1460  short myDoubleOut; // If the argument is '>>' or '>>&'
1461  short myDoubleIn; // If the argument is '<<'
1462 };
1463 
1464 UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[]);
1465 
1466 /// Does a "smart" string compare which will sort based on numbered names.
1467 /// That is "text20" is bigger than "text3". In a strictly alphanumeric
1468 /// comparison, this would not be the case.
1470 {
1471  bool operator()(const char *s1, const char *s2) const
1472  {
1473  return UT_String::compareNumberedString(s1, s2) < 0;
1474  }
1475 
1476  bool operator()(const std::string &s1, const std::string &s2) const
1477  {
1478  return operator()(s1.c_str(), s2.c_str());
1479  }
1480 };
1481 
1482 #endif
bool match(const char *pattern, bool case_sensitive=true) const
GLdouble s
Definition: glew.h:1390
int tokenize(char *argv[], int max_args, const char *separators=" \t\n")
Definition: UT_String.h:851
UT_String & operator+=(const char *str)
Definition: UT_String.h:339
static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code=0)
Definition: UT_String.h:904
int distance(const char *str, bool case_sensitive=true, bool allow_subst=true) const
char * lastChar(int c)
Definition: UT_String.h:571
vint4 max(const vint4 &a, const vint4 &b)
Definition: simd.h:4703
std::string sprintf(const char *fmt, const Args &...args)
Definition: strutil.h:136
bool operator!=(const char *str) const
Definition: UT_String.h:424
UT_String & operator+=(const UT_String &str)
Definition: UT_String.h:371
string_view OIIO_API strip(string_view str, string_view chars=string_view())
bool operator>=(const UT_StringRef &str) const
Definition: UT_String.h:480
int count(const char *str, bool case_sensitive=true) const
Count the occurrences of the string.
GLsizeiptr size
Definition: glew.h:1681
GLenum src
Definition: glew.h:2410
GLuint const GLchar * name
Definition: glew.h:1814
bool matchFileExtension(const char *match_extension) const
Definition: UT_String.h:676
void swap(UT_String &other)
Definition: UT_String.h:1437
void saveAscii(UT_OStream &os) const
Definition: UT_String.h:299
bool operator()(const char *s1, const char *s2) const
Definition: UT_String.h:1471
const Args & args
Definition: printf.h:628
T negative(const T &val)
Return the unary negation of the given value.
Definition: Math.h:117
GLuint index
Definition: glew.h:1814
const char * lastChar(int c) const
Definition: UT_String.h:1371
bool isInteger(bool skip_spaces=false) const
Determine if string can be seen as a single integer number.
bool operator<=(const char *str) const
Definition: UT_String.h:448
UT_String myIn
Definition: UT_String.h:1459
GLuint const GLfloat * val
Definition: glew.h:2794
fpreal toFloat() const
bool operator==(const char *str) const
Definition: UT_String.h:412
bool operator<=(const UT_String &str) const
Definition: UT_String.h:452
int toInt() const
char * fileExtension()
Definition: UT_String.h:651
GLboolean GLboolean GLboolean GLboolean a
Definition: glew.h:9477
bool isHard() const
Returns whether this string is hardened already.
Definition: UT_String.h:242
SYS_FORCE_INLINE T * SYSconst_cast(const T *foo)
Definition: SYS_Types.h:136
UT_String makeQuotedString(char delimiter='\'', bool escape_nonprinting=false) const
const char * findChar(const char *str) const
Definition: UT_String.h:565
int64 exint
Definition: SYS_Types.h:125
void swap(T &lhs, T &rhs)
Definition: pugixml.cpp:7172
void write(unsigned i, char c)
Definition: UT_String.h:524
bool operator==(const UT_String &str) const
Definition: UT_String.h:416
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
Definition: glew.h:1254
#define UT_API
Definition: UT_API.h:13
const char * fileExtension() const
Definition: UT_String.h:666
const char * data() const
Definition: UT_String.h:507
bool isAbsolutePath(bool file_path=false) const
bool findString(const char *str, bool fullword, bool usewildcards) const
char * findChar(int c)
Definition: UT_String.h:555
#define SYS_EXINT_MAX
Definition: SYS_Types.h:181
char & operator()(unsigned i)
Definition: UT_String.h:517
bool equal(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:399
const char * findNonSpace() const
Definition: UT_String.h:1370
unsigned long long uint64
Definition: SYS_Types.h:117
int compare(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:379
void clear()
Reset the string to the default constructor.
Definition: UT_String.h:309
bool isAlwaysDeep() const
Definition: UT_String.h:204
const char * c_str() const
Definition: UT_String.h:505
bool matchPath(const char *pattern, bool case_sensitive=true, bool *excludes_branch=nullptr) const
unsigned length() const
Return length of string.
Definition: UT_String.h:543
int compare(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:393
const char * suffix() const
bool operator<(const char *str) const
Definition: UT_String.h:436
bool operator<(const UT_StringRef &str) const
Definition: UT_String.h:444
UT_API void UTexprLookup(const char *name, UT_String &result)
bool contains(const char *pattern, bool case_sensitive=true) const
int tokenize(UT_WorkArgs &argv, const char *separators=" \t\n")
Definition: UT_String.h:859
std::ostream & operator<<(std::ostream &ostr, const DataType &a)
Definition: DataType.h:133
UT_String(UT_AlwaysDeepType, const std::string &str)
Construct UT_String from a std::string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:151
void hardenIfNeeded(const char *s)
Take shallow copy and make it deep.
Definition: UT_String.h:232
const char * buffer() const
Definition: UT_String.h:506
CompareResults OIIO_API compare(const ImageBuf &A, const ImageBuf &B, float failthresh, float warnthresh, ROI roi={}, int nthreads=0)
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:40
GLfloat GLfloat GLfloat v2
Definition: glew.h:1856
SYS_FORCE_INLINE uint32 hash() const
Definition: UT_String.h:899
bool operator==(const UT_StringRef &str) const
Definition: UT_String.h:420
char operator()(unsigned i) const
Definition: UT_String.h:1367
int tokenize(char *argv[], int max_args, char separator)
Definition: UT_String.h:839
bool operator>=(const char *str) const
Definition: UT_String.h:472
UT_String & operator=(UT_String &&str)
Definition: UT_String.h:175
int tokenizeInPlace(T &list, const char *separators=" \t\n")
Definition: UT_String.h:875
OIIO_FORCEINLINE const vint4 & operator+=(vint4 &a, const vint4 &b)
Definition: simd.h:4246
#define SYS_SAFE_BOOL
Definition: SYS_Compiler.h:55
std::string OIIO_API replace(string_view str, string_view pattern, string_view replacement, bool global=false)
bool operator!=(const UT_String &str) const
Definition: UT_String.h:428
GLuint buffer
Definition: glew.h:1680
GLint GLenum GLsizei GLint GLsizei const void * data
Definition: glew.h:1379
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:134
bool operator>=(const UT_String &str) const
Definition: UT_String.h:476
const GLuint GLenum const void * binary
Definition: glew.h:3502
#define SYS_PRINTF_CHECK_ATTRIBUTE(string_index, first_to_check)
Definition: SYS_Types.h:433
char * findNonSpace()
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
Definition: CE_Vector.h:127
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
bool operator>(const UT_String &str) const
Definition: UT_String.h:464
char * SYSstrtok(char *string, const char *delimit, char **context)
Definition: SYS_String.h:113
char * findChar(const char *str)
Definition: UT_String.h:563
GLuint const GLuint * names
Definition: glew.h:2690
#define UT_ASSERT_SLOW(ZZ)
Definition: UT_Assert.h:133
const GLfloat * c
Definition: glew.h:16296
GLuint GLsizei GLsizei * length
Definition: glew.h:1825
const char * findChar(int c) const
Definition: UT_String.h:557
GLsizei GLsizei GLfloat distance
Definition: glew.h:13640
SYS_FORCE_INLINE UT_String(const char *str=0)
Construct UT_String from a C string, using shallow semantics.
Definition: UT_String.h:97
void harden()
Take shallow copy and make it deep.
Definition: UT_String.h:213
void saveAscii(std::ostream &os) const
Definition: UT_String.h:298
bool equal(const UT_StringRef &str, bool case_sensitive=true) const
Definition: UT_String.h:407
UT_String(UT_String &&str) noexcept
Definition: UT_String.h:167
long long int64
Definition: SYS_Types.h:116
bool equal(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:403
typedef int(WINAPI *PFNWGLRELEASEPBUFFERDCARBPROC)(HPBUFFERARB hPbuffer
SYS_FORCE_INLINE bool UTisdigit(char c)
Definition: UT_String.h:62
void setAlwaysDeep(bool deep)
Make a string always deep.
Definition: UT_String.h:188
bool operator>(const UT_StringRef &str) const
Definition: UT_String.h:468
const char * findChar(const char *str) const
Definition: UT_String.h:1369
bool matchFile(const char *pattern) const
bool operator()(const std::string &s1, const std::string &s2) const
Definition: UT_String.h:1476
int eraseHead(int len)
Definition: UT_String.h:1085
void toUpper()
Definition: UT_String.h:609
void adopt(UT_String &str)
Definition: UT_String.h:285
SYS_FORCE_INLINE ~UT_StringWrap()
Definition: UT_String.h:1362
const char * findWord(const char *word) const
bool operator>(const char *str) const
Definition: UT_String.h:460
int64 getMemoryUsage(bool inclusive=true) const
Return memory usage in bytes.
Definition: UT_String.h:547
void saveBinary(std::ostream &os) const
Save string to binary stream.
Definition: UT_String.h:294
bool isFloat(bool skip_spaces=false, bool loose=false, bool allow_underscore=false) const
Determine if string can be seen as a single floating point number.
bool OIIO_API contains(string_view a, string_view b)
Does 'a' contain the string 'b' within it?
GLsizei const GLchar *const * path
Definition: glew.h:6461
bool isValidVariableName(const char *safechars=NULL) const
GLdouble GLdouble GLdouble b
Definition: glew.h:9122
static int compareNumberedString(const char *s1, const char *s2, bool case_sensitive=true, bool allow_negatives=false)
short myDoubleIn
Definition: UT_String.h:1461
void adopt(char *s)
Definition: UT_String.h:275
GLsizei const GLchar *const * string
Definition: glew.h:1844
UT_String pathUpToExtension() const
GLenum func
Definition: glcorearb.h:782
int substr(UT_String &buf, int index, int len=0) const
basic_printf_context_t< buffer >::type context
Definition: printf.h:631
SYS_FORCE_INLINE bool UTisstring(const char *s)
Definition: UT_String.h:57
GLuint num
Definition: glew.h:2690
void save(std::ostream &os, bool binary) const
Save string to stream. Saves as binary if binary is true.
const void * ptr(const T *p)
Definition: format.h:3292
short myDoubleOut
Definition: UT_String.h:1460
fpreal64 fpreal
Definition: SYS_Types.h:277
bool multiMatch(const char *pattern, bool case_sensitive, char separator) const
char * steal()
Definition: UT_String.h:253
char * SYSstrcasestr(const char *haystack, const char *needle)
Replacement for strcasestr, since no equivalent exists on Win32.
Definition: SYS_String.h:297
int SYSstrcasecmp(const char *a, const char *b)
Definition: SYS_String.h:227
bool multiMatchRecord(const char *pattern, int maxpatterns, char *singles, int &nsingles, char **words, int &nwords, bool case_sensitive=true, const char *separators=", ") const
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
Definition: glew.h:12681
int parseNumberedFilename(UT_String &prefix, UT_String &frame, UT_String &suff, bool negative=true, bool fractional=false) const
FMT_CONSTEXPR bool find(Ptr first, Ptr last, T value, Ptr &out)
Definition: format.h:2104
GLuint GLuint GLsizei count
Definition: glew.h:1253
UT_AlwaysDeepType
Definition: UT_String.h:85
unsigned int uint32
Definition: SYS_Types.h:40
const char * lastChar(int c) const
Definition: UT_String.h:573
UT_String myOut
Definition: UT_String.h:1457
UT_String myErr
Definition: UT_String.h:1458
GLuint64EXT * result
Definition: glew.h:14007
bool isstring() const
Definition: UT_String.h:711
int findLongestCommonSuffix(const char *with) const
void hardenIfNeeded()
Take shallow copy and make it deep.
Definition: UT_String.h:222
const char * findChar(int c) const
Definition: UT_String.h:1368
int parse(char *argv[], int max_args, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:818
UT_String(UT_AlwaysDeepType, const char *str=0)
Construct UT_String from a C string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:144
bool operator<(const UT_String &str) const
Definition: UT_String.h:440
int erase(int pos=0, int len=-1)
Definition: UT_String.h:1093
#define const
Definition: zconf.h:214
int tokenize(UT_WorkArgs &argv, char separator)
Definition: UT_String.h:845
GLubyte * pattern
Definition: glew.h:5711
bool operator<=(const UT_StringRef &str) const
Definition: UT_String.h:456
bool startsWith(const UT_StringView &prefix, bool case_sensitive=true) const
void splitPath(UT_String &dir_name, UT_String &file_name) const
char operator()(unsigned i) const
Definition: UT_String.h:510
GLenum GLuint GLsizei const GLchar * buf
Definition: glew.h:2580
int parse(UT_WorkArgs &argv, int start_arg=0, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:824
const char * base(UT_String &buf) const
void removeLast()
Remove the last character.
Definition: UT_String.h:325
bool all(const vbool4 &v)
Definition: simd.h:3371
UT_API void UTvarLookup(const char *name, UT_String &result)
SYS_FORCE_INLINE UT_StringWrap(const char *str)
Definition: UT_String.h:1356
bool endsWith(const UT_StringView &suffix, bool case_sensitive=true) const
UT_String(const std::string &str)
Construct UT_String from a std::string, always doing a deep copy. The result will only be a UT_Always...
Definition: UT_String.h:122
int eraseTail(int len)
Definition: UT_String.h:1089
const char * fileName() const
Definition: UT_String.h:630
GLfloat GLfloat v1
Definition: glew.h:1852
int countChar(int c) const
Return the number of occurrences of the specified character.
UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[])
int tokenize(T &list, const char *separators=" \t\n")
Definition: UT_String.h:868
const char * nonNullBuffer() const
Definition: UT_String.h:508
GLenum GLsizei len
Definition: glew.h:7752
void toLower()
Definition: UT_String.h:616
int insert(int pos, const char *str)
Definition: UT_String.h:1101
GLintptr offset
Definition: glew.h:1682
const char * fcontain(const char *pattern, bool case_sensitive=true) const
Definition: UT_String.h:1000
bool operator!=(const UT_StringRef &str) const
Definition: UT_String.h:432