HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_String.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  *
7  * NAME: Utility Library (C++)
8  *
9  * COMMENTS: String class
10  *
11  */
12 
13 #ifndef __UT_String_h__
14 #define __UT_String_h__
15 
16 #include "UT_API.h"
17 
18 #include "UT_Assert.h"
19 #include "UT_VectorTypes.h"
20 #include "UT_StringView.h"
21 
22 #include <SYS/SYS_Compiler.h>
23 #include <SYS/SYS_Inline.h>
24 #include <SYS/SYS_String.h>
25 #include <SYS/SYS_Types.h>
26 
27 #include <functional>
28 #include <iosfwd>
29 #include <string>
30 
31 #include <ctype.h>
32 #include <stdlib.h>
33 #include <string.h>
34 
35 #ifdef WIN32
36  #define strcasecmp stricmp
37  #define strncasecmp strnicmp
38 #endif
39 
40 class UT_OStream;
41 class UT_String;
42 class UT_StringCshIO;
43 class UT_WorkArgs;
44 class UT_IStream;
45 class ut_PatternRecord;
46 class UT_StringMMPattern;
47 class UT_StringArray;
48 class UT_StringHolder;
49 class UT_StringRef;
50 
51 // The following lookup functions are used by cshParse. By default,
52 // varLookup simply uses getenv, exprLookup opens the command as
53 // a pipe and uses the result.
54 UT_API extern void UTvarLookup(const char *name, UT_String &result);
55 UT_API extern void UTexprLookup(const char *name, UT_String &result);
56 
57 SYS_FORCE_INLINE bool UTisstring(const char *s) { return s && *s; }
58 
59 // Because invoking isdigit with a negative value is undefined,
60 // some MSVC compilers decide to crash. Thus we cast explicitly
61 // to unsigned.
63  { return isdigit((unsigned char) c); }
64 
65 /// @file
66 /// @class UT_String
67 ///
68 /// UT_String is a string class that supports two different types of assignment
69 /// semantics:
70 /// - Shallow (default): Just reference the given string and NOT take
71 /// ownership.
72 /// - Deep: Make a copy of the given string, taking ownership in the
73 /// process (aka making it "hard").
74 ///
75 /// If UT_String::harden() is called, or any other UT_String method that
76 /// requires modifying the string, it will make a copy of its reference pointer
77 /// (and take ownership) first.
78 ///
80 {
81 public:
82 
83  /// UT_String can be constructed with UT_String::ALWAYS_DEEP to create an
84  /// object that will always perform deep copies when assigned to.
85  enum UT_AlwaysDeepType { ALWAYS_DEEP };
86 
87  /// @brief Construct UT_String from a C string, using shallow semantics
88  ///
89  /// @param str The initial string.
90  /// @param deep_copy If true, a copy of @em str will be used.
91  /// @param len Number of characters to use from @em str. Use -1 to
92  /// use the entire string. If len is non-negative, then
93  /// deep_copy will be implicitly set to true. If str is NULL
94  /// and len is non-negative, then it will be initialized
95  /// with "".
97  UT_String(const char *str = 0)
98  : myData(SYSconst_cast(str))
99  , myIsReference(true)
100  , myIsAlwaysDeep(false)
101  {}
102  UT_String(const char *str, bool deep_copy, int len = -1);
103 
104  /// @brief Construct UT_String from a std::string, always doing
105  /// a deep copy. The result will only be a UT_AlwaysDeep if the
106  /// appropriate version is used, however!
107  ///
108  /// NOTE: You cannot do:
109  /// UT_String foo;
110  /// std::string bar = "hello world";
111  /// foo = UT_String(bar.substr(2, 5));
112  ///
113  /// It provides a shortcut for constructing a UT_String from a function
114  /// that returns a std::string by value. For example, it lets you write
115  /// @code
116  /// UT_String str(func());
117  /// @endcode
118  /// instead of
119  /// @code
120  /// UT_String str(func().c_str(), /*harden=*/true);
121  /// @endcode
122  explicit UT_String(const std::string &str)
123  : myIsReference(false),
124  myIsAlwaysDeep(false)
125  { myData = strdup(str.c_str()); }
126 
127  /// @brief Construct UT_String from a UT_StringHolder.
128  /// This always duplicates and uses ALWAYS_DEEP semantics.
129  explicit UT_String(const UT_StringHolder &str);
130 
131 private:
132  /// This is intentionally not implemented - callers should choose between
133  /// the const char * and UT_StringHolder constructors, depending on whether
134  /// they want to make a deep copy.
135  /// @see UT_StringWrap.
136  UT_String(const UT_StringRef &);
137 
138 public:
139  /// @brief Construct UT_String from a UT_StringView.
140  /// This always duplicates and uses ALWAYS_DEEP semantics.
141  explicit UT_String(const UT_StringView &sv);
142 
143  /// @brief Construct UT_String from a C string, using ALWAYS_DEEP semantics
144  UT_String(UT_AlwaysDeepType, const char *str = 0)
145  : myIsReference(false),
146  myIsAlwaysDeep(true)
147  { myData = str ? strdup(str) : 0; }
148 
149  /// @brief Construct UT_String from a std::string, using ALWAYS_DEEP
150  /// semantics
152  : myIsReference(false),
153  myIsAlwaysDeep(true)
154  { myData = strdup(str.c_str()); }
155 
156  /// Copy constructor
157  ///
158  /// If the string we're copying from is ALWAYS_DEEP, then this object will
159  /// also become ALWAYS_DEEP. This way, you can pass/return a string by
160  /// value.
161  UT_String(const UT_String &str);
162 
163  ~UT_String();
164 
165  /// Move operators
166  /// @{
167  UT_String(UT_String &&str) noexcept
168  : myData(str.myData)
169  , myIsReference(str.myIsReference)
170  , myIsAlwaysDeep(str.myIsAlwaysDeep)
171  {
172  str.myData = nullptr;
173  str.myIsReference = !str.myIsAlwaysDeep;
174  }
176  {
177  freeData();
178  myData = str.myData;
179  myIsReference = str.myIsReference;
180  myIsAlwaysDeep = str.myIsAlwaysDeep;
181  str.myData = nullptr;
182  str.myIsReference = !str.myIsAlwaysDeep;
183  return *this;
184  }
185  /// @}
186 
187  /// Make a string always deep
188  void setAlwaysDeep(bool deep)
189  {
190  myIsAlwaysDeep = deep;
191  if (deep && myIsReference)
192  {
193  if (myData != NULL)
194  harden();
195  else
196  {
197  // This takes the same semantic as
198  // str = NULL;
199  // where str is an always deep string
200  myIsReference = false;
201  }
202  }
203  }
204  bool isAlwaysDeep() const
205  {
206  return myIsAlwaysDeep;
207  }
208 
209  void swap( UT_String &other );
210 
211  /// Take shallow copy and make it deep.
212  // @{
213  void harden()
214  {
215  if (!myIsReference && myData)
216  return;
217  myData = strdup(myData ? myData : "");
218  myIsReference = false;
219  }
220 
221  void harden(const char *s, int len = -1);
223  {
224  if (myIsReference)
225  {
226  if (isstring())
227  harden();
228  else
229  *this = "";
230  }
231  }
232  void hardenIfNeeded(const char *s)
233  {
234  if (s && *s)
235  harden(s);
236  else
237  *this = "";
238  }
239  // @}
240 
241  /// Returns whether this string is hardened already.
242  bool isHard() const { return !myIsReference; }
243 
244  /// Give up ownership of string
245  ///
246  /// Take a hard reference and make it shallow. This method makes sure
247  /// it gives back something you can delete, because this UT_String is
248  /// taking its hands off the data. Use it with care since it may lead
249  /// to memory leaks if, for example, you harden it again later.
250  ///
251  /// In the case of ALWAYS_DEEP strings, this is disallowed so it will
252  /// just return a copy of the data.
253  char * steal()
254  {
255  if (!myIsAlwaysDeep)
256  {
257  if (myIsReference)
258  myData = strdup(myData ? myData : ""); // harden
259  myIsReference = true; // but say it's soft
260  return myData;
261  }
262  else
263  {
264  // return a new copy of the data without releasing
265  // ownership for always deep strings
266  return strdup(myData ? myData : "");
267  }
268  }
269 
270  /// Take ownership of given string
271  ///
272  /// adopt() is the opposite of steal(). Basically, you're giving
273  /// the UT_String ownership of the string.
274  // @{
275  void adopt(char *s)
276  {
277  if (!myIsReference)
278  {
279  if (s != myData)
280  free(myData);
281  }
282  myData = s;
283  myIsReference = false;
284  }
285  void adopt(UT_String &str)
286  {
287  adopt(str.steal());
288  }
289  void adopt(UT_StringHolder &holder);
290 
291  // @}
292 
293  /// Save string to binary stream.
294  void saveBinary(std::ostream &os) const { save(os, true); }
295 
296  /// Save string to ASCII stream. This will add double quotes and escape to
297  /// the stream if necessary (empty string or contains spaces).
298  void saveAscii(std::ostream &os) const { save(os, false); }
299  void saveAscii(UT_OStream &os) const { save(os, false); }
300 
301  /// Save string to stream. Saves as binary if @em binary is true.
302  void save(std::ostream &os, bool binary) const;
303  void save(UT_OStream &os, bool binary) const;
304 
305  /// Load string from stream. Use is.eof() to check eof status
306  bool load(UT_IStream &is);
307 
308  /// Reset the string to the default constructor.
309  void clear()
310  { *this = (const char *)NULL; }
311 
312  /// Prepend a string (or character)
313  // @{
314  void prepend(const char *prefix);
315  void prepend(char ch);
316  // @}
317 
318  /// Append a character
319  void append(char ch);
320 
321  /// Append a string or a section of a string.
322  void append(const char *str, exint len = -1);
323 
324  /// Remove the last character
325  void removeLast() { truncate(length()-1); }
326  /// Truncate the string at the Nth character
327  void truncate(exint len);
328 
329  UT_String &operator=(const UT_String &str);
330  UT_String &operator=(const char *str);
331  UT_String &operator=(const std::string &str);
332  UT_String &operator=(const UT_StringHolder &str);
333  UT_String &operator=(const UT_StringView &str);
334 private:
335  /// Not implemented - see UT_String(const UT_StringRef &).
336  UT_String &operator=(const UT_StringRef);
337 
338 public:
339  UT_String &operator+=(const char *str)
340  {
341  if (!isstring())
342  {
343  // We are an empty string, so we merely copy
344  // the incoming string rather than trying to append
345  // to it.
346  harden(str);
347  }
348  else
349  {
350  bool same = (str == myData);
351  harden();
352  if (str)
353  {
354  int mylen = (int)strlen(myData);
355  myData = (char *)realloc(myData,
356  mylen+strlen(str)+1);
357  if (!same)
358  {
359  strcpy(&myData[mylen], str);
360  }
361  else
362  {
363  memcpy(myData + mylen, myData, mylen);
364  myData[mylen * 2] = '\0';
365  }
366  }
367  }
368  return *this;
369  }
370 
372  {
373  *this += (const char *)str.myData;
374  return *this;
375  }
376  UT_String &operator+=(const UT_StringRef &str);
377 
378  // Basic equality functions and operators
379  int compare(const char *str, bool case_sensitive=true) const
380  {
381  // Unlike std::string, UT_String treats NULL and
382  // the empty string as distinct (empty has precedence).
383  if (myData==0 || str==0)
384  {
385  if (myData) return 1;
386  if(str) return -1;
387  return 0;
388  }
389  if (case_sensitive)
390  return strcmp(myData, str);
391  return strcasecmp(myData, str);
392  }
393  int compare(const UT_String &str, bool case_sensitive=true) const
394  {
395  return compare(str.myData,case_sensitive);
396  }
397  int compare(const UT_StringRef &str, bool case_sensitive=true) const;
398 
399  bool equal(const char *str, bool case_sensitive=true) const
400  {
401  return compare(str,case_sensitive)==0;
402  }
403  bool equal(const UT_String &str, bool case_sensitive=true) const
404  {
405  return compare(str.myData,case_sensitive)==0;
406  }
407  bool equal(const UT_StringRef &str, bool case_sensitive=true) const
408  {
409  return compare(str,case_sensitive)==0;
410  }
411 
412  bool operator==(const char *str) const
413  {
414  return compare(str)==0;
415  }
416  bool operator==(const UT_String &str) const
417  {
418  return compare(str.myData)==0;
419  }
420  bool operator==(const UT_StringRef &str) const
421  {
422  return compare(str)==0;
423  }
424  bool operator!=(const char *str) const
425  {
426  return compare(str)!=0;
427  }
428  bool operator!=(const UT_String &str) const
429  {
430  return compare(str.myData)!=0;
431  }
432  bool operator!=(const UT_StringRef &str) const
433  {
434  return compare(str)!=0;
435  }
436  bool operator<(const char *str) const
437  {
438  return compare(str)<0;
439  }
440  bool operator<(const UT_String &str) const
441  {
442  return compare(str.myData)<0;
443  }
444  bool operator<(const UT_StringRef &str) const
445  {
446  return compare(str)<0;
447  }
448  bool operator<=(const char *str) const
449  {
450  return compare(str)<=0;
451  }
452  bool operator<=(const UT_String &str) const
453  {
454  return compare(str.myData)<=0;
455  }
456  bool operator<=(const UT_StringRef &str) const
457  {
458  return compare(str)<=0;
459  }
460  bool operator>(const char *str) const
461  {
462  return compare(str)>0;
463  }
464  bool operator>(const UT_String &str) const
465  {
466  return compare(str.myData)>0;
467  }
468  bool operator>(const UT_StringRef &str) const
469  {
470  return compare(str)>0;
471  }
472  bool operator>=(const char *str) const
473  {
474  return compare(str)>=0;
475  }
476  bool operator>=(const UT_String &str) const
477  {
478  return compare(str.myData)>=0;
479  }
480  bool operator>=(const UT_StringRef &str) const
481  {
482  return compare(str)>=0;
483  }
484 
485  /// Test whether the string is defined or not
486  SYS_SAFE_BOOL operator bool() const { return isstring(); }
487 
488  /// Return the edit distance between two strings.
489  /// See http://en.wikipedia.org/wiki/Levenshtein_distance for details.
490  /// allow_subst controls whether a substitution of a character with
491  /// another is a single operation, rather than two operations of
492  /// insert and delete.
493  int distance(const char *str,
494  bool case_sensitive = true,
495  bool allow_subst = true) const;
496 
497  operator const char *() const
498  { return (const char *)myData; }
499  operator char *()
500  { return myData; }
501 
502  operator UT_StringView() const
503  { return UT_StringView(myData); }
504 
505  const char *c_str() const { return buffer(); }
506  const char *buffer() const { return myData; }
507  const char *nonNullBuffer() const { return myData ? myData : ""; }
508 
509  char operator()(unsigned i) const
510  {
511  UT_ASSERT_P( isstring() );
512  UT_ASSERT_SLOW(i <= strlen(myData));
513  return myData[i];
514  }
515 
516  char &operator()(unsigned i)
517  {
518  harden();
519  return myData[i];
520  }
521 
522  // Prefer using write() since ideally the non-const operator() is removed
523  inline void write(unsigned i, char c)
524  {
525  hardenIfNeeded();
526  myData[i] = c;
527  }
528 
529  int toInt() const;
530  fpreal toFloat() const;
531 
532  /// Converts the contents of this UT_String to a std::string. Note that
533  /// std::string can't be constructed with a null pointer, so you can't
534  /// just write std::string s = ut_string.buffer();
535  std::string toStdString() const;
536 
537  //
538  // Here, we're finished with operators
539  //
540 
541  /// Return length of string
542  unsigned length() const
543  { return (myData) ? (unsigned)strlen(myData) : 0; }
544 
545  /// Return memory usage in bytes
546  int64 getMemoryUsage(bool inclusive=true) const
547  {
548  return (inclusive ? sizeof(*this) : 0)
549  + (!myIsReference ? (length() + 1)*sizeof(char) : 0);
550  }
551 
552  /// Find first occurrence of character. Returns NULL upon failure.
553  /// @{
554  char *findChar(int c)
555  { return myData ? strchr(myData, c) : nullptr; }
556  const char *findChar(int c) const
557  { return SYSconst_cast(*this).findChar(c); }
558  /// @}
559 
560  /// Find first occurrence of any character in @em str
561  /// @{
562  char *findChar(const char *str)
563  { return myData ? strpbrk(myData, str) : nullptr; }
564  const char *findChar(const char *str) const
565  { return SYSconst_cast(*this).findChar(str); }
566  /// @}
567 
568  /// Find last occurrence of character
569  /// @{
570  char *lastChar(int c)
571  { return myData ? strrchr(myData, c) : nullptr; }
572  const char *lastChar(int c) const
573  { return SYSconst_cast(*this).lastChar(c); }
574  /// @}
575 
576  /// Return the number of occurrences of the specified character.
577  int countChar(int c) const;
578 
579  /// Count the occurrences of the string
580  int count(const char *str, bool case_sensitive = true) const;
581 
582  char *findNonSpace();
583  const char *findNonSpace() const;
584  const char *findWord(const char *word) const;
585  bool findString(const char *str, bool fullword,
586  bool usewildcards) const;
587  int changeWord(const char *from, const char *to, bool all = true);
588  int changeString(const char *from, const char *to, bool fullword);
589  int changeQuotedWord(const char *from, const char *to,
590  int quote = '`', bool all = true);
591 
592  int findLongestCommonSuffix( const char *with ) const;
593 
594  /// Perform deep copy of the substring starting from @em index
595  /// for @em len characters into the specified UT_String.
596  /// If @em len is too long, then a substring starting from @em index to
597  /// the end of the string is copied.
598  /// Returns the length of the copied substring.
599  int substr(UT_String &buf, int index, int len=0) const;
600 
601  /// Determine if string can be seen as a single floating point number
602  bool isFloat(bool skip_spaces = false,
603  bool loose = false,
604  bool allow_underscore = false) const;
605  /// Determine if string can be seen as a single integer number
606  bool isInteger(bool skip_spaces = false) const;
607 
608  void toUpper()
609  {
610  char *ptr;
611  harden();
612  for (ptr=myData; *ptr; ptr++)
613  *ptr = (char)toupper(*ptr);
614  }
615  void toLower()
616  {
617  char *ptr;
618  harden();
619  for (ptr=myData; *ptr; ptr++)
620  *ptr = (char)tolower(*ptr);
621  }
622 
623 
624  /// Return last component of forward slash separated path string
625  ///
626  /// If there is a slash in the string, fileName() returns the string
627  /// starting after the slash. Otherwise, it returns the contents of
628  /// this string. Note that it returns a pointer into this string.
629  const char *fileName() const
630  {
631  const char *fname;
632 
633  if (!myData)
634  return 0;
635 
636  fname = lastChar('/');
637 
638  if (!fname)
639  {
640  fname = myData;
641  }
642  else
643  {
644  fname++; // Get past the /
645  }
646  return fname;
647  }
648  /// Return the extension of a file path string
649  /// @{
651  {
652  if( !isstring() )
653  return 0;
654 
655  char *dot = lastChar('.');
656  if (dot)
657  {
658  const char *slash = lastChar('/');
659 
660  if (slash && slash > dot)
661  dot = NULL;
662  }
663  return dot;
664  }
665  const char *fileExtension() const
666  {
667  return SYSconst_cast(*this).fileExtension();
668  }
669  /// @}
670 
671  /// Return whether the file extension matches. The extension passed in
672  /// should include the '.' separator. For example: @code
673  /// matchFileExtension(".jpg")
674  /// @endcode
675  bool matchFileExtension(const char *match_extension) const
676  {
677  const char *ext = fileExtension();
678  return ext && !SYSstrcasecmp(ext, match_extension);
679  }
680  /// Return path terminated just before the extension.
681  /// If the filename starts with '.' and no path is provided,
682  /// returns NULL
683  UT_String pathUpToExtension() const;
684 
685  /// Replace the file extension and return the new string
686  UT_String replaceExtension(const UT_String &new_ext) const;
687 
688  /// Split a path into @em dir_name and @em file_name, where @em file_name
689  /// is everything after the final slash (i.e. the same as fileName()).
690  /// Either part may be empty. Note that if the string starts with / and
691  /// only contains that one slash, the @em dir_name will be / and not blank.
692  /// @em dir_name and @em file_name will either be set to hardened strings
693  /// or an empty string.
694  void splitPath(UT_String &dir_name, UT_String &file_name) const;
695 
696  /// Decompose a filename into various parts
697  ///
698  /// parseNumberedFilename will break up a filename into its various
699  /// parts: file = prefix$Fsuffix (note: suffix is
700  /// not the same as file extension.) 0 is returned if there is
701  /// no frame number. 'negative' allows -[frame] to be interpreted as a
702  /// negative number. 'fractional' allows [frame].[number] to be interpreted
703  /// as a fractional frame.
704  int parseNumberedFilename(UT_String &prefix,
705  UT_String &frame,
706  UT_String &suff,
707  bool negative = true,
708  bool fractional = false) const;
709 
710  bool isstring() const
711  { return (myData && *myData); }
712 
713  /// trimSpace() will remove all space characters (leading and following)
714  /// from a string. If the string consists of multiple words, the words will
715  /// be collapsed. The function returns 1 if space was trimmed.
716  int trimSpace(bool leave_single_space_between_words = false);
717 
718  /// A version of trimSpace() that only removes leading and following spaces
719  /// from a string, leaving any between words intact.
720  int trimBoundingSpace();
721 
722  /// strips out all characters found in 'chars'. The string length will be
723  /// reduced by the number of characters removed. The number of characters
724  /// removed is returned.
725  int strip(const char *chars);
726 
727  /// protectString() will modify the existing string to escape double quotes
728  /// and backslashes. It will only wrap the string in double quotes if
729  /// it has spaces in it. If 'protect_empty' is true, the string will
730  /// become '""', otherwise it will stay empty.
731  void protectString(bool protect_empty=false);
732 
733  /// protectPreQuotePythonStringLiteral() will modify the existing string
734  /// to escape any non-printing characters, backslashes, and instances of the
735  /// specified delimiter. Unlike protectString(), it will not wrap the
736  /// string in quotes.
737  void protectPreQuotePythonStringLiteral(char delimiter='\'');
738 
739  /// returns true if the string begins and ends with a (non-escaped) quote
740  /// 'delimiter'.
741  bool isQuotedString(char delimiter='\'') const;
742 
743  /// makeQuotedString() is similar to protectString() except it returns a
744  /// new string instead of changing this string, it does wrap the string
745  /// in quotes, and it lets you use either ' or " as the delimiter.
746  /// The quoted string can also optionally be made to escape non-printing
747  /// characters. The string that's returned is UT_String::ALWAYS_DEEP.
748  UT_String makeQuotedString(char delimiter='\'',
749  bool escape_nonprinting=false) const;
750 
751  /// makeSmartQuotedString() will use either ' or " as the delimiter to
752  /// avoid escaped quotes, using the default delimiter if it doesn't
753  /// matter. The quoted string can also optionally be made to escape
754  /// non-printing characters. The string that's returned is
755  /// UT_String::ALWAYS_DEEP.
756  UT_String makeSmartQuotedString(char default_delimiter='\'',
757  bool escape_nonprinting=false) const;
758 
759  /// Expands standard control sequences ('\\n', '\\r', '\\t', '\\0') to their
760  /// corresponding ASCII values (10, 13, 9, 0, respectively).
761  /// If the expand_extended flag is enabled, an extended expansion is enabled
762  /// which adds hexadecimal, decimal and Unicode control sequence expansion.
763  /// Any values resulting from that expansion, which are outside the standard
764  /// ASCII range, will be encoded as UTF8-encoded control points.
765  void expandControlSequences(bool expand_extended = false);
766 
767  bool hasWhiteSpace() const;
768 
769  void removeTrailingSpace();
770  void removeTrailingChars(char chr);
771 
772  void removeTrailingDigits();
773 
774  // cshParse() does not need to harden the string. It does very robust
775  // parsing in the style of csh. It actually does better parsing than
776  // csh. Variable expansion & backquote expansion are done in the
777  // correct order for the correct arguments. One caveat is that the
778  // string cannot have \0377 (0xff) as a character in it.
779  //
780  // If there is an error in parsing, the error flag (if passed in) will be
781  // set to:
782  // 0 = no error
783  // 1 = line too long
784  int cshParse(char *argv[], int max_args,
785  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
786  void (*elookup)(const char *, UT_String&)=UTexprLookup,
787  int *error = 0,
788  UT_StringCshIO *io=0);
789 
790  int cshParse(UT_WorkArgs &argv,
791  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
792  void (*elookup)(const char *, UT_String&)=UTexprLookup,
793  int *error = 0,
794  UT_StringCshIO *io=0);
795 
796  // dosParse() uses the semi-braindead approach of ms-dos to argument
797  // parsing. That is, arguments are separated by a double quote or space
798  // (being a space or a tab). If 'preserve_backslashes' is set to
799  // false (the default), back-slashes are passed through verbatim, unless
800  // the following character is a double quote. Likewise, any pairs of
801  // back-slashes preceding a double quote are turned into single
802  // back-slashes.
803  int dosParse(UT_WorkArgs &argv, bool preserve_backslashes=false);
804  int dosParse(char *argv[], int max_args,
805  bool preserve_backslashes=false);
806 
807  /// Perform dos parsing modifying the buffer passed in. The args will be
808  /// stored as raw pointers into the given buffer
809  static int dosParse(char *buffer, UT_WorkArgs &args,
810  bool preserve_backslashes);
811 
812  // parse will insert nulls into the string.
813  // NB: The argv array is null terminated, thus the effective
814  // maximum number of arguments is one less than maxArgs.
815  // NB: The maxArgs variants are all deprecated, use UT_WorkArgs
816  // instead.
817  int parse(char *argv[], int max_args,
818  const char *quotes = "\"'", bool keep_quotes = false)
819  {
820  harden();
821  return parseInPlace(argv, max_args, quotes, keep_quotes);
822  }
823  int parse(UT_WorkArgs &argv, int start_arg = 0,
824  const char *quotes = "\"'", bool keep_quotes = false)
825  {
826  harden();
827  return parseInPlace(argv, start_arg, quotes, keep_quotes);
828  }
829  // Warning: the following methods insert nulls into the string without
830  // hardening.
831  int parseInPlace(char *argv[], int max_args,
832  const char *quotes = "\"'", bool keep_quotes = false);
833  int parseInPlace(UT_WorkArgs &argv, int start_arg = 0,
834  const char *quotes = "\"'", bool keep_quotes = false);
835 
836  // Splits the string at specific separator characters. Unlike the parse
837  // methods, the tokenize methods ignore quoting completely.
838  int tokenize(char *argv[], int max_args, char separator)
839  {
840  harden();
841  return tokenizeInPlace(argv, max_args, separator);
842  }
843  int tokenizeInPlace(char *argv[], int max_args, char separator);
844  int tokenize(UT_WorkArgs &argv, char separator)
845  {
846  harden();
847  return tokenizeInPlace(argv, separator);
848  }
849  int tokenizeInPlace(UT_WorkArgs &argv, char separator);
850  int tokenize(char *argv[], int max_args,
851  const char *separators = " \t\n")
852  {
853  harden();
854  return tokenizeInPlace(argv, max_args, separators);
855  }
856  int tokenizeInPlace(char *argv[], int max_args,
857  const char *separators = " \t\n");
858  int tokenize(UT_WorkArgs &argv, const char *separators = " \t\n")
859  {
860  harden();
861  return tokenizeInPlace(argv, separators);
862  }
863  int tokenizeInPlace(UT_WorkArgs &argv,
864  const char *separators = " \t\n");
865 
866  template<typename T>
867  int tokenize(T &list, const char *separators = " \t\n")
868  {
869  harden();
870  return tokenizeInPlace(list, separators);
871  }
872 
873  template<typename T>
874  int tokenizeInPlace(T &list,
875  const char *separators = " \t\n")
876  {
877  char *token;
878  char *context;
879 
880  if (!isstring())
881  return 0;
882  if (!(token = SYSstrtok(myData, separators, &context)))
883  return 0;
884 
885  list.append(token);
886 
887  while ((token = SYSstrtok(0, separators, &context)) != NULL)
888  list.append(token);
889 
890  return list.entries();
891  }
892 
893 
894  // Replaces the contents with variables expanded.
895  void expandVariables();
896 
897  // Functions to hash a string
899  {
900  return hash(myData);
901  }
902 
903  static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code = 0)
904  {
905  return SYSstring_hashseed(str, SYS_EXINT_MAX, code);
906  }
907 
908  // This does pattern matching on a string. The pattern may include
909  // the following syntax:
910  // ? = match a single character
911  // * = match any number of characters
912  // [char_set] = matches any character in the set
913  bool match(const char *pattern, bool case_sensitive = true) const;
914 
915  // Similar to match() except it assumes that we're dealing with file paths
916  // so that it determines whether to do a case-sensitive match depending on
917  // the platform.
918  bool matchFile(const char *pattern) const;
919 
920  // Similar to match() but uses rsync style matching:
921  // * = match any number of characters up to a slash
922  // ** = match any number of characters, including a slash
923  bool matchPath(const char *pattern, bool case_sensitive = true,
924  bool *excludes_branch = nullptr) const;
925 
926  // multiMatch will actually check multiple patterns all separated
927  // by the separator character: i.e. geo1,geo2,foot*
928  //
929  // NOTE: No pattern may contain the separator
930  bool multiMatch(const char *pattern,
931  bool case_sensitive, char separator) const;
932  bool multiMatch(const char *pattern, bool case_sensitive = true,
933  const char *separators = ", ",
934  bool *explicitly_excluded = 0,
935  int *match_index = 0,
936  ut_PatternRecord *pattern_record=NULL) const;
937  bool multiMatch(const UT_StringMMPattern &pattern,
938  bool *explicitly_excluded = 0,
939  int *match_index = 0,
940  ut_PatternRecord *pattern_record=NULL) const;
941 
942  // this method matches a pattern while recording any wildcard
943  // patterns used.
944  bool multiMatchRecord(const char *pattern, int maxpatterns,
945  char *singles, int &nsingles,
946  char **words, int &nwords,
947  bool case_sensitive = true,
948  const char *separators = ", ") const;
949  bool multiMatchRecord(const UT_StringMMPattern &pattern,
950  int maxpatterns,
951  char *singles, int &nsingles,
952  char **words, int &nwords) const;
953  bool multiMatchRecord(const char *pattern,
954  UT_StringHolder &singles,
955  UT_StringArray &words,
956  bool case_sensitive = true,
957  const char *separators = ", ") const;
958 
959  /// matchPattern(UT_WorkArgs &) assumes that the arguments contain the
960  /// components of a pattern to be matched against. The method returns
961  /// true if the pattern matches, false if it doesn't. This matching
962  /// process handles ^ expansion properly (and efficiently).
963  /// If the string doesn't match any components of the pattern, then the
964  /// assumed value is returned.
965  bool matchPattern(const UT_WorkArgs &pattern_args,
966  bool assume_match=false) const;
967 
968  static bool multiMatchCheck(const char *pattern);
969  static bool wildcardMatchCheck(const char *pattern);
970 
971  // Same as match but equivalent to "*pattern*"
972  bool contains(const char *pattern, bool case_sensitive=true) const;
973 
974  // Returns true if our string starts with the specified prefix.
975  bool startsWith(const UT_StringView &prefix,
976  bool case_sensitive = true) const;
977 
978  // Returns true if our string ends with the specified suffix.
979  bool endsWith(const UT_StringView &suffix,
980  bool case_sensitive = true) const;
981 
982  /// Pluralize an English noun ending (i.e. box->boxes or tube->tubes). The
983  /// ending must be lower case to be processed properly.
984  void pluralize();
985 
986  // Will parse strings like 1-10:2,3 and call func for every element
987  // implied. It will stop when the func returns 0 or the parsing
988  // is complete, in which case it returns 1.
989  // Parsing also allows secondary elements to be specified eg 3.4 0.12
990  // The secfunc is used to find the maximum index of secondary elements
991  // for each compound num. The elements are assumed to be
992  // non-negative integers.
993  int traversePattern(int max, void *data,
994  int (*func)(int num, int sec, void *data),
995  unsigned int (*secfunc)(int num,void *data)=0,
996  int offset=0) const;
997 
998  // Fast containment, assumes no special characters
999  const char *fcontain(const char *pattern, bool case_sensitive=true) const
1000  {
1001  if (!myData) return NULL;
1002  return case_sensitive ? strstr(myData, pattern)
1003  : SYSstrcasestr(myData, pattern);
1004  }
1005 
1006  // Given the match pattern which fits our contents, any assigned wildcards
1007  // are substituted. The wildcards may also be indexed.
1008  // Returns true if rename was successful.
1009  //
1010  // @note This code was adapted from CHOP_Rename::subPatterns() and
1011  // works the same way.
1012  //
1013  // eg. this = apple, match = a*le, replace = b* ---> bpp
1014  // this = a_to_b, match = *_to_*, replace = *(1)_to_*(0) ---> b_to_a
1015  bool patternRename(const char *match_pattern, const char *replace);
1016 
1017  // Given the name rule according to which a name consists of a base name
1018  // (char sequence ending in a non-digit) and a numerical suffix, the
1019  // following two methods return the base and the suffix respectively.
1020  // base() needs a string buffer and will return a const char* pointing to it.
1021  // base() always returns a non-zero pointer,
1022  // while suffix() returns 0 if no suffix is found.
1023  const char *base(UT_String &buf) const;
1024  const char *suffix() const;
1025 
1026  // incrementNumberedName will increment a name. If it has a numerical
1027  // suffix, that suffix is incremented. If not, "2" is appended to the
1028  // name. The preserve_padding parameter can be set to true so that zero
1029  // padding is preserved. Incrementing foo0009 will produce foo10 with
1030  // this parameter set to false, or foo0010 if it is set to true.
1031  void incrementNumberedName(bool preserve_padding = false);
1032 
1033  // setFormat is used to set how an outstream formats its ascii output.
1034  // So you can use printf style formatting. eg:
1035  // UT_String::setFormat(cout, "%08d") << 100;
1036  //
1037  // Note: Don't do:
1038  // cout << UT_String::setFormat(cout, "%08d") << 100;
1039  // ^^^^
1040  // Also: The formatting changes (except for field width) are permanent,
1041  // so you'll have to reset them manually.
1042  //
1043  // TODO: A resetFormat, and a push/pop format pair.
1044  static std::ostream &setFormat(std::ostream &os, const char *fmt);
1045  std::ostream &setFormat(std::ostream &os);
1046 
1047  int replacePrefix(const char *oldpref,
1048  const char *newpref);
1049  int replaceSuffix(const char *oldsuffix,
1050  const char *newsuffix);
1051 
1052  // expandArrays will expand a series of tokens of the
1053  // form prefix[pattern]suffix into the names array
1054  //
1055  // Note: Each names[i] must be free'd after use
1056  // and label is used on the non-const parse method
1057  // NB: The max variants are all deprecated, use UT_WorkArgs
1058  // instead.
1059  int expandArrays(char *names[], int max);
1060 
1061  // This routine will ensure no line is over the specified
1062  // number of columns. Offending lines will be wrapped at
1063  // the first spaceChar or cut at exactly cols if spaceChar
1064  // is not found.
1065  // It returns one if any changes were done.
1066  // It currently treats tabs as single characters which should be
1067  // changed.
1068  // It will break words at hyphens if possible.
1069  int format(int cols);
1070 
1071  // This method is similar to changeWord, but it performs
1072  // a "dumb" substitution. Returns the number of substitutions.
1073  int substitute( const char *find, const char *replacement,
1074  bool all = true );
1075 
1076  // This function replaces the character found with another character.
1077  int substitute( char find, char replacement, bool all = true );
1078 
1079  // this function removes the substring at pos and len, and inserts str
1080  // at pos. it returns the difference (new_length - old_length)
1081  int replace( int pos, int len, const char *str );
1082 
1083  // remove the first len characters of this string
1084  int eraseHead(int len)
1085  { return replace(0, len, ""); }
1086 
1087  // remove the last len characters of this string
1088  int eraseTail(int len)
1089  { return replace(length() - len, len, ""); }
1090 
1091  // remove the substring start at pos for len characters
1092  int erase(int pos = 0, int len = -1)
1093  {
1094  if (len < 0)
1095  len = length() - pos;
1096  return replace(pos, len, "");
1097  }
1098 
1099  // insert the given string at pos into this string
1100  int insert(int pos, const char *str)
1101  { return replace(pos, 0, str); }
1102 
1103  // Does a "smart" string compare which will sort based on numbered names.
1104  // That is "text20" is bigger than "text3". In a strictly alphanumeric
1105  // comparison, this would not be the case. Zero is only returned if both
1106  // strings are identical.
1107  static int compareNumberedString(const char *s1, const char *s2,
1108  bool case_sensitive=true,
1109  bool allow_negatives=false);
1110  static int qsortCmpNumberedString(const char *const*v1, const char *const*v2);
1111 
1112  // Like compare numbered strings, but it sorts better when there are
1113  // .ext extensions (i.e. it handles '.' as a special case)
1114  static int compareNumberedFilename(const char *s1, const char *s2,
1115  bool case_sensitive=false);
1116  static int qsortCmpNumberedFilename(const char *const*v1, const char *const*v2);
1117 
1118  /// Compare two version strings which have numbered components separated by
1119  /// dots. eg. "X.Y.Z". Assumes the components go from most to least
1120  /// significant in left to right order.
1121  static int compareVersionString(const char *s1, const char *s2);
1122 
1123  /// Given a path, set the value of the string to the program name. For
1124  /// example: @code
1125  /// str.extractProgramName(argv[0]);
1126  /// str.extractProgramName("c:/Path/program.exe");
1127  /// str.extractProgramName("/usr/bin/program");
1128  /// @endcode
1129  /// This will extract the last path component. Program names may also have
1130  /// their extensions stripped. For example ".exe" on Windows and "-bin" to
1131  /// strip the Houdini wrappers on other platforms.
1132  ///
1133  /// @note The path should be normalized to have forward slashes as the path
1134  /// separator.
1135  void extractProgramName(const char *path,
1136  bool strip_extension=true,
1137  bool normalize_path=true);
1138 
1139  /// Given a path, check to see whether the program name matches the
1140  /// expected. For example: @code
1141  /// if (UT_String::matchProgramName(argv[0], "houdini"))
1142  /// if (UT_String::matchProgramName("c:/Path/houdini.exe", "houdini"))
1143  /// if (UT_String::matchProgramName("/usr/bin/houdini", "houdini"))
1144  /// @endcode
1145  /// The matching is always case-insensitive.
1146  ///
1147  /// @note The path should be normalized to have forward slashes as the path
1148  /// separator.
1149  static bool matchProgramName(const char *path, const char *expected,
1150  bool normalize_path=false);
1151 
1152  /// Convert a path to a "normalized" path. That is, all back-slashes will
1153  /// be converted to forward slashes. On some operating systems, this will
1154  /// leave the string unchanged.
1155  void normalizePath();
1156 
1157  // A very fast integer to string converter. This is faster (at least on
1158  // SGI) than using sprintf("%d"). About two to three times as fast. Both
1159  // of these methods return the length of the string generated.
1160  static int itoa(char *str, int64 i);
1161  static int utoa(char *str, uint64 i);
1162 
1163  // Versions of the above functions which set into this string object
1164  void itoa(int64 i);
1165  void utoa(uint64 i);
1166 
1167  // A reader-friendly version of itoa. This places commas appropriately
1168  // to ensure the person can pick out the kilo points easily.
1169  // This can handle numbers up to 999,999,999,999,999,999.
1170  void itoaPretty(int64 val);
1171 
1172  /// Convert the given time delta (in milliseconds)
1173  /// to a reader-friendly string in days, hours, minutes, and seconds.
1174  void timeDeltaToPrettyString(double time_ms);
1175 
1176  /// Convert the given time delta (in milliseconds)
1177  /// to a reader-friendly string in milliseconds.
1178  void timeDeltaToPrettyStringMS(double time_ms);
1179 
1180  // Do an sprintf into this string. This method will allocate exactly the
1181  // number of bytes required for the final string. If the format string is
1182  // bad, isstring() will return false afterwards.
1183  int sprintf(const char *fmt, ...) SYS_PRINTF_CHECK_ATTRIBUTE(2, 3);
1184 
1185  // This will change the string into a valid C style variable name.
1186  // All non-alpha numerics will be converted to _.
1187  // If the first letter is a digit, it is prefixed with an _.
1188  // This returns 0 if no changes occurred, 1 if something had to
1189  // be adjusted.
1190  // Note that this does NOT force the name to be non-zero in length.
1191  // The safechars parameter is a string containing extra characters
1192  // that should be considered safe. These characters are not
1193  // converted to underscores.
1194  int forceValidVariableName(const char *safechars = NULL);
1195  // Returns true if the string matches a C-style variable name.
1196  // The safechars are not allowed to be the start.
1197  // Matching forceValid, empty strings are considered valid!
1198  bool isValidVariableName(const char *safechars = NULL) const;
1199 
1200  // This will force all non-alphanumeric characters to be underscores.
1201  // Returns true if any changes were required.
1202  bool forceAlphaNumeric();
1203 
1204  // This function will calculate the relative path to get from src to dest.
1205  // If file_path is false, this method assume it is dealing with node paths.
1206  // If file_path is true, it will also deal with Windows drive letters and
1207  // UNC paths.
1208  void getRelativePath(const char *src_fullpath,
1209  const char *dest_fullpath,
1210  bool file_path = false);
1211 
1212  // This function takes two absolute paths and returns the length of the
1213  // longest common path prefix, up to and including the last '/'. This
1214  // means, for instance, that if fullpath1[len1-1] == '/' then all of
1215  // fullpath1 is eligible as a common prefix.
1216  // NB: This function DOES NOT handle NT style drive names! It is currently
1217  // only used for op paths. If you want to add support for this, you
1218  // should add another default parameter to do this.
1219  static int findLongestCommonPathPrefix(const char *fullpath1, int len1,
1220  const char *fullpath2, int len2);
1221 
1222  // This function tests whether we are an absolute path, and returns true or
1223  // false depending on whether we are.
1224  bool isAbsolutePath(bool file_path=false) const;
1225 
1226  // This function assumes that we are an absolute path and will remove all
1227  // un-necessary components from it as long as we remain an absolute path.
1228  // We return false if an error was encountered, in which case the results
1229  // are unpredictable.
1230  bool collapseAbsolutePath(bool file_path=false);
1231 
1232  // This function will make sure that the string is at most max_length
1233  // characters long. If the string is longer than that, it will
1234  // replace the middle of the string by "...". Returns true if the string
1235  // has changed and false otherwise. max_length must be greater than 3.
1236  bool truncateMiddle(int max_length);
1237 
1238  // This function is an abomination when you can just write:
1239  // UT_String foo("");
1240  // ...
1241  // if (foo.isstring())
1242  // ...
1243  // Avoid using it and do not write functions that return "const UT_String&"
1244  static const UT_String &getEmptyString();
1245 
1246  /// Count the number of valid characters in the : modifier for variable
1247  /// expansion. For example, the string ":r" will return 2, the string
1248  /// ":r:t" will return 4, the string ":z" will return 0. These use the csh
1249  /// expansion modifiers.
1250  ///
1251  /// If the string doesn't start with a ':', the method will return 0.
1252  static int countCshModifiers(const char *src);
1253 
1254  /// Applies a "csh" style modifier string to this string. For example, a
1255  /// modifier string of ":e" would replace the string with the file
1256  /// extension of the string.
1257  ///
1258  /// Returns true if any modifications were performed
1259  bool applyCshModifiers(const char *modifiers);
1260 
1261 
1262  /// This will remove the range from a string of the form foo$Fbar.ext (#-#)
1263  /// and return the first number from the range. If there is only 1 range
1264  /// number, it will be returned. If there is no range, 0 is returned.
1265  /// The returned string is hardened.
1266  UT_String removeRange ();
1267 
1268  /// This will format a value to represent a given size in bytes, kilobytes,
1269  /// megabytes, etc.
1270  void formatByteSize(exint size, int digits=2);
1271 
1272  // UTF-8 helpers
1273 
1274  /// Returns the number of Unicode codepoints in the string, assuming it's
1275  /// encoded as UTF-8.
1276  int getCodePointCount() const;
1277 
1278  /// Returns a list of Unicode code points from this string.
1279  void getAsCodePoints(UT_Int32Array &cp_list) const;
1280 
1281  /// Friend specialization of std::swap() to use UT_String::swap()
1282  /// @internal This is needed because standard std::swap() implementations
1283  /// will try to copy the UT_String objects, causing hardened strings to
1284  /// become weak.
1285  friend void swap(UT_String& a, UT_String& b) { a.swap(b); }
1286 
1287  /// expandArrays will expand a series of tokens of the
1288  /// form prefix[pattern]suffix into the names UT_WorkArgs
1289  /// @param tokens will store the parsed tokens without expansion
1290  /// @param names will store the parsed tokens with expansion
1291  /// This doesn't need a max argument like:
1292  /// int expandArrays(char *names[], int max)
1293  ///
1294  // Note: Each names[i] must be free'd after use
1295  // and label is used on the non-const parse method
1296  int expandArrays(UT_WorkArgs &tokens, UT_WorkArgs &names);
1297 
1298 private:
1299  template <typename OSTREAM>
1300  void saveInternal(OSTREAM &os, bool binary) const;
1301 
1302  void freeData();
1303 
1304  /// implements a few csh-style modifiers.
1305  /// @param mod pointer to a string starting with the modifier to apply.
1306  /// so, to apply a global substitute modifier :gs/l/r/
1307  /// mod should be: s/l/r
1308  /// @param all True if all possible modifications should be
1309  /// (recursively) performed.
1310  /// Otherwise, at most one modification is applied.
1311  /// @return whether any modification was performed
1312  bool applyNextModifier(const char *mod, bool all);
1313 
1314 
1315  /// Sets myIsReference to false and copies the other_string into myData,
1316  /// but attempts to avoid unnecessary memory reallocations. Frees up
1317  /// any previous data, if necessary. If other_string is NULL, the call
1318  /// is equivalent to freeData().
1319  void doSmartCopyFrom(const char* other_string);
1320 
1321  static int compareNumberedStringInternal(const char *s1, const char *s2,
1322  bool case_sensitive,
1323  bool allow_negatives,
1324  bool dot_first);
1325 
1326  static SYS_FORCE_INLINE void utStrFree(char *str)
1327  {
1328 #if defined(UT_DEBUG) && !defined(_WIN32)
1329  if (str)
1330  ::memset((void *)str, 0xDD, ::strlen(str) + 1);
1331 #endif
1332  ::free((void *)str);
1333  }
1334 
1335  char *myData;
1336  bool myIsReference:1,
1337  myIsAlwaysDeep:1;
1338 
1339  /// This operator saves the string to the stream via the string's
1340  /// saveAscii() method, protecting any whitespace (by adding quotes),
1341  /// backslashes or quotes in the string.
1342  friend UT_API std::ostream &operator<<(std::ostream &os, const UT_String &d);
1343  friend UT_API UT_OStream &operator<<(UT_OStream &os, const UT_String &d);
1344 
1345  friend class UT_API UT_StringRef;
1346 };
1347 
1348 /// Creates a shallow wrapper around a string for calling UT_String's many
1349 /// const algorithms.
1351 {
1352 public:
1353  // We only have a single constructor which is always shallow.
1355  UT_StringWrap(const char *str)
1356  : UT_String(str)
1357  {}
1358  // It seems necessary on MSVC to forceinline the empty constructor in order
1359  // to have it inlined.
1362  {}
1363 
1364  // Manually wrap methods that have non-const overloads or return non-const
1365  // pointers.
1366  char operator()(unsigned i) const { return UT_String::operator()(i); }
1367  const char *findChar(int c) const { return UT_String::findChar(c); }
1368  const char *findChar(const char *str) const { return UT_String::findChar(str); }
1369  const char *findNonSpace() const { return UT_String::findNonSpace(); }
1370  const char *lastChar(int c) const { return UT_String::lastChar(c); }
1371 
1372  using UT_String::operator==;
1373  using UT_String::operator!=;
1374  using UT_String::c_str;
1375  using UT_String::length;
1376 
1377  using UT_String::base;
1378  using UT_String::compare;
1379  using UT_String::contains;
1380  using UT_String::count;
1381  using UT_String::countChar;
1382  using UT_String::distance;
1383  using UT_String::endsWith;
1384  using UT_String::equal;
1385  using UT_String::fcontain;
1387  using UT_String::fileName;
1388  using UT_String::findWord;
1389  using UT_String::findString;
1392  using UT_String::isFloat;
1393  using UT_String::isInteger;
1395  using UT_String::isstring;
1396  using UT_String::match;
1397  using UT_String::matchFile;
1399  using UT_String::matchPath;
1401  using UT_String::multiMatch;
1405  using UT_String::save;
1406  using UT_String::saveAscii;
1407  using UT_String::saveBinary;
1408  using UT_String::splitPath;
1409  using UT_String::startsWith;
1410  using UT_String::substr;
1411  using UT_String::suffix;
1412  using UT_String::toFloat;
1413  using UT_String::toInt;
1414 };
1415 
1418 {
1419  if (!myIsReference && myData)
1420  utStrFree(myData);
1421 }
1422 
1424 void
1425 UT_String::freeData()
1426 {
1427  if (myData)
1428  {
1429  if (!myIsReference)
1430  utStrFree(myData);
1431  myData = 0;
1432  }
1433 }
1434 
1435 inline void
1437 {
1438  // We can't use UTswap because it doesn't work with bit fields.
1439  bool temp = myIsReference;
1440  myIsReference = other.myIsReference;
1441  other.myIsReference = temp;
1442 
1443  char *tmp_data = myData;
1444  myData = other.myData;
1445  other.myData = tmp_data;
1446 
1447  if (myIsAlwaysDeep)
1448  harden();
1449 
1450  if (other.myIsAlwaysDeep)
1451  other.harden();
1452 }
1453 
1455 public:
1456  UT_String myOut; // Points to argument following '>'
1457  UT_String myErr; // Points to argument following '>&'
1458  UT_String myIn; // Points to argument following '<'
1459  short myDoubleOut; // If the argument is '>>' or '>>&'
1460  short myDoubleIn; // If the argument is '<<'
1461 };
1462 
1463 UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[]);
1464 
1465 /// Does a "smart" string compare which will sort based on numbered names.
1466 /// That is "text20" is bigger than "text3". In a strictly alphanumeric
1467 /// comparison, this would not be the case.
1469 {
1470  bool operator()(const char *s1, const char *s2) const
1471  {
1472  return UT_String::compareNumberedString(s1, s2) < 0;
1473  }
1474 
1475  bool operator()(const std::string &s1, const std::string &s2) const
1476  {
1477  return operator()(s1.c_str(), s2.c_str());
1478  }
1479 };
1480 
1481 #endif
bool match(const char *pattern, bool case_sensitive=true) const
GLdouble s
Definition: glew.h:1390
int tokenize(char *argv[], int max_args, const char *separators=" \t\n")
Definition: UT_String.h:850
UT_String & operator+=(const char *str)
Definition: UT_String.h:339
static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code=0)
Definition: UT_String.h:903
int distance(const char *str, bool case_sensitive=true, bool allow_subst=true) const
char * lastChar(int c)
Definition: UT_String.h:570
vint4 max(const vint4 &a, const vint4 &b)
Definition: simd.h:4703
std::string sprintf(const char *fmt, const Args &...args)
Definition: strutil.h:136
bool operator!=(const char *str) const
Definition: UT_String.h:424
UT_String & operator+=(const UT_String &str)
Definition: UT_String.h:371
string_view OIIO_API strip(string_view str, string_view chars=string_view())
bool operator>=(const UT_StringRef &str) const
Definition: UT_String.h:480
int count(const char *str, bool case_sensitive=true) const
Count the occurrences of the string.
GLsizeiptr size
Definition: glew.h:1681
GLenum src
Definition: glew.h:2410
GLuint const GLchar * name
Definition: glew.h:1814
bool matchFileExtension(const char *match_extension) const
Definition: UT_String.h:675
void swap(UT_String &other)
Definition: UT_String.h:1436
void saveAscii(UT_OStream &os) const
Definition: UT_String.h:299
bool operator()(const char *s1, const char *s2) const
Definition: UT_String.h:1470
const Args & args
Definition: printf.h:628
T negative(const T &val)
Return the unary negation of the given value.
Definition: Math.h:90
GLuint index
Definition: glew.h:1814
const char * lastChar(int c) const
Definition: UT_String.h:1370
bool isInteger(bool skip_spaces=false) const
Determine if string can be seen as a single integer number.
bool operator<=(const char *str) const
Definition: UT_String.h:448
UT_String myIn
Definition: UT_String.h:1458
GLuint const GLfloat * val
Definition: glew.h:2794
fpreal toFloat() const
bool operator==(const char *str) const
Definition: UT_String.h:412
bool operator<=(const UT_String &str) const
Definition: UT_String.h:452
int toInt() const
char * fileExtension()
Definition: UT_String.h:650
GLboolean GLboolean GLboolean GLboolean a
Definition: glew.h:9477
bool isHard() const
Returns whether this string is hardened already.
Definition: UT_String.h:242
SYS_FORCE_INLINE T * SYSconst_cast(const T *foo)
Definition: SYS_Types.h:136
UT_String makeQuotedString(char delimiter='\'', bool escape_nonprinting=false) const
const char * findChar(const char *str) const
Definition: UT_String.h:564
int64 exint
Definition: SYS_Types.h:125
void swap(T &lhs, T &rhs)
Definition: pugixml.cpp:7172
void write(unsigned i, char c)
Definition: UT_String.h:523
bool operator==(const UT_String &str) const
Definition: UT_String.h:416
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
Definition: glew.h:1254
#define UT_API
Definition: UT_API.h:13
const char * fileExtension() const
Definition: UT_String.h:665
bool isAbsolutePath(bool file_path=false) const
bool findString(const char *str, bool fullword, bool usewildcards) const
char * findChar(int c)
Definition: UT_String.h:554
#define SYS_EXINT_MAX
Definition: SYS_Types.h:181
char & operator()(unsigned i)
Definition: UT_String.h:516
bool equal(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:399
const char * findNonSpace() const
Definition: UT_String.h:1369
unsigned long long uint64
Definition: SYS_Types.h:117
int compare(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:379
void clear()
Reset the string to the default constructor.
Definition: UT_String.h:309
bool isAlwaysDeep() const
Definition: UT_String.h:204
const char * c_str() const
Definition: UT_String.h:505
bool matchPath(const char *pattern, bool case_sensitive=true, bool *excludes_branch=nullptr) const
unsigned length() const
Return length of string.
Definition: UT_String.h:542
int compare(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:393
const char * suffix() const
bool operator<(const char *str) const
Definition: UT_String.h:436
bool operator<(const UT_StringRef &str) const
Definition: UT_String.h:444
UT_API void UTexprLookup(const char *name, UT_String &result)
bool contains(const char *pattern, bool case_sensitive=true) const
int tokenize(UT_WorkArgs &argv, const char *separators=" \t\n")
Definition: UT_String.h:858
std::ostream & operator<<(std::ostream &ostr, const DataType &a)
Definition: DataType.h:133
UT_String(UT_AlwaysDeepType, const std::string &str)
Construct UT_String from a std::string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:151
void hardenIfNeeded(const char *s)
Take shallow copy and make it deep.
Definition: UT_String.h:232
const char * buffer() const
Definition: UT_String.h:506
CompareResults OIIO_API compare(const ImageBuf &A, const ImageBuf &B, float failthresh, float warnthresh, ROI roi={}, int nthreads=0)
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:40
GLfloat GLfloat GLfloat v2
Definition: glew.h:1856
SYS_FORCE_INLINE uint32 hash() const
Definition: UT_String.h:898
bool operator==(const UT_StringRef &str) const
Definition: UT_String.h:420
char operator()(unsigned i) const
Definition: UT_String.h:1366
int tokenize(char *argv[], int max_args, char separator)
Definition: UT_String.h:838
bool operator>=(const char *str) const
Definition: UT_String.h:472
UT_String & operator=(UT_String &&str)
Definition: UT_String.h:175
int tokenizeInPlace(T &list, const char *separators=" \t\n")
Definition: UT_String.h:874
OIIO_FORCEINLINE const vint4 & operator+=(vint4 &a, const vint4 &b)
Definition: simd.h:4246
#define SYS_SAFE_BOOL
Definition: SYS_Compiler.h:55
std::string OIIO_API replace(string_view str, string_view pattern, string_view replacement, bool global=false)
bool operator!=(const UT_String &str) const
Definition: UT_String.h:428
GLuint buffer
Definition: glew.h:1680
GLint GLenum GLsizei GLint GLsizei const void * data
Definition: glew.h:1379
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:134
bool operator>=(const UT_String &str) const
Definition: UT_String.h:476
const GLuint GLenum const void * binary
Definition: glew.h:3502
#define SYS_PRINTF_CHECK_ATTRIBUTE(string_index, first_to_check)
Definition: SYS_Types.h:433
char * findNonSpace()
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
Definition: CE_Vector.h:127
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
bool operator>(const UT_String &str) const
Definition: UT_String.h:464
char * SYSstrtok(char *string, const char *delimit, char **context)
Definition: SYS_String.h:113
char * findChar(const char *str)
Definition: UT_String.h:562
GLuint const GLuint * names
Definition: glew.h:2690
#define UT_ASSERT_SLOW(ZZ)
Definition: UT_Assert.h:133
const GLfloat * c
Definition: glew.h:16296
GLuint GLsizei GLsizei * length
Definition: glew.h:1825
const char * findChar(int c) const
Definition: UT_String.h:556
GLsizei GLsizei GLfloat distance
Definition: glew.h:13640
SYS_FORCE_INLINE UT_String(const char *str=0)
Construct UT_String from a C string, using shallow semantics.
Definition: UT_String.h:97
void harden()
Take shallow copy and make it deep.
Definition: UT_String.h:213
void saveAscii(std::ostream &os) const
Definition: UT_String.h:298
bool equal(const UT_StringRef &str, bool case_sensitive=true) const
Definition: UT_String.h:407
UT_String(UT_String &&str) noexcept
Definition: UT_String.h:167
long long int64
Definition: SYS_Types.h:116
bool equal(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:403
typedef int(WINAPI *PFNWGLRELEASEPBUFFERDCARBPROC)(HPBUFFERARB hPbuffer
SYS_FORCE_INLINE bool UTisdigit(char c)
Definition: UT_String.h:62
void setAlwaysDeep(bool deep)
Make a string always deep.
Definition: UT_String.h:188
bool operator>(const UT_StringRef &str) const
Definition: UT_String.h:468
const char * findChar(const char *str) const
Definition: UT_String.h:1368
bool matchFile(const char *pattern) const
bool operator()(const std::string &s1, const std::string &s2) const
Definition: UT_String.h:1475
int eraseHead(int len)
Definition: UT_String.h:1084
void toUpper()
Definition: UT_String.h:608
void adopt(UT_String &str)
Definition: UT_String.h:285
SYS_FORCE_INLINE ~UT_StringWrap()
Definition: UT_String.h:1361
const char * findWord(const char *word) const
bool operator>(const char *str) const
Definition: UT_String.h:460
int64 getMemoryUsage(bool inclusive=true) const
Return memory usage in bytes.
Definition: UT_String.h:546
void saveBinary(std::ostream &os) const
Save string to binary stream.
Definition: UT_String.h:294
bool isFloat(bool skip_spaces=false, bool loose=false, bool allow_underscore=false) const
Determine if string can be seen as a single floating point number.
bool OIIO_API contains(string_view a, string_view b)
Does 'a' contain the string 'b' within it?
GLsizei const GLchar *const * path
Definition: glew.h:6461
bool isValidVariableName(const char *safechars=NULL) const
GLdouble GLdouble GLdouble b
Definition: glew.h:9122
static int compareNumberedString(const char *s1, const char *s2, bool case_sensitive=true, bool allow_negatives=false)
short myDoubleIn
Definition: UT_String.h:1460
void adopt(char *s)
Definition: UT_String.h:275
GLsizei const GLchar *const * string
Definition: glew.h:1844
UT_String pathUpToExtension() const
GLenum func
Definition: glcorearb.h:782
int substr(UT_String &buf, int index, int len=0) const
basic_printf_context_t< buffer >::type context
Definition: printf.h:631
SYS_FORCE_INLINE bool UTisstring(const char *s)
Definition: UT_String.h:57
GLuint num
Definition: glew.h:2690
void save(std::ostream &os, bool binary) const
Save string to stream. Saves as binary if binary is true.
const void * ptr(const T *p)
Definition: format.h:3292
short myDoubleOut
Definition: UT_String.h:1459
fpreal64 fpreal
Definition: SYS_Types.h:277
bool multiMatch(const char *pattern, bool case_sensitive, char separator) const
char * steal()
Definition: UT_String.h:253
char * SYSstrcasestr(const char *haystack, const char *needle)
Replacement for strcasestr, since no equivalent exists on Win32.
Definition: SYS_String.h:297
int SYSstrcasecmp(const char *a, const char *b)
Definition: SYS_String.h:227
bool multiMatchRecord(const char *pattern, int maxpatterns, char *singles, int &nsingles, char **words, int &nwords, bool case_sensitive=true, const char *separators=", ") const
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
Definition: glew.h:12681
int parseNumberedFilename(UT_String &prefix, UT_String &frame, UT_String &suff, bool negative=true, bool fractional=false) const
FMT_CONSTEXPR bool find(Ptr first, Ptr last, T value, Ptr &out)
Definition: format.h:2104
GLuint GLuint GLsizei count
Definition: glew.h:1253
UT_AlwaysDeepType
Definition: UT_String.h:85
unsigned int uint32
Definition: SYS_Types.h:40
const char * lastChar(int c) const
Definition: UT_String.h:572
UT_String myOut
Definition: UT_String.h:1456
UT_String myErr
Definition: UT_String.h:1457
GLuint64EXT * result
Definition: glew.h:14007
bool isstring() const
Definition: UT_String.h:710
int findLongestCommonSuffix(const char *with) const
void hardenIfNeeded()
Take shallow copy and make it deep.
Definition: UT_String.h:222
const char * findChar(int c) const
Definition: UT_String.h:1367
int parse(char *argv[], int max_args, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:817
UT_String(UT_AlwaysDeepType, const char *str=0)
Construct UT_String from a C string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:144
bool operator<(const UT_String &str) const
Definition: UT_String.h:440
int erase(int pos=0, int len=-1)
Definition: UT_String.h:1092
#define const
Definition: zconf.h:214
int tokenize(UT_WorkArgs &argv, char separator)
Definition: UT_String.h:844
GLubyte * pattern
Definition: glew.h:5711
bool operator<=(const UT_StringRef &str) const
Definition: UT_String.h:456
bool startsWith(const UT_StringView &prefix, bool case_sensitive=true) const
void splitPath(UT_String &dir_name, UT_String &file_name) const
char operator()(unsigned i) const
Definition: UT_String.h:509
GLenum GLuint GLsizei const GLchar * buf
Definition: glew.h:2580
int parse(UT_WorkArgs &argv, int start_arg=0, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:823
const char * base(UT_String &buf) const
void removeLast()
Remove the last character.
Definition: UT_String.h:325
bool all(const vbool4 &v)
Definition: simd.h:3371
UT_API void UTvarLookup(const char *name, UT_String &result)
SYS_FORCE_INLINE UT_StringWrap(const char *str)
Definition: UT_String.h:1355
bool endsWith(const UT_StringView &suffix, bool case_sensitive=true) const
UT_String(const std::string &str)
Construct UT_String from a std::string, always doing a deep copy. The result will only be a UT_Always... [description truncated in the generated tooltip; see the full comment at UT_String.h:122]
Definition: UT_String.h:122
int eraseTail(int len)
Definition: UT_String.h:1088
const char * fileName() const
Definition: UT_String.h:629
GLfloat GLfloat v1
Definition: glew.h:1852
int countChar(int c) const
Return the number of occurrences of the specified character.
UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[])
int tokenize(T &list, const char *separators=" \t\n")
Definition: UT_String.h:867
const char * nonNullBuffer() const
Definition: UT_String.h:507
GLenum GLsizei len
Definition: glew.h:7752
void toLower()
Definition: UT_String.h:615
int insert(int pos, const char *str)
Definition: UT_String.h:1100
GLintptr offset
Definition: glew.h:1682
const char * fcontain(const char *pattern, bool case_sensitive=true) const
Definition: UT_String.h:999
bool operator!=(const UT_StringRef &str) const
Definition: UT_String.h:432