HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_String.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  *
7  * NAME: Utility Library (C++)
8  *
9  * COMMENTS: String class
10  *
11  */
12 
13 #ifndef __UT_String_h__
14 #define __UT_String_h__
15 
16 #include "UT_API.h"
17 
18 #include "UT_Assert.h"
19 #include "UT_VectorTypes.h"
20 
21 #include <SYS/SYS_Compiler.h>
22 #include <SYS/SYS_Inline.h>
23 #include <SYS/SYS_String.h>
24 #include <SYS/SYS_Types.h>
25 
26 #include <functional>
27 #include <iosfwd>
28 #include <string>
29 
30 #include <ctype.h>
31 #include <stdlib.h>
32 #include <string.h>
33 
34 #ifdef WIN32
35  #define strcasecmp stricmp
36  #define strncasecmp strnicmp
37 #endif
38 
39 class UT_OStream;
40 class UT_String;
41 class UT_StringCshIO;
42 class UT_WorkArgs;
43 class UT_IStream;
44 class ut_PatternRecord;
45 class UT_StringMMPattern;
46 class UT_StringArray;
47 class UT_StringHolder;
48 class UT_StringRef;
49 class UT_StringView;
50 
51 // The following lookup functions are used by cshParse. By default,
52 // varLookup simply uses getenv, exprLookup opens the command as
53 // a pipe and uses the result.
54 UT_API extern void UTvarLookup(const char *name, UT_String &result);
55 UT_API extern void UTexprLookup(const char *name, UT_String &result);
56 
57 SYS_FORCE_INLINE bool UTisstring(const char *s) { return s && *s; }
58 
59 // Because invoking isdigit with a negative value is undefined,
60 // some MSVC compilers decide to crash. Thus we cast explicitly
61 // to unsigned.
63  { return isdigit((unsigned char) c); }
64 
65 /// @file
66 /// @class UT_String
67 ///
68 /// UT_String is a string class that supports two different types of assignment
69 /// semantics:
70 /// - Shallow (default): Just reference the given string and NOT take
71 /// ownership.
72 /// - Deep: Make a copy of the given string, taking ownership in the
73 /// process (aka making it "hard").
74 ///
75 /// If UT_String::harden() is called, or any other UT_String method that
76 /// requires modifying the string, it will make a copy of its reference pointer
77 /// (and take ownership) first.
78 ///
80 {
81 public:
82 
83  /// UT_String can be constructed with UT_String::ALWAYS_DEEP to create an
84  /// object that will always perform deep copies when assigned to.
85  enum UT_AlwaysDeepType { ALWAYS_DEEP };
86 
87  /// @brief Construct UT_String from a C string, using shallow semantics
88  ///
89  /// @param str The initial string.
90  /// @param deepCopy If true, a copy of @em str will be used.
91  /// @param len Number of characters to use from @em str. Use -1 to
92  /// use the entire string. If len is non-negative, then
93  /// deepCopy will be implicitly set to true. If str is NULL
94  /// and len is non-negative, then it will be initialized
95  /// with "".
97  UT_String(const char *str = 0)
98  : myIsReference(true)
99  , myIsAlwaysDeep(false)
100  , myData(SYSconst_cast(str))
101  {}
102  UT_String(const char *str, int deepCopy, int len = -1);
103 
104  /// @brief Construct UT_String from a std::string, always doing
105  /// a deep copy. The result will only be a UT_AlwaysDeep if the
106  /// appropriate version is used, however!
107  ///
108  /// NOTE: You cannot do:
109  /// UT_String foo;
110  /// std::string bar = "hello world";
111  /// foo = UT_String(bar.substr(2, 5));
112  ///
113  /// It provides a shortcut for constructing a UT_String from a function
114  /// that returns a std::string by value. For example, it lets you write
115  /// @code
116  /// UT_String str(func());
117  /// @endcode
118  /// instead of
119  /// @code
120  /// UT_String str(func().c_str(), /*harden=*/true);
121  /// @endcode
122  explicit UT_String(const std::string &str)
123  : myIsReference(false),
124  myIsAlwaysDeep(false)
125  { myData = strdup(str.c_str()); }
126 
127  /// @brief Construct UT_String from a UT_StringHolder.
128  /// This always duplicates and uses ALWAYS_DEEP semantics.
129  explicit UT_String(const UT_StringHolder &str);
130 
131 private:
132  /// This is intentionally not implemented - callers should choose between
133  /// the const char * and UT_StringHolder constructors, depending on whether
134  /// they want to make a deep copy.
135  /// @see UT_StringWrap.
136  UT_String(const UT_StringRef &);
137 
138 public:
139  /// @brief Construct UT_String from a UT_StringView.
140  /// This always duplicates and uses ALWAYS_DEEP semantics.
141  explicit UT_String(const UT_StringView &sv);
142 
143  /// @brief Construct UT_String from a C string, using ALWAYS_DEEP semantics
144  UT_String(UT_AlwaysDeepType, const char *str = 0)
145  : myIsReference(false),
146  myIsAlwaysDeep(true)
147  { myData = str ? strdup(str) : 0; }
148 
149  /// @brief Construct UT_String from a std::string, using ALWAYS_DEEP
150  /// semantics
152  : myIsReference(false),
153  myIsAlwaysDeep(true)
154  { myData = strdup(str.c_str()); }
155 
156  /// Copy constructor
157  ///
158  /// If the string we're copying from is ALWAYS_DEEP, then this object will
159  /// also become ALWAYS_DEEP. This way, you can pass/return a string by
160  /// value.
161  UT_String(const UT_String &str);
162 
163  ~UT_String();
164 
165  /// Move operators
166  /// @{
168  : myData(str.myData)
169  , myIsReference(str.myIsReference)
170  , myIsAlwaysDeep(str.myIsAlwaysDeep)
171  {
172  str.myData = nullptr;
173  str.myIsReference = !str.myIsAlwaysDeep;
174  }
176  {
177  freeData();
178  myData = str.myData;
179  myIsReference = str.myIsReference;
180  myIsAlwaysDeep = str.myIsAlwaysDeep;
181  str.myData = nullptr;
182  str.myIsReference = !str.myIsAlwaysDeep;
183  return *this;
184  }
185  /// @}
186 
187  /// Make a string always deep
188  void setAlwaysDeep(bool deep)
189  {
190  myIsAlwaysDeep = deep;
191  if (deep && myIsReference)
192  {
193  if (myData != NULL)
194  harden();
195  else
196  {
197  // This takes the same semantic as
198  // str = NULL;
199  // where str is an always deep string
200  myIsReference = false;
201  }
202  }
203  }
204  bool isAlwaysDeep() const
205  {
206  return myIsAlwaysDeep;
207  }
208 
209  void swap( UT_String &other );
210 
211  /// Take shallow copy and make it deep.
212  // @{
213  void harden()
214  {
215  if (!myIsReference && myData)
216  return;
217  myData = strdup(myData ? myData : "");
218  myIsReference = false;
219  }
220 
221  void harden(const char *s, int len = -1);
223  {
224  if (myIsReference)
225  {
226  if (isstring())
227  harden();
228  else
229  *this = "";
230  }
231  }
232  void hardenIfNeeded(const char *s)
233  {
234  if (s && *s)
235  harden(s);
236  else
237  *this = "";
238  }
239  // @}
240 
241  /// Returns whether this string is hardened already.
242  bool isHard() const { return !myIsReference; }
243 
244  /// Give up ownership of string
245  ///
246  /// Take a hard reference and make it shallow. This method makes sure
247  /// it gives back something you can delete, because this UT_String is
248  /// taking its hands off the data. Use it with care since it may lead
249  /// to memory leaks if, for example, you harden it again later.
250  ///
251  /// In the case of ALWAYS_DEEP strings, this is disallowed so it will
252  /// just return a copy of the data.
253  char * steal(void)
254  {
255  if (!myIsAlwaysDeep)
256  {
257  if (myIsReference)
258  myData = strdup(myData ? myData : ""); // harden
259  myIsReference = true; // but say it's soft
260  return myData;
261  }
262  else
263  {
264  // return a new copy of the data without releasing
265  // ownership for always deep strings
266  return strdup(myData ? myData : "");
267  }
268  }
269 
270  /// Take ownership of given string
271  ///
272  /// adopt() is the opposite of steal(). Basically, you're giving
273  /// the UT_String ownership of the string.
274  // @{
275  void adopt(char *s)
276  {
277  if (!myIsReference)
278  {
279  if (s != myData)
280  free(myData);
281  }
282  myData = s;
283  myIsReference = false;
284  }
285  void adopt(UT_String &str)
286  {
287  adopt(str.steal());
288  }
289  void adopt(UT_StringHolder &holder);
290 
291  // @}
292 
293  /// Save string to binary stream.
294  void saveBinary(std::ostream &os) const { save(os, true); }
295 
296  /// Save string to ASCII stream. This will add double quotes and escape to
297  /// the stream if necessary (empty string or contains spaces).
298  void saveAscii(std::ostream &os) const { save(os, false); }
299  void saveAscii(UT_OStream &os) const { save(os, false); }
300 
301  /// Save string to stream. Saves as binary if @em binary is true.
302  void save(std::ostream &os, bool binary) const;
303  void save(UT_OStream &os, bool binary) const;
304 
305  /// Load string from stream. Use is.eof() to check eof status
306  bool load(UT_IStream &is);
307 
308  /// Reset the string to the default constructor.
309  void clear()
310  { *this = (const char *)NULL; }
311 
312  /// Prepend a string (or character)
313  // @{
314  void prepend(const char *prefix);
315  void prepend(char ch);
316  // @}
317 
318  /// Append a character
319  void append(char ch);
320 
321  /// Append a string or a section of a string.
322  void append(const char *str, exint len = -1);
323 
324  /// Remove the last character
325  void removeLast() { truncate(length()-1); }
326  /// Truncate the string at the Nth character
327  void truncate(exint len);
328 
329  UT_String &operator=(const UT_String &str);
330  UT_String &operator=(const char *str);
331  UT_String &operator=(const std::string &str);
332  UT_String &operator=(const UT_StringHolder &str);
333  UT_String &operator=(const UT_StringView &str);
334 private:
335  /// Not implemented - see UT_String(const UT_StringRef &).
336  UT_String &operator=(const UT_StringRef);
337 
338 public:
339  UT_String &operator+=(const char *str)
340  {
341  if (!isstring())
342  {
343  // We are an empty string, so we merely copy
344  // the incoming string rather than trying to append
345  // to it.
346  harden(str);
347  }
348  else
349  {
350  bool same = (str == myData);
351  harden();
352  if (str)
353  {
354  int mylen = (int)strlen(myData);
355  myData = (char *)realloc(myData,
356  mylen+strlen(str)+1);
357  if (!same)
358  {
359  strcpy(&myData[mylen], str);
360  }
361  else
362  {
363  memcpy(myData + mylen, myData, mylen);
364  myData[mylen * 2] = '\0';
365  }
366  }
367  }
368  return *this;
369  }
370 
372  {
373  *this += (const char *)str.myData;
374  return *this;
375  }
376  UT_String &operator+=(const UT_StringRef &str);
377 
378  // Basic equality functions and operators
379  int compare(const char *str, bool case_sensitive=true) const
380  {
381  // Unlike std::string, UT_String treats NULL and
382  // the empty string as distinct (empty has precedence).
383  if (myData==0 || str==0)
384  {
385  if (myData) return 1;
386  if(str) return -1;
387  return 0;
388  }
389  if (case_sensitive)
390  return strcmp(myData, str);
391  return strcasecmp(myData, str);
392  }
393  int compare(const UT_String &str, bool case_sensitive=true) const
394  {
395  return compare(str.myData,case_sensitive);
396  }
397  int compare(const UT_StringRef &str, bool case_sensitive=true) const;
398 
399  bool equal(const char *str, bool case_sensitive=true) const
400  {
401  return compare(str,case_sensitive)==0;
402  }
403  bool equal(const UT_String &str, bool case_sensitive=true) const
404  {
405  return compare(str.myData,case_sensitive)==0;
406  }
407  bool equal(const UT_StringRef &str, bool case_sensitive=true) const
408  {
409  return compare(str,case_sensitive)==0;
410  }
411 
412  bool operator==(const char *str) const
413  {
414  return compare(str)==0;
415  }
416  bool operator==(const UT_String &str) const
417  {
418  return compare(str.myData)==0;
419  }
420  bool operator==(const UT_StringRef &str) const
421  {
422  return compare(str)==0;
423  }
424  bool operator!=(const char *str) const
425  {
426  return compare(str)!=0;
427  }
428  bool operator!=(const UT_String &str) const
429  {
430  return compare(str.myData)!=0;
431  }
432  bool operator!=(const UT_StringRef &str) const
433  {
434  return compare(str)!=0;
435  }
436  bool operator<(const char *str) const
437  {
438  return compare(str)<0;
439  }
440  bool operator<(const UT_String &str) const
441  {
442  return compare(str.myData)<0;
443  }
444  bool operator<(const UT_StringRef &str) const
445  {
446  return compare(str)<0;
447  }
448  bool operator<=(const char *str) const
449  {
450  return compare(str)<=0;
451  }
452  bool operator<=(const UT_String &str) const
453  {
454  return compare(str.myData)<=0;
455  }
456  bool operator<=(const UT_StringRef &str) const
457  {
458  return compare(str)<=0;
459  }
460  bool operator>(const char *str) const
461  {
462  return compare(str)>0;
463  }
464  bool operator>(const UT_String &str) const
465  {
466  return compare(str.myData)>0;
467  }
468  bool operator>(const UT_StringRef &str) const
469  {
470  return compare(str)>0;
471  }
472  bool operator>=(const char *str) const
473  {
474  return compare(str)>=0;
475  }
476  bool operator>=(const UT_String &str) const
477  {
478  return compare(str.myData)>=0;
479  }
480  bool operator>=(const UT_StringRef &str) const
481  {
482  return compare(str)>=0;
483  }
484 
485  /// Test whether the string is defined or not
486  SYS_SAFE_BOOL operator bool() const { return isstring(); }
487 
488  /// Return the edit distance between two strings.
489  /// See http://en.wikipedia.org/wiki/Levenshtein_distance for details.
490  /// allow_subst controls whether a substitution of a character with
491  /// another is a single operation, rather than two operations of
492  /// insert and delete.
493  int distance(const char *str,
494  bool case_sensitive = true,
495  bool allow_subst = true) const;
496 
497  operator const char *() const
498  { return (const char *)myData; }
499  operator char *()
500  { return myData; }
501 
502  const char *c_str() const { return buffer(); }
503  const char *buffer() const { return myData; }
504  const char *nonNullBuffer() const { return myData ? myData : ""; }
505 
506  char operator()(unsigned i) const
507  {
508  UT_ASSERT_P( isstring() );
509  UT_ASSERT_SLOW(i <= strlen(myData));
510  return myData[i];
511  }
512 
513  char &operator()(unsigned i)
514  {
515  harden();
516  return myData[i];
517  }
518 
519  // Prefer using write() since ideally the non-const operator() is removed
520  inline void write(unsigned i, char c)
521  {
522  hardenIfNeeded();
523  myData[i] = c;
524  }
525 
526  int toInt() const;
527  fpreal toFloat() const;
528 
529  /// Converts the contents of this UT_String to a std::string. Note that
530  /// std::string can't be constructed with a null pointer, so you can't
531  /// just write std::string s = ut_string.buffer();
532  std::string toStdString() const;
533 
534  //
535  // Here, we're finished with operators
536  //
537 
538  /// Return length of string
539  unsigned length(void) const
540  { return (myData) ? (unsigned)strlen(myData) : 0; }
541 
542  /// Return memory usage in bytes
543  int64 getMemoryUsage(bool inclusive=true) const
544  {
545  return (inclusive ? sizeof(*this) : 0)
546  + (!myIsReference ? (length() + 1)*sizeof(char) : 0);
547  }
548 
549  /// Find first occurrence of character. Returns NULL upon failure.
550  /// @{
551  char *findChar(int c)
552  { return myData ? strchr(myData, c) : nullptr; }
553  const char *findChar(int c) const
554  { return SYSconst_cast(*this).findChar(c); }
555  /// @}
556 
557  /// Find first occurrence of any character in @em str
558  /// @{
559  char *findChar(const char *str)
560  { return myData ? strpbrk(myData, str) : nullptr; }
561  const char *findChar(const char *str) const
562  { return SYSconst_cast(*this).findChar(str); }
563  /// @}
564 
565  /// Find last occurrence of character
566  /// @{
567  char *lastChar(int c)
568  { return myData ? strrchr(myData, c) : nullptr; }
569  const char *lastChar(int c) const
570  { return SYSconst_cast(*this).lastChar(c); }
571  /// @}
572 
573  /// Return the number of occurrences of the specified character.
574  int countChar(int c) const;
575 
576  /// Count the occurrences of the string
577  int count(const char *str, bool case_sensitive = true) const;
578 
579  char *findNonSpace();
580  const char *findNonSpace() const;
581  const char *findWord(const char *word) const;
582  bool findString(const char *str, bool fullword,
583  bool usewildcards) const;
584  int changeWord(const char *from, const char *to, int all=1);
585  int changeString(const char *from, const char *to, bool fullword);
586  int changeQuotedWord(const char *from, const char *to,
587  int quote = '`', int all = 1);
588 
589  int findLongestCommonSuffix( const char *with ) const;
590 
591  /// Perform deep copy of the substring starting from @em index
592  /// for @em len characters into the specified UT_String.
593  /// If @em len is too long, then a substring starting from @em index to
594  /// the end of the string is copied.
595  /// Returns the length of the copied substring.
596  int substr(UT_String &buf, int index, int len=0) const;
597 
598  /// Determine if string can be seen as a single floating point number
599  unsigned isFloat(int skip_spaces = 0, int loose = 0, bool allow_underscore = false) const;
600  /// Determine if string can be seen as a single integer number
601  unsigned isInteger(int skip_spaces = 0) const;
602 
603  void toUpper()
604  {
605  char *ptr;
606  harden();
607  for (ptr=myData; *ptr; ptr++)
608  *ptr = (char)toupper(*ptr);
609  }
610  void toLower()
611  {
612  char *ptr;
613  harden();
614  for (ptr=myData; *ptr; ptr++)
615  *ptr = (char)tolower(*ptr);
616  }
617 
618 
619  /// Return last component of forward slash separated path string
620  ///
621  /// If there is a slash in the string, fileName() returns the string
622  /// starting after the slash. Otherwise, it returns the contents of
623  /// this string. Note that it returns a pointer into this string.
624  const char *fileName() const
625  {
626  const char *fname;
627 
628  if (!myData)
629  return 0;
630 
631  fname = lastChar('/');
632 
633  if (!fname)
634  {
635  fname = myData;
636  }
637  else
638  {
639  fname++; // Get past the /
640  }
641  return fname;
642  }
643  /// Return the extension of a file path string
644  /// @{
646  {
647  if( !isstring() )
648  return 0;
649 
650  char *dot = lastChar('.');
651  if (dot)
652  {
653  const char *slash = lastChar('/');
654 
655  if (slash && slash > dot)
656  dot = NULL;
657  }
658  return dot;
659  }
660  const char *fileExtension() const
661  {
662  return SYSconst_cast(*this).fileExtension();
663  }
664  /// @}
665 
666  /// Return whether the file extension matches. The extension passed in
667  /// should include the '.' separator. For example: @code
668  /// matchFileExtension(".jpg")
669  /// @endcode
670  bool matchFileExtension(const char *match_extension) const
671  {
672  const char *ext = fileExtension();
673  return ext && !SYSstrcasecmp(ext, match_extension);
674  }
675  /// Return path terminated just before the extension.
676  /// If the filename starts with '.' and no path is provided,
677  /// returns NULL
678  UT_String pathUpToExtension() const;
679 
680  /// Replace the file extension and return the new string
681  UT_String replaceExtension(const UT_String &new_ext) const;
682 
683  /// Split a path into @em dir_name and @em file_name, where @em file_name
684  /// is everything after the final slash (i.e. the same as fileName()).
685  /// Either part may be empty. Note that if the string starts with / and
686  /// only contains that one slash, the @em dir_name will be / and not blank.
687  /// @em dir_name and @em file_name will either be set to hardened strings
688  /// or an empty string.
689  void splitPath(UT_String &dir_name, UT_String &file_name) const;
690 
691  /// Decompose a filename into various parts
692  ///
693  /// parseNumberedFilename() will break up a filename into its various
694  /// parts: file = prefix$Fsuffix (note: suffix is
695  /// not the same as file extension.) 0 is returned if there is
696  /// no frame number. 'negative' allows -[frame] to be interpreted as a
697  /// negative number. 'fractional' allows [frame].[number] to be interpreted
698  /// as a fractional frame.
699  int parseNumberedFilename(UT_String &prefix,
700  UT_String &frame,
701  UT_String &suff,
702  bool negative = true,
703  bool fractional = false) const;
704 
705  bool isstring() const
706  { return (myData && *myData); }
707 
708  /// trimSpace() will remove all space characters (leading and following)
709  /// from a string. If the string consists of multiple words, the words will
710  /// be collapsed. The function returns 1 if space was trimmed.
711  int trimSpace(bool leaveSingleSpaceBetweenWords = false);
712 
713  /// A version of trimSpace() that only removes leading and following spaces
714  /// from a string, leaving any between words intact.
715  int trimBoundingSpace();
716 
717  /// strips out all characters found in 'chars'. The string length will be
718  /// reduced by the number of characters removed. The number of characters
719  /// removed is returned.
720  int strip(const char *chars);
721 
722  /// protectString() will modify the existing string to escape double quotes
723  /// and backslashes. It will only wrap the string in double quotes if
724  /// it has spaces in it. If 'protect_empty' is true, the string will
725  /// become '""', otherwise it will stay empty.
726  void protectString(bool protect_empty=false);
727 
728  /// protectPreQuotePythonStringLiteral() will modify the existing string
729  /// to escape any non-printing characters, backslashes, and instances of the
730  /// specified delimiter. Unlike protectString(), it will not wrap the
731  /// string in quotes.
732  void protectPreQuotePythonStringLiteral(char delimiter='\'');
733 
734  /// returns true if the string begins and ends with a (non-escaped) quote
735  /// 'delimiter'.
736  bool isQuotedString(char delimiter='\'') const;
737 
738  /// makeQuotedString() is similar to protectString() except it returns a
739  /// new string instead of changing this string, it does wrap the string
740  /// in quotes, and it lets you use either ' or " as the delimiter.
741  /// The quoted string can also be optionally be made to escape non-printing
742  /// characters. The string that's returned is UT_String::ALWAYS_DEEP.
743  UT_String makeQuotedString(char delimiter='\'',
744  bool escape_nonprinting=false) const;
745 
746  /// makeSmartQuotedString() will use either ' or " as the delimiter to
747  /// avoid escaped quotes, using the default delimiter if it doesn't
748  /// matter. The quoted string can also be optionally be made to escape
749  /// non-printing characters. The string that's returned is
750  /// UT_String::ALWAYS_DEEP.
751  UT_String makeSmartQuotedString(char default_delimiter='\'',
752  bool escape_nonprinting=false) const;
753 
754  /// Expands standard control sequences ('\\n', '\\r', '\\t', '\\0') to their
755  /// corresponding ASCII values (10, 13, 9, 0, respectively).
756  /// If the expand_extended flag is enabled, an extended expansion is enabled
757  /// which adds hexadecimal, decimal and Unicode control sequence expansion.
758  /// Any values resulting from that expansion, which are outside the standard
759  /// ASCII range, will be encoded as UTF8-encoded control points.
760  void expandControlSequences(bool expand_extended = false);
761 
762  bool hasWhiteSpace() const;
763 
764  void removeTrailingSpace();
765  void removeTrailingChars(char chr);
766 
767  void removeTrailingDigits();
768 
769  // cshParse() does not need to harden the string. It does very robust
770  // parsing in the style of csh. It actually does better parsing than
771  // csh. Variable expansion & backquote expansion are done in the
772  // correct order for the correct arguments. One caveat is that the
773  // string cannot have \0377 (0xff) as a character in it.
774  //
775  // If there is an error in parsing, the error flag (if passed in) will be
776  // set to:
777  // 0 = no error
778  // 1 = line too long
779  int cshParse(char *argv[], int maxArgs,
780  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
781  void (*elookup)(const char *, UT_String&)=UTexprLookup,
782  int *error = 0,
783  UT_StringCshIO *io=0);
784 
785  int cshParse(UT_WorkArgs &argv,
786  void (*vlookup)(const char *, UT_String&)=UTvarLookup,
787  void (*elookup)(const char *, UT_String&)=UTexprLookup,
788  int *error = 0,
789  UT_StringCshIO *io=0);
790 
791  // dosParse() uses the semi-braindead approach of ms-dos to argument
792  // parsing. That is, arguments are separated by a double quote or space
793  // (being a space or a tab). If 'preserve_backslashes' is set to
794  // false (the default), back-slashes are passed through verbatim, unless
795  // the following character is a double quote. Likewise, any pairs of
796  // back-slashes preceding a double quote are turned into single
797  // back-slashes.
798  int dosParse(UT_WorkArgs &argv, bool preserve_backslashes=false);
799  int dosParse(char *argv[], int maxArgs,
800  bool preserve_backslashes=false);
801 
802  /// Perform dos parsing modifying the buffer passed in. The args will be
803  /// stored as raw pointers into the given buffer
804  static int dosParse(char *buffer, UT_WorkArgs &args,
805  bool preserve_backslashes);
806 
807  // parse will insert nulls into the string.
808  // NB: The argv array is null terminated, thus the effective
809  // maximum number of arguments is one less than maxArgs.
810  // NB: The maxArgs variants are all deprecated, use UT_WorkArgs
811  // instead.
812  int parse(char *argv[], int maxArgs,
813  const char *quotes = "\"'", bool keep_quotes = false)
814  {
815  harden();
816  return parseInPlace(argv, maxArgs, quotes, keep_quotes);
817  }
818  int parse(UT_WorkArgs &argv, int start_arg = 0,
819  const char *quotes = "\"'", bool keep_quotes = false)
820  {
821  harden();
822  return parseInPlace(argv, start_arg, quotes, keep_quotes);
823  }
824  // Warning: the following methods insert nulls into the string without
825  // hardening.
826  int parseInPlace(char *argv[], int maxArgs,
827  const char *quotes = "\"'", bool keep_quotes = false);
828  int parseInPlace(UT_WorkArgs &argv, int start_arg = 0,
829  const char *quotes = "\"'", bool keep_quotes = false);
830 
831  // Splits the string at specific separator characters. Unlike the parse
832  // methods, the tokenize methods ignore quoting completely.
833  int tokenize(char *argv[], int maxArgs, char separator)
834  {
835  harden();
836  return tokenizeInPlace(argv, maxArgs, separator);
837  }
838  int tokenizeInPlace(char *argv[], int maxArgs, char separator);
839  int tokenize(UT_WorkArgs &argv, char separator)
840  {
841  harden();
842  return tokenizeInPlace(argv, separator);
843  }
844  int tokenizeInPlace(UT_WorkArgs &argv, char separator);
845  int tokenize(char *argv[], int maxArgs,
846  const char *separators = " \t\n")
847  {
848  harden();
849  return tokenizeInPlace(argv, maxArgs, separators);
850  }
851  int tokenizeInPlace(char *argv[], int maxArgs,
852  const char *separators = " \t\n");
853  int tokenize(UT_WorkArgs &argv, const char *separators = " \t\n")
854  {
855  harden();
856  return tokenizeInPlace(argv, separators);
857  }
858  int tokenizeInPlace(UT_WorkArgs &argv,
859  const char *separators = " \t\n");
860 
861  template<typename T>
862  int tokenize(T &list, const char *separators = " \t\n")
863  {
864  harden();
865  return tokenizeInPlace(list, separators);
866  }
867 
868  template<typename T>
869  int tokenizeInPlace(T &list,
870  const char *separators = " \t\n")
871  {
872  char *token;
873  char *context;
874 
875  if (!isstring())
876  return 0;
877  if (!(token = SYSstrtok(myData, separators, &context)))
878  return 0;
879 
880  list.append(token);
881 
882  while ((token = SYSstrtok(0, separators, &context)) != NULL)
883  list.append(token);
884 
885  return list.entries();
886  }
887 
888 
889  // Replaces the contents with variables expanded.
890  void expandVariables();
891 
892  // Functions to hash a string
894  {
895  return hash(myData);
896  }
897 
898  static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code = 0)
899  {
900  return SYSstring_hashseed(str, SYS_EXINT_MAX, code);
901  }
902 
903  // This does pattern matching on a string. The pattern may include
904  // the following syntax:
905  // ? = match a single character
906  // * = match any number of characters
907  // [char_set] = matches any character in the set
908  unsigned match(const char *pattern, int caseSensitive=1) const;
909 
910  // Similar to match() except it assumes that we're dealing with file paths
911  // so that it determines whether to do a case-sensitive match depending on
912  // the platform.
913  unsigned matchFile(const char *pattern) const;
914 
915  // Similar to match() but uses rsync style matching:
916  // * = match any number of characters up to a slash
917  // ** = match any number of characters, including a slash
918  unsigned matchPath(const char *pattern, int caseSensitive=1) const;
919 
920  // multiMatch will actually check multiple patterns all separated
921  // by the separator character: i.e. geo1,geo2,foot*
922  //
923  // NOTE: No pattern may contain the separator
924  unsigned multiMatch(const char *pattern,
925  int caseSensitive, char separator) const;
926  unsigned multiMatch(const char *pattern, int caseSensitive = 1,
927  const char *separators = ", ",
928  bool *explicitlyExcluded = 0,
929  int *matchIndex = 0,
930  ut_PatternRecord *pattern_record=NULL) const;
931  unsigned multiMatch(const UT_StringMMPattern &pattern,
932  bool *explicitlyExcluded = 0,
933  int *matchIndex = 0,
934  ut_PatternRecord *pattern_record=NULL) const;
935 
936  // this method matches a pattern while recording any wildcard
937  // patterns used.
938  unsigned multiMatchRecord(const char *pattern, int maxpatterns,
939  char *singles, int &nsingles,
940  char **words, int &nwords,
941  int case_sensitive = 1,
942  const char *separators = ", ") const;
943  unsigned multiMatchRecord(const UT_StringMMPattern &pattern,
944  int maxpatterns,
945  char *singles, int &nsingles,
946  char **words, int &nwords) const;
947  unsigned multiMatchRecord(const char *pattern,
948  UT_StringHolder &singles,
949  UT_StringArray &words,
950  int case_sensitive = 1,
951  const char *separators = ", ") const;
952 
953  /// matchPattern(UT_WorkArgs &) assumes that the arguments contain the
954  /// components of a pattern to be matched against. The method returns
955  /// true if the pattern matches, false if it doesn't. This matching
956  /// process handles ^ expansion properly (and efficiently).
957  /// If the string doesn't match any components of the pattern, then the
958  /// assumed value is returned.
959  bool matchPattern(const UT_WorkArgs &pattern_args,
960  bool assume_match=false) const;
961 
962  static int multiMatchCheck(const char *pattern);
963  static int wildcardMatchCheck(const char *pattern);
964 
965  // Same as match but equivalent to "*pattern*"
966  bool contains(const char *pattern, bool case_sensitive=true) const;
967 
968  // Returns true if our string starts with the specified prefix.
969  bool startsWith(const char *prefix,
970  bool case_sensitive = true,
971  exint len = -1) const;
972 
973  // Returns true if our string ends with the specified suffix.
974  bool endsWith(const char *suffix,
975  bool case_sensitive = true,
976  exint len = -1) const;
977 
978  /// Pluralize an English noun ending (i.e. box->boxes or tube->tubes). The
979  /// ending must be lower case to be processed properly.
980  void pluralize();
981 
982  // Will parse strings like 1-10:2,3 and call func for every element
983  // implied. It will stop when the func returns 0 or the parsing
984  // is complete, in which case it returns 1.
985  // Parsing also allows secondary elements to be specified eg 3.4 0.12
986  // The secfunc is used to find the maximum index of secondary elements
987  // for each compound num. The elements are assumed to be
988  // non-negative integers.
989  int traversePattern(int max, void *data,
990  int (*func)(int num, int sec, void *data),
991  unsigned int (*secfunc)(int num,void *data)=0,
992  int offset=0) const;
993 
994  // Fast containment test: a plain substring search, so the pattern is
     // taken literally (no wildcards or other special characters).
     // Returns a pointer to the first occurrence of pattern inside this
     // string's buffer, or NULL when there is no occurrence or when the
     // string has no buffer at all.
     // case_sensitive selects strstr() (true) or SYSstrcasestr() (false).
     // pattern is handed to the search routine unchecked, so it must not be
     // NULL.
995  const char *fcontain(const char *pattern, bool case_sensitive=true) const
996  {
997  if (!myData) return NULL; // no buffer: nothing can match
998  return case_sensitive ? strstr(myData, pattern)
999  : SYSstrcasestr(myData, pattern);
1000  }
1001 
1002  // Given the match pattern which fits our contents, any assigned wildcards
1003  // are substituted. The wildcards may also be indexed.
1004  // Returns true if rename was successful.
1005  //
1006  // @note This code was adapted from CHOP_Rename::subPatterns() and
1007  // works the same way.
1008  //
1009  // eg. this = apple, match = a*le, replace = b* ---> bpp
1010  // this = a_to_b, match = *_to_*, replace = *(1)_to_*(0) ---> b_to_a
1011  bool patternRename(const char *match_pattern, const char *replace);
1012 
1013  // Given the name rule according to which a name consists of a base name
1014  // (char sequence ending in a non-digit) and a numerical suffix, the
1015  // following two methods return the base and the suffix respectively.
1016  // base() needs a string buffer and will return a const char* pointing to it.
1017  // base() always returns a non-zero pointer,
1018  // while suffix() returns 0 if no suffix is found.
1019  const char *base(UT_String &buf) const;
1020  const char *suffix(void) const;
1021 
1022  // incrementNumberedName will increment a name. If it has a numerical
1023  // suffix, that suffix is incremented. If not, "2" is appended to the
1024  // name. The preserve_padding parameter can be set to true so that zero
1025  // padding is preserved. Incrementing foo0009 will produce foo10 with
1026  // this parameter set to false, or foo0010 if it is set to true.
1027  void incrementNumberedName(bool preserve_padding = false);
1028 
1029  // setFormat is used to set how an outstream formats its ascii output.
1030  // So you can use printf style formatting. eg:
1031  // UT_String::setFormat(cout, "%08d") << 100;
1032  //
1033  // Note: Don't do:
1034  // cout << UT_String::setFormat(cout, "%08d") << 100;
1035  // ^^^^
1036  // Also: The formatting changes (except for field width) are permanent,
1037  // so you'll have to reset them manually.
1038  //
1039  // TODO: A resetFormat, and a push/pop format pair.
1040  static std::ostream &setFormat(std::ostream &os, const char *fmt);
1041  std::ostream &setFormat(std::ostream &os);
1042 
1043  int replacePrefix(const char *oldpref,
1044  const char *newpref);
1045  int replaceSuffix(const char *oldsuffix,
1046  const char *newsuffix);
1047 
1048  // expandArrays will expand a series of tokens of the
1049  // form prefix[pattern]suffix into the names array
1050  //
1051  // Note: Each names[i] must be free'd after use
1052  // and label is used on the non-const parse method
1053  int expandArrays(char *names[], int max);
1054 
1055  // This routine will ensure no line is over the specified
1056  // number of columns. Offending lines will be wrapped at
1057  // the first spaceChar or cut at exactly cols if spaceChar
1058  // is not found.
1059  // It returns one if any changes were done.
1060  // It currently treats tabs as single characters which should be
1061  // changed.
1062  // It will break words at hyphens if possible.
1063  int format(int cols);
1064 
1065  // this method is similar to changeWord. This method performs
1066  // a "dumb" substitution. Returns the number of substitutions
1067  int substitute( const char *find, const char *replacement,
1068  bool all = true );
1069 
1070  // This function replaces the character found with another character.
1071  int substitute( char find, char replacement, bool all = true );
1072 
1073  // this function removes the substring at pos and len, and inserts str
1074  // at pos. it returns the difference (new_length - old_length)
1075  int replace( int pos, int len, const char *str );
1076 
1077  // Remove the first len characters of this string.
      // Implemented as replace(0, len, ""), so the return value is whatever
      // replace() returns for that call (documented above as
      // new_length - old_length).
1078  int eraseHead(int len)
1079  { return replace(0, len, ""); }
1080 
1081  // Remove the last len characters of this string.
      // Implemented as replace(length() - len, len, ""), so the return value
      // is whatever replace() returns for that call.
      // NOTE(review): the start position is computed as length() - len with
      // no check that len <= length(); confirm callers never pass a larger
      // len, or that replace() tolerates the resulting position.
1082  int eraseTail(int len)
1083  { return replace(length() - len, len, ""); }
1084 
1085  // Remove the substring that starts at pos and runs for len characters.
      // A negative len (the default) means "through the end of the string":
      // it is replaced by length() - pos before the removal.
      // The removal itself is replace(pos, len, ""), whose result is returned.
      // pos is not range-checked here.
1086  int erase(int pos = 0, int len = -1)
1087  {
1088  if (len < 0)
1089  len = length() - pos; // negative len: erase everything from pos onward
1090  return replace(pos, len, "");
1091  }
1092 
1093  // Insert the given string at pos into this string.
      // Implemented as replace(pos, 0, str): zero characters are removed and
      // str is placed at pos. Returns whatever replace() returns.
1094  int insert(int pos, const char *str)
1095  { return replace(pos, 0, str); }
1096 
1097  // Does a "smart" string compare which will sort based on numbered names.
1098  // That is "text20" is bigger than "text3". In a strictly alphanumeric
1099  // comparison, this would not be the case. Zero is only returned if both
1100  // strings are identical.
1101  static int compareNumberedString(const char *s1, const char *s2,
1102  bool case_sensitive=true,
1103  bool allow_negatives=false);
1104  static int qsortCmpNumberedString(const char *const*v1, const char *const*v2);
1105 
1106  // Like compare numbered strings, but it sorts better when there are
1107  // .ext extensions (i.e. it handles '.' as a special case)
1108  static int compareNumberedFilename(const char *s1, const char *s2,
1109  bool case_sensitive=false);
1110  static int qsortCmpNumberedFilename(const char *const*v1, const char *const*v2);
1111 
1112  /// Compare two version strings which have numbered components separated by
1113  /// dots. eg. "X.Y.Z". Assumes the components go from most to least
1114  /// significant in left to right order.
1115  static int compareVersionString(const char *s1, const char *s2);
1116 
1117  /// Given a path, set the value of the string to the program name. For
1118  /// example: @code
1119  /// str.extractProgramName(argv[0]);
1120  /// str.extractProgramName("c:/Path/program.exe");
1121  /// str.extractProgramName("/usr/bin/program");
1122  /// @endcode
1123  /// This will extract the last path component. Program names may also have
1124  /// their extensions stripped. For example ".exe" on Windows and "-bin" to
1125  /// strip the Houdini wrappers on other platforms.
1126  ///
1127  /// @note The path should be normalized to have forward slashes as the path
1128  /// separator.
1129  void extractProgramName(const char *path,
1130  bool strip_extension=true,
1131  bool normalize_path=true);
1132 
1133  /// Given a path, check to see whether the program name matches the
1134  /// expected. For example: @code
1135  /// if (UT_String::matchProgramName(argv[0], "houdini"))
1136  /// if (UT_String::matchProgramName("c:/Path/houdini.exe", "houdini"))
1137  /// if (UT_String::matchProgramName("/usr/bin/houdini", "houdini"))
1138  /// @endcode
1139  /// The matching is always case-insensitive.
1140  ///
1141  /// @note The path should be normalized to have forward slashes as the path
1142  /// separator.
1143  static bool matchProgramName(const char *path, const char *expected,
1144  bool normalize_path=false);
1145 
1146  /// Convert a path to a "normalized" path. That is, all back-slashes will
1147  /// be converted to forward slashes. On some operating systems, this will
1148  /// leave the string unchanged.
1149  void normalizePath();
1150 
1151  // A very fast integer to string converter. This is faster (at least on
1152  // SGI) than using sprintf("%d"). About two to three times as fast. Both
1153  // of these methods return the length of the string generated.
1154  static int itoa(char *str, int64 i);
1155  static int utoa(char *str, uint64 i);
1156 
1157  // Versions of the above functions which set into this string object
1158  void itoa(int64 i);
1159  void utoa(uint64 i);
1160 
1161  // A reader-friendly version of itoa. This places commas appropriately
1162  // to ensure the person can pick out the kilo points easily.
1163  // This can handle numbers up to 999,999,999,999,999,999.
1164  void itoa_pretty(int64 val);
1165 
1166  /// Convert the given time delta (in milliseconds)
1167  /// to a reader-friendly string in days, hours, minutes, and seconds.
1168  void timeDeltaToPrettyString(double time_ms);
1169 
1170  /// Convert the given time delta (in milliseconds)
1171  /// to a reader-friendly string in milliseconds.
1172  void timeDeltaToPrettyStringMS(double time_ms);
1173 
1174  // Do an sprintf into this string. This method will allocate exactly the
1175  // number of bytes required for the final string. If the format string is
1176  // bad, isstring() will return false afterwards.
1177  int sprintf(const char *fmt, ...) SYS_PRINTF_CHECK_ATTRIBUTE(2, 3);
1178 
1179  // This will change the string into a valid C style variable name.
1180  // All non-alpha numerics will be converted to _.
1181  // If the first letter is a digit, it is prefixed with an _.
1182  // This returns 0 if no changes occurred, 1 if something had to
1183  // be adjusted.
1184  // Note that this does NOT force the name to be non-zero in length.
1185  // The safechars parameter is a string containing extra characters
1186  // that should be considered safe. These characters are not
1187  // converted to underscores.
1188  int forceValidVariableName(const char *safechars = NULL);
1189  // Returns true if the string matches a C-style variable name.
1190  // The safechars are not allowed to be the start.
1191  // Matching forceValid, empty strings are considered valid!
1192  bool isValidVariableName(const char *safechars = NULL) const;
1193 
1194  // This will force all non-alphanumeric characters to be underscores.
1195  // Returns true if any changes were required.
1196  bool forceAlphaNumeric();
1197 
1198  // This function will calculate the relative path to get from src to dest.
1199  // If file_path is false, this method assume it is dealing with node paths.
1200  // If file_path is true, it will also deal with Windows drive letters and
1201  // UNC paths.
1202  void getRelativePath(const char *src_fullpath,
1203  const char *dest_fullpath,
1204  bool file_path = false);
1205 
1206  // This function takes two absolute paths and returns the length of the
1207  // longest common path prefix, up to and including the last '/'. This
1208  // means, for instance, that if fullpath1[len1-1] == '/' then all of
1209  // fullpath1 is eligible as a common prefix.
1210  // NB: This function DOES NOT handle NT style drive names! It is currently
1211  // only used for op paths. If you want to add support for this, you
1212  // should add another default parameter to do this.
1213  static int findLongestCommonPathPrefix(const char *fullpath1, int len1,
1214  const char *fullpath2, int len2);
1215 
1216  // This function tests whether we are an absolute path, and returns true or
1217  // false depending on whether we are.
1218  bool isAbsolutePath(bool file_path=false) const;
1219 
1220  // This function assumes that we are an absolute path and will remove all
1221  // un-necessary components from it as long as we remain an absolute path.
1222  // We return false if an error was encountered, in which case the results
1223  // are unpredictable.
1224  bool collapseAbsolutePath(bool file_path=false);
1225 
1226  // This function will make sure that the string is at most max_length
1227  // characters long. If the string is longer than that, it will
1228  // replace the middle of the string by "...". Returns true if the string
1229  // has changed and false otherwise. max_length must be greater than 3.
1230  bool truncateMiddle(int max_length);
1231 
1232  // This function is an abomination when you can just write:
1233  // UT_String foo("");
1234  // ...
1235  // if (foo.isstring())
1236  // ...
1237  // Avoid using it and do not write functions that return "const UT_String&"
1238  static const UT_String &getEmptyString();
1239 
1240  /// Count the number of valid characters in the : modifier for variable
1241  /// expansion. For example, the string ":r" will return 2, the string
1242  /// ":r:t" will return 4, the string ":z" will return 0. These use the csh
1243  /// expansion modifiers.
1244  ///
1245  /// If the string doesn't start with a ':', the method will return 0.
1246  static int countCshModifiers(const char *src);
1247 
1248  /// Applies a "csh" style modifier string to this string. For example, a
1249  /// modifier string of ":e" would replace the string with the file
1250  /// extension of the string.
1251  ///
1252  /// Returns true if any modifications were performed
1253  bool applyCshModifiers(const char *modifiers);
1254 
1255 
1256  /// This will remove the range from a string of the form foo$Fbar.ext (#-#)
1257  /// and return the first number from the range. If there is only 1 range
1258  /// number, it will be returned. If there is no range, 0 is returned.
1259  /// The returned string is hardened.
1260  UT_String removeRange ();
1261 
1262  /// This will format a value to represent a given size in bytes, kilobytes,
1263  /// megabytes, etc.
1264  void formatByteSize(exint size, int digits=2);
1265 
1266  // UTF-8 helpers
1267 
1268  /// Returns the number of Unicode codepoints in the string, assuming it's
1269  /// encoded as UTF-8.
1270  int getCodePointCount() const;
1271 
1272  /// Returns a list of Unicode code points from this string.
1273  void getAsCodePoints(UT_Int32Array &cp_list) const;
1274 
1275  /// Non-member swap() overload for UT_String that forwards to the member
      /// UT_String::swap(). Being a friend defined inside the class, it is
      /// found through argument-dependent lookup on unqualified swap(a, b).
1276  /// @internal This is needed because standard std::swap() implementations
1277  /// will try to copy the UT_String objects, causing hardened strings to
1278  /// become weak.
1279  friend void swap(UT_String& a, UT_String& b) { a.swap(b); }
1280 
1281 private:
1282  template <typename OSTREAM>
1283  void saveInternal(OSTREAM &os, bool binary) const;
1284 
1285  void freeData();
1286 
1287  /// implements a few csh-style modifiers.
1288  /// @param mod pointer to a string starting with the modifier to apply.
1289  /// so, to apply a global substitute modifier :gs/l/r/
1290  /// mod should be: s/l/r
1291  /// @param all True if all possible modifications should be
1292  /// (recursively) performed.
1293  /// Otherwise, at most one modification is applied.
1294  /// @return whether any modification was performed
1295  bool applyNextModifier(const char *mod, bool all);
1296 
1297 
1298  /// Sets myIsReference to false and copies the other_string into myData,
1299  /// but attempts to avoid unnecessary memory reallocations. Frees up
1300  /// any previous data, if necessary. If other_string is NULL, the call
1301  /// is equivalent to freeData().
1302  void doSmartCopyFrom(const char* other_string);
1303 
1304  static int compareNumberedStringInternal(const char *s1, const char *s2,
1305  bool case_sensitive,
1306  bool allow_negatives,
1307  bool dot_first);
1308 
// Release a string buffer with ::free().
// When UT_DEBUG is defined and _WIN32 is not, a non-null buffer is first
// overwritten (all strlen(str) characters plus the terminator) with the byte
// 0xDD -- presumably so that stale uses of freed strings stand out; confirm.
// A null str is passed straight to ::free().
1309  static SYS_FORCE_INLINE void utStrFree(char *str)
1310  {
1311 #if defined(UT_DEBUG) && !defined(_WIN32)
1312  if (str)
1313  ::memset((void *)str, 0xDD, ::strlen(str) + 1);
1314 #endif
1315  ::free((void *)str);
1316  }
1317 
1318  char *myData;
1319  bool myIsReference:1,
1320  myIsAlwaysDeep:1;
1321 
1322  /// This operator saves the string to the stream via the string's
1323  /// saveAscii() method, protecting any whitespace (by adding quotes),
1324  /// backslashes or quotes in the string.
1325  friend UT_API std::ostream &operator<<(std::ostream &os, const UT_String &d);
1326  friend UT_API UT_OStream &operator<<(UT_OStream &os, const UT_String &d);
1327 
1328  friend class UT_API UT_StringRef;
1329 };
1330 
1331 /// Creates a shallow wrapper around a string for calling UT_String's many
1332 /// const algorithms.
1334 {
1335 public:
1336  // We only have a single constructor which is always shallow.
1338  UT_StringWrap(const char *str)
1339  : UT_String(str)
1340  {}
1341  // It seems necessary on MSVC to forceinline the empty constructor in order
1342  // to have it inlined.
1345  {}
1346 
1347  // Manually wrap methods that have non-const overloads or return non-const
1348  // pointers.
      // Each wrapper below is a const member that forwards its arguments
      // unchanged to the UT_String member of the same name and returns either
      // a char by value or a const char *.
1349  char operator()(unsigned i) const { return UT_String::operator()(i); }
1350  const char *findChar(int c) const { return UT_String::findChar(c); }
1351  const char *findChar(const char *str) const { return UT_String::findChar(str); }
1352  const char *findNonSpace() const { return UT_String::findNonSpace(); }
1353  const char *lastChar(int c) const { return UT_String::lastChar(c); }
1354 
1355  using UT_String::operator==;
1356  using UT_String::operator!=;
1357  using UT_String::c_str;
1358  using UT_String::length;
1359 
1360  using UT_String::base;
1361  using UT_String::compare;
1362  using UT_String::contains;
1363  using UT_String::count;
1364  using UT_String::countChar;
1365  using UT_String::distance;
1366  using UT_String::endsWith;
1367  using UT_String::equal;
1368  using UT_String::fcontain;
1370  using UT_String::fileName;
1371  using UT_String::findWord;
1372  using UT_String::findString;
1375  using UT_String::isFloat;
1376  using UT_String::isInteger;
1378  using UT_String::isstring;
1379  using UT_String::match;
1380  using UT_String::matchFile;
1382  using UT_String::matchPath;
1384  using UT_String::multiMatch;
1388  using UT_String::save;
1389  using UT_String::saveAscii;
1390  using UT_String::saveBinary;
1391  using UT_String::splitPath;
1392  using UT_String::startsWith;
1393  using UT_String::substr;
1394  using UT_String::suffix;
1395  using UT_String::toFloat;
1396  using UT_String::toInt;
1397 };
1398 
1401 {
1402  if (!myIsReference && myData)
1403  utStrFree(myData);
1404 }
1405 
// Drop the current buffer, if there is one.
// The buffer is released through utStrFree() only when this string is not a
// reference (myIsReference is false); in either case myData is then set to
// null. myIsReference itself is left unchanged.
1407 void
1408 UT_String::freeData()
1409 {
1410  if (myData)
1411  {
1412  if (!myIsReference)
1413  utStrFree(myData);
1414  myData = 0;
1415  }
1416 }
1417 
// Exchange this string's contents with `other`.
// The myIsReference flag and the myData pointer are swapped by hand;
// myIsAlwaysDeep is not swapped and stays with each object. Afterwards,
// whichever side has myIsAlwaysDeep set is passed through harden().
1418 inline void
1420 {
1421  // We can't use UTswap because it doesn't work with bit fields.
1422  bool temp = myIsReference;
1423  myIsReference = other.myIsReference;
1424  other.myIsReference = temp;
1425 
1426  char *tmp_data = myData;
1427  myData = other.myData;
1428  other.myData = tmp_data;
1429 
      // Each side keeps its own myIsAlwaysDeep setting, so harden() is
      // called on any always-deep side after the exchange.
1430  if (myIsAlwaysDeep)
1431  harden();
1432 
1433  if (other.myIsAlwaysDeep)
1434  other.harden();
1435 }
1436 
1438 public:
1439  UT_String myOut; // Points to argument following '>'
1440  UT_String myErr; // Points to argument following '>&'
1441  UT_String myIn; // Points to argument following '<'
1442  short myDoubleOut; // If the argument is '>>' or '>>&'
1443  short myDoubleIn; // If the argument is '<<'
1444 };
1445 
1446 UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[]);
1447 
1448 /// Does a "smart" string compare which will sort based on numbered names.
1449 /// That is "text20" is bigger than "text3". In a strictly alphanumeric
1450 /// comparison, this would not be the case.
1452 {
// Ordering predicate on C strings: true when
// UT_String::compareNumberedString(s1, s2) is negative. That function is
// called with its default arguments (case_sensitive=true,
// allow_negatives=false).
1453  bool operator()(const char *s1, const char *s2) const
1454  {
1455  return UT_String::compareNumberedString(s1, s2) < 0;
1456  }
1457 
// std::string overload: forwards to the const char * overload using each
// argument's c_str(), so the result is identical to comparing the
// underlying C strings.
1458  bool operator()(const std::string &s1, const std::string &s2) const
1459  {
1460  return operator()(s1.c_str(), s2.c_str());
1461  }
1462 };
1463 
1464 #endif
GLdouble s
Definition: glew.h:1390
UT_String & operator+=(const char *str)
Definition: UT_String.h:339
static SYS_FORCE_INLINE uint32 hash(const char *str, uint32 code=0)
Definition: UT_String.h:898
int distance(const char *str, bool case_sensitive=true, bool allow_subst=true) const
char * lastChar(int c)
Definition: UT_String.h:567
vint4 max(const vint4 &a, const vint4 &b)
Definition: simd.h:4703
unsigned matchPath(const char *pattern, int caseSensitive=1) const
std::string sprintf(const char *fmt, const Args &...args)
Definition: strutil.h:136
bool operator!=(const char *str) const
Definition: UT_String.h:424
UT_String & operator+=(const UT_String &str)
Definition: UT_String.h:371
string_view OIIO_API strip(string_view str, string_view chars=string_view())
unsigned match(const char *pattern, int caseSensitive=1) const
bool operator>=(const UT_StringRef &str) const
Definition: UT_String.h:480
int count(const char *str, bool case_sensitive=true) const
Count the occurrences of the string.
GLsizeiptr size
Definition: glew.h:1681
GLenum src
Definition: glew.h:2410
bool endsWith(const char *suffix, bool case_sensitive=true, exint len=-1) const
GLuint const GLchar * name
Definition: glew.h:1814
bool matchFileExtension(const char *match_extension) const
Definition: UT_String.h:670
void swap(UT_String &other)
Definition: UT_String.h:1419
void saveAscii(UT_OStream &os) const
Definition: UT_String.h:299
bool operator()(const char *s1, const char *s2) const
Definition: UT_String.h:1453
const Args & args
Definition: printf.h:628
T negative(const T &val)
Return the unary negation of the given value.
Definition: Math.h:108
GLuint index
Definition: glew.h:1814
UT_String(UT_String &&str) SYS_NOEXCEPT
Definition: UT_String.h:167
const char * lastChar(int c) const
Definition: UT_String.h:1353
bool operator<=(const char *str) const
Definition: UT_String.h:448
unsigned length(void) const
Return length of string.
Definition: UT_String.h:539
UT_String myIn
Definition: UT_String.h:1441
GLuint const GLfloat * val
Definition: glew.h:2794
fpreal toFloat() const
bool operator==(const char *str) const
Definition: UT_String.h:412
bool operator<=(const UT_String &str) const
Definition: UT_String.h:452
int toInt() const
char * fileExtension()
Definition: UT_String.h:645
GLboolean GLboolean GLboolean GLboolean a
Definition: glew.h:9477
bool isHard() const
Returns whether this string is hardened already.
Definition: UT_String.h:242
SYS_FORCE_INLINE T * SYSconst_cast(const T *foo)
Definition: SYS_Types.h:131
UT_String makeQuotedString(char delimiter='\'', bool escape_nonprinting=false) const
const char * findChar(const char *str) const
Definition: UT_String.h:561
void swap(T &lhs, T &rhs)
Definition: pugixml.cpp:7172
void write(unsigned i, char c)
Definition: UT_String.h:520
bool operator==(const UT_String &str) const
Definition: UT_String.h:416
int tokenize(char *argv[], int maxArgs, char separator)
Definition: UT_String.h:833
#define UT_API
Definition: UT_API.h:13
const char * fileExtension() const
Definition: UT_String.h:660
bool isAbsolutePath(bool file_path=false) const
bool findString(const char *str, bool fullword, bool usewildcards) const
char * findChar(int c)
Definition: UT_String.h:551
#define SYS_EXINT_MAX
Definition: SYS_Types.h:176
char & operator()(unsigned i)
Definition: UT_String.h:513
bool equal(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:399
const char * findNonSpace() const
Definition: UT_String.h:1352
int compare(const char *str, bool case_sensitive=true) const
Definition: UT_String.h:379
unsigned isInteger(int skip_spaces=0) const
Determine if string can be seen as a single integer number.
void clear()
Reset the string to the default constructor.
Definition: UT_String.h:309
bool isAlwaysDeep() const
Definition: UT_String.h:204
const char * c_str() const
Definition: UT_String.h:502
int compare(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:393
bool operator<(const char *str) const
Definition: UT_String.h:436
bool operator<(const UT_StringRef &str) const
Definition: UT_String.h:444
const char * suffix(void) const
UT_API void UTexprLookup(const char *name, UT_String &result)
bool contains(const char *pattern, bool case_sensitive=true) const
int tokenize(UT_WorkArgs &argv, const char *separators=" \t\n")
Definition: UT_String.h:853
std::ostream & operator<<(std::ostream &ostr, const DataType &a)
Definition: DataType.h:133
UT_String(UT_AlwaysDeepType, const std::string &str)
Construct UT_String from a std::string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:151
void hardenIfNeeded(const char *s)
Take shallow copy and make it deep.
Definition: UT_String.h:232
long long int64
Definition: SYS_Types.h:111
const char * buffer() const
Definition: UT_String.h:503
CompareResults OIIO_API compare(const ImageBuf &A, const ImageBuf &B, float failthresh, float warnthresh, ROI roi={}, int nthreads=0)
unsigned multiMatch(const char *pattern, int caseSensitive, char separator) const
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:32
GLfloat GLfloat GLfloat v2
Definition: glew.h:1856
SYS_FORCE_INLINE uint32 hash() const
Definition: UT_String.h:893
bool operator==(const UT_StringRef &str) const
Definition: UT_String.h:420
unsigned long long uint64
Definition: SYS_Types.h:112
char operator()(unsigned i) const
Definition: UT_String.h:1349
bool operator>=(const char *str) const
Definition: UT_String.h:472
UT_String & operator=(UT_String &&str)
Definition: UT_String.h:175
int tokenizeInPlace(T &list, const char *separators=" \t\n")
Definition: UT_String.h:869
OIIO_FORCEINLINE const vint4 & operator+=(vint4 &a, const vint4 &b)
Definition: simd.h:4246
#define SYS_SAFE_BOOL
Definition: SYS_Compiler.h:62
std::string OIIO_API replace(string_view str, string_view pattern, string_view replacement, bool global=false)
int64 exint
Definition: SYS_Types.h:120
bool operator!=(const UT_String &str) const
Definition: UT_String.h:428
GLuint buffer
Definition: glew.h:1680
GLint GLenum GLsizei GLint GLsizei const void * data
Definition: glew.h:1379
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:134
bool operator>=(const UT_String &str) const
Definition: UT_String.h:476
const GLuint GLenum const void * binary
Definition: glew.h:3502
#define SYS_PRINTF_CHECK_ATTRIBUTE(string_index, first_to_check)
Definition: SYS_Types.h:432
char * findNonSpace()
fpreal64 dot(const CE_VectorT< T > &a, const CE_VectorT< T > &b)
Definition: CE_Vector.h:218
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
bool operator>(const UT_String &str) const
Definition: UT_String.h:464
char * SYSstrtok(char *string, const char *delimit, char **context)
Definition: SYS_String.h:113
char * findChar(const char *str)
Definition: UT_String.h:559
GLuint const GLuint * names
Definition: glew.h:2690
#define UT_ASSERT_SLOW(ZZ)
Definition: UT_Assert.h:133
const GLfloat * c
Definition: glew.h:16296
GLuint GLsizei GLsizei * length
Definition: glew.h:1825
const char * findChar(int c) const
Definition: UT_String.h:553
GLsizei GLsizei GLfloat distance
Definition: glew.h:13640
SYS_FORCE_INLINE UT_String(const char *str=0)
Construct UT_String from a C string, using shallow semantics.
Definition: UT_String.h:97
void harden()
Take shallow copy and make it deep.
Definition: UT_String.h:213
void saveAscii(std::ostream &os) const
Definition: UT_String.h:298
bool equal(const UT_StringRef &str, bool case_sensitive=true) const
Definition: UT_String.h:407
bool equal(const UT_String &str, bool case_sensitive=true) const
Definition: UT_String.h:403
typedef int(WINAPI *PFNWGLRELEASEPBUFFERDCARBPROC)(HPBUFFERARB hPbuffer
SYS_FORCE_INLINE bool UTisdigit(char c)
Definition: UT_String.h:62
void setAlwaysDeep(bool deep)
Make a string always deep.
Definition: UT_String.h:188
bool operator>(const UT_StringRef &str) const
Definition: UT_String.h:468
const char * findChar(const char *str) const
Definition: UT_String.h:1351
bool operator()(const std::string &s1, const std::string &s2) const
Definition: UT_String.h:1458
int eraseHead(int len)
Definition: UT_String.h:1078
void toUpper()
Definition: UT_String.h:603
void adopt(UT_String &str)
Definition: UT_String.h:285
SYS_FORCE_INLINE ~UT_StringWrap()
Definition: UT_String.h:1344
const char * findWord(const char *word) const
bool operator>(const char *str) const
Definition: UT_String.h:460
int64 getMemoryUsage(bool inclusive=true) const
Return memory usage in bytes.
Definition: UT_String.h:543
void saveBinary(std::ostream &os) const
Save string to binary stream.
Definition: UT_String.h:294
bool OIIO_API contains(string_view a, string_view b)
Does 'a' contain the string 'b' within it?
GLsizei const GLchar *const * path
Definition: glew.h:6461
int tokenize(char *argv[], int maxArgs, const char *separators=" \t\n")
Definition: UT_String.h:845
bool isValidVariableName(const char *safechars=NULL) const
GLdouble GLdouble GLdouble b
Definition: glew.h:9122
char * steal(void)
Definition: UT_String.h:253
static int compareNumberedString(const char *s1, const char *s2, bool case_sensitive=true, bool allow_negatives=false)
short myDoubleIn
Definition: UT_String.h:1443
void adopt(char *s)
Definition: UT_String.h:275
GLsizei const GLchar *const * string
Definition: glew.h:1844
double fpreal
Definition: SYS_Types.h:276
UT_String pathUpToExtension() const
GLenum func
Definition: glcorearb.h:782
int substr(UT_String &buf, int index, int len=0) const
basic_printf_context_t< buffer >::type context
Definition: printf.h:631
SYS_FORCE_INLINE bool UTisstring(const char *s)
Definition: UT_String.h:57
GLuint num
Definition: glew.h:2690
void save(std::ostream &os, bool binary) const
Save string to stream. Saves as binary if binary is true.
const void * ptr(const T *p)
Definition: format.h:3292
short myDoubleOut
Definition: UT_String.h:1442
#define SYS_NOEXCEPT
Definition: SYS_Compiler.h:55
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
Definition: glew.h:1254
char * SYSstrcasestr(const char *haystack, const char *needle)
Replacement for strcasestr, since no equivalent exists on Win32.
Definition: SYS_String.h:248
int SYSstrcasecmp(const char *a, const char *b)
Definition: SYS_String.h:227
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
Definition: glew.h:12681
int parseNumberedFilename(UT_String &prefix, UT_String &frame, UT_String &suff, bool negative=true, bool fractional=false) const
FMT_CONSTEXPR bool find(Ptr first, Ptr last, T value, Ptr &out)
Definition: format.h:2104
GLuint GLuint GLsizei count
Definition: glew.h:1253
UT_AlwaysDeepType
Definition: UT_String.h:85
const char * lastChar(int c) const
Definition: UT_String.h:569
UT_String myOut
Definition: UT_String.h:1439
UT_String myErr
Definition: UT_String.h:1440
GLuint64EXT * result
Definition: glew.h:14007
bool isstring() const
Definition: UT_String.h:705
int findLongestCommonSuffix(const char *with) const
void hardenIfNeeded()
Take shallow copy and make it deep.
Definition: UT_String.h:222
const char * findChar(int c) const
Definition: UT_String.h:1350
unsigned matchFile(const char *pattern) const
UT_String(UT_AlwaysDeepType, const char *str=0)
Construct UT_String from a C string, using ALWAYS_DEEP semantics.
Definition: UT_String.h:144
bool operator<(const UT_String &str) const
Definition: UT_String.h:440
int erase(int pos=0, int len=-1)
Definition: UT_String.h:1086
#define const
Definition: zconf.h:214
int tokenize(UT_WorkArgs &argv, char separator)
Definition: UT_String.h:839
GLubyte * pattern
Definition: glew.h:5711
bool operator<=(const UT_StringRef &str) const
Definition: UT_String.h:456
void splitPath(UT_String &dir_name, UT_String &file_name) const
unsigned isFloat(int skip_spaces=0, int loose=0, bool allow_underscore=false) const
Determine if string can be seen as a single floating point number.
char operator()(unsigned i) const
Definition: UT_String.h:506
GLenum GLuint GLsizei const GLchar * buf
Definition: glew.h:2580
int parse(UT_WorkArgs &argv, int start_arg=0, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:818
const char * base(UT_String &buf) const
void removeLast()
Remove the last character.
Definition: UT_String.h:325
bool all(const vbool4 &v)
Definition: simd.h:3371
UT_API void UTvarLookup(const char *name, UT_String &result)
SYS_FORCE_INLINE UT_StringWrap(const char *str)
Definition: UT_String.h:1338
UT_String(const std::string &str)
Construct UT_String from a std::string, always doing a deep copy. The result will only be a UT_Always...
Definition: UT_String.h:122
int eraseTail(int len)
Definition: UT_String.h:1082
const char * fileName() const
Definition: UT_String.h:624
GLfloat GLfloat v1
Definition: glew.h:1852
int countChar(int c) const
Return the number of occurrences of the specified character.
int parse(char *argv[], int maxArgs, const char *quotes="\"'", bool keep_quotes=false)
Definition: UT_String.h:812
UT_API std::ostream & do_setformat(std::ostream &os, const char fmt[])
bool startsWith(const char *prefix, bool case_sensitive=true, exint len=-1) const
int tokenize(T &list, const char *separators=" \t\n")
Definition: UT_String.h:862
const char * nonNullBuffer() const
Definition: UT_String.h:504
GLenum GLsizei len
Definition: glew.h:7752
void toLower()
Definition: UT_String.h:610
unsigned int uint32
Definition: SYS_Types.h:40
int insert(int pos, const char *str)
Definition: UT_String.h:1094
GLintptr offset
Definition: glew.h:1682
const char * fcontain(const char *pattern, bool case_sensitive=true) const
Definition: UT_String.h:995
bool operator!=(const UT_StringRef &str) const
Definition: UT_String.h:432
unsigned multiMatchRecord(const char *pattern, int maxpatterns, char *singles, int &nsingles, char **words, int &nwords, int case_sensitive=1, const char *separators=", ") const