HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_WorkBuffer.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_WorkBuffer.h ( Utility Library, C++ )
7  *
8  * COMMENTS:
9  * A growable string buffer that can be written into. A UT_String
10  * can be created with the contents of this buffer by calling
11  * copyIntoString().
12  *
13  * It's important that a non-const version of the raw buffer is not
14  * accessible since users could write past the end of the allocated
15  * buffer. Also note that the buffer location can change as it grows,
16  * so don't keep pointers to the buffer around.
17  *
18  * Most of the time, you want to allocate an object of this class on
19  * the stack and not on the heap.
20  *
21  * The buffer is kept null terminated by default. Functions exist
22  * to verify this. Note that the "length" of the buffer is the
23  * same as strlen - ie: it ignores the null termination!!!
24  */
25 
26 #ifndef __UT_WorkBuffer_h__
27 #define __UT_WorkBuffer_h__
28 
29 #include "UT_API.h"
30 
31 #include "UT_Assert.h"
32 #include "UT_Format.h"
33 #include "UT_NonCopyable.h"
34 #include "UT_String.h"
35 #include "UT_StringArray.h"
36 #include "UT_StringHolder.h"
37 #include "UT_StringView.h"
38 #include "UT_Swap.h"
39 #include "UT_Unicode.h"
40 
41 #include <SYS/SYS_Inline.h>
42 #include <SYS/SYS_Types.h>
43 
44 #include <iosfwd>
45 
46 #include <stdlib.h>
47 #include <stdio.h>
48 #include <string.h>
49 
50 
51 // The default page size on most systems is 4K. We choose a default
52 // buffer size less than half of that in the hopes that if we have
53 // functions with 2 work buffers or additional variables on the stack that we
54 // may not have to allocate multiple stack pages.
55 #define UT_INITIAL_BUFFER_SIZE 2000
56 
57 class UT_WorkArgs;
58 class UT_IStream;
59 
60 template <typename T>
61 class UT_Array;
62 
64 {
65 public:
66  typedef char value_type;
67 
70  : myBuffer(myStackBuffer)
71  {
72  // Default termination.
73  myBuffer[0] = '\0';
74  }
75 
77  explicit UT_WorkBuffer(const char *str)
78  : myBuffer(myStackBuffer)
79  {
80  myBuffer[0] = '\0';
81  append(str);
82  }
83 
85  explicit UT_WorkBuffer(const char *data, exint size)
86  : myBuffer(myStackBuffer)
87  {
88  myBuffer[0] = '\0';
89  append(data, size);
90  }
91 
93  explicit UT_WorkBuffer(const UT_String &str)
94  : myBuffer(myStackBuffer)
95  {
96  myBuffer[0] = '\0';
97  append(str);
98  }
99 
101  explicit UT_WorkBuffer(const UT_StringRef &str)
102  : myBuffer(myStackBuffer)
103  {
104  myBuffer[0] = '\0';
105  append(str);
106  }
107 
110  : myBuffer(myStackBuffer)
111  {
112  myBuffer[0] = '\0';
113  append(other);
114  }
115 
118  {
119  if (myBuffer != myStackBuffer)
120  {
121  UT_ASSERT(myBuffer);
122  ::free(myBuffer);
123  }
124  }
125 
126  /// Create a work buffer to contain a UTF-16LE (little endian)
127  /// representation of the incoming UTF-8 string.
128  /// The work buffer will be zero-word terminated.
130  static UT_WorkBuffer
131  widen(const utf8 *str)
132  {
133  return UT_WorkBuffer(do_widen(), str);
134  }
135 
136  /// Create a work buffer to contain the UTF-8 representation of the
137  /// incoming UTF-16 string. The UTF-16 string is assumed to be
138  /// little-endian, unless prefixed with BOM that indicates endianness.
139  /// The incoming string should be zero-word terminated.
141  static UT_WorkBuffer
142  narrow(const utf16 *str)
143  {
144  return UT_WorkBuffer(do_narrow(), str);
145  }
146 
147  // It's important that there is no non-const access method to the buffer.
148  // Also note that the pointer to the buffer can change if the buffer
149  // grows.
151  const char *buffer() const { return myBuffer; }
152  /// Alias for the common string access across all string types (including
153  /// standard library)
155  const char *data() const { return buffer(); }
156 
157  // Having said that, if you need a non-const pointer you must lock
158  // the string. This prohibits ANY update which changes the myLength
159  // variable (and thus potentially a realloc)
160  // You must release the buffer before any such changes.
161  // The work buffer continues to own the memory and will free it when
162  // it goes out of scope so don't think this is the same as a "steal"
163  // in UT_String.
164  // Currently, to ensure people couple their locks & releases,
165  // it asserts there is no unaccounted locks on death. This is so
166  // people who think it is steal find out otherwise.
167  // Offset is where in the string to get the pointer from.
168  // This is only to be used when absolutely necessary.
169  // When releasing, if you have a string buffer, and you have modified the
170  // length, you should set the recompute_length flag to 1. This will adjust
171  // the internal length variable so that further concatenations will work
172  // properly.
173  // The reserve_bytes parameter tells the lock to ensure that there are at
174  // least that many bytes in the locked buffer.
175  // NOTE: Unlike other UT_WorkBuffer functions, it is the user's
176  // responsibility to maintain a NUL termination guarantee when manipulating
177  // the raw buffer.
178  char *lock(exint offset = 0, exint reserve_bytes=0);
179  void release(bool recompute_length = false);
180  void releaseSetLength(exint new_length);
181 
183  exint getAllocatedSize() const { return myAllocatedSize; }
184  int64 getMemoryUsage(bool inclusive) const;
185 
186  /// Class to handle auto-locking of the UT_WorkBuffer. This is not related
187  /// to multi-threading, but to the lock/release methods above.
188  ///
189  /// You should never append data to a locked buffer.
190  class AutoLock
191  {
192  public:
195  : myBuffer(buf)
196  {
197  myString = myBuffer.lock();
198  }
201  {
202  release();
203  }
204  /// @{
205  /// Get access to the non-const buffer. This may return nullptr if the
206  /// lock has been released.
208  char *operator*() const { return myString; }
210  char *string() const { return myString; }
211  /// @}
212 
213  /// You can manually release the buffer
215  void release(bool recompute_length=false)
216  {
217  if (myString)
218  {
219  myBuffer.release(recompute_length);
220  myString = nullptr;
221  }
222  }
223  /// If you've manually released the lock, you can relock the buffer
225  void relock()
226  {
227  UT_ASSERT(!myString);
228  myString = myBuffer.lock();
229  }
230  private:
231  UT_WorkBuffer &myBuffer;
232  char *myString;
233  };
234 
235  void reserve(exint bytes=0);
236 
237  // This is a read only operator. We are avoiding the writeable
238  // versions as they lead to problems when people do a:
239  // foo[pastend] = foo(start)
240  // causing an implicit realloc.
242  char operator()(exint idx) const
243  {
244  // We allow an index at myLength as if we have a null
245  // terminated buffer that is the null termination.
246  UT_ASSERT_P(idx >= 0 && idx <= myLength);
247  return myBuffer[idx];
248  }
249 
250  // Returns first character. Only valid if !isEmpty()
252  char first() const
253  {
254  UT_ASSERT_P(myLength > 0);
255  return myBuffer[0];
256  }
257  // Returns last character. Only valid if !isEmpty()
259  char last() const
260  {
261  UT_ASSERT_P(myLength > 0);
262  return myBuffer[myLength - 1];
263  }
264 
265  // This should always be true. It's here to act as a sanity function.
266  int isNullTerminated() const;
267 
270  {
271  strcpy(other);
272  return *this;
273  }
275  UT_WorkBuffer &operator=(const char *str)
276  {
277  clear();
278  append(str);
279  return *this;
280  }
283  {
284  clear();
285  append(str.c_str(), str.length());
286  return *this;
287  }
288 
289  /// Comparison operator. Null strings are considered as empty strings.
290  /// @{
292  bool operator==(const char *str) const
293  {
294  if (!str)
295  return isEmpty();
296  return (::strcmp(str, myBuffer) == 0);
297  }
299  bool operator==(const UT_String &str) const
300  {
301  if (!(const char *)str)
302  return isEmpty();
303  return (::strcmp(str, myBuffer) == 0);
304  }
306  bool operator==(const UT_WorkBuffer &buf) const
307  {
308  if (buf.isEmpty())
309  return isEmpty();
310  if (length() != buf.length())
311  return false;
312  return (::memcmp(myBuffer, buf.myBuffer, myLength) == 0);
313  }
315  bool operator!=(const char *str) const
316  {
317  return !(*this == str);
318  }
320  bool operator!=(const UT_String &str) const
321  {
322  return !(*this == str);
323  }
325  bool operator!=(const UT_WorkBuffer &buf) const
326  {
327  return !(*this == buf);
328  }
329  /// @}
330 
331 private:
332  // Reallocate the buffer until the allocated size is >= the length. This
333  // private method needs to come first so it can be inlined.
334  void growBufferIfNeeded()
335  {
336  // Using a while loop instead of computing an accurate size the
337  // first time is slower, but most of the time the loop will execute
338  // at most once.
339  // We need to use myLength+1 as we need room for the null.
340  while (myLength+1 > myAllocatedSize) // false most of the time
341  reserve(myAllocatedSize * 2);
342  }
343 
344 public:
345  // These are standard string operators people tend to use:
347  void strcpy(const char *src)
348  {
349  clear();
350  append(src);
351  }
353  void strcpy(const UT_String &src)
354  {
355  clear();
356  append(src);
357  }
359  void strcpy(const UT_StringRef &src)
360  {
361  clear();
362  append(src);
363  }
365  void strcpy(const UT_WorkBuffer &src)
366  {
367  clear();
368  append(src);
369  }
370 
371  // NOTE: unlike strncpy(), maxlen does not include the null terminator.
373  void strncpy(const char *src, exint maxlen)
374  {
375  clear();
376  // Ensure we have enough room:
377  myLength = maxlen+1;
378  growBufferIfNeeded();
379  myLength = 0;
380  SYSstrlcpy(myBuffer, src, maxlen+1);
381  myLength = ::strlen(myBuffer);
382  }
383 
384  // Note we can't just return myLength as there may be embedded NULLs.
386  exint strlen() const
387  {
388  UT_ASSERT_P(isNullTerminated());
389  return ::strlen(myBuffer);
390  }
391 
393  exint length() const
394  {
395  return myLength;
396  }
397 
399  void strcat(const char *src)
400  {
401  append(src);
402  }
403 
404  // protectedStrcat() will quote the string in double quotes if required and
405  // protect any enclosed double quotes or backslashes in the source. It
406  // will not escape any other characters.
407  void protectedStrcat(const char *str, bool force_quote=false);
408 
409  // fullyProtected*Strcat() is similar to protectedStrcat, except it escapes
410  // any non-printable characters. It will not escape single quotes, and if
411  // force_quote is true, it will add double-quotes. It will work with
412  // arbitrary binary data and uses the \xNN syntax to encode bytes.
413  // UT_IStream::read() is capable of loading strings encoded with this
414  // method, and these strings can also be decoded in Python. If
415  // fullyProtectedBinaryStrcat is called, this method can handle data
416  // containing null characters.
417  void fullyProtectedStrcat(const char *str, bool force_quote=false);
418  void fullyProtectedBinaryStrcat(
419  const char *str, exint size, bool force_quote=false);
420 
421  /// Append a string of a given maximum length to the current string.
422  /// Unlike the POSIX's strncat(3), we ignore any NUL bytes in the current
423  /// string and blindly append at the end of the work buffer.
425  void strncat(const char *src, exint len)
426  {
427  if (!src)
428  return;
429  append(src, ::strnlen(src, len));
430  }
431 
432  // Extract the first argument from the src and append it to the work
433  // buffer. This does NOT handle quotes properly (i.e. if the first word
434  // is quoted with spaces).
435  void strcatFirstWord(const char *src);
436 
438  int strcmp(const char *src) const
439  {
440  UT_ASSERT_P(isNullTerminated());
441  return ::strcmp(myBuffer, src);
442  }
443 
445  int strncmp(const char *src, exint n) const
446  {
447  UT_ASSERT_P(isNullTerminated());
448  return ::strncmp(myBuffer, src, n);
449  }
450 
452  char *strdup() const
453  {
454  UT_ASSERT(isNullTerminated());
455  return ::strdup(myBuffer);
456  }
457 
458  // Reset the buffer to an empty buffer.
460  void clear()
461  {
462  if (myLockCount) { UT_ASSERT(0); return; }
463  myLength = 0;
464  myBuffer[0] = '\0';
465  }
466 
468  bool isEmpty() const
469  {
470  return (myLength == 0);
471  }
473  bool isstring() const
474  {
475  return !isEmpty();
476  }
477 
478  // Write into the buffer at a specific place.
479  // This WILL expand the buffer if it is required and keep it null
480  // terminated.
482  void write(exint offset, char c)
483  {
484  UT_ASSERT(offset >= 0);
485  if (offset < 0) return;
486  if (offset >= myLength)
487  {
488  if (myLockCount) { UT_ASSERT(0); return; }
489  myLength = offset+1;
490  growBufferIfNeeded();
491  myBuffer[myLength] = '\0';
492  }
493  myBuffer[offset] = c;
494  if (c == '\0')
495  myLength = offset;
496  }
497 
498  // This does NOT write out the trailing NULL of src, but the buffer will
499  // still be null-terminated.
500  void write(exint offset, const char *src)
501  {
502  while (*src)
503  {
504  write(offset, *src);
505  src++;
506  offset++;
507  }
508  }
509 
511  {
512  write(offset, src.c_str());
513  }
514 
515  /// Load an entire file into the buffer. Returns @b false if there was an
516  /// error reading the file
517  bool readFile(const char *filename);
518 
519  // Read a line from an istream -- no matter how long the line is
520  // Returns false if the stream read failed or true otherwise
521  bool getline(std::istream &is);
522  bool getline(FILE *fp);
523 
524  // Much like getline() except that it has more features. The string itself
525  // is tokenized, and the UT_WorkArgs points into it.
526  // line_num is incremented for each line read.
527  // comment_chars is list of characters to treat as comments.
528  // this can be NULL if we don't want this feature.
529  // Returns false if the stream read failed.
530  bool cmdGetLine(std::istream &is, UT_WorkArgs &args, int &line_num,
531  const char *comment_chars = "#",
532  const char *separators = " \t\n\r");
533  bool cmdGetLine(UT_IStream &is, UT_WorkArgs &args, int &line_num,
534  const char *comment_chars = "#",
535  const char *separators = " \t\n\r");
536  bool cmdGetLine(FILE *fp, UT_WorkArgs &args, int &line_num,
537  const char *comment_chars = "#",
538  const char *separators = " \t\n\r");
539 
540  /// Fast integer to string conversion.
541  /// @{
542  void itoa(int64 i);
543  void utoa(uint64 i);
544  /// @}
545 
546  int sprintf(const char *fmt, ...)
548  int appendSprintf(const char *fmt, ...)
550 
551  int vsprintf(const char *fmt, va_list ap);
552 
553  /// Replace the contents of the work buffer using the same formatting as
554  /// UTformat.
555  /// Returns the size of the appended portion, in bytes.
556  template<typename... Args>
557  size_t format(const char *fmt, const Args &...args)
558  {
559  clear();
560  return appendFormat(fmt, args...);
561  }
562 
563  /// Append to the work buffer using the same formatting as UTformat.
564  /// Returns the size of the appended portion, in bytes.
565  template<typename... Args>
566  size_t appendFormat(const char *fmt, const Args &...args)
567  {
568  if (myLockCount) { UT_ASSERT(0); return 0; }
569  UT_ASSERT_P(isNullTerminated());
570 
571  using namespace UT::Format;
572  Writer w;
574  size_t nb_needed = f.format(w, fmt, {args...});
575 
576  myLength += nb_needed;
577  growBufferIfNeeded();
578 
579  // Format again, this time to fill in the buffer.
580  w.setBuffer(myBuffer + myLength - nb_needed, nb_needed);
581  f.format(w, fmt, {args...});
582 
583  myBuffer[myLength] = '\0';
584  return nb_needed;
585  }
586 
587  /// Replace the contents of the work buffer using UTformat formatting
588  /// with an implicit "{} " for each argument, giving a Python-style
589  /// print result.
590  template<typename... Args>
591  size_t print(const Args &...args)
592  {
593  clear();
594  return appendPrint(args...);
595  }
596 
597  /// Append to the work buffer using the UTformat with an implicit "{} "
598  /// format for each parameter.
599  /// Returns the size of the appended portion, in bytes.
600  template<typename... Args>
601  size_t appendPrint()
602  {
603  return 0;
604  }
605  template<typename T, typename... Args>
606  size_t appendPrint(const T &value, const Args &...args)
607  {
608  size_t newbytes;
609  newbytes = appendFormat("{}", value);
610  // NB: we might be empty when value was the empty string
611  if (!isEmpty() && last() != '\n')
612  {
613  append(' ');
614  newbytes++;
615  }
616  newbytes += appendPrint(args...);
617  return newbytes;
618  }
619 
620  // These tack stuff to the end of the buffer.
622  void append(char character)
623  {
624  if (myLockCount) { UT_ASSERT(0); return; }
625  UT_ASSERT_P(isNullTerminated());
626  myLength++;
627  growBufferIfNeeded();
628  myBuffer[myLength - 1] = character;
629  myBuffer[myLength] = '\0';
630  }
631 
632  void printMemory(int64 mem) { clear(); appendPrintMemory(mem); }
633  void appendPrintMemory(int64 mem);
634 
635  void append(exint n, char character)
636  {
637  if (myLockCount) { UT_ASSERT(0); return; }
638  UT_ASSERT_P(isNullTerminated());
639  myLength += n;
640  growBufferIfNeeded();
641  for (int i = n; i > 0; i--)
642  myBuffer[myLength - i] = character;
643  myBuffer[myLength] = '\0';
644  }
645 
646  /// Append a single Unicode code point, converted to UTF8
647  void append(utf32 cp)
648  {
650  int len = UT_Unicode::convert(cp, buf, sizeof(buf));
651  if (!len)
652  return;
653 
654  if (myLockCount) { UT_ASSERT(0); return; }
655  UT_ASSERT_P(isNullTerminated());
656  myLength += len;
657  growBufferIfNeeded();
658  ::memcpy(myBuffer + myLength - len, buf, len);
659  myBuffer[myLength] = '\0';
660  }
661 
662  void append(const char *data, exint size)
663  {
664  if (myLockCount) { UT_ASSERT(0); return; }
665  UT_ASSERT_P(data);
666  UT_ASSERT_P(isNullTerminated());
667  myLength += size;
668  growBufferIfNeeded();
669  ::memcpy(myBuffer + myLength - size, data, size);
670  myBuffer[myLength] = '\0';
671  }
672 
674  void append(const char *str)
675  {
676  if( UTisstring(str) )
677  append(str, ::strlen(str));
678  }
679 
681  void append(const UT_String &str)
682  {
683  if (str.isstring())
684  append((const char *)str);
685  }
686 
688  void append(const UT_StringRef &str)
689  {
690  if (str.isstring())
691  append(str.buffer(), str.length());
692  }
693 
694  void append(const UT_StringArray &strs, const UT_StringRef &sep)
695  {
696  for (exint i = 0; i < strs.entries(); i++)
697  {
698  append(strs(i));
699  if (i+1 < strs.entries())
700  append(sep);
701  }
702  }
703 
705  void append(const UT_WorkBuffer &wb)
706  {
707  append( wb.buffer(), wb.length() );
708  }
709 
711  UT_WorkBuffer &operator+=(const char *str)
712  {
713  append(str);
714  return *this;
715  }
716 
719  {
720  append(str);
721  return *this;
722  }
723 
726  {
727  append(wb);
728  return *this;
729  }
730 
733  {
734  append(str);
735  return *this;
736  }
737 
740  {
741  append(str);
742  return *this;
743  }
744 
745  void prepend(char character)
746  {
747  if (myLockCount) { UT_ASSERT(0); return; }
748  UT_ASSERT_P(isNullTerminated());
749  myLength++;
750  growBufferIfNeeded();
751  ::memmove(myBuffer+1, myBuffer, myLength);
752  myBuffer[0] = character;
753  }
754  void prepend(const char *data, exint size)
755  {
756  if (myLockCount) { UT_ASSERT(0); return; }
757  UT_ASSERT_P(data);
758  UT_ASSERT_P(isNullTerminated());
759  myLength += size;
760  growBufferIfNeeded();
761  ::memmove(myBuffer+size, myBuffer, myLength+1 - size);
762  ::memcpy(myBuffer, data, size);
763  }
765  void prepend(const char *str)
766  {
767  UT_ASSERT_P(str);
768  prepend(str, ::strlen(str));
769  }
770 
772  void prepend(const UT_String &str)
773  {
774  if (str.isstring())
775  prepend((const char *)str);
776  }
778  void prepend(const UT_StringRef &str)
779  {
780  if (str)
781  prepend(str.buffer(), str.length());
782  }
783 
784  /// Insert @c slen characters from @c str, at location @c pos. If @c pos
785  /// exceeds the current length, the position is truncated, resulting in an append.
786  void insert(exint pos, const char* str, exint slen);
787 
788  /// Erase @c len characters from location @c pos in the string.
789  void erase(exint pos, exint len);
790 
791  void rewind() { backup(myLength); }
792 
793  /// Rewind by the given length
795  void backup(exint by_length)
796  {
797  if (myLockCount) { UT_ASSERT(0); return; }
798  UT_ASSERT_P(isNullTerminated());
799  UT_ASSERT_P(by_length >= 0);
800  myLength -= by_length;
801  UT_ASSERT(myLength >= 0);
802  myBuffer[myLength] = '\0';
803  }
804 
805  /// Truncate the buffer to the specified length. Truncating to 0 is
806  /// identical to clear().
808  void truncate(exint new_length)
809  {
810  if (new_length >= myLength)
811  {
812  UT_ASSERT(0 && "Truncating beyond buffer extent");
813  return;
814  }
815  backup(myLength-new_length);
816  }
817 
818  // Delete characters off the end of the string until we hit the
819  // requested character.
820  void backupTo(char c)
821  {
822  if (myLockCount) { UT_ASSERT(0); return; }
823  UT_ASSERT_P(isNullTerminated());
824  while( myLength > 0 && myBuffer[myLength-1] != c )
825  myLength--;
826  myBuffer[myLength] = '\0';
827  }
828 
829  void advance(exint by_length)
830  {
831  if (myLockCount) { UT_ASSERT(0); return; }
832  UT_ASSERT_P(isNullTerminated());
833  UT_ASSERT_P(by_length >= 0);
834  myLength -= by_length;
835  UT_ASSERT(myLength >= 0);
836  for (int i=0; i<myLength; i++)
837  myBuffer[i] = myBuffer[by_length+i];
838  myBuffer[myLength] = '\0';
839  }
840 
841  // Finds the 'occurance_number'-th occurrence of char c in the string.
843  const char *findChar(char c, int occurance_number = 1) const
844  {
845  return findCharFrom(c, 0, occurance_number);
846  }
847  // Same as findChar, but searches from the end of the string.
848  const char *lastChar(char c, int occurance_number = 1) const
849  {
850  if (myLockCount) { UT_ASSERT(0); return NULL; }
851 
852  UT_ASSERT_P(isNullTerminated());
853 
854  for (exint i = myLength; i --> 0;)
855  {
856  if(c == myBuffer[i])
857  {
858  occurance_number--;
859  if(occurance_number <= 0)
860  {
861  return (myBuffer + i);
862  }
863  }
864  }
865 
866  return NULL;
867  }
868  // Same and findChar, bu searches from given position in the string.
869  const char *findCharFrom(char c, exint position,
870  int occurance_number = 1) const
871  {
872  if (myLockCount) { UT_ASSERT(0); return NULL; }
873 
874  UT_ASSERT_P(isNullTerminated());
875 
876  if (position < 0 || position >= myLength) { return NULL; }
877 
878  for(exint i = position; i < myLength; ++i)
879  {
880  if(c == myBuffer[i])
881  {
882  occurance_number--;
883  if(occurance_number <= 0)
884  {
885  return (myBuffer + i);
886  }
887  }
888  }
889 
890  return NULL;
891  }
892 
893  /// Adopt a string from an outside source. The passed string is now
894  /// owned by the workbuffer.
895  void adoptFromMalloc(char* data, exint length);
896 
897  void adoptFromCharArray(UT_Array<char>& data);
898 
899  /// Count the occurrences of the text in the current string
900  exint count(const char *needle) const;
901 
902  // Get the next token pointed at by string and advance string past the
903  // token. Returns whether or not a token was retrieved successfully.
904  // Note that string is modified!!!
905  bool getNextToken(const char *(&string),
906  const UT_String separators = " \t\n");
907 
908  // Harden the contents of the buffer into a UT_String.
909  void copyIntoString(UT_String &str) const;
910 
911  // Copy the contents into a fixed length buffer.
912  // TODO: Get rid of this method, since it encourages fixed-length buffers.
913  void copyIntoString(char *str, exint max_length) const;
914 
915  // Steal the contents of this work buffer into the string.
916  void stealIntoString(UT_String &str);
917 
918  // Steal the contents of this work buffer into the string.
919  // NB: Please use UT_StringHolder move constructor/assignment instead of
920  // this function.
921  SYS_DEPRECATED(19.0)
922  void stealIntoStringHolder(UT_StringHolder &str);
923 
924  // Return a string containing the contents of this work buffer, preserving
925  // any null characters in it.
926  std::string toStdString() const
927  { return std::string(buffer(), length()); }
928 
929  // Strips the characters after comment_char from the buffer. This method
930  // goes to some effort to ensure that the comment_char is not preceded by
931  // a backslash or is not in a quoted string. Returns true if it found a
932  // comment and modified the buffer, and false otherwise.
933  bool stripComments(char comment_char = '#');
934 
935  /// Strips out all characters found in 'chars'. The string length will be
936  /// reduced by the number of characters removed. The number of characters
937  /// removed is returned.
938  int strip(const char *chars);
939 
940  /// Remove trailing whitespace lines
941  void removeTrailingSpaceLines();
942 
943  /// Remove trailing whitespace, return true if whitespace was removed.
944  bool removeTrailingSpace();
945 
946  /// Remove leading white space, return true if whitespace was removed.
947  bool removeLeadingSpace();
948 
949  /// Remove trailing digits, return true if some were removed.
950  bool removeTrailingDigits();
951 
952  /// Convert string to lower case
953  void lower();
954 
955  /// Convert string to upper case
956  void upper();
957 
958  /// Create a string of tabs & spaces which represents the given indent
959  void makeIndentString(exint indent, exint tabstop=8);
960 
961  /// Remove the first n characters.
963  {
964  if (n < myLength)
965  {
966  myLength -= n;
967  ::memmove(myBuffer, myBuffer + n, myLength);
968  }
969  else
970  myLength = 0;
971 
972  myBuffer[myLength] = '\0';
973  }
974 
975  /// Replaces up to 'count' occurrences of 'find' with 'replacement',
976  /// and returns the number of substitutions that occurred.
977  /// If 'count' <= 0, all occurrences will be replaced.
978  int substitute(const char *find, const char *replacement, int count);
979 
980  /// Convenience version of substitute() for all or single occurrence.
981  int substitute(const char *find, const char *replacement, bool all = true)
982  {
983  return substitute(find, replacement, !all ? 1 : -1);
984  }
985 
986  /// Given from_name which is assumed to fit from_pattern, any assigned
987  /// wildcards are substituted in to_pattern, writing the result to this.
988  /// The wildcards may also be indexed. For example:
989  ///
990  /// to_pattern = b* from_name = apple from_pattern = a*le
991  /// ---> this = bpp
992  ///
993  /// to_pattern = *(1)_to_*(0) from_name = a_to_b from_pattern = *_to_*
994  /// ---> this = b_to_a
995  bool subPatterns(
996  const char *to_pattern,
997  const char *from_name,
998  const char *from_pattern);
999 
1000  /// UTF-16 / UTF-8 conversions.
1001 
1002  /// Set the work buffer to contain the UTF-8 representation of the incoming UTF-16 string.
1003  /// The UTF-16 string is assumed to be little-endian, unless prefixed with BOM that
1004  /// indicates endianness.
1005  /// The incoming string should be zero-word terminated.
1006  void setFromUTF16(const utf16 *str);
1007 
1008  /// Set the work buffer to contain a UTF-16LE (little endian) representation of the
1009  /// incoming UTF-8 string.
1010  /// The work buffer will be zero-word terminated.
1011  void setAsUTF16(const utf8 *str);
1012 
1013  /// Once set as UTF16-LE, get it back as such a pointer.
1015  const utf16* castToUTF16() const { return (const utf16*) myBuffer; }
1016 
1017  /// Lock buffer for `len` utf-16 characters.
1019  {
1020  return (utf16*)lock(offset, len*sizeof(utf16));
1021  }
1022 
1023  void swap(UT_WorkBuffer &other)
1024  {
1025  // Warn if we're about to swap locked buffers.
1026  UT_ASSERT(myLockCount==0);
1027 
1028  bool this_stack = (myBuffer == myStackBuffer);
1029  bool other_stack = (other.myBuffer == other.myStackBuffer);
1030 
1031  if (this_stack && other_stack)
1032  {
1033  // If both buffers are using the stack buffer, just swap the
1034  // buffer contents.
1035  size_t max_size = (myLength > other.myLength) ? myLength
1036  : other.myLength;
1037 
1038  UTswap(myStackBuffer, other.myStackBuffer, max_size + 1);
1039  }
1040  else if (this_stack && !other_stack)
1041  {
1042  ::memcpy(other.myStackBuffer, myStackBuffer, myLength + 1);
1043  myBuffer = other.myBuffer;
1044  other.myBuffer = other.myStackBuffer;
1045  }
1046  else if (!this_stack && other_stack)
1047  {
1048  ::memcpy(myStackBuffer, other.myStackBuffer, other.myLength + 1);
1049  other.myBuffer = myBuffer;
1050  myBuffer = myStackBuffer;
1051  }
1052  else
1053  UTswap(myBuffer, other.myBuffer);
1054  UTswap(myAllocatedSize, other.myAllocatedSize);
1055  UTswap(myLength, other.myLength);
1056  UTswap(myLockCount, other.myLockCount);
1057  }
1058 public:
1059  /// Iterator compatibility.
1061  const char *begin() const { return myBuffer; }
1063  const char *end() const { return myBuffer + myLength; }
1064 
1065 private:
1066 
1067  struct do_widen {};
1068  struct do_narrow {};
1069  /// Private constructors to allow for the Return Value Optimization
1070  /// @{
1072  UT_WorkBuffer(do_widen, const utf8 *str)
1073  : myBuffer(myStackBuffer)
1074  , myAllocatedSize(UT_INITIAL_BUFFER_SIZE)
1075  , myLength(0)
1076  , myLockCount(0)
1077  {
1078  setAsUTF16(str);
1079  }
1081  UT_WorkBuffer(do_narrow, const utf16 *str)
1082  : myBuffer(myStackBuffer)
1083  , myAllocatedSize(UT_INITIAL_BUFFER_SIZE)
1084  , myLength(0)
1085  , myLockCount(0)
1086  {
1087  setFromUTF16(str);
1088  }
1089  /// @}
1090 
1091  friend UT_API std::ostream &operator<<(std::ostream &os,
1092  const UT_WorkBuffer &buffer);
1093 
1094 private: // Data:
1095 
1096  char *myBuffer; // Do not make an access method to the data
1097  exint myAllocatedSize = UT_INITIAL_BUFFER_SIZE;
1098  exint myLength = 0;
1099  int myLockCount = 0;
1100  char myStackBuffer[UT_INITIAL_BUFFER_SIZE];
1101 };
1102 
1103 
1104 static inline size_t
1105 format(char *buffer, size_t buffer_size, const UT_WorkBuffer &v)
1106 {
1107  if (!buffer)
1108  return v.length();
1109  else
1110  {
1111  size_t len = std::min(size_t(v.length()), buffer_size);
1112  ::memcpy(buffer, v.buffer(), len);
1113  return len;
1114  }
1115 }
1116 
1117 template <typename T>
1118 void
1119 UTstringJoin(UT_StringHolder& out, const UT_StringHolder& sep, const T& items)
1120 {
1121  //TODO: check T is iterable once UT can use C++17. Its far to ugly to
1122  //bother doing this in C++14.
1123  UT_WorkBuffer wbuf;
1124  for (auto&& item : items)
1125  {
1126  if (!wbuf.isEmpty())
1127  wbuf.append(sep);
1128  wbuf.appendFormat("{}", item);
1129  }
1130  out = std::move(wbuf);
1131 }
1132 
1133 inline
1134 void
1136 {
1137  // Once we switch to C++17 for UT we can remove this function and just
1138  // count the number of args left
1139 }
1140 
1141 template <typename T>
1142 void
1144 {
1145  if (result.isEmpty())
1146  {
1147  result.format("{}", arg);
1148  return;
1149  }
1150 
1151  UT_WorkBuffer str_arg;
1152  str_arg.format("{}", arg);
1153  if (str_arg.isEmpty())
1154  return;
1155 
1156  // If both the end of the current path and the beginning of the path to be
1157  // added don't have path separators then add one.
1158  if (result.last() != '/' && str_arg.first() != '/')
1159  result.append('/');
1160 
1161  // If both the current path and the beginning of the added path have
1162  // separators then remove the separator from the new path so that we
1163  // don't end up with two separators together.
1164  UT_StringView view(str_arg.buffer(), str_arg.length());
1165  if (result.last() == '/' && str_arg.first() == '/')
1166  {
1167  view.removePrefix(1);
1168  }
1169 
1170  if (!view.isEmpty())
1171  result.append(view.data(), view.length());
1172 }
1173 
1174 template <typename T, typename... Args>
1175 void
1177 {
1178  UTstringPathJoin(result, arg);
1179  UTstringPathJoin(result, std::forward<Args>(args)...);
1180 }
1181 
1182 #endif
size_t print(const Args &...args)
vbool4 insert(const vbool4 &a, bool val)
Helper: substitute val for a[i].
Definition: simd.h:3414
std::basic_string< Char > vsprintf(const S &format, basic_format_args< basic_printf_context_t< type_identity_t< Char >>> args)
Definition: printf.h:634
SYS_FORCE_INLINE void append(const UT_StringRef &str)
std::string upper(string_view a)
Return an all-upper case version of a (locale-independent).
Definition: strutil.h:349
string_view OIIO_API strip(string_view str, string_view chars=string_view())
GT_API const UT_StringHolder filename
SYS_FORCE_INLINE exint length() const
void UTstringJoin(UT_StringHolder &out, const UT_StringHolder &sep, const T &items)
#define SYS_DEPRECATED(__V__)
SYS_FORCE_INLINE void strcpy(const UT_StringRef &src)
void write(exint offset, const UT_StringHolder &src)
SYS_FORCE_INLINE exint getAllocatedSize() const
SYS_FORCE_INLINE char * operator*() const
void UTswap(T &a, T &b)
Definition: UT_Swap.h:35
void
Definition: png.h:1083
SYS_FORCE_INLINE bool operator==(const UT_String &str) const
SYS_FORCE_INLINE UT_WorkBuffer(const char *data, exint size)
Definition: UT_WorkBuffer.h:85
SYS_FORCE_INLINE void strncat(const char *src, exint len)
unsigned short utf16
Definition: SYS_Types.h:56
SYS_FORCE_INLINE UT_WorkBuffer(const UT_StringRef &str)
const GLfloat * c
Definition: glew.h:16631
SYS_FORCE_INLINE UT_WorkBuffer(const char *str)
Definition: UT_WorkBuffer.h:77
int64 exint
Definition: SYS_Types.h:125
void append(exint n, char character)
SYS_FORCE_INLINE const char * buffer() const
SYS_FORCE_INLINE void strcpy(const char *src)
SYS_FORCE_INLINE void release(bool recompute_length=false)
You can manually release the buffer.
#define UT_API
Definition: UT_API.h:14
const char * findCharFrom(char c, exint position, int occurance_number=1) const
void append(const char *data, exint size)
const char * lastChar(char c, int occurance_number=1) const
size_t appendPrint()
GLenum src
Definition: glcorearb.h:1792
unsigned long long uint64
Definition: SYS_Types.h:117
SYS_FORCE_INLINE char last() const
utf16 * lockUTF16(exint offset=0, exint len=0)
Lock buffer for len utf-16 characters.
SYS_FORCE_INLINE void relock()
If you've manually released the lock, you can relock the buffer.
void removePrefix(exint n)
GLuint buffer
Definition: glcorearb.h:659
SYS_FORCE_INLINE void append(const UT_String &str)
SYS_FORCE_INLINE const char * data() const
void swap(UT_WorkBuffer &other)
void eraseHead(exint n)
Remove the first n characters.
SYS_FORCE_INLINE void append(const UT_WorkBuffer &wb)
GLsizeiptr size
Definition: glcorearb.h:663
GLubyte GLubyte GLubyte GLubyte w
Definition: glcorearb.h:856
SYS_FORCE_INLINE bool operator==(const UT_WorkBuffer &buf) const
std::ostream & operator<<(std::ostream &ostr, const DataType &a)
Definition: DataType.h:133
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const UT_WorkBuffer &wb)
SYS_FORCE_INLINE void append(const char *str)
A utility class to do read-only operations on a subset of an existing string.
Definition: UT_StringView.h:40
Format
Definition: oidn.hpp:16
static const utf8 * convert(const utf8 *str, utf32 &cp)
size_t appendFormat(const char *fmt, const Args &...args)
#define UT_INITIAL_BUFFER_SIZE
Definition: UT_WorkBuffer.h:55
GLuint64EXT * result
Definition: glew.h:14311
SYS_FORCE_INLINE void strcpy(const UT_WorkBuffer &src)
ImageBuf OIIO_API min(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
exint length() const
SYS_FORCE_INLINE const char * buffer() const
SYS_FORCE_INLINE const char * end() const
FMT_CONSTEXPR bool find(Ptr first, Ptr last, T value, Ptr &out)
Definition: format.h:2929
GLenum GLsizei len
Definition: glew.h:7782
SYS_FORCE_INLINE UT_WorkBuffer & operator=(const UT_WorkBuffer &other)
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:170
unsigned int utf32
Definition: SYS_Types.h:58
void printMemory(int64 mem)
#define SYS_PRINTF_CHECK_ATTRIBUTE(string_index, first_to_check)
Definition: SYS_Types.h:447
const GLdouble * v
Definition: glcorearb.h:836
SYS_FORCE_INLINE const char * begin() const
Iterator compatibility.
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
GLsizei const GLchar *const * string
Definition: glcorearb.h:813
SYS_FORCE_INLINE void prepend(const UT_StringRef &str)
void prepend(char character)
SYS_FORCE_INLINE bool isEmpty() const
SYS_FORCE_INLINE char * strdup() const
long long int64
Definition: SYS_Types.h:116
static SYS_FORCE_INLINE UT_WorkBuffer widen(const utf8 *str)
SYS_FORCE_INLINE const char * findChar(char c, int occurance_number=1) const
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const UT_String &str)
SYS_FORCE_INLINE exint strlen() const
SYS_FORCE_INLINE bool operator!=(const UT_WorkBuffer &buf) const
SYS_FORCE_INLINE UT_WorkBuffer & operator=(const std::string &str)
SYS_FORCE_INLINE const char * c_str() const
SYS_FORCE_INLINE void strcpy(const UT_String &src)
#define UT_UTF8_MAX_ENCODING_LEN
Definition: UT_Unicode.h:19
GLint GLsizei count
Definition: glcorearb.h:404
size_t SYSstrlcpy(char *dest, const char *src, size_t size)
Definition: SYS_String.h:180
checked_ptr< typename Container::value_type > reserve(std::back_insert_iterator< Container > it, size_t n)
Definition: format.h:373
SYS_FORCE_INLINE char * string() const
SYS_FORCE_INLINE void prepend(const UT_String &str)
SYS_FORCE_INLINE char operator()(exint idx) const
SYS_FORCE_INLINE UT_WorkBuffer(const UT_WorkBuffer &other)
SYS_FORCE_INLINE void strcat(const char *src)
detail::named_arg< Char, T > arg(const Char *name, const T &arg)
Definition: core.h:1640
int substitute(const char *find, const char *replacement, bool all=true)
Convenience version of substitute() for all or single occurrence.
SYS_FORCE_INLINE void truncate(exint new_length)
SYS_FORCE_INLINE const utf16 * castToUTF16() const
Once set as UTF16-LE, get it back as such a pointer.
GLdouble n
Definition: glcorearb.h:2007
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glcorearb.h:2539
exint entries() const
Alias of size(). size() is preferred.
Definition: UT_Array.h:481
GLboolean * data
Definition: glcorearb.h:130
void append(utf32 cp)
Append a single Unicode code point, converted to UTF8.
SYS_FORCE_INLINE int strcmp(const char *src) const
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:794
GLint GLint GLsizei GLint GLenum format
Definition: glcorearb.h:107
static SYS_FORCE_INLINE UT_WorkBuffer narrow(const utf16 *str)
SYS_FORCE_INLINE UT_WorkBuffer & operator=(const char *str)
SYS_FORCE_INLINE bool isstring() const
SYS_FORCE_INLINE void backup(exint by_length)
Rewind by the given length.
void write(exint offset, const char *src)
size_t format(const char *fmt, const Args &...args)
SYS_FORCE_INLINE UT_WorkBuffer(const UT_String &str)
Definition: UT_WorkBuffer.h:93
SYS_FORCE_INLINE AutoLock(UT_WorkBuffer &buf)
void backupTo(char c)
size_t appendPrint(const T &value, const Args &...args)
SYS_FORCE_INLINE bool operator==(const char *str) const
std::string lower(string_view a)
Return an all-lower case version of a (locale-independent).
Definition: strutil.h:342
void advance(exint by_length)
SYS_FORCE_INLINE bool operator!=(const char *str) const
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const char *str)
SYS_FORCE_INLINE bool UTisstring(const char *s)
SYS_FORCE_INLINE void append(char character)
Type-safe formatting, modeled on the Python str.format function.
void prepend(const char *data, exint size)
GLsizei const GLfloat * value
Definition: glcorearb.h:823
GLfloat f
Definition: glcorearb.h:1925
virtual bool readFile(GA_Detail &g, const char *filename, const GA_LoadOptions *opts, UT_StringArray *errors) const
Class which defines an I/O interface to save/load geometry.
**If you just want to fire and args
Definition: thread.h:615
void append(const UT_StringArray &strs, const UT_StringRef &sep)
bool isstring() const
Definition: UT_String.h:681
#define UT_ASSERT(ZZ)
Definition: UT_Assert.h:171
SYS_FORCE_INLINE ~UT_WorkBuffer()
SYS_FORCE_INLINE void clear()
char utf8
Definition: SYS_Types.h:52
GLintptr offset
Definition: glcorearb.h:664
SYS_FORCE_INLINE ~AutoLock()
#define const
Definition: zconf.h:214
SYS_FORCE_INLINE char first() const
void UTstringPathJoin(UT_WorkBuffer &result)
void write(T &out, bool v)
Definition: ImfXdr.h:332
SYS_FORCE_INLINE bool operator!=(const UT_String &str) const
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const UT_StringRef &str)
bool all(const vbool4 &v)
Definition: simd.h:3445
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const std::string &str)
SYS_FORCE_INLINE void write(exint offset, char c)
SYS_FORCE_INLINE bool isstring() const
Definition: format.h:3611
SYS_FORCE_INLINE void prepend(const char *str)
SYS_FORCE_INLINE void strncpy(const char *src, exint maxlen)
SYS_FORCE_INLINE int strncmp(const char *src, exint n) const
std::basic_string< Char > sprintf(const S &format, const Args &...args)
Definition: printf.h:653
SYS_FORCE_INLINE UT_WorkBuffer()
Definition: UT_WorkBuffer.h:69