HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_WorkBuffer.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_WorkBuffer.h ( Utility Library, C++ )
7  *
8  * COMMENTS:
9  * A growable string buffer that can be written into. A UT_String
10  * can be created with the contents of this buffer by calling
11  * copyIntoString().
12  *
13  * It's important that a non-const version of the raw buffer is not
14  * accessible since users could write past the end of the allocated
15  * buffer. Also note that the buffer location can change as it grows,
16  * so don't keep pointers to the buffer around.
17  *
18  * Most of the time, you want to allocate an object of this class on
19  * the stack and not on the heap.
20  *
21  * The buffer is kept null terminated by default. Functions exist
22  * to verify this. Note that the "length" of the buffer is the
23  * same as strlen - ie: it ignores the null termination!!!
24  */
25 
26 #ifndef __UT_WorkBuffer_h__
27 #define __UT_WorkBuffer_h__
28 
29 #include "UT_API.h"
30 
31 #include "UT_Assert.h"
32 #include "UT_Format.h"
33 #include "UT_NonCopyable.h"
34 #include "UT_String.h"
35 #include "UT_StringArray.h"
36 #include "UT_StringHolder.h"
37 #include "UT_Swap.h"
38 #include "UT_Unicode.h"
39 
40 #include <SYS/SYS_Inline.h>
41 #include <SYS/SYS_Types.h>
42 
43 #include <iosfwd>
44 
45 #include <stdlib.h>
46 #include <stdio.h>
47 #include <string.h>
48 
49 
50 // The default page size on most systems is 4K. We choose a default
51 // buffer size less than half of that in the hopes that if we have
52 // functions with 2 work buffers or additional variables on the stack that we
53 // may not have to allocate multiple stack pages.
54 #define UT_INITIAL_BUFFER_SIZE 2000
55 
56 class UT_WorkArgs;
57 class UT_IStream;
58 
59 template <typename T>
60 class UT_Array;
61 
63 {
64 public:
65  typedef char value_type;
66 
69  : myBuffer(myStackBuffer)
70  {
71  // Default termination.
72  myBuffer[0] = '\0';
73  }
74 
76  explicit UT_WorkBuffer(const char *str)
77  : myBuffer(myStackBuffer)
78  {
79  myBuffer[0] = '\0';
80  append(str);
81  }
82 
84  explicit UT_WorkBuffer(const char *data, exint size)
85  : myBuffer(myStackBuffer)
86  {
87  myBuffer[0] = '\0';
88  append(data, size);
89  }
90 
92  explicit UT_WorkBuffer(const UT_String &str)
93  : myBuffer(myStackBuffer)
94  {
95  myBuffer[0] = '\0';
96  append(str);
97  }
98 
100  explicit UT_WorkBuffer(const UT_StringRef &str)
101  : myBuffer(myStackBuffer)
102  {
103  myBuffer[0] = '\0';
104  append(str);
105  }
106 
109  : myBuffer(myStackBuffer)
110  {
111  myBuffer[0] = '\0';
112  append(other);
113  }
114 
117  {
118  if (myBuffer != myStackBuffer)
119  {
120  UT_ASSERT(myBuffer);
121  ::free(myBuffer);
122  }
123  }
124 
125  /// Create a work buffer to contain a UTF-16LE (little endian)
126  /// representation of the incoming UTF-8 string.
127  /// The work buffer will be zero-word terminated.
129  static UT_WorkBuffer
130  widen(const utf8 *str)
131  {
132  return UT_WorkBuffer(do_widen(), str);
133  }
134 
135  /// Create a work buffer to contain the UTF-8 representation of the
136  /// incoming UTF-16 string. The UTF-16 string is assumed to be
137  /// little-endian, unless prefixed with BOM that indicates endianness.
138  /// The incoming string should be zero-word terminated.
140  static UT_WorkBuffer
141  narrow(const utf16 *str)
142  {
143  return UT_WorkBuffer(do_narrow(), str);
144  }
145 
146  // It's important that there is no non-const access method to the buffer.
147  // Also note that the pointer to the buffer can change if the buffer
148  // grows.
150  const char *buffer() const { return myBuffer; }
151  /// Alias for the common string access across all string types (including
152  /// standard library)
154  const char *data() const { return buffer(); }
155 
156  // Having said that, if you need a non-const pointer you must lock
157  // the string. This prohibits ANY update which changes the myLength
158  // variable (and thus potentially a realloc)
159  // You must release the buffer before any such changes.
160  // The work buffer continues to own the memory and will free it when
161  // it goes out of scope so don't think this is the same as a "steal"
162  // in UT_String.
163  // Currently, to ensure people couple their locks & releases,
164  // it asserts there is no unaccounted locks on death. This is so
165  // people who think it is steal find out otherwise.
166  // Offset is where in the string to get the pointer from.
167  // This is only to be used when absolutely necessary.
168  // When releasing, if you have a string buffer, and you have modified the
169  // length, you should set the recompute_length flag to 1. This will adjust
170  // the internal length variable so that further concatenations will work
171  // properly.
172  // The reserve_bytes parameter tells the lock to ensure that there are at
173  // least that many bytes in the locked buffer.
174  // NOTE: Unlike other UT_WorkBuffer functions, it is the user's
175  // responsibility to maintain a NUL termination guarantee when manipulating
176  // the raw buffer.
177  char *lock(exint offset = 0, exint reserve_bytes=0);
178  void release(bool recompute_length = false);
179  void releaseSetLength(exint new_length);
180 
182  exint getAllocatedSize() const { return myAllocatedSize; }
183  int64 getMemoryUsage(bool inclusive) const;
184 
185  /// Class to handle auto-locking of the UT_WorkBuffer. This is not related
186  /// to multi-threading, but to the lock/release methods above.
187  ///
188  /// You should never append data to a locked buffer.
189  class AutoLock
190  {
191  public:
194  : myBuffer(buf)
195  {
196  myString = myBuffer.lock();
197  }
200  {
201  release();
202  }
203  /// @{
204  /// Get access to the non-const buffer. This may return nullptr if the
205  /// lock has been released.
207  char *operator*() const { return myString; }
209  char *string() const { return myString; }
210  /// @}
211 
212  /// You can manually release the buffer
214  void release(bool recompute_length=false)
215  {
216  if (myString)
217  {
218  myBuffer.release(recompute_length);
219  myString = nullptr;
220  }
221  }
222  /// If you've manually released the lock, you can relock the buffer
224  void relock()
225  {
226  UT_ASSERT(!myString);
227  myString = myBuffer.lock();
228  }
229  private:
230  UT_WorkBuffer &myBuffer;
231  char *myString;
232  };
233 
234  void reserve(exint bytes=0);
235 
236  // This is a read only operator. We are avoiding the writeable
237  // versions as they lead to problems when people do a:
238  // foo[pastend] = foo(start)
239  // causing an implicit realloc.
241  char operator()(exint idx) const
242  {
243  // We allow an index at myLength as if we have a null
244  // terminated buffer that is the null termination.
245  UT_ASSERT_P(idx >= 0 && idx <= myLength);
246  return myBuffer[idx];
247  }
248 
249  // Returns first character. Only valid if !isEmpty()
251  char first() const
252  {
253  UT_ASSERT_P(myLength > 0);
254  return myBuffer[0];
255  }
256  // Returns last character. Only valid if !isEmpty()
258  char last() const
259  {
260  UT_ASSERT_P(myLength > 0);
261  return myBuffer[myLength - 1];
262  }
263 
264  // This should always be true. It's here to act as a sanity function.
265  int isNullTerminated() const;
266 
269  {
270  strcpy(other);
271  return *this;
272  }
274  UT_WorkBuffer &operator=(const char *str)
275  {
276  clear();
277  append(str);
278  return *this;
279  }
282  {
283  clear();
284  append(str.c_str(), str.length());
285  return *this;
286  }
287 
288  /// Comparison operator. Null strings are considered as empty strings.
289  /// @{
291  bool operator==(const char *str) const
292  {
293  if (!str)
294  return isEmpty();
295  return (::strcmp(str, myBuffer) == 0);
296  }
298  bool operator==(const UT_String &str) const
299  {
300  if (!(const char *)str)
301  return isEmpty();
302  return (::strcmp(str, myBuffer) == 0);
303  }
305  bool operator==(const UT_WorkBuffer &buf) const
306  {
307  if (buf.isEmpty())
308  return isEmpty();
309  if (length() != buf.length())
310  return false;
311  return (::memcmp(myBuffer, buf.myBuffer, myLength) == 0);
312  }
314  bool operator!=(const char *str) const
315  {
316  return !(*this == str);
317  }
319  bool operator!=(const UT_String &str) const
320  {
321  return !(*this == str);
322  }
324  bool operator!=(const UT_WorkBuffer &buf) const
325  {
326  return !(*this == buf);
327  }
328  /// @}
329 
330 private:
331  // Reallocate the buffer until the allocated size is >= the length. This
332  // private method needs to come first so it can be inlined.
333  void growBufferIfNeeded()
334  {
335  // Using a while loop instead of computing an accurate size the
336  // first time is slower, but most of the time the loop will execute
337  // at most once.
338  // We need to use myLength+1 as we need room for the null.
339  while (myLength+1 > myAllocatedSize) // false most of the time
340  reserve(myAllocatedSize * 2);
341  }
342 
343 public:
344  // These are standard string operators people tend to use:
346  void strcpy(const char *src)
347  {
348  clear();
349  append(src);
350  }
352  void strcpy(const UT_String &src)
353  {
354  clear();
355  append(src);
356  }
358  void strcpy(const UT_StringRef &src)
359  {
360  clear();
361  append(src);
362  }
364  void strcpy(const UT_WorkBuffer &src)
365  {
366  clear();
367  append(src);
368  }
369 
370  // NOTE: unlike strncpy(), maxlen does not include the null terminator.
372  void strncpy(const char *src, exint maxlen)
373  {
374  clear();
375  // Ensure we have enough room:
376  myLength = maxlen+1;
377  growBufferIfNeeded();
378  myLength = 0;
379  SYSstrlcpy(myBuffer, src, maxlen+1);
380  myLength = ::strlen(myBuffer);
381  }
382 
383  // Note we can't just return myLength as there may be embedded NULLs.
385  exint strlen() const
386  {
387  UT_ASSERT_P(isNullTerminated());
388  return ::strlen(myBuffer);
389  }
390 
392  exint length() const
393  {
394  return myLength;
395  }
396 
398  void strcat(const char *src)
399  {
400  append(src);
401  }
402 
403  // protectedStrcat() will quote the string in double quotes if required and
404  // protect any enclosed double quotes or backslashes in the source. It
405  // will not escape any other characters.
406  void protectedStrcat(const char *str, bool force_quote=false);
407 
408  // fullyProtected*Strcat() is similar to protectedStrcat, except it escapes
409  // any non-printable characters. It will not escape single quotes, and if
410  // force_quote is true, it will add double-quotes. It will work with
411  // arbitrary binary data and uses the \xNN syntax to encode bytes.
412  // UT_IStream::read() is capable of loading strings encoded with this
413  // method, and these strings can also be decoded in Python. If
414  // fullyProtectedBinaryStrcat is called, this method can handle data
415  // containing null characters.
416  void fullyProtectedStrcat(const char *str, bool force_quote=false);
417  void fullyProtectedBinaryStrcat(
418  const char *str, exint size, bool force_quote=false);
419 
420  /// Append a string of a given maximum length to the current string.
421  /// Unlike the POSIX's strncat(3), we ignore any NUL bytes in the current
422  /// string and blindly append at the end of the work buffer.
424  void strncat(const char *src, exint len)
425  {
426  if (!src)
427  return;
428  append(src, ::strnlen(src, len));
429  }
430 
431  // Extract the first argument from the src and append it to the work
432  // buffer. This does NOT handle quotes properly (i.e. if the first word
433  // is quoted with spaces).
434  void strcatFirstWord(const char *src);
435 
437  int strcmp(const char *src) const
438  {
439  UT_ASSERT_P(isNullTerminated());
440  return ::strcmp(myBuffer, src);
441  }
442 
444  int strncmp(const char *src, exint n) const
445  {
446  UT_ASSERT_P(isNullTerminated());
447  return ::strncmp(myBuffer, src, n);
448  }
449 
451  char *strdup() const
452  {
453  UT_ASSERT(isNullTerminated());
454  return ::strdup(myBuffer);
455  }
456 
457  // Reset the buffer to an empty buffer.
459  void clear()
460  {
461  if (myLockCount) { UT_ASSERT(0); return; }
462  myLength = 0;
463  myBuffer[0] = '\0';
464  }
465 
467  bool isEmpty() const
468  {
469  return (myLength == 0);
470  }
472  bool isstring() const
473  {
474  return !isEmpty();
475  }
476 
477  // Write into the buffer at a specific place.
478  // This WILL expand the buffer if it is required and keep it null
479  // terminated.
481  void write(exint offset, char c)
482  {
483  UT_ASSERT(offset >= 0);
484  if (offset < 0) return;
485  if (offset >= myLength)
486  {
487  if (myLockCount) { UT_ASSERT(0); return; }
488  myLength = offset+1;
489  growBufferIfNeeded();
490  myBuffer[myLength] = '\0';
491  }
492  myBuffer[offset] = c;
493  if (c == '\0')
494  myLength = offset;
495  }
496 
497  // This does NOT write out the trailing NULL of src, but the buffer will
498  // still be null-terminated.
499  void write(exint offset, const char *src)
500  {
501  while (*src)
502  {
503  write(offset, *src);
504  src++;
505  offset++;
506  }
507  }
508 
510  {
511  write(offset, src.c_str());
512  }
513 
514  /// Load an entire file into the buffer. Returns @b false if there was an
515  /// error reading the file
516  bool readFile(const char *filename);
517 
518  // Read a line from an istream -- no matter how long the line is
519  // Returns 0 if the stream read failed or 1 otherwise
520  bool getline(std::istream &is);
521  bool getline(FILE *fp);
522 
523  // Much like getline() except that it has more features. The string itself
524  // is tokenized which the UT_WorkArgs points into.
525  // line_num is incremented for each line read.
526  // comment_chars is list of characters to treat as comments.
527  // this can be NULL if we don't want this feature.
528  // Returns false if the stream read failed.
529  bool cmdGetLine(std::istream &is, UT_WorkArgs &args, int &line_num,
530  const char *comment_chars = "#",
531  const char *separators = " \t\n\r");
532  bool cmdGetLine(UT_IStream &is, UT_WorkArgs &args, int &line_num,
533  const char *comment_chars = "#",
534  const char *separators = " \t\n\r");
535  bool cmdGetLine(FILE *fp, UT_WorkArgs &args, int &line_num,
536  const char *comment_chars = "#",
537  const char *separators = " \t\n\r");
538 
539  /// Fast integer to string conversion.
540  /// @{
541  void itoa(int64 i);
542  void utoa(uint64 i);
543  /// @}
544 
545  int sprintf(const char *fmt, ...)
547  int appendSprintf(const char *fmt, ...)
549 
550  int vsprintf(const char *fmt, va_list ap);
551 
552  /// Replace the contents of the work buffer using the same formatting as
553  /// UTformat.
554  /// Returns the size of the appended portion, in bytes.
555  template<typename... Args>
556  size_t format(const char *fmt, const Args &...args)
557  {
558  clear();
559  return appendFormat(fmt, args...);
560  }
561 
562  /// Append to the work buffer using the same formatting as UTformat.
563  /// Returns the size of the appended portion, in bytes.
564  template<typename... Args>
565  size_t appendFormat(const char *fmt, const Args &...args)
566  {
567  if (myLockCount) { UT_ASSERT(0); return 0; }
568  UT_ASSERT_P(isNullTerminated());
569 
570  using namespace UT::Format;
571  Writer w;
573  size_t nb_needed = f.format(w, fmt, {args...});
574 
575  myLength += nb_needed;
576  growBufferIfNeeded();
577 
578  // Format again, this time to fill in the buffer.
579  w.setBuffer(myBuffer + myLength - nb_needed, nb_needed);
580  f.format(w, fmt, {args...});
581 
582  myBuffer[myLength] = '\0';
583  return nb_needed;
584  }
585 
586  /// Replace the contents of the work buffer using UTformat formatting
587  /// with an implicit "{} " for each argument, giving a Python-style
588  /// print result.
589  template<typename... Args>
590  size_t print(const Args &...args)
591  {
592  clear();
593  return appendPrint(args...);
594  }
595 
596  /// Append to the work buffer using the UTformat with an implicit "{} "
597  /// format for each parameter.
598  /// Returns the size of the appended portion, in bytes.
template<typename... Args>
size_t appendPrint()
{
    // End of the argument recursion: nothing left to append.
    return size_t(0);
}
604  template<typename T, typename... Args>
605  size_t appendPrint(const T &value, const Args &...args)
606  {
607  size_t newbytes;
608  newbytes = appendFormat("{}", value);
609  // NB: we might be empty when value was the empty string
610  if (!isEmpty() && last() != '\n')
611  {
612  append(' ');
613  newbytes++;
614  }
615  newbytes += appendPrint(args...);
616  return newbytes;
617  }
618 
619  // These tack stuff to the end of the buffer.
621  void append(char character)
622  {
623  if (myLockCount) { UT_ASSERT(0); return; }
624  UT_ASSERT_P(isNullTerminated());
625  myLength++;
626  growBufferIfNeeded();
627  myBuffer[myLength - 1] = character;
628  myBuffer[myLength] = '\0';
629  }
630 
631  void printMemory(int64 mem) { clear(); appendPrintMemory(mem); }
632  void appendPrintMemory(int64 mem);
633 
634  void append(exint n, char character)
635  {
636  if (myLockCount) { UT_ASSERT(0); return; }
637  UT_ASSERT_P(isNullTerminated());
638  myLength += n;
639  growBufferIfNeeded();
640  for (int i = n; i > 0; i--)
641  myBuffer[myLength - i] = character;
642  myBuffer[myLength] = '\0';
643  }
644 
645  /// Append a single Unicode code point, converted to UTF8
646  void append(utf32 cp)
647  {
649  int len = UT_Unicode::convert(cp, buf, sizeof(buf));
650  if (!len)
651  return;
652 
653  if (myLockCount) { UT_ASSERT(0); return; }
654  UT_ASSERT_P(isNullTerminated());
655  myLength += len;
656  growBufferIfNeeded();
657  ::memcpy(myBuffer + myLength - len, buf, len);
658  myBuffer[myLength] = '\0';
659  }
660 
661  void append(const char *data, exint size)
662  {
663  if (myLockCount) { UT_ASSERT(0); return; }
664  UT_ASSERT_P(data);
665  UT_ASSERT_P(isNullTerminated());
666  myLength += size;
667  growBufferIfNeeded();
668  ::memcpy(myBuffer + myLength - size, data, size);
669  myBuffer[myLength] = '\0';
670  }
671 
673  void append(const char *str)
674  {
675  if( UTisstring(str) )
676  append(str, ::strlen(str));
677  }
678 
680  void append(const UT_String &str)
681  {
682  if (str.isstring())
683  append((const char *)str);
684  }
685 
687  void append(const UT_StringRef &str)
688  {
689  if (str.isstring())
690  append(str.buffer(), str.length());
691  }
692 
693  void append(const UT_StringArray &strs, const UT_StringRef &sep)
694  {
695  for (exint i = 0; i < strs.entries(); i++)
696  {
697  append(strs(i));
698  if (i+1 < strs.entries())
699  append(sep);
700  }
701  }
702 
704  void append(const UT_WorkBuffer &wb)
705  {
706  append( wb.buffer(), wb.length() );
707  }
708 
710  UT_WorkBuffer &operator+=(const char *str)
711  {
712  append(str);
713  return *this;
714  }
715 
718  {
719  append(str);
720  return *this;
721  }
722 
725  {
726  append(wb);
727  return *this;
728  }
729 
732  {
733  append(str);
734  return *this;
735  }
736 
739  {
740  append(str);
741  return *this;
742  }
743 
744  void prepend(char character)
745  {
746  if (myLockCount) { UT_ASSERT(0); return; }
747  UT_ASSERT_P(isNullTerminated());
748  myLength++;
749  growBufferIfNeeded();
750  ::memmove(myBuffer+1, myBuffer, myLength);
751  myBuffer[0] = character;
752  }
753  void prepend(const char *data, exint size)
754  {
755  if (myLockCount) { UT_ASSERT(0); return; }
756  UT_ASSERT_P(data);
757  UT_ASSERT_P(isNullTerminated());
758  myLength += size;
759  growBufferIfNeeded();
760  ::memmove(myBuffer+size, myBuffer, myLength+1 - size);
761  ::memcpy(myBuffer, data, size);
762  }
764  void prepend(const char *str)
765  {
766  UT_ASSERT_P(str);
767  prepend(str, ::strlen(str));
768  }
769 
771  void prepend(const UT_String &str)
772  {
773  if (str.isstring())
774  prepend((const char *)str);
775  }
777  void prepend(const UT_StringRef &str)
778  {
779  if (str)
780  prepend(str.buffer(), str.length());
781  }
782 
783  /// Insert @c slen characters from @c str, at location @c pos. If @c pos
784  /// exceeds the current length, the position is truncated, turning the insert into an append.
785  void insert(exint pos, const char* str, exint slen);
786 
787  /// Erase @c len characters from location @c pos in the string.
788  void erase(exint pos, exint len);
789 
790  void rewind() { backup(myLength); }
791 
792  /// Rewind by the given length
794  void backup(exint by_length)
795  {
796  if (myLockCount) { UT_ASSERT(0); return; }
797  UT_ASSERT_P(isNullTerminated());
798  UT_ASSERT_P(by_length >= 0);
799  myLength -= by_length;
800  UT_ASSERT(myLength >= 0);
801  myBuffer[myLength] = '\0';
802  }
803 
804  /// Truncate the buffer to the specified length. Truncating to 0 is
805  /// identical to clear().
807  void truncate(exint new_length)
808  {
809  if (new_length >= myLength)
810  {
811  UT_ASSERT(0 && "Truncating beyond buffer extent");
812  return;
813  }
814  backup(myLength-new_length);
815  }
816 
817  // Delete characters off the end of the string until we hit the
818  // requested character.
819  void backupTo(char c)
820  {
821  if (myLockCount) { UT_ASSERT(0); return; }
822  UT_ASSERT_P(isNullTerminated());
823  while( myLength > 0 && myBuffer[myLength-1] != c )
824  myLength--;
825  myBuffer[myLength] = '\0';
826  }
827 
828  void advance(exint by_length)
829  {
830  if (myLockCount) { UT_ASSERT(0); return; }
831  UT_ASSERT_P(isNullTerminated());
832  UT_ASSERT_P(by_length >= 0);
833  myLength -= by_length;
834  UT_ASSERT(myLength >= 0);
835  for (int i=0; i<myLength; i++)
836  myBuffer[i] = myBuffer[by_length+i];
837  myBuffer[myLength] = '\0';
838  }
839 
840  // Finds the 'occurance_number'-th occurrence of char c in the string.
842  const char *findChar(char c, int occurance_number = 1) const
843  {
844  return findCharFrom(c, 0, occurance_number);
845  }
846  // Same as findChar, but searches from the end of the string.
847  const char *lastChar(char c, int occurance_number = 1) const
848  {
849  if (myLockCount) { UT_ASSERT(0); return NULL; }
850 
851  UT_ASSERT_P(isNullTerminated());
852 
853  for (exint i = myLength; i --> 0;)
854  {
855  if(c == myBuffer[i])
856  {
857  occurance_number--;
858  if(occurance_number <= 0)
859  {
860  return (myBuffer + i);
861  }
862  }
863  }
864 
865  return NULL;
866  }
867  // Same as findChar, but searches from the given position in the string.
868  const char *findCharFrom(char c, exint position,
869  int occurance_number = 1) const
870  {
871  if (myLockCount) { UT_ASSERT(0); return NULL; }
872 
873  UT_ASSERT_P(isNullTerminated());
874 
875  if (position < 0 || position >= myLength) { return NULL; }
876 
877  for(exint i = position; i < myLength; ++i)
878  {
879  if(c == myBuffer[i])
880  {
881  occurance_number--;
882  if(occurance_number <= 0)
883  {
884  return (myBuffer + i);
885  }
886  }
887  }
888 
889  return NULL;
890  }
891 
892  /// Adopt a string from an outside source. The passed string is now
893  /// owned by the workbuffer.
894  void adoptFromMalloc(char* data, exint length);
895 
896  void adoptFromCharArray(UT_Array<char>& data);
897 
898  /// Count the occurrences of the text in the current string
899  exint count(const char *needle) const;
900 
901  // Get the next token pointed at by string and advance string past the
902  // token. Returns whether or not a token was retrieved successfully.
903  // Note that string is modified!!!
904  bool getNextToken(const char *(&string),
905  const UT_String separators = " \t\n");
906 
907  // Harden the contents of the buffer into a UT_String.
908  void copyIntoString(UT_String &str) const;
909 
910  // Copy the contents into a fixed length buffer.
911  // TODO: Get rid of this method, since it encourages fixed-length buffers.
912  void copyIntoString(char *str, exint max_length) const;
913 
914  // Steal the contents of this work buffer into the string.
915  void stealIntoString(UT_String &str);
916 
917  // Steal the contents of this work buffer into the string.
918  // NB: Please use UT_StringHolder move constructor/assignment instead of
919  // this function.
920  SYS_DEPRECATED(19.0)
921  void stealIntoStringHolder(UT_StringHolder &str);
922 
923  // Return a string containing the contents of this work buffer, preserving
924  // any null characters in it.
925  std::string toStdString() const
926  { return std::string(buffer(), length()); }
927 
928  // Strips the characters after comment_char from the buffer. This method
929  // goes to some effort to ensure that the comment_char is not preceded by
930  // a backslash or is not in a quoted string. Returns true if it found a
931  // comment and modified the buffer, and false otherwise.
932  bool stripComments(char comment_char = '#');
933 
934  /// Strips out all characters found in 'chars'. The string length will be
935  /// reduced by the number of characters removed. The number of characters
936  /// removed is returned.
937  int strip(const char *chars);
938 
939  /// Remove trailing whitespace lines
940  void removeTrailingSpaceLines();
941 
942  /// Remove trailing whitespace, return true if whitespace was removed.
943  bool removeTrailingSpace();
944 
945  /// Remove leading white space, return true if whitespace was removed.
946  bool removeLeadingSpace();
947 
948  /// Remove trailing digits, return true if some were removed.
949  bool removeTrailingDigits();
950 
951  /// Convert string to lower case
952  void lower();
953 
954  /// Convert string to upper case
955  void upper();
956 
957  /// Create a string of tabs & spaces which represents the given indent
958  void makeIndentString(exint indent, exint tabstop=8);
959 
960  /// Remove the first n characters.
962  {
963  if (n < myLength)
964  {
965  myLength -= n;
966  ::memmove(myBuffer, myBuffer + n, myLength);
967  }
968  else
969  myLength = 0;
970 
971  myBuffer[myLength] = '\0';
972  }
973 
974  /// Replaces up to 'count' occurrences of 'find' with 'replacement',
975  /// and returns the number of substitutions that occurred.
976  /// If 'count' <= 0, all occurrences will be replaced.
977  int substitute(const char *find, const char *replacement, int count);
978 
979  /// Convenience version of substitute() for all or single occurrence.
int substitute(const char *find, const char *replacement, bool all = true)
{
    // Forward to the counted overload: -1 for all occurrences, 1 for
    // the first one only.
    const int count = all ? -1 : 1;
    return substitute(find, replacement, count);
}
984 
985  /// Given from_name which is assumed to fit from_pattern, any assigned
986  /// wildcards are substituted in to_pattern, writing the result to this.
987  /// The wildcards may also be indexed. For example:
988  ///
989  /// to_pattern = b* from_name = apple from_pattern = a*le
990  /// ---> this = bpp
991  ///
992  /// to_pattern = *(1)_to_*(0) from_name = a_to_b from_pattern = *_to_*
993  /// ---> this = b_to_a
994  bool subPatterns(
995  const char *to_pattern,
996  const char *from_name,
997  const char *from_pattern);
998 
999  /// UTF-16 / UTF-8 conversions.
1000 
1001  /// Set the work buffer to contain the UTF-8 representation of the incoming UTF-16 string.
1002  /// The UTF-16 string is assumed to be little-endian, unless prefixed with BOM that
1003  /// indicates endianness.
1004  /// The incoming string should be zero-word terminated.
1005  void setFromUTF16(const utf16 *str);
1006 
1007  /// Set the work buffer to contain a UTF-16LE (little endian) representation of the
1008  /// incoming UTF-8 string.
1009  /// The work buffer will be zero-word terminated.
1010  void setAsUTF16(const utf8 *str);
1011 
1012  /// Once set as UTF16-LE, get it back as such a pointer.
1014  const utf16* castToUTF16() const { return (const utf16*) myBuffer; }
1015 
1016  /// Lock buffer for `len` utf-16 characters.
1018  {
1019  return (utf16*)lock(offset, len*sizeof(utf16));
1020  }
1021 
1022  void swap(UT_WorkBuffer &other)
1023  {
1024  // Warn if we're about to swap locked buffers.
1025  UT_ASSERT(myLockCount==0);
1026 
1027  bool this_stack = (myBuffer == myStackBuffer);
1028  bool other_stack = (other.myBuffer == other.myStackBuffer);
1029 
1030  if (this_stack && other_stack)
1031  {
1032  // If both buffers are using the stack buffer, just swap the
1033  // buffer contents.
1034  size_t max_size = (myLength > other.myLength) ? myLength
1035  : other.myLength;
1036 
1037  UTswap(myStackBuffer, other.myStackBuffer, max_size + 1);
1038  }
1039  else if (this_stack && !other_stack)
1040  {
1041  ::memcpy(other.myStackBuffer, myStackBuffer, myLength + 1);
1042  myBuffer = other.myBuffer;
1043  other.myBuffer = other.myStackBuffer;
1044  }
1045  else if (!this_stack && other_stack)
1046  {
1047  ::memcpy(myStackBuffer, other.myStackBuffer, other.myLength + 1);
1048  other.myBuffer = myBuffer;
1049  myBuffer = myStackBuffer;
1050  }
1051  else
1052  UTswap(myBuffer, other.myBuffer);
1053  UTswap(myAllocatedSize, other.myAllocatedSize);
1054  UTswap(myLength, other.myLength);
1055  UTswap(myLockCount, other.myLockCount);
1056  }
1057 public:
1058  /// Iterator compatibility.
1060  const char *begin() const { return myBuffer; }
1062  const char *end() const { return myBuffer + myLength; }
1063 
1064 private:
1065 
1066  struct do_widen {};
1067  struct do_narrow {};
1068  /// Private constructors to allow for the Return Value Optimization
1069  /// @{
1071  UT_WorkBuffer(do_widen, const utf8 *str)
1072  : myBuffer(myStackBuffer)
1073  , myAllocatedSize(UT_INITIAL_BUFFER_SIZE)
1074  , myLength(0)
1075  , myLockCount(0)
1076  {
1077  setAsUTF16(str);
1078  }
1080  UT_WorkBuffer(do_narrow, const utf16 *str)
1081  : myBuffer(myStackBuffer)
1082  , myAllocatedSize(UT_INITIAL_BUFFER_SIZE)
1083  , myLength(0)
1084  , myLockCount(0)
1085  {
1086  setFromUTF16(str);
1087  }
1088  /// @}
1089 
1090  friend UT_API std::ostream &operator<<(std::ostream &os,
1091  const UT_WorkBuffer &buffer);
1092 
1093 private: // Data:
1094 
1095  char *myBuffer; // Do not make an access method to the data
1096  exint myAllocatedSize = UT_INITIAL_BUFFER_SIZE;
1097  exint myLength = 0;
1098  int myLockCount = 0;
1099  char myStackBuffer[UT_INITIAL_BUFFER_SIZE];
1100 };
1101 
1102 
1103 static inline size_t
1104 format(char *buffer, size_t buffer_size, const UT_WorkBuffer &v)
1105 {
1106  if (!buffer)
1107  return v.length();
1108  else
1109  {
1110  size_t len = std::min(size_t(v.length()), buffer_size);
1111  ::memcpy(buffer, v.buffer(), len);
1112  return len;
1113  }
1114 }
1115 
1116 template <typename T>
1117 void
1118 UTstringJoin(UT_StringHolder& out, const UT_StringHolder& sep, const T& items)
1119 {
1120  //TODO: check T is iterable once UT can use C++17. Its far to ugly to
1121  //bother doing this in C++14.
1122  UT_WorkBuffer wbuf;
1123  for (auto&& item : items)
1124  {
1125  if (!wbuf.isEmpty())
1126  wbuf.append(sep);
1127  wbuf.appendFormat("{}", item);
1128  }
1129  out = std::move(wbuf);
1130 }
1131 
1132 #endif
size_t print(const Args &...args)
vbool4 insert(const vbool4 &a, bool val)
Helper: substitute val for a[i].
Definition: simd.h:3414
std::basic_string< Char > vsprintf(const S &format, basic_format_args< basic_printf_context_t< type_identity_t< Char >>> args)
Definition: printf.h:634
SYS_FORCE_INLINE void append(const UT_StringRef &str)
std::string upper(string_view a)
Return an all-upper case version of a (locale-independent).
Definition: strutil.h:349
string_view OIIO_API strip(string_view str, string_view chars=string_view())
GT_API const UT_StringHolder filename
SYS_FORCE_INLINE exint length() const
void UTstringJoin(UT_StringHolder &out, const UT_StringHolder &sep, const T &items)
#define SYS_DEPRECATED(__V__)
SYS_FORCE_INLINE void strcpy(const UT_StringRef &src)
void write(exint offset, const UT_StringHolder &src)
SYS_FORCE_INLINE exint getAllocatedSize() const
SYS_FORCE_INLINE char * operator*() const
void UTswap(T &a, T &b)
Definition: UT_Swap.h:35
void
Definition: png.h:1083
SYS_FORCE_INLINE bool operator==(const UT_String &str) const
SYS_FORCE_INLINE UT_WorkBuffer(const char *data, exint size)
Definition: UT_WorkBuffer.h:84
SYS_FORCE_INLINE void strncat(const char *src, exint len)
unsigned short utf16
Definition: SYS_Types.h:56
SYS_FORCE_INLINE UT_WorkBuffer(const UT_StringRef &str)
const GLfloat * c
Definition: glew.h:16631
SYS_FORCE_INLINE UT_WorkBuffer(const char *str)
Definition: UT_WorkBuffer.h:76
int64 exint
Definition: SYS_Types.h:125
void append(exint n, char character)
SYS_FORCE_INLINE const char * buffer() const
SYS_FORCE_INLINE void strcpy(const char *src)
SYS_FORCE_INLINE void release(bool recompute_length=false)
You can manually release the buffer.
#define UT_API
Definition: UT_API.h:14
const char * findCharFrom(char c, exint position, int occurance_number=1) const
void append(const char *data, exint size)
const char * lastChar(char c, int occurance_number=1) const
size_t appendPrint()
GLenum src
Definition: glcorearb.h:1792
unsigned long long uint64
Definition: SYS_Types.h:117
SYS_FORCE_INLINE char last() const
utf16 * lockUTF16(exint offset=0, exint len=0)
Lock buffer for len utf-16 characters.
SYS_FORCE_INLINE void relock()
If you've manually released the lock, you can relock the buffer.
GLuint buffer
Definition: glcorearb.h:659
SYS_FORCE_INLINE void append(const UT_String &str)
SYS_FORCE_INLINE const char * data() const
void swap(UT_WorkBuffer &other)
void eraseHead(exint n)
Remove the first n characters.
SYS_FORCE_INLINE void append(const UT_WorkBuffer &wb)
GLsizeiptr size
Definition: glcorearb.h:663
GLubyte GLubyte GLubyte GLubyte w
Definition: glcorearb.h:856
SYS_FORCE_INLINE bool operator==(const UT_WorkBuffer &buf) const
std::ostream & operator<<(std::ostream &ostr, const DataType &a)
Definition: DataType.h:133
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const UT_WorkBuffer &wb)
SYS_FORCE_INLINE void append(const char *str)
Format
Definition: oidn.hpp:16
static const utf8 * convert(const utf8 *str, utf32 &cp)
size_t appendFormat(const char *fmt, const Args &...args)
#define UT_INITIAL_BUFFER_SIZE
Definition: UT_WorkBuffer.h:54
SYS_FORCE_INLINE void strcpy(const UT_WorkBuffer &src)
ImageBuf OIIO_API min(Image_or_Const A, Image_or_Const B, ROI roi={}, int nthreads=0)
exint length() const
SYS_FORCE_INLINE const char * buffer() const
SYS_FORCE_INLINE const char * end() const
FMT_CONSTEXPR bool find(Ptr first, Ptr last, T value, Ptr &out)
Definition: format.h:2929
GLenum GLsizei len
Definition: glew.h:7782
SYS_FORCE_INLINE UT_WorkBuffer & operator=(const UT_WorkBuffer &other)
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:170
unsigned int utf32
Definition: SYS_Types.h:58
void printMemory(int64 mem)
#define SYS_PRINTF_CHECK_ATTRIBUTE(string_index, first_to_check)
Definition: SYS_Types.h:447
const GLdouble * v
Definition: glcorearb.h:836
SYS_FORCE_INLINE const char * begin() const
Iterator compatibility.
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
GLsizei const GLchar *const * string
Definition: glcorearb.h:813
SYS_FORCE_INLINE void prepend(const UT_StringRef &str)
void prepend(char character)
SYS_FORCE_INLINE bool isEmpty() const
SYS_FORCE_INLINE char * strdup() const
long long int64
Definition: SYS_Types.h:116
static SYS_FORCE_INLINE UT_WorkBuffer widen(const utf8 *str)
SYS_FORCE_INLINE const char * findChar(char c, int occurance_number=1) const
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const UT_String &str)
SYS_FORCE_INLINE exint strlen() const
SYS_FORCE_INLINE bool operator!=(const UT_WorkBuffer &buf) const
SYS_FORCE_INLINE UT_WorkBuffer & operator=(const std::string &str)
SYS_FORCE_INLINE const char * c_str() const
SYS_FORCE_INLINE void strcpy(const UT_String &src)
#define UT_UTF8_MAX_ENCODING_LEN
Definition: UT_Unicode.h:19
GLint GLsizei count
Definition: glcorearb.h:404
size_t SYSstrlcpy(char *dest, const char *src, size_t size)
Definition: SYS_String.h:180
checked_ptr< typename Container::value_type > reserve(std::back_insert_iterator< Container > it, size_t n)
Definition: format.h:373
SYS_FORCE_INLINE char * string() const
SYS_FORCE_INLINE void prepend(const UT_String &str)
SYS_FORCE_INLINE char operator()(exint idx) const
SYS_FORCE_INLINE UT_WorkBuffer(const UT_WorkBuffer &other)
SYS_FORCE_INLINE void strcat(const char *src)
int substitute(const char *find, const char *replacement, bool all=true)
Convenience version of substitute() for all or single occurrence.
SYS_FORCE_INLINE void truncate(exint new_length)
SYS_FORCE_INLINE const utf16 * castToUTF16() const
Once set as UTF16-LE, get it back as such a pointer.
GLdouble n
Definition: glcorearb.h:2007
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glcorearb.h:2539
exint entries() const
Alias of size(). size() is preferred.
Definition: UT_Array.h:481
GLboolean * data
Definition: glcorearb.h:130
void append(utf32 cp)
Append a single Unicode code point, converted to UTF8.
SYS_FORCE_INLINE int strcmp(const char *src) const
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:794
GLint GLint GLsizei GLint GLenum format
Definition: glcorearb.h:107
static SYS_FORCE_INLINE UT_WorkBuffer narrow(const utf16 *str)
SYS_FORCE_INLINE UT_WorkBuffer & operator=(const char *str)
SYS_FORCE_INLINE bool isstring() const
SYS_FORCE_INLINE void backup(exint by_length)
Rewind by the given length.
void write(exint offset, const char *src)
SYS_FORCE_INLINE UT_WorkBuffer(const UT_String &str)
Definition: UT_WorkBuffer.h:92
SYS_FORCE_INLINE AutoLock(UT_WorkBuffer &buf)
void backupTo(char c)
size_t appendPrint(const T &value, const Args &...args)
SYS_FORCE_INLINE bool operator==(const char *str) const
std::string lower(string_view a)
Return an all-lower case version of a (locale-independent).
Definition: strutil.h:342
void advance(exint by_length)
SYS_FORCE_INLINE bool operator!=(const char *str) const
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const char *str)
SYS_FORCE_INLINE bool UTisstring(const char *s)
SYS_FORCE_INLINE void append(char character)
Type-safe formatting, modeled on the Python str.format function.
void prepend(const char *data, exint size)
GLsizei const GLfloat * value
Definition: glcorearb.h:823
GLfloat f
Definition: glcorearb.h:1925
virtual bool readFile(GA_Detail &g, const char *filename, const GA_LoadOptions *opts, UT_StringArray *errors) const
Class which defines an I/O interface to save/load geometry.
**If you just want to fire and args
Definition: thread.h:615
void append(const UT_StringArray &strs, const UT_StringRef &sep)
bool isstring() const
Definition: UT_String.h:681
#define UT_ASSERT(ZZ)
Definition: UT_Assert.h:171
SYS_FORCE_INLINE ~UT_WorkBuffer()
SYS_FORCE_INLINE void clear()
char utf8
Definition: SYS_Types.h:52
GLintptr offset
Definition: glcorearb.h:664
SYS_FORCE_INLINE ~AutoLock()
#define const
Definition: zconf.h:214
SYS_FORCE_INLINE char first() const
void write(T &out, bool v)
Definition: ImfXdr.h:332
SYS_FORCE_INLINE bool operator!=(const UT_String &str) const
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const UT_StringRef &str)
bool all(const vbool4 &v)
Definition: simd.h:3445
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const std::string &str)
SYS_FORCE_INLINE void write(exint offset, char c)
SYS_FORCE_INLINE bool isstring() const
Definition: format.h:3611
SYS_FORCE_INLINE void prepend(const char *str)
SYS_FORCE_INLINE void strncpy(const char *src, exint maxlen)
SYS_FORCE_INLINE int strncmp(const char *src, exint n) const
std::basic_string< Char > sprintf(const S &format, const Args &...args)
Definition: printf.h:653
SYS_FORCE_INLINE UT_WorkBuffer()
Definition: UT_WorkBuffer.h:68