HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_WorkBuffer.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_WorkBuffer.h ( Utility Library, C++ )
7  *
8  * COMMENTS:
9  * A growable string buffer that can be written into. A UT_String
10  * can be created with the contents of this buffer by calling
11  * copyIntoString().
12  *
13  * It's important that a non-const version of the raw buffer is not
14  * accessible since users could write past the end of the allocated
15  * buffer. Also note that the buffer location can change as it grows,
16  * so don't keep pointers to the buffer around.
17  *
18  * Most of the time, you want to allocate an object of this class on
19  * the stack and not on the heap.
20  *
21  * The buffer is kept null terminated by default. Functions exist
22  * to verify this. Note that the "length" of the buffer is the
23  * same as strlen - ie: it ignores the null termination!!!
24  */
25 
26 #ifndef __UT_WorkBuffer_h__
27 #define __UT_WorkBuffer_h__
28 
29 #include "UT_API.h"
30 
31 #include "UT_Assert.h"
32 #include "UT_Format.h"
33 #include "UT_NonCopyable.h"
34 #include "UT_String.h"
35 #include "UT_StringArray.h"
36 #include "UT_StringHolder.h"
37 #include "UT_Swap.h"
38 #include "UT_Unicode.h"
39 
40 #include <SYS/SYS_Inline.h>
41 #include <SYS/SYS_Types.h>
42 
43 #include <iosfwd>
44 
45 #include <stdlib.h>
46 #include <stdio.h>
47 #include <string.h>
48 
49 
50 // The default page size on most systems is 4K. We choose a default
51 // buffer size less than half of that in the hopes that if we have
52 // functions with 2 work buffers or additional variables on the stack that we
53 // may not have to allocate multiple stack pages.
54 #define UT_INITIAL_BUFFER_SIZE 2000
55 
56 class UT_WorkArgs;
57 class UT_IStream;
58 
60 {
61 public:
62  typedef char value_type;
63 
66  : myBuffer(myStackBuffer)
67  {
68  // Default termination.
69  myBuffer[0] = '\0';
70  }
71 
73  explicit UT_WorkBuffer(const char *str)
74  : myBuffer(myStackBuffer)
75  {
76  myBuffer[0] = '\0';
77  append(str);
78  }
79 
81  explicit UT_WorkBuffer(const char *data, exint size)
82  : myBuffer(myStackBuffer)
83  {
84  myBuffer[0] = '\0';
85  append(data, size);
86  }
87 
89  explicit UT_WorkBuffer(const UT_String &str)
90  : myBuffer(myStackBuffer)
91  {
92  myBuffer[0] = '\0';
93  append(str);
94  }
95 
97  explicit UT_WorkBuffer(const UT_StringRef &str)
98  : myBuffer(myStackBuffer)
99  {
100  myBuffer[0] = '\0';
101  append(str);
102  }
103 
106  : myBuffer(myStackBuffer)
107  {
108  myBuffer[0] = '\0';
109  append(other);
110  }
111 
114  {
115  if (myBuffer != myStackBuffer)
116  {
117  UT_ASSERT(myBuffer);
118  ::free(myBuffer);
119  }
120  }
121 
122  /// Create a work buffer to contain a UTF-16LE (little endian)
123  /// representation of the incoming UTF-8 string.
124  /// The work buffer will be zero-word terminated.
126  static UT_WorkBuffer
127  widen(const utf8 *str)
128  {
129  return UT_WorkBuffer(do_widen(), str);
130  }
131 
132  /// Create a work buffer to contain the UTF-8 representation of the
133  /// incoming UTF-16 string. The UTF-16 string is assumed to be
134  /// little-endian, unless prefixed with BOM that indicates endianness.
135  /// The incoming string should be zero-word terminated.
137  static UT_WorkBuffer
138  narrow(const utf16 *str)
139  {
140  return UT_WorkBuffer(do_narrow(), str);
141  }
142 
143  // It's important that there is no non-const access method to the buffer.
144  // Also note that the pointer to the buffer can change if the buffer
145  // grows.
147  const char *buffer() const { return myBuffer; }
148  /// Alias for the common string access across all string types (including
149  /// standard library)
151  const char *data() const { return buffer(); }
152 
153  // Having said that, if you need a non-const pointer you must lock
154  // the string. This prohibits ANY update which changes the myLength
155  // variable (and thus potentially a realloc)
156  // You must release the buffer before any such changes.
157  // The work buffer continues to own the memory and will free it when
158  // it goes out of scope so don't think this is the same as a "steal"
159  // in UT_String.
160  // Currently, to ensure people couple their locks & releases,
161  // it asserts there is no unaccounted locks on death. This is so
162  // people who think it is steal find out otherwise.
163  // Offset is where in the string to get the pointer from.
164  // This is only to be used when absolutely necessary.
165  // When releasing, if you have a string buffer, and you have modified the
166  // length, you should set the recompute_length flag to true. This will adjust
167  // the internal length variable so that further concatenations will work
168  // properly.
169  // The reserve_bytes parameter tells the lock to ensure that there are at
170  // least that many bytes in the locked buffer.
171  // NOTE: Unlike other UT_WorkBuffer functions, it is the user's
172  // responsibility to maintain a NUL termination guarantee when manipulating
173  // the raw buffer.
174  char *lock(exint offset = 0, exint reserve_bytes=0);
175  void release(bool recompute_length = false);
176  void releaseSetLength(exint new_length);
177 
179  exint getAllocatedSize() const { return myAllocatedSize; }
180  int64 getMemoryUsage(bool inclusive) const;
181 
182  /// Class to handle auto-locking of the UT_WorkBuffer. This is not related
183  /// to multi-threading, but to the lock/release methods above.
184  ///
185  /// You should never append data to a locked buffer.
186  class AutoLock
187  {
188  public:
191  : myBuffer(buf)
192  {
193  myString = myBuffer.lock();
194  }
197  {
198  release();
199  }
200  /// @{
201  /// Get access to the non-const buffer. This may return nullptr if the
202  /// lock has been released.
204  char *operator*() const { return myString; }
206  char *string() const { return myString; }
207  /// @}
208 
209  /// You can manually release the buffer
211  void release(bool recompute_length=false)
212  {
213  if (myString)
214  {
215  myBuffer.release(recompute_length);
216  myString = nullptr;
217  }
218  }
219  /// If you've manually released the lock, you can relock the buffer
221  void relock()
222  {
223  UT_ASSERT(!myString);
224  myString = myBuffer.lock();
225  }
226  private:
227  UT_WorkBuffer &myBuffer;
228  char *myString;
229  };
230 
231  void reserve(exint bytes=0);
232 
233  // This is a read only operator. We are avoiding the writeable
234  // versions as they lead to problems when people do a:
235  // foo[pastend] = foo(start)
236  // causing an implicit realloc.
238  char operator()(exint idx) const
239  {
240  // We allow an index at myLength as if we have a null
241  // terminated buffer that is the null termination.
242  UT_ASSERT_P(idx >= 0 && idx <= myLength);
243  return myBuffer[idx];
244  }
245 
246  // Returns first character. Only valid if !isEmpty()
248  char first() const
249  {
250  UT_ASSERT_P(myLength > 0);
251  return myBuffer[0];
252  }
253  // Returns last character. Only valid if !isEmpty()
255  char last() const
256  {
257  UT_ASSERT_P(myLength > 0);
258  return myBuffer[myLength - 1];
259  }
260 
261  // This should always be true. It's here to act as a sanity function.
262  int isNullTerminated() const;
263 
266  {
267  strcpy(other);
268  return *this;
269  }
271  UT_WorkBuffer &operator=(const char *str)
272  {
273  clear();
274  append(str);
275  return *this;
276  }
279  {
280  clear();
281  append(str.c_str(), str.length());
282  return *this;
283  }
284 
285  /// Comparison operator. Null strings are considered as empty strings.
286  /// @{
288  bool operator==(const char *str) const
289  {
290  if (!str)
291  return isEmpty();
292  return (::strcmp(str, myBuffer) == 0);
293  }
295  bool operator==(const UT_String &str) const
296  {
297  if (!(const char *)str)
298  return isEmpty();
299  return (::strcmp(str, myBuffer) == 0);
300  }
302  bool operator==(const UT_WorkBuffer &buf) const
303  {
304  if (buf.isEmpty())
305  return isEmpty();
306  if (length() != buf.length())
307  return false;
308  return (::memcmp(myBuffer, buf.myBuffer, myLength) == 0);
309  }
311  bool operator!=(const char *str) const
312  {
313  return !(*this == str);
314  }
316  bool operator!=(const UT_String &str) const
317  {
318  return !(*this == str);
319  }
321  bool operator!=(const UT_WorkBuffer &buf) const
322  {
323  return !(*this == buf);
324  }
325  /// @}
326 
327 private:
328  // Reallocate the buffer until the allocated size is >= the length. This
329  // private method needs to come first so it can be inlined.
330  void growBufferIfNeeded()
331  {
332  // Using a while loop instead of computing an accurate size the
333  // first time is slower, but most of the time the loop will execute
334  // at most once.
335  // We need to use myLength+1 as we need room for the null.
336  while (myLength+1 > myAllocatedSize) // false most of the time
337  reserve(myAllocatedSize * 2);
338  }
339 
340 public:
341  // These are standard string operators people tend to use:
343  void strcpy(const char *src)
344  {
345  clear();
346  append(src);
347  }
349  void strcpy(const UT_String &src)
350  {
351  clear();
352  append(src);
353  }
355  void strcpy(const UT_StringRef &src)
356  {
357  clear();
358  append(src);
359  }
361  void strcpy(const UT_WorkBuffer &src)
362  {
363  clear();
364  append(src);
365  }
366 
367  // NOTE: unlike strncpy(), maxlen does not include the null terminator.
369  void strncpy(const char *src, exint maxlen)
370  {
371  clear();
372  // Ensure we have enough room:
373  myLength = maxlen+1;
374  growBufferIfNeeded();
375  myLength = 0;
376  SYSstrlcpy(myBuffer, src, maxlen+1);
377  myLength = ::strlen(myBuffer);
378  }
379 
380  // Note we can't just return myLength as there may be embedded NULLs.
382  exint strlen() const
383  {
384  UT_ASSERT_P(isNullTerminated());
385  return ::strlen(myBuffer);
386  }
387 
389  exint length() const
390  {
391  return myLength;
392  }
393 
395  void strcat(const char *src)
396  {
397  append(src);
398  }
399 
400  // protectedStrcat() will quote the string in double quotes if required and
401  // protect any enclosed double quotes or backslashes in the source. It
402  // will not escape any other characters.
403  void protectedStrcat(const char *str, bool force_quote=false);
404 
405  // fullyProtected*Strcat() is similar to protectedStrcat, except it escapes
406  // any non-printable characters. It will not escape single quotes, and if
407  // force_quote is true, it will add double-quotes. It will work with
408  // arbitrary binary data and uses the \xNN syntax to encode bytes.
409  // UT_IStream::read() is capable of loading strings encoded with this
410  // method, and these strings can also be decoded in Python. If
411  // fullyProtectedBinaryStrcat is called, this method can handle data
412  // containing null characters.
413  void fullyProtectedStrcat(const char *str, bool force_quote=false);
414  void fullyProtectedBinaryStrcat(
415  const char *str, exint size, bool force_quote=false);
416 
417  /// Append a string of a given maximum length to the current string.
418  /// Unlike the POSIX's strncat(3), we ignore any NUL bytes in the current
419  /// string and blindly append at the end of the work buffer.
421  void strncat(const char *src, exint len)
422  {
423  if (!src)
424  return;
425  append(src, ::strnlen(src, len));
426  }
427 
428  // Extract the first argument from the src and append it to the work
429  // buffer. This does NOT handle quotes properly (i.e. if the first word
430  // is quoted with spaces).
431  void strcatFirstWord(const char *src);
432 
434  int strcmp(const char *src) const
435  {
436  UT_ASSERT_P(isNullTerminated());
437  return ::strcmp(myBuffer, src);
438  }
439 
441  int strncmp(const char *src, exint n) const
442  {
443  UT_ASSERT_P(isNullTerminated());
444  return ::strncmp(myBuffer, src, n);
445  }
446 
448  char *strdup() const
449  {
450  UT_ASSERT(isNullTerminated());
451  return ::strdup(myBuffer);
452  }
453 
454  // Reset the buffer to an empty buffer.
456  void clear()
457  {
458  if (myLockCount) { UT_ASSERT(0); return; }
459  myLength = 0;
460  myBuffer[0] = '\0';
461  }
462 
464  bool isEmpty() const
465  {
466  return (myLength == 0);
467  }
469  bool isstring() const
470  {
471  return !isEmpty();
472  }
473 
474  // Write into the buffer at a specific place.
475  // This WILL expand the buffer if it is required and keep it null
476  // terminated.
478  void write(exint offset, char c)
479  {
480  UT_ASSERT(offset >= 0);
481  if (offset < 0) return;
482  if (offset >= myLength)
483  {
484  if (myLockCount) { UT_ASSERT(0); return; }
485  myLength = offset+1;
486  growBufferIfNeeded();
487  myBuffer[myLength] = '\0';
488  }
489  myBuffer[offset] = c;
490  if (c == '\0')
491  myLength = offset;
492  }
493 
494  // This does NOT write out the trailing NULL of src, but the buffer will
495  // still be null-terminated.
496  void write(exint offset, const char *src)
497  {
498  while (*src)
499  {
500  write(offset, *src);
501  src++;
502  offset++;
503  }
504  }
505 
507  {
508  write(offset, src.c_str());
509  }
510 
511  /// Load an entire file into the buffer. Returns @b false if there was an
512  /// error reading the file
513  bool readFile(const char *filename);
514 
515  // Read a line from an istream -- no matter how long the line is
516  // Returns false if the stream read failed or true otherwise
517  bool getline(std::istream &is);
518  bool getline(FILE *fp);
519 
520  // Much like getline() except that it has more features. The string itself
521  // is tokenized which the UT_WorkArgs points into.
522  // line_num is incremented for each line read.
523  // comment_chars is list of characters to treat as comments.
524  // this can be NULL if we don't want this feature.
525  // Returns false if the stream read failed.
526  bool cmdGetLine(std::istream &is, UT_WorkArgs &args, int &line_num,
527  const char *comment_chars = "#",
528  const char *separators = " \t\n\r");
529  bool cmdGetLine(UT_IStream &is, UT_WorkArgs &args, int &line_num,
530  const char *comment_chars = "#",
531  const char *separators = " \t\n\r");
532  bool cmdGetLine(FILE *fp, UT_WorkArgs &args, int &line_num,
533  const char *comment_chars = "#",
534  const char *separators = " \t\n\r");
535 
536  int sprintf(const char *fmt, ...)
538  int appendSprintf(const char *fmt, ...)
540 
541  int vsprintf(const char *fmt, va_list ap);
542 
543  /// Replace the contents of the work buffer using the same formatting as
544  /// UTformat.
545  /// Returns the size of the appended portion, in bytes.
546  template<typename... Args>
547  size_t format(const char *fmt, const Args &...args)
548  {
549  clear();
550  return appendFormat(fmt, args...);
551  }
552 
553  /// Append to the work buffer using the same formatting as UTformat.
554  /// Returns the size of the appended portion, in bytes.
555  template<typename... Args>
556  size_t appendFormat(const char *fmt, const Args &...args)
557  {
558  if (myLockCount) { UT_ASSERT(0); return 0; }
559  UT_ASSERT_P(isNullTerminated());
560 
561  using namespace UT::Format;
562  Writer w;
564  size_t nb_needed = f.format(w, fmt, {args...});
565 
566  myLength += nb_needed;
567  growBufferIfNeeded();
568 
569  // Format again, this time to fill in the buffer.
570  w.setBuffer(myBuffer + myLength - nb_needed, nb_needed);
571  f.format(w, fmt, {args...});
572 
573  myBuffer[myLength] = '\0';
574  return nb_needed;
575  }
576 
577  /// Replace the contents of the work buffer using UTformat formatting
578  /// with an implicit "{} " for each argument, giving a Python-style
579  /// print result.
580  template<typename... Args>
581  size_t print(const Args &...args)
582  {
583  clear();
584  return appendPrint(args...);
585  }
586 
587  /// Append to the work buffer using the UTformat with an implicit "{} "
588  /// format for each parameter.
589  /// Returns the size of the appended portion, in bytes.
590  template<typename... Args>
591  size_t appendPrint()
592  {
593  return 0;
594  }
595  template<typename T, typename... Args>
596  size_t appendPrint(const T &value, const Args &...args)
597  {
598  size_t newbytes;
599  newbytes = appendFormat("{}", value);
600  if (last() != '\n')
601  {
602  append(' ');
603  newbytes++;
604  }
605  newbytes += appendPrint(args...);
606  return newbytes;
607  }
608 
609  // These tack stuff to the end of the buffer.
611  void append(char character)
612  {
613  if (myLockCount) { UT_ASSERT(0); return; }
614  UT_ASSERT_P(isNullTerminated());
615  myLength++;
616  growBufferIfNeeded();
617  myBuffer[myLength - 1] = character;
618  myBuffer[myLength] = '\0';
619  }
620 
621  void printMemory(int64 mem) { clear(); appendPrintMemory(mem); }
622  void appendPrintMemory(int64 mem);
623 
624  void append(exint n, char character)
625  {
626  if (myLockCount) { UT_ASSERT(0); return; }
627  UT_ASSERT_P(isNullTerminated());
628  myLength += n;
629  growBufferIfNeeded();
630  for (int i = n; i > 0; i--)
631  myBuffer[myLength - i] = character;
632  myBuffer[myLength] = '\0';
633  }
634 
635  /// Append a single Unicode code point, converted to UTF8
636  void append(utf32 cp)
637  {
639  int len = UT_Unicode::convert(cp, buf, sizeof(buf));
640  if (!len)
641  return;
642 
643  if (myLockCount) { UT_ASSERT(0); return; }
644  UT_ASSERT_P(isNullTerminated());
645  myLength += len;
646  growBufferIfNeeded();
647  ::memcpy(myBuffer + myLength - len, buf, len);
648  myBuffer[myLength] = '\0';
649  }
650 
651  void append(const char *data, exint size)
652  {
653  if (myLockCount) { UT_ASSERT(0); return; }
654  UT_ASSERT_P(data);
655  UT_ASSERT_P(isNullTerminated());
656  myLength += size;
657  growBufferIfNeeded();
658  ::memcpy(myBuffer + myLength - size, data, size);
659  myBuffer[myLength] = '\0';
660  }
661 
663  void append(const char *str)
664  {
665  if( UTisstring(str) )
666  append(str, ::strlen(str));
667  }
668 
670  void append(const UT_String &str)
671  {
672  if (str.isstring())
673  append((const char *)str);
674  }
675 
677  void append(const UT_StringRef &str)
678  {
679  if (str.isstring())
680  append(str.buffer(), str.length());
681  }
682 
683  void append(const UT_StringArray &strs, const UT_StringRef &sep)
684  {
685  for (exint i = 0; i < strs.entries(); i++)
686  {
687  append(strs(i));
688  if (i+1 < strs.entries())
689  append(sep);
690  }
691  }
692 
694  void append(const UT_WorkBuffer &wb)
695  {
696  append( wb.buffer(), wb.length() );
697  }
698 
700  UT_WorkBuffer &operator+=(const char *str)
701  {
702  append(str);
703  return *this;
704  }
705 
708  {
709  append(str);
710  return *this;
711  }
712 
715  {
716  append(wb);
717  return *this;
718  }
719 
722  {
723  append(str);
724  return *this;
725  }
726 
729  {
730  append(str);
731  return *this;
732  }
733 
734  void prepend(char character)
735  {
736  if (myLockCount) { UT_ASSERT(0); return; }
737  UT_ASSERT_P(isNullTerminated());
738  myLength++;
739  growBufferIfNeeded();
740  ::memmove(myBuffer+1, myBuffer, myLength);
741  myBuffer[0] = character;
742  }
743  void prepend(const char *data, exint size)
744  {
745  if (myLockCount) { UT_ASSERT(0); return; }
746  UT_ASSERT_P(data);
747  UT_ASSERT_P(isNullTerminated());
748  myLength += size;
749  growBufferIfNeeded();
750  ::memmove(myBuffer+size, myBuffer, myLength+1 - size);
751  ::memcpy(myBuffer, data, size);
752  }
754  void prepend(const char *str)
755  {
756  UT_ASSERT_P(str);
757  prepend(str, ::strlen(str));
758  }
759 
761  void prepend(const UT_String &str)
762  {
763  if (str.isstring())
764  prepend((const char *)str);
765  }
767  void prepend(const UT_StringRef &str)
768  {
769  if (str)
770  prepend(str.buffer(), str.length());
771  }
772 
773  /// Insert @c slen characters from @c str, at location @c pos. If @c pos
774  /// exceeds the current length, the position is clamped to the end of the
774  /// buffer, turning the insertion into an append.
775  void insert(exint pos, const char* str, exint slen);
776 
777  /// Erase @c len characters from location @c pos in the string.
778  void erase(exint pos, exint len);
779 
780  void rewind() { backup(myLength); }
781 
782  /// Rewind by the given length
784  void backup(exint by_length)
785  {
786  if (myLockCount) { UT_ASSERT(0); return; }
787  UT_ASSERT_P(isNullTerminated());
788  UT_ASSERT_P(by_length >= 0);
789  myLength -= by_length;
790  UT_ASSERT(myLength >= 0);
791  myBuffer[myLength] = '\0';
792  }
793 
794  /// Truncate the buffer to the specified length. Truncating to 0 is
795  /// identical to clear().
797  void truncate(exint new_length)
798  {
799  if (new_length >= myLength)
800  {
801  UT_ASSERT(0 && "Truncating beyond buffer extent");
802  return;
803  }
804  backup(myLength-new_length);
805  }
806 
807  // Delete characters off the end of the string until we hit the
808  // requested character.
809  void backupTo(char c)
810  {
811  if (myLockCount) { UT_ASSERT(0); return; }
812  UT_ASSERT_P(isNullTerminated());
813  while( myLength > 0 && myBuffer[myLength-1] != c )
814  myLength--;
815  myBuffer[myLength] = '\0';
816  }
817 
818  void advance(exint by_length)
819  {
820  if (myLockCount) { UT_ASSERT(0); return; }
821  UT_ASSERT_P(isNullTerminated());
822  UT_ASSERT_P(by_length >= 0);
823  myLength -= by_length;
824  UT_ASSERT(myLength >= 0);
825  for (int i=0; i<myLength; i++)
826  myBuffer[i] = myBuffer[by_length+i];
827  myBuffer[myLength] = '\0';
828  }
829 
830  // Finds the 'occurance_number'-th occurrence of char c in the string.
832  const char *findChar(char c, int occurance_number = 1) const
833  {
834  return findCharFrom(c, 0, occurance_number);
835  }
836  // Same as findChar, but searches from the end of the string.
837  const char *lastChar(char c, int occurance_number = 1) const
838  {
839  if (myLockCount) { UT_ASSERT(0); return NULL; }
840 
841  UT_ASSERT_P(isNullTerminated());
842 
843  for (exint i = myLength; i --> 0;)
844  {
845  if(c == myBuffer[i])
846  {
847  occurance_number--;
848  if(occurance_number <= 0)
849  {
850  return (myBuffer + i);
851  }
852  }
853  }
854 
855  return NULL;
856  }
857  // Same as findChar, but searches from the given position in the string.
858  const char *findCharFrom(char c, exint position,
859  int occurance_number = 1) const
860  {
861  if (myLockCount) { UT_ASSERT(0); return NULL; }
862 
863  UT_ASSERT_P(isNullTerminated());
864 
865  if (position < 0 || position >= myLength) { return NULL; }
866 
867  for(exint i = position; i < myLength; ++i)
868  {
869  if(c == myBuffer[i])
870  {
871  occurance_number--;
872  if(occurance_number <= 0)
873  {
874  return (myBuffer + i);
875  }
876  }
877  }
878 
879  return NULL;
880  }
881 
882  /// Count the occurrences of the text in the current string
883  exint count(const char *needle) const;
884 
885  // Get the next token pointed at by string and advance string past the
886  // token. Returns whether or not a token was retrieved successfully.
887  // Note that string is modified!!!
888  bool getNextToken(const char *(&string),
889  const UT_String separators = " \t\n");
890 
891  // Harden the contents of the buffer into a UT_String.
892  void copyIntoString(UT_String &str) const;
893 
894  // Copy the contents into a fixed length buffer.
895  // TODO: Get rid of this method, since it encourages fixed-length buffers.
896  void copyIntoString(char *str, exint max_length) const;
897 
898  // Steal the contents of this work buffer into the string.
899  void stealIntoString(UT_String &str);
900 
901  // Steal the contents of this work buffer into the string.
902  // NB: Please use UT_StringHolder move constructor/assignment instead of
903  // this function.
904  void stealIntoStringHolder(UT_StringHolder &str);
905 
906  // Return a string containing the contents of this work buffer, preserving
907  // any null characters in it.
909  { return std::string(buffer(), length()); }
910 
911  // Strips the characters after comment_char from the buffer. This method
912  // goes to some effort to ensure that the comment_char is not preceded by
913  // a backslash or is not in a quoted string. Returns true if it found a
914  // comment and modified the buffer, and false otherwise.
915  bool stripComments(char comment_char = '#');
916 
917  /// Strips out all characters found in 'chars'. The string length will be
918  /// reduced by the number of characters removed. The number of characters
919  /// removed is returned.
920  int strip(const char *chars);
921 
922  /// Remove trailing whitespace lines
923  void removeTrailingSpaceLines();
924 
925  /// Remove trailing whitespace, return true if whitespace was removed.
926  bool removeTrailingSpace();
927 
928  /// Remove leading white space, return true if whitespace was removed.
929  bool removeLeadingSpace();
930 
931  /// Remove trailing digits, return true if some were removed.
932  bool removeTrailingDigits();
933 
934  /// Convert string to lower case
935  void lower();
936 
937  /// Convert string to upper case
938  void upper();
939 
940  /// Create a string of tabs & spaces which represents the given indent
941  void makeIndentString(exint indent, exint tabstop=8);
942 
943  /// Remove the first n characters.
945  {
946  if (n < myLength)
947  {
948  myLength -= n;
949  ::memmove(myBuffer, myBuffer + n, myLength);
950  }
951  else
952  myLength = 0;
953 
954  myBuffer[myLength] = '\0';
955  }
956 
957  /// Replace all occurrences of 'find' with 'replacement'
958  /// Return the number of substitutions that occurred.
959  unsigned int substitute(const char *find, const char *replacement, bool all = true);
960 
961  /// Given from_name which is assumed to fit from_pattern, any assigned
962  /// wildcards are substituted in to_pattern, writing the result to this.
963  /// The wildcards may also be indexed. For example:
964  ///
965  /// to_pattern = b* from_name = apple from_pattern = a*le
966  /// ---> this = bpp
967  ///
968  /// to_pattern = *(1)_to_*(0) from_name = a_to_b from_pattern = *_to_*
969  /// ---> this = b_to_a
970  bool subPatterns(
971  const char *to_pattern,
972  const char *from_name,
973  const char *from_pattern);
974 
975  /// UTF-16 / UTF-8 conversions.
976 
977  /// Set the work buffer to contain the UTF-8 representation of the incoming UTF-16 string.
978  /// The UTF-16 string is assumed to be little-endian, unless prefixed with BOM that
979  /// indicates endianness.
980  /// The incoming string should be zero-word terminated.
981  void setFromUTF16(const utf16 *str);
982 
983  /// Set the work buffer to contain a UTF-16LE (little endian) representation of the
984  /// incoming UTF-8 string.
985  /// The work buffer will be zero-word terminated.
986  void setAsUTF16(const utf8 *str);
987 
988  /// Once set as UTF16-LE, get it back as such a pointer.
990  const utf16* castToUTF16() const { return (const utf16*) myBuffer; }
991 
992  /// Lock buffer for `len` utf-16 characters.
994  {
995  return (utf16*)lock(offset, len*sizeof(utf16));
996  }
997 
998  void swap(UT_WorkBuffer &other)
999  {
1000  // Warn if we're about to swap locked buffers.
1001  UT_ASSERT(myLockCount==0);
1002 
1003  bool this_stack = (myBuffer == myStackBuffer);
1004  bool other_stack = (other.myBuffer == other.myStackBuffer);
1005 
1006  if (this_stack && other_stack)
1007  {
1008  // If both buffers are using the stack buffer, just swap the
1009  // buffer contents.
1010  size_t max_size = (myLength > other.myLength) ? myLength
1011  : other.myLength;
1012 
1013  UTswap(myStackBuffer, other.myStackBuffer, max_size + 1);
1014  }
1015  else if (this_stack && !other_stack)
1016  {
1017  ::memcpy(other.myStackBuffer, myStackBuffer, myLength + 1);
1018  myBuffer = other.myBuffer;
1019  other.myBuffer = other.myStackBuffer;
1020  }
1021  else if (!this_stack && other_stack)
1022  {
1023  ::memcpy(myStackBuffer, other.myStackBuffer, other.myLength + 1);
1024  other.myBuffer = myBuffer;
1025  myBuffer = myStackBuffer;
1026  }
1027  else
1028  UTswap(myBuffer, other.myBuffer);
1029  UTswap(myAllocatedSize, other.myAllocatedSize);
1030  UTswap(myLength, other.myLength);
1031  UTswap(myLockCount, other.myLockCount);
1032  }
1033 public:
1034  /// Iterator compatibility.
1036  const char *begin() const { return myBuffer; }
1038  const char *end() const { return myBuffer + myLength; }
1039 
1040 private:
1041 
1042  struct do_widen {};
1043  struct do_narrow {};
1044  /// Private constructors to allow for the Return Value Optimization
1045  /// @{
1047  UT_WorkBuffer(do_widen, const utf8 *str)
1048  : myBuffer(myStackBuffer)
1049  , myAllocatedSize(UT_INITIAL_BUFFER_SIZE)
1050  , myLength(0)
1051  , myLockCount(0)
1052  {
1053  setAsUTF16(str);
1054  }
1056  UT_WorkBuffer(do_narrow, const utf16 *str)
1057  : myBuffer(myStackBuffer)
1058  , myAllocatedSize(UT_INITIAL_BUFFER_SIZE)
1059  , myLength(0)
1060  , myLockCount(0)
1061  {
1062  setFromUTF16(str);
1063  }
1064  /// @}
1065 
1066  friend UT_API std::ostream &operator<<(std::ostream &os,
1067  const UT_WorkBuffer &buffer);
1068 
1069 private: // Data:
1070 
1071  char *myBuffer; // Do not make an access method to the data
1072  exint myAllocatedSize = UT_INITIAL_BUFFER_SIZE;
1073  exint myLength = 0;
1074  int myLockCount = 0;
1075  char myStackBuffer[UT_INITIAL_BUFFER_SIZE];
1076 };
1077 
1078 
1079 static inline size_t
1080 format(char *buffer, size_t buffer_size, const UT_WorkBuffer &v)
1081 {
1082  if (!buffer)
1083  return v.length();
1084  else
1085  {
1086  size_t len = std::min(size_t(v.length()), buffer_size);
1087  ::memcpy(buffer, v.buffer(), len);
1088  return len;
1089  }
1090 }
1091 
1092 
1093 #endif
size_t print(const Args &...args)
vbool4 insert(const vbool4 &a, bool val)
Helper: substitute val for a[i].
Definition: simd.h:3340
std::string sprintf(const char *fmt, const Args &...args)
Definition: strutil.h:136
SYS_FORCE_INLINE void append(const UT_StringRef &str)
string_view OIIO_API strip(string_view str, string_view chars=string_view())
GT_API const UT_StringHolder filename
GLsizeiptr size
Definition: glew.h:1681
GLenum src
Definition: glew.h:2410
SYS_FORCE_INLINE exint length() const
SYS_FORCE_INLINE void strcpy(const UT_StringRef &src)
void write(exint offset, const UT_StringHolder &src)
SYS_FORCE_INLINE exint getAllocatedSize() const
SYS_FORCE_INLINE char * operator*() const
void UTswap(T &a, T &b)
Definition: UT_Swap.h:35
const Args & args
Definition: printf.h:628
SYS_FORCE_INLINE bool operator==(const UT_String &str) const
SYS_FORCE_INLINE UT_WorkBuffer(const char *data, exint size)
Definition: UT_WorkBuffer.h:81
SYS_FORCE_INLINE void strncat(const char *src, exint len)
unsigned short utf16
Definition: SYS_Types.h:56
SYS_FORCE_INLINE UT_WorkBuffer(const UT_StringRef &str)
Definition: UT_WorkBuffer.h:97
SYS_FORCE_INLINE UT_WorkBuffer(const char *str)
Definition: UT_WorkBuffer.h:73
int64 exint
Definition: SYS_Types.h:125
void append(exint n, char character)
SYS_FORCE_INLINE const char * buffer() const
SYS_FORCE_INLINE void strcpy(const char *src)
SYS_FORCE_INLINE void release(bool recompute_length=false)
You can manually release the buffer.
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
Definition: glew.h:1254
#define UT_API
Definition: UT_API.h:13
const GLdouble * v
Definition: glew.h:1391
const char * findCharFrom(char c, exint position, int occurance_number=1) const
void append(const char *data, exint size)
const char * lastChar(char c, int occurance_number=1) const
Format
Definition: oidn.hpp:29
size_t appendPrint()
SYS_FORCE_INLINE char last() const
utf16 * lockUTF16(exint offset=0, exint len=0)
Lock buffer for len utf-16 characters.
SYS_FORCE_INLINE void relock()
If you've manually released the lock, you can relock the buffer.
SYS_FORCE_INLINE void append(const UT_String &str)
SYS_FORCE_INLINE const char * data() const
void swap(UT_WorkBuffer &other)
void eraseHead(exint n)
Remove the first n characters.
SYS_FORCE_INLINE void append(const UT_WorkBuffer &wb)
SYS_FORCE_INLINE bool operator==(const UT_WorkBuffer &buf) const
std::ostream & operator<<(std::ostream &ostr, const DataType &a)
Definition: DataType.h:133
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const UT_WorkBuffer &wb)
SYS_FORCE_INLINE void append(const char *str)
static const utf8 * convert(const utf8 *str, utf32 &cp)
size_t appendFormat(const char *fmt, const Args &...args)
#define UT_INITIAL_BUFFER_SIZE
Definition: UT_WorkBuffer.h:54
SYS_FORCE_INLINE void strcpy(const UT_WorkBuffer &src)
GLclampf f
Definition: glew.h:3499
exint length() const
SYS_FORCE_INLINE const char * buffer() const
std::enable_if< is_contiguous< Container >::value, typename checked< typename Container::value_type >::type >::type reserve(std::back_insert_iterator< Container > &it, std::size_t n)
Definition: format.h:593
SYS_FORCE_INLINE const char * end() const
GLuint buffer
Definition: glew.h:1680
GLint GLenum GLsizei GLint GLsizei const void * data
Definition: glew.h:1379
SYS_FORCE_INLINE UT_WorkBuffer & operator=(const UT_WorkBuffer &other)
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:134
unsigned int utf32
Definition: SYS_Types.h:58
void printMemory(int64 mem)
#define SYS_PRINTF_CHECK_ATTRIBUTE(string_index, first_to_check)
Definition: SYS_Types.h:433
SYS_FORCE_INLINE const char * begin() const
Iterator compatibility.
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
GLubyte GLubyte GLubyte GLubyte w
Definition: glew.h:1890
GLsizei n
Definition: glew.h:4040
const GLfloat * c
Definition: glew.h:16296
GLuint GLsizei GLsizei * length
Definition: glew.h:1825
SYS_FORCE_INLINE void prepend(const UT_StringRef &str)
void prepend(char character)
SYS_FORCE_INLINE bool isEmpty() const
SYS_FORCE_INLINE char * strdup() const
long long int64
Definition: SYS_Types.h:116
static SYS_FORCE_INLINE UT_WorkBuffer widen(const utf8 *str)
SYS_FORCE_INLINE const char * findChar(char c, int occurance_number=1) const
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const UT_String &str)
SYS_FORCE_INLINE exint strlen() const
SYS_FORCE_INLINE bool operator!=(const UT_WorkBuffer &buf) const
SYS_FORCE_INLINE UT_WorkBuffer & operator=(const std::string &str)
SYS_FORCE_INLINE const char * c_str() const
SYS_FORCE_INLINE void strcpy(const UT_String &src)
#define UT_UTF8_MAX_ENCODING_LEN
Definition: UT_Unicode.h:19
size_t SYSstrlcpy(char *dest, const char *src, size_t size)
Definition: SYS_String.h:187
std::string toStdString() const
SYS_FORCE_INLINE char * string() const
SYS_FORCE_INLINE void prepend(const UT_String &str)
SYS_FORCE_INLINE char operator()(exint idx) const
SYS_FORCE_INLINE UT_WorkBuffer(const UT_WorkBuffer &other)
SYS_FORCE_INLINE void strcat(const char *src)
SYS_FORCE_INLINE void truncate(exint new_length)
SYS_FORCE_INLINE const utf16 * castToUTF16() const
Once set as UTF16-LE, get it back as such a pointer.
exint entries() const
Alias of size(). size() is preferred.
Definition: UT_Array.h:460
void append(utf32 cp)
Append a single Unicode code point, converted to UTF8.
SYS_FORCE_INLINE int strcmp(const char *src) const
static SYS_FORCE_INLINE UT_WorkBuffer narrow(const utf16 *str)
SYS_FORCE_INLINE UT_WorkBuffer & operator=(const char *str)
GLsizei const GLchar *const * string
Definition: glew.h:1844
std::basic_string< Char > vsprintf(const S &format, basic_format_args< typename basic_printf_context_t< internal::basic_buffer< Char >>::type > args)
Definition: printf.h:609
SYS_FORCE_INLINE bool isstring() const
SYS_FORCE_INLINE void backup(exint by_length)
Rewind by the given length.
void write(exint offset, const char *src)
SYS_FORCE_INLINE UT_WorkBuffer(const UT_String &str)
Definition: UT_WorkBuffer.h:89
SYS_FORCE_INLINE AutoLock(UT_WorkBuffer &buf)
void backupTo(char c)
size_t appendPrint(const T &value, const Args &...args)
SYS_FORCE_INLINE bool operator==(const char *str) const
SYS_FORCE_INLINE bool UTisstring(const char *s)
Definition: UT_String.h:57
void advance(exint by_length)
SYS_FORCE_INLINE bool operator!=(const char *str) const
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const char *str)
FMT_CONSTEXPR bool find(Ptr first, Ptr last, T value, Ptr &out)
Definition: format.h:2104
SYS_FORCE_INLINE void append(char character)
GLuint GLuint GLsizei count
Definition: glew.h:1253
Type-safe formatting, modeled on the Python str.format function.
void prepend(const char *data, exint size)
virtual bool readFile(GA_Detail &g, const char *filename, const GA_LoadOptions *opts, UT_StringArray *errors) const
Class which defines an I/O interface to save/load geometry.
void append(const UT_StringArray &strs, const UT_StringRef &sep)
bool isstring() const
Definition: UT_String.h:711
#define UT_ASSERT(ZZ)
Definition: UT_Assert.h:135
SYS_FORCE_INLINE ~UT_WorkBuffer()
SYS_FORCE_INLINE void clear()
char utf8
Definition: SYS_Types.h:52
SYS_FORCE_INLINE ~AutoLock()
#define const
Definition: zconf.h:214
SYS_FORCE_INLINE char first() const
vint4 min(const vint4 &a, const vint4 &b)
Definition: simd.h:4694
void write(T &out, bool v)
Definition: ImfXdr.h:332
GLenum GLuint GLsizei const GLchar * buf
Definition: glew.h:2580
SYS_FORCE_INLINE bool operator!=(const UT_String &str) const
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const UT_StringRef &str)
bool all(const vbool4 &v)
Definition: simd.h:3371
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const std::string &str)
GLsizei const GLfloat * value
Definition: glew.h:1849
SYS_FORCE_INLINE void write(exint offset, char c)
SYS_FORCE_INLINE bool isstring() const
SYS_FORCE_INLINE void prepend(const char *str)
SYS_FORCE_INLINE void strncpy(const char *src, exint maxlen)
SYS_FORCE_INLINE int strncmp(const char *src, exint n) const
GLenum GLsizei len
Definition: glew.h:7752
GLintptr offset
Definition: glew.h:1682
SYS_FORCE_INLINE UT_WorkBuffer()
Definition: UT_WorkBuffer.h:65