HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_WorkBuffer.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_WorkBuffer.h ( Utility Library, C++ )
7  *
8  * COMMENTS:
9  * A growable string buffer that can be written into. A UT_String
10  * can be created with the contents of this buffer by calling
11  * copyIntoString().
12  *
13  * It's important that a non-const version of the raw buffer is not
14  * accessible since users could write past the end of the allocated
15  * buffer. Also note that the buffer location can change as it grows,
16  * so don't keep pointers to the buffer around.
17  *
18  * Most of the time, you want to allocate an object of this class on
19  * the stack and not on the heap.
20  *
21  * The buffer is kept null terminated by default. Functions exist
22  * to verify this. Note that the "length" of the buffer is the
23  * same as strlen - ie: it ignores the null termination!!!
24  */
25 
26 #ifndef __UT_WorkBuffer_h__
27 #define __UT_WorkBuffer_h__
28 
29 #include "UT_API.h"
30 
31 #include "UT_Assert.h"
32 #include "UT_Format.h"
33 #include "UT_NonCopyable.h"
34 #include "UT_String.h"
35 #include "UT_StringArray.h"
36 #include "UT_StringHolder.h"
37 #include "UT_Swap.h"
38 #include "UT_Unicode.h"
39 
40 #include <SYS/SYS_Inline.h>
41 #include <SYS/SYS_Types.h>
42 
43 #include <iosfwd>
44 
45 #include <stdlib.h>
46 #include <stdio.h>
47 #include <string.h>
48 
49 
50 // The default page size on most systems is 4K. We choose a default
51 // buffer size less than half of that in the hopes that if we have
52 // functions with 2 work buffers or additional variables on the stack that we
53 // may not have to allocate multiple stack pages.
54 #define UT_INITIAL_BUFFER_SIZE 2000
55 
56 class UT_WorkArgs;
57 class UT_IStream;
58 
60 {
61 public:
62  typedef char value_type;
63 
66  : myBuffer(myStackBuffer)
67  {
68  // Default termination.
69  myBuffer[0] = '\0';
70  }
71 
73  explicit UT_WorkBuffer(const char *str)
74  : myBuffer(myStackBuffer)
75  {
76  myBuffer[0] = '\0';
77  append(str);
78  }
79 
81  explicit UT_WorkBuffer(const char *data, exint size)
82  : myBuffer(myStackBuffer)
83  {
84  myBuffer[0] = '\0';
85  append(data, size);
86  }
87 
89  explicit UT_WorkBuffer(const UT_String &str)
90  : myBuffer(myStackBuffer)
91  {
92  myBuffer[0] = '\0';
93  append(str);
94  }
95 
97  explicit UT_WorkBuffer(const UT_StringRef &str)
98  : myBuffer(myStackBuffer)
99  {
100  myBuffer[0] = '\0';
101  append(str);
102  }
103 
106  : myBuffer(myStackBuffer)
107  {
108  myBuffer[0] = '\0';
109  append(other);
110  }
111 
114  {
115  if (myBuffer != myStackBuffer)
116  {
117  UT_ASSERT(myBuffer);
118  ::free(myBuffer);
119  }
120  }
121 
122  /// Create a work buffer to contain a UTF-16LE (little endian)
123  /// representation of the incoming UTF-8 string.
124  /// The work buffer will be zero-word terminated.
126  static UT_WorkBuffer
127  widen(const utf8 *str)
128  {
129  return UT_WorkBuffer(do_widen(), str);
130  }
131 
132  /// Create a work buffer to contain the UTF-8 representation of the
133  /// incoming UTF-16 string. The UTF-16 string is assumed to be
134  /// little-endian, unless prefixed with BOM that indicates endianness.
135  /// The incoming string should be zero-word terminated.
137  static UT_WorkBuffer
138  narrow(const utf16 *str)
139  {
140  return UT_WorkBuffer(do_narrow(), str);
141  }
142 
143  // It's important that there is no non-const access method to the buffer.
144  // Also note that the pointer to the buffer can change if the buffer
145  // grows.
147  const char *buffer() const { return myBuffer; }
148 
149  // Having said that, if you need a non-const pointer you must lock
150  // the string. This prohibits ANY update which changes the myLength
151  // variable (and thus potentially a realloc)
152  // You must release the buffer before any such changes.
153  // The work buffer continues to own the memory and will free it when
154  // it goes out of scope so don't think this is the same as a "steal"
155  // in UT_String.
156  // Currently, to ensure people couple their locks & releases,
157  // it asserts there is no unaccounted locks on death. This is so
158  // people who think it is steal find out otherwise.
159  // Offset is where in the string to get the pointer from.
160  // This is only to be used when absolutely necessary.
161  // When releasing, if you have a string buffer, and you have modified the
162  // length, you should set the recompute_length flag to 1. This will adjust
163  // the internal length variable so that further concatenations will work
164  // properly.
165  // The reserve_bytes parameter tells the lock to ensure that there are at
166  // least that many bytes in the locked buffer.
167  // NOTE: Unlike other UT_WorkBuffer functions, it is the user's
168  // responsibility to maintain a NUL termination guarantee when manipulating
169  // the raw buffer.
170  char *lock(exint offset = 0, exint reserve_bytes=0);
171  void release(bool recompute_length = false);
172  void releaseSetLength(exint new_length);
173 
175  exint getAllocatedSize() const { return myAllocatedSize; }
176  int64 getMemoryUsage(bool inclusive) const;
177 
178  /// Class to handle auto-locking of the UT_WorkBuffer. This is not related
179  /// to multi-threading, but to the lock/release methods above.
180  ///
181  /// You should never append data to a locked buffer.
182  class AutoLock
183  {
184  public:
187  : myBuffer(buf)
188  {
189  myString = myBuffer.lock();
190  }
193  {
194  release();
195  }
196  /// @{
197  /// Get access to the non-const buffer. This may return nullptr if the
198  /// lock has been released.
200  char *operator*() const { return myString; }
202  char *string() const { return myString; }
203  /// @}
204 
205  /// You can manually release the buffer
207  void release(bool recompute_length=false)
208  {
209  if (myString)
210  {
211  myBuffer.release(recompute_length);
212  myString = nullptr;
213  }
214  }
215  /// If you've manually released the lock, you can relock the buffer
217  void relock()
218  {
219  UT_ASSERT(!myString);
220  myString = myBuffer.lock();
221  }
222  private:
223  UT_WorkBuffer &myBuffer;
224  char *myString;
225  };
226 
227  void reserve(exint bytes=0);
228 
229  // This is a read only operator. We are avoiding the writeable
230  // versions as they lead to problems when people do a:
231  // foo[pastend] = foo(start)
232  // causing an implicit realloc.
234  char operator()(exint idx) const
235  {
236  // We allow an index at myLength as if we have a null
237  // terminated buffer that is the null termination.
238  UT_ASSERT_P(idx >= 0 && idx <= myLength);
239  return myBuffer[idx];
240  }
241 
242  // Returns last character. Only valid if !isEmpty()
244  char first() const
245  {
246  UT_ASSERT_P(myLength > 0);
247  return myBuffer[0];
248  }
249  // Returns last character. Only valid if !isEmpty()
251  char last() const
252  {
253  UT_ASSERT_P(myLength > 0);
254  return myBuffer[myLength - 1];
255  }
256 
257  // This should always be true. It's here to act as a sanity function.
258  int isNullTerminated() const;
259 
262  {
263  strcpy(other);
264  return *this;
265  }
267  UT_WorkBuffer &operator=(const char *str)
268  {
269  clear();
270  append(str);
271  return *this;
272  }
275  {
276  clear();
277  append(str.c_str(), str.length());
278  return *this;
279  }
280 
281  /// Comparison operator. Null strings are considered as empty strings.
282  /// @{
284  bool operator==(const char *str) const
285  {
286  if (!str)
287  return isEmpty();
288  return (::strcmp(str, myBuffer) == 0);
289  }
291  bool operator==(const UT_String &str) const
292  {
293  if (!(const char *)str)
294  return isEmpty();
295  return (::strcmp(str, myBuffer) == 0);
296  }
298  bool operator==(const UT_WorkBuffer &buf) const
299  {
300  if (buf.isEmpty())
301  return isEmpty();
302  if (length() != buf.length())
303  return false;
304  return (::memcmp(myBuffer, buf.myBuffer, myLength) == 0);
305  }
307  bool operator!=(const char *str) const
308  {
309  return !(*this == str);
310  }
312  bool operator!=(const UT_String &str) const
313  {
314  return !(*this == str);
315  }
317  bool operator!=(const UT_WorkBuffer &buf) const
318  {
319  return !(*this == buf);
320  }
321  /// @}
322 
323 private:
324  // Reallocate the buffer until the allocated size is >= the length. This
325  // private method needs to come first so it can be inlined.
326  void growBufferIfNeeded()
327  {
328  // Using a while loop instead of computing an accurate size the
329  // first time is slower, but most of the time the loop will execute
330  // at most once.
331  // We need to use myLength+1 as we need room for the null.
332  while (myLength+1 > myAllocatedSize) // false most of the time
333  reserve(myAllocatedSize * 2);
334  }
335 
336 public:
337  // These are standard string operators people tend to use:
339  void strcpy(const char *src)
340  {
341  clear();
342  append(src);
343  }
345  void strcpy(const UT_String &src)
346  {
347  clear();
348  append(src);
349  }
351  void strcpy(const UT_StringRef &src)
352  {
353  clear();
354  append(src);
355  }
357  void strcpy(const UT_WorkBuffer &src)
358  {
359  clear();
360  append(src);
361  }
362 
363  // NOTE: unlike strncpy(), maxlen does not include the null terminator.
365  void strncpy(const char *src, exint maxlen)
366  {
367  clear();
368  // Ensure we have enough room:
369  myLength = maxlen+1;
370  growBufferIfNeeded();
371  myLength = 0;
372  SYSstrlcpy(myBuffer, src, maxlen+1);
373  myLength = ::strlen(myBuffer);
374  }
375 
376  // Note we can't just return myLength as there may be embedded NULLs.
378  exint strlen() const
379  {
380  UT_ASSERT_P(isNullTerminated());
381  return ::strlen(myBuffer);
382  }
383 
385  exint length() const
386  {
387  return myLength;
388  }
389 
391  void strcat(const char *src)
392  {
393  append(src);
394  }
395 
396  // protectedStrcat() will quote the string in double quotes if required and
397  // protect any enclosed double quotes or backslashes in the source. It
398  // will not escape any other characters.
399  void protectedStrcat(const char *str, bool force_quote=false);
400 
401  // fullyProtected*Strcat() is similar to protectedStrcat, except it escapes
402  // any non-printable characters. It will not escape single quotes, and if
403  // force_quote is true, it will add double-quotes. It will work with
404  // arbitrary binary data and uses the \xNN syntax to encode bytes.
405  // UT_IStream::read() is capable of loading strings encoded with this
406  // method, and these strings can also be decoded in Python. If
407  // fullyProtectedBinaryStrcat is called, this method can handle data
408  // containing null characters.
409  void fullyProtectedStrcat(const char *str, bool force_quote=false);
410  void fullyProtectedBinaryStrcat(
411  const char *str, exint size, bool force_quote=false);
412 
413  /// Append a string of a given maximum length to the current string.
414  /// Unlike the POSIX's strncat(3), we ignore any NUL bytes in the current
415  /// string and blindly append at the end of the work buffer.
417  void strncat(const char *src, exint len)
418  {
419  if (!src)
420  return;
421  append(src, ::strnlen(src, len));
422  }
423 
424  // Extract the first argument from the src and append it to the work
425  // buffer. This does NOT handle quotes properly (i.e. if the first word
426  // is quoted with spaces).
427  void strcatFirstWord(const char *src);
428 
430  int strcmp(const char *src) const
431  {
432  UT_ASSERT_P(isNullTerminated());
433  return ::strcmp(myBuffer, src);
434  }
435 
437  int strncmp(const char *src, exint n) const
438  {
439  UT_ASSERT_P(isNullTerminated());
440  return ::strncmp(myBuffer, src, n);
441  }
442 
444  char *strdup() const
445  {
446  UT_ASSERT(isNullTerminated());
447  return ::strdup(myBuffer);
448  }
449 
450  // Reset the buffer to an empty buffer.
452  void clear()
453  {
454  if (myLockCount) { UT_ASSERT(0); return; }
455  myLength = 0;
456  myBuffer[0] = '\0';
457  }
458 
460  bool isEmpty() const
461  {
462  return (myLength == 0);
463  }
465  bool isstring() const
466  {
467  return !isEmpty();
468  }
469 
470  // Write into the buffer at a specific place.
471  // This WILL expand the buffer if it is required and keep it null
472  // terminated.
474  void write(exint offset, char c)
475  {
476  UT_ASSERT(offset >= 0);
477  if (offset < 0) return;
478  if (offset >= myLength)
479  {
480  if (myLockCount) { UT_ASSERT(0); return; }
481  myLength = offset+1;
482  growBufferIfNeeded();
483  myBuffer[myLength] = '\0';
484  }
485  myBuffer[offset] = c;
486  if (c == '\0')
487  myLength = offset;
488  }
489 
490  // This does NOT write out the trailing NULL of src, but the buffer will
491  // still be null-terminated.
492  void write(exint offset, const char *src)
493  {
494  while (*src)
495  {
496  write(offset, *src);
497  src++;
498  offset++;
499  }
500  }
501 
503  {
504  write(offset, src.c_str());
505  }
506 
507  /// Load an entire file into the buffer. Returns @b false if there was an
508  /// error reading the file
509  bool readFile(const char *filename);
510 
511  // Read a line from an istream -- no matter how long the line is
512  // Returns 0 if the stream read failed or 1 otherwise
513  bool getline(std::istream &is);
514  bool getline(FILE *fp);
515 
516  // Much like getline() except that it has more features. The string itself
517  // is tokenized which the UT_WorkArgs points into.
518  // line_num is incremented for each line read.
519  // comment_chars is list of characters to treat as comments.
520  // this can be NULL if we don't want this feature.
521  // Returns false if the stream read failed.
522  bool cmdGetLine(std::istream &is, UT_WorkArgs &args, int &line_num,
523  const char *comment_chars = "#",
524  const char *separators = " \t\n\r");
525  bool cmdGetLine(UT_IStream &is, UT_WorkArgs &args, int &line_num,
526  const char *comment_chars = "#",
527  const char *separators = " \t\n\r");
528  bool cmdGetLine(FILE *fp, UT_WorkArgs &args, int &line_num,
529  const char *comment_chars = "#",
530  const char *separators = " \t\n\r");
531 
532  int sprintf(const char *fmt, ...)
534  int appendSprintf(const char *fmt, ...)
536 
537  int vsprintf(const char *fmt, va_list ap);
538 
539  /// Replace the contents of the work buffer using the same formatting as
540  /// UTformat.
541  /// Returns the size of the appended portion, in bytes.
542  template<typename... Args>
543  size_t format(const char *fmt, const Args &...args)
544  {
545  clear();
546  return appendFormat(fmt, args...);
547  }
548 
549  /// Append to the work buffer using the same formatting as UTformat.
550  /// Returns the size of the appended portion, in bytes.
551  template<typename... Args>
552  size_t appendFormat(const char *fmt, const Args &...args)
553  {
554  if (myLockCount) { UT_ASSERT(0); return 0; }
555  UT_ASSERT_P(isNullTerminated());
556 
557  using namespace UT::Format;
558  Writer w;
560  size_t nb_needed = f.format(w, fmt, {args...});
561 
562  myLength += nb_needed;
563  growBufferIfNeeded();
564 
565  // Format again, this time to fill in the buffer.
566  w.setBuffer(myBuffer + myLength - nb_needed, nb_needed);
567  f.format(w, fmt, {args...});
568 
569  myBuffer[myLength] = '\0';
570  return nb_needed;
571  }
572 
573  /// Replace the contents of the work buffer using UTformat formatting
574  /// with an implicit "{} " for each argument, giving a Python-style
575  /// print result.
576  template<typename... Args>
577  size_t print(const Args &...args)
578  {
579  clear();
580  return appendPrint(args...);
581  }
582 
/// Append to the work buffer using UTformat with an implicit "{} "
/// format for each parameter.
/// Returns the size of the appended portion, in bytes.
/// This zero-argument overload ends the recursion of the variadic
/// overload: there is nothing left to append.
template<typename... Args>
size_t appendPrint()
{
    return size_t(0);
}
591  template<typename T, typename... Args>
592  size_t appendPrint(const T &value, const Args &...args)
593  {
594  size_t newbytes;
595  newbytes = appendFormat("{}", value);
596  if (last() != '\n')
597  {
598  append(' ');
599  newbytes++;
600  }
601  newbytes += appendPrint(args...);
602  return newbytes;
603  }
604 
605  // These tack stuff to the end of the buffer.
607  void append(char character)
608  {
609  if (myLockCount) { UT_ASSERT(0); return; }
610  UT_ASSERT_P(isNullTerminated());
611  myLength++;
612  growBufferIfNeeded();
613  myBuffer[myLength - 1] = character;
614  myBuffer[myLength] = '\0';
615  }
616 
617  void printMemory(int64 mem) { clear(); appendPrintMemory(mem); }
618  void appendPrintMemory(int64 mem);
619 
620  void append(exint n, char character)
621  {
622  if (myLockCount) { UT_ASSERT(0); return; }
623  UT_ASSERT_P(isNullTerminated());
624  myLength += n;
625  growBufferIfNeeded();
626  for (int i = n; i > 0; i--)
627  myBuffer[myLength - i] = character;
628  myBuffer[myLength] = '\0';
629  }
630 
631  /// Append a single Unicode code point, converted to UTF8
632  void append(utf32 cp)
633  {
635  int len = UT_Unicode::convert(cp, buf, sizeof(buf));
636  if (!len)
637  return;
638 
639  if (myLockCount) { UT_ASSERT(0); return; }
640  UT_ASSERT_P(isNullTerminated());
641  myLength += len;
642  growBufferIfNeeded();
643  ::memcpy(myBuffer + myLength - len, buf, len);
644  myBuffer[myLength] = '\0';
645  }
646 
647  void append(const char *data, exint size)
648  {
649  if (myLockCount) { UT_ASSERT(0); return; }
650  UT_ASSERT_P(data);
651  UT_ASSERT_P(isNullTerminated());
652  myLength += size;
653  growBufferIfNeeded();
654  ::memcpy(myBuffer + myLength - size, data, size);
655  myBuffer[myLength] = '\0';
656  }
657 
659  void append(const char *str)
660  {
661  if( UTisstring(str) )
662  append(str, ::strlen(str));
663  }
664 
666  void append(const UT_String &str)
667  {
668  if (str.isstring())
669  append((const char *)str);
670  }
671 
673  void append(const UT_StringRef &str)
674  {
675  if (str.isstring())
676  append(str.buffer(), str.length());
677  }
678 
679  void append(const UT_StringArray &strs, const UT_StringRef &sep)
680  {
681  for (exint i = 0; i < strs.entries(); i++)
682  {
683  append(strs(i));
684  if (i+1 < strs.entries())
685  append(sep);
686  }
687  }
688 
690  void append(const UT_WorkBuffer &wb)
691  {
692  append( wb.buffer(), wb.length() );
693  }
694 
696  UT_WorkBuffer &operator+=(const char *str)
697  {
698  append(str);
699  return *this;
700  }
701 
704  {
705  append(str);
706  return *this;
707  }
708 
711  {
712  append(wb);
713  return *this;
714  }
715 
718  {
719  append(str);
720  return *this;
721  }
722 
725  {
726  append(str);
727  return *this;
728  }
729 
730  void prepend(char character)
731  {
732  if (myLockCount) { UT_ASSERT(0); return; }
733  UT_ASSERT_P(isNullTerminated());
734  myLength++;
735  growBufferIfNeeded();
736  ::memmove(myBuffer+1, myBuffer, myLength);
737  myBuffer[0] = character;
738  }
739  void prepend(const char *data, exint size)
740  {
741  if (myLockCount) { UT_ASSERT(0); return; }
742  UT_ASSERT_P(data);
743  UT_ASSERT_P(isNullTerminated());
744  myLength += size;
745  growBufferIfNeeded();
746  ::memmove(myBuffer+size, myBuffer, myLength+1 - size);
747  ::memcpy(myBuffer, data, size);
748  }
750  void prepend(const char *str)
751  {
752  UT_ASSERT_P(str);
753  prepend(str, ::strlen(str));
754  }
755 
757  void prepend(const UT_String &str)
758  {
759  if (str.isstring())
760  prepend((const char *)str);
761  }
763  void prepend(const UT_StringRef &str)
764  {
765  if (str)
766  prepend(str.buffer(), str.length());
767  }
768 
769  /// Insert @c slen characters from @c str, at location @c pos. If @c pos
770  /// exceeds the current length, the position is clamped and the insert becomes an append.
771  void insert(exint pos, const char* str, exint slen);
772 
773  /// Erase @c len characters from location @c pos in the string.
774  void erase(exint pos, exint len);
775 
776  void rewind() { backup(myLength); }
777 
778  /// Rewind by the given length
780  void backup(exint by_length)
781  {
782  if (myLockCount) { UT_ASSERT(0); return; }
783  UT_ASSERT_P(isNullTerminated());
784  UT_ASSERT_P(by_length >= 0);
785  myLength -= by_length;
786  UT_ASSERT(myLength >= 0);
787  myBuffer[myLength] = '\0';
788  }
789 
790  /// Truncate the buffer to the specified length. Truncating to 0 is
791  /// identical to clear().
793  void truncate(exint new_length)
794  {
795  if (new_length >= myLength)
796  {
797  UT_ASSERT(0 && "Truncating beyond buffer extent");
798  return;
799  }
800  backup(myLength-new_length);
801  }
802 
803  // Delete characters off the end of the string until we hit the
804  // requested character.
805  void backupTo(char c)
806  {
807  if (myLockCount) { UT_ASSERT(0); return; }
808  UT_ASSERT_P(isNullTerminated());
809  while( myLength > 0 && myBuffer[myLength-1] != c )
810  myLength--;
811  myBuffer[myLength] = '\0';
812  }
813 
814  void advance(exint by_length)
815  {
816  if (myLockCount) { UT_ASSERT(0); return; }
817  UT_ASSERT_P(isNullTerminated());
818  UT_ASSERT_P(by_length >= 0);
819  myLength -= by_length;
820  UT_ASSERT(myLength >= 0);
821  for (int i=0; i<myLength; i++)
822  myBuffer[i] = myBuffer[by_length+i];
823  myBuffer[myLength] = '\0';
824  }
825 
826  // Finds the 'occurance_number'-th occurance of char c in the string.
828  const char *findChar(char c, int occurance_number = 1) const
829  {
830  return findCharFrom(c, 0, occurance_number);
831  }
832  // Same as findChar, but searches from the end of the string.
833  const char *lastChar(char c, int occurance_number = 1) const
834  {
835  if (myLockCount) { UT_ASSERT(0); return NULL; }
836 
837  UT_ASSERT_P(isNullTerminated());
838 
839  for (exint i = myLength; i --> 0;)
840  {
841  if(c == myBuffer[i])
842  {
843  occurance_number--;
844  if(occurance_number <= 0)
845  {
846  return (myBuffer + i);
847  }
848  }
849  }
850 
851  return NULL;
852  }
853  // Same and findChar, bu searches from given position in the string.
854  const char *findCharFrom(char c, exint position,
855  int occurance_number = 1) const
856  {
857  if (myLockCount) { UT_ASSERT(0); return NULL; }
858 
859  UT_ASSERT_P(isNullTerminated());
860 
861  if (position < 0 || position >= myLength) { return NULL; }
862 
863  for(exint i = position; i < myLength; ++i)
864  {
865  if(c == myBuffer[i])
866  {
867  occurance_number--;
868  if(occurance_number <= 0)
869  {
870  return (myBuffer + i);
871  }
872  }
873  }
874 
875  return NULL;
876  }
877 
878  /// Count the occurrences of the text in the current string
879  exint count(const char *needle) const;
880 
881  // Get the next token pointed at by string and advance string past the
882  // token. Returns whether or not a token was retrieved successfully.
883  // Note that string is modified!!!
884  bool getNextToken(const char *(&string),
885  const UT_String separators = " \t\n");
886 
887  // Harden the contents of the buffer into a UT_String.
888  void copyIntoString(UT_String &str) const;
889 
890  // Copy the contents into a fixed length buffer.
891  // TODO: Get rid of this method, since it encourages fixed-length buffers.
892  void copyIntoString(char *str, exint max_length) const;
893 
894  // Steal the contents of this work buffer into the string.
895  void stealIntoString(UT_String &str);
896 
897  // Steal the contents of this work buffer into the string.
898  // NB: Please use UT_StringHolder move constructor/assignment instead of
899  // this function.
900  void stealIntoStringHolder(UT_StringHolder &str);
901 
902  // Return a string containing the contents of this work buffer, preserving
903  // any null characters in it.
905  { return std::string(buffer(), length()); }
906 
907  // Strips the characters after comment_char from the buffer. This method
908  // goes to some effort to ensure that the comment_char is not preceded by
909  // a backslash or is not in a quoted string. Returns true if it found a
910  // comment and modified the buffer, and false otherwise.
911  bool stripComments(char comment_char = '#');
912 
913  /// Strips out all characters found in 'chars'. The string length will be
914  /// reduced by the number of characters removed. The number of characters
915  /// removed is returned.
916  int strip(const char *chars);
917 
918  /// Remove trailing whitespace lines
919  void removeTrailingSpaceLines();
920 
921  /// Remove trailing whitespace, return true if whitespace was removed.
922  bool removeTrailingSpace();
923 
924  /// Remove leading white space, return true if whitespace was removed.
925  bool removeLeadingSpace();
926 
927  /// Remove trailing digits, return true if some were removed.
928  bool removeTrailingDigits();
929 
930  /// Convert string to lower case
931  void lower();
932 
933  /// Convert string to upper case
934  void upper();
935 
936  /// Create a string of tabs & spaces which represents the given indent
937  void makeIndentString(exint indent, exint tabstop=8);
938 
939  /// Remove the first n characters.
941  {
942  if (n < myLength)
943  {
944  myLength -= n;
945  ::memmove(myBuffer, myBuffer + n, myLength);
946  }
947  else
948  myLength = 0;
949 
950  myBuffer[myLength] = '\0';
951  }
952 
953  /// Replace all occurrences of 'find' with 'replacement'
954  /// Return the number of substitutions that occurred.
955  unsigned int substitute(const char *find, const char *replacement, bool all = true);
956 
957  /// Given from_name which is assumed to fit from_pattern, any assigned
958  /// wildcards are substituted in to_pattern, writing the result to this.
959  /// The wildcards may also be indexed. For example:
960  ///
961  /// to_pattern = b* from_name = apple from_pattern = a*le
962  /// ---> this = bpp
963  ///
964  /// to_pattern = *(1)_to_*(0) from_name = a_to_b from_pattern = *_to_*
965  /// ---> this = b_to_a
966  bool subPatterns(
967  const char *to_pattern,
968  const char *from_name,
969  const char *from_pattern);
970 
971  /// UTF-16 / UTF-8 conversions.
972 
973  /// Set the work buffer to contain the UTF-8 representation of the incoming UTF-16 string.
974  /// The UTF-16 string is assumed to be little-endian, unless prefixed with BOM that
975  /// indicates endianness.
976  /// The incoming string should be zero-word terminated.
977  void setFromUTF16(const utf16 *str);
978 
979  /// Set the work buffer to contain a UTF-16LE (little endian) representation of the
980  /// incoming UTF-8 string.
981  /// The work buffer will be zero-word terminated.
982  void setAsUTF16(const utf8 *str);
983 
984  /// Once set as UTF16-LE, get it back as such a pointer.
986  const utf16* castToUTF16() const { return (const utf16*) myBuffer; }
987 
988  /// Lock buffer for `len` utf-16 characters.
990  {
991  return (utf16*)lock(offset, len*sizeof(utf16));
992  }
993 
994  void swap(UT_WorkBuffer &other)
995  {
996  // Warn if we're about to swap locked buffers.
997  UT_ASSERT(myLockCount==0);
998 
999  bool this_stack = (myBuffer == myStackBuffer);
1000  bool other_stack = (other.myBuffer == other.myStackBuffer);
1001 
1002  if (this_stack && other_stack)
1003  {
1004  // If both buffers are using the stack buffer, just swap the
1005  // buffer contents.
1006  size_t max_size = (myLength > other.myLength) ? myLength
1007  : other.myLength;
1008 
1009  UTswap(myStackBuffer, other.myStackBuffer, max_size + 1);
1010  }
1011  else if (this_stack && !other_stack)
1012  {
1013  ::memcpy(other.myStackBuffer, myStackBuffer, myLength + 1);
1014  myBuffer = other.myBuffer;
1015  other.myBuffer = other.myStackBuffer;
1016  }
1017  else if (!this_stack && other_stack)
1018  {
1019  ::memcpy(myStackBuffer, other.myStackBuffer, other.myLength + 1);
1020  other.myBuffer = myBuffer;
1021  myBuffer = myStackBuffer;
1022  }
1023  else
1024  UTswap(myBuffer, other.myBuffer);
1025  UTswap(myAllocatedSize, other.myAllocatedSize);
1026  UTswap(myLength, other.myLength);
1027  UTswap(myLockCount, other.myLockCount);
1028  }
1029 public:
1030  /// Iterator compatibility.
1032  const char *begin() const { return myBuffer; }
1034  const char *end() const { return myBuffer + myLength; }
1035 
1036 private:
1037 
1038  struct do_widen {};
1039  struct do_narrow {};
1040  /// Private constructors to allow for the Return Value Optimization
1041  /// @{
1043  UT_WorkBuffer(do_widen, const utf8 *str)
1044  : myBuffer(myStackBuffer)
1045  , myAllocatedSize(UT_INITIAL_BUFFER_SIZE)
1046  , myLength(0)
1047  , myLockCount(0)
1048  {
1049  setAsUTF16(str);
1050  }
1052  UT_WorkBuffer(do_narrow, const utf16 *str)
1053  : myBuffer(myStackBuffer)
1054  , myAllocatedSize(UT_INITIAL_BUFFER_SIZE)
1055  , myLength(0)
1056  , myLockCount(0)
1057  {
1058  setFromUTF16(str);
1059  }
1060  /// @}
1061 
1062  friend UT_API std::ostream &operator<<(std::ostream &os,
1063  const UT_WorkBuffer &buffer);
1064 
1065 private: // Data:
1066 
1067  char *myBuffer; // Do not make an access method to the data
1068  exint myAllocatedSize = UT_INITIAL_BUFFER_SIZE;
1069  exint myLength = 0;
1070  int myLockCount = 0;
1071  char myStackBuffer[UT_INITIAL_BUFFER_SIZE];
1072 };
1073 
1074 
1075 static inline size_t
1076 format(char *buffer, size_t buffer_size, const UT_WorkBuffer &v)
1077 {
1078  if (!buffer)
1079  return v.length();
1080  else
1081  {
1082  size_t len = std::min(size_t(v.length()), buffer_size);
1083  ::memcpy(buffer, v.buffer(), len);
1084  return len;
1085  }
1086 }
1087 
1088 
1089 #endif
size_t print(const Args &...args)
vbool4 insert(const vbool4 &a, bool val)
Helper: substitute val for a[i].
Definition: simd.h:3340
std::string sprintf(const char *fmt, const Args &...args)
Definition: strutil.h:136
SYS_FORCE_INLINE void append(const UT_StringRef &str)
string_view OIIO_API strip(string_view str, string_view chars=string_view())
GT_API const UT_StringHolder filename
GLsizeiptr size
Definition: glew.h:1681
GLenum src
Definition: glew.h:2410
SYS_FORCE_INLINE exint length() const
SYS_FORCE_INLINE void strcpy(const UT_StringRef &src)
void write(exint offset, const UT_StringHolder &src)
SYS_FORCE_INLINE exint getAllocatedSize() const
SYS_FORCE_INLINE char * operator*() const
void UTswap(T &a, T &b)
Definition: UT_Swap.h:35
const Args & args
Definition: printf.h:628
SYS_FORCE_INLINE bool operator==(const UT_String &str) const
SYS_FORCE_INLINE UT_WorkBuffer(const char *data, exint size)
Definition: UT_WorkBuffer.h:81
SYS_FORCE_INLINE void strncat(const char *src, exint len)
unsigned short utf16
Definition: SYS_Types.h:56
SYS_FORCE_INLINE UT_WorkBuffer(const UT_StringRef &str)
Definition: UT_WorkBuffer.h:97
SYS_FORCE_INLINE UT_WorkBuffer(const char *str)
Definition: UT_WorkBuffer.h:73
int64 exint
Definition: SYS_Types.h:125
void append(exint n, char character)
SYS_FORCE_INLINE const char * buffer() const
SYS_FORCE_INLINE void strcpy(const char *src)
SYS_FORCE_INLINE void release(bool recompute_length=false)
You can manually release the buffer.
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
Definition: glew.h:1254
#define UT_API
Definition: UT_API.h:13
const GLdouble * v
Definition: glew.h:1391
const char * findCharFrom(char c, exint position, int occurance_number=1) const
void append(const char *data, exint size)
const char * lastChar(char c, int occurance_number=1) const
Format
Definition: oidn.hpp:29
size_t appendPrint()
SYS_FORCE_INLINE char last() const
utf16 * lockUTF16(exint offset=0, exint len=0)
Lock buffer for len utf-16 characters.
SYS_FORCE_INLINE void relock()
If you've manually released the lock, you can relock the buffer.
SYS_FORCE_INLINE void append(const UT_String &str)
void swap(UT_WorkBuffer &other)
void eraseHead(exint n)
Remove the first n characters.
SYS_FORCE_INLINE void append(const UT_WorkBuffer &wb)
SYS_FORCE_INLINE bool operator==(const UT_WorkBuffer &buf) const
std::ostream & operator<<(std::ostream &ostr, const DataType &a)
Definition: DataType.h:133
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const UT_WorkBuffer &wb)
SYS_FORCE_INLINE void append(const char *str)
static const utf8 * convert(const utf8 *str, utf32 &cp)
size_t appendFormat(const char *fmt, const Args &...args)
#define UT_INITIAL_BUFFER_SIZE
Definition: UT_WorkBuffer.h:54
SYS_FORCE_INLINE void strcpy(const UT_WorkBuffer &src)
GLclampf f
Definition: glew.h:3499
exint length() const
SYS_FORCE_INLINE const char * buffer() const
std::enable_if< is_contiguous< Container >::value, typename checked< typename Container::value_type >::type >::type reserve(std::back_insert_iterator< Container > &it, std::size_t n)
Definition: format.h:593
SYS_FORCE_INLINE const char * end() const
GLuint buffer
Definition: glew.h:1680
GLint GLenum GLsizei GLint GLsizei const void * data
Definition: glew.h:1379
SYS_FORCE_INLINE UT_WorkBuffer & operator=(const UT_WorkBuffer &other)
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:134
unsigned int utf32
Definition: SYS_Types.h:58
void printMemory(int64 mem)
#define SYS_PRINTF_CHECK_ATTRIBUTE(string_index, first_to_check)
Definition: SYS_Types.h:433
SYS_FORCE_INLINE const char * begin() const
Iterator compatibility.
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
GLubyte GLubyte GLubyte GLubyte w
Definition: glew.h:1890
GLsizei n
Definition: glew.h:4040
const GLfloat * c
Definition: glew.h:16296
GLuint GLsizei GLsizei * length
Definition: glew.h:1825
SYS_FORCE_INLINE void prepend(const UT_StringRef &str)
void prepend(char character)
SYS_FORCE_INLINE bool isEmpty() const
SYS_FORCE_INLINE char * strdup() const
long long int64
Definition: SYS_Types.h:116
static SYS_FORCE_INLINE UT_WorkBuffer widen(const utf8 *str)
SYS_FORCE_INLINE const char * findChar(char c, int occurance_number=1) const
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const UT_String &str)
SYS_FORCE_INLINE exint strlen() const
SYS_FORCE_INLINE bool operator!=(const UT_WorkBuffer &buf) const
SYS_FORCE_INLINE UT_WorkBuffer & operator=(const std::string &str)
SYS_FORCE_INLINE const char * c_str() const
SYS_FORCE_INLINE void strcpy(const UT_String &src)
#define UT_UTF8_MAX_ENCODING_LEN
Definition: UT_Unicode.h:19
size_t SYSstrlcpy(char *dest, const char *src, size_t size)
Definition: SYS_String.h:187
std::string toStdString() const
SYS_FORCE_INLINE char * string() const
SYS_FORCE_INLINE void prepend(const UT_String &str)
SYS_FORCE_INLINE char operator()(exint idx) const
SYS_FORCE_INLINE UT_WorkBuffer(const UT_WorkBuffer &other)
SYS_FORCE_INLINE void strcat(const char *src)
SYS_FORCE_INLINE void truncate(exint new_length)
SYS_FORCE_INLINE const utf16 * castToUTF16() const
Once set as UTF16-LE, get it back as such a pointer.
exint entries() const
Alias of size(). size() is preferred.
Definition: UT_Array.h:460
void append(utf32 cp)
Append a single Unicode code point, converted to UTF8.
SYS_FORCE_INLINE int strcmp(const char *src) const
static SYS_FORCE_INLINE UT_WorkBuffer narrow(const utf16 *str)
SYS_FORCE_INLINE UT_WorkBuffer & operator=(const char *str)
GLsizei const GLchar *const * string
Definition: glew.h:1844
std::basic_string< Char > vsprintf(const S &format, basic_format_args< typename basic_printf_context_t< internal::basic_buffer< Char >>::type > args)
Definition: printf.h:609
SYS_FORCE_INLINE bool isstring() const
SYS_FORCE_INLINE void backup(exint by_length)
Rewind by the given length.
void write(exint offset, const char *src)
SYS_FORCE_INLINE UT_WorkBuffer(const UT_String &str)
Definition: UT_WorkBuffer.h:89
SYS_FORCE_INLINE AutoLock(UT_WorkBuffer &buf)
void backupTo(char c)
size_t appendPrint(const T &value, const Args &...args)
SYS_FORCE_INLINE bool operator==(const char *str) const
SYS_FORCE_INLINE bool UTisstring(const char *s)
Definition: UT_String.h:57
void advance(exint by_length)
SYS_FORCE_INLINE bool operator!=(const char *str) const
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const char *str)
FMT_CONSTEXPR bool find(Ptr first, Ptr last, T value, Ptr &out)
Definition: format.h:2104
SYS_FORCE_INLINE void append(char character)
GLuint GLuint GLsizei count
Definition: glew.h:1253
Type-safe formatting, modeled on the Python str.format function.
void prepend(const char *data, exint size)
virtual bool readFile(GA_Detail &g, const char *filename, const GA_LoadOptions *opts, UT_StringArray *errors) const
Class which defines an I/O interface to save/load geometry.
void append(const UT_StringArray &strs, const UT_StringRef &sep)
bool isstring() const
Definition: UT_String.h:710
#define UT_ASSERT(ZZ)
Definition: UT_Assert.h:135
SYS_FORCE_INLINE ~UT_WorkBuffer()
SYS_FORCE_INLINE void clear()
char utf8
Definition: SYS_Types.h:52
SYS_FORCE_INLINE ~AutoLock()
#define const
Definition: zconf.h:214
SYS_FORCE_INLINE char first() const
vint4 min(const vint4 &a, const vint4 &b)
Definition: simd.h:4694
void write(T &out, bool v)
Definition: ImfXdr.h:332
GLenum GLuint GLsizei const GLchar * buf
Definition: glew.h:2580
SYS_FORCE_INLINE bool operator!=(const UT_String &str) const
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const UT_StringRef &str)
bool all(const vbool4 &v)
Definition: simd.h:3371
SYS_FORCE_INLINE UT_WorkBuffer & operator+=(const std::string &str)
GLsizei const GLfloat * value
Definition: glew.h:1849
SYS_FORCE_INLINE void write(exint offset, char c)
SYS_FORCE_INLINE bool isstring() const
SYS_FORCE_INLINE void prepend(const char *str)
SYS_FORCE_INLINE void strncpy(const char *src, exint maxlen)
SYS_FORCE_INLINE int strncmp(const char *src, exint n) const
GLenum GLsizei len
Definition: glew.h:7752
GLintptr offset
Definition: glew.h:1682
SYS_FORCE_INLINE UT_WorkBuffer()
Definition: UT_WorkBuffer.h:65