HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_IStreamBuf.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_IStreamBuf.h ( UT Library, C++)
7  *
8  * COMMENTS: Since the semantics of C++ streams leave a little bit to be
9  * desired, this class provides an efficient mechanism to read
10  * streams line by line.
11  *
12  * However, since this class buffers the stream input itself, it's
13  * not safe to use the stream outside of the class.
14  */
15 
16 #ifndef __UT_IStreamBuf__
17 #define __UT_IStreamBuf__
18 
19 #include "UT_API.h"
20 #include "UT_Assert.h"
21 #include "UT_IntrusivePtr.h"
22 #include "UT_StringHolder.h"
23 #include <SYS/SYS_Inline.h>
24 #include <SYS/SYS_Math.h>
25 #include <SYS/SYS_String.h>
26 #include <SYS/SYS_Types.h>
27 #include <iosfwd>
28 #include <stdio.h>
29 #include <string.h>
30 
31 #define UT_STREAMBUF_SIZE 65536
32 
33 class UT_Options;
34 class UT_WorkBuffer;
35 
38 
40 {
41 public:
42  UT_IStreamBuf(bool bufferable = true);
43 
44  enum { // Origin selectors for the seekdir argument of seekg()
45  UT_SEEK_BEG=0, // Seek from beginning of stream
46  UT_SEEK_CUR=1, // Seek from current location
47  UT_SEEK_END=2 // Seek from the end of the stream
48  };
49 
50  static UT_IStreamBuf *nullBuf();
51  static UT_IStreamBuf *fileBuf(const UT_StringHolder &filename,
52  const UT_Options *options,
53  bool bufferable);
54  static UT_IStreamBuf *fileBuf(FILE *fp,
55  bool bufferable);
56  static UT_IStreamBuf *istreamBuf(std::istream &is,
57  bool bufferable);
58  static UT_IStreamBuf *memoryBuf(const char *src, size_t len);
59  static UT_IStreamBuf *rangeBuf(UT_IStreamBuf *src,
60  int64 start, int64 end);
61 
62  static UT_IStreamBuf *zlibBuf(UT_IStreamBuf *src,
63  bool bufferable=true);
64  static UT_IStreamBuf *gzipBuf(UT_IStreamBuf *src,
65  bool bufferable=true);
66  static UT_IStreamBuf *bloscBuf(UT_IStreamBuf *src,
67  bool bufferable=true);
68  static UT_IStreamBuf *scBuf(UT_IStream *src,
69  bool bufferable=true);
70  static UT_IStreamBuf *blowfishBuf(UT_IStreamBuf *src,
71  const unsigned char *key, exint key_length,
72  bool bufferable=true);
73  static exint blowfishPlainTextLength(UT_IStreamBuf *src);
74 
75  static void setStdinCompatible(bool state);
76 
77  void bumpRef() { myRefCount++; } // Take one more reference; balanced by decRef().
78  void decRef()
79  {
    // Release one reference. When the count reaches zero the object deletes
    // itself; the destructor is protected, so this is the path by which
    // outside code gets the object destroyed.
    // NOTE(review): myRefCount is a plain exint and no locking is visible
    // here -- confirm that references are not shared across threads.
80  myRefCount--;
81  if (!myRefCount)
82  delete this;
83  }
84 
85  /// Set the index for the stream buffer. This base implementation is an
86  /// empty body, i.e. a no-op for stream buffers that do not support an index.
87  virtual void setIndex(const UT_CompressedBlockIndexPtr &index) {}
88 
89  // Returns true once the local buffer is fully consumed (myBufCur == myBufEnd)
90  // and endOfFileSet() reports that a read was attempted past the end of file.
91  bool isEof() const
92  {
93  return myBufCur == myBufEnd && endOfFileSet();
94  }
95 
96  // If the stream has exhausted the buffered read (if there was a buffer
97  // associated with this stream-buf), this method provides the way to read
98  // directly from the stream.
99  // It returns the number of bytes read.
100  virtual exint read(char *buffer, exint asize=1) = 0;
101 
102  // peek returns the character read as an unsigned char cast to an
103  // int, or -1 on failure. In other words, characters with the high bit set
104  // will return a number from 128 to 255, not a negative number.
105  virtual int streamPeek() = 0;
106 
107  // For streams which own the underlying stream, the close method allows
108  // that stream to be closed.
109  virtual bool closeFile();
110 
111  // Return true for a random-access file.
112  virtual bool isRandomAccessFile(UT_WorkBuffer &filename) const;
113 
114  // Query the file system type - not supported on all platforms
115  virtual int getFileSystemType() const;
116 
117  // Query the file descriptor associated with an open stream, if any. Be
118  // careful what you do with it.
119  virtual int getFileDescriptor() const;
120 
121  // Returns true if the error flag is set on this stream buffer. Note that seekg() resets the flag.
122  inline bool getError() const { return myError; }
123  const char *getErrorStr() const;
124  void stealLoadBufferData(const char *&bufstart,
125  const char *&bufend,
126  exint maxlen);
127 
129  {
130  if (myError)
131  return 0;
132 
133  // Fast path for size 1, because it's being hit often.
134  if (size == 1 && myBufCur != myBufEnd)
135  {
136  *buffer = *myBufCur;
137  ++myBufCur;
138  return 1;
139  }
140 
141  if (!myBufferable)
142  return read(buffer, size);
143 
144  exint nread = 0;
145  exint left = size;
146  while (left > 0)
147  {
148  if (myBufCur == myBufEnd)
149  {
150  // Only small reads are buffered; a request of UT_STREAMBUF_SIZE bytes or more bypasses the buffer.
151  if (left >= UT_STREAMBUF_SIZE)
152  {
153  // Clear the buffer in this case because the position of
154  // the underlying stream will be past the buffer. If we
155  // try to seek back into the buffer and then read past it
156  // the underlying stream will be in the wrong place.
157  myBufStartPos = -1;
158  myBufStart = NULL;
159  myBufCur = NULL;
160  myBufEnd = NULL;
161  break;
162  }
163  if (!loadBuffer())
164  break;
165  }
166 
167  // Load first portion from my buffer
168  // How much can I read
169  exint n = SYSmin(left, exint(myBufEnd - myBufCur));
170  memcpy(buffer+nread, myBufCur, n);
171  myBufCur += n;
172  left -= n;
173  nread += n;
174  }
175  if (left)
176  nread += read(buffer+nread, left);
177 
178  return nread;
179  }
180 
181  exint tellg() const
182  {
     // Report the caller's logical read position. While a buffer is loaded
     // this is the buffer's starting stream offset plus the number of bytes
     // already consumed from it.
183  if (myBufCur != NULL)
184  return myBufStartPos + (myBufCur - myBufStart);
185 
     // No buffer is loaded (the pointers are NULL), so ask tellPos().
186  return tellPos();
187  }
188 
189  bool seekg(exint pos, int seekdir)
190  {
     // Move the read position to pos, interpreted according to seekdir
     // (UT_SEEK_BEG, UT_SEEK_CUR or UT_SEEK_END). Returns true when the
     // target lies inside the currently loaded buffer; otherwise the buffer
     // is dropped and the result of seekPos() is returned.
191  myError = false; // any previously set error flag is cleared first
192  if (myBufCur != NULL && seekdir != UT_SEEK_END) // end-relative seeks always go to seekPos()
193  {
194  // Get the target position relative to the buffer start
195  exint relpos;
196  if (seekdir == UT_SEEK_CUR)
197  relpos = pos + (myBufCur - myBufStart);
198  else
199  {
200  UT_ASSERT_P(seekdir == UT_SEEK_BEG);
201  relpos = pos - myBufStartPos;
202  }
203  // If we're seeking inside the buffer, just move myBufCur.
     // A target exactly at the buffer end is not treated as inside.
204  if (relpos >= 0 && relpos < (myBufEnd - myBufStart))
205  {
206  myBufCur = myBufStart + relpos;
207  return true;
208  }
209 
210  // The stream has already been read until the end of the buffer,
211  // so any seeks relative to the current position need to be
212  // adjusted to be relative to the end of the buffer.
213  if (seekdir == UT_SEEK_CUR)
214  pos -= (myBufEnd - myBufCur);
215  }
216 
217  // Clear the buffer since we're seeking to a new place in the stream
218  myBufStartPos = -1;
219  myBufStart = NULL;
220  myBufCur = NULL;
221  myBufEnd = NULL;
222  return seekPos(pos, seekdir);
223  }
224 
225  void clearLoadBuffer();
226  bool getLine(UT_WorkBuffer &buffer, int end='\n');
227  bool skipLine(int end='\n');
228  bool getJSONWord(UT_WorkBuffer &buffer, bool &real);
229  bool getWord(UT_WorkBuffer &buffer);
230  bool getString(UT_WorkBuffer &buffer);
231  bool getNumber(UT_WorkBuffer &buffer, bool &real);
232  bool skipWhitespace();
233  bool skipWhitespace(int64 *line_count, int64 *line_start_pos);
234  bool getAll(UT_WorkBuffer &buffer);
235  bool getAllAscii(UT_WorkBuffer &buffer);
236 
237  SYS_FORCE_INLINE bool
238  checkToken(const char *match)
239  {
     // Skip leading whitespace, then compare the stream against match one
     // character at a time. Returns true only if every character matched,
     // no error is flagged, and the token is followed either by whitespace
     // or by the point where no more data can be loaded.
     // Characters that matched before a mismatch stay consumed: nothing in
     // this function moves myBufCur back.
240  UT_ASSERT_MSG(isBufferable(),"Unbuffered checkToken() not implemented");
241 
242  if (!skipWhitespace())
243  return false;
244 
245  // Check token against match, refilling the buffer whenever it runs out
246  for ( ; *match; ++myBufCur, ++match)
247  {
248  if (myBufCur == myBufEnd && !loadBuffer())
249  return false;
250  if (*match != *myBufCur)
251  return false;
252  }
253  // Check one more character to see if it's whitespace (space or newline).
     // That character is only inspected, not consumed.
254  if (myBufCur == myBufEnd && !loadBuffer())
255  return !myError; // nothing follows the token: accept unless an error is flagged
256  return !myError && SYSisspace(*myBufCur);
257  }
258 
259  /// getc and peek return the character read as an unsigned char cast to an
260  /// int, or -1 (EOF) on failure. In other words, characters with the high
261  /// bit set will return a number from 128 to 255, not a negative number.
263  {
264  if (myBufCur != myBufEnd || loadBuffer())
265  {
266  char c = *myBufCur;
267  ++myBufCur;
268  return (int)(uchar)c;
269  }
270  char data;
271  exint nread = read(&data, 1);
272  if (nread == 0)
273  return -1;
274  return (int)(uchar)data;
275  }
277  {
278  if (myBufCur != myBufEnd || loadBuffer())
279  {
280  char c = *myBufCur;
281  return (int)(uchar)c;
282  }
283  return streamPeek();
284  }
286  {
287  len = SYSmin(exint(myBufCur - myBufStart), len);
288  myBufCur -= len;
289  return len;
290  }
291 
292  virtual int64 getMemoryUsage(bool inclusive) const;
293 
294  bool isBufferable() const // Returns the myBufferable flag; checkToken() asserts on it because it has no unbuffered implementation.
295  { return myBufferable; }
296 
297 protected:
298  virtual ~UT_IStreamBuf();
299 
300  UT_IStreamBuf(const UT_IStreamBuf &) = delete;
301  UT_IStreamBuf &operator=(const UT_IStreamBuf &) = delete;
302 
303  void setError(const char *msg=0);
304 
305  // Extract an alpha-numeric word, with an optional prefix
306  bool getAlNumWord(UT_WorkBuffer &buffer, const char *prefix="");
307 
308  // Read data into a buffer and fill in the start and end pointers.
309  // The file position of the start of the buffer is put into bufstartpos.
310  // When the end of the buffer is reached, call loadBuffer() again,
311  // to get more data.
312  virtual void loadBuffer(exint &bufstartpos, const char *&bufstart, const char *&bufend, bool keepeofif0) = 0;
313 
314  virtual exint tellPos() const = 0;
315  virtual bool seekPos(exint pos, int dir) = 0;
316  // Determine if the caller tried to read past the end of file,
317  // assuming that we've run out of buffer.
318  // The equivalent to this as a public method is isEof().
319  virtual bool endOfFileSet() const = 0;
320 
321  // If loadBuffer() is being used as if it were read(), and not enough
322  // has already been read to count as a successful read from the outside
323  // perspective, a failure to read anything here should keep the eof
324  // flag set instead of clearing it.
325  // The eof flag is never kept set if loadBuffer reads some non-zero
326  // amount of data, because it logically only read up to the end, not past.
327  bool loadBuffer(bool keepeofif0 = false);
328 
329  void appendDecodedEscapeSequence(UT_WorkBuffer &buffer);
330 
331 private:
332  int64 myBufStartPos;
333  const char *myBufStart;
334  const char *myBufCur;
335  const char *myBufEnd;
336  char *myErrorStr;
337  exint myRefCount;
338  int myErrorNum;
339  bool myError;
340  bool myBufferable;
341 };
342 
343 SYS_FORCE_INLINE bool
345 {
346  // FIXME: Unbuffered UT_IStreamBuf::skipWhitespace is not implemented!!!
348  "Unbuffered skipWhitespace() not implemented");
349 
350  // Return false if we're already at the EOF
351  if (myBufCur == myBufEnd && !loadBuffer(true))
352  return false;
353 
354  do {
355  for ( ; myBufCur < myBufEnd; ++myBufCur)
356  {
357  if (!SYSisspace(*myBufCur))
358  return true;
359  }
360  } while (loadBuffer(false));
361 
362  // Return true if we skipped to the EOF so that the caller can determine
363  // whether it's valid.
364  return true;
365 }
366 
367 inline bool
369 {
370  // FIXME: Unbuffered UT_IStreamBuf::skipLine is not implemented!!!
372  "Unbuffered skipLine not implemented");
373 
374  // Return false if we're already at the EOF
375  if (myBufCur == myBufEnd && !loadBuffer(true))
376  return false;
377 
378  do {
379  for ( ; myBufCur < myBufEnd; ++myBufCur)
380  {
381  if (*myBufCur == newline)
382  {
383  ++myBufCur; // move over the newline
384  return true;
385  }
386  }
387  } while (loadBuffer(false));
388 
389  // Return true if we skipped to the EOF so that the caller can determine
390  // whether it's valid.
391  return true;
392 }
393 
394 #endif
GT_API const UT_StringHolder filename
exint bread(char *buffer, exint size)
virtual void loadBuffer(exint &bufstartpos, const char *&bufstart, const char *&bufend, bool keepeofif0)=0
GLint left
Definition: glcorearb.h:2005
GLboolean * data
Definition: glcorearb.h:131
SYS_FORCE_INLINE int getc()
GLuint start
Definition: glcorearb.h:475
int64 exint
Definition: SYS_Types.h:125
#define UT_API
Definition: UT_API.h:14
#define UT_STREAMBUF_SIZE
Definition: UT_IStreamBuf.h:31
bool seekg(exint pos, int seekdir)
#define UT_ASSERT_MSG(ZZ,...)
Definition: UT_Assert.h:159
SYS_FORCE_INLINE int peekc()
GLdouble n
Definition: glcorearb.h:2008
bool skipWhitespace()
Definition: core.h:760
bool isBufferable() const
bool getError() const
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:155
GLuint GLuint end
Definition: glcorearb.h:475
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
SYS_FORCE_INLINE exint unwind(exint len)
virtual void setIndex(const UT_CompressedBlockIndexPtr &index)
Definition: UT_IStreamBuf.h:87
long long int64
Definition: SYS_Types.h:116
bool isEof() const
Definition: UT_IStreamBuf.h:91
SYS_FORCE_INLINE bool checkToken(const char *match)
GLsizeiptr size
Definition: glcorearb.h:664
A map of string to various well defined value types.
Definition: UT_Options.h:84
exint tellg() const
LeafData & operator=(const LeafData &)=delete
GLuint index
Definition: glcorearb.h:786
bool skipLine(int end='\n')
#define SYSmin(a, b)
Definition: SYS_Math.h:1571
PXR_NAMESPACE_OPEN_SCOPE typedef unsigned char uchar
Definition: inttypes.h:58
GLenum src
Definition: glcorearb.h:1793