HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_IStreamBuf.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_IStreamBuf.h ( UT Library, C++)
7  *
8  * COMMENTS: Since the semantics of C++ streams leave a little bit to be
9  * desired, this class provides an efficient mechanism to read
10  * streams line by line.
11  *
12  * However, since this class buffers the stream input itself, it's
13  * not safe to use the stream outside of the class.
14  */
15 
16 #ifndef __UT_IStreamBuf__
17 #define __UT_IStreamBuf__
18 
19 #include "UT_API.h"
20 #include "UT_Assert.h"
21 #include "UT_SCFCommon.h"
22 #include "UT_StringHolder.h"
23 #include <SYS/SYS_Inline.h>
24 #include <SYS/SYS_Math.h>
25 #include <SYS/SYS_Types.h>
26 #include <iosfwd>
27 #include <fstream> // Only needed for gcc4.4
28 #include <stdio.h>
29 #include <string.h>
30 
31 #define UT_STREAMBUF_SIZE 65536
32 
33 class UT_Options;
34 class UT_WorkBuffer;
35 
37 public:
38  UT_IStreamBuf(bool bufferable = true);
39 
// Origin selectors accepted by seekg() and forwarded to seekPos().
enum {
    UT_SEEK_BEG = 0,    // Position is relative to the beginning of the stream
    UT_SEEK_CUR = 1,    // Position is relative to the current location
    UT_SEEK_END = 2     // Position is relative to the end of the stream
};
45 
46  static UT_IStreamBuf *nullBuf();
47  static UT_IStreamBuf *fileBuf(const UT_StringHolder &filename,
48  const UT_Options *options,
49  bool bufferable);
50  static UT_IStreamBuf *fileBuf(FILE *fp,
51  bool bufferable);
52  static UT_IStreamBuf *istreamBuf(std::istream &is,
53  bool bufferable);
54  static UT_IStreamBuf *memoryBuf(const char *src, size_t len);
55  static UT_IStreamBuf *rangeBuf(UT_IStreamBuf *src,
56  int64 start, int64 end);
57 
58  static UT_IStreamBuf *zlibBuf(UT_IStreamBuf *src,
59  bool bufferable=true);
60  static UT_IStreamBuf *gzipBuf(UT_IStreamBuf *src,
61  bool bufferable=true);
62  static UT_IStreamBuf *bloscBuf(UT_IStreamBuf *src,
63  bool bufferable=true);
64  static UT_IStreamBuf *scBuf(UT_IStream *src,
65  bool bufferable=true);
66  static UT_IStreamBuf *blowfishBuf(UT_IStreamBuf *src,
67  const unsigned char *key, exint key_length,
68  bool bufferable=true);
69  static exint blowfishPlainTextLength(UT_IStreamBuf *src);
70 
71  static void setStdinCompatible(bool state);
72 
73  void bumpRef() { myRefCount++; }
74  void decRef()
75  {
76  myRefCount--;
77  if (!myRefCount)
78  delete this;
79  }
80 
81  /// Set the index for the stream buffer (does nothing if stream
82  /// buffer does not support index)
83  virtual void setIndex(const UT_CompressedBlockIndexPtr &index) {}
84 
85  // Determine if the caller tried to read past the end of file,
86  // assuming that we've run out of buffer.
87  bool isEof() const
88  {
89  return myBufCur == myBufEnd && endOfFileSet();
90  }
91 
92  // If the stream has exhausted the buffered read (if there was a buffer
93  // associated with this stream-buf), this method provides the way to read
94  // directly from the stream.
95  // It returns the number of bytes read.
96  virtual exint read(char *buffer, exint asize=1) = 0;
97 
98  // peek returns the character read as an unsigned char cast to an
99  // int, or -1 on failure. In other words, characters with the high bit set
100  // will return a number from 128 to 255, not a negative number.
101  virtual int streamPeek() = 0;
102 
103  // For streams which own the underlying stream, the close method allows
104  // that stream to be closed.
105  virtual bool closeFile();
106 
107  // Return true for a random-access file.
108  virtual bool isRandomAccessFile(UT_WorkBuffer &filename) const;
109 
110  // Query the file system type - not supported on all platforms
111  virtual int getFileSystemType() const;
112 
113  // Query the file descriptor associated with an open stream, if any. Be
114  // careful what you do with it.
115  virtual int getFileDescriptor() const;
116 
117  // If there was an error on the stream, the error flag will be set
118  inline bool getError() const { return myError; }
119  const char *getErrorStr() const;
120  void stealLoadBufferData(const char *&bufstart,
121  const char *&bufend,
122  exint maxlen);
123 
125  {
126  if (myError)
127  return 0;
128 
129  // Fast path for size 1, because it's being hit often.
130  if (size == 1 && myBufCur != myBufEnd)
131  {
132  *buffer = *myBufCur;
133  ++myBufCur;
134  return 1;
135  }
136 
137  if (!myBufferable)
138  return read(buffer, size);
139 
140  exint nread = 0;
141  exint left = size;
142  while (left > 0)
143  {
144  if (myBufCur == myBufEnd)
145  {
146  // If we're reading a small amount of data, buffer it.
147  if (left >= UT_STREAMBUF_SIZE)
148  {
149  // Clear the buffer in this case because the position of
150  // the underlying stream will be past the buffer. If we
151  // try to seek back into the buffer and then read past it
152  // the underlying stream will be in the wrong place.
153  myBufStartPos = -1;
154  myBufStart = NULL;
155  myBufCur = NULL;
156  myBufEnd = NULL;
157  break;
158  }
159  if (!loadBuffer())
160  break;
161  }
162 
163  // Load first portion from my buffer
164  // How much can I read
165  exint n = SYSmin(left, exint(myBufEnd - myBufCur));
166  memcpy(buffer+nread, myBufCur, n);
167  myBufCur += n;
168  left -= n;
169  nread += n;
170  }
171  if (left)
172  nread += read(buffer+nread, left);
173 
174  return nread;
175  }
176 
177  exint tellg() const
178  {
179  if (myBufCur != NULL)
180  return myBufStartPos + (myBufCur - myBufStart);
181 
182  return tellPos();
183  }
184 
185  bool seekg(exint pos, int seekdir)
186  {
187  myError = false;
188  if (myBufCur != NULL && seekdir != UT_SEEK_END)
189  {
190  // Get the position relative to the buffer start
191  exint relpos;
192  if (seekdir == UT_SEEK_CUR)
193  relpos = pos + (myBufCur - myBufStart);
194  else
195  {
196  UT_ASSERT_P(seekdir == UT_SEEK_BEG);
197  relpos = pos - myBufStartPos;
198  }
199  // If we're seeking inside the buffer, just move myBufCur.
200  if (relpos >= 0 && relpos < (myBufEnd - myBufStart))
201  {
202  myBufCur = myBufStart + relpos;
203  return true;
204  }
205 
206  // The stream has already been read until the end of the buffer,
207  // so any seeks relative to the current position need to be
208  // adjusted to be relative to the end of the buffer.
209  if (seekdir == UT_SEEK_CUR)
210  pos -= (myBufEnd - myBufCur);
211  }
212 
213  // Clear the buffer since we're seeking to a new place in the stream
214  myBufStartPos = -1;
215  myBufStart = NULL;
216  myBufCur = NULL;
217  myBufEnd = NULL;
218  return seekPos(pos, seekdir);
219  }
220 
221  void clearLoadBuffer();
222  bool getLine(UT_WorkBuffer &buffer, int end='\n');
223  bool skipLine(int end='\n');
224  bool getJSONWord(UT_WorkBuffer &buffer, bool &real);
225  bool getWord(UT_WorkBuffer &buffer);
226  bool getString(UT_WorkBuffer &buffer);
227  bool getNumber(UT_WorkBuffer &buffer, bool &real);
228  bool skipWhitespace();
229  bool skipWhitespace(int64 *line_count, int64 *line_start_pos);
230  bool getAll(UT_WorkBuffer &buffer);
231  bool getAllAscii(UT_WorkBuffer &buffer);
232 
233  SYS_FORCE_INLINE bool
234  checkToken(const char *match)
235  {
236  UT_ASSERT_MSG(isBufferable(),"Unbuffered checkToken() not implemented");
237 
238  if (!skipWhitespace())
239  return false;
240 
241  // Check token against match
242  for ( ; *match; ++myBufCur, ++match)
243  {
244  if (myBufCur == myBufEnd && !loadBuffer())
245  return false;
246  if (*match != *myBufCur)
247  return false;
248  }
249  // Check one more character to see if it's whitespace (space or newline)
250  if (myBufCur == myBufEnd && !loadBuffer())
251  return !myError;
252  return !myError && SYSisspace(*myBufCur);
253  }
254 
255  /// getc and peek return the character read as an unsigned char cast to an
256  /// int, or -1 (EOF) on failure. In other words, characters with the high
257  /// bit set will return a number from 128 to 255, not a negative number.
259  {
260  if (myBufCur != myBufEnd || loadBuffer())
261  {
262  char c = *myBufCur;
263  ++myBufCur;
264  return (int)(uchar)c;
265  }
266  char data;
267  exint nread = read(&data, 1);
268  if (nread == 0)
269  return -1;
270  return (int)(uchar)data;
271  }
273  {
274  if (myBufCur != myBufEnd || loadBuffer())
275  {
276  char c = *myBufCur;
277  return (int)(uchar)c;
278  }
279  return streamPeek();
280  }
282  {
283  len = SYSmin(exint(myBufCur - myBufStart), len);
284  myBufCur -= len;
285  return len;
286  }
287 
288  virtual int64 getMemoryUsage(bool inclusive) const;
289 
290  bool isBufferable() const
291  { return myBufferable; }
292 
293 protected:
294  virtual ~UT_IStreamBuf();
295  void setError(const char *msg=0);
296 
297  // Extract an alpha-numeric word, with an optional prefix
298  bool getAlNumWord(UT_WorkBuffer &buffer, const char *prefix="");
299 
300  // Read data into a buffer and fill in the start and end pointers.
301  // The file position of the start of the buffer is put into bufstartpos.
302  // When the end of the buffer is reached, call loadBuffer() again,
303  // to get more data.
304  virtual void loadBuffer(exint &bufstartpos, const char *&bufstart, const char *&bufend, bool keepeofif0) = 0;
305 
306  virtual exint tellPos() const = 0;
307  virtual bool seekPos(exint pos, int dir) = 0;
308  // Determine if the caller tried to read past the end of file,
309  // assuming that we've run out of buffer.
310  // The equivalent to this as a public method is isEof().
311  virtual bool endOfFileSet() const = 0;
312 
313  // If loadBuffer() is being used as if it were read(), and not enough
314  // has already been read to count as a successful read from the outside
315  // perspective, a failure to read anything here should keep the eof
316  // flag set instead of clearing it.
317  // The eof flag is never kept set if loadBuffer reads some non-zero
318  // amount of data, because it logically only read up to the end, not past.
319  bool loadBuffer(bool keepeofif0 = false);
320 
321  void appendDecodedEscapeSequence(UT_WorkBuffer &buffer);
322 
323 private:
324  int64 myBufStartPos;
325  const char *myBufStart;
326  const char *myBufCur;
327  const char *myBufEnd;
328  char *myErrorStr;
329  exint myRefCount;
330  int myErrorNum;
331  bool myError;
332  bool myBufferable;
333 };
334 
335 SYS_FORCE_INLINE bool
337 {
338  // FIXME: Unbuffered UT_IStreamBuf::skipWhitespace is not implemented!!!
340  "Unbuffered skipWhitespace() not implemented");
341 
342  // Return false if we're already at the EOF
343  if (myBufCur == myBufEnd && !loadBuffer(true))
344  return false;
345 
346  do {
347  for ( ; myBufCur < myBufEnd; ++myBufCur)
348  {
349  if (!SYSisspace(*myBufCur))
350  return true;
351  }
352  } while (loadBuffer(false));
353 
354  // Return true if we skipped to the EOF so that the caller can determine
355  // whether it's valid.
356  return true;
357 }
358 
359 inline bool
361 {
362  // FIXME: Unbuffered UT_IStreamBuf::skipLine is not implemented!!!
364  "Unbuffered skipLine not implemented");
365 
366  // Return false if we're already at the EOF
367  if (myBufCur == myBufEnd && !loadBuffer(true))
368  return false;
369 
370  do {
371  for ( ; myBufCur < myBufEnd; ++myBufCur)
372  {
373  if (*myBufCur == newline)
374  {
375  ++myBufCur; // move over the newline
376  return true;
377  }
378  }
379  } while (loadBuffer(false));
380 
381  // Return true if we skipped to the EOF so that the caller can determine
382  // whether it's valid.
383  return true;
384 }
385 
386 #endif
GT_API const UT_StringHolder filename
exint bread(char *buffer, exint size)
virtual void loadBuffer(exint &bufstartpos, const char *&bufstart, const char *&bufend, bool keepeofif0)=0
GLint left
Definition: glcorearb.h:2004
SYS_FORCE_INLINE int getc()
GLuint start
Definition: glcorearb.h:474
#define UT_API
Definition: UT_API.h:13
void read(T &in, bool &v)
Definition: ImfXdr.h:611
GLuint buffer
Definition: glcorearb.h:659
GLsizeiptr size
Definition: glcorearb.h:663
#define UT_STREAMBUF_SIZE
Definition: UT_IStreamBuf.h:31
bool seekg(exint pos, int seekdir)
SYS_FORCE_INLINE int peekc()
long long int64
Definition: SYS_Types.h:107
png_FILE_p fp
Definition: png.h:2028
GLdouble n
Definition: glcorearb.h:2007
bool skipWhitespace()
bool isBufferable() const
bool getError() const
int64 exint
Definition: SYS_Types.h:116
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:125
GLuint GLuint end
Definition: glcorearb.h:474
#define SYS_FORCE_INLINE
Definition: SYS_Inline.h:45
SYS_FORCE_INLINE exint unwind(exint len)
virtual void setIndex(const UT_CompressedBlockIndexPtr &index)
Definition: UT_IStreamBuf.h:83
GLboolean * data
Definition: glcorearb.h:130
bool isEof() const
Definition: UT_IStreamBuf.h:87
SYS_FORCE_INLINE bool checkToken(const char *match)
A map of string to various well defined value types.
Definition: UT_Options.h:42
exint tellg() const
#define UT_ASSERT_MSG(ZZ, MM)
Definition: UT_Assert.h:129
GLuint index
Definition: glcorearb.h:785
bool skipLine(int end='\n')
#define SYSmin(a, b)
Definition: SYS_Math.h:1368
PXR_NAMESPACE_OPEN_SCOPE typedef unsigned char uchar
Definition: inttypes.h:43
GLenum src
Definition: glcorearb.h:1792