HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
UT_IStreamBuf.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_IStreamBuf.h ( UT Library, C++)
7  *
8  * COMMENTS: Since the semantics of C++ streams leave a little bit to be
9  * desired, this class provides an efficient mechanism to read
10  * streams line by line.
11  *
12  * However, since this class buffers the stream input itself, it's
13  * not safe to use the stream outside of the class.
14  */
15 
16 #ifndef __UT_IStreamBuf__
17 #define __UT_IStreamBuf__
18 
19 #include "UT_API.h"
20 #include "UT_Assert.h"
21 #include "UT_SCFCommon.h"
22 #include <SYS/SYS_Math.h>
23 #include <SYS/SYS_Types.h>
24 #include <iosfwd>
25 #include <fstream> // Only needed for gcc4.4
26 #include <stdio.h>
27 #include <string.h>
28 
29 #define UT_STREAMBUF_SIZE 65536
30 
31 class UT_WorkBuffer;
32 class UT_Options;
33 
35 public:
36  UT_IStreamBuf(bool bufferable = true);
37 
38  enum {
39  UT_SEEK_BEG=0, // Seek from beginning of stream
40  UT_SEEK_CUR=1, // Seek from current location
41  UT_SEEK_END=2 // Seek from the end of the stream
42  };
43 
44  static UT_IStreamBuf *nullBuf();
45  static UT_IStreamBuf *fileBuf(const char *filename,
46  const UT_Options *options,
47  bool bufferable);
48  static UT_IStreamBuf *fileBuf(FILE *fp,
49  bool bufferable);
50  static UT_IStreamBuf *istreamBuf(std::istream &is,
51  bool bufferable);
52  static UT_IStreamBuf *memoryBuf(const char *src, size_t len);
53  static UT_IStreamBuf *rangeBuf(UT_IStreamBuf *src,
54  int64 start, int64 end);
55 
56  static UT_IStreamBuf *zlibBuf(UT_IStreamBuf *src,
57  bool bufferable=true);
58  static UT_IStreamBuf *gzipBuf(UT_IStreamBuf *src,
59  bool bufferable=true);
60  static UT_IStreamBuf *bloscBuf(UT_IStreamBuf *src,
61  bool bufferable=true);
62  static UT_IStreamBuf *scBuf(UT_IStream *src,
63  bool bufferable=true);
64  static UT_IStreamBuf *blowfishBuf(UT_IStreamBuf *src,
65  const unsigned char *key, exint key_length,
66  bool bufferable=true);
67  static exint blowfishPlainTextLength(UT_IStreamBuf *src);
68 
69  static void setStdinCompatible(bool state);
70 
71  void bumpRef() { myRefCount++; }
72  void decRef()
73  {
74  myRefCount--;
75  if (!myRefCount)
76  delete this;
77  }
78 
79  /// Set the index for the stream buffer (does nothing if stream
80  /// buffer does not support index)
81  virtual void setIndex(const UT_CompressedBlockIndexPtr &index) {}
82 
83  // Determine if the caller tried to read past the end of file,
84  // assuming that we've run out of buffer.
85  bool isEof() const
86  {
87  return myBufCur == myBufEnd && endOfFileSet();
88  }
89 
90  // If the stream has exhausted the buffered read (if there was a buffer
91  // associated with this stream-buf), this method provides the way to read
92  // directly from the stream.
93  // It returns the number of bytes read.
94  virtual exint read(char *buffer, exint asize=1) = 0;
95 
96  // peek returns the character read as an unsigned char cast to an
97  // int, or -1 on failure. In other words, characters with the high bit set
98  // will return a number from 128 to 255, not a negative number.
99  virtual int streamPeek() = 0;
100 
101  // For streams which own the underlying stream, the close method allows
102  // that stream to be closed.
103  virtual bool closeFile();
104 
105  // Return true for a random-access file.
106  virtual bool isRandomAccessFile(UT_WorkBuffer &filename) const;
107 
108  // Query the file system type - not supported on all platforms
109  virtual int getFileSystemType() const;
110 
111  // Query the file descriptor associated with an open stream, if any. Be
112  // careful what you do with it.
113  virtual int getFileDescriptor() const;
114 
115  // If there was an error on the stream, the error flag will be set
116  inline bool getError() const { return myError; }
117  const char *getErrorStr() const;
118  void stealLoadBufferData(const char *&bufstart,
119  const char *&bufend,
120  exint maxlen);
121 
123  {
124  if (myError)
125  return 0;
126 
127  // Fast path for size 1, because it's being hit often.
128  if (size == 1 && myBufCur != myBufEnd)
129  {
130  *buffer = *myBufCur;
131  ++myBufCur;
132  return 1;
133  }
134 
135  if (!myBufferable)
136  return read(buffer, size);
137 
138  exint nread = 0;
139  exint left = size;
140  while (left > 0)
141  {
142  if (myBufCur == myBufEnd)
143  {
144  // Small reads go through the buffer; a request of at least
     // UT_STREAMBUF_SIZE bytes skips buffering and is read directly.
145  if (left >= UT_STREAMBUF_SIZE)
146  {
147  // Clear the buffer in this case because the position of
148  // the underlying stream will be past the buffer. If we
149  // try to seek back into the buffer and then read past it
150  // the underlying stream will be in the wrong place.
151  myBufStartPos = -1;
152  myBufStart = NULL;
153  myBufCur = NULL;
154  myBufEnd = NULL;
155  break;
156  }
157  if (!loadBuffer())
158  break;
159  }
160 
161  // Load first portion from my buffer
162  // How much can I read
163  exint n = SYSmin(left, exint(myBufEnd - myBufCur));
164  memcpy(buffer+nread, myBufCur, n);
165  myBufCur += n;
166  left -= n;
167  nread += n;
168  }
169  if (left)
170  nread += read(buffer+nread, left);
171 
172  return nread;
173  }
174 
175  exint tellg() const
176  {
177  if (myBufCur != NULL)
178  return myBufStartPos + (myBufCur - myBufStart);
179 
180  return tellPos();
181  }
182 
183  bool seekg(exint pos, int seekdir)
184  {
185  myError = false;
186  if (myBufCur != NULL && seekdir != UT_SEEK_END)
187  {
188  // Get the position relative to the buffer start
189  exint relpos;
190  if (seekdir == UT_SEEK_CUR)
191  relpos = pos + (myBufCur - myBufStart);
192  else
193  {
194  UT_ASSERT_P(seekdir == UT_SEEK_BEG);
195  relpos = pos - myBufStartPos;
196  }
197  // If we're seeking inside the buffer, just move myBufCur.
198  if (relpos >= 0 && relpos < (myBufEnd - myBufStart))
199  {
200  myBufCur = myBufStart + relpos;
201  return true;
202  }
203 
204  // The stream has already been read until the end of the buffer,
205  // so any seeks relative to the current position need to be
206  // adjusted to be relative to the end of the buffer.
207  if (seekdir == UT_SEEK_CUR)
208  pos -= (myBufEnd - myBufCur);
209  }
210 
211  // Clear the buffer since we're seeking to a new place in the stream
212  myBufStartPos = -1;
213  myBufStart = NULL;
214  myBufCur = NULL;
215  myBufEnd = NULL;
216  return seekPos(pos, seekdir);
217  }
218 
219  void clearLoadBuffer();
220  bool getLine(UT_WorkBuffer &buffer, int end='\n');
221  bool skipLine(int end='\n');
222  bool getJSONWord(UT_WorkBuffer &buffer, bool &real);
223  bool getWord(UT_WorkBuffer &buffer);
224  bool getString(UT_WorkBuffer &buffer);
225  bool getNumber(UT_WorkBuffer &buffer, bool &real);
226  bool skipWhitespace(int64 *line_count, int64 *line_start_pos);
227  bool getAll(UT_WorkBuffer &buffer);
228  bool getAllAscii(UT_WorkBuffer &buffer);
229  /// getc and peek return the character read as an unsigned char cast to an
230  /// int, or -1 (EOF) on failure. In other words, characters with the high
231  /// bit set will return a number from 128 to 255, not a negative number.
232  inline int getc()
233  {
234  if (myBufCur != myBufEnd || loadBuffer())
235  {
236  char c = *myBufCur;
237  ++myBufCur;
238  return (int)(uchar)c;
239  }
240  char data;
241  exint nread = read(&data, 1);
242  if (nread == 0)
243  return -1;
244  return (int)(uchar)data;
245  }
246  inline int peekc()
247  {
248  if (myBufCur != myBufEnd || loadBuffer())
249  {
250  char c = *myBufCur;
251  return (int)(uchar)c;
252  }
253  return streamPeek();
254  }
256  {
257  len = SYSmin(exint(myBufCur - myBufStart), len);
258  myBufCur -= len;
259  return len;
260  }
261 
262  virtual int64 getMemoryUsage(bool inclusive) const;
263 
264  bool isBufferable() const
265  { return myBufferable; }
266 
267 protected:
268  virtual ~UT_IStreamBuf();
269  void setError(const char *msg=0);
270 
271  // Extract an alpha-numeric word, with an optional prefix
272  bool getAlNumWord(UT_WorkBuffer &buffer, const char *prefix="");
273 
274  // Read data into a buffer and fill in the start and end pointers.
275  // The file position of the start of the buffer is put into bufstartpos.
276  // When the end of the buffer is reached, call loadBuffer() again,
277  // to get more data.
278  virtual void loadBuffer(exint &bufstartpos, const char *&bufstart, const char *&bufend, bool keepeofif0) = 0;
279 
280  virtual exint tellPos() const = 0;
281  virtual bool seekPos(exint pos, int dir) = 0;
282  // Determine if the caller tried to read past the end of file,
283  // assuming that we've run out of buffer.
284  // The equivalent to this as a public method is isEof().
285  virtual bool endOfFileSet() const = 0;
286 
287  // If loadBuffer() is being used as if it were read(), and not enough
288  // has already been read to count as a successful read from the outside
289  // perspective, a failure to read anything here should keep the eof
290  // flag set instead of clearing it.
291  // The eof flag is never kept set if loadBuffer reads some non-zero
292  // amount of data, because it logically only read up to the end, not past.
293  bool loadBuffer(bool keepeofif0 = false);
294 
295  void appendDecodedEscapeSequence(UT_WorkBuffer &buffer);
296 
297 private:
298  int64 myBufStartPos;
299  const char *myBufStart;
300  const char *myBufCur;
301  const char *myBufEnd;
302  char *myErrorStr;
303  exint myRefCount;
304  int myErrorNum;
305  bool myError;
306  bool myBufferable;
307 };
308 
309 #endif
unsigned char uchar
Definition: SYS_Types.h:31
exint bread(char *buffer, exint size)
GLint left
Definition: glcorearb.h:2004
GLuint start
Definition: glcorearb.h:474
exint unwind(exint len)
#define UT_API
Definition: UT_API.h:12
void read(T &in, bool &v)
Definition: ImfXdr.h:611
GLuint buffer
Definition: glcorearb.h:659
GLsizeiptr size
Definition: glcorearb.h:663
#define UT_ASSERT_P(ZZ)
Definition: UT_Assert.h:101
#define UT_STREAMBUF_SIZE
Definition: UT_IStreamBuf.h:29
bool seekg(exint pos, int seekdir)
long long int64
Definition: SYS_Types.h:100
png_FILE_p fp
Definition: png.h:2028
GLdouble n
Definition: glcorearb.h:2007
bool isBufferable() const
bool getError() const
int64 exint
Definition: SYS_Types.h:109
GLuint GLuint end
Definition: glcorearb.h:474
virtual void setIndex(const UT_CompressedBlockIndexPtr &index)
Definition: UT_IStreamBuf.h:81
GLboolean * data
Definition: glcorearb.h:130
bool isEof() const
Definition: UT_IStreamBuf.h:85
A map of string to various well defined value types.
Definition: UT_Options.h:42
exint tellg() const
GLuint index
Definition: glcorearb.h:785
#define SYSmin(a, b)
Definition: SYS_Math.h:1366
GLenum src
Definition: glcorearb.h:1792