HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UT_JSONDefines.h
Go to the documentation of this file.
1 /*
2  * PROPRIETARY INFORMATION. This software is proprietary to
3  * Side Effects Software Inc., and is not to be reproduced,
4  * transmitted, or disclosed in any way without written permission.
5  *
6  * NAME: UT_JSONDefines.h ( UT Library, C++)
7  *
8  * COMMENTS: Defines for UT_JSONParser/UT_JSONWrite
9  */
10 
11 #ifndef __UT_JDefines__
12 #define __UT_JDefines__
13 
14 /// @brief The UT_JID enums are used in byte-stream encoding of binary JSON
15 ///
16 /// The JSON token stream is encoded using the binary tokens defined below.
17 /// Each token is stored as a single byte in the stream with possible
18 /// additional data following in the stream.
19 ///
20 /// Note: Our binary encoding of JSON is different than BISON:
21 /// http://kaijaeger.com/articles/introducing-bison-binary-interchange-standard.html
22 /// @li This version has optimizations for storing arrays of uniform type data
23 /// @li This version does *not* support 24, 40, 48 or 56 bit integers
24 /// @li This version adds support for 16 bit floating point values
25 /// @li This version adds support for @b unsigned integers (bytes)
26 /// @li This version adds support for optimized encoding of arrays which
27 /// contain uniform values (including packed boolean arrays).
28 /// @li This version allows arbitrary endianness (determined by magic number)
29 /// @li Strings are not null byte encoded. Instead the length of the string is
30 /// stored prior to the data. This allows strings which contain a null
31 /// byte to be encoded properly.
32 /// @li Arrays are not limited to 64K entries
33 /// @li Maps (objects) are not limited to 64K entries
34 ///
35 /// Parsing of the binary stream does not require seeking forward or backward
36 /// through the stream.
37 ///
38 /// @note
39 /// Each binary token expects 0 or more bytes of data to follow. The data for
40 /// multi-byte types is stored in the native format for the machine. The
41 /// endianness of the file is determined by the magic number.@n
42 /// Several entities store an encoded integer value. Encoded integers are only
43 /// used to store unsigned integers for length or id in these entities.
44 /// Encoded integers are @b not used for storing integer data. Encoding is
45 /// performed by using the first byte to determine the size of the integer:
46 /// @li <tt>byte[0] < 0xf1</tt>: @n
47 /// This value represents the length (0-240)
48 /// @li <tt>byte[0] == 0xf2</tt>: @n
49 /// The next 2 bytes store an unsigned 16-bit length
50 /// @li <tt>byte[0] == 0xf4</tt>: @n
51 /// The next 4 bytes store an unsigned 32-bit length
52 /// @li <tt>byte[0] == 0xf8</tt>: @n
53 /// The next 8 bytes store an unsigned 64-bit length
54 ///
55 /// The values: <tt>0xf1, 0xf3, 0xf5, 0xf7, 0xf9-0xff</tt> are reserved for
56 /// future use.
57 /// @par
58 ///
59 /// Strings are encoded in two ways
60 /// @li UT_JID_STRING is followed by an encoded length and the raw string data.
61 /// The terminating null character is not stored. For example, the bytestream:
62 /// @code
63 /// [ 0x0c 0x48 0x65 0x6c 0x6c 0x6f 0x20 0x77 0x6f 0x72 0x6c 0x64 0x21 ]
64 /// @endcode
65 /// Would encode the string "Hello world!"
66 ///
67 /// @li UT_JID_TOKENDEF provides a mechanism to store common strings in a
68 /// compact fashion. This token is followed by an encoded integer representing
69 /// an @b id, then the encoded length and string data (just as UT_JID_STRING).
70 /// UT_JID_TOKENREF can then be used to reference the defined string.
71 /// @n
72 /// Token definitions do @b not return a string entity to the read stream, but
73 /// simply define a string. Token definitions can appear anywhere in the
74 /// stream.
75 ///
76 /// The binary format also provides for compact storage of arrays composed of a
77 /// uniform type of data. These arrays are encoded by specifying the
78 /// UT_JID_UNIFORM_ARRAY which is followed by a byte indicating the data
79 /// storage of the array, followed by an encoded length and the raw data for
80 /// the array.
81 ///
82 /// Uniform arrays may only store: UT_JID_BOOL, UT_JID_INT8, UT_JID_INT16,
83 /// UT_JID_INT32, UT_JID_INT64, UT_JID_REAL16, UT_JID_REAL32, UT_JID_REAL64,
84 /// UT_JID_STRING, UT_JID_TOKENREF. All other storage types are considered
85 /// an error.
86 ///
87 /// Uniform arrays of booleans are encoded in words of 32-bits using the
88 /// simple packing method:@code
89 /// value = (word & (1 << index)) != 0;
90 /// @endcode
91 ///
92 /// Uniform arrays of strings are represented using UT_JID_STRING process for
93 /// each string (i.e. the length of the string, followed by the string data).@n
94 ///
95 /// Uniform arrays of tokens are represented using @b N encoded id's.
96 ///
97 /// @see UT_JSONParser, UT_JSONWrite
98 ///
99 typedef enum {
100  /// No data follows the NULL token
101  UT_JID_NULL = 0x00,
102 
103  /// Marks the beginning of a map object. String/Value pairs will follow
104  /// until the UT_JID_MAP_END token is reached.
105  UT_JID_MAP_BEGIN = 0x7b, // '{'
106 
107  /// Marks the end of a map object
108  UT_JID_MAP_END = 0x7d, // '}'
109 
110  /// Marks the beginning of an array object. Values will follow until the
111  /// UT_JID_ARRAY_END token is reached.
112  UT_JID_ARRAY_BEGIN = 0x5b, // '['
113 
114  /// Marks the end of an array object
115  UT_JID_ARRAY_END = 0x5d, // ']'
116 
117  /// The following byte should be 0 or 1. However, the use of this token to
118  /// store booleans is less efficient than using UT_JID_FALSE/UT_JID_TRUE.@n
119  /// This is primarily for completeness (and encoding of fixed boolean
120  /// arrays).
121  UT_JID_BOOL = 0x10, // 0x10 | 0
122 
123  /// The following byte represents an 8 bit integer
124  UT_JID_INT8 = 0x11,
125  /// The following 2 bytes represent a 16 bit integer
126  UT_JID_INT16 = 0x12,
127  /// The following 4 bytes represent a 32 bit integer
128  UT_JID_INT32 = 0x13,
129  /// The following 8 bytes represent a 64 bit integer
130  UT_JID_INT64 = 0x14,
131 
132  /// The following byte represents an unsigned 8 bit integer
133  UT_JID_UINT8 = 0x21,
134  /// The following 2 bytes represent an unsigned 16 bit integer
     // NOTE(review): the enumerator this comment documents (inner line 135) is
     // absent from this listing -- presumably UT_JID_UINT16; restore it from the
     // original header and confirm its value.
136  /// 0x23 and 0x24 are reserved for future use (32/64 bit unsigned)
137 
138  /// The following 2 bytes represent a 16 bit real (float)
140  /// The following 4 bytes represent a 32 bit real (float)
142  /// The following 8 bytes represent a 64 bit real (float)
     // NOTE(review): the enumerators for the three real-type comments above
     // (inner lines 139, 141, 143) are absent from this listing. The file-level
     // documentation refers to UT_JID_REAL16, UT_JID_REAL32 and UT_JID_REAL64;
     // restore them from the original header and confirm their values.
144 
145  /// The data following is a binary encoded string. The byte is followed by
146  /// an encoded length followed by the bytes representing the string (the
147  /// terminating null character should not be included).
148  UT_JID_STRING = 0x27, // '\''
149  /// A compact way of encoding [ UT_JID_BOOL 0x00 ]
150  UT_JID_FALSE = 0x30, // '0'
151  /// A compact way of encoding [ UT_JID_BOOL 0x01 ]
152  UT_JID_TRUE = 0x31, // '1'
153 
154  /// Though gzip compression on a stream can reduce redundancy, storage of
155  /// common strings can be simplified by using "token-strings". On the
156  /// first use of a binary string, a token-define can be used. This assigns
157  /// an integer value to the string. The string can then be referenced by
158  /// the token-reference identifier.
159  ///
160  /// A token definition is followed by: <tt>[id] [length] [string-data]</tt>
161  ///
162  /// If a string already exists with this id, its definition will be
163  /// replaced with the new definition.
164  UT_JID_TOKENDEF = 0x2b, // '+'
165  /// A token reference is followed by: @c [id]
166  /// The id refers to a string defined by the "tokendef"
167  UT_JID_TOKENREF = 0x26, // '&'
168  /// The token undefine is followed by: @c [id]
169  /// Where the @c id represents the token string to undefine.
170  UT_JID_TOKENUNDEF = 0x2d, // '-'
171 
172  /// This allows for a compact method of storing an array of uniform
173  /// values. The following byte should be one of the elementary types
174  /// (bool, int8-int64, real16-real64, string, or token reference). This is
175  /// followed by the number of elements of the array (encoded as a length).
176  /// The raw data follows.
177  UT_JID_UNIFORM_ARRAY = 0x40, // '@'
178 
179  /// @private Used in parsing only
180  UT_JID_KEY_SEPARATOR = 0x3a, // ':'
181  /// @private Used in parsing only
182  UT_JID_VALUE_SEPARATOR = 0x2c, // ','
183 
184  /// This should be the first byte of a binary file. It should be followed
185  /// by the 4 bytes representing UT_JID_BINARY_MAGIC. If, when reading the
186  /// file, the magic number is swapped (i.e. UT_JID_BINARY_MAGIC_SWAP), then
187  /// the file was written on a machine with different endianness and all data
188  /// should be byte-swapped on loading.
189  UT_JID_MAGIC = 0x7f,
190 } UT_JID;
191 
192 /// The magic number to identify binary files (prefixed by UT_JID_MAGIC)
    /// From most to least significant byte the value is 0x62 0x4a 0x53 0x4e,
    /// i.e. the ASCII characters 'b' 'J' 'S' 'N'.
193 #define UT_JID_BINARY_MAGIC 0x624a534e
194 /// A swapped version of the magic number. When reading, if the swapped magic
195 /// number is read, then the binary file was created on a machine with
196 /// different endianness and data should be byte-swapped where appropriate.
    /// This is UT_JID_BINARY_MAGIC with its four bytes in reverse order
    /// (0x4e 0x53 0x4a 0x62, ASCII 'N' 'S' 'J' 'b').
197 #define UT_JID_BINARY_MAGIC_SWAP 0x4e534a62 // Swapped endian
198 
199 #endif
The following byte represents an 8 bit integer.
No data follows the NULL token.
The following 4 bytes represent an 32 bit real (float)
UT_JID
The UT_JID enums are used in byte-stream encoding of binary JSON.
Marks the end of an array object.
0x23 and 0x24 are reserved for future use (32/64 bit unsigned)
The following byte represents an unsigned 8 bit integer.
The following 8 bytes represent an 64 bit real (float)
The following 8 bytes represent an 64 bit integer.
The following 2 bytes represent an 16 bit integer.
A compact way of encoding [ UT_JID_BOOL 0x00 ].
Used in parsing only.
The following 4 bytes represent an 32 bit integer.
Marks the end of a map object.
A compact way of encoding [ UT_JID_BOOL 0x01 ].
The following 2 bytes represents an unsigned 16 bit integer.
Used in parsing only.