HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
blosc.h
Go to the documentation of this file.
1 /*********************************************************************
2  Blosc - Blocked Shuffling and Compression Library
3 
4  Author: Francesc Alted <francesc@blosc.org>
5 
6  See LICENSES/BLOSC.txt for details about copyright and rights to use.
7 **********************************************************************/
8 #ifndef BLOSC_H
9 #define BLOSC_H
10 
11 #include <limits.h>
12 #include <stdlib.h>
13 #include "blosc-export.h"
14 
15 #ifdef __cplusplus
16 extern "C" {
17 #endif
18 
19 /* Version numbers */
20 #define BLOSC_VERSION_MAJOR 1 /* for major interface/format changes */
21 #define BLOSC_VERSION_MINOR 20 /* for minor interface/format changes */
22 #define BLOSC_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */
23 
24 #define BLOSC_VERSION_STRING "1.20.1" /* string version. Sync with above! */
25 #define BLOSC_VERSION_REVISION "$Rev$" /* revision version */
26 #define BLOSC_VERSION_DATE "$Date:: 2020-09-08 #$" /* date version */
27 
28 #define BLOSCLZ_VERSION_STRING "2.3.0" /* the internal compressor version */
29 
30 /* The *_FORMAT symbols should be just 1-byte long */
31 #define BLOSC_VERSION_FORMAT 2 /* Blosc format version, starting at 1 */
32 
33 /* Minimum header length */
34 #define BLOSC_MIN_HEADER_LENGTH 16
35 
36 /* The maximum overhead during compression in bytes. This is equal to
37  BLOSC_MIN_HEADER_LENGTH now, but can be higher in future
38  implementations */
39 #define BLOSC_MAX_OVERHEAD BLOSC_MIN_HEADER_LENGTH
40 
41 /* Maximum source buffer size to be compressed */
42 #define BLOSC_MAX_BUFFERSIZE (INT_MAX - BLOSC_MAX_OVERHEAD)
43 
44 /* Maximum typesize before considering source buffer as a stream of bytes */
45 #define BLOSC_MAX_TYPESIZE 255 /* Cannot be larger than 255 */
46 
47 /* Maximum supported blocksize. Decompression (getitem) requires a temporary
48  buffer of size 3*blocksize + sizeof(int32_t) * typesize. */
49 #define BLOSC_MAX_BLOCKSIZE \
50  ((INT_MAX - BLOSC_MAX_TYPESIZE * sizeof(int32_t)) / 3)
51 
52 /* The maximum number of threads (for some static arrays) */
53 #define BLOSC_MAX_THREADS 256
54 
55 /* Codes for shuffling (see blosc_compress) */
56 #define BLOSC_NOSHUFFLE 0 /* no shuffle */
57 #define BLOSC_SHUFFLE 1 /* byte-wise shuffle */
58 #define BLOSC_BITSHUFFLE 2 /* bit-wise shuffle */
59 
60 /* Codes for internal flags (see blosc_cbuffer_metainfo) */
61 #define BLOSC_DOSHUFFLE 0x1 /* byte-wise shuffle */
62 #define BLOSC_MEMCPYED 0x2 /* plain copy */
63 #define BLOSC_DOBITSHUFFLE 0x4 /* bit-wise shuffle */
64 
65 /* Codes for the different compressors shipped with Blosc */
66 #define BLOSC_BLOSCLZ 0
67 #define BLOSC_LZ4 1
68 #define BLOSC_LZ4HC 2
69 #define BLOSC_SNAPPY 3
70 #define BLOSC_ZLIB 4
71 #define BLOSC_ZSTD 5
72 
73 /* Names for the different compressors shipped with Blosc */
74 #define BLOSC_BLOSCLZ_COMPNAME "blosclz"
75 #define BLOSC_LZ4_COMPNAME "lz4"
76 #define BLOSC_LZ4HC_COMPNAME "lz4hc"
77 #define BLOSC_SNAPPY_COMPNAME "snappy"
78 #define BLOSC_ZLIB_COMPNAME "zlib"
79 #define BLOSC_ZSTD_COMPNAME "zstd"
80 
81 /* Codes for compression libraries shipped with Blosc (code must be < 8) */
82 #define BLOSC_BLOSCLZ_LIB 0
83 #define BLOSC_LZ4_LIB 1
84 #define BLOSC_SNAPPY_LIB 2
85 #define BLOSC_ZLIB_LIB 3
86 #define BLOSC_ZSTD_LIB 4
87 
88 /* Names for the different compression libraries shipped with Blosc */
89 #define BLOSC_BLOSCLZ_LIBNAME "BloscLZ"
90 #define BLOSC_LZ4_LIBNAME "LZ4"
91 #define BLOSC_SNAPPY_LIBNAME "Snappy"
92 #define BLOSC_ZLIB_LIBNAME "Zlib"
93 #define BLOSC_ZSTD_LIBNAME "Zstd"
94 
95 /* The codes for compressor formats shipped with Blosc */
96 #define BLOSC_BLOSCLZ_FORMAT BLOSC_BLOSCLZ_LIB
97 #define BLOSC_LZ4_FORMAT BLOSC_LZ4_LIB
98 #define BLOSC_LZ4HC_FORMAT BLOSC_LZ4_LIB /* LZ4HC and LZ4 share the same format */
99 #define BLOSC_SNAPPY_FORMAT BLOSC_SNAPPY_LIB
100 #define BLOSC_ZLIB_FORMAT BLOSC_ZLIB_LIB
101 #define BLOSC_ZSTD_FORMAT BLOSC_ZSTD_LIB
102 
103 
104 /* The version formats for compressors shipped with Blosc */
105 /* All versions here start at 1 */
106 #define BLOSC_BLOSCLZ_VERSION_FORMAT 1
107 #define BLOSC_LZ4_VERSION_FORMAT 1
108 #define BLOSC_LZ4HC_VERSION_FORMAT 1 /* LZ4HC and LZ4 share the same format */
109 #define BLOSC_SNAPPY_VERSION_FORMAT 1
110 #define BLOSC_ZLIB_VERSION_FORMAT 1
111 #define BLOSC_ZSTD_VERSION_FORMAT 1
112 
113 /* Split mode for blocks. NEVER and ALWAYS are for experimenting with best compression ratio,
114  * AUTO for optimal behaviour (based on experiments), and FORWARD_COMPAT provides
115  * best forward compatibility */
116 #define BLOSC_ALWAYS_SPLIT 1
117 #define BLOSC_NEVER_SPLIT 2
118 #define BLOSC_AUTO_SPLIT 3
119 #define BLOSC_FORWARD_COMPAT_SPLIT 4
120 
121 /**
122  Initialize the Blosc library environment.
123 
124  You must call this before any other Blosc call, unless you want
125  Blosc to be used simultaneously in a multi-threaded environment, in
126  which case you should *exclusively* use the
127  blosc_compress_ctx()/blosc_decompress_ctx() pair (see below).
128  */
129 BLOSC_EXPORT void blosc_init(void);
130 
131 
132 /**
133  Destroy the Blosc library environment.
134 
135  You must call this after you are done with all the Blosc calls,
136  unless you have not used blosc_init() before (see blosc_init()
137  above).
138  */
139 BLOSC_EXPORT void blosc_destroy(void);
140 
141 
142 /**
143  Compress a block of data in the `src` buffer and return the size of
144  the compressed block. The size of `src` buffer is specified by
145  `nbytes`. There is no minimum for `src` buffer size (`nbytes`).
146 
147  `clevel` is the desired compression level and must be a number
148  between 0 (no compression) and 9 (maximum compression).
149 
150  `doshuffle` specifies whether the shuffle compression filters
151  should be applied or not. BLOSC_NOSHUFFLE means not applying it,
152  BLOSC_SHUFFLE means applying it at a byte level and BLOSC_BITSHUFFLE
153  at a bit level (slower but may achieve better entropy alignment).
154 
155  `typesize` is the number of bytes for the atomic type in binary
156  `src` buffer. This is mainly useful for the shuffle filters.
157  For implementation reasons, only a 1 < `typesize` < 256 will allow the
158  shuffle filter to work. When `typesize` is not in this range, shuffle
159  will be silently disabled.
160 
161  The `dest` buffer must have at least the size of `destsize`. Blosc
162  guarantees that if you set `destsize` to, at least,
163  (`nbytes` + BLOSC_MAX_OVERHEAD), the compression will always succeed.
164  The `src` buffer and the `dest` buffer can not overlap.
165 
166  Compression is memory safe and guaranteed not to write the `dest`
167  buffer beyond what is specified in `destsize`.
168 
169  If `src` buffer cannot be compressed into `destsize`, the return
170  value is zero and you should discard the contents of the `dest`
171  buffer.
172 
173  A negative return value means that an internal error happened. This
174  should never happen. If you see this, please report it back
175  together with the buffer data causing this and compression settings.
176 
177  Environment variables
178  ---------------------
179 
180  blosc_compress() honors different environment variables to control
181  internal parameters without the need of doing that programmatically.
182  Here are the ones supported:
183 
184  BLOSC_CLEVEL=(INTEGER): This will overwrite the `clevel` parameter
185  before the compression process starts.
186 
187  BLOSC_SHUFFLE=[NOSHUFFLE | SHUFFLE | BITSHUFFLE]: This will
188  overwrite the `doshuffle` parameter before the compression process
189  starts.
190 
191  BLOSC_TYPESIZE=(INTEGER): This will overwrite the `typesize`
192  parameter before the compression process starts.
193 
194  BLOSC_COMPRESSOR=[BLOSCLZ | LZ4 | LZ4HC | SNAPPY | ZLIB | ZSTD]: This will
195  call blosc_set_compressor(BLOSC_COMPRESSOR) before the compression
196  process starts.
197 
198  BLOSC_NTHREADS=(INTEGER): This will call
199  blosc_set_nthreads(BLOSC_NTHREADS) before the compression process
200  starts.
201 
202  BLOSC_BLOCKSIZE=(INTEGER): This will call
203  blosc_set_blocksize(BLOSC_BLOCKSIZE) before the compression process
204  starts. *NOTE:* The blocksize is a critical parameter with
205  important restrictions in the allowed values, so use this with care.
206 
207  BLOSC_NOLOCK=(ANY VALUE): This will call blosc_compress_ctx() under
208  the hood, with the `compressor`, `blocksize` and
209  `numinternalthreads` parameters set to the same as the last calls to
210  blosc_set_compressor(), blosc_set_blocksize() and
211  blosc_set_nthreads(). BLOSC_CLEVEL, BLOSC_SHUFFLE, BLOSC_TYPESIZE
212  environment vars will also be honored.
213 
214  BLOSC_SPLITMODE=[ FORWARD_COMPAT | AUTO | ALWAYS | NEVER ]:
215  This will call blosc_set_splitmode() with the different supported values.
216  See blosc_set_splitmode() docstrings for more info on each mode.
217 
218  BLOSC_WARN=(INTEGER): This will print some warning message on stderr
219  showing more info in situations where data inputs cannot be compressed.
220  The values can range from 1 (less verbose) to 10 (full verbose). 0 is
221  the same as if the BLOSC_WARN envvar was not defined.
222  */
223 BLOSC_EXPORT int blosc_compress(int clevel, int doshuffle, size_t typesize,
224  size_t nbytes, const void *src, void *dest,
225  size_t destsize);
226 
227 
228 /**
229  Context interface to blosc compression. This does not require a call
230  to blosc_init() and can be called from multithreaded applications
231  without the global lock being used, thus allowing Blosc to be executed
232  simultaneously in those scenarios.
233 
234  It uses the same parameters as the blosc_compress() function plus:
235 
236  `compressor`: the string representing the type of compressor to use.
237 
238  `blocksize`: the requested size of the compressed blocks. If 0, an
239  automatic blocksize will be used.
240 
241  `numinternalthreads`: the number of threads to use internally.
242 
243  A negative return value means that an internal error happened. This
244  should never happen. If you see this, please report it back
245  together with the buffer data causing this and compression settings.
246 */
247 BLOSC_EXPORT int blosc_compress_ctx(int clevel, int doshuffle, size_t typesize,
248  size_t nbytes, const void* src, void* dest,
249  size_t destsize, const char* compressor,
250  size_t blocksize, int numinternalthreads);
251 
252 /**
253  Decompress a block of compressed data in `src`, put the result in
254  `dest` and return the size of the decompressed block.
255 
256  Call `blosc_cbuffer_validate` to determine the size of the destination buffer.
257 
258  The `src` buffer and the `dest` buffer can not overlap.
259 
260  Decompression is memory safe and guaranteed not to write the `dest`
261  buffer beyond what is specified in `destsize`.
262 
263  If an error occurs, e.g. the compressed data is corrupted or the
264  output buffer is not large enough, then 0 (zero) or a negative value
265  will be returned instead.
266 
267  Environment variables
268  ---------------------
269 
270  blosc_decompress() honors different environment variables to control
271  internal parameters without the need of doing that programmatically.
272  Here are the ones supported:
273 
274  BLOSC_NTHREADS=(INTEGER): This will call
275  blosc_set_nthreads(BLOSC_NTHREADS) before the proper decompression
276  process starts.
277 
278  BLOSC_NOLOCK=(ANY VALUE): This will call blosc_decompress_ctx()
279  under the hood, with the `numinternalthreads` parameter set to the
280  same value as the last call to blosc_set_nthreads().
281 */
282 BLOSC_EXPORT int blosc_decompress(const void *src, void *dest, size_t destsize);
283 
284 /**
285  Context interface to blosc decompression. This does not require a
286  call to blosc_init() and can be called from multithreaded
287  applications without the global lock being used, thus allowing Blosc
288  to be executed simultaneously in those scenarios.
289 
290  Call `blosc_cbuffer_validate` to determine the size of the destination buffer.
291 
292  It uses the same parameters as the blosc_decompress() function plus:
293 
294  `numinternalthreads`: number of threads to use internally.
295 
296  Decompression is memory safe and guaranteed not to write the `dest`
297  buffer beyond what is specified in `destsize`.
298 
299  If an error occurs, e.g. the compressed data is corrupted or the
300  output buffer is not large enough, then 0 (zero) or a negative value
301  will be returned instead.
302 */
303 BLOSC_EXPORT int blosc_decompress_ctx(const void *src, void *dest,
304  size_t destsize, int numinternalthreads);
305 
306 /**
307  Get `nitems` (of typesize size) in `src` buffer starting at `start`.
308  The items are returned in `dest` buffer, which has to have enough
309  space for storing all items.
310 
311  Returns the number of bytes copied to `dest` or a negative value if
312  some error happens.
313  */
314 BLOSC_EXPORT int blosc_getitem(const void *src, int start, int nitems, void *dest);
315 
316 /**
317  Returns the current number of threads that are used for
318  compression/decompression.
319  */
320 BLOSC_EXPORT int blosc_get_nthreads(void);
321 
322 
323 /**
324  Initialize a pool of threads for compression/decompression. If
325  `nthreads` is 1, then the serial version is chosen and a possible
326  previous existing pool is ended. If this is not called, `nthreads`
327  is set to 1 internally.
328 
329  Returns the previous number of threads.
330  */
331 BLOSC_EXPORT int blosc_set_nthreads(int nthreads);
332 
333 
334 /**
335  Returns the current compressor that is being used for compression.
336  */
337 BLOSC_EXPORT const char* blosc_get_compressor(void);
338 
339 
340 /**
341  Select the compressor to be used. The supported ones are "blosclz",
342  "lz4", "lz4hc", "snappy", "zlib" and "zstd". If this function is not
343  called, then "blosclz" will be used by default.
344 
345  In case the compressor is not recognized, or there is not support
346  for it in this build, it returns a -1. Else it returns the code for
347  the compressor (>=0).
348  */
349 BLOSC_EXPORT int blosc_set_compressor(const char* compname);
350 
351 
352 /**
353  Get the `compname` associated with the `compcode`.
354 
355  If the compressor code is not recognized, or there is not support
356  for it in this build, -1 is returned. Else, the compressor code is
357  returned.
358  */
359 BLOSC_EXPORT int blosc_compcode_to_compname(int compcode, const char **compname);
360 
361 
362 /**
363  Return the compressor code associated with the compressor name.
364 
365  If the compressor name is not recognized, or there is not support
366  for it in this build, -1 is returned instead.
367  */
368 BLOSC_EXPORT int blosc_compname_to_compcode(const char *compname);
369 
370 
371 /**
372  Get a list of compressors supported in the current build. The
373  returned value is a string with a concatenation of "blosclz", "lz4",
374  "lz4hc", "snappy", "zlib" or "zstd", separated by commas, depending
375  on which ones are present in the build.
376 
377  This function does not leak, so you should not free() the returned
378  list.
379 
380  This function should always succeed.
381  */
382 BLOSC_EXPORT const char* blosc_list_compressors(void);
383 
384 /**
385  Return the version of the C-Blosc library in string format.
386 
387  Useful for dynamic libraries.
388 */
389 BLOSC_EXPORT const char* blosc_get_version_string(void);
390 
391 
392 /**
393  Get info from compression libraries included in the current build.
394  In `compname` you pass the compressor name that you want info from.
395 
396  In `complib` and `version` you get a pointer to the compressor
397  library name and the version in string format respectively. After
398  using the name and version, you should free() them so as to avoid
399  leaks. If any of `complib` and `version` are NULL, they will not be
400  assigned to anything, and the user should not need to free them.
401 
402  If the compressor is supported, it returns the code for the library
403  (>=0). If it is not supported, this function returns -1.
404  */
405 BLOSC_EXPORT int blosc_get_complib_info(const char *compname, char **complib, char **version);
406 
407 
408 /**
409  Free possible memory temporaries and thread resources. Use this
410  when you are not going to use Blosc for a long while. In case of
411  problems releasing the resources, it returns a negative number, else
412  it returns 0.
413  */
414 BLOSC_EXPORT int blosc_free_resources(void);
415 
416 
417 /**
418  Return information about a compressed buffer, namely the number of
419  uncompressed bytes (`nbytes`) and compressed (`cbytes`). It also
420  returns the `blocksize` (which is used internally for doing the
421  compression by blocks).
422 
423  You only need to pass the first BLOSC_MIN_HEADER_LENGTH bytes of a
424  compressed buffer for this call to work.
425 
426  If the format is not supported by the library, all output arguments will be
427  filled with zeros.
428  */
429 BLOSC_EXPORT void blosc_cbuffer_sizes(const void *cbuffer, size_t *nbytes,
430  size_t *cbytes, size_t *blocksize);
431 
432 /**
433  Checks that the compressed buffer starting at `cbuffer` of length `cbytes` may
434  contain valid blosc compressed data, and that it is safe to call
435  blosc_decompress/blosc_decompress_ctx/blosc_getitem.
436 
437  On success, returns 0 and sets *nbytes to the size of the uncompressed data.
438  This does not guarantee that the decompression function won't return an error,
439  but does guarantee that it is safe to attempt decompression.
440 
441  On failure, returns -1.
442  */
443 BLOSC_EXPORT int blosc_cbuffer_validate(const void* cbuffer, size_t cbytes,
444  size_t* nbytes);
445 
446 /**
447  Return meta-information about a compressed buffer, namely the type size
448  (`typesize`), as well as some internal `flags`.
449 
450  The `flags` is a set of bits, where the used ones are:
451  * bit 0: whether the shuffle filter has been applied or not
452  * bit 1: whether the internal buffer is a pure memcpy or not
453  * bit 2: whether the bit shuffle filter has been applied or not
454 
455  You can use the `BLOSC_DOSHUFFLE`, `BLOSC_DOBITSHUFFLE` and
456  `BLOSC_MEMCPYED` symbols for extracting the interesting bits
457  (e.g. ``flags & BLOSC_DOSHUFFLE`` says whether the buffer is
458  byte-shuffled or not).
459 
460  You only need to pass the first BLOSC_MIN_HEADER_LENGTH bytes of a
461  compressed buffer for this call to work.
462 
463  If the format is not supported by the library, all output arguments will be
464  filled with zeros.
465  */
466 BLOSC_EXPORT void blosc_cbuffer_metainfo(const void *cbuffer, size_t *typesize,
467  int *flags);
468 
469 
470 /**
471  Return information about a compressed buffer, namely the internal
472  Blosc format version (`version`) and the format for the internal
473  compressor used (`compversion`).
474 
475  This function should always succeed.
476  */
477 BLOSC_EXPORT void blosc_cbuffer_versions(const void *cbuffer, int *version,
478  int *compversion);
479 
480 
481 /**
482  Return the compressor library/format used in a compressed buffer.
483 
484  This function should always succeed.
485  */
486 BLOSC_EXPORT const char *blosc_cbuffer_complib(const void *cbuffer);
487 
488 
489 
490 /*********************************************************************
491 
492  Low-level functions follow. Use them only if you are an expert!
493 
494 *********************************************************************/
495 
496 /**
497  Get the internal blocksize to be used during compression. 0 means
498  that an automatic blocksize is computed internally (the default).
499  */
500 BLOSC_EXPORT int blosc_get_blocksize(void);
501 
502 /**
503  Force the use of a specific blocksize. If 0, an automatic
504  blocksize will be used (the default).
505 
506  The blocksize is a critical parameter with important restrictions in
507  the allowed values, so use this with care.
508  */
509 BLOSC_EXPORT void blosc_set_blocksize(size_t blocksize);
510 
511 /**
512  Set the split mode.
513 
514  This function can take the following values:
515  * BLOSC_FORWARD_COMPAT_SPLIT
516  * BLOSC_AUTO_SPLIT
517  * BLOSC_NEVER_SPLIT
518  * BLOSC_ALWAYS_SPLIT
519 
520  BLOSC_FORWARD_COMPAT_SPLIT offers reasonable forward compatibility,
521  BLOSC_AUTO_SPLIT is for nearly optimal results (based on heuristics),
522  BLOSC_NEVER_SPLIT and BLOSC_ALWAYS_SPLIT are for the user experimenting
523  when trying to get best compression ratios and/or speed.
524 
525  If not called, the default mode is BLOSC_FORWARD_COMPAT_SPLIT.
526 
527  This function should always succeed.
528  */
529 BLOSC_EXPORT void blosc_set_splitmode(int splitmode);
530 
531 
532 #ifdef __cplusplus
533 }
534 #endif
535 
536 
537 #endif
GLbitfield flags
Definition: glcorearb.h:1596
BLOSC_EXPORT int blosc_get_nthreads(void)
BLOSC_EXPORT void blosc_set_blocksize(size_t blocksize)
GLuint start
Definition: glcorearb.h:475
BLOSC_EXPORT const char * blosc_cbuffer_complib(const void *cbuffer)
BLOSC_EXPORT int blosc_compress_ctx(int clevel, int doshuffle, size_t typesize, size_t nbytes, const void *src, void *dest, size_t destsize, const char *compressor, size_t blocksize, int numinternalthreads)
BLOSC_EXPORT void blosc_cbuffer_sizes(const void *cbuffer, size_t *nbytes, size_t *cbytes, size_t *blocksize)
BLOSC_EXPORT void blosc_set_splitmode(int splitmode)
BLOSC_EXPORT int blosc_decompress_ctx(const void *src, void *dest, size_t destsize, int numinternalthreads)
BLOSC_EXPORT int blosc_get_blocksize(void)
BLOSC_EXPORT const char * blosc_get_compressor(void)
BLOSC_EXPORT int blosc_set_compressor(const char *compname)
BLOSC_EXPORT int blosc_getitem(const void *src, int start, int nitems, void *dest)
GT_API const UT_StringHolder version
BLOSC_EXPORT int blosc_decompress(const void *src, void *dest, size_t destsize)
#define BLOSC_EXPORT
Definition: blosc-export.h:30
BLOSC_EXPORT int blosc_free_resources(void)
BLOSC_EXPORT void blosc_cbuffer_versions(const void *cbuffer, int *version, int *compversion)
BLOSC_EXPORT int blosc_compcode_to_compname(int compcode, const char **compname)
BLOSC_EXPORT int blosc_compress(int clevel, int doshuffle, size_t typesize, size_t nbytes, const void *src, void *dest, size_t destsize)
BLOSC_EXPORT int blosc_compname_to_compcode(const char *compname)
BLOSC_EXPORT const char * blosc_get_version_string(void)
BLOSC_EXPORT void blosc_destroy(void)
BLOSC_EXPORT int blosc_set_nthreads(int nthreads)
BLOSC_EXPORT int blosc_cbuffer_validate(const void *cbuffer, size_t cbytes, size_t *nbytes)
BLOSC_EXPORT const char * blosc_list_compressors(void)
BLOSC_EXPORT void blosc_cbuffer_metainfo(const void *cbuffer, size_t *typesize, int *flags)
BLOSC_EXPORT void blosc_init(void)
BLOSC_EXPORT int blosc_get_complib_info(const char *compname, char **complib, char **version)
GLenum src
Definition: glcorearb.h:1793