HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
stb_image.h
Go to the documentation of this file.
1 /* stb_image - v2.23 - public domain image loader - http://nothings.org/stb
2  no warranty implied; use at your own risk
3 
4  Do this:
5  #define STB_IMAGE_IMPLEMENTATION
6  before you include this file in *one* C or C++ file to create the implementation.
7 
8  // i.e. it should look like this:
9  #include ...
10  #include ...
11  #include ...
12  #define STB_IMAGE_IMPLEMENTATION
13  #include "stb_image.h"
14 
15  You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
16  And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
17 
18 
19  QUICK NOTES:
20  Primarily of interest to game developers and other people who can
21  avoid problematic images and only need the trivial interface
22 
23  JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
24  PNG 1/2/4/8/16-bit-per-channel
25 
26  TGA (not sure what subset, if a subset)
27  BMP non-1bpp, non-RLE
28  PSD (composited view only, no extra channels, 8/16 bit-per-channel)
29 
30  GIF (*comp always reports as 4-channel)
31  HDR (radiance rgbE format)
32  PIC (Softimage PIC)
33  PNM (PPM and PGM binary only)
34 
35  Animated GIF still needs a proper API, but here's one way to do it:
36  http://gist.github.com/urraka/685d9a6340b26b830d49
37 
38  - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
39  - decode from arbitrary I/O callbacks
40  - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
41 
42  Full documentation under "DOCUMENTATION" below.
43 
44 
45 LICENSE
46 
47  See end of file for license information.
48 
49 RECENT REVISION HISTORY:
50 
51  2.23 (2019-08-11) fix clang static analysis warning
52  2.22 (2019-03-04) gif fixes, fix warnings
53  2.21 (2019-02-25) fix typo in comment
54  2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
55  2.19 (2018-02-11) fix warning
56  2.18 (2018-01-30) fix warnings
57  2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
58  2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
59  2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
60  2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
61  2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
62  2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
63  2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
64  RGB-format JPEG; remove white matting in PSD;
65  allocate large structures on the stack;
66  correct channel count for PNG & BMP
67  2.10 (2016-01-22) avoid warning introduced in 2.09
68  2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
69 
70  See end of file for full revision history.
71 
72 
73  ============================ Contributors =========================
74 
75  Image formats Extensions, features
76  Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info)
77  Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info)
78  Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG)
79  Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks)
80  Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG)
81  Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip)
82  Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD)
83  github:urraka (animated gif) Junggon Kim (PNM comments)
84  Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA)
85  socks-the-fox (16-bit PNG)
86  Jeremy Sawicki (handle all ImageNet JPGs)
87  Optimizations & bugfixes Mikhail Morozov (1-bit BMP)
88  Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query)
89  Arseny Kapoulkine
90  John-Mark Allen
91  Carmelo J Fdez-Aguera
92 
93  Bug & warning fixes
94  Marc LeBlanc David Woo Guillaume George Martins Mozeiko
95  Christpher Lloyd Jerry Jansson Joseph Thomson Phil Jordan
96  Dave Moore Roy Eltham Hayaki Saito Nathan Reed
97  Won Chun Luke Graham Johan Duparc Nick Verigakis
98  the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh
99  Janez Zemva John Bartholomew Michal Cichon github:romigrou
100  Jonathan Blow Ken Hamada Tero Hanninen github:svdijk
101  Laurent Gomila Cort Stratton Sergio Gonzalez github:snagar
102  Aruelien Pocheville Thibault Reuille Cass Everitt github:Zelex
103  Ryamond Barbiero Paul Du Bois Engin Manap github:grim210
104  Aldo Culquicondor Philipp Wiesemann Dale Weiler github:sammyhw
105  Oriol Ferrer Mesia Josh Tobin Matthew Gregan github:phprus
106  Julian Raschke Gregory Mullen Baldur Karlsson github:poppolopoppo
107  Christian Floisand Kevin Schmidt JR Smith github:darealshinji
108  Blazej Dariusz Roszkowski github:Michaelangel007
109 */
110 
111 #ifndef STBI_INCLUDE_STB_IMAGE_H
112 #define STBI_INCLUDE_STB_IMAGE_H
113 
114 // DOCUMENTATION
115 //
116 // Limitations:
117 // - no 12-bit-per-channel JPEG
118 // - no JPEGs with arithmetic coding
119 // - GIF always returns *comp=4
120 //
121 // Basic usage (see HDR discussion below for HDR usage):
122 // int x,y,n;
123 // unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
124 // // ... process data if not NULL ...
125 // // ... x = width, y = height, n = # 8-bit components per pixel ...
126 // // ... replace '0' with '1'..'4' to force that many components per pixel
127 // // ... but 'n' will always be the number that it would have been if you said 0
128 // stbi_image_free(data)
129 //
130 // Standard parameters:
131 // int *x -- outputs image width in pixels
132 // int *y -- outputs image height in pixels
133 // int *channels_in_file -- outputs # of image components in image file
134 // int desired_channels -- if non-zero, # of image components requested in result
135 //
136 // The return value from an image loader is an 'unsigned char *' which points
137 // to the pixel data, or NULL on an allocation failure or if the image is
138 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
139 // with each pixel consisting of N interleaved 8-bit components; the first
140 // pixel pointed to is top-left-most in the image. There is no padding between
141 // image scanlines or between pixels, regardless of format. The number of
142 // components N is 'desired_channels' if desired_channels is non-zero, or
143 // *channels_in_file otherwise. If desired_channels is non-zero,
144 // *channels_in_file has the number of components that _would_ have been
145 // output otherwise. E.g. if you set desired_channels to 4, you will always
146 // get RGBA output, but you can check *channels_in_file to see if it's trivially
147 // opaque because e.g. there were only 3 channels in the source image.
148 //
149 // An output image with N components has the following components interleaved
150 // in this order in each pixel:
151 //
152 // N=#comp components
153 // 1 grey
154 // 2 grey, alpha
155 // 3 red, green, blue
156 // 4 red, green, blue, alpha
157 //
158 // If image loading fails for any reason, the return value will be NULL,
159 // and *x, *y, *channels_in_file will be unchanged. The function
160 // stbi_failure_reason() can be queried for an extremely brief, end-user
161 // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
162 // to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
163 // more user-friendly ones.
164 //
165 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
166 //
167 // ===========================================================================
168 //
169 // UNICODE:
170 //
171 // If compiling for Windows and you wish to use Unicode filenames, compile
172 // with
173 // #define STBI_WINDOWS_UTF8
174 // and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert
175 // Windows wchar_t filenames to utf8.
176 //
177 // ===========================================================================
178 //
179 // Philosophy
180 //
181 // stb libraries are designed with the following priorities:
182 //
183 // 1. easy to use
184 // 2. easy to maintain
185 // 3. good performance
186 //
187 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
188 // and for best performance I may provide less-easy-to-use APIs that give higher
189 // performance, in addition to the easy-to-use ones. Nevertheless, it's important
190 // to keep in mind that from the standpoint of you, a client of this library,
191 // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
192 //
193 // Some secondary priorities arise directly from the first two, some of which
194 // provide more explicit reasons why performance can't be emphasized.
195 //
196 // - Portable ("ease of use")
197 // - Small source code footprint ("easy to maintain")
198 // - No dependencies ("ease of use")
199 //
200 // ===========================================================================
201 //
202 // I/O callbacks
203 //
204 // I/O callbacks allow you to read from arbitrary sources, like packaged
205 // files or some other source. Data read from callbacks are processed
206 // through a small internal buffer (currently 128 bytes) to try to reduce
207 // overhead.
208 //
209 // The three functions you must define are "read" (reads some bytes of data),
210 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
211 //
212 // ===========================================================================
213 //
214 // SIMD support
215 //
216 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
217 // supported by the compiler. For ARM Neon support, you must explicitly
218 // request it.
219 //
220 // (The old do-it-yourself SIMD API is no longer supported in the current
221 // code.)
222 //
223 // On x86, SSE2 will automatically be used when available based on a run-time
224 // test; if not, the generic C versions are used as a fall-back. On ARM targets,
225 // the typical path is to have separate builds for NEON and non-NEON devices
226 // (at least this is true for iOS and Android). Therefore, the NEON support is
227 // toggled by a build flag: define STBI_NEON to get NEON loops.
228 //
229 // If for some reason you do not want to use any of the SIMD code, or if
230 // you have issues compiling it, you can disable it entirely by
231 // defining STBI_NO_SIMD.
232 //
233 // ===========================================================================
234 //
235 // HDR image support (disable by defining STBI_NO_HDR)
236 //
237 // stb_image supports loading HDR images in general, and currently the Radiance
238 // .HDR file format specifically. You can still load any file through the existing
239 // interface; if you attempt to load an HDR file, it will be automatically remapped
240 // to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
241 // both of these constants can be reconfigured through this interface:
242 //
243 // stbi_hdr_to_ldr_gamma(2.2f);
244 // stbi_hdr_to_ldr_scale(1.0f);
245 //
246 // (note, do not use _inverse_ constants; stb_image will invert them
247 // appropriately).
248 //
249 // Additionally, there is a new, parallel interface for loading files as
250 // (linear) floats to preserve the full dynamic range:
251 //
252 // float *data = stbi_loadf(filename, &x, &y, &n, 0);
253 //
254 // If you load LDR images through this interface, those images will
255 // be promoted to floating point values, run through the inverse of
256 // constants corresponding to the above:
257 //
258 // stbi_ldr_to_hdr_scale(1.0f);
259 // stbi_ldr_to_hdr_gamma(2.2f);
260 //
261 // Finally, given a filename (or an open file or memory block--see header
262 // file for details) containing image data, you can query for the "most
263 // appropriate" interface to use (that is, whether the image is HDR or
264 // not), using:
265 //
266 // stbi_is_hdr(char *filename);
267 //
268 // ===========================================================================
269 //
270 // iPhone PNG support:
271 //
272 // By default we convert iphone-formatted PNGs back to RGB, even though
273 // they are internally encoded differently. You can disable this conversion
274 // by calling stbi_convert_iphone_png_to_rgb(0), in which case
275 // you will always just get the native iphone "format" through (which
276 // is BGR stored in RGB).
277 //
278 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
279 // pixel to remove any premultiplied alpha *only* if the image file explicitly
280 // says there's premultiplied data (currently only happens in iPhone images,
281 // and only if iPhone convert-to-rgb processing is on).
282 //
283 // ===========================================================================
284 //
285 // ADDITIONAL CONFIGURATION
286 //
287 // - You can suppress implementation of any of the decoders to reduce
288 // your code footprint by #defining one or more of the following
289 // symbols before creating the implementation.
290 //
291 // STBI_NO_JPEG
292 // STBI_NO_PNG
293 // STBI_NO_BMP
294 // STBI_NO_PSD
295 // STBI_NO_TGA
296 // STBI_NO_GIF
297 // STBI_NO_HDR
298 // STBI_NO_PIC
299 // STBI_NO_PNM (.ppm and .pgm)
300 //
301 // - You can request *only* certain decoders and suppress all other ones
302 // (this will be more forward-compatible, as addition of new decoders
303 // doesn't require you to disable them explicitly):
304 //
305 // STBI_ONLY_JPEG
306 // STBI_ONLY_PNG
307 // STBI_ONLY_BMP
308 // STBI_ONLY_PSD
309 // STBI_ONLY_TGA
310 // STBI_ONLY_GIF
311 // STBI_ONLY_HDR
312 // STBI_ONLY_PIC
313 // STBI_ONLY_PNM (.ppm and .pgm)
314 //
315 // - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
316 // want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
317 //
318 
319 
320 #ifndef STBI_NO_STDIO
321 #include <stdio.h>
322 #endif // STBI_NO_STDIO
323 
324 #define STBI_VERSION 1
325 
// Named values for the 'desired_channels' argument of the loaders. Apart from
// STBI_default, each constant equals the number of interleaved components per
// output pixel (1 grey; 2 grey, alpha; 3 red, green, blue; 4 red, green,
// blue, alpha).
enum
{
   STBI_default = 0, // only used for desired_channels

   STBI_grey       = 1,
   STBI_grey_alpha = 2,
   STBI_rgb        = 3,
   STBI_rgb_alpha  = 4
};
335 
336 #include <stdlib.h>
337 typedef unsigned char stbi_uc;
338 typedef unsigned short stbi_us;
339 
340 #ifdef __cplusplus
341 extern "C" {
342 #endif
343 
344 #ifndef STBIDEF
345 #ifdef STB_IMAGE_STATIC
346 #define STBIDEF static
347 #else
348 #define STBIDEF extern
349 #endif
350 #endif
351 
352 //////////////////////////////////////////////////////////////////////////////
353 //
354 // PRIMARY API - works on images of any type
355 //
356 
357 //
358 // load image by filename, open file, or memory buffer
359 //
360 
// Table of user-supplied I/O callbacks used by the *_from_callbacks entry
// points. The 'user' argument of each callback is the opaque pointer that
// was passed alongside this table.
typedef struct
{
   int  (*read)(void *user, char *data, int size); // fill 'data' with 'size' bytes.  return number of bytes actually read
   void (*skip)(void *user, int n);                // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
   int  (*eof) (void *user);                       // returns nonzero if we are at end of file/data
} stbi_io_callbacks;
367 
368 ////////////////////////////////////
369 //
370 // 8-bits-per-channel interface
371 //
372 
373 STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels);
374 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
375 
376 #ifndef STBI_NO_STDIO
377 STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
378 STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
379 // for stbi_load_from_file, file pointer is left pointing immediately after image
380 #endif
381 
382 #ifndef STBI_NO_GIF
383 STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
384 #endif
385 
386 #ifdef STBI_WINDOWS_UTF8
387 STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
388 #endif
389 
390 ////////////////////////////////////
391 //
392 // 16-bits-per-channel interface
393 //
394 
395 STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
396 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
397 
398 #ifndef STBI_NO_STDIO
399 STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
400 STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
401 #endif
402 
403 ////////////////////////////////////
404 //
405 // float-per-channel interface
406 //
407 #ifndef STBI_NO_LINEAR
408  STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
409  STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
410 
411  #ifndef STBI_NO_STDIO
412  STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
413  STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
414  #endif
415 #endif
416 
417 #ifndef STBI_NO_HDR
418  STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);
419  STBIDEF void stbi_hdr_to_ldr_scale(float scale);
420 #endif // STBI_NO_HDR
421 
422 #ifndef STBI_NO_LINEAR
423  STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);
424  STBIDEF void stbi_ldr_to_hdr_scale(float scale);
425 #endif // STBI_NO_LINEAR
426 
427 // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
428 STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
429 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
430 #ifndef STBI_NO_STDIO
431 STBIDEF int stbi_is_hdr (char const *filename);
432 STBIDEF int stbi_is_hdr_from_file(FILE *f);
433 #endif // STBI_NO_STDIO
434 
435 
436 // get a VERY brief reason for failure
437 // NOT THREADSAFE
438 STBIDEF const char *stbi_failure_reason (void);
439 
440 // free the loaded image -- this is just free()
441 STBIDEF void stbi_image_free (void *retval_from_stbi_load);
442 
443 // get image dimensions & components without fully decoding
444 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
445 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
446 STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
447 STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
448 
449 #ifndef STBI_NO_STDIO
450 STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp);
451 STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp);
452 STBIDEF int stbi_is_16_bit (char const *filename);
454 #endif
455 
456 
457 
458 // for image formats that explicitly notate that they have premultiplied alpha,
459 // we just return the colors as stored in the file. set this flag to force
460 // unpremultiplication. results are undefined if the unpremultiply overflows.
461 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
462 
463 // indicate whether we should process iphone images back to canonical format,
464 // or just pass them through "as-is"
465 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
466 
467 // flip the image vertically, so the first pixel in the output array is the bottom left
468 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
469 
470 // ZLIB client - used by PNG, available for other purposes
471 
472 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
473 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
474 STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
475 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
476 
477 STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
478 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
479 
480 
481 #ifdef __cplusplus
482 }
483 #endif
484 
485 //
486 //
487 //// end header file /////////////////////////////////////////////////////
488 #endif // STBI_INCLUDE_STB_IMAGE_H
489 
490 #ifdef STB_IMAGE_IMPLEMENTATION
491 
492 #if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
493  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
494  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
495  || defined(STBI_ONLY_ZLIB)
496  #ifndef STBI_ONLY_JPEG
497  #define STBI_NO_JPEG
498  #endif
499  #ifndef STBI_ONLY_PNG
500  #define STBI_NO_PNG
501  #endif
502  #ifndef STBI_ONLY_BMP
503  #define STBI_NO_BMP
504  #endif
505  #ifndef STBI_ONLY_PSD
506  #define STBI_NO_PSD
507  #endif
508  #ifndef STBI_ONLY_TGA
509  #define STBI_NO_TGA
510  #endif
511  #ifndef STBI_ONLY_GIF
512  #define STBI_NO_GIF
513  #endif
514  #ifndef STBI_ONLY_HDR
515  #define STBI_NO_HDR
516  #endif
517  #ifndef STBI_ONLY_PIC
518  #define STBI_NO_PIC
519  #endif
520  #ifndef STBI_ONLY_PNM
521  #define STBI_NO_PNM
522  #endif
523 #endif
524 
525 #if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
526 #define STBI_NO_ZLIB
527 #endif
528 
529 
530 #include <stdarg.h>
531 #include <stddef.h> // ptrdiff_t on osx
532 #include <stdlib.h>
533 #include <string.h>
534 #include <limits.h>
535 
536 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
537 #include <math.h> // ldexp, pow
538 #endif
539 
540 #ifndef STBI_NO_STDIO
541 #include <stdio.h>
542 #endif
543 
544 #ifndef STBI_ASSERT
545 #include <assert.h>
546 #define STBI_ASSERT(x) assert(x)
547 #endif
548 
549 #ifdef __cplusplus
550 #define STBI_EXTERN extern "C"
551 #else
552 #define STBI_EXTERN extern
553 #endif
554 
555 
556 #ifndef _MSC_VER
557  #ifdef __cplusplus
558  #define stbi_inline inline
559  #else
560  #define stbi_inline
561  #endif
562 #else
563  #define stbi_inline __forceinline
564 #endif
565 
566 
567 #ifdef _MSC_VER
568 typedef unsigned short stbi__uint16;
569 typedef signed short stbi__int16;
570 typedef unsigned int stbi__uint32;
571 typedef signed int stbi__int32;
572 #else
573 #include <stdint.h>
574 typedef uint16_t stbi__uint16;
575 typedef int16_t stbi__int16;
576 typedef uint32_t stbi__uint32;
577 typedef int32_t stbi__int32;
578 #endif
579 
580 // should produce compiler error if size is wrong
581 typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
582 
583 #ifdef _MSC_VER
584 #define STBI_NOTUSED(v) (void)(v)
585 #else
586 #define STBI_NOTUSED(v) (void)sizeof(v)
587 #endif
588 
589 #ifdef _MSC_VER
590 #define STBI_HAS_LROTL
591 #endif
592 
593 #ifdef STBI_HAS_LROTL
594  #define stbi_lrot(x,y) _lrotl(x,y)
595 #else
596  #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y))))
597 #endif
598 
599 #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
600 // ok
601 #elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
602 // ok
603 #else
604 #error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
605 #endif
606 
607 #ifndef STBI_MALLOC
608 #define STBI_MALLOC(sz) malloc(sz)
609 #define STBI_REALLOC(p,newsz) realloc(p,newsz)
610 #define STBI_FREE(p) free(p)
611 #endif
612 
613 #ifndef STBI_REALLOC_SIZED
614 #define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
615 #endif
616 
617 // x86/x64 detection
618 #if defined(__x86_64__) || defined(_M_X64)
619 #define STBI__X64_TARGET
620 #elif defined(__i386) || defined(_M_IX86)
621 #define STBI__X86_TARGET
622 #endif
623 
624 #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
625 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
626 // which in turn means it gets to use SSE2 everywhere. This is unfortunate,
627 // but previous attempts to provide the SSE2 functions with runtime
628 // detection caused numerous issues. The way architecture extensions are
629 // exposed in GCC/Clang is, sadly, not really suited for one-file libs.
630 // New behavior: if compiled with -msse2, we use SSE2 without any
631 // detection; if not, we don't use it at all.
632 #define STBI_NO_SIMD
633 #endif
634 
635 #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
636 // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
637 //
638 // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
639 // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
640 // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
641 // simultaneously enabling "-mstackrealign".
642 //
643 // See https://github.com/nothings/stb/issues/81 for more information.
644 //
645 // So default to no SSE2 on 32-bit MinGW. If you've read this far and added
646 // -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
647 #define STBI_NO_SIMD
648 #endif
649 
650 #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
651 #define STBI_SSE2
652 #include <emmintrin.h>
653 
654 #ifdef _MSC_VER
655 
656 #if _MSC_VER >= 1400 // not VC6
657 #include <intrin.h> // __cpuid
// Runs CPUID with function id 1 through the compiler intrinsic and returns
// the fourth output word (index 3, i.e. EDX).
static int stbi__cpuid3(void)
{
   int regs[4];
   __cpuid(regs, 1);
   return regs[3];
}
664 #else
665 static int stbi__cpuid3(void)
666 {
667  int res;
668  __asm {
669  mov eax,1
670  cpuid
671  mov res,edx
672  }
673  return res;
674 }
675 #endif
676 
677 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
678 
679 #if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
// Run-time SSE2 check: returns 1 when bit 26 of the word reported by
// stbi__cpuid3() is set, 0 otherwise.
static int stbi__sse2_available(void)
{
   const int sse2_mask = 1 << 26;
   const int features = stbi__cpuid3();
   return (features & sse2_mask) != 0;
}
685 #endif
686 
687 #else // assume GCC-style if not VC++
688 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
689 
690 #if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
// GCC-style compilers: no run-time probe is performed.
static int stbi__sse2_available(void)
{
   // Reaching this code on GCC/Clang implies the build enabled -msse2,
   // so the compiler itself may already emit SSE2 instructions anywhere;
   // using them here adds no new requirement.
   const int always_available = 1;
   return always_available;
}
698 #endif
699 
700 #endif
701 #endif
702 
703 // ARM NEON
704 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
705 #undef STBI_NEON
706 #endif
707 
708 #ifdef STBI_NEON
709 #include <arm_neon.h>
710 // assume GCC or Clang on ARM targets
711 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
712 #endif
713 
714 #ifndef STBI_SIMD_ALIGN
715 #define STBI_SIMD_ALIGN(type, name) type name
716 #endif
717 
718 ///////////////////////////////////////////////
719 //
720 // stbi__context struct and start_xxx functions
721 
722 // stbi__context structure is our basic context used by all images, so it
723 // contains all the IO context, plus some basic image information
724 typedef struct
725 {
726  stbi__uint32 img_x, img_y;
727  int img_n, img_out_n;
728 
730  void *io_user_data;
731 
732  int read_from_callbacks;
733  int buflen;
734  stbi_uc buffer_start[128];
735 
736  stbi_uc *img_buffer, *img_buffer_end;
737  stbi_uc *img_buffer_original, *img_buffer_original_end;
738 } stbi__context;
739 
740 
741 static void stbi__refill_buffer(stbi__context *s);
742 
743 // initialize a memory-decode context
744 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
745 {
746  s->io.read = NULL;
747  s->read_from_callbacks = 0;
748  s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
749  s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
750 }
751 
752 // initialize a callback-based context
753 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
754 {
755  s->io = *c;
756  s->io_user_data = user;
757  s->buflen = sizeof(s->buffer_start);
758  s->read_from_callbacks = 1;
759  s->img_buffer_original = s->buffer_start;
760  stbi__refill_buffer(s);
761  s->img_buffer_original_end = s->img_buffer_end;
762 }
763 
764 #ifndef STBI_NO_STDIO
765 
// 'read' callback for stdio: 'user' is the FILE* to read from. Copies up to
// 'size' bytes into 'data' and returns how many bytes fread delivered.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);
   return (int) got;
}
770 
// 'skip' callback for stdio: 'user' is the FILE*. Moves the file position by
// 'n' bytes relative to the current position (negative 'n' moves backwards).
// The result of fseek is not inspected.
static void stbi__stdio_skip(void *user, int n)
{
   FILE *stream = (FILE *) user;
   long delta = n;
   fseek(stream, delta, SEEK_CUR);
}
775 
// 'eof' callback for stdio: 'user' is the FILE*. Returns nonzero when no more
// data can be read from the stream.
//
// A stream whose error indicator is set is reported as finished as well:
// after a read error feof() stays 0, so a caller that polls this callback
// until it turns nonzero would otherwise never stop.
static int stbi__stdio_eof(void *user)
{
   FILE *stream = (FILE *) user;
   return feof(stream) || ferror(stream);
}
780 
781 static stbi_io_callbacks stbi__stdio_callbacks =
782 {
783  stbi__stdio_read,
784  stbi__stdio_skip,
785  stbi__stdio_eof,
786 };
787 
788 static void stbi__start_file(stbi__context *s, FILE *f)
789 {
790  stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
791 }
792 
793 //static void stop_file(stbi__context *s) { }
794 
795 #endif // !STBI_NO_STDIO
796 
797 static void stbi__rewind(stbi__context *s)
798 {
799  // conceptually rewind SHOULD rewind to the beginning of the stream,
800  // but we just rewind to the beginning of the initial buffer, because
801  // we only use it after doing 'test', which only ever looks at at most 92 bytes
802  s->img_buffer = s->img_buffer_original;
803  s->img_buffer_end = s->img_buffer_original_end;
804 }
805 
// Channel-order tags (values spelled out; they match the implicit numbering).
enum
{
   STBI_ORDER_RGB = 0,
   STBI_ORDER_BGR = 1
};
811 
// Description of a decoded result, filled in through the 'ri' parameter of
// the per-format *_load functions.
typedef struct
{
   int bits_per_channel; // bits per channel of the returned data
   int num_channels;     // number of channels
   int channel_order;    // presumably one of STBI_ORDER_* -- confirm against the loaders
} stbi__result_info;
818 
819 #ifndef STBI_NO_JPEG
820 static int stbi__jpeg_test(stbi__context *s);
821 static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
822 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
823 #endif
824 
825 #ifndef STBI_NO_PNG
826 static int stbi__png_test(stbi__context *s);
827 static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
828 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
829 static int stbi__png_is16(stbi__context *s);
830 #endif
831 
832 #ifndef STBI_NO_BMP
833 static int stbi__bmp_test(stbi__context *s);
834 static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
835 static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
836 #endif
837 
838 #ifndef STBI_NO_TGA
839 static int stbi__tga_test(stbi__context *s);
840 static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
841 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
842 #endif
843 
844 #ifndef STBI_NO_PSD
845 static int stbi__psd_test(stbi__context *s);
846 static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
847 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
848 static int stbi__psd_is16(stbi__context *s);
849 #endif
850 
851 #ifndef STBI_NO_HDR
852 static int stbi__hdr_test(stbi__context *s);
853 static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
854 static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
855 #endif
856 
857 #ifndef STBI_NO_PIC
858 static int stbi__pic_test(stbi__context *s);
859 static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
860 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
861 #endif
862 
863 #ifndef STBI_NO_GIF
864 static int stbi__gif_test(stbi__context *s);
865 static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
866 static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
867 static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
868 #endif
869 
870 #ifndef STBI_NO_PNM
871 static int stbi__pnm_test(stbi__context *s);
872 static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
873 static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
874 #endif
875 
876 // this is not threadsafe
877 static const char *stbi__g_failure_reason;
878 
879 STBIDEF const char *stbi_failure_reason(void)
880 {
881  return stbi__g_failure_reason;
882 }
883 
884 static int stbi__err(const char *str)
885 {
886  stbi__g_failure_reason = str;
887  return 0;
888 }
889 
// Single allocation entry point: forwards `size` to the STBI_MALLOC macro.
static void *stbi__malloc(size_t size)
{
   void *block = STBI_MALLOC(size);
   return block;
}
894 
895 // stb_image uses ints pervasively, including for offset calculations.
896 // therefore the largest decoded image size we can support with the
897 // current code, even on 64-bit targets, is INT_MAX. this is not a
898 // significant limitation for the intended use case.
899 //
900 // we do, however, need to make sure our size calculations don't
901 // overflow. hence a few helper functions for size calculations that
902 // multiply integers together, making sure that they're non-negative
903 // and no overflow occurs.
904 
// Returns 1 if "a + b" is a valid size sum, 0 otherwise.
// A sum is valid when neither term is negative and the result does not
// exceed INT_MAX. Both terms are checked so the function matches its
// documented contract even if a caller passes a negative `a`.
static int stbi__addsizes_valid(int a, int b)
{
   if (a < 0 || b < 0) return 0;
   // now 0 <= b <= INT_MAX, hence also
   // 0 <= INT_MAX - b <= INT_MAX.
   // And "a + b <= INT_MAX" (which might overflow) is the
   // same as a <= INT_MAX - b (no overflow)
   return a <= INT_MAX - b;
}
916 
// Reports whether "a * b" can be computed as a size: 1 when both factors
// are non-negative and the product fits in an int, 0 otherwise.
static int stbi__mul2sizes_valid(int a, int b)
{
   int ok;
   if (a < 0 || b < 0)
      ok = 0;                    // a negative factor is never acceptable
   else if (b == 0)
      ok = 1;                    // anything times zero cannot overflow
   else
      ok = (a <= INT_MAX / b);   // division form avoids overflowing a*b
   return ok;
}
926 
// Validates the size expression "a*b + add": 1 when no term or factor is
// rejected by the helpers and nothing overflows, 0 otherwise.
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   // a*b is only evaluated once the product is known to be safe
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   return stbi__addsizes_valid(a*b, add) ? 1 : 0;
}
932 
// Validates the size expression "a*b*c + add": 1 when no term or factor is
// rejected by the helpers and nothing overflows, 0 otherwise.
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   // each partial product is checked before it is used in the next step
   if (!stbi__mul2sizes_valid(a, b))   return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   return stbi__addsizes_valid(a*b*c, add) ? 1 : 0;
}
939 
// Validates the size expression "a*b*c*d + add": 1 when no term or factor
// is rejected by the helpers and nothing overflows, 0 otherwise.
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   // each partial product is checked before it is used in the next step
   if (!stbi__mul2sizes_valid(a, b))     return 0;
   if (!stbi__mul2sizes_valid(a*b, c))   return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d)) return 0;
   return stbi__addsizes_valid(a*b*c*d, add) ? 1 : 0;
}
#endif
948 
// Overflow-checked allocation of "a*b + add" bytes.
// Returns NULL without allocating when the size expression is invalid.
static void *stbi__malloc_mad2(int a, int b, int add)
{
   return stbi__mad2sizes_valid(a, b, add) ? stbi__malloc(a*b + add) : NULL;
}
955 
// Overflow-checked allocation of "a*b*c + add" bytes.
// Returns NULL without allocating when the size expression is invalid.
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   return stbi__mad3sizes_valid(a, b, c, add) ? stbi__malloc(a*b*c + add) : NULL;
}
961 
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
// Overflow-checked allocation of "a*b*c*d + add" bytes.
// Returns NULL without allocating when the size expression is invalid.
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   return stbi__mad4sizes_valid(a, b, c, d, add) ? stbi__malloc(a*b*c*d + add) : NULL;
}
#endif
969 
// Error-reporting macros (two message arguments: x = short, y = verbose):
//   stbi__err    - error, evaluates to 0
//   stbi__errpf  - error, evaluates to a NULL pointer to float
//   stbi__errpuc - error, evaluates to a NULL pointer to unsigned char
// STBI_NO_FAILURE_STRINGS drops the message entirely; STBI_FAILURE_USERMSG
// selects the second (y) message instead of the first (x).

#if defined(STBI_NO_FAILURE_STRINGS)
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
984 
985 STBIDEF void stbi_image_free(void *retval_from_stbi_load)
986 {
987  STBI_FREE(retval_from_stbi_load);
988 }
989 
990 #ifndef STBI_NO_LINEAR
991 static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
992 #endif
993 
994 #ifndef STBI_NO_HDR
995 static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
996 #endif
997 
998 static int stbi__vertically_flip_on_load = 0;
999 
1000 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
1001 {
1002  stbi__vertically_flip_on_load = flag_true_if_should_flip;
1003 }
1004 
1005 static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
1006 {
1007  memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
1008  ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
1009  ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
1010  ri->num_channels = 0;
1011 
1012  #ifndef STBI_NO_JPEG
1013  if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
1014  #endif
1015  #ifndef STBI_NO_PNG
1016  if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri);
1017  #endif
1018  #ifndef STBI_NO_BMP
1019  if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri);
1020  #endif
1021  #ifndef STBI_NO_GIF
1022  if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri);
1023  #endif
1024  #ifndef STBI_NO_PSD
1025  if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
1026  #endif
1027  #ifndef STBI_NO_PIC
1028  if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri);
1029  #endif
1030  #ifndef STBI_NO_PNM
1031  if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri);
1032  #endif
1033 
1034  #ifndef STBI_NO_HDR
1035  if (stbi__hdr_test(s)) {
1036  float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
1037  return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
1038  }
1039  #endif
1040 
1041  #ifndef STBI_NO_TGA
1042  // test tga last because it's a crappy test!
1043  if (stbi__tga_test(s))
1044  return stbi__tga_load(s,x,y,comp,req_comp, ri);
1045  #endif
1046 
1047  return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
1048 }
1049 
1050 static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1051 {
1052  int i;
1053  int img_len = w * h * channels;
1054  stbi_uc *reduced;
1055 
1056  reduced = (stbi_uc *) stbi__malloc(img_len);
1057  if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1058 
1059  for (i = 0; i < img_len; ++i)
1060  reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1061 
1062  STBI_FREE(orig);
1063  return reduced;
1064 }
1065 
1066 static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1067 {
1068  int i;
1069  int img_len = w * h * channels;
1070  stbi__uint16 *enlarged;
1071 
1072  enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1073  if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1074 
1075  for (i = 0; i < img_len; ++i)
1076  enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1077 
1078  STBI_FREE(orig);
1079  return enlarged;
1080 }
1081 
1082 static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1083 {
1084  int row;
1085  size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1086  stbi_uc temp[2048];
1087  stbi_uc *bytes = (stbi_uc *)image;
1088 
1089  for (row = 0; row < (h>>1); row++) {
1090  stbi_uc *row0 = bytes + row*bytes_per_row;
1091  stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
1092  // swap row0 with row1
1093  size_t bytes_left = bytes_per_row;
1094  while (bytes_left) {
1095  size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1096  memcpy(temp, row0, bytes_copy);
1097  memcpy(row0, row1, bytes_copy);
1098  memcpy(row1, temp, bytes_copy);
1099  row0 += bytes_copy;
1100  row1 += bytes_copy;
1101  bytes_left -= bytes_copy;
1102  }
1103  }
1104 }
1105 
1106 #ifndef STBI_NO_GIF
1107 static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
1108 {
1109  int slice;
1110  int slice_size = w * h * bytes_per_pixel;
1111 
1112  stbi_uc *bytes = (stbi_uc *)image;
1113  for (slice = 0; slice < z; ++slice) {
1114  stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
1115  bytes += slice_size;
1116  }
1117 }
1118 #endif
1119 
1120 static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1121 {
1122  stbi__result_info ri;
1123  void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
1124 
1125  if (result == NULL)
1126  return NULL;
1127 
1128  if (ri.bits_per_channel != 8) {
1129  STBI_ASSERT(ri.bits_per_channel == 16);
1130  result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1131  ri.bits_per_channel = 8;
1132  }
1133 
1134  // @TODO: move stbi__convert_format to here
1135 
1136  if (stbi__vertically_flip_on_load) {
1137  int channels = req_comp ? req_comp : *comp;
1138  stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
1139  }
1140 
1141  return (unsigned char *) result;
1142 }
1143 
1144 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1145 {
1146  stbi__result_info ri;
1147  void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1148 
1149  if (result == NULL)
1150  return NULL;
1151 
1152  if (ri.bits_per_channel != 16) {
1153  STBI_ASSERT(ri.bits_per_channel == 8);
1154  result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1155  ri.bits_per_channel = 16;
1156  }
1157 
1158  // @TODO: move stbi__convert_format16 to here
1159  // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1160 
1161  if (stbi__vertically_flip_on_load) {
1162  int channels = req_comp ? req_comp : *comp;
1163  stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1164  }
1165 
1166  return (stbi__uint16 *) result;
1167 }
1168 
1169 #if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)
1170 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1171 {
1172  if (stbi__vertically_flip_on_load && result != NULL) {
1173  int channels = req_comp ? req_comp : *comp;
1174  stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1175  }
1176 }
1177 #endif
1178 
1179 #ifndef STBI_NO_STDIO
1180 
#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
// Local prototypes for the two Win32 conversion routines used below, so
// that no Windows header has to be included.
STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);

// Converts the NUL-terminated wide string `input` to UTF-8 (code page
// 65001) in `buffer`, which holds `bufferlen` bytes, and returns whatever
// WideCharToMultiByte returns.
STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
{
   int capacity = (int) bufferlen;
   return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, capacity, NULL, NULL);
}
#endif
1192 
// Opens `filename` with the given fopen-style `mode`.
// With MSVC and STBI_WINDOWS_UTF8 both strings are treated as UTF-8,
// converted to wide characters and opened through the wide-character API;
// otherwise fopen_s (MSVC >= 1400) or plain fopen is used.
// Returns the opened stream, or 0 when conversion or opening fails.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
   FILE *f;
#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
   wchar_t wMode[64];
   wchar_t wFilename[1024];
   // The last argument of MultiByteToWideChar is a capacity in wide
   // characters, not bytes; passing sizeof(buffer) would let it write past
   // the end of the array.
   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
      return 0;

   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
      return 0;

#if _MSC_VER >= 1400
   if (0 != _wfopen_s(&f, wFilename, wMode))
      f = 0;
#else
   f = _wfopen(wFilename, wMode);
#endif

#elif defined(_MSC_VER) && _MSC_VER >= 1400
   if (0 != fopen_s(&f, filename, mode))
      f=0;
#else
   f = fopen(filename, mode);
#endif
   return f;
}
1220 
1221 
1222 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1223 {
1224  FILE *f = stbi__fopen(filename, "rb");
1225  unsigned char *result;
1226  if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1227  result = stbi_load_from_file(f,x,y,comp,req_comp);
1228  fclose(f);
1229  return result;
1230 }
1231 
1232 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1233 {
1234  unsigned char *result;
1235  stbi__context s;
1236  stbi__start_file(&s,f);
1237  result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1238  if (result) {
1239  // need to 'unget' all the characters in the IO buffer
1240  fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1241  }
1242  return result;
1243 }
1244 
1245 STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1246 {
1247  stbi__uint16 *result;
1248  stbi__context s;
1249  stbi__start_file(&s,f);
1250  result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1251  if (result) {
1252  // need to 'unget' all the characters in the IO buffer
1253  fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1254  }
1255  return result;
1256 }
1257 
1258 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1259 {
1260  FILE *f = stbi__fopen(filename, "rb");
1261  stbi__uint16 *result;
1262  if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1263  result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1264  fclose(f);
1265  return result;
1266 }
1267 
1268 
1269 #endif //!STBI_NO_STDIO
1270 
1271 STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
1272 {
1273  stbi__context s;
1274  stbi__start_mem(&s,buffer,len);
1275  return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1276 }
1277 
1278 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
1279 {
1280  stbi__context s;
1281  stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1282  return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1283 }
1284 
1285 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1286 {
1287  stbi__context s;
1288  stbi__start_mem(&s,buffer,len);
1289  return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1290 }
1291 
1292 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1293 {
1294  stbi__context s;
1295  stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1296  return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1297 }
1298 
1299 #ifndef STBI_NO_GIF
1300 STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
1301 {
1302  unsigned char *result;
1303  stbi__context s;
1304  stbi__start_mem(&s,buffer,len);
1305 
1306  result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
1307  if (stbi__vertically_flip_on_load) {
1308  stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
1309  }
1310 
1311  return result;
1312 }
1313 #endif
1314 
1315 #ifndef STBI_NO_LINEAR
1316 static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1317 {
1318  unsigned char *data;
1319  #ifndef STBI_NO_HDR
1320  if (stbi__hdr_test(s)) {
1321  stbi__result_info ri;
1322  float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
1323  if (hdr_data)
1324  stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
1325  return hdr_data;
1326  }
1327  #endif
1328  data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
1329  if (data)
1330  return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
1331  return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
1332 }
1333 
1334 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1335 {
1336  stbi__context s;
1337  stbi__start_mem(&s,buffer,len);
1338  return stbi__loadf_main(&s,x,y,comp,req_comp);
1339 }
1340 
1341 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1342 {
1343  stbi__context s;
1344  stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1345  return stbi__loadf_main(&s,x,y,comp,req_comp);
1346 }
1347 
1348 #ifndef STBI_NO_STDIO
1349 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1350 {
1351  float *result;
1352  FILE *f = stbi__fopen(filename, "rb");
1353  if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1354  result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1355  fclose(f);
1356  return result;
1357 }
1358 
1359 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1360 {
1361  stbi__context s;
1362  stbi__start_file(&s,f);
1363  return stbi__loadf_main(&s,x,y,comp,req_comp);
1364 }
1365 #endif // !STBI_NO_STDIO
1366 
1367 #endif // !STBI_NO_LINEAR
1368 
1369 // these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is
1370 // defined, for API simplicity; if STBI_NO_LINEAR is defined, it always
1371 // reports false!
1372 
1373 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1374 {
1375  #ifndef STBI_NO_HDR
1376  stbi__context s;
1377  stbi__start_mem(&s,buffer,len);
1378  return stbi__hdr_test(&s);
1379  #else
1380  STBI_NOTUSED(buffer);
1381  STBI_NOTUSED(len);
1382  return 0;
1383  #endif
1384 }
1385 
1386 #ifndef STBI_NO_STDIO
1387 STBIDEF int stbi_is_hdr (char const *filename)
1388 {
1389  FILE *f = stbi__fopen(filename, "rb");
1390  int result=0;
1391  if (f) {
1392  result = stbi_is_hdr_from_file(f);
1393  fclose(f);
1394  }
1395  return result;
1396 }
1397 
1398 STBIDEF int stbi_is_hdr_from_file(FILE *f)
1399 {
1400  #ifndef STBI_NO_HDR
1401  long pos = ftell(f);
1402  int res;
1403  stbi__context s;
1404  stbi__start_file(&s,f);
1405  res = stbi__hdr_test(&s);
1406  fseek(f, pos, SEEK_SET);
1407  return res;
1408  #else
1409  STBI_NOTUSED(f);
1410  return 0;
1411  #endif
1412 }
1413 #endif // !STBI_NO_STDIO
1414 
1415 STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1416 {
1417  #ifndef STBI_NO_HDR
1418  stbi__context s;
1419  stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1420  return stbi__hdr_test(&s);
1421  #else
1422  STBI_NOTUSED(clbk);
1423  STBI_NOTUSED(user);
1424  return 0;
1425  #endif
1426 }
1427 
1428 #ifndef STBI_NO_LINEAR
1429 static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1430 
1431 STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
1432 STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1433 #endif
1434 
1435 static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1436 
1437 STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
1438 STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1439 
1440 
1441 //////////////////////////////////////////////////////////////////////////////
1442 //
1443 // Common code used by all image loaders
1444 //
1445 
// Scan-mode constants, numbered consecutively from 0.
enum
{
   STBI__SCAN_load   = 0,
   STBI__SCAN_type   = 1,
   STBI__SCAN_header = 2
};
1452 
1453 static void stbi__refill_buffer(stbi__context *s)
1454 {
1455  int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
1456  if (n == 0) {
1457  // at end of file, treat same as if from memory, but need to handle case
1458  // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
1459  s->read_from_callbacks = 0;
1460  s->img_buffer = s->buffer_start;
1461  s->img_buffer_end = s->buffer_start+1;
1462  *s->img_buffer = 0;
1463  } else {
1464  s->img_buffer = s->buffer_start;
1465  s->img_buffer_end = s->buffer_start + n;
1466  }
1467 }
1468 
1469 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1470 {
1471  if (s->img_buffer < s->img_buffer_end)
1472  return *s->img_buffer++;
1473  if (s->read_from_callbacks) {
1474  stbi__refill_buffer(s);
1475  return *s->img_buffer++;
1476  }
1477  return 0;
1478 }
1479 
1480 stbi_inline static int stbi__at_eof(stbi__context *s)
1481 {
1482  if (s->io.read) {
1483  if (!(s->io.eof)(s->io_user_data)) return 0;
1484  // if feof() is true, check if buffer = end
1485  // special case: we've only got the special 0 character at the end
1486  if (s->read_from_callbacks == 0) return 1;
1487  }
1488 
1489  return s->img_buffer >= s->img_buffer_end;
1490 }
1491 
1492 static void stbi__skip(stbi__context *s, int n)
1493 {
1494  if (n < 0) {
1495  s->img_buffer = s->img_buffer_end;
1496  return;
1497  }
1498  if (s->io.read) {
1499  int blen = (int) (s->img_buffer_end - s->img_buffer);
1500  if (blen < n) {
1501  s->img_buffer = s->img_buffer_end;
1502  (s->io.skip)(s->io_user_data, n - blen);
1503  return;
1504  }
1505  }
1506  s->img_buffer += n;
1507 }
1508 
1509 static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
1510 {
1511  if (s->io.read) {
1512  int blen = (int) (s->img_buffer_end - s->img_buffer);
1513  if (blen < n) {
1514  int res, count;
1515 
1516  memcpy(buffer, s->img_buffer, blen);
1517 
1518  count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
1519  res = (count == (n-blen));
1520  s->img_buffer = s->img_buffer_end;
1521  return res;
1522  }
1523  }
1524 
1525  if (s->img_buffer+n <= s->img_buffer_end) {
1526  memcpy(buffer, s->img_buffer, n);
1527  s->img_buffer += n;
1528  return 1;
1529  } else
1530  return 0;
1531 }
1532 
1533 static int stbi__get16be(stbi__context *s)
1534 {
1535  int z = stbi__get8(s);
1536  return (z << 8) + stbi__get8(s);
1537 }
1538 
1539 static stbi__uint32 stbi__get32be(stbi__context *s)
1540 {
1541  stbi__uint32 z = stbi__get16be(s);
1542  return (z << 16) + stbi__get16be(s);
1543 }
1544 
1545 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1546 // nothing
1547 #else
1548 static int stbi__get16le(stbi__context *s)
1549 {
1550  int z = stbi__get8(s);
1551  return z + (stbi__get8(s) << 8);
1552 }
1553 #endif
1554 
1555 #ifndef STBI_NO_BMP
1556 static stbi__uint32 stbi__get32le(stbi__context *s)
1557 {
1558  stbi__uint32 z = stbi__get16le(s);
1559  return z + (stbi__get16le(s) << 16);
1560 }
1561 #endif
1562 
// Keeps only the low 8 bits of an int and converts the result to stbi_uc,
// so the narrowing does not trigger compiler warnings.
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 0xFF))
1564 
1565 
1566 //////////////////////////////////////////////////////////////////////////////
1567 //
1568 // generic converter from built-in img_n to req_comp
1569 // individual types do this automatically as much as possible (e.g. jpeg
1570 // does all cases internally since it needs to colorspace convert anyway,
1571 // and it never has alpha, so very few cases ). png can automatically
1572 // interleave an alpha=255 channel, but falls back to this for other cases
1573 //
1574 // assume data buffer is malloced, so malloc a new one and free that one
1575 // only failure mode is malloc failing
1576 
1577 static stbi_uc stbi__compute_y(int r, int g, int b)
1578 {
1579  return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8);
1580 }
1581 
// Converts an 8-bit image from img_n components per pixel to req_comp.
//   data      - source pixels; returned unchanged when req_comp == img_n,
//               otherwise freed (on success and on failure)
//   img_n     - components per pixel in `data`
//   req_comp  - desired components per pixel (asserted to be 1..4)
//   x, y      - width and height in pixels
// Returns the converted buffer, or NULL with an error recorded when the
// allocation fails or the img_n/req_comp combination is not supported.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   unsigned char *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      unsigned char *src  = data + j * x * img_n   ;
      unsigned char *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                  } break;
         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255;    } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                    } break;
         // Unsupported combination: with assertions compiled out, falling
         // through would hand back an uninitialised buffer, so fail cleanly.
         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion");
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1625 
1626 static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1627 {
1628  return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8);
1629 }
1630 
1631 static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
1632 {
1633  int i,j;
1634  stbi__uint16 *good;
1635 
1636  if (req_comp == img_n) return data;
1637  STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1638 
1639  good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2);
1640  if (good == NULL) {
1641  STBI_FREE(data);
1642  return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1643  }
1644 
1645  for (j=0; j < (int) y; ++j) {
1646  stbi__uint16 *src = data + j * x * img_n ;
1647  stbi__uint16 *dest = good + j * x * req_comp;
1648 
1649  #define STBI__COMBO(a,b) ((a)*8+(b))
1650  #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
1651  // convert source image with img_n components to one with req_comp components;
1652  // avoid switch per pixel, so use switch per scanline and massive macros
1653  switch (STBI__COMBO(img_n, req_comp)) {
1654  STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff; } break;
1655  STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
1656  STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff; } break;
1657  STBI__CASE(2,1) { dest[0]=src[0]; } break;
1658  STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
1659  STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break;
1660  STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff; } break;
1661  STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
1662  STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break;
1663  STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
1664  STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break;
1665  STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break;
1666  default: STBI_ASSERT(0);
1667  }
1668  #undef STBI__CASE
1669  }
1670 
1671  STBI_FREE(data);
1672  return good;
1673 }
1674 
1675 #ifndef STBI_NO_LINEAR
1676 static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1677 {
1678  int i,k,n;
1679  float *output;
1680  if (!data) return NULL;
1681  output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
1682  if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
1683  // compute number of non-alpha components
1684  if (comp & 1) n = comp; else n = comp-1;
1685  for (i=0; i < x*y; ++i) {
1686  for (k=0; k < n; ++k) {
1687  output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
1688  }
1689  }
1690  if (n < comp) {
1691  for (i=0; i < x*y; ++i) {
1692  output[i*comp + n] = data[i*comp + n]/255.0f;
1693  }
1694  }
1695  STBI_FREE(data);
1696  return output;
1697 }
1698 #endif
1699 
1700 #ifndef STBI_NO_HDR
1701 #define stbi__float2int(x) ((int) (x))
1702 static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
1703 {
1704  int i,k,n;
1705  stbi_uc *output;
1706  if (!data) return NULL;
1707  output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1708  if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1709  // compute number of non-alpha components
1710  if (comp & 1) n = comp; else n = comp-1;
1711  for (i=0; i < x*y; ++i) {
1712  for (k=0; k < n; ++k) {
1713  float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1714  if (z < 0) z = 0;
1715  if (z > 255) z = 255;
1716  output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1717  }
1718  if (k < comp) {
1719  float z = data[i*comp+k] * 255 + 0.5f;
1720  if (z < 0) z = 0;
1721  if (z > 255) z = 255;
1722  output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1723  }
1724  }
1725  STBI_FREE(data);
1726  return output;
1727 }
1728 #endif
1729 
1730 //////////////////////////////////////////////////////////////////////////////
1731 //
1732 // "baseline" JPEG/JFIF decoder
1733 //
1734 // simple implementation
1735 // - doesn't support delayed output of y-dimension
1736 // - simple interface (only one output format: 8-bit interleaved RGB)
1737 // - doesn't try to recover corrupt jpegs
1738 // - doesn't allow partial loading, loading multiple at once
1739 // - still fast on x86 (copying globals into locals doesn't help x86)
1740 // - allocates lots of intermediate memory (full size of all components)
1741 // - non-interleaved case requires this anyway
1742 // - allows good upsampling (see next)
1743 // high-quality
1744 // - upsampled channels are bilinearly interpolated, even across blocks
1745 // - quality integer IDCT derived from IJG's 'slow'
1746 // performance
1747 // - fast huffman; reasonable integer IDCT
1748 // - some SIMD kernels for common paths on targets with SSE2/NEON
1749 // - uses a lot of intermediate memory, could cache poorly
1750 
1751 #ifndef STBI_NO_JPEG
1752 
1753 // huffman decoding acceleration
1754 #define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
1755 
1756 typedef struct
1757 {
1758  stbi_uc fast[1 << FAST_BITS];
1759  // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
1760  stbi__uint16 code[256];
1761  stbi_uc values[256];
1762  stbi_uc size[257];
1763  unsigned int maxcode[18];
1764  int delta[17]; // old 'firstsymbol' - old 'firstcode'
1765 } stbi__huffman;
1766 
// Complete state of one JPEG decode: input stream, Huffman/dequantization
// tables, per-component buffers, the entropy-decoder bit buffer, the
// parameters of the current scan and the selected compute kernels.
1767 typedef struct
1768 {
1769  stbi__context *s; // input the decoder pulls bytes from (via stbi__get8)
1770  stbi__huffman huff_dc[4]; // DC Huffman tables -- presumably indexed by table id, TODO confirm
1771  stbi__huffman huff_ac[4]; // AC Huffman tables -- presumably indexed by table id, TODO confirm
1772  stbi__uint16 dequant[4][64]; // dequantization multipliers, 64 per table
1773  stbi__int16 fast_ac[4][1 << FAST_BITS]; // combined run/length/value lookups in the layout produced by stbi__build_fast_ac
1774 
1775 // sizes for components, interleaved MCUs
1776  int img_h_max, img_v_max;
1777  int img_mcu_x, img_mcu_y;
1778  int img_mcu_w, img_mcu_h;
1779 
1780 // definition of jpeg image component
1781  struct
1782  {
1783  int id;
1784  int h,v;
1785  int tq;
1786  int hd,ha;
1787  int dc_pred; // running DC predictor: each decoded DC difference is added to it
1788 
1789  int x,y,w2,h2;
1790  stbi_uc *data;
1791  void *raw_data, *raw_coeff;
1792  stbi_uc *linebuf;
1793  short *coeff; // progressive only
1794  int coeff_w, coeff_h; // number of 8x8 coefficient blocks
1795  } img_comp[4];
1796 
// code_buffer is consumed from the most significant bit downwards; new bytes
// are OR'ed in below the code_bits bits that are still valid
1797  stbi__uint32 code_buffer; // jpeg entropy-coded buffer
1798  int code_bits; // number of valid bits
1799  unsigned char marker; // marker seen while filling entropy buffer
1800  int nomore; // flag if we saw a marker so must stop
1801 
// parameters of the current progressive scan, as read by the
// stbi__jpeg_decode_block_prog_* functions
1802  int progressive;
1803  int spec_start; // first zigzag index coded in the scan (0 = DC scan)
1804  int spec_end; // last zigzag index coded in the scan
1805  int succ_high; // 0 for a first pass, non-zero for a refinement pass
1806  int succ_low; // bit position the scan's values are shifted up by
1807  int eob_run; // number of following blocks that are entirely end-of-band
1808  int jfif;
1809  int app14_color_transform; // Adobe APP14 tag
1810  int rgb;
1811 
1812  int scan_n, order[4];
1813  int restart_interval, todo;
1814 
1815 // kernels
// function pointers so that a generic C or a SIMD implementation can be plugged in
1816  void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
1817  void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
1818  stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
1819 } stbi__jpeg;
1820 
1821 static int stbi__build_huffman(stbi__huffman *h, int *count)
1822 {
1823  int i,j,k=0;
1824  unsigned int code;
1825  // build size list for each symbol (from JPEG spec)
1826  for (i=0; i < 16; ++i)
1827  for (j=0; j < count[i]; ++j)
1828  h->size[k++] = (stbi_uc) (i+1);
1829  h->size[k] = 0;
1830 
1831  // compute actual symbols (from jpeg spec)
1832  code = 0;
1833  k = 0;
1834  for(j=1; j <= 16; ++j) {
1835  // compute delta to add to code to compute symbol id
1836  h->delta[j] = k - code;
1837  if (h->size[k] == j) {
1838  while (h->size[k] == j)
1839  h->code[k++] = (stbi__uint16) (code++);
1840  if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
1841  }
1842  // compute largest code + 1 for this size, preshifted as needed later
1843  h->maxcode[j] = code << (16-j);
1844  code <<= 1;
1845  }
1846  h->maxcode[j] = 0xffffffff;
1847 
1848  // build non-spec acceleration table; 255 is flag for not-accelerated
1849  memset(h->fast, 255, 1 << FAST_BITS);
1850  for (i=0; i < k; ++i) {
1851  int s = h->size[i];
1852  if (s <= FAST_BITS) {
1853  int c = h->code[i] << (FAST_BITS-s);
1854  int m = 1 << (FAST_BITS-s);
1855  for (j=0; j < m; ++j) {
1856  h->fast[c+j] = (stbi_uc) i;
1857  }
1858  }
1859  }
1860  return 1;
1861 }
1862 
1863 // build a table that decodes both magnitude and value of small ACs in
1864 // one go.
1865 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1866 {
1867  int i;
1868  for (i=0; i < (1 << FAST_BITS); ++i) {
1869  stbi_uc fast = h->fast[i];
1870  fast_ac[i] = 0;
1871  if (fast < 255) {
1872  int rs = h->values[fast];
1873  int run = (rs >> 4) & 15;
1874  int magbits = rs & 15;
1875  int len = h->size[fast];
1876 
1877  if (magbits && len + magbits <= FAST_BITS) {
1878  // magnitude code followed by receive_extend code
1879  int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1880  int m = 1 << (magbits - 1);
1881  if (k < m) k += (~0U << magbits) + 1;
1882  // if the result is small enough, we can fit it in fast_ac table
1883  if (k >= -128 && k <= 127)
1884  fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
1885  }
1886  }
1887  }
1888 }
1889 
1890 static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
1891 {
1892  do {
1893  unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
1894  if (b == 0xff) {
1895  int c = stbi__get8(j->s);
1896  while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
1897  if (c != 0) {
1898  j->marker = (unsigned char) c;
1899  j->nomore = 1;
1900  return;
1901  }
1902  }
1903  j->code_buffer |= b << (24 - j->code_bits);
1904  j->code_bits += 8;
1905  } while (j->code_bits <= 24);
1906 }
1907 
1908 // (1 << n) - 1
1909 static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1910 
1911 // decode a jpeg huffman value from the bitstream
1912 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1913 {
1914  unsigned int temp;
1915  int c,k;
1916 
1917  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1918 
1919  // look at the top FAST_BITS and determine what symbol ID it is,
1920  // if the code is <= FAST_BITS
1921  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1922  k = h->fast[c];
1923  if (k < 255) {
1924  int s = h->size[k];
1925  if (s > j->code_bits)
1926  return -1;
1927  j->code_buffer <<= s;
1928  j->code_bits -= s;
1929  return h->values[k];
1930  }
1931 
1932  // naive test is to shift the code_buffer down so k bits are
1933  // valid, then test against maxcode. To speed this up, we've
1934  // preshifted maxcode left so that it has (16-k) 0s at the
1935  // end; in other words, regardless of the number of bits, it
1936  // wants to be compared against something shifted to have 16;
1937  // that way we don't need to shift inside the loop.
1938  temp = j->code_buffer >> 16;
1939  for (k=FAST_BITS+1 ; ; ++k)
1940  if (temp < h->maxcode[k])
1941  break;
1942  if (k == 17) {
1943  // error! code not found
1944  j->code_bits -= 16;
1945  return -1;
1946  }
1947 
1948  if (k > j->code_bits)
1949  return -1;
1950 
1951  // convert the huffman code to the symbol id
1952  c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1953  STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1954 
1955  // convert the id to a symbol
1956  j->code_bits -= k;
1957  j->code_buffer <<= k;
1958  return h->values[c];
1959 }
1960 
// stbi__jbias[n] = 1 - 2^n for n = 0..15: the offset that turns an n-bit
// magnitude whose leading bit is 0 into the negative value it encodes
static const int stbi__jbias[16] = {
   0,
   1 - (1 <<  1), 1 - (1 <<  2), 1 - (1 <<  3),
   1 - (1 <<  4), 1 - (1 <<  5), 1 - (1 <<  6),
   1 - (1 <<  7), 1 - (1 <<  8), 1 - (1 <<  9),
   1 - (1 << 10), 1 - (1 << 11), 1 - (1 << 12),
   1 - (1 << 13), 1 - (1 << 14), 1 - (1 << 15)
};
1963 
1964 // combined JPEG 'receive' and JPEG 'extend', since baseline
1965 // always extends everything it receives.
1966 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1967 {
1968  unsigned int k;
1969  int sgn;
1970  if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1971 
1972  sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
1973  k = stbi_lrot(j->code_buffer, n);
1974  STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
1975  j->code_buffer = k & ~stbi__bmask[n];
1976  k &= stbi__bmask[n];
1977  j->code_bits -= n;
1978  return k + (stbi__jbias[n] & ~sgn);
1979 }
1980 
1981 // get some unsigned bits
1982 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1983 {
1984  unsigned int k;
1985  if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1986  k = stbi_lrot(j->code_buffer, n);
1987  j->code_buffer = k & ~stbi__bmask[n];
1988  k &= stbi__bmask[n];
1989  j->code_bits -= n;
1990  return k;
1991 }
1992 
1993 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1994 {
1995  unsigned int k;
1996  if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1997  k = j->code_buffer;
1998  j->code_buffer <<= 1;
1999  --j->code_bits;
2000  return k & 0x80000000;
2001 }
2002 
2003 // given a value that's at position X in the zigzag stream,
2004 // where does it appear in the 8x8 matrix coded as row-major?
2005 static const stbi_uc stbi__jpeg_dezigzag[64+15] =
2006 {
2007  0, 1, 8, 16, 9, 2, 3, 10,
2008  17, 24, 32, 25, 18, 11, 4, 5,
2009  12, 19, 26, 33, 40, 48, 41, 34,
2010  27, 20, 13, 6, 7, 14, 21, 28,
2011  35, 42, 49, 56, 57, 50, 43, 36,
2012  29, 22, 15, 23, 30, 37, 44, 51,
2013  58, 59, 52, 45, 38, 31, 39, 46,
2014  53, 60, 61, 54, 47, 55, 62, 63,
2015  // let corrupt input sample past end
2016  63, 63, 63, 63, 63, 63, 63, 63,
2017  63, 63, 63, 63, 63, 63, 63
2018 };
2019 
2020 // decode one 64-entry block--
2021 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
2022 {
2023  int diff,dc,k;
2024  int t;
2025 
2026  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2027  t = stbi__jpeg_huff_decode(j, hdc);
2028  if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2029 
2030  // 0 all the ac values now so we can do it 32-bits at a time
2031  memset(data,0,64*sizeof(data[0]));
2032 
2033  diff = t ? stbi__extend_receive(j, t) : 0;
2034  dc = j->img_comp[b].dc_pred + diff;
2035  j->img_comp[b].dc_pred = dc;
2036  data[0] = (short) (dc * dequant[0]);
2037 
2038  // decode AC components, see JPEG spec
2039  k = 1;
2040  do {
2041  unsigned int zig;
2042  int c,r,s;
2043  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2044  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2045  r = fac[c];
2046  if (r) { // fast-AC path
2047  k += (r >> 4) & 15; // run
2048  s = r & 15; // combined length
2049  j->code_buffer <<= s;
2050  j->code_bits -= s;
2051  // decode into unzigzag'd location
2052  zig = stbi__jpeg_dezigzag[k++];
2053  data[zig] = (short) ((r >> 8) * dequant[zig]);
2054  } else {
2055  int rs = stbi__jpeg_huff_decode(j, hac);
2056  if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2057  s = rs & 15;
2058  r = rs >> 4;
2059  if (s == 0) {
2060  if (rs != 0xf0) break; // end block
2061  k += 16;
2062  } else {
2063  k += r;
2064  // decode into unzigzag'd location
2065  zig = stbi__jpeg_dezigzag[k++];
2066  data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
2067  }
2068  }
2069  } while (k < 64);
2070  return 1;
2071 }
2072 
2073 static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
2074 {
2075  int diff,dc;
2076  int t;
2077  if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2078 
2079  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2080 
2081  if (j->succ_high == 0) {
2082  // first scan for DC coefficient, must be first
2083  memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
2084  t = stbi__jpeg_huff_decode(j, hdc);
2085  diff = t ? stbi__extend_receive(j, t) : 0;
2086 
2087  dc = j->img_comp[b].dc_pred + diff;
2088  j->img_comp[b].dc_pred = dc;
2089  data[0] = (short) (dc << j->succ_low);
2090  } else {
2091  // refinement scan for DC coefficient
2092  if (stbi__jpeg_get_bit(j))
2093  data[0] += (short) (1 << j->succ_low);
2094  }
2095  return 1;
2096 }
2097 
2098 // @OPTIMIZE: store non-zigzagged during the decode passes,
2099 // and only de-zigzag when dequantizing
2100 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
2101 {
2102  int k;
2103  if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2104 
2105  if (j->succ_high == 0) {
2106  int shift = j->succ_low;
2107 
2108  if (j->eob_run) {
2109  --j->eob_run;
2110  return 1;
2111  }
2112 
2113  k = j->spec_start;
2114  do {
2115  unsigned int zig;
2116  int c,r,s;
2117  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2118  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2119  r = fac[c];
2120  if (r) { // fast-AC path
2121  k += (r >> 4) & 15; // run
2122  s = r & 15; // combined length
2123  j->code_buffer <<= s;
2124  j->code_bits -= s;
2125  zig = stbi__jpeg_dezigzag[k++];
2126  data[zig] = (short) ((r >> 8) << shift);
2127  } else {
2128  int rs = stbi__jpeg_huff_decode(j, hac);
2129  if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2130  s = rs & 15;
2131  r = rs >> 4;
2132  if (s == 0) {
2133  if (r < 15) {
2134  j->eob_run = (1 << r);
2135  if (r)
2136  j->eob_run += stbi__jpeg_get_bits(j, r);
2137  --j->eob_run;
2138  break;
2139  }
2140  k += 16;
2141  } else {
2142  k += r;
2143  zig = stbi__jpeg_dezigzag[k++];
2144  data[zig] = (short) (stbi__extend_receive(j,s) << shift);
2145  }
2146  }
2147  } while (k <= j->spec_end);
2148  } else {
2149  // refinement scan for these AC coefficients
2150 
2151  short bit = (short) (1 << j->succ_low);
2152 
2153  if (j->eob_run) {
2154  --j->eob_run;
2155  for (k = j->spec_start; k <= j->spec_end; ++k) {
2156  short *p = &data[stbi__jpeg_dezigzag[k]];
2157  if (*p != 0)
2158  if (stbi__jpeg_get_bit(j))
2159  if ((*p & bit)==0) {
2160  if (*p > 0)
2161  *p += bit;
2162  else
2163  *p -= bit;
2164  }
2165  }
2166  } else {
2167  k = j->spec_start;
2168  do {
2169  int r,s;
2170  int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
2171  if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2172  s = rs & 15;
2173  r = rs >> 4;
2174  if (s == 0) {
2175  if (r < 15) {
2176  j->eob_run = (1 << r) - 1;
2177  if (r)
2178  j->eob_run += stbi__jpeg_get_bits(j, r);
2179  r = 64; // force end of block
2180  } else {
2181  // r=15 s=0 should write 16 0s, so we just do
2182  // a run of 15 0s and then write s (which is 0),
2183  // so we don't have to do anything special here
2184  }
2185  } else {
2186  if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
2187  // sign bit
2188  if (stbi__jpeg_get_bit(j))
2189  s = bit;
2190  else
2191  s = -bit;
2192  }
2193 
2194  // advance by r
2195  while (k <= j->spec_end) {
2196  short *p = &data[stbi__jpeg_dezigzag[k++]];
2197  if (*p != 0) {
2198  if (stbi__jpeg_get_bit(j))
2199  if ((*p & bit)==0) {
2200  if (*p > 0)
2201  *p += bit;
2202  else
2203  *p -= bit;
2204  }
2205  } else {
2206  if (r == 0) {
2207  *p = (short) s;
2208  break;
2209  }
2210  --r;
2211  }
2212  }
2213  } while (k <= j->spec_end);
2214  }
2215  }
2216  return 1;
2217 }
2218 
2219 // take a -128..127 value and stbi__clamp it and convert to 0..255
2220 stbi_inline static stbi_uc stbi__clamp(int x)
2221 {
2222  // trick to use a single test to catch both cases
2223  if ((unsigned int) x > 255) {
2224  if (x < 0) return 0;
2225  if (x > 255) return 255;
2226  }
2227  return (stbi_uc) x;
2228 }
2229 
// 12-bit fixed point helpers for the integer IDCT:
// stbi__f2f converts a floating-point constant, adding 0.5 before truncation;
// stbi__fsh scales an integer to the same fixed-point format.
#define stbi__f2f(x) ((int) (0.5 + 4096 * (x)))
#define stbi__fsh(x) (4096 * (x))
2232 
2233 // derived from jidctint -- DCT_ISLOW
// One 8-point 1D inverse DCT in 12-bit fixed point. s0..s7 are the eight
// input coefficients. The macro begins with a declaration, so it has to be
// the first thing in a block; it leaves its results in the locals it
// declares: x0..x3 hold the even part and t0..t3 the odd part, both scaled
// by 4096. The user forms the eight outputs as x0+t3, x1+t2, x2+t1, x3+t0,
// x3-t0, x2-t1, x1-t2, x0-t3 and removes the scale (see stbi__idct_block).
2234 #define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
2235  int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
2236  p2 = s2; \
2237  p3 = s6; \
2238  p1 = (p2+p3) * stbi__f2f(0.5411961f); \
2239  t2 = p1 + p3*stbi__f2f(-1.847759065f); \
2240  t3 = p1 + p2*stbi__f2f( 0.765366865f); \
2241  p2 = s0; \
2242  p3 = s4; \
2243  t0 = stbi__fsh(p2+p3); \
2244  t1 = stbi__fsh(p2-p3); \
2245  x0 = t0+t3; \
2246  x3 = t0-t3; \
2247  x1 = t1+t2; \
2248  x2 = t1-t2; \
2249  t0 = s7; \
2250  t1 = s5; \
2251  t2 = s3; \
2252  t3 = s1; \
2253  p3 = t0+t2; \
2254  p4 = t1+t3; \
2255  p1 = t0+t3; \
2256  p2 = t1+t2; \
2257  p5 = (p3+p4)*stbi__f2f( 1.175875602f); \
2258  t0 = t0*stbi__f2f( 0.298631336f); \
2259  t1 = t1*stbi__f2f( 2.053119869f); \
2260  t2 = t2*stbi__f2f( 3.072711026f); \
2261  t3 = t3*stbi__f2f( 1.501321110f); \
2262  p1 = p5 + p1*stbi__f2f(-0.899976223f); \
2263  p2 = p5 + p2*stbi__f2f(-2.562915447f); \
2264  p3 = p3*stbi__f2f(-1.961570560f); \
2265  p4 = p4*stbi__f2f(-0.390180644f); \
2266  t3 += p1+p4; \
2267  t2 += p2+p3; \
2268  t1 += p2+p4; \
2269  t0 += p1+p3;
2270 
2271 static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
2272 {
2273  int i,val[64],*v=val;
2274  stbi_uc *o;
2275  short *d = data;
2276 
2277  // columns
2278  for (i=0; i < 8; ++i,++d, ++v) {
2279  // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
2280  if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
2281  && d[40]==0 && d[48]==0 && d[56]==0) {
2282  // no shortcut 0 seconds
2283  // (1|2|3|4|5|6|7)==0 0 seconds
2284  // all separate -0.047 seconds
2285  // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
2286  int dcterm = d[0]*4;
2287  v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
2288  } else {
2289  STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
2290  // constants scaled things up by 1<<12; let's bring them back
2291  // down, but keep 2 extra bits of precision
2292  x0 += 512; x1 += 512; x2 += 512; x3 += 512;
2293  v[ 0] = (x0+t3) >> 10;
2294  v[56] = (x0-t3) >> 10;
2295  v[ 8] = (x1+t2) >> 10;
2296  v[48] = (x1-t2) >> 10;
2297  v[16] = (x2+t1) >> 10;
2298  v[40] = (x2-t1) >> 10;
2299  v[24] = (x3+t0) >> 10;
2300  v[32] = (x3-t0) >> 10;
2301  }
2302  }
2303 
2304  for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
2305  // no fast case since the first 1D IDCT spread components out
2306  STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
2307  // constants scaled things up by 1<<12, plus we had 1<<2 from first
2308  // loop, plus horizontal and vertical each scale by sqrt(8) so together
2309  // we've got an extra 1<<3, so 1<<17 total we need to remove.
2310  // so we want to round that, which means adding 0.5 * 1<<17,
2311  // aka 65536. Also, we'll end up with -128 to 127 that we want
2312  // to encode as 0..255 by adding 128, so we'll add that before the shift
2313  x0 += 65536 + (128<<17);
2314  x1 += 65536 + (128<<17);
2315  x2 += 65536 + (128<<17);
2316  x3 += 65536 + (128<<17);
2317  // tried computing the shifts into temps, or'ing the temps to see
2318  // if any were out of range, but that was slower
2319  o[0] = stbi__clamp((x0+t3) >> 17);
2320  o[7] = stbi__clamp((x0-t3) >> 17);
2321  o[1] = stbi__clamp((x1+t2) >> 17);
2322  o[6] = stbi__clamp((x1-t2) >> 17);
2323  o[2] = stbi__clamp((x2+t1) >> 17);
2324  o[5] = stbi__clamp((x2-t1) >> 17);
2325  o[3] = stbi__clamp((x3+t0) >> 17);
2326  o[4] = stbi__clamp((x3-t0) >> 17);
2327  }
2328 }
2329 
2330 #ifdef STBI_SSE2
2331 // sse2 integer IDCT. not the fastest possible implementation but it
2332 // produces bit-identical results to the generic C version so it's
2333 // fully "transparent".
// Same signature as stbi__idct_block: reads the 64 coefficients in data and
// writes 8 rows of 8 bytes to out, the rows out_stride bytes apart.
// The coefficients are fetched with _mm_load_si128, the aligned load, so data
// is expected to be 16-byte aligned.
2334 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2335 {
2336  // This is constructed to match our regular (generic) integer IDCT exactly.
2337  __m128i row0, row1, row2, row3, row4, row5, row6, row7;
2338  __m128i tmp;
2339 
// The helper macros below declare new variables by pasting suffixes onto
// their arguments (_l/_h are the low and high four 32-bit lanes of a widened
// value), so each expansion has to live in a scope where those names are free.
2340  // dot product constant: even elems=x, odd elems=y
2341  #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
2342 
2343  // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit)
2344  // out(1) = c1[even]*x + c1[odd]*y
2345  #define dct_rot(out0,out1, x,y,c0,c1) \
2346  __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
2347  __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
2348  __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
2349  __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
2350  __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
2351  __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
2352 
2353  // out = in << 12 (in 16-bit, out 32-bit)
// (the unpack with zero places each input in the upper half of a 32-bit
// lane, and the arithmetic shift right by 4 then leaves it multiplied by 4096)
2354  #define dct_widen(out, in) \
2355  __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
2356  __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
2357 
2358  // wide add
2359  #define dct_wadd(out, a, b) \
2360  __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
2361  __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
2362 
2363  // wide sub
2364  #define dct_wsub(out, a, b) \
2365  __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
2366  __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
2367 
2368  // butterfly a/b, add bias, then shift by "s" and pack
2369  #define dct_bfly32o(out0, out1, a,b,bias,s) \
2370  { \
2371  __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
2372  __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
2373  dct_wadd(sum, abiased, b); \
2374  dct_wsub(dif, abiased, b); \
2375  out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
2376  out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
2377  }
2378 
2379  // 8-bit interleave step (for transposes)
2380  #define dct_interleave8(a, b) \
2381  tmp = a; \
2382  a = _mm_unpacklo_epi8(a, b); \
2383  b = _mm_unpackhi_epi8(tmp, b)
2384 
2385  // 16-bit interleave step (for transposes)
2386  #define dct_interleave16(a, b) \
2387  tmp = a; \
2388  a = _mm_unpacklo_epi16(a, b); \
2389  b = _mm_unpackhi_epi16(tmp, b)
2390 
// one full 1D pass over row0..row7, which are both its input and its output;
// it uses the rot*_* constants defined further down
2391  #define dct_pass(bias,shift) \
2392  { \
2393  /* even part */ \
2394  dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
2395  __m128i sum04 = _mm_add_epi16(row0, row4); \
2396  __m128i dif04 = _mm_sub_epi16(row0, row4); \
2397  dct_widen(t0e, sum04); \
2398  dct_widen(t1e, dif04); \
2399  dct_wadd(x0, t0e, t3e); \
2400  dct_wsub(x3, t0e, t3e); \
2401  dct_wadd(x1, t1e, t2e); \
2402  dct_wsub(x2, t1e, t2e); \
2403  /* odd part */ \
2404  dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
2405  dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
2406  __m128i sum17 = _mm_add_epi16(row1, row7); \
2407  __m128i sum35 = _mm_add_epi16(row3, row5); \
2408  dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
2409  dct_wadd(x4, y0o, y4o); \
2410  dct_wadd(x5, y1o, y5o); \
2411  dct_wadd(x6, y2o, y5o); \
2412  dct_wadd(x7, y3o, y4o); \
2413  dct_bfly32o(row0,row7, x0,x7,bias,shift); \
2414  dct_bfly32o(row1,row6, x1,x6,bias,shift); \
2415  dct_bfly32o(row2,row5, x2,x5,bias,shift); \
2416  dct_bfly32o(row3,row4, x3,x4,bias,shift); \
2417  }
2418 
// pairs of fixed-point constants for dct_rot, built from the same
// stbi__f2f values the generic STBI__IDCT_1D macro multiplies by
2419  __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
2420  __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
2421  __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
2422  __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
2423  __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
2424  __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
2425  __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
2426  __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
2427 
2428  // rounding biases in column/row passes, see stbi__idct_block for explanation.
2429  __m128i bias_0 = _mm_set1_epi32(512);
2430  __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
2431 
2432  // load
2433  row0 = _mm_load_si128((const __m128i *) (data + 0*8));
2434  row1 = _mm_load_si128((const __m128i *) (data + 1*8));
2435  row2 = _mm_load_si128((const __m128i *) (data + 2*8));
2436  row3 = _mm_load_si128((const __m128i *) (data + 3*8));
2437  row4 = _mm_load_si128((const __m128i *) (data + 4*8));
2438  row5 = _mm_load_si128((const __m128i *) (data + 5*8));
2439  row6 = _mm_load_si128((const __m128i *) (data + 6*8));
2440  row7 = _mm_load_si128((const __m128i *) (data + 7*8));
2441 
2442  // column pass
2443  dct_pass(bias_0, 10);
2444 
2445  {
2446  // 16bit 8x8 transpose pass 1
2447  dct_interleave16(row0, row4);
2448  dct_interleave16(row1, row5);
2449  dct_interleave16(row2, row6);
2450  dct_interleave16(row3, row7);
2451 
2452  // transpose pass 2
2453  dct_interleave16(row0, row2);
2454  dct_interleave16(row1, row3);
2455  dct_interleave16(row4, row6);
2456  dct_interleave16(row5, row7);
2457 
2458  // transpose pass 3
2459  dct_interleave16(row0, row1);
2460  dct_interleave16(row2, row3);
2461  dct_interleave16(row4, row5);
2462  dct_interleave16(row6, row7);
2463  }
2464 
2465  // row pass
2466  dct_pass(bias_1, 17);
2467 
2468  {
2469  // pack
// (_mm_packus_epi16 saturates to 0..255, which takes the place of the
// generic version's stbi__clamp)
2470  __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2471  __m128i p1 = _mm_packus_epi16(row2, row3);
2472  __m128i p2 = _mm_packus_epi16(row4, row5);
2473  __m128i p3 = _mm_packus_epi16(row6, row7);
2474 
2475  // 8bit 8x8 transpose pass 1
2476  dct_interleave8(p0, p2); // a0e0a1e1...
2477  dct_interleave8(p1, p3); // c0g0c1g1...
2478 
2479  // transpose pass 2
2480  dct_interleave8(p0, p1); // a0c0e0g0...
2481  dct_interleave8(p2, p3); // b0d0f0h0...
2482 
2483  // transpose pass 3
2484  dct_interleave8(p0, p2); // a0b0c0d0...
2485  dct_interleave8(p1, p3); // a4b4c4d4...
2486 
2487  // store
// each register now holds two output rows; the shuffle with 0x4e swaps the
// two 64-bit halves so the second row can be written with the same low store
2488  _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
2489  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
2490  _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
2491  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
2492  _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
2493  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
2494  _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
2495  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
2496  }
2497 
2498 #undef dct_const
2499 #undef dct_rot
2500 #undef dct_widen
2501 #undef dct_wadd
2502 #undef dct_wsub
2503 #undef dct_bfly32o
2504 #undef dct_interleave8
2505 #undef dct_interleave16
2506 #undef dct_pass
2507 }
2508 
2509 #endif // STBI_SSE2
2510 
2511 #ifdef STBI_NEON
2512 
2513 // NEON integer IDCT. should produce bit-identical
2514 // results to the generic C version.
// Same signature as stbi__idct_block: reads the 64 coefficients in data and
// writes 8 rows of 8 bytes to out, the rows out_stride bytes apart.
2515 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2516 {
2517  int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2518 
// the same fixed-point constants the generic STBI__IDCT_1D macro multiplies by
2519  int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2520  int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2521  int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
2522  int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
2523  int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2524  int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2525  int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2526  int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2527  int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
2528  int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
2529  int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
2530  int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
2531 
// The helper macros below declare new variables by pasting _l/_h (low and
// high four 32-bit lanes of a widened value) onto their first argument.
// out = inq * coeff, widened to 32 bits
2532 #define dct_long_mul(out, inq, coeff) \
2533  int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2534  int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2535 
// out = acc + inq * coeff, widened to 32 bits
2536 #define dct_long_mac(out, acc, inq, coeff) \
2537  int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2538  int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2539 
// out = inq << 12, widened to 32 bits
2540 #define dct_widen(out, inq) \
2541  int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2542  int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2543 
2544 // wide add
2545 #define dct_wadd(out, a, b) \
2546  int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2547  int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2548 
2549 // wide sub
2550 #define dct_wsub(out, a, b) \
2551  int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2552  int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2553 
2554 // butterfly a/b, then shift using "shiftop" by "s" and pack
2555 #define dct_bfly32o(out0,out1, a,b,shiftop,s) \
2556  { \
2557  dct_wadd(sum, a, b); \
2558  dct_wsub(dif, a, b); \
2559  out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2560  out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2561  }
2562 
// one full 1D pass over row0..row7, which are both its input and its output
2563 #define dct_pass(shiftop, shift) \
2564  { \
2565  /* even part */ \
2566  int16x8_t sum26 = vaddq_s16(row2, row6); \
2567  dct_long_mul(p1e, sum26, rot0_0); \
2568  dct_long_mac(t2e, p1e, row6, rot0_1); \
2569  dct_long_mac(t3e, p1e, row2, rot0_2); \
2570  int16x8_t sum04 = vaddq_s16(row0, row4); \
2571  int16x8_t dif04 = vsubq_s16(row0, row4); \
2572  dct_widen(t0e, sum04); \
2573  dct_widen(t1e, dif04); \
2574  dct_wadd(x0, t0e, t3e); \
2575  dct_wsub(x3, t0e, t3e); \
2576  dct_wadd(x1, t1e, t2e); \
2577  dct_wsub(x2, t1e, t2e); \
2578  /* odd part */ \
2579  int16x8_t sum15 = vaddq_s16(row1, row5); \
2580  int16x8_t sum17 = vaddq_s16(row1, row7); \
2581  int16x8_t sum35 = vaddq_s16(row3, row5); \
2582  int16x8_t sum37 = vaddq_s16(row3, row7); \
2583  int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2584  dct_long_mul(p5o, sumodd, rot1_0); \
2585  dct_long_mac(p1o, p5o, sum17, rot1_1); \
2586  dct_long_mac(p2o, p5o, sum35, rot1_2); \
2587  dct_long_mul(p3o, sum37, rot2_0); \
2588  dct_long_mul(p4o, sum15, rot2_1); \
2589  dct_wadd(sump13o, p1o, p3o); \
2590  dct_wadd(sump24o, p2o, p4o); \
2591  dct_wadd(sump23o, p2o, p3o); \
2592  dct_wadd(sump14o, p1o, p4o); \
2593  dct_long_mac(x4, sump13o, row7, rot3_0); \
2594  dct_long_mac(x5, sump24o, row5, rot3_1); \
2595  dct_long_mac(x6, sump23o, row3, rot3_2); \
2596  dct_long_mac(x7, sump14o, row1, rot3_3); \
2597  dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
2598  dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
2599  dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
2600  dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
2601  }
2602 
2603  // load
2604  row0 = vld1q_s16(data + 0*8);
2605  row1 = vld1q_s16(data + 1*8);
2606  row2 = vld1q_s16(data + 2*8);
2607  row3 = vld1q_s16(data + 3*8);
2608  row4 = vld1q_s16(data + 4*8);
2609  row5 = vld1q_s16(data + 5*8);
2610  row6 = vld1q_s16(data + 6*8);
2611  row7 = vld1q_s16(data + 7*8);
2612 
2613  // add DC bias
// (1024 is added to lane 0 of row0 only, i.e. to the DC coefficient.
// NOTE(review): presumably this stands in for the +128 output offset that
// the generic version folds into its row-pass rounding constant -- confirm)
2614  row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2615 
2616  // column pass
2617  dct_pass(vrshrn_n_s32, 10);
2618 
2619  // 16bit 8x8 transpose
2620  {
2621 // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2622 // whether compilers actually get this is another story, sadly.
2623 #define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
2624 #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
2625 #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
2626 
2627  // pass 1
2628  dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2629  dct_trn16(row2, row3);
2630  dct_trn16(row4, row5);
2631  dct_trn16(row6, row7);
2632 
2633  // pass 2
2634  dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2635  dct_trn32(row1, row3);
2636  dct_trn32(row4, row6);
2637  dct_trn32(row5, row7);
2638 
2639  // pass 3
2640  dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2641  dct_trn64(row1, row5);
2642  dct_trn64(row2, row6);
2643  dct_trn64(row3, row7);
2644 
2645 #undef dct_trn16
2646 #undef dct_trn32
2647 #undef dct_trn64
2648  }
2649 
2650  // row pass
2651  // vrshrn_n_s32 only supports shifts up to 16, we need
2652  // 17. so do a non-rounding shift of 16 first then follow
2653  // up with a rounding shift by 1.
2654  dct_pass(vshrn_n_s32, 16);
2655 
2656  {
2657  // pack and round
// (vqrshrun_n_s16 performs the remaining rounding shift by 1 and saturates
// to an unsigned byte, which takes the place of the generic stbi__clamp)
2658  uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2659  uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2660  uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2661  uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2662  uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2663  uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2664  uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2665  uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2666 
2667  // again, these can translate into one instruction, but often don't.
2668 #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
2669 #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
2670 #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
2671 
2672  // sadly can't use interleaved stores here since we only write
2673  // 8 bytes to each scan line!
2674 
2675  // 8x8 8-bit transpose pass 1
2676  dct_trn8_8(p0, p1);
2677  dct_trn8_8(p2, p3);
2678  dct_trn8_8(p4, p5);
2679  dct_trn8_8(p6, p7);
2680 
2681  // pass 2
2682  dct_trn8_16(p0, p2);
2683  dct_trn8_16(p1, p3);
2684  dct_trn8_16(p4, p6);
2685  dct_trn8_16(p5, p7);
2686 
2687  // pass 3
2688  dct_trn8_32(p0, p4);
2689  dct_trn8_32(p1, p5);
2690  dct_trn8_32(p2, p6);
2691  dct_trn8_32(p3, p7);
2692 
2693  // store
2694  vst1_u8(out, p0); out += out_stride;
2695  vst1_u8(out, p1); out += out_stride;
2696  vst1_u8(out, p2); out += out_stride;
2697  vst1_u8(out, p3); out += out_stride;
2698  vst1_u8(out, p4); out += out_stride;
2699  vst1_u8(out, p5); out += out_stride;
2700  vst1_u8(out, p6); out += out_stride;
2701  vst1_u8(out, p7);
2702 
2703 #undef dct_trn8_8
2704 #undef dct_trn8_16
2705 #undef dct_trn8_32
2706  }
2707 
2708 #undef dct_long_mul
2709 #undef dct_long_mac
2710 #undef dct_widen
2711 #undef dct_wadd
2712 #undef dct_wsub
2713 #undef dct_bfly32o
2714 #undef dct_pass
2715 }
2716 
2717 #endif // STBI_NEON
2718 
2719 #define STBI__MARKER_none 0xff
2720 // if there's a pending marker from the entropy stream, return that
2721 // otherwise, fetch from the stream and get a marker. if there's no
2722 // marker, return 0xff, which is never a valid marker value
2723 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2724 {
2725  stbi_uc x;
2726  if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2727  x = stbi__get8(j->s);
2728  if (x != 0xff) return STBI__MARKER_none;
2729  while (x == 0xff)
2730  x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2731  return x;
2732 }
2733 
2734 // in each scan, we'll have scan_n components, and the order
2735 // of the components is specified by order[]
2736 #define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
2737 
2738 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2739 // the dc prediction
2740 static void stbi__jpeg_reset(stbi__jpeg *j)
2741 {
2742  j->code_bits = 0;
2743  j->code_buffer = 0;
2744  j->nomore = 0;
2745  j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2746  j->marker = STBI__MARKER_none;
2747  j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2748  j->eob_run = 0;
2749  // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2750  // since we don't even allow 1<<30 pixels
2751 }
2752 
2753 static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
2754 {
2755  stbi__jpeg_reset(z);
2756  if (!z->progressive) {
2757  if (z->scan_n == 1) {
2758  int i,j;
2759  STBI_SIMD_ALIGN(short, data[64]);
2760  int n = z->order[0];
2761  // non-interleaved data, we just need to process one block at a time,
2762  // in trivial scanline order
2763  // number of blocks to do just depends on how many actual "pixels" this
2764  // component has, independent of interleaved MCU blocking and such
2765  int w = (z->img_comp[n].x+7) >> 3;
2766  int h = (z->img_comp[n].y+7) >> 3;
2767  for (j=0; j < h; ++j) {
2768  for (i=0; i < w; ++i) {
2769  int ha = z->img_comp[n].ha;
2770  if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2771  z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2772  // every data block is an MCU, so countdown the restart interval
2773  if (--z->todo <= 0) {
2774  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2775  // if it's NOT a restart, then just bail, so we get corrupt data
2776  // rather than no data
2777  if (!STBI__RESTART(z->marker)) return 1;
2778  stbi__jpeg_reset(z);
2779  }
2780  }
2781  }
2782  return 1;
2783  } else { // interleaved
2784  int i,j,k,x,y;
2785  STBI_SIMD_ALIGN(short, data[64]);
2786  for (j=0; j < z->img_mcu_y; ++j) {
2787  for (i=0; i < z->img_mcu_x; ++i) {
2788  // scan an interleaved mcu... process scan_n components in order
2789  for (k=0; k < z->scan_n; ++k) {
2790  int n = z->order[k];
2791  // scan out an mcu's worth of this component; that's just determined
2792  // by the basic H and V specified for the component
2793  for (y=0; y < z->img_comp[n].v; ++y) {
2794  for (x=0; x < z->img_comp[n].h; ++x) {
2795  int x2 = (i*z->img_comp[n].h + x)*8;
2796  int y2 = (j*z->img_comp[n].v + y)*8;
2797  int ha = z->img_comp[n].ha;
2798  if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2799  z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
2800  }
2801  }
2802  }
2803  // after all interleaved components, that's an interleaved MCU,
2804  // so now count down the restart interval
2805  if (--z->todo <= 0) {
2806  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2807  if (!STBI__RESTART(z->marker)) return 1;
2808  stbi__jpeg_reset(z);
2809  }
2810  }
2811  }
2812  return 1;
2813  }
2814  } else {
2815  if (z->scan_n == 1) {
2816  int i,j;
2817  int n = z->order[0];
2818  // non-interleaved data, we just need to process one block at a time,
2819  // in trivial scanline order
2820  // number of blocks to do just depends on how many actual "pixels" this
2821  // component has, independent of interleaved MCU blocking and such
2822  int w = (z->img_comp[n].x+7) >> 3;
2823  int h = (z->img_comp[n].y+7) >> 3;
2824  for (j=0; j < h; ++j) {
2825  for (i=0; i < w; ++i) {
2826  short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2827  if (z->spec_start == 0) {
2828  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2829  return 0;
2830  } else {
2831  int ha = z->img_comp[n].ha;
2832  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
2833  return 0;
2834  }
2835  // every data block is an MCU, so countdown the restart interval
2836  if (--z->todo <= 0) {
2837  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2838  if (!STBI__RESTART(z->marker)) return 1;
2839  stbi__jpeg_reset(z);
2840  }
2841  }
2842  }
2843  return 1;
2844  } else { // interleaved
2845  int i,j,k,x,y;
2846  for (j=0; j < z->img_mcu_y; ++j) {
2847  for (i=0; i < z->img_mcu_x; ++i) {
2848  // scan an interleaved mcu... process scan_n components in order
2849  for (k=0; k < z->scan_n; ++k) {
2850  int n = z->order[k];
2851  // scan out an mcu's worth of this component; that's just determined
2852  // by the basic H and V specified for the component
2853  for (y=0; y < z->img_comp[n].v; ++y) {
2854  for (x=0; x < z->img_comp[n].h; ++x) {
2855  int x2 = (i*z->img_comp[n].h + x);
2856  int y2 = (j*z->img_comp[n].v + y);
2857  short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
2858  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2859  return 0;
2860  }
2861  }
2862  }
2863  // after all interleaved components, that's an interleaved MCU,
2864  // so now count down the restart interval
2865  if (--z->todo <= 0) {
2866  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2867  if (!STBI__RESTART(z->marker)) return 1;
2868  stbi__jpeg_reset(z);
2869  }
2870  }
2871  }
2872  return 1;
2873  }
2874  }
2875 }
2876 
2877 static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
2878 {
2879  int i;
2880  for (i=0; i < 64; ++i)
2881  data[i] *= dequant[i];
2882 }
2883 
2884 static void stbi__jpeg_finish(stbi__jpeg *z)
2885 {
2886  if (z->progressive) {
2887  // dequantize and idct the data
2888  int i,j,n;
2889  for (n=0; n < z->s->img_n; ++n) {
2890  int w = (z->img_comp[n].x+7) >> 3;
2891  int h = (z->img_comp[n].y+7) >> 3;
2892  for (j=0; j < h; ++j) {
2893  for (i=0; i < w; ++i) {
2894  short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2895  stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
2896  z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2897  }
2898  }
2899  }
2900  }
2901 }
2902 
2903 static int stbi__process_marker(stbi__jpeg *z, int m)
2904 {
2905  int L;
2906  switch (m) {
2907  case STBI__MARKER_none: // no marker found
2908  return stbi__err("expected marker","Corrupt JPEG");
2909 
2910  case 0xDD: // DRI - specify restart interval
2911  if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
2912  z->restart_interval = stbi__get16be(z->s);
2913  return 1;
2914 
2915  case 0xDB: // DQT - define quantization table
2916  L = stbi__get16be(z->s)-2;
2917  while (L > 0) {
2918  int q = stbi__get8(z->s);
2919  int p = q >> 4, sixteen = (p != 0);
2920  int t = q & 15,i;
2921  if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
2922  if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
2923 
2924  for (i=0; i < 64; ++i)
2925  z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
2926  L -= (sixteen ? 129 : 65);
2927  }
2928  return L==0;
2929 
2930  case 0xC4: // DHT - define huffman table
2931  L = stbi__get16be(z->s)-2;
2932  while (L > 0) {
2933  stbi_uc *v;
2934  int sizes[16],i,n=0;
2935  int q = stbi__get8(z->s);
2936  int tc = q >> 4;
2937  int th = q & 15;
2938  if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
2939  for (i=0; i < 16; ++i) {
2940  sizes[i] = stbi__get8(z->s);
2941  n += sizes[i];
2942  }
2943  L -= 17;
2944  if (tc == 0) {
2945  if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
2946  v = z->huff_dc[th].values;
2947  } else {
2948  if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
2949  v = z->huff_ac[th].values;
2950  }
2951  for (i=0; i < n; ++i)
2952  v[i] = stbi__get8(z->s);
2953  if (tc != 0)
2954  stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
2955  L -= n;
2956  }
2957  return L==0;
2958  }
2959 
2960  // check for comment block or APP blocks
2961  if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
2962  L = stbi__get16be(z->s);
2963  if (L < 2) {
2964  if (m == 0xFE)
2965  return stbi__err("bad COM len","Corrupt JPEG");
2966  else
2967  return stbi__err("bad APP len","Corrupt JPEG");
2968  }
2969  L -= 2;
2970 
2971  if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
2972  static const unsigned char tag[5] = {'J','F','I','F','\0'};
2973  int ok = 1;
2974  int i;
2975  for (i=0; i < 5; ++i)
2976  if (stbi__get8(z->s) != tag[i])
2977  ok = 0;
2978  L -= 5;
2979  if (ok)
2980  z->jfif = 1;
2981  } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
2982  static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
2983  int ok = 1;
2984  int i;
2985  for (i=0; i < 6; ++i)
2986  if (stbi__get8(z->s) != tag[i])
2987  ok = 0;
2988  L -= 6;
2989  if (ok) {
2990  stbi__get8(z->s); // version
2991  stbi__get16be(z->s); // flags0
2992  stbi__get16be(z->s); // flags1
2993  z->app14_color_transform = stbi__get8(z->s); // color transform
2994  L -= 6;
2995  }
2996  }
2997 
2998  stbi__skip(z->s, L);
2999  return 1;
3000  }
3001 
3002  return stbi__err("unknown marker","Corrupt JPEG");
3003 }
3004 
3005 // after we see SOS
3006 static int stbi__process_scan_header(stbi__jpeg *z)
3007 {
3008  int i;
3009  int Ls = stbi__get16be(z->s);
3010  z->scan_n = stbi__get8(z->s);
3011  if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
3012  if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
3013  for (i=0; i < z->scan_n; ++i) {
3014  int id = stbi__get8(z->s), which;
3015  int q = stbi__get8(z->s);
3016  for (which = 0; which < z->s->img_n; ++which)
3017  if (z->img_comp[which].id == id)
3018  break;
3019  if (which == z->s->img_n) return 0; // no match
3020  z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
3021  z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
3022  z->order[i] = which;
3023  }
3024 
3025  {
3026  int aa;
3027  z->spec_start = stbi__get8(z->s);
3028  z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
3029  aa = stbi__get8(z->s);
3030  z->succ_high = (aa >> 4);
3031  z->succ_low = (aa & 15);
3032  if (z->progressive) {
3033  if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
3034  return stbi__err("bad SOS", "Corrupt JPEG");
3035  } else {
3036  if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
3037  if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
3038  z->spec_end = 63;
3039  }
3040  }
3041 
3042  return 1;
3043 }
3044 
3045 static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
3046 {
3047  int i;
3048  for (i=0; i < ncomp; ++i) {
3049  if (z->img_comp[i].raw_data) {
3050  STBI_FREE(z->img_comp[i].raw_data);
3051  z->img_comp[i].raw_data = NULL;
3052  z->img_comp[i].data = NULL;
3053  }
3054  if (z->img_comp[i].raw_coeff) {
3055  STBI_FREE(z->img_comp[i].raw_coeff);
3056  z->img_comp[i].raw_coeff = 0;
3057  z->img_comp[i].coeff = 0;
3058  }
3059  if (z->img_comp[i].linebuf) {
3060  STBI_FREE(z->img_comp[i].linebuf);
3061  z->img_comp[i].linebuf = NULL;
3062  }
3063  }
3064  return why;
3065 }
3066 
3067 static int stbi__process_frame_header(stbi__jpeg *z, int scan)
3068 {
3069  stbi__context *s = z->s;
3070  int Lf,p,i,q, h_max=1,v_max=1,c;
3071  Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
3072  p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
3073  s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
3074  s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
3075  c = stbi__get8(s);
3076  if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
3077  s->img_n = c;
3078  for (i=0; i < c; ++i) {
3079  z->img_comp[i].data = NULL;
3080  z->img_comp[i].linebuf = NULL;
3081  }
3082 
3083  if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
3084 
3085  z->rgb = 0;
3086  for (i=0; i < s->img_n; ++i) {
3087  static const unsigned char rgb[3] = { 'R', 'G', 'B' };
3088  z->img_comp[i].id = stbi__get8(s);
3089  if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
3090  ++z->rgb;
3091  q = stbi__get8(s);
3092  z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
3093  z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
3094  z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
3095  }
3096 
3097  if (scan != STBI__SCAN_load) return 1;
3098 
3099  if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
3100 
3101  for (i=0; i < s->img_n; ++i) {
3102  if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
3103  if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
3104  }
3105 
3106  // compute interleaved mcu info
3107  z->img_h_max = h_max;
3108  z->img_v_max = v_max;
3109  z->img_mcu_w = h_max * 8;
3110  z->img_mcu_h = v_max * 8;
3111  // these sizes can't be more than 17 bits
3112  z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
3113  z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
3114 
3115  for (i=0; i < s->img_n; ++i) {
3116  // number of effective pixels (e.g. for non-interleaved MCU)
3117  z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
3118  z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
3119  // to simplify generation, we'll allocate enough memory to decode
3120  // the bogus oversized data from using interleaved MCUs and their
3121  // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
3122  // discard the extra data until colorspace conversion
3123  //
3124  // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
3125  // so these muls can't overflow with 32-bit ints (which we require)
3126  z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
3127  z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
3128  z->img_comp[i].coeff = 0;
3129  z->img_comp[i].raw_coeff = 0;
3130  z->img_comp[i].linebuf = NULL;
3131  z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
3132  if (z->img_comp[i].raw_data == NULL)
3133  return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
3134  // align blocks for idct using mmx/sse
3135  z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
3136  if (z->progressive) {
3137  // w2, h2 are multiples of 8 (see above)
3138  z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
3139  z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
3140  z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
3141  if (z->img_comp[i].raw_coeff == NULL)
3142  return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
3143  z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
3144  }
3145  }
3146 
3147  return 1;
3148 }
3149 
3150 // use comparisons since in some cases we handle more than one case (e.g. SOF)
3151 #define stbi__DNL(x) ((x) == 0xdc)
3152 #define stbi__SOI(x) ((x) == 0xd8)
3153 #define stbi__EOI(x) ((x) == 0xd9)
3154 #define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
3155 #define stbi__SOS(x) ((x) == 0xda)
3156 
3157 #define stbi__SOF_progressive(x) ((x) == 0xc2)
3158 
3159 static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
3160 {
3161  int m;
3162  z->jfif = 0;
3163  z->app14_color_transform = -1; // valid values are 0,1,2
3164  z->marker = STBI__MARKER_none; // initialize cached marker to empty
3165  m = stbi__get_marker(z);
3166  if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
3167  if (scan == STBI__SCAN_type) return 1;
3168  m = stbi__get_marker(z);
3169  while (!stbi__SOF(m)) {
3170  if (!stbi__process_marker(z,m)) return 0;
3171  m = stbi__get_marker(z);
3172  while (m == STBI__MARKER_none) {
3173  // some files have extra padding after their blocks, so ok, we'll scan
3174  if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
3175  m = stbi__get_marker(z);
3176  }
3177  }
3178  z->progressive = stbi__SOF_progressive(m);
3179  if (!stbi__process_frame_header(z, scan)) return 0;
3180  return 1;
3181 }
3182 
3183 // decode image to YCbCr format
3184 static int stbi__decode_jpeg_image(stbi__jpeg *j)
3185 {
3186  int m;
3187  for (m = 0; m < 4; m++) {
3188  j->img_comp[m].raw_data = NULL;
3189  j->img_comp[m].raw_coeff = NULL;
3190  }
3191  j->restart_interval = 0;
3192  if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
3193  m = stbi__get_marker(j);
3194  while (!stbi__EOI(m)) {
3195  if (stbi__SOS(m)) {
3196  if (!stbi__process_scan_header(j)) return 0;
3197  if (!stbi__parse_entropy_coded_data(j)) return 0;
3198  if (j->marker == STBI__MARKER_none ) {
3199  // handle 0s at the end of image data from IP Kamera 9060
3200  while (!stbi__at_eof(j->s)) {
3201  int x = stbi__get8(j->s);
3202  if (x == 255) {
3203  j->marker = stbi__get8(j->s);
3204  break;
3205  }
3206  }
3207  // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
3208  }
3209  } else if (stbi__DNL(m)) {
3210  int Ld = stbi__get16be(j->s);
3211  stbi__uint32 NL = stbi__get16be(j->s);
3212  if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
3213  if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
3214  } else {
3215  if (!stbi__process_marker(j, m)) return 0;
3216  }
3217  m = stbi__get_marker(j);
3218  }
3219  if (j->progressive)
3220  stbi__jpeg_finish(j);
3221  return 1;
3222 }
3223 
3224 // static jfif-centered resampling (across block boundaries)
3225 
3226 typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
3227  int w, int hs);
3228 
3229 #define stbi__div4(x) ((stbi_uc) ((x) >> 2))
3230 
3231 static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3232 {
3233  STBI_NOTUSED(out);
3234  STBI_NOTUSED(in_far);
3235  STBI_NOTUSED(w);
3236  STBI_NOTUSED(hs);
3237  return in_near;
3238 }
3239 
3240 static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3241 {
3242  // need to generate two samples vertically for every one in input
3243  int i;
3244  STBI_NOTUSED(hs);
3245  for (i=0; i < w; ++i)
3246  out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
3247  return out;
3248 }
3249 
3250 static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3251 {
3252  // need to generate two samples horizontally for every one in input
3253  int i;
3254  stbi_uc *input = in_near;
3255 
3256  if (w == 1) {
3257  // if only one sample, can't do any interpolation
3258  out[0] = out[1] = input[0];
3259  return out;
3260  }
3261 
3262  out[0] = input[0];
3263  out[1] = stbi__div4(input[0]*3 + input[1] + 2);
3264  for (i=1; i < w-1; ++i) {
3265  int n = 3*input[i]+2;
3266  out[i*2+0] = stbi__div4(n+input[i-1]);
3267  out[i*2+1] = stbi__div4(n+input[i+1]);
3268  }
3269  out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
3270  out[i*2+1] = input[w-1];
3271 
3272  STBI_NOTUSED(in_far);
3273  STBI_NOTUSED(hs);
3274 
3275  return out;
3276 }
3277 
3278 #define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3279 
3280 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3281 {
3282  // need to generate 2x2 samples for every one in input
3283  int i,t0,t1;
3284  if (w == 1) {
3285  out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3286  return out;
3287  }
3288 
3289  t1 = 3*in_near[0] + in_far[0];
3290  out[0] = stbi__div4(t1+2);
3291  for (i=1; i < w; ++i) {
3292  t0 = t1;
3293  t1 = 3*in_near[i]+in_far[i];
3294  out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3295  out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
3296  }
3297  out[w*2-1] = stbi__div4(t1+2);
3298 
3299  STBI_NOTUSED(hs);
3300 
3301  return out;
3302 }
3303 
3304 #if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD (SSE2 or NEON) version of the 2x2 row upsampler.
// The input rows are blended vertically as 3*near + far, then filtered
// horizontally 3:1 with the left/right neighbour and scaled back by 16.
// Groups of 8 input samples are handled with vector code; whatever is left,
// including the final sample of the row, is finished with scalar code.
// Writes 2*w samples to out and returns out.
3305 static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3306 {
3307  // need to generate 2x2 samples for every one in input
3308  int i=0,t0,t1;
3309 
3310  if (w == 1) {
3311  out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3312  return out;
3313  }
3314 
3315  t1 = 3*in_near[0] + in_far[0];
3316  // process groups of 8 pixels for as long as we can.
3317  // note we can't handle the last pixel in a row in this loop
3318  // because we need to handle the filter boundary conditions.
 // the bound (w-1) & ~7 also guarantees i+8 <= w-1, so the reads of
 // in_near[i+8] / in_far[i+8] below stay inside the row.
3319  for (; i < ((w-1) & ~7); i += 8) {
3320 #if defined(STBI_SSE2)
3321  // load and perform the vertical filtering pass
3322  // this uses 3*x + y = 4*x + (y - x)
3323  __m128i zero = _mm_setzero_si128();
3324  __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i));
3325  __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
3326  __m128i farw = _mm_unpacklo_epi8(farb, zero);
3327  __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
3328  __m128i diff = _mm_sub_epi16(farw, nearw);
3329  __m128i nears = _mm_slli_epi16(nearw, 2);
3330  __m128i curr = _mm_add_epi16(nears, diff); // current row
3331 
3332  // horizontal filter works the same based on shifted vers of current
3333  // row. "prev" is current row shifted right by 1 pixel; we need to
3334  // insert the previous pixel value (from t1).
3335  // "next" is current row shifted left by 1 pixel, with first pixel
3336  // of next block of 8 pixels added in.
3337  __m128i prv0 = _mm_slli_si128(curr, 2);
3338  __m128i nxt0 = _mm_srli_si128(curr, 2);
3339  __m128i prev = _mm_insert_epi16(prv0, t1, 0);
3340  __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);
3341 
3342  // horizontal filter, polyphase implementation since it's convenient:
3343  // even pixels = 3*cur + prev = cur*4 + (prev - cur)
3344  // odd pixels = 3*cur + next = cur*4 + (next - cur)
3345  // note the shared term.
3346  __m128i bias = _mm_set1_epi16(8);
3347  __m128i curs = _mm_slli_epi16(curr, 2);
3348  __m128i prvd = _mm_sub_epi16(prev, curr);
3349  __m128i nxtd = _mm_sub_epi16(next, curr);
3350  __m128i curb = _mm_add_epi16(curs, bias);
3351  __m128i even = _mm_add_epi16(prvd, curb);
3352  __m128i odd = _mm_add_epi16(nxtd, curb);
3353 
3354  // interleave even and odd pixels, then undo scaling.
3355  __m128i int0 = _mm_unpacklo_epi16(even, odd);
3356  __m128i int1 = _mm_unpackhi_epi16(even, odd);
3357  __m128i de0 = _mm_srli_epi16(int0, 4);
3358  __m128i de1 = _mm_srli_epi16(int1, 4);
3359 
3360  // pack and write output
 // (16 output bytes, out[i*2] .. out[i*2+15], for the 8 input samples)
3361  __m128i outv = _mm_packus_epi16(de0, de1);
3362  _mm_storeu_si128((__m128i *) (out + i*2), outv);
3363 #elif defined(STBI_NEON)
3364  // load and perform the vertical filtering pass
3365  // this uses 3*x + y = 4*x + (y - x)
3366  uint8x8_t farb = vld1_u8(in_far + i);
3367  uint8x8_t nearb = vld1_u8(in_near + i);
3368  int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
3369  int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
3370  int16x8_t curr = vaddq_s16(nears, diff); // current row
3371 
3372  // horizontal filter works the same based on shifted vers of current
3373  // row. "prev" is current row shifted right by 1 pixel; we need to
3374  // insert the previous pixel value (from t1).
3375  // "next" is current row shifted left by 1 pixel, with first pixel
3376  // of next block of 8 pixels added in.
3377  int16x8_t prv0 = vextq_s16(curr, curr, 7);
3378  int16x8_t nxt0 = vextq_s16(curr, curr, 1);
3379  int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
3380  int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);
3381 
3382  // horizontal filter, polyphase implementation since it's convenient:
3383  // even pixels = 3*cur + prev = cur*4 + (prev - cur)
3384  // odd pixels = 3*cur + next = cur*4 + (next - cur)
3385  // note the shared term.
 // unlike the SSE2 path no explicit +8 bias is added here; the
 // rounding is left to the narrowing shift below.
3386  int16x8_t curs = vshlq_n_s16(curr, 2);
3387  int16x8_t prvd = vsubq_s16(prev, curr);
3388  int16x8_t nxtd = vsubq_s16(next, curr);
3389  int16x8_t even = vaddq_s16(curs, prvd);
3390  int16x8_t odd = vaddq_s16(curs, nxtd);
3391 
3392  // undo scaling and round, then store with even/odd phases interleaved
3393  uint8x8x2_t o;
3394  o.val[0] = vqrshrun_n_s16(even, 4);
3395  o.val[1] = vqrshrun_n_s16(odd, 4);
3396  vst2_u8(out + i*2, o);
3397 #endif
3398 
3399  // "previous" value for next iter
3400  t1 = 3*in_near[i+7] + in_far[i+7];
3401  }
3402 
 // scalar tail. first remaining sample: only its even output is written
 // here, since out[i*2-1] was already produced as the last odd sample of
 // the preceding vector block. when no vector block ran (i == 0), t0 == t1
 // and (4*t1 + 8) >> 4 equals (t1 + 2) >> 2, i.e. the plain edge value.
3403  t0 = t1;
3404  t1 = 3*in_near[i] + in_far[i];
3405  out[i*2] = stbi__div16(3*t1 + t0 + 8);
3406 
3407  for (++i; i < w; ++i) {
3408  t0 = t1;
3409  t1 = 3*in_near[i]+in_far[i];
3410  out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3411  out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
3412  }
 // right edge: vertical blend only
3413  out[w*2-1] = stbi__div4(t1+2);
3414 
3415  STBI_NOTUSED(hs);
3416 
3417  return out;
3418 }
3419 #endif
3420 
3421 static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3422 {
3423  // resample with nearest-neighbor
3424  int i,j;
3425  STBI_NOTUSED(in_far);
3426  for (i=0; i < w; ++i)
3427  for (j=0; j < hs; ++j)
3428  out[i*hs+j] = in_near[i];
3429  return out;
3430 }
3431 
3432 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
3433 // to make sure the code produces the same results in both SIMD and scalar
3434 #define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8)
3435 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
3436 {
3437  int i;
3438  for (i=0; i < count; ++i) {
3439  int y_fixed = (y[i] << 20) + (1<<19); // rounding
3440  int r,g,b;
3441  int cr = pcr[i] - 128;
3442  int cb = pcb[i] - 128;
3443  r = y_fixed + cr* stbi__float2fixed(1.40200f);
3444  g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3445  b = y_fixed + cb* stbi__float2fixed(1.77200f);
3446  r >>= 20;
3447  g >>= 20;
3448  b >>= 20;
3449  if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3450  if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3451  if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3452  out[0] = (stbi_uc)r;
3453  out[1] = (stbi_uc)g;
3454  out[2] = (stbi_uc)b;
3455  out[3] = 255;
3456  out += step;
3457  }
3458 }
3459 
3460 #if defined(STBI_SSE2) || defined(STBI_NEON)
3461 static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
3462 {
3463  int i = 0;
3464 
3465 #ifdef STBI_SSE2
3466  // step == 3 is pretty ugly on the final interleave, and i'm not convinced
3467  // it's useful in practice (you wouldn't use it for textures, for example).
3468  // so just accelerate step == 4 case.
3469  if (step == 4) {
3470  // this is a fairly straightforward implementation and not super-optimized.
3471  __m128i signflip = _mm_set1_epi8(-0x80);
3472  __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f));
3473  __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
3474  __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
3475  __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f));
3476  __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
3477  __m128i xw = _mm_set1_epi16(255); // alpha channel
3478 
3479  for (; i+7 < count; i += 8) {
3480  // load
3481  __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
3482  __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
3483  __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
3484  __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
3485  __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
3486 
3487  // unpack to short (and left-shift cr, cb by 8)
3488  __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
3489  __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
3490  __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
3491 
3492  // color transform
3493  __m128i yws = _mm_srli_epi16(yw, 4);
3494  __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
3495  __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
3496  __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
3497  __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
3498  __m128i rws = _mm_add_epi16(cr0, yws);
3499  __m128i gwt = _mm_add_epi16(cb0, yws);
3500  __m128i bws = _mm_add_epi16(yws, cb1);
3501  __m128i gws = _mm_add_epi16(gwt, cr1);
3502 
3503  // descale
3504  __m128i rw = _mm_srai_epi16(rws, 4);
3505  __m128i bw = _mm_srai_epi16(bws, 4);
3506  __m128i gw = _mm_srai_epi16(gws, 4);
3507 
3508  // back to byte, set up for transpose
3509  __m128i brb = _mm_packus_epi16(rw, bw);
3510  __m128i gxb = _mm_packus_epi16(gw, xw);
3511 
3512  // transpose to interleave channels
3513  __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
3514  __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
3515  __m128i o0 = _mm_unpacklo_epi16(t0, t1);
3516  __m128i o1 = _mm_unpackhi_epi16(t0, t1);
3517 
3518  // store
3519  _mm_storeu_si128((__m128i *) (out + 0), o0);
3520  _mm_storeu_si128((__m128i *) (out + 16), o1);
3521  out += 32;
3522  }
3523  }
3524 #endif
3525 
3526 #ifdef STBI_NEON
3527  // in this version, step=3 support would be easy to add. but is there demand?
3528  if (step == 4) {
3529  // this is a fairly straightforward implementation and not super-optimized.
3530  uint8x8_t signflip = vdup_n_u8(0x80);
3531  int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f));
3532  int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
3533  int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
3534  int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f));
3535 
3536  for (; i+7 < count; i += 8) {
3537  // load
3538  uint8x8_t y_bytes = vld1_u8(y + i);
3539  uint8x8_t cr_bytes = vld1_u8(pcr + i);
3540  uint8x8_t cb_bytes = vld1_u8(pcb + i);
3541  int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
3542  int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
3543 
3544  // expand to s16
3545  int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
3546  int16x8_t crw = vshll_n_s8(cr_biased, 7);
3547  int16x8_t cbw = vshll_n_s8(cb_biased, 7);
3548 
3549  // color transform
3550  int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
3551  int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
3552  int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
3553  int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
3554  int16x8_t rws = vaddq_s16(yws, cr0);
3555  int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
3556  int16x8_t bws = vaddq_s16(yws, cb1);
3557 
3558  // undo scaling, round, convert to byte
3559  uint8x8x4_t o;
3560  o.val[0] = vqrshrun_n_s16(rws, 4);
3561  o.val[1] = vqrshrun_n_s16(gws, 4);
3562  o.val[2] = vqrshrun_n_s16(bws, 4);
3563  o.val[3] = vdup_n_u8(255);
3564 
3565  // store, interleaving r/g/b/a
3566  vst4_u8(out, o);
3567  out += 8*4;
3568  }
3569  }
3570 #endif
3571 
3572  for (; i < count; ++i) {
3573  int y_fixed = (y[i] << 20) + (1<<19); // rounding
3574  int r,g,b;
3575  int cr = pcr[i] - 128;
3576  int cb = pcb[i] - 128;
3577  r = y_fixed + cr* stbi__float2fixed(1.40200f);
3578  g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3579  b = y_fixed + cb* stbi__float2fixed(1.77200f);
3580  r >>= 20;
3581  g >>= 20;
3582  b >>= 20;
3583  if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3584  if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3585  if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3586  out[0] = (stbi_uc)r;
3587  out[1] = (stbi_uc)g;
3588  out[2] = (stbi_uc)b;
3589  out[3] = 255;
3590  out += step;
3591  }
3592 }
3593 #endif
3594 
3595 // set up the kernels
3596 static void stbi__setup_jpeg(stbi__jpeg *j)
3597 {
3598  j->idct_block_kernel = stbi__idct_block;
3599  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3600  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3601 
3602 #ifdef STBI_SSE2
3603  if (stbi__sse2_available()) {
3604  j->idct_block_kernel = stbi__idct_simd;
3605  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3606  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3607  }
3608 #endif
3609 
3610 #ifdef STBI_NEON
3611  j->idct_block_kernel = stbi__idct_simd;
3612  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3613  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3614 #endif
3615 }
3616 
3617 // clean up the temporary component buffers
3618 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3619 {
3620  stbi__free_jpeg_components(j, j->s->img_n, 0);
3621 }
3622 
3623 typedef struct
3624 {
3625  resample_row_func resample;
3626  stbi_uc *line0,*line1;
3627  int hs,vs; // expansion factor in each axis
3628  int w_lores; // horizontal pixels pre-expansion
3629  int ystep; // how far through vertical expansion we are
3630  int ypos; // which pre-expansion row we're on
3631 } stbi__resample;
3632 
3633 // fast 0..255 * 0..255 => 0..255 rounded multiplication
3634 static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3635 {
3636  unsigned int t = x*y + 128;
3637  return (stbi_uc) ((t + (t >>8)) >> 8);
3638 }
3639 
// Decode a JPEG with stbi__decode_jpeg_image, then upsample every needed component one
// output row at a time and convert it to the requested number of channels.
//   z        - decoder state; z->s supplies img_x / img_y / img_n
//   out_x/y  - receive the image width and height on success (written unconditionally)
//   comp     - if non-NULL, receives 3 when the source has >= 3 components, otherwise 1
//   req_comp - 0 to derive the channel count from the source, or 1..4; anything else is rejected
// Returns the pixel buffer obtained from stbi__malloc_mad3. On failure it returns NULL or the
// result of stbi__errpuc; stbi__cleanup_jpeg is called on every failure path after the
// req_comp check.
3640 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
3641 {
3642  int n, decode_n, is_rgb;
3643  z->s->img_n = 0; // make stbi__cleanup_jpeg safe
3644 
3645  // validate req_comp
3646  if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
3647 
3648  // load a jpeg image from whichever source, but leave in YCbCr format
3649  if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
3650 
3651  // determine actual number of components to generate
3652  n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
3653 
// three-component data is passed through untransformed when z->rgb == 3, or when
// app14_color_transform is 0 and z->jfif is not set
3654  is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
3655 
// gray output from a three-component non-RGB source only reads coutput[0] below,
// so only component 0 needs to be resampled
3656  if (z->s->img_n == 3 && n < 3 && !is_rgb)
3657  decode_n = 1;
3658  else
3659  decode_n = z->s->img_n;
3660 
3661  // resample and color-convert
3662  {
3663  int k;
3664  unsigned int i,j;
3665  stbi_uc *output;
3666  stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL };
3667 
3668  stbi__resample res_comp[4];
3669 
// set up one resampler per decoded component
3670  for (k=0; k < decode_n; ++k) {
3671  stbi__resample *r = &res_comp[k];
3672 
3673  // allocate line buffer big enough for upsampling off the edges
3674  // with upsample factor of 4
3675  z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
3676  if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3677 
3678  r->hs = z->img_h_max / z->img_comp[k].h;
3679  r->vs = z->img_v_max / z->img_comp[k].v;
3680  r->ystep = r->vs >> 1;
3681  r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
3682  r->ypos = 0;
3683  r->line0 = r->line1 = z->img_comp[k].data;
3684 
// pick the specialised upsampler for 1x1, 1x2, 2x1 and 2x2; everything else is generic
3685  if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
3686  else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
3687  else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
3688  else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
3689  else r->resample = stbi__resample_row_generic;
3690  }
3691 
3692  // can't error after this so, this is safe
3693  output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
3694  if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3695 
3696  // now go ahead and resample
3697  for (j=0; j < z->s->img_y; ++j) {
3698  stbi_uc *out = output + n * z->s->img_x * j;
3699  for (k=0; k < decode_n; ++k) {
3700  stbi__resample *r = &res_comp[k];
// y_bot decides which of line0/line1 is passed first to the upsampler
3701  int y_bot = r->ystep >= (r->vs >> 1);
3702  coutput[k] = r->resample(z->img_comp[k].linebuf,
3703  y_bot ? r->line1 : r->line0,
3704  y_bot ? r->line0 : r->line1,
3705  r->w_lores, r->hs);
// after vs output rows, advance to the next source row (line1 stops at the last row)
3706  if (++r->ystep >= r->vs) {
3707  r->ystep = 0;
3708  r->line0 = r->line1;
3709  if (++r->ypos < z->img_comp[k].y)
3710  r->line1 += z->img_comp[k].w2;
3711  }
3712  }
// three or four output channels
3713  if (n >= 3) {
3714  stbi_uc *y = coutput[0];
3715  if (z->s->img_n == 3) {
3716  if (is_rgb) {
3717  for (i=0; i < z->s->img_x; ++i) {
3718  out[0] = y[i];
3719  out[1] = coutput[1][i];
3720  out[2] = coutput[2][i];
// with n == 3 this byte is the next pixel's first channel and gets overwritten on the
// next iteration. NOTE(review): the very last pixel presumably relies on the extra
// byte requested from stbi__malloc_mad3 above - confirm
3721  out[3] = 255;
3722  out += n;
3723  }
3724  } else {
3725  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3726  }
3727  } else if (z->s->img_n == 4) {
3728  if (z->app14_color_transform == 0) { // CMYK
3729  for (i=0; i < z->s->img_x; ++i) {
3730  stbi_uc m = coutput[3][i];
3731  out[0] = stbi__blinn_8x8(coutput[0][i], m);
3732  out[1] = stbi__blinn_8x8(coutput[1][i], m);
3733  out[2] = stbi__blinn_8x8(coutput[2][i], m);
3734  out[3] = 255;
3735  out += n;
3736  }
3737  } else if (z->app14_color_transform == 2) { // YCCK
// convert the first three channels as YCbCr, then invert and scale by the fourth
3738  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3739  for (i=0; i < z->s->img_x; ++i) {
3740  stbi_uc m = coutput[3][i];
3741  out[0] = stbi__blinn_8x8(255 - out[0], m);
3742  out[1] = stbi__blinn_8x8(255 - out[1], m);
3743  out[2] = stbi__blinn_8x8(255 - out[2], m);
3744  out += n;
3745  }
3746  } else { // YCbCr + alpha? Ignore the fourth channel for now
3747  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3748  }
3749  } else
// single-component source: replicate the gray value into r, g and b
3750  for (i=0; i < z->s->img_x; ++i) {
3751  out[0] = out[1] = out[2] = y[i];
3752  out[3] = 255; // not used if n==3
3753  out += n;
3754  }
3755  } else {
// one or two output channels (gray, or gray + alpha)
3756  if (is_rgb) {
3757  if (n == 1)
3758  for (i=0; i < z->s->img_x; ++i)
3759  *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3760  else {
3761  for (i=0; i < z->s->img_x; ++i, out += 2) {
3762  out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3763  out[1] = 255;
3764  }
3765  }
3766  } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
3767  for (i=0; i < z->s->img_x; ++i) {
3768  stbi_uc m = coutput[3][i];
3769  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
3770  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
3771  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
3772  out[0] = stbi__compute_y(r, g, b);
3773  out[1] = 255;
3774  out += n;
3775  }
3776  } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
3777  for (i=0; i < z->s->img_x; ++i) {
3778  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
3779  out[1] = 255;
3780  out += n;
3781  }
3782  } else {
// remaining sources: the first component is copied through as the gray value
3783  stbi_uc *y = coutput[0];
3784  if (n == 1)
3785  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
3786  else
3787  for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
3788  }
3789  }
3790  }
3791  stbi__cleanup_jpeg(z);
3792  *out_x = z->s->img_x;
3793  *out_y = z->s->img_y;
3794  if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
3795  return output;
3796  }
3797 }
3798 
3799 static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
3800 {
3801  unsigned char* result;
3802  stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
3803  STBI_NOTUSED(ri);
3804  j->s = s;
3805  stbi__setup_jpeg(j);
3806  result = load_jpeg_image(j, x,y,comp,req_comp);
3807  STBI_FREE(j);
3808  return result;
3809 }
3810 
3811 static int stbi__jpeg_test(stbi__context *s)
3812 {
3813  int r;
3814  stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
3815  j->s = s;
3816  stbi__setup_jpeg(j);
3817  r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
3818  stbi__rewind(s);
3819  STBI_FREE(j);
3820  return r;
3821 }
3822 
3823 static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
3824 {
3825  if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
3826  stbi__rewind( j->s );
3827  return 0;
3828  }
3829  if (x) *x = j->s->img_x;
3830  if (y) *y = j->s->img_y;
3831  if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
3832  return 1;
3833 }
3834 
3835 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
3836 {
3837  int result;
3838  stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
3839  j->s = s;
3840  result = stbi__jpeg_info_raw(j, x, y, comp);
3841  STBI_FREE(j);
3842  return result;
3843 }
3844 #endif
3845 
3846 // public domain zlib decode v0.2 Sean Barrett 2006-11-18
3847 // simple implementation
3848 // - all input must be provided in an upfront buffer
3849 // - all output is written to a single output buffer (can malloc/realloc)
3850 // performance
3851 // - fast huffman
3852 
3853 #ifndef STBI_NO_ZLIB
3854 
// STBI__ZFAST_BITS is the number of low-order bits of the bit buffer used to index the
// 'fast' lookup table of stbi__zhuffman; only codes no longer than this get fast entries.
// STBI__ZFAST_MASK selects exactly those bits.
3855 // fast-way is faster to check than jpeg huffman, but slow way is slower
3856 #define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
3857 #define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
3858 
3859 // zlib-style huffman encoding
3860 // (jpegs packs from left, zlib from right, so can't share code)
3861 typedef struct
3862 {
3863  stbi__uint16 fast[1 << STBI__ZFAST_BITS];
3864  stbi__uint16 firstcode[16];
3865  int maxcode[17];
3866  stbi__uint16 firstsymbol[16];
3867  stbi_uc size[288];
3868  stbi__uint16 value[288];
3869 } stbi__zhuffman;
3870 
3871 stbi_inline static int stbi__bitreverse16(int n)
3872 {
3873  n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
3874  n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
3875  n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
3876  n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
3877  return n;
3878 }
3879 
3880 stbi_inline static int stbi__bit_reverse(int v, int bits)
3881 {
3882  STBI_ASSERT(bits <= 16);
3883  // to bit reverse n bits, reverse 16 and shift
3884  // e.g. 11 bits, bit reverse and shift away 5
3885  return stbi__bitreverse16(v) >> (16-bits);
3886 }
3887 
3888 static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
3889 {
3890  int i,k=0;
3891  int code, next_code[16], sizes[17];
3892 
3893  // DEFLATE spec for generating codes
3894  memset(sizes, 0, sizeof(sizes));
3895  memset(z->fast, 0, sizeof(z->fast));
3896  for (i=0; i < num; ++i)
3897  ++sizes[sizelist[i]];
3898  sizes[0] = 0;
3899  for (i=1; i < 16; ++i)
3900  if (sizes[i] > (1 << i))
3901  return stbi__err("bad sizes", "Corrupt PNG");
3902  code = 0;
3903  for (i=1; i < 16; ++i) {
3904  next_code[i] = code;
3905  z->firstcode[i] = (stbi__uint16) code;
3906  z->firstsymbol[i] = (stbi__uint16) k;
3907  code = (code + sizes[i]);
3908  if (sizes[i])
3909  if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
3910  z->maxcode[i] = code << (16-i); // preshift for inner loop
3911  code <<= 1;
3912  k += sizes[i];
3913  }
3914  z->maxcode[16] = 0x10000; // sentinel
3915  for (i=0; i < num; ++i) {
3916  int s = sizelist[i];
3917  if (s) {
3918  int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
3919  stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
3920  z->size [c] = (stbi_uc ) s;
3921  z->value[c] = (stbi__uint16) i;
3922  if (s <= STBI__ZFAST_BITS) {
3923  int j = stbi__bit_reverse(next_code[s],s);
3924  while (j < (1 << STBI__ZFAST_BITS)) {
3925  z->fast[j] = fastv;
3926  j += (1 << s);
3927  }
3928  }
3929  ++next_code[s];
3930  }
3931  }
3932  return 1;
3933 }
3934 
3935 // zlib-from-memory implementation for PNG reading
3936 // because PNG allows splitting the zlib stream arbitrarily,
3937 // and it's annoying structurally to have PNG call ZLIB call PNG,
3938 // we require PNG read all the IDATs and combine them into a single
3939 // memory buffer
3940 
3941 typedef struct
3942 {
3943  stbi_uc *zbuffer, *zbuffer_end;
3944  int num_bits;
3945  stbi__uint32 code_buffer;
3946 
3947  char *zout;
3948  char *zout_start;
3949  char *zout_end;
3950  int z_expandable;
3951 
3952  stbi__zhuffman z_length, z_distance;
3953 } stbi__zbuf;
3954 
3955 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3956 {
3957  if (z->zbuffer >= z->zbuffer_end) return 0;
3958  return *z->zbuffer++;
3959 }
3960 
3961 static void stbi__fill_bits(stbi__zbuf *z)
3962 {
3963  do {
3964  STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
3965  z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
3966  z->num_bits += 8;
3967  } while (z->num_bits <= 24);
3968 }
3969 
3970 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3971 {
3972  unsigned int k;
3973  if (z->num_bits < n) stbi__fill_bits(z);
3974  k = z->code_buffer & ((1 << n) - 1);
3975  z->code_buffer >>= n;
3976  z->num_bits -= n;
3977  return k;
3978 }
3979 
3980 static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
3981 {
3982  int b,s,k;
3983  // not resolved by fast table, so compute it the slow way
3984  // use jpeg approach, which requires MSbits at top
3985  k = stbi__bit_reverse(a->code_buffer, 16);
3986  for (s=STBI__ZFAST_BITS+1; ; ++s)
3987  if (k < z->maxcode[s])
3988  break;
3989  if (s == 16) return -1; // invalid code!
3990  // code size is s, so:
3991  b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
3992  STBI_ASSERT(z->size[b] == s);
3993  a->code_buffer >>= s;
3994  a->num_bits -= s;
3995  return z->value[b];
3996 }
3997 
3998 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3999 {
4000  int b,s;
4001  if (a->num_bits < 16) stbi__fill_bits(a);
4002  b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
4003  if (b) {
4004  s = b >> 9;
4005  a->code_buffer >>= s;
4006  a->num_bits -= s;
4007  return b & 511;
4008  }
4009  return stbi__zhuffman_decode_slowpath(a, z);
4010 }
4011 
4012 static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes
4013 {
4014  char *q;
4015  int cur, limit, old_limit;
4016  z->zout = zout;
4017  if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
4018  cur = (int) (z->zout - z->zout_start);
4019  limit = old_limit = (int) (z->zout_end - z->zout_start);
4020  while (cur + n > limit)
4021  limit *= 2;
4022  q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
4023  STBI_NOTUSED(old_limit);
4024  if (q == NULL) return stbi__err("outofmem", "Out of memory");
4025  z->zout_start = q;
4026  z->zout = q + cur;
4027  z->zout_end = q + limit;
4028  return 1;
4029 }
4030 
// Tables for turning DEFLATE length and distance symbols into values, as used by
// stbi__parse_huffman_block: the length tables are indexed by (symbol - 257), the
// distance tables by the distance symbol. '*_base' is the starting value and '*_extra'
// the number of extra bits read from the stream and added to it.
// The trailing zero entries (explicit, or implied by the shorter initializer of
// stbi__zdist_extra) pad the tables beyond the symbols that carry real values.
4031 static const int stbi__zlength_base[31] = {
4032  3,4,5,6,7,8,9,10,11,13,
4033  15,17,19,23,27,31,35,43,51,59,
4034  67,83,99,115,131,163,195,227,258,0,0 };
4035 
4036 static const int stbi__zlength_extra[31]=
4037 { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
4038 
4039 static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
4040 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
4041 
4042 static const int stbi__zdist_extra[32] =
4043 { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
4044 
4045 static int stbi__parse_huffman_block(stbi__zbuf *a)
4046 {
4047  char *zout = a->zout;
4048  for(;;) {
4049  int z = stbi__zhuffman_decode(a, &a->z_length);
4050  if (z < 256) {
4051  if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
4052  if (zout >= a->zout_end) {
4053  if (!stbi__zexpand(a, zout, 1)) return 0;
4054  zout = a->zout;
4055  }
4056  *zout++ = (char) z;
4057  } else {
4058  stbi_uc *p;
4059  int len,dist;
4060  if (z == 256) {
4061  a->zout = zout;
4062  return 1;
4063  }
4064  z -= 257;
4065  len = stbi__zlength_base[z];
4066  if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
4067  z = stbi__zhuffman_decode(a, &a->z_distance);
4068  if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
4069  dist = stbi__zdist_base[z];
4070  if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
4071  if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
4072  if (zout + len > a->zout_end) {
4073  if (!stbi__zexpand(a, zout, len)) return 0;
4074  zout = a->zout;
4075  }
4076  p = (stbi_uc *) (zout - dist);
4077  if (dist == 1) { // run of one byte; common in images.
4078  stbi_uc v = *p;
4079  if (len) { do *zout++ = v; while (--len); }
4080  } else {
4081  if (len) { do *zout++ = *p++; while (--len); }
4082  }
4083  }
4084  }
4085 }
4086 
4087 static int stbi__compute_huffman_codes(stbi__zbuf *a)
4088 {
4089  static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
4090  stbi__zhuffman z_codelength;
4091  stbi_uc lencodes[286+32+137];//padding for maximum single op
4092  stbi_uc codelength_sizes[19];
4093  int i,n;
4094 
4095  int hlit = stbi__zreceive(a,5) + 257;
4096  int hdist = stbi__zreceive(a,5) + 1;
4097  int hclen = stbi__zreceive(a,4) + 4;
4098  int ntot = hlit + hdist;
4099 
4100  memset(codelength_sizes, 0, sizeof(codelength_sizes));
4101  for (i=0; i < hclen; ++i) {
4102  int s = stbi__zreceive(a,3);
4103  codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
4104  }
4105  if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
4106 
4107  n = 0;
4108  while (n < ntot) {
4109  int c = stbi__zhuffman_decode(a, &z_codelength);
4110  if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
4111  if (c < 16)
4112  lencodes[n++] = (stbi_uc) c;
4113  else {
4114  stbi_uc fill = 0;
4115  if (c == 16) {
4116  c = stbi__zreceive(a,2)+3;
4117  if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
4118  fill = lencodes[n-1];
4119  } else if (c == 17)
4120  c = stbi__zreceive(a,3)+3;
4121  else {
4122  STBI_ASSERT(c == 18);
4123  c = stbi__zreceive(a,7)+11;
4124  }
4125  if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
4126  memset(lencodes+n, fill, c);
4127  n += c;
4128  }
4129  }
4130  if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
4131  if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
4132  if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
4133  return 1;
4134 }
4135 
4136 static int stbi__parse_uncompressed_block(stbi__zbuf *a)
4137 {
4138  stbi_uc header[4];
4139  int len,nlen,k;
4140  if (a->num_bits & 7)
4141  stbi__zreceive(a, a->num_bits & 7); // discard
4142  // drain the bit-packed data into header
4143  k = 0;
4144  while (a->num_bits > 0) {
4145  header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
4146  a->code_buffer >>= 8;
4147  a->num_bits -= 8;
4148  }
4149  STBI_ASSERT(a->num_bits == 0);
4150  // now fill header the normal way
4151  while (k < 4)
4152  header[k++] = stbi__zget8(a);
4153  len = header[1] * 256 + header[0];
4154  nlen = header[3] * 256 + header[2];
4155  if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
4156  if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
4157  if (a->zout + len > a->zout_end)
4158  if (!stbi__zexpand(a, a->zout, len)) return 0;
4159  memcpy(a->zout, a->zbuffer, len);
4160  a->zbuffer += len;
4161  a->zout += len;
4162  return 1;
4163 }
4164 
4165 static int stbi__parse_zlib_header(stbi__zbuf *a)
4166 {
4167  int cmf = stbi__zget8(a);
4168  int cm = cmf & 15;
4169  /* int cinfo = cmf >> 4; */
4170  int flg = stbi__zget8(a);
4171  if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
4172  if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
4173  if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
4174  // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
4175  return 1;
4176 }
4177 
// Code lengths of the fixed huffman code, passed to stbi__zbuild_huffman by
// stbi__parse_zlib for blocks of type 1: 288 literal/length symbols and 32 distance
// symbols. The commented-out routine below the tables shows how the values are derived.
4178 static const stbi_uc stbi__zdefault_length[288] =
4179 {
4180  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4181  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4182  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4183  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4184  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4185  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4186  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4187  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4188  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
4189 };
4190 static const stbi_uc stbi__zdefault_distance[32] =
4191 {
4192  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
4193 };
4194 /*
4195 Init algorithm:
4196 {
4197  int i; // use <= to match clearly with spec
4198  for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8;
4199  for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9;
4200  for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7;
4201  for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8;
4202 
4203  for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5;
4204 }
4205 */
4206 
4207 static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
4208 {
4209  int final, type;
4210  if (parse_header)
4211  if (!stbi__parse_zlib_header(a)) return 0;
4212  a->num_bits = 0;
4213  a->code_buffer = 0;
4214  do {
4215  final = stbi__zreceive(a,1);
4216  type = stbi__zreceive(a,2);
4217  if (type == 0) {
4218  if (!stbi__parse_uncompressed_block(a)) return 0;
4219  } else if (type == 3) {
4220  return 0;
4221  } else {
4222  if (type == 1) {
4223  // use fixed code lengths
4224  if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0;
4225  if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
4226  } else {
4227  if (!stbi__compute_huffman_codes(a)) return 0;
4228  }
4229  if (!stbi__parse_huffman_block(a)) return 0;
4230  }
4231  } while (!final);
4232  return 1;
4233 }
4234 
4235 static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
4236 {
4237  a->zout_start = obuf;
4238  a->zout = obuf;
4239  a->zout_end = obuf + olen;
4240  a->z_expandable = exp;
4241 
4242  return stbi__parse_zlib(a, parse_header);
4243 }
4244 
4245 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4246 {
4247  stbi__zbuf a;
4248  char *p = (char *) stbi__malloc(initial_size);
4249  if (p == NULL) return NULL;
4250  a.zbuffer = (stbi_uc *) buffer;
4251  a.zbuffer_end = (stbi_uc *) buffer + len;
4252  if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4253  if (outlen) *outlen = (int) (a.zout - a.zout_start);
4254  return a.zout_start;
4255  } else {
4256  STBI_FREE(a.zout_start);
4257  return NULL;
4258  }
4259 }
4260 
4261 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4262 {
4263  return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4264 }
4265 
4266 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4267 {
4268  stbi__zbuf a;
4269  char *p = (char *) stbi__malloc(initial_size);
4270  if (p == NULL) return NULL;
4271  a.zbuffer = (stbi_uc *) buffer;
4272  a.zbuffer_end = (stbi_uc *) buffer + len;
4273  if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4274  if (outlen) *outlen = (int) (a.zout - a.zout_start);
4275  return a.zout_start;
4276  } else {
4277  STBI_FREE(a.zout_start);
4278  return NULL;
4279  }
4280 }
4281 
4282 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4283 {
4284  stbi__zbuf a;
4285  a.zbuffer = (stbi_uc *) ibuffer;
4286  a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4287  if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4288  return (int) (a.zout - a.zout_start);
4289  else
4290  return -1;
4291 }
4292 
4293 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4294 {
4295  stbi__zbuf a;
4296  char *p = (char *) stbi__malloc(16384);
4297  if (p == NULL) return NULL;
4298  a.zbuffer = (stbi_uc *) buffer;
4299  a.zbuffer_end = (stbi_uc *) buffer+len;
4300  if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4301  if (outlen) *outlen = (int) (a.zout - a.zout_start);
4302  return a.zout_start;
4303  } else {
4304  STBI_FREE(a.zout_start);
4305  return NULL;
4306  }
4307 }
4308 
4309 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4310 {
4311  stbi__zbuf a;
4312  a.zbuffer = (stbi_uc *) ibuffer;
4313  a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4314  if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4315  return (int) (a.zout - a.zout_start);
4316  else
4317  return -1;
4318 }
4319 #endif
4320 
4321 // public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
4322 // simple implementation
4323 // - only 8-bit samples
4324 // - no CRC checking
4325 // - allocates lots of intermediate memory
4326 // - avoids problem of streaming data between subsystems
4327 // - avoids explicit window management
4328 // performance
4329 // - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4330 
4331 #ifndef STBI_NO_PNG
4332 typedef struct
4333 {
4334  stbi__uint32 length;
4335  stbi__uint32 type;
4336 } stbi__pngchunk;
4337 
4338 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4339 {
4340  stbi__pngchunk c;
4341  c.length = stbi__get32be(s);
4342  c.type = stbi__get32be(s);
4343  return c;
4344 }
4345 
4346 static int stbi__check_png_header(stbi__context *s)
4347 {
4348  static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
4349  int i;
4350  for (i=0; i < 8; ++i)
4351  if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
4352  return 1;
4353 }
4354 
4355 typedef struct
4356 {
4357  stbi__context *s;
4358  stbi_uc *idata, *expanded, *out;
4359  int depth;
4360 } stbi__png;
4361 
4362 
// PNG scanline filter types (0..4 are the values stored in the file)
enum {
   STBI__F_none        = 0,
   STBI__F_sub         = 1,
   STBI__F_up          = 2,
   STBI__F_avg         = 3,
   STBI__F_paeth       = 4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first   = 5,
   STBI__F_paeth_first = 6
};
4373 
4374 static stbi_uc first_row_filter[5] =
4375 {
4376  STBI__F_none,
4377  STBI__F_sub,
4378  STBI__F_none,
4379  STBI__F_avg_first,
4380  STBI__F_paeth_first
4381 };
4382 
// Paeth predictor: of a, b and c, return the one closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);
   if (dist_a > dist_b || dist_a > dist_c)
      return (dist_b <= dist_c) ? b : c;
   return a;
}
4393 
4394 static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4395 
4396 // create the png data from post-deflated data
4397 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
4398 {
4399  int bytes = (depth == 16? 2 : 1);
4400  stbi__context *s = a->s;
4401  stbi__uint32 i,j,stride = x*out_n*bytes;
4402  stbi__uint32 img_len, img_width_bytes;
4403  int k;
4404  int img_n = s->img_n; // copy it into a local for later
4405 
4406  int output_bytes = out_n*bytes;
4407  int filter_bytes = img_n*bytes;
4408  int width = x;
4409 
4410  STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
4411  a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
4412  if (!a->out) return stbi__err("outofmem", "Out of memory");
4413 
4414  if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
4415  img_width_bytes = (((img_n * x * depth) + 7) >> 3);
4416  img_len = (img_width_bytes + 1) * y;
4417 
4418  // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
4419  // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
4420  // so just check for raw_len < img_len always.
4421  if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
4422 
4423  for (j=0; j < y; ++j) {
4424  stbi_uc *cur = a->out + stride*j;
4425  stbi_uc *prior;
4426  int filter = *raw++;
4427 
4428  if (filter > 4)
4429  return stbi__err("invalid filter","Corrupt PNG");
4430 
4431  if (depth < 8) {
4432  STBI_ASSERT(img_width_bytes <= x);
4433  cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
4434  filter_bytes = 1;
4435  width = img_width_bytes;
4436  }
4437  prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
4438 
4439  // if first row, use special filter that doesn't sample previous row
4440  if (j == 0) filter = first_row_filter[filter];
4441 
4442  // handle first byte explicitly
4443  for (k=0; k < filter_bytes; ++k) {
4444  switch (filter) {
4445  case STBI__F_none : cur[k] = raw[k]; break;
4446  case STBI__F_sub : cur[k] = raw[k]; break;
4447  case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
4448  case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
4449  case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
4450  case STBI__F_avg_first : cur[k] = raw[k]; break;
4451  case STBI__F_paeth_first: cur[k] = raw[k]; break;
4452  }
4453  }
4454 
4455  if (depth == 8) {
4456  if (img_n != out_n)
4457  cur[img_n] = 255; // first pixel
4458  raw += img_n;
4459  cur += out_n;
4460  prior += out_n;
4461  } else if (depth == 16) {
4462  if (img_n != out_n) {
4463  cur[filter_bytes] = 255; // first pixel top byte
4464  cur[filter_bytes+1] = 255; // first pixel bottom byte
4465  }
4466  raw += filter_bytes;
4467  cur += output_bytes;
4468  prior += output_bytes;
4469  } else {
4470  raw += 1;
4471  cur += 1;
4472  prior += 1;
4473  }
4474 
4475  // this is a little gross, so that we don't switch per-pixel or per-component
4476  if (depth < 8 || img_n == out_n) {
4477  int nk = (width - 1)*filter_bytes;
4478  #define STBI__CASE(f) \
4479  case f: \
4480  for (k=0; k < nk; ++k)
4481  switch (filter) {
4482  // "none" filter turns into a memcpy here; make that explicit.
4483  case STBI__F_none: memcpy(cur, raw, nk); break;
4484  STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
4485  STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4486  STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
4487  STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
4488  STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
4489  STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
4490  }
4491  #undef STBI__CASE
4492  raw += nk;
4493  } else {
4494  STBI_ASSERT(img_n+1 == out_n);
4495  #define STBI__CASE(f) \
4496  case f: \
4497  for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
4498  for (k=0; k < filter_bytes; ++k)
4499  switch (filter) {
4500  STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break;
4501  STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
4502  STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4503  STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
4504  STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
4505  STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
4506  STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
4507  }
4508  #undef STBI__CASE
4509 
4510  // the loop above sets the high byte of the pixels' alpha, but for
4511  // 16 bit png files we also need the low byte set. we'll do that here.
4512  if (depth == 16) {
4513  cur = a->out + stride*j; // start at the beginning of the row again
4514  for (i=0; i < x; ++i,cur+=output_bytes) {
4515  cur[filter_bytes+1] = 255;
4516  }
4517  }
4518  }
4519  }
4520 
4521  // we make a separate pass to expand bits to pixels; for performance,
4522  // this could run two scanlines behind the above code, so it won't
4523  // interfere with filtering but will still be in the cache.
4524  if (depth < 8) {
4525  for (j=0; j < y; ++j) {
4526  stbi_uc *cur = a->out + stride*j;
4527  stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes;
4528  // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
4529  // PNG guarantees byte alignment; if width is not a multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
4530  stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
4531 
4532  // note that the final byte might overshoot and write more data than desired.
4533  // we can allocate enough data that this never writes out of memory, but it
4534  // could also overwrite the next scanline. can it overwrite non-empty data
4535  // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
4536  // so we need to explicitly clamp the final ones
4537 
4538  if (depth == 4) {
4539  for (k=x*img_n; k >= 2; k-=2, ++in) {
4540  *cur++ = scale * ((*in >> 4) );
4541  *cur++ = scale * ((*in ) & 0x0f);
4542  }
4543  if (k > 0) *cur++ = scale * ((*in >> 4) );
4544  } else if (depth == 2) {
4545  for (k=x*img_n; k >= 4; k-=4, ++in) {
4546  *cur++ = scale * ((*in >> 6) );
4547  *cur++ = scale * ((*in >> 4) & 0x03);
4548  *cur++ = scale * ((*in >> 2) & 0x03);
4549  *cur++ = scale * ((*in ) & 0x03);
4550  }
4551  if (k > 0) *cur++ = scale * ((*in >> 6) );
4552  if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
4553  if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
4554  } else if (depth == 1) {
4555  for (k=x*img_n; k >= 8; k-=8, ++in) {
4556  *cur++ = scale * ((*in >> 7) );
4557  *cur++ = scale * ((*in >> 6) & 0x01);
4558  *cur++ = scale * ((*in >> 5) & 0x01);
4559  *cur++ = scale * ((*in >> 4) & 0x01);
4560  *cur++ = scale * ((*in >> 3) & 0x01);
4561  *cur++ = scale * ((*in >> 2) & 0x01);
4562  *cur++ = scale * ((*in >> 1) & 0x01);
4563  *cur++ = scale * ((*in ) & 0x01);
4564  }
4565  if (k > 0) *cur++ = scale * ((*in >> 7) );
4566  if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
4567  if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
4568  if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
4569  if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
4570  if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
4571  if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
4572  }
4573  if (img_n != out_n) {
4574  int q;
4575  // insert alpha = 255
4576  cur = a->out + stride*j;
4577  if (img_n == 1) {
4578  for (q=x-1; q >= 0; --q) {
4579  cur[q*2+1] = 255;
4580  cur[q*2+0] = cur[q];
4581  }
4582  } else {
4583  STBI_ASSERT(img_n == 3);
4584  for (q=x-1; q >= 0; --q) {
4585  cur[q*4+3] = 255;
4586  cur[q*4+2] = cur[q*3+2];
4587  cur[q*4+1] = cur[q*3+1];
4588  cur[q*4+0] = cur[q*3+0];
4589  }
4590  }
4591  }
4592  }
4593  } else if (depth == 16) {
4594  // force the image data from big-endian to platform-native.
4595  // this is done in a separate pass due to the decoding relying
4596  // on the data being untouched, but could probably be done
4597  // per-line during decode if care is taken.
4598  stbi_uc *cur = a->out;
4599  stbi__uint16 *cur16 = (stbi__uint16*)cur;
4600 
4601  for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
4602  *cur16 = (cur[0] << 8) | cur[1];
4603  }
4604  }
4605 
4606  return 1;
4607 }
4608 
4609 static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
4610 {
4611  int bytes = (depth == 16 ? 2 : 1);
4612  int out_bytes = out_n * bytes;
4613  stbi_uc *final;
4614  int p;
4615  if (!interlaced)
4616  return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
4617 
4618  // de-interlacing
4619  final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
4620  for (p=0; p < 7; ++p) {
4621  int xorig[] = { 0,4,0,2,0,1,0 };
4622  int yorig[] = { 0,0,4,0,2,0,1 };
4623  int xspc[] = { 8,8,4,4,2,2,1 };
4624  int yspc[] = { 8,8,8,4,4,2,2 };
4625  int i,j,x,y;
4626  // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
4627  x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
4628  y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
4629  if (x && y) {
4630  stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
4631  if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
4632  STBI_FREE(final);
4633  return 0;
4634  }
4635  for (j=0; j < y; ++j) {
4636  for (i=0; i < x; ++i) {
4637  int out_y = j*yspc[p]+yorig[p];
4638  int out_x = i*xspc[p]+xorig[p];
4639  memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
4640  a->out + (j*x+i)*out_bytes, out_bytes);
4641  }
4642  }
4643  STBI_FREE(a->out);
4644  image_data += img_len;
4645  image_data_len -= img_len;
4646  }
4647  }
4648  a->out = final;
4649 
4650  return 1;
4651 }
4652 
4653 static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
4654 {
4655  stbi__context *s = z->s;
4656  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4657  stbi_uc *p = z->out;
4658 
4659  // compute color-based transparency, assuming we've
4660  // already got 255 as the alpha value in the output
4661  STBI_ASSERT(out_n == 2 || out_n == 4);
4662 
4663  if (out_n == 2) {
4664  for (i=0; i < pixel_count; ++i) {
4665  p[1] = (p[0] == tc[0] ? 0 : 255);
4666  p += 2;
4667  }
4668  } else {
4669  for (i=0; i < pixel_count; ++i) {
4670  if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4671  p[3] = 0;
4672  p += 4;
4673  }
4674  }
4675  return 1;
4676 }
4677 
4678 static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
4679 {
4680  stbi__context *s = z->s;
4681  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4682  stbi__uint16 *p = (stbi__uint16*) z->out;
4683 
4684  // compute color-based transparency, assuming we've
4685  // already got 65535 as the alpha value in the output
4686  STBI_ASSERT(out_n == 2 || out_n == 4);
4687 
4688  if (out_n == 2) {
4689  for (i = 0; i < pixel_count; ++i) {
4690  p[1] = (p[0] == tc[0] ? 0 : 65535);
4691  p += 2;
4692  }
4693  } else {
4694  for (i = 0; i < pixel_count; ++i) {
4695  if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4696  p[3] = 0;
4697  p += 4;
4698  }
4699  }
4700  return 1;
4701 }
4702 
4703 static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
4704 {
4705  stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
4706  stbi_uc *p, *temp_out, *orig = a->out;
4707 
4708  p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
4709  if (p == NULL) return stbi__err("outofmem", "Out of memory");
4710 
4711  // between here and free(out) below, exitting would leak
4712  temp_out = p;
4713 
4714  if (pal_img_n == 3) {
4715  for (i=0; i < pixel_count; ++i) {
4716  int n = orig[i]*4;
4717  p[0] = palette[n ];
4718  p[1] = palette[n+1];
4719  p[2] = palette[n+2];
4720  p += 3;
4721  }
4722  } else {
4723  for (i=0; i < pixel_count; ++i) {
4724  int n = orig[i]*4;
4725  p[0] = palette[n ];
4726  p[1] = palette[n+1];
4727  p[2] = palette[n+2];
4728  p[3] = palette[n+3];
4729  p += 4;
4730  }
4731  }
4732  STBI_FREE(a->out);
4733  a->out = temp_out;
4734 
4735  STBI_NOTUSED(len);
4736 
4737  return 1;
4738 }
4739 
// File-scope decode option, initially 0 and set by stbi_set_unpremultiply_on_load.
// When nonzero, stbi__de_iphone divides the colour channels by alpha while
// swapping BGR to RGB on 4-channel output.
4740 static int stbi__unpremultiply_on_load = 0;
// File-scope decode option, initially 0 and set by stbi_convert_iphone_png_to_rgb.
// When nonzero, PNG files containing a CgBI chunk are passed through
// stbi__de_iphone after decoding.
4741 static int stbi__de_iphone_flag = 0;
4742 
4743 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
4744 {
4745  stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
4746 }
4747 
4748 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
4749 {
4750  stbi__de_iphone_flag = flag_true_if_should_convert;
4751 }
4752 
4753 static void stbi__de_iphone(stbi__png *z)
4754 {
4755  stbi__context *s = z->s;
4756  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4757  stbi_uc *p = z->out;
4758 
4759  if (s->img_out_n == 3) { // convert bgr to rgb
4760  for (i=0; i < pixel_count; ++i) {
4761  stbi_uc t = p[0];
4762  p[0] = p[2];
4763  p[2] = t;
4764  p += 3;
4765  }
4766  } else {
4767  STBI_ASSERT(s->img_out_n == 4);
4768  if (stbi__unpremultiply_on_load) {
4769  // convert bgr to rgb and unpremultiply
4770  for (i=0; i < pixel_count; ++i) {
4771  stbi_uc a = p[3];
4772  stbi_uc t = p[0];
4773  if (a) {
4774  stbi_uc half = a / 2;
4775  p[0] = (p[2] * 255 + half) / a;
4776  p[1] = (p[1] * 255 + half) / a;
4777  p[2] = ( t * 255 + half) / a;
4778  } else {
4779  p[0] = p[2];
4780  p[2] = t;
4781  }
4782  p += 4;
4783  }
4784  } else {
4785  // convert bgr to rgb
4786  for (i=0; i < pixel_count; ++i) {
4787  stbi_uc t = p[0];
4788  p[0] = p[2];
4789  p[2] = t;
4790  p += 4;
4791  }
4792  }
4793  }
4794 }
4795 
// Pack four chunk-name characters into a single unsigned value, first
// character in the most significant byte, so a chunk type can be compared
// against a constant (it is used for the `case` labels of the chunk switch).
4796 #define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
4797 
4798 static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
4799 {
4800  stbi_uc palette[1024], pal_img_n=0;
4801  stbi_uc has_trans=0, tc[3]={0};
4802  stbi__uint16 tc16[3];
4803  stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
4804  int first=1,k,interlace=0, color=0, is_iphone=0;
4805  stbi__context *s = z->s;
4806 
4807  z->expanded = NULL;
4808  z->idata = NULL;
4809  z->out = NULL;
4810 
4811  if (!stbi__check_png_header(s)) return 0;
4812 
4813  if (scan == STBI__SCAN_type) return 1;
4814 
4815  for (;;) {
4816  stbi__pngchunk c = stbi__get_chunk_header(s);
4817  switch (c.type) {
4818  case STBI__PNG_TYPE('C','g','B','I'):
4819  is_iphone = 1;
4820  stbi__skip(s, c.length);
4821  break;
4822  case STBI__PNG_TYPE('I','H','D','R'): {
4823  int comp,filter;
4824  if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
4825  first = 0;
4826  if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
4827  s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
4828  s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
4829  z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
4830  color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG");
4831  if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG");
4832  if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
4833  comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG");
4834  filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG");
4835  interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
4836  if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
4837  if (!pal_img_n) {
4838  s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
4839  if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
4840  if (scan == STBI__SCAN_header) return 1;
4841  } else {
4842  // if paletted, then pal_n is our final components, and
4843  // img_n is # components to decompress/filter.
4844  s->img_n = 1;
4845  if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
4846  // if SCAN_header, have to scan to see if we have a tRNS
4847  }
4848  break;
4849  }
4850 
4851  case STBI__PNG_TYPE('P','L','T','E'): {
4852  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4853  if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
4854  pal_len = c.length / 3;
4855  if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
4856  for (i=0; i < pal_len; ++i) {
4857  palette[i*4+0] = stbi__get8(s);
4858  palette[i*4+1] = stbi__get8(s);
4859  palette[i*4+2] = stbi__get8(s);
4860  palette[i*4+3] = 255;
4861  }
4862  break;
4863  }
4864 
4865  case STBI__PNG_TYPE('t','R','N','S'): {
4866  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4867  if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
4868  if (pal_img_n) {
4869  if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
4870  if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
4871  if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
4872  pal_img_n = 4;
4873  for (i=0; i < c.length; ++i)
4874  palette[i*4+3] = stbi__get8(s);
4875  } else {
4876  if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
4877  if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
4878  has_trans = 1;
4879  if (z->depth == 16) {
4880  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
4881  } else {
4882  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
4883  }
4884  }
4885  break;
4886  }
4887 
4888  case STBI__PNG_TYPE('I','D','A','T'): {
4889  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4890  if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
4891  if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
4892  if ((int)(ioff + c.length) < (int)ioff) return 0;
4893  if (ioff + c.length > idata_limit) {
4894  stbi__uint32 idata_limit_old = idata_limit;
4895  stbi_uc *p;
4896  if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
4897  while (ioff + c.length > idata_limit)
4898  idata_limit *= 2;
4899  STBI_NOTUSED(idata_limit_old);
4900  p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
4901  z->idata = p;
4902  }
4903  if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
4904  ioff += c.length;
4905  break;
4906  }
4907 
4908  case STBI__PNG_TYPE('I','E','N','D'): {
4909  stbi__uint32 raw_len, bpl;
4910  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4911  if (scan != STBI__SCAN_load) return 1;
4912  if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
4913  // initial guess for decoded data size to avoid unnecessary reallocs
4914  bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
4915  raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
4916  z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
4917  if (z->expanded == NULL) return 0; // zlib should set error
4918  STBI_FREE(z->idata); z->idata = NULL;
4919  if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
4920  s->img_out_n = s->img_n+1;
4921  else
4922  s->img_out_n = s->img_n;
4923  if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
4924  if (has_trans) {
4925  if (z->depth == 16) {
4926  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
4927  } else {
4928  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
4929  }
4930  }
4931  if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
4932  stbi__de_iphone(z);
4933  if (pal_img_n) {
4934  // pal_img_n == 3 or 4
4935  s->img_n = pal_img_n; // record the actual colors we had
4936  s->img_out_n = pal_img_n;
4937  if (req_comp >= 3) s->img_out_n = req_comp;
4938  if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
4939  return 0;
4940  } else if (has_trans) {
4941  // non-paletted image with tRNS -> source image has (constant) alpha
4942  ++s->img_n;
4943  }
4944  STBI_FREE(z->expanded); z->expanded = NULL;
4945  return 1;
4946  }
4947 
4948  default:
4949  // if critical, fail
4950  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4951  if ((c.type & (1 << 29)) == 0) {
4952  #ifndef STBI_NO_FAILURE_STRINGS
4953  // not threadsafe
4954  static char invalid_chunk[] = "XXXX PNG chunk not known";
4955  invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
4956  invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
4957  invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
4958  invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
4959  #endif
4960  return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
4961  }
4962  stbi__skip(s, c.length);
4963  break;
4964  }
4965  // end of PNG chunk, read and skip CRC
4966  stbi__get32be(s);
4967  }
4968 }
4969 
4970 static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
4971 {
4972  void *result=NULL;
4973  if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
4974  if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
4975  if (p->depth < 8)
4976  ri->bits_per_channel = 8;
4977  else
4978  ri->bits_per_channel = p->depth;
4979  result = p->out;
4980  p->out = NULL;
4981  if (req_comp && req_comp != p->s->img_out_n) {
4982  if (ri->bits_per_channel == 8)
4983  result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
4984  else
4985  result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
4986  p->s->img_out_n = req_comp;
4987  if (result == NULL) return result;
4988  }
4989  *x = p->s->img_x;
4990  *y = p->s->img_y;
4991  if (n) *n = p->s->img_n;
4992  }
4993  STBI_FREE(p->out); p->out = NULL;
4994  STBI_FREE(p->expanded); p->expanded = NULL;
4995  STBI_FREE(p->idata); p->idata = NULL;
4996 
4997  return result;
4998 }
4999 
5000 static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5001 {
5002  stbi__png p;
5003  p.s = s;
5004  return stbi__do_png(&p, x,y,comp,req_comp, ri);
5005 }
5006 
5007 static int stbi__png_test(stbi__context *s)
5008 {
5009  int r;
5010  r = stbi__check_png_header(s);
5011  stbi__rewind(s);
5012  return r;
5013 }
5014 
5015 static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
5016 {
5017  if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
5018  stbi__rewind( p->s );
5019  return 0;
5020  }
5021  if (x) *x = p->s->img_x;
5022  if (y) *y = p->s->img_y;
5023  if (comp) *comp = p->s->img_n;
5024  return 1;
5025 }
5026 
5027 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
5028 {
5029  stbi__png p;
5030  p.s = s;
5031  return stbi__png_info_raw(&p, x, y, comp);
5032 }
5033 
5034 static int stbi__png_is16(stbi__context *s)
5035 {
5036  stbi__png p;
5037  p.s = s;
5038  if (!stbi__png_info_raw(&p, NULL, NULL, NULL))
5039  return 0;
5040  if (p.depth != 16) {
5041  stbi__rewind(p.s);
5042  return 0;
5043  }
5044  return 1;
5045 }
5046 #endif
5047 
5048 // Microsoft/Windows BMP image
5049 
5050 #ifndef STBI_NO_BMP
5051 static int stbi__bmp_test_raw(stbi__context *s)
5052 {
5053  int r;
5054  int sz;
5055  if (stbi__get8(s) != 'B') return 0;
5056  if (stbi__get8(s) != 'M') return 0;
5057  stbi__get32le(s); // discard filesize
5058  stbi__get16le(s); // discard reserved
5059  stbi__get16le(s); // discard reserved
5060  stbi__get32le(s); // discard data offset
5061  sz = stbi__get32le(s);
5062  r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
5063  return r;
5064 }
5065 
5066 static int stbi__bmp_test(stbi__context *s)
5067 {
5068  int r = stbi__bmp_test_raw(s);
5069  stbi__rewind(s);
5070  return r;
5071 }
5072 
5073 
// Index (0..31) of the highest set bit of z, or -1 when z is zero.
// Found by a binary search over shift widths 16, 8, 4, 2, 1.
static int stbi__high_bit(unsigned int z)
{
   static const int widths[5] = { 16, 8, 4, 2, 1 };
   int pos = 0;
   int w;

   if (z == 0) return -1;

   for (w = 0; w < 5; ++w) {
      // anything left above this width? then the top bit is at least that high
      if ((z >> widths[w]) != 0) {
         pos += widths[w];
         z >>= widths[w];
      }
   }
   return pos;
}
5086 
// Number of set bits in a (0..32 for a 32-bit unsigned int).
static int stbi__bitcount(unsigned int a)
{
   int count = 0;
   // each iteration clears the lowest set bit
   while (a != 0) {
      a &= a - 1;
      ++count;
   }
   return count;
}
5096 
// Extract an arbitrarily-aligned N-bit field (N = bits, 0..8) from v and
// expand it to the full 0..255 range.
//   v     : value already masked down to the field of interest
//   shift : right-shift that puts the field's top bit at bit 7
//           (negative means shift left instead)
//   bits  : width of the field in bits
// The field is replicated into 8 bits via a per-width multiplier followed by
// a per-width right shift.
// Returns 0 when `bits` is outside 0..8 (only reachable with asserts
// compiled out); otherwise the expanded value.
static int stbi__shiftsigned(unsigned int v, int shift, int bits)
{
   static const unsigned int mul_table[9] = {
      0,
      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
   };
   static const unsigned int shift_table[9] = {
      0, 0,0,1,0,2,4,6,0,
   };

   // validate the width BEFORE it is used as a shift amount and table index;
   // the guard keeps release builds away from an out-of-range shift and an
   // out-of-bounds table read
   STBI_ASSERT(bits >= 0 && bits <= 8);
   if (bits < 0 || bits > 8) return 0;

   if (shift < 0)
      v <<= -shift;
   else
      v >>= shift;
   STBI_ASSERT(v < 256);
   v >>= (8-bits);
   return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits];
}
5119 
// Header fields gathered by stbi__bmp_parse_header for the BMP loader.
typedef struct
{
   int bpp;            // bits-per-pixel field read from the header
   int offset;         // data-offset field read from the file header
   int hsz;            // info-header size field
   unsigned int mr;    // red channel bit mask
   unsigned int mg;    // green channel bit mask
   unsigned int mb;    // blue channel bit mask
   unsigned int ma;    // alpha channel bit mask
   unsigned int all_a; // seed for the loader's OR-accumulator of alpha values
} stbi__bmp_data;
5125 
5126 static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
5127 {
5128  int hsz;
5129  if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
5130  stbi__get32le(s); // discard filesize
5131  stbi__get16le(s); // discard reserved
5132  stbi__get16le(s); // discard reserved
5133  info->offset = stbi__get32le(s);
5134  info->hsz = hsz = stbi__get32le(s);
5135  info->mr = info->mg = info->mb = info->ma = 0;
5136 
5137  if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
5138  if (hsz == 12) {
5139  s->img_x = stbi__get16le(s);
5140  s->img_y = stbi__get16le(s);
5141  } else {
5142  s->img_x = stbi__get32le(s);
5143  s->img_y = stbi__get32le(s);
5144  }
5145  if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
5146  info->bpp = stbi__get16le(s);
5147  if (hsz != 12) {
5148  int compress = stbi__get32le(s);
5149  if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
5150  stbi__get32le(s); // discard sizeof
5151  stbi__get32le(s); // discard hres
5152  stbi__get32le(s); // discard vres
5153  stbi__get32le(s); // discard colorsused
5154  stbi__get32le(s); // discard max important
5155  if (hsz == 40 || hsz == 56) {
5156  if (hsz == 56) {
5157  stbi__get32le(s);
5158  stbi__get32le(s);
5159  stbi__get32le(s);
5160  stbi__get32le(s);
5161  }
5162  if (info->bpp == 16 || info->bpp == 32) {
5163  if (compress == 0) {
5164  if (info->bpp == 32) {
5165  info->mr = 0xffu << 16;
5166  info->mg = 0xffu << 8;
5167  info->mb = 0xffu << 0;
5168  info->ma = 0xffu << 24;
5169  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
5170  } else {
5171  info->mr = 31u << 10;
5172  info->mg = 31u << 5;
5173  info->mb = 31u << 0;
5174  }
5175  } else if (compress == 3) {
5176  info->mr = stbi__get32le(s);
5177  info->mg = stbi__get32le(s);
5178  info->mb = stbi__get32le(s);
5179  // not documented, but generated by photoshop and handled by mspaint
5180  if (info->mr == info->mg && info->mg == info->mb) {
5181  // ?!?!?
5182  return stbi__errpuc("bad BMP", "bad BMP");
5183  }
5184  } else
5185  return stbi__errpuc("bad BMP", "bad BMP");
5186  }
5187  } else {
5188  int i;
5189  if (hsz != 108 && hsz != 124)
5190  return stbi__errpuc("bad BMP", "bad BMP");
5191  info->mr = stbi__get32le(s);
5192  info->mg = stbi__get32le(s);
5193  info->mb = stbi__get32le(s);
5194  info->ma = stbi__get32le(s);
5195  stbi__get32le(s); // discard color space
5196  for (i=0; i < 12; ++i)
5197  stbi__get32le(s); // discard color space parameters
5198  if (hsz == 124) {
5199  stbi__get32le(s); // discard rendering intent
5200  stbi__get32le(s); // discard offset of profile data
5201  stbi__get32le(s); // discard size of profile data
5202  stbi__get32le(s); // discard reserved
5203  }
5204  }
5205  }
5206  return (void *) 1;
5207 }
5208 
5209 
5210 static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5211 {
5212  stbi_uc *out;
5213  unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
5214  stbi_uc pal[256][4];
5215  int psize=0,i,j,width;
5216  int flip_vertically, pad, target;
5217  stbi__bmp_data info;
5218  STBI_NOTUSED(ri);
5219 
5220  info.all_a = 255;
5221  if (stbi__bmp_parse_header(s, &info) == NULL)
5222  return NULL; // error code already set
5223 
5224  flip_vertically = ((int) s->img_y) > 0;
5225  s->img_y = abs((int) s->img_y);
5226 
5227  mr = info.mr;
5228  mg = info.mg;
5229  mb = info.mb;
5230  ma = info.ma;
5231  all_a = info.all_a;
5232 
5233  if (info.hsz == 12) {
5234  if (info.bpp < 24)
5235  psize = (info.offset - 14 - 24) / 3;
5236  } else {
5237  if (info.bpp < 16)
5238  psize = (info.offset - 14 - info.hsz) >> 2;
5239  }
5240 
5241  if (info.bpp == 24 && ma == 0xff000000)
5242  s->img_n = 3;
5243  else
5244  s->img_n = ma ? 4 : 3;
5245  if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
5246  target = req_comp;
5247  else
5248  target = s->img_n; // if they want monochrome, we'll post-convert
5249 
5250  // sanity-check size
5251  if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
5252  return stbi__errpuc("too large", "Corrupt BMP");
5253 
5254  out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
5255  if (!out) return stbi__errpuc("outofmem", "Out of memory");
5256  if (info.bpp < 16) {
5257  int z=0;
5258  if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
5259  for (i=0; i < psize; ++i) {
5260  pal[i][2] = stbi__get8(s);
5261  pal[i][1] = stbi__get8(s);
5262  pal[i][0] = stbi__get8(s);
5263  if (info.hsz != 12) stbi__get8(s);
5264  pal[i][3] = 255;
5265  }
5266  stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
5267  if (info.bpp == 1) width = (s->img_x + 7) >> 3;
5268  else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
5269  else if (info.bpp == 8) width = s->img_x;
5270  else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
5271  pad = (-width)&3;
5272  if (info.bpp == 1) {
5273  for (j=0; j < (int) s->img_y; ++j) {
5274  int bit_offset = 7, v = stbi__get8(s);
5275  for (i=0; i < (int) s->img_x; ++i) {
5276  int color = (v>>bit_offset)&0x1;
5277  out[z++] = pal[color][0];
5278  out[z++] = pal[color][1];
5279  out[z++] = pal[color][2];
5280  if (target == 4) out[z++] = 255;
5281  if (i+1 == (int) s->img_x) break;
5282  if((--bit_offset) < 0) {
5283  bit_offset = 7;
5284  v = stbi__get8(s);
5285  }
5286  }
5287  stbi__skip(s, pad);
5288  }
5289  } else {
5290  for (j=0; j < (int) s->img_y; ++j) {
5291  for (i=0; i < (int) s->img_x; i += 2) {
5292  int v=stbi__get8(s),v2=0;
5293  if (info.bpp == 4) {
5294  v2 = v & 15;
5295  v >>= 4;
5296  }
5297  out[z++] = pal[v][0];
5298  out[z++] = pal[v][1];
5299  out[z++] = pal[v][2];
5300  if (target == 4) out[z++] = 255;
5301  if (i+1 == (int) s->img_x) break;
5302  v = (info.bpp == 8) ? stbi__get8(s) : v2;
5303  out[z++] = pal[v][0];
5304  out[z++] = pal[v][1];
5305  out[z++] = pal[v][2];
5306  if (target == 4) out[z++] = 255;
5307  }
5308  stbi__skip(s, pad);
5309  }
5310  }
5311  } else {
5312  int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
5313  int z = 0;
5314  int easy=0;
5315  stbi__skip(s, info.offset - 14 - info.hsz);
5316  if (info.bpp == 24) width = 3 * s->img_x;
5317  else if (info.bpp == 16) width = 2*s->img_x;
5318  else /* bpp = 32 and pad = 0 */ width=0;
5319  pad = (-width) & 3;
5320  if (info.bpp == 24) {
5321  easy = 1;
5322  } else if (info.bpp == 32) {
5323  if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
5324  easy = 2;
5325  }
5326  if (!easy) {
5327  if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
5328  // right shift amt to put high bit in position #7
5329  rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
5330  gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
5331  bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
5332  ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
5333  }
5334  for (j=0; j < (int) s->img_y; ++j) {
5335  if (easy) {
5336  for (i=0; i < (int) s->img_x; ++i) {
5337  unsigned char a;
5338  out[z+2] = stbi__get8(s);
5339  out[z+1] = stbi__get8(s);
5340  out[z+0] = stbi__get8(s);
5341  z += 3;
5342  a = (easy == 2 ? stbi__get8(s) : 255);
5343  all_a |= a;
5344  if (target == 4) out[z++] = a;
5345  }
5346  } else {
5347  int bpp = info.bpp;
5348  for (i=0; i < (int) s->img_x; ++i) {
5349  stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
5350  unsigned int a;
5351  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
5352  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
5353  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
5354  a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
5355  all_a |= a;
5356  if (target == 4) out[z++] = STBI__BYTECAST(a);
5357  }
5358  }
5359  stbi__skip(s, pad);
5360  }
5361  }
5362 
5363  // if alpha channel is all 0s, replace with all 255s
5364  if (target == 4 && all_a == 0)
5365  for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
5366  out[i] = 255;
5367 
5368  if (flip_vertically) {
5369  stbi_uc t;
5370  for (j=0; j < (int) s->img_y>>1; ++j) {
5371  stbi_uc *p1 = out + j *s->img_x*target;
5372  stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
5373  for (i=0; i < (int) s->img_x*target; ++i) {
5374  t = p1[i]; p1[i] = p2[i]; p2[i] = t;
5375  }
5376  }
5377  }
5378 
5379  if (req_comp && req_comp != target) {
5380  out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
5381  if (out == NULL) return out; // stbi__convert_format frees input on failure
5382  }
5383 
5384  *x = s->img_x;
5385  *y = s->img_y;
5386  if (comp) *comp = s->img_n;
5387  return out;
5388 }
5389 #endif
5390 
5391 // Targa Truevision - TGA
5392 // by Jonathan Dummer
5393 #ifndef STBI_NO_TGA
5394 // returns STBI_rgb or whatever, 0 on error
5395 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
5396 {
5397  // only RGB or RGBA (incl. 16bit) or grey allowed
5398  if (is_rgb16) *is_rgb16 = 0;
5399  switch(bits_per_pixel) {
5400  case 8: return STBI_grey;
5401  case 16: if(is_grey) return STBI_grey_alpha;
5402  // fallthrough
5403  case 15: if(is_rgb16) *is_rgb16 = 1;
5404  return STBI_rgb;
5405  case 24: // fallthrough
5406  case 32: return bits_per_pixel/8;
5407  default: return 0;
5408  }
5409 }
5410 
5411 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
5412 {
5413  int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
5414  int sz, tga_colormap_type;
5415  stbi__get8(s); // discard Offset
5416  tga_colormap_type = stbi__get8(s); // colormap type
5417  if( tga_colormap_type > 1 ) {
5418  stbi__rewind(s);
5419  return 0; // only RGB or indexed allowed
5420  }
5421  tga_image_type = stbi__get8(s); // image type
5422  if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
5423  if (tga_image_type != 1 && tga_image_type != 9) {
5424  stbi__rewind(s);
5425  return 0;
5426  }
5427  stbi__skip(s,4); // skip index of first colormap entry and number of entries
5428  sz = stbi__get8(s); // check bits per palette color entry
5429  if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
5430  stbi__rewind(s);
5431  return 0;
5432  }
5433  stbi__skip(s,4); // skip image x and y origin
5434  tga_colormap_bpp = sz;
5435  } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
5436  if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
5437  stbi__rewind(s);
5438  return 0; // only RGB or grey allowed, +/- RLE
5439  }
5440  stbi__skip(s,9); // skip colormap specification and image x/y origin
5441  tga_colormap_bpp = 0;
5442  }
5443  tga_w = stbi__get16le(s);
5444  if( tga_w < 1 ) {
5445  stbi__rewind(s);
5446  return 0; // test width
5447  }
5448  tga_h = stbi__get16le(s);
5449  if( tga_h < 1 ) {
5450  stbi__rewind(s);
5451  return 0; // test height
5452  }
5453  tga_bits_per_pixel = stbi__get8(s); // bits per pixel
5454  stbi__get8(s); // ignore alpha bits
5455  if (tga_colormap_bpp != 0) {
5456  if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
5457  // when using a colormap, tga_bits_per_pixel is the size of the indexes
5458  // I don't think anything but 8 or 16bit indexes makes sense
5459  stbi__rewind(s);
5460  return 0;
5461  }
5462  tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
5463  } else {
5464  tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
5465  }
5466  if(!tga_comp) {
5467  stbi__rewind(s);
5468  return 0;
5469  }
5470  if (x) *x = tga_w;
5471  if (y) *y = tga_h;
5472  if (comp) *comp = tga_comp;
5473  return 1; // seems to have passed everything
5474 }
5475 
5476 static int stbi__tga_test(stbi__context *s)
5477 {
5478  int res = 0;
5479  int sz, tga_color_type;
5480  stbi__get8(s); // discard Offset
5481  tga_color_type = stbi__get8(s); // color type
5482  if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed
5483  sz = stbi__get8(s); // image type
5484  if ( tga_color_type == 1 ) { // colormapped (paletted) image
5485  if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
5486  stbi__skip(s,4); // skip index of first colormap entry and number of entries
5487  sz = stbi__get8(s); // check bits per palette color entry
5488  if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
5489  stbi__skip(s,4); // skip image x and y origin
5490  } else { // "normal" image w/o colormap
5491  if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
5492  stbi__skip(s,9); // skip colormap specification and image x/y origin
5493  }
5494  if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width
5495  if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height
5496  sz = stbi__get8(s); // bits per pixel
5497  if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
5498  if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
5499 
5500  res = 1; // if we got this far, everything's good and we can return 1 instead of 0
5501 
5502 errorEnd:
5503  stbi__rewind(s);
5504  return res;
5505 }
5506 
5507 // read 16bit value and convert to 24bit RGB
5508 static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
5509 {
5510  stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
5511  stbi__uint16 fiveBitMask = 31;
5512  // we have 3 channels with 5bits each
5513  int r = (px >> 10) & fiveBitMask;
5514  int g = (px >> 5) & fiveBitMask;
5515  int b = px & fiveBitMask;
5516  // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
5517  out[0] = (stbi_uc)((r * 255)/31);
5518  out[1] = (stbi_uc)((g * 255)/31);
5519  out[2] = (stbi_uc)((b * 255)/31);
5520 
5521  // some people claim that the most significant bit might be used for alpha
5522  // (possibly if an alpha-bit is set in the "image descriptor byte")
5523  // but that only made 16bit test images completely translucent..
5524  // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
5525 }
5526 
5527 static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5528 {
5529  // read in the TGA header stuff
5530  int tga_offset = stbi__get8(s);
5531  int tga_indexed = stbi__get8(s);
5532  int tga_image_type = stbi__get8(s);
5533  int tga_is_RLE = 0;
5534  int tga_palette_start = stbi__get16le(s);
5535  int tga_palette_len = stbi__get16le(s);
5536  int tga_palette_bits = stbi__get8(s);
5537  int tga_x_origin = stbi__get16le(s);
5538  int tga_y_origin = stbi__get16le(s);
5539  int tga_width = stbi__get16le(s);
5540  int tga_height = stbi__get16le(s);
5541  int tga_bits_per_pixel = stbi__get8(s);
5542  int tga_comp, tga_rgb16=0;
5543  int tga_inverted = stbi__get8(s);
5544  // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
5545  // image data
5546  unsigned char *tga_data;
5547  unsigned char *tga_palette = NULL;
5548  int i, j;
5549  unsigned char raw_data[4] = {0};
5550  int RLE_count = 0;
5551  int RLE_repeating = 0;
5552  int read_next_pixel = 1;
5553  STBI_NOTUSED(ri);
5554  STBI_NOTUSED(tga_x_origin); // @TODO
5555  STBI_NOTUSED(tga_y_origin); // @TODO
5556 
5557  // do a tiny bit of precessing
5558  if ( tga_image_type >= 8 )
5559  {
5560  tga_image_type -= 8;
5561  tga_is_RLE = 1;
5562  }
5563  tga_inverted = 1 - ((tga_inverted >> 5) & 1);
5564 
5565  // If I'm paletted, then I'll use the number of bits from the palette
5566  if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
5567  else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
5568 
5569  if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
5570  return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
5571 
5572  // tga info
5573  *x = tga_width;
5574  *y = tga_height;
5575  if (comp) *comp = tga_comp;
5576 
5577  if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
5578  return stbi__errpuc("too large", "Corrupt TGA");
5579 
5580  tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
5581  if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
5582 
5583  // skip to the data's starting position (offset usually = 0)
5584  stbi__skip(s, tga_offset );
5585 
5586  if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
5587  for (i=0; i < tga_height; ++i) {
5588  int row = tga_inverted ? tga_height -i - 1 : i;
5589  stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
5590  stbi__getn(s, tga_row, tga_width * tga_comp);
5591  }
5592  } else {
5593  // do I need to load a palette?
5594  if ( tga_indexed)
5595  {
5596  // any data to skip? (offset usually = 0)
5597  stbi__skip(s, tga_palette_start );
5598  // load the palette
5599  tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
5600  if (!tga_palette) {
5601  STBI_FREE(tga_data);
5602  return stbi__errpuc("outofmem", "Out of memory");
5603  }
5604  if (tga_rgb16) {
5605  stbi_uc *pal_entry = tga_palette;
5606  STBI_ASSERT(tga_comp == STBI_rgb);
5607  for (i=0; i < tga_palette_len; ++i) {
5608  stbi__tga_read_rgb16(s, pal_entry);
5609  pal_entry += tga_comp;
5610  }
5611  } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
5612  STBI_FREE(tga_data);
5613  STBI_FREE(tga_palette);
5614  return stbi__errpuc("bad palette", "Corrupt TGA");
5615  }
5616  }
5617  // load the data
5618  for (i=0; i < tga_width * tga_height; ++i)
5619  {
5620  // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
5621  if ( tga_is_RLE )
5622  {
5623  if ( RLE_count == 0 )
5624  {
5625  // yep, get the next byte as a RLE command
5626  int RLE_cmd = stbi__get8(s);
5627  RLE_count = 1 + (RLE_cmd & 127);
5628  RLE_repeating = RLE_cmd >> 7;
5629  read_next_pixel = 1;
5630  } else if ( !RLE_repeating )
5631  {
5632  read_next_pixel = 1;
5633  }
5634  } else
5635  {
5636  read_next_pixel = 1;
5637  }
5638  // OK, if I need to read a pixel, do it now
5639  if ( read_next_pixel )
5640  {
5641  // load however much data we did have
5642  if ( tga_indexed )
5643  {
5644  // read in index, then perform the lookup
5645  int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
5646  if ( pal_idx >= tga_palette_len ) {
5647  // invalid index
5648  pal_idx = 0;
5649  }
5650  pal_idx *= tga_comp;
5651  for (j = 0; j < tga_comp; ++j) {
5652  raw_data[j] = tga_palette[pal_idx+j];
5653  }
5654  } else if(tga_rgb16) {
5655  STBI_ASSERT(tga_comp == STBI_rgb);
5656  stbi__tga_read_rgb16(s, raw_data);
5657  } else {
5658  // read in the data raw
5659  for (j = 0; j < tga_comp; ++j) {
5660  raw_data[j] = stbi__get8(s);
5661  }
5662  }
5663  // clear the reading flag for the next pixel
5664  read_next_pixel = 0;
5665  } // end of reading a pixel
5666 
5667  // copy data
5668  for (j = 0; j < tga_comp; ++j)
5669  tga_data[i*tga_comp+j] = raw_data[j];
5670 
5671  // in case we're in RLE mode, keep counting down
5672  --RLE_count;
5673  }
5674  // do I need to invert the image?
5675  if ( tga_inverted )
5676  {
5677  for (j = 0; j*2 < tga_height; ++j)
5678  {
5679  int index1 = j * tga_width * tga_comp;
5680  int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
5681  for (i = tga_width * tga_comp; i > 0; --i)
5682  {
5683  unsigned char temp = tga_data[index1];
5684  tga_data[index1] = tga_data[index2];
5685  tga_data[index2] = temp;
5686  ++index1;
5687  ++index2;
5688  }
5689  }
5690  }
5691  // clear my palette, if I had one
5692  if ( tga_palette != NULL )
5693  {
5694  STBI_FREE( tga_palette );
5695  }
5696  }
5697 
5698  // swap RGB - if the source data was RGB16, it already is in the right order
5699  if (tga_comp >= 3 && !tga_rgb16)
5700  {
5701  unsigned char* tga_pixel = tga_data;
5702  for (i=0; i < tga_width * tga_height; ++i)
5703  {
5704  unsigned char temp = tga_pixel[0];
5705  tga_pixel[0] = tga_pixel[2];
5706  tga_pixel[2] = temp;
5707  tga_pixel += tga_comp;
5708  }
5709  }
5710 
5711  // convert to target component count
5712  if (req_comp && req_comp != tga_comp)
5713  tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
5714 
5715  // the things I do to get rid of an error message, and yet keep
5716  // Microsoft's C compilers happy... [8^(
5717  tga_palette_start = tga_palette_len = tga_palette_bits =
5718  tga_x_origin = tga_y_origin = 0;
5719  STBI_NOTUSED(tga_palette_start);
5720  // OK, done
5721  return tga_data;
5722 }
5723 #endif
5724 
5725 // *************************************************************************************************
5726 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5727 
5728 #ifndef STBI_NO_PSD
5729 static int stbi__psd_test(stbi__context *s)
5730 {
5731  int r = (stbi__get32be(s) == 0x38425053);
5732  stbi__rewind(s);
5733  return r;
5734 }
5735 
5736 static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
5737 {
5738  int count, nleft, len;
5739 
5740  count = 0;
5741  while ((nleft = pixelCount - count) > 0) {
5742  len = stbi__get8(s);
5743  if (len == 128) {
5744  // No-op.
5745  } else if (len < 128) {
5746  // Copy next len+1 bytes literally.
5747  len++;
5748  if (len > nleft) return 0; // corrupt data
5749  count += len;
5750  while (len) {
5751  *p = stbi__get8(s);
5752  p += 4;
5753  len--;
5754  }
5755  } else if (len > 128) {
5756  stbi_uc val;
5757  // Next -len+1 bytes in the dest are replicated from next source byte.
5758  // (Interpret len as a negative 8-bit int.)
5759  len = 257 - len;
5760  if (len > nleft) return 0; // corrupt data
5761  val = stbi__get8(s);
5762  count += len;
5763  while (len) {
5764  *p = val;
5765  p += 4;
5766  len--;
5767  }
5768  }
5769  }
5770 
5771  return 1;
5772 }
5773 
5774 static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
5775 {
5776  int pixelCount;
5777  int channelCount, compression;
5778  int channel, i;
5779  int bitdepth;
5780  int w,h;
5781  stbi_uc *out;
5782  STBI_NOTUSED(ri);
5783 
5784  // Check identifier
5785  if (stbi__get32be(s) != 0x38425053) // "8BPS"
5786  return stbi__errpuc("not PSD", "Corrupt PSD image");
5787 
5788  // Check file type version.
5789  if (stbi__get16be(s) != 1)
5790  return stbi__errpuc("wrong version", "Unsupported version of PSD image");
5791 
5792  // Skip 6 reserved bytes.
5793  stbi__skip(s, 6 );
5794 
5795  // Read the number of channels (R, G, B, A, etc).
5796  channelCount = stbi__get16be(s);
5797  if (channelCount < 0 || channelCount > 16)
5798  return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
5799 
5800  // Read the rows and columns of the image.
5801  h = stbi__get32be(s);
5802  w = stbi__get32be(s);
5803 
5804  // Make sure the depth is 8 bits.
5805  bitdepth = stbi__get16be(s);
5806  if (bitdepth != 8 && bitdepth != 16)
5807  return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
5808 
5809  // Make sure the color mode is RGB.
5810  // Valid options are:
5811  // 0: Bitmap
5812  // 1: Grayscale
5813  // 2: Indexed color
5814  // 3: RGB color
5815  // 4: CMYK color
5816  // 7: Multichannel
5817  // 8: Duotone
5818  // 9: Lab color
5819  if (stbi__get16be(s) != 3)
5820  return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
5821 
5822  // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.)
5823  stbi__skip(s,stbi__get32be(s) );
5824 
5825  // Skip the image resources. (resolution, pen tool paths, etc)
5826  stbi__skip(s, stbi__get32be(s) );
5827 
5828  // Skip the reserved data.
5829  stbi__skip(s, stbi__get32be(s) );
5830 
5831  // Find out if the data is compressed.
5832  // Known values:
5833  // 0: no compression
5834  // 1: RLE compressed
5835  compression = stbi__get16be(s);
5836  if (compression > 1)
5837  return stbi__errpuc("bad compression", "PSD has an unknown compression format");
5838 
5839  // Check size
5840  if (!stbi__mad3sizes_valid(4, w, h, 0))
5841  return stbi__errpuc("too large", "Corrupt PSD");
5842 
5843  // Create the destination image.
5844 
5845  if (!compression && bitdepth == 16 && bpc == 16) {
5846  out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
5847  ri->bits_per_channel = 16;
5848  } else
5849  out = (stbi_uc *) stbi__malloc(4 * w*h);
5850 
5851  if (!out) return stbi__errpuc("outofmem", "Out of memory");
5852  pixelCount = w*h;
5853 
5854  // Initialize the data to zero.
5855  //memset( out, 0, pixelCount * 4 );
5856 
5857  // Finally, the image data.
5858  if (compression) {
5859  // RLE as used by .PSD and .TIFF
5860  // Loop until you get the number of unpacked bytes you are expecting:
5861  // Read the next source byte into n.
5862  // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
5863  // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
5864  // Else if n is 128, noop.
5865  // Endloop
5866 
5867  // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
5868  // which we're going to just skip.
5869  stbi__skip(s, h * channelCount * 2 );
5870 
5871  // Read the RLE data by channel.
5872  for (channel = 0; channel < 4; channel++) {
5873  stbi_uc *p;
5874 
5875  p = out+channel;
5876  if (channel >= channelCount) {
5877  // Fill this channel with default data.
5878  for (i = 0; i < pixelCount; i++, p += 4)
5879  *p = (channel == 3 ? 255 : 0);
5880  } else {
5881  // Read the RLE data.
5882  if (!stbi__psd_decode_rle(s, p, pixelCount)) {
5883  STBI_FREE(out);
5884  return stbi__errpuc("corrupt", "bad RLE data");
5885  }
5886  }
5887  }
5888 
5889  } else {
5890  // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
5891  // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
5892 
5893  // Read the data by channel.
5894  for (channel = 0; channel < 4; channel++) {
5895  if (channel >= channelCount) {
5896  // Fill this channel with default data.
5897  if (bitdepth == 16 && bpc == 16) {
5898  stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
5899  stbi__uint16 val = channel == 3 ? 65535 : 0;
5900  for (i = 0; i < pixelCount; i++, q += 4)
5901  *q = val;
5902  } else {
5903  stbi_uc *p = out+channel;
5904  stbi_uc val = channel == 3 ? 255 : 0;
5905  for (i = 0; i < pixelCount; i++, p += 4)
5906  *p = val;
5907  }
5908  } else {
5909  if (ri->bits_per_channel == 16) { // output bpc
5910  stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
5911  for (i = 0; i < pixelCount; i++, q += 4)
5912  *q = (stbi__uint16) stbi__get16be(s);
5913  } else {
5914  stbi_uc *p = out+channel;
5915  if (bitdepth == 16) { // input bpc
5916  for (i = 0; i < pixelCount; i++, p += 4)
5917  *p = (stbi_uc) (stbi__get16be(s) >> 8);
5918  } else {
5919  for (i = 0; i < pixelCount; i++, p += 4)
5920  *p = stbi__get8(s);
5921  }
5922  }
5923  }
5924  }
5925  }
5926 
5927  // remove weird white matte from PSD
5928  if (channelCount >= 4) {
5929  if (ri->bits_per_channel == 16) {
5930  for (i=0; i < w*h; ++i) {
5931  stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
5932  if (pixel[3] != 0 && pixel[3] != 65535) {
5933  float a = pixel[3] / 65535.0f;
5934  float ra = 1.0f / a;
5935  float inv_a = 65535.0f * (1 - ra);
5936  pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
5937  pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
5938  pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
5939  }
5940  }
5941  } else {
5942  for (i=0; i < w*h; ++i) {
5943  unsigned char *pixel = out + 4*i;
5944  if (pixel[3] != 0 && pixel[3] != 255) {
5945  float a = pixel[3] / 255.0f;
5946  float ra = 1.0f / a;
5947  float inv_a = 255.0f * (1 - ra);
5948  pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
5949  pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
5950  pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
5951  }
5952  }
5953  }
5954  }
5955 
5956  // convert to desired output format
5957  if (req_comp && req_comp != 4) {
5958  if (ri->bits_per_channel == 16)
5959  out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
5960  else
5961  out = stbi__convert_format(out, 4, req_comp, w, h);
5962  if (out == NULL) return out; // stbi__convert_format frees input on failure
5963  }
5964 
5965  if (comp) *comp = 4;
5966  *y = h;
5967  *x = w;
5968 
5969  return out;
5970 }
5971 #endif
5972 
5973 // *************************************************************************************************
5974 // Softimage PIC loader
5975 // by Tom Seddon
5976 //
5977 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5978 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5979 
5980 #ifndef STBI_NO_PIC
5981 static int stbi__pic_is4(stbi__context *s,const char *str)
5982 {
5983  int i;
5984  for (i=0; i<4; ++i)
5985  if (stbi__get8(s) != (stbi_uc)str[i])
5986  return 0;
5987 
5988  return 1;
5989 }
5990 
5991 static int stbi__pic_test_core(stbi__context *s)
5992 {
5993  int i;
5994 
5995  if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
5996  return 0;
5997 
5998  for(i=0;i<84;++i)
5999  stbi__get8(s);
6000 
6001  if (!stbi__pic_is4(s,"PICT"))
6002  return 0;
6003 
6004  return 1;
6005 }
6006 
6007 typedef struct
6008 {
6009  stbi_uc size,type,channel;
6010 } stbi__pic_packet;
6011 
6012 static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
6013 {
6014  int mask=0x80, i;
6015 
6016  for (i=0; i<4; ++i, mask>>=1) {
6017  if (channel & mask) {
6018  if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
6019  dest[i]=stbi__get8(s);
6020  }
6021  }
6022 
6023  return dest;
6024 }
6025 
6026 static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
6027 {
6028  int mask=0x80,i;
6029 
6030  for (i=0;i<4; ++i, mask>>=1)
6031  if (channel&mask)
6032  dest[i]=src[i];
6033 }
6034 
6035 static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
6036 {
6037  int act_comp=0,num_packets=0,y,chained;
6038  stbi__pic_packet packets[10];
6039 
6040  // this will (should...) cater for even some bizarre stuff like having data
6041  // for the same channel in multiple packets.
6042  do {
6043  stbi__pic_packet *packet;
6044 
6045  if (num_packets==sizeof(packets)/sizeof(packets[0]))
6046  return stbi__errpuc("bad format","too many packets");
6047 
6048  packet = &packets[num_packets++];
6049 
6050  chained = stbi__get8(s);
6051  packet->size = stbi__get8(s);
6052  packet->type = stbi__get8(s);
6053  packet->channel = stbi__get8(s);
6054 
6055  act_comp |= packet->channel;
6056 
6057  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)");
6058  if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp");
6059  } while (chained);
6060 
6061  *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
6062 
6063  for(y=0; y<height; ++y) {
6064  int packet_idx;
6065 
6066  for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
6067  stbi__pic_packet *packet = &packets[packet_idx];
6068  stbi_uc *dest = result+y*width*4;
6069 
6070  switch (packet->type) {
6071  default:
6072  return stbi__errpuc("bad format","packet has bad compression type");
6073 
6074  case 0: {//uncompressed
6075  int x;
6076 
6077  for(x=0;x<width;++x, dest+=4)
6078  if (!stbi__readval(s,packet->channel,dest))
6079  return 0;
6080  break;
6081  }
6082 
6083  case 1://Pure RLE
6084  {
6085  int left=width, i;
6086 
6087  while (left>0) {
6088  stbi_uc count,value[4];
6089 
6090  count=stbi__get8(s);
6091  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)");
6092 
6093  if (count > left)
6094  count = (stbi_uc) left;
6095 
6096  if (!stbi__readval(s,packet->channel,value)) return 0;
6097 
6098  for(i=0; i<count; ++i,dest+=4)
6099  stbi__copyval(packet->channel,dest,value);
6100  left -= count;
6101  }
6102  }
6103  break;
6104 
6105  case 2: {//Mixed RLE
6106  int left=width;
6107  while (left>0) {
6108  int count = stbi__get8(s), i;
6109  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)");
6110 
6111  if (count >= 128) { // Repeated
6112  stbi_uc value[4];
6113 
6114  if (count==128)
6115  count = stbi__get16be(s);
6116  else
6117  count -= 127;
6118  if (count > left)
6119  return stbi__errpuc("bad file","scanline overrun");
6120 
6121  if (!stbi__readval(s,packet->channel,value))
6122  return 0;
6123 
6124  for(i=0;i<count;++i, dest += 4)
6125  stbi__copyval(packet->channel,dest,value);
6126  } else { // Raw
6127  ++count;
6128  if (count>left) return stbi__errpuc("bad file","scanline overrun");
6129 
6130  for(i=0;i<count;++i, dest+=4)
6131  if (!stbi__readval(s,packet->channel,dest))
6132  return 0;
6133  }
6134  left-=count;
6135  }
6136  break;
6137  }
6138  }
6139  }
6140  }
6141 
6142  return result;
6143 }
6144 
6145 static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
6146 {
6147  stbi_uc *result;
6148  int i, x,y, internal_comp;
6149  STBI_NOTUSED(ri);
6150 
6151  if (!comp) comp = &internal_comp;
6152 
6153  for (i=0; i<92; ++i)
6154  stbi__get8(s);
6155 
6156  x = stbi__get16be(s);
6157  y = stbi__get16be(s);
6158  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)");
6159  if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
6160 
6161  stbi__get32be(s); //skip `ratio'
6162  stbi__get16be(s); //skip `fields'
6163  stbi__get16be(s); //skip `pad'
6164 
6165  // intermediate buffer is RGBA
6166  result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
6167  memset(result, 0xff, x*y*4);
6168 
6169  if (!stbi__pic_load_core(s,x,y,comp, result)) {
6170  STBI_FREE(result);
6171  result=0;
6172  }
6173  *px = x;
6174  *py = y;
6175  if (req_comp == 0) req_comp = *comp;
6176  result=stbi__convert_format(result,4,req_comp,x,y);
6177 
6178  return result;
6179 }
6180 
6181 static int stbi__pic_test(stbi__context *s)
6182 {
6183  int r = stbi__pic_test_core(s);
6184  stbi__rewind(s);
6185  return r;
6186 }
6187 #endif
6188 
6189 // *************************************************************************************************
6190 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6191 
6192 #ifndef STBI_NO_GIF
6193 typedef struct
6194 {
6195  stbi__int16 prefix;
6196  stbi_uc first;
6197  stbi_uc suffix;
6198 } stbi__gif_lzw;
6199 
6200 typedef struct
6201 {
6202  int w,h;
6203  stbi_uc *out; // output buffer (always 4 components)
6204  stbi_uc *background; // The current "background" as far as a gif is concerned
6205  stbi_uc *history;
6206  int flags, bgindex, ratio, transparent, eflags;
6207  stbi_uc pal[256][4];
6208  stbi_uc lpal[256][4];
6209  stbi__gif_lzw codes[8192];
6210  stbi_uc *color_table;
6211  int parse, step;
6212  int lflags;
6213  int start_x, start_y;
6214  int max_x, max_y;
6215  int cur_x, cur_y;
6216  int line_size;
6217  int delay;
6218 } stbi__gif;
6219 
6220 static int stbi__gif_test_raw(stbi__context *s)
6221 {
6222  int sz;
6223  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
6224  sz = stbi__get8(s);
6225  if (sz != '9' && sz != '7') return 0;
6226  if (stbi__get8(s) != 'a') return 0;
6227  return 1;
6228 }
6229 
6230 static int stbi__gif_test(stbi__context *s)
6231 {
6232  int r = stbi__gif_test_raw(s);
6233  stbi__rewind(s);
6234  return r;
6235 }
6236 
6237 static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
6238 {
6239  int i;
6240  for (i=0; i < num_entries; ++i) {
6241  pal[i][2] = stbi__get8(s);
6242  pal[i][1] = stbi__get8(s);
6243  pal[i][0] = stbi__get8(s);
6244  pal[i][3] = transp == i ? 0 : 255;
6245  }
6246 }
6247 
6248 static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
6249 {
6250  stbi_uc version;
6251  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
6252  return stbi__err("not GIF", "Corrupt GIF");
6253 
6254  version = stbi__get8(s);
6255  if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF");
6256  if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF");
6257 
6258  stbi__g_failure_reason = "";
6259  g->w = stbi__get16le(s);
6260  g->h = stbi__get16le(s);
6261  g->flags = stbi__get8(s);
6262  g->bgindex = stbi__get8(s);
6263  g->ratio = stbi__get8(s);
6264  g->transparent = -1;
6265 
6266  if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments
6267 
6268  if (is_info) return 1;
6269 
6270  if (g->flags & 0x80)
6271  stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
6272 
6273  return 1;
6274 }
6275 
6276 static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
6277 {
6278  stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
6279  if (!stbi__gif_header(s, g, comp, 1)) {
6280  STBI_FREE(g);
6281  stbi__rewind( s );
6282  return 0;
6283  }
6284  if (x) *x = g->w;
6285  if (y) *y = g->h;
6286  STBI_FREE(g);
6287  return 1;
6288 }
6289 
6290 static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
6291 {
6292  stbi_uc *p, *c;
6293  int idx;
6294 
6295  // recurse to decode the prefixes, since the linked-list is backwards,
6296  // and working backwards through an interleaved image would be nasty
6297  if (g->codes[code].prefix >= 0)
6298  stbi__out_gif_code(g, g->codes[code].prefix);
6299 
6300  if (g->cur_y >= g->max_y) return;
6301 
6302  idx = g->cur_x + g->cur_y;
6303  p = &g->out[idx];
6304  g->history[idx / 4] = 1;
6305 
6306  c = &g->color_table[g->codes[code].suffix * 4];
6307  if (c[3] > 128) { // don't render transparent pixels;
6308  p[0] = c[2];
6309  p[1] = c[1];
6310  p[2] = c[0];
6311  p[3] = c[3];
6312  }
6313  g->cur_x += 4;
6314 
6315  if (g->cur_x >= g->max_x) {
6316  g->cur_x = g->start_x;
6317  g->cur_y += g->step;
6318 
6319  while (g->cur_y >= g->max_y && g->parse > 0) {
6320  g->step = (1 << g->parse) * g->line_size;
6321  g->cur_y = g->start_y + (g->step >> 1);
6322  --g->parse;
6323  }
6324  }
6325 }
6326 
6327 static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
6328 {
6329  stbi_uc lzw_cs;
6330  stbi__int32 len, init_code;
6331  stbi__uint32 first;
6332  stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
6333  stbi__gif_lzw *p;
6334 
6335  lzw_cs = stbi__get8(s);
6336  if (lzw_cs > 12) return NULL;
6337  clear = 1 << lzw_cs;
6338  first = 1;
6339  codesize = lzw_cs + 1;
6340  codemask = (1 << codesize) - 1;
6341  bits = 0;
6342  valid_bits = 0;
6343  for (init_code = 0; init_code < clear; init_code++) {
6344  g->codes[init_code].prefix = -1;
6345  g->codes[init_code].first = (stbi_uc) init_code;
6346  g->codes[init_code].suffix = (stbi_uc) init_code;
6347  }
6348 
6349  // support no starting clear code
6350  avail = clear+2;
6351  oldcode = -1;
6352 
6353  len = 0;
6354  for(;;) {
6355  if (valid_bits < codesize) {
6356  if (len == 0) {
6357  len = stbi__get8(s); // start new block
6358  if (len == 0)
6359  return g->out;
6360  }
6361  --len;
6362  bits |= (stbi__int32) stbi__get8(s) << valid_bits;
6363  valid_bits += 8;
6364  } else {
6365  stbi__int32 code = bits & codemask;
6366  bits >>= codesize;
6367  valid_bits -= codesize;
6368  // @OPTIMIZE: is there some way we can accelerate the non-clear path?
6369  if (code == clear) { // clear code
6370  codesize = lzw_cs + 1;
6371  codemask = (1 << codesize) - 1;
6372  avail = clear + 2;
6373  oldcode = -1;
6374  first = 0;
6375  } else if (code == clear + 1) { // end of stream code
6376  stbi__skip(s, len);
6377  while ((len = stbi__get8(s)) > 0)
6378  stbi__skip(s,len);
6379  return g->out;
6380  } else if (code <= avail) {
6381  if (first) {
6382  return stbi__errpuc("no clear code", "Corrupt GIF");
6383  }
6384 
6385  if (oldcode >= 0) {
6386  p = &g->codes[avail++];
6387  if (avail > 8192) {
6388  return stbi__errpuc("too many codes", "Corrupt GIF");
6389  }
6390 
6391  p->prefix = (stbi__int16) oldcode;
6392  p->first = g->codes[oldcode].first;
6393  p->suffix = (code == avail) ? p->first : g->codes[code].first;
6394  } else if (code == avail)
6395  return stbi__errpuc("illegal code in raster", "Corrupt GIF");
6396 
6397  stbi__out_gif_code(g, (stbi__uint16) code);
6398 
6399  if ((avail & codemask) == 0 && avail <= 0x0FFF) {
6400  codesize++;
6401  codemask = (1 << codesize) - 1;
6402  }
6403 
6404  oldcode = code;
6405  } else {
6406  return stbi__errpuc("illegal code in raster", "Corrupt GIF");
6407  }
6408  }
6409  }
6410 }