HDK
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
pugixml.cpp
Go to the documentation of this file.
1 /**
2  * pugixml parser - version 1.8
3  * --------------------------------------------------------
4  * Copyright (C) 2006-2016, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5  * Report bugs and download new versions at http://pugixml.org/
6  *
7  * This library is distributed under the MIT License. See notice at the end
8  * of this file.
9  *
10  * This work is based on the pugxml parser, which is:
11  * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12  */
13 // clang-format off
14 
15 #ifndef SOURCE_PUGIXML_CPP
16 #define SOURCE_PUGIXML_CPP
17 
18 #include "pugixml.hpp"
19 
20 #include <cstdlib>
21 #include <cstdio>
22 #include <cstring>
23 #include <cassert>
24 #include <climits>
25 
26 #ifdef PUGIXML_WCHAR_MODE
27 # include <wchar.h>
28 #endif
29 
30 #ifndef PUGIXML_NO_XPATH
31 # include <cmath>
32 # include <cfloat>
33 # ifdef PUGIXML_NO_EXCEPTIONS
34 # include <setjmp.h>
35 # endif
36 #endif
37 
38 #ifndef PUGIXML_NO_STL
39 # include <istream>
40 # include <ostream>
41 # include <string>
42 #endif
43 
44 // For placement new
45 #include <new>
46 
47 #ifdef _MSC_VER
48 # pragma warning(push)
49 # pragma warning(disable: 4127) // conditional expression is constant
50 # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
51 # pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
52 # pragma warning(disable: 4702) // unreachable code
53 # pragma warning(disable: 4996) // this function or variable may be unsafe
54 # pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
55 #endif
56 
57 #ifdef __INTEL_COMPILER
58 # pragma warning(disable: 177) // function was declared but never referenced
59 # pragma warning(disable: 279) // controlling expression is constant
60 # pragma warning(disable: 1478 1786) // function was declared "deprecated"
61 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
62 #endif
63 
64 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
65 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
66 #endif
67 
68 #ifdef __BORLANDC__
69 # pragma option push
70 # pragma warn -8008 // condition is always false
71 # pragma warn -8066 // unreachable code
72 #endif
73 
74 #ifdef __SNC__
75 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
76 # pragma diag_suppress=178 // function was declared but never referenced
77 # pragma diag_suppress=237 // controlling expression is constant
78 #endif
79 
80 // Inlining controls
81 #if defined(_MSC_VER) && _MSC_VER >= 1300
82 # define PUGI__NO_INLINE __declspec(noinline)
83 #elif defined(__GNUC__)
84 # define PUGI__NO_INLINE __attribute__((noinline))
85 #else
86 # define PUGI__NO_INLINE
87 #endif
88 
89 // Branch weight controls
90 #if defined(__GNUC__)
91 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
92 #else
93 # define PUGI__UNLIKELY(cond) (cond)
94 #endif
95 
96 // Simple static assertion
97 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
98 
99 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
100 #ifdef __DMC__
101 # define PUGI__DMC_VOLATILE volatile
102 #else
103 # define PUGI__DMC_VOLATILE
104 #endif
105 
106 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
107 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
108 using std::memcpy;
109 using std::memmove;
110 using std::memset;
111 #endif
112 
113 // Some MinGW versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions in strict ANSI mode
114 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__MINGW32__) && defined(__STRICT_ANSI__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
115 # define LLONG_MAX 9223372036854775807LL
116 # define LLONG_MIN (-LLONG_MAX-1)
117 # define ULLONG_MAX (2ULL*LLONG_MAX+1)
118 #endif
119 
120 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
121 #if defined(_MSC_VER) && !defined(__S3E__)
122 # define PUGI__MSVC_CRT_VERSION _MSC_VER
123 #endif
124 
125 #ifdef PUGIXML_HEADER_ONLY
126 # define PUGI__NS_BEGIN OIIO_NAMESPACE_BEGIN namespace pugi { namespace impl {
127 # define PUGI__NS_END } } OIIO_NAMESPACE_END
128 # define PUGI__FN inline
129 # define PUGI__FN_NO_INLINE inline
130 #else
131 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
132 # define PUGI__NS_BEGIN OIIO_NAMESPACE_BEGIN namespace pugi { namespace impl {
133 # define PUGI__NS_END } } OIIO_NAMESPACE_END
134 # else
135 # define PUGI__NS_BEGIN OIIO_NAMESPACE_BEGIN namespace pugi { namespace impl { namespace {
136 # define PUGI__NS_END } } } OIIO_NAMESPACE_END
137 # endif
138 # define PUGI__FN
139 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE
140 #endif
141 
142 // uintptr_t
143 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
144 OIIO_NAMESPACE_BEGIN namespace pugi
145 {
146 # ifndef _UINTPTR_T_DEFINED
147  typedef size_t uintptr_t;
148 # endif
149 
150  typedef unsigned __int8 uint8_t;
151  typedef unsigned __int16 uint16_t;
152  typedef unsigned __int32 uint32_t;
154 #else
155 # include <cstdint>
156 #endif
157 
158 // Memory allocation
161  {
162  return malloc(size);
163  }
164 
166  {
167  free(ptr);
168  }
169 
170  template <typename T>
172  {
175  };
176 
177  // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
178  // Without a template<> we'll get multiple definitions of the same static
181 
184 
185 // String utilities
187  // Get string length
188  PUGI__FN size_t strlength(const char_t* s)
189  {
190  assert(s);
191 
192  #ifdef PUGIXML_WCHAR_MODE
193  return wcslen(s);
194  #else
195  return strlen(s);
196  #endif
197  }
198 
199  // Compare two strings
200  PUGI__FN bool strequal(const char_t* src, const char_t* dst)
201  {
202  assert(src && dst);
203 
204  #ifdef PUGIXML_WCHAR_MODE
205  return wcscmp(src, dst) == 0;
206  #else
207  return strcmp(src, dst) == 0;
208  #endif
209  }
210 
211  // Compare lhs with [rhs_begin, rhs_end)
212  PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
213  {
214  for (size_t i = 0; i < count; ++i)
215  if (lhs[i] != rhs[i])
216  return false;
217 
218  return lhs[count] == 0;
219  }
220 
221  // Get length of wide string, even if CRT lacks wide character support
222  PUGI__FN size_t strlength_wide(const wchar_t* s)
223  {
224  assert(s);
225 
226  #ifdef PUGIXML_WCHAR_MODE
227  return wcslen(s);
228  #else
229  const wchar_t* end = s;
230  while (*end) end++;
231  return static_cast<size_t>(end - s);
232  #endif
233  }
235 
236 // auto_ptr-like object for exception recovery
238  template <typename T> struct auto_deleter
239  {
240  typedef void (*D)(T*);
241 
242  T* data;
244 
245  auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
246  {
247  }
248 
250  {
251  if (data) deleter(data);
252  }
253 
255  {
256  T* result = data;
257  data = 0;
258  return result;
259  }
260  };
262 
263 #ifdef PUGIXML_COMPACT
265  class compact_hash_table
266  {
267  public:
268  compact_hash_table(): _items(0), _capacity(0), _count(0)
269  {
270  }
271 
272  void clear()
273  {
274  if (_items)
275  {
276  xml_memory::deallocate(_items);
277  _items = 0;
278  _capacity = 0;
279  _count = 0;
280  }
281  }
282 
283  void** find(const void* key)
284  {
285  assert(key);
286 
287  if (_capacity == 0) return 0;
288 
289  size_t hashmod = _capacity - 1;
290  size_t bucket = hash(key) & hashmod;
291 
292  for (size_t probe = 0; probe <= hashmod; ++probe)
293  {
294  item_t& probe_item = _items[bucket];
295 
296  if (probe_item.key == key)
297  return &probe_item.value;
298 
299  if (probe_item.key == 0)
300  return 0;
301 
302  // hash collision, quadratic probing
303  bucket = (bucket + probe + 1) & hashmod;
304  }
305 
306  assert(false && "Hash table is full");
307  return 0;
308  }
309 
310  void** insert(const void* key)
311  {
312  assert(key);
313  assert(_capacity != 0 && _count < _capacity - _capacity / 4);
314 
315  size_t hashmod = _capacity - 1;
316  size_t bucket = hash(key) & hashmod;
317 
318  for (size_t probe = 0; probe <= hashmod; ++probe)
319  {
320  item_t& probe_item = _items[bucket];
321 
322  if (probe_item.key == 0)
323  {
324  probe_item.key = key;
325  _count++;
326  return &probe_item.value;
327  }
328 
329  if (probe_item.key == key)
330  return &probe_item.value;
331 
332  // hash collision, quadratic probing
333  bucket = (bucket + probe + 1) & hashmod;
334  }
335 
336  assert(false && "Hash table is full");
337  return 0;
338  }
339 
340  bool reserve()
341  {
342  if (_count + 16 >= _capacity - _capacity / 4)
343  return rehash();
344 
345  return true;
346  }
347 
348  private:
349  struct item_t
350  {
351  const void* key;
352  void* value;
353  };
354 
355  item_t* _items;
356  size_t _capacity;
357 
358  size_t _count;
359 
360  bool rehash();
361 
362  static unsigned int hash(const void* key)
363  {
364  unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
365 
366  // MurmurHash3 32-bit finalizer
367  h ^= h >> 16;
368  h *= 0x85ebca6bu;
369  h ^= h >> 13;
370  h *= 0xc2b2ae35u;
371  h ^= h >> 16;
372 
373  return h;
374  }
375  };
376 
377  PUGI__FN_NO_INLINE bool compact_hash_table::rehash()
378  {
379  compact_hash_table rt;
380  rt._capacity = (_capacity == 0) ? 32 : _capacity * 2;
381  rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity));
382 
383  if (!rt._items)
384  return false;
385 
386  memset(rt._items, 0, sizeof(item_t) * rt._capacity);
387 
388  for (size_t i = 0; i < _capacity; ++i)
389  if (_items[i].key)
390  *rt.insert(_items[i].key) = _items[i].value;
391 
392  if (_items)
393  xml_memory::deallocate(_items);
394 
395  _capacity = rt._capacity;
396  _items = rt._items;
397 
398  assert(_count == rt._count);
399 
400  return true;
401  }
402 
404 #endif
405 
407 #ifdef PUGIXML_COMPACT
408  static const uintptr_t xml_memory_block_alignment = 4;
409 #else
410  static const uintptr_t xml_memory_block_alignment = sizeof(void*);
411 #endif
412 
// flag bits kept in the header next to the node type (one bit each)
static const uintptr_t xml_memory_page_contents_shared_mask = 0x40;
static const uintptr_t xml_memory_page_name_allocated_mask = 0x20;
static const uintptr_t xml_memory_page_value_allocated_mask = 0x10;
// the low four bits hold the node type (see PUGI__NODETYPE)
static const uintptr_t xml_memory_page_type_mask = 0x0F;

// combined masks for string uniqueness
static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
422 
423 #ifdef PUGIXML_COMPACT
424  #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
425  #define PUGI__GETPAGE_IMPL(header) (header).get_page()
426 #else
427  #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
428  // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
429  #define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
430 #endif
431 
432  #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
433  #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
434 
435  struct xml_allocator;
436 
438  {
439  static xml_memory_page* construct(void* memory)
440  {
441  xml_memory_page* result = static_cast<xml_memory_page*>(memory);
442 
443  result->allocator = 0;
444  result->prev = 0;
445  result->next = 0;
446  result->busy_size = 0;
447  result->freed_size = 0;
448 
449  #ifdef PUGIXML_COMPACT
450  result->compact_string_base = 0;
451  result->compact_shared_parent = 0;
452  result->compact_page_marker = 0;
453  #endif
454 
455  return result;
456  }
457 
459 
462 
463  size_t busy_size;
464  size_t freed_size;
465 
466  #ifdef PUGIXML_COMPACT
467  char_t* compact_string_base;
468  void* compact_shared_parent;
469  uint32_t* compact_page_marker;
470  #endif
471  };
472 
473  static const size_t xml_memory_page_size =
474  #ifdef PUGIXML_MEMORY_PAGE_SIZE
475  (PUGIXML_MEMORY_PAGE_SIZE)
476  #else
477  32768
478  #endif
479  - sizeof(xml_memory_page);
480 
482  {
483  uint16_t page_offset; // offset from page->data
484  uint16_t full_size; // 0 if string occupies whole page
485  };
486 
488  {
489  xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
490  {
491  #ifdef PUGIXML_COMPACT
492  _hash = 0;
493  #endif
494  }
495 
496  xml_memory_page* allocate_page(size_t data_size)
497  {
498  size_t size = sizeof(xml_memory_page) + data_size;
499 
500  // allocate block with some alignment, leaving memory for worst-case padding
501  void* memory = xml_memory::allocate(size);
502  if (!memory) return 0;
503 
504  // prepare page structure
506  assert(page);
507 
508  page->allocator = _root->allocator;
509 
510  return page;
511  }
512 
513  static void deallocate_page(xml_memory_page* page)
514  {
516  }
517 
518  void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
519 
520  void* allocate_memory(size_t size, xml_memory_page*& out_page)
521  {
522  if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
523  return allocate_memory_oob(size, out_page);
524 
525  void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
526 
527  _busy_size += size;
528 
529  out_page = _root;
530 
531  return buf;
532  }
533 
534  #ifdef PUGIXML_COMPACT
535  void* allocate_object(size_t size, xml_memory_page*& out_page)
536  {
537  void* result = allocate_memory(size + sizeof(uint32_t), out_page);
538  if (!result) return 0;
539 
540  // adjust for marker
541  ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
542 
543  if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
544  {
545  // insert new marker
546  uint32_t* marker = static_cast<uint32_t*>(result);
547 
548  *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
549  out_page->compact_page_marker = marker;
550 
551  // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
552  // this will make sure deallocate_memory correctly tracks the size
553  out_page->freed_size += sizeof(uint32_t);
554 
555  return marker + 1;
556  }
557  else
558  {
559  // roll back uint32_t part
560  _busy_size -= sizeof(uint32_t);
561 
562  return result;
563  }
564  }
565  #else
566  void* allocate_object(size_t size, xml_memory_page*& out_page)
567  {
568  return allocate_memory(size, out_page);
569  }
570  #endif
571 
572  void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
573  {
574  if (page == _root) page->busy_size = _busy_size;
575 
576  assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
577  (void)!ptr;
578 
579  page->freed_size += size;
580  assert(page->freed_size <= page->busy_size);
581 
582  if (page->freed_size == page->busy_size)
583  {
584  if (page->next == 0)
585  {
586  assert(_root == page);
587 
588  // top page freed, just reset sizes
589  page->busy_size = 0;
590  page->freed_size = 0;
591 
592  #ifdef PUGIXML_COMPACT
593  // reset compact state to maximize efficiency
594  page->compact_string_base = 0;
595  page->compact_shared_parent = 0;
596  page->compact_page_marker = 0;
597  #endif
598 
599  _busy_size = 0;
600  }
601  else
602  {
603  assert(_root != page);
604  assert(page->prev);
605 
606  // remove from the list
607  page->prev->next = page->next;
608  page->next->prev = page->prev;
609 
610  // deallocate
611  deallocate_page(page);
612  }
613  }
614  }
615 
617  {
618  static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
619 
620  PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
621 
622  // allocate memory for string and header block
623  size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
624 
625  // round size up to block alignment boundary
626  size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
627 
628  xml_memory_page* page;
629  xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
630 
631  if (!header) return 0;
632 
633  // setup header
634  ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
635 
636  assert(page_offset % xml_memory_block_alignment == 0);
637  assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
638  header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
639 
640  // full_size == 0 for large strings that occupy the whole page
641  assert(full_size % xml_memory_block_alignment == 0);
642  assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
643  header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
644 
645  // round-trip through void* to avoid 'cast increases required alignment of target type' warning
646  // header is guaranteed a pointer-sized alignment, which should be enough for char_t
647  return static_cast<char_t*>(static_cast<void*>(header + 1));
648  }
649 
650  void deallocate_string(char_t* string)
651  {
652  // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
653  // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
654 
655  // get header
656  xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
657  assert(header);
658 
659  // deallocate
660  size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
661  xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
662 
663  // if full_size == 0 then this string occupies the whole page
664  size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
665 
666  deallocate_memory(header, full_size, page);
667  }
668 
// In compact mode, forward to the hash table's reserve(); otherwise there is nothing to
// prepare and the call always succeeds.
bool reserve()
{
    bool ok = true;

#ifdef PUGIXML_COMPACT
    ok = _hash->reserve();
#endif

    return ok;
}
677 
679  size_t _busy_size;
680 
681  #ifdef PUGIXML_COMPACT
682  compact_hash_table* _hash;
683  #endif
684  };
685 
687  {
688  const size_t large_allocation_threshold = xml_memory_page_size / 4;
689 
690  xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
691  out_page = page;
692 
693  if (!page) return 0;
694 
695  if (size <= large_allocation_threshold)
696  {
698 
699  // insert page at the end of linked list
700  page->prev = _root;
701  _root->next = page;
702  _root = page;
703 
704  _busy_size = size;
705  }
706  else
707  {
708  // insert page before the end of linked list, so that it is deleted as soon as possible
709  // the last page is not deleted even if it's empty (see deallocate_memory)
710  assert(_root->prev);
711 
712  page->prev = _root->prev;
713  page->next = _root;
714 
715  _root->prev->next = page;
716  _root->prev = page;
717 
718  page->busy_size = size;
719  }
720 
721  return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
722  }
724 
725 #ifdef PUGIXML_COMPACT
727  static const uintptr_t compact_alignment_log2 = 2;
728  static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
729 
730  class compact_header
731  {
732  public:
733  compact_header(xml_memory_page* page, unsigned int flags)
734  {
735  PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
736 
737  ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
738  assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
739 
740  _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
741  _flags = static_cast<unsigned char>(flags);
742  }
743 
744  void operator&=(uintptr_t mod)
745  {
746  _flags &= static_cast<unsigned char>(mod);
747  }
748 
749  void operator|=(uintptr_t mod)
750  {
751  _flags |= static_cast<unsigned char>(mod);
752  }
753 
754  uintptr_t operator&(uintptr_t mod) const
755  {
756  return _flags & mod;
757  }
758 
759  xml_memory_page* get_page() const
760  {
761  // round-trip through void* to silence 'cast increases required alignment of target type' warnings
762  const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
763  const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
764 
765  return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
766  }
767 
768  private:
769  unsigned char _page;
770  unsigned char _flags;
771  };
772 
773  PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
774  {
775  const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
776 
777  return header->get_page();
778  }
779 
780  template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
781  {
782  return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object));
783  }
784 
785  template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
786  {
787  *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value;
788  }
789 
790  template <typename T, int header_offset, int start = -126> class compact_pointer
791  {
792  public:
793  compact_pointer(): _data(0)
794  {
795  }
796 
797  void operator=(const compact_pointer& rhs)
798  {
799  *this = rhs + 0;
800  }
801 
802  void operator=(T* value)
803  {
804  if (value)
805  {
806  // value is guaranteed to be compact-aligned; 'this' is not
807  // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
808  // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
809  // compensate for arithmetic shift rounding for negative values
810  ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
811  ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
812 
813  if (static_cast<uintptr_t>(offset) <= 253)
814  _data = static_cast<unsigned char>(offset + 1);
815  else
816  {
817  compact_set_value<header_offset>(this, value);
818 
819  _data = 255;
820  }
821  }
822  else
823  _data = 0;
824  }
825 
826  operator T*() const
827  {
828  if (_data)
829  {
830  if (_data < 255)
831  {
832  uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
833 
834  return reinterpret_cast<T*>(base + ((_data - 1 + start) << compact_alignment_log2));
835  }
836  else
837  return compact_get_value<header_offset, T>(this);
838  }
839  else
840  return 0;
841  }
842 
843  T* operator->() const
844  {
845  return *this;
846  }
847 
848  private:
849  unsigned char _data;
850  };
851 
852  template <typename T, int header_offset> class compact_pointer_parent
853  {
854  public:
855  compact_pointer_parent(): _data(0)
856  {
857  }
858 
859  void operator=(const compact_pointer_parent& rhs)
860  {
861  *this = rhs + 0;
862  }
863 
864  void operator=(T* value)
865  {
866  if (value)
867  {
868  // value is guaranteed to be compact-aligned; 'this' is not
869  // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
870  // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
871  // compensate for arithmetic shift behavior for negative values
872  ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
873  ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
874 
875  if (static_cast<uintptr_t>(offset) <= 65533)
876  {
877  _data = static_cast<unsigned short>(offset + 1);
878  }
879  else
880  {
881  xml_memory_page* page = compact_get_page(this, header_offset);
882 
883  if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
884  page->compact_shared_parent = value;
885 
886  if (page->compact_shared_parent == value)
887  {
888  _data = 65534;
889  }
890  else
891  {
892  compact_set_value<header_offset>(this, value);
893 
894  _data = 65535;
895  }
896  }
897  }
898  else
899  {
900  _data = 0;
901  }
902  }
903 
904  operator T*() const
905  {
906  if (_data)
907  {
908  if (_data < 65534)
909  {
910  uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
911 
912  return reinterpret_cast<T*>(base + ((_data - 1 - 65533) << compact_alignment_log2));
913  }
914  else if (_data == 65534)
915  return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
916  else
917  return compact_get_value<header_offset, T>(this);
918  }
919  else
920  return 0;
921  }
922 
923  T* operator->() const
924  {
925  return *this;
926  }
927 
928  private:
929  uint16_t _data;
930  };
931 
932  template <int header_offset, int base_offset> class compact_string
933  {
934  public:
935  compact_string(): _data(0)
936  {
937  }
938 
939  void operator=(const compact_string& rhs)
940  {
941  *this = rhs + 0;
942  }
943 
944  void operator=(char_t* value)
945  {
946  if (value)
947  {
948  xml_memory_page* page = compact_get_page(this, header_offset);
949 
950  if (PUGI__UNLIKELY(page->compact_string_base == 0))
951  page->compact_string_base = value;
952 
953  ptrdiff_t offset = value - page->compact_string_base;
954 
955  if (static_cast<uintptr_t>(offset) < (65535 << 7))
956  {
957  // round-trip through void* to silence 'cast increases required alignment of target type' warnings
958  uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
959 
960  if (*base == 0)
961  {
962  *base = static_cast<uint16_t>((offset >> 7) + 1);
963  _data = static_cast<unsigned char>((offset & 127) + 1);
964  }
965  else
966  {
967  ptrdiff_t remainder = offset - ((*base - 1) << 7);
968 
969  if (static_cast<uintptr_t>(remainder) <= 253)
970  {
971  _data = static_cast<unsigned char>(remainder + 1);
972  }
973  else
974  {
975  compact_set_value<header_offset>(this, value);
976 
977  _data = 255;
978  }
979  }
980  }
981  else
982  {
983  compact_set_value<header_offset>(this, value);
984 
985  _data = 255;
986  }
987  }
988  else
989  {
990  _data = 0;
991  }
992  }
993 
994  operator char_t*() const
995  {
996  if (_data)
997  {
998  if (_data < 255)
999  {
1000  xml_memory_page* page = compact_get_page(this, header_offset);
1001 
1002  // round-trip through void* to silence 'cast increases required alignment of target type' warnings
1003  const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
1004  assert(*base);
1005 
1006  ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
1007 
1008  return page->compact_string_base + offset;
1009  }
1010  else
1011  {
1012  return compact_get_value<header_offset, char_t>(this);
1013  }
1014  }
1015  else
1016  return 0;
1017  }
1018 
1019  private:
1020  unsigned char _data;
1021  };
1023 #endif
1024 
1025 #ifdef PUGIXML_COMPACT
1026 OIIO_NAMESPACE_BEGIN namespace pugi
1027 {
1028  struct xml_attribute_struct
1029  {
1030  xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
1031  {
1032  PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
1033  }
1034 
1035  impl::compact_header header;
1036 
1037  uint16_t namevalue_base;
1038 
1039  impl::compact_string<4, 2> name;
1040  impl::compact_string<5, 3> value;
1041 
1042  impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
1043  impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
1044  };
1045 
1046  struct xml_node_struct
1047  {
1048  xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
1049  {
1050  PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
1051  }
1052 
1053  impl::compact_header header;
1054 
1055  uint16_t namevalue_base;
1056 
1057  impl::compact_string<4, 2> name;
1058  impl::compact_string<5, 3> value;
1059 
1060  impl::compact_pointer_parent<xml_node_struct, 6> parent;
1061 
1062  impl::compact_pointer<xml_node_struct, 8, 0> first_child;
1063 
1064  impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
1065  impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
1066 
1067  impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
1068  };
1070 #else
1071 OIIO_NAMESPACE_BEGIN namespace pugi
1072 {
1074  {
1075  xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
1076  {
1077  header = PUGI__GETHEADER_IMPL(this, page, 0);
1078  }
1079 
1080  uintptr_t header;
1081 
1084 
1087  };
1088 
1090  {
1091  xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
1092  {
1093  header = PUGI__GETHEADER_IMPL(this, page, type);
1094  }
1095 
1096  uintptr_t header;
1097 
1100 
1102 
1104 
1107 
1109  };
1111 #endif
1112 
1115  {
1118  };
1119 
1120  struct xml_document_struct: public xml_node_struct, public xml_allocator
1121  {
1123  {
1124  }
1125 
1126  const char_t* buffer;
1127 
1129 
1130  #ifdef PUGIXML_COMPACT
1131  compact_hash_table hash;
1132  #endif
1133  };
1134 
1135  template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1136  {
1137  assert(object);
1138 
1139  return *PUGI__GETPAGE(object)->allocator;
1140  }
1141 
1142  template <typename Object> inline xml_document_struct& get_document(const Object* object)
1143  {
1144  assert(object);
1145 
1146  return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1147  }
1149 
1150 // Low-level DOM operations
1152  inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1153  {
1154  xml_memory_page* page;
1155  void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1156  if (!memory) return 0;
1157 
1158  return new (memory) xml_attribute_struct(page);
1159  }
1160 
1161  inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1162  {
1163  xml_memory_page* page;
1164  void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1165  if (!memory) return 0;
1166 
1167  return new (memory) xml_node_struct(page, type);
1168  }
1169 
1170  inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1171  {
1172  if (a->header & impl::xml_memory_page_name_allocated_mask)
1173  alloc.deallocate_string(a->name);
1174 
1175  if (a->header & impl::xml_memory_page_value_allocated_mask)
1176  alloc.deallocate_string(a->value);
1177 
1178  alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1179  }
1180 
1181  inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1182  {
1183  if (n->header & impl::xml_memory_page_name_allocated_mask)
1184  alloc.deallocate_string(n->name);
1185 
1186  if (n->header & impl::xml_memory_page_value_allocated_mask)
1187  alloc.deallocate_string(n->value);
1188 
1189  for (xml_attribute_struct* attr = n->first_attribute; attr; )
1190  {
1191  xml_attribute_struct* next = attr->next_attribute;
1192 
1193  destroy_attribute(attr, alloc);
1194 
1195  attr = next;
1196  }
1197 
1198  for (xml_node_struct* child = n->first_child; child; )
1199  {
1200  xml_node_struct* next = child->next_sibling;
1201 
1202  destroy_node(child, alloc);
1203 
1204  child = next;
1205  }
1206 
1207  alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1208  }
1209 
1210  inline void append_node(xml_node_struct* child, xml_node_struct* node)
1211  {
1212  child->parent = node;
1213 
1214  xml_node_struct* head = node->first_child;
1215 
1216  if (head)
1217  {
1218  xml_node_struct* tail = head->prev_sibling_c;
1219 
1220  tail->next_sibling = child;
1221  child->prev_sibling_c = tail;
1222  head->prev_sibling_c = child;
1223  }
1224  else
1225  {
1226  node->first_child = child;
1227  child->prev_sibling_c = child;
1228  }
1229  }
1230 
1231  inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1232  {
1233  child->parent = node;
1234 
1235  xml_node_struct* head = node->first_child;
1236 
1237  if (head)
1238  {
1239  child->prev_sibling_c = head->prev_sibling_c;
1240  head->prev_sibling_c = child;
1241  }
1242  else
1243  child->prev_sibling_c = child;
1244 
1245  child->next_sibling = head;
1246  node->first_child = child;
1247  }
1248 
1249  inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1250  {
1251  xml_node_struct* parent = node->parent;
1252 
1253  child->parent = parent;
1254 
1255  if (node->next_sibling)
1256  node->next_sibling->prev_sibling_c = child;
1257  else
1258  parent->first_child->prev_sibling_c = child;
1259 
1260  child->next_sibling = node->next_sibling;
1261  child->prev_sibling_c = node;
1262 
1263  node->next_sibling = child;
1264  }
1265 
1266  inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1267  {
1268  xml_node_struct* parent = node->parent;
1269 
1270  child->parent = parent;
1271 
1272  if (node->prev_sibling_c->next_sibling)
1273  node->prev_sibling_c->next_sibling = child;
1274  else
1275  parent->first_child = child;
1276 
1277  child->prev_sibling_c = node->prev_sibling_c;
1278  child->next_sibling = node;
1279 
1280  node->prev_sibling_c = child;
1281  }
1282 
1283  inline void remove_node(xml_node_struct* node)
1284  {
1285  xml_node_struct* parent = node->parent;
1286 
1287  if (node->next_sibling)
1288  node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1289  else
1290  parent->first_child->prev_sibling_c = node->prev_sibling_c;
1291 
1292  if (node->prev_sibling_c->next_sibling)
1293  node->prev_sibling_c->next_sibling = node->next_sibling;
1294  else
1295  parent->first_child = node->next_sibling;
1296 
1297  node->parent = 0;
1298  node->prev_sibling_c = 0;
1299  node->next_sibling = 0;
1300  }
1301 
1302  inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1303  {
1304  xml_attribute_struct* head = node->first_attribute;
1305 
1306  if (head)
1307  {
1308  xml_attribute_struct* tail = head->prev_attribute_c;
1309 
1310  tail->next_attribute = attr;
1311  attr->prev_attribute_c = tail;
1312  head->prev_attribute_c = attr;
1313  }
1314  else
1315  {
1316  node->first_attribute = attr;
1317  attr->prev_attribute_c = attr;
1318  }
1319  }
1320 
1321  inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1322  {
1323  xml_attribute_struct* head = node->first_attribute;
1324 
1325  if (head)
1326  {
1327  attr->prev_attribute_c = head->prev_attribute_c;
1328  head->prev_attribute_c = attr;
1329  }
1330  else
1331  attr->prev_attribute_c = attr;
1332 
1333  attr->next_attribute = head;
1334  node->first_attribute = attr;
1335  }
1336 
1337  inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1338  {
1339  if (place->next_attribute)
1340  place->next_attribute->prev_attribute_c = attr;
1341  else
1342  node->first_attribute->prev_attribute_c = attr;
1343 
1344  attr->next_attribute = place->next_attribute;
1345  attr->prev_attribute_c = place;
1346  place->next_attribute = attr;
1347  }
1348 
1349  inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1350  {
1351  if (place->prev_attribute_c->next_attribute)
1352  place->prev_attribute_c->next_attribute = attr;
1353  else
1354  node->first_attribute = attr;
1355 
1356  attr->prev_attribute_c = place->prev_attribute_c;
1357  attr->next_attribute = place;
1358  place->prev_attribute_c = attr;
1359  }
1360 
1361  inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1362  {
1363  if (attr->next_attribute)
1364  attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1365  else
1366  node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
1367 
1368  if (attr->prev_attribute_c->next_attribute)
1369  attr->prev_attribute_c->next_attribute = attr->next_attribute;
1370  else
1371  node->first_attribute = attr->next_attribute;
1372 
1373  attr->prev_attribute_c = 0;
1374  attr->next_attribute = 0;
1375  }
1376 
1377  PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
1378  {
1379  if (!alloc.reserve()) return 0;
1380 
1381  xml_node_struct* child = allocate_node(alloc, type);
1382  if (!child) return 0;
1383 
1384  append_node(child, node);
1385 
1386  return child;
1387  }
1388 
1389  PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1390  {
1391  if (!alloc.reserve()) return 0;
1392 
1393  xml_attribute_struct* attr = allocate_attribute(alloc);
1394  if (!attr) return 0;
1395 
1396  append_attribute(attr, node);
1397 
1398  return attr;
1399  }
1401 
1402 // Helper classes for code generation
// Compile-time "off" tag: exposes value == 0. Passed as a template argument
// (e.g. as opt_swap for the UTF-16/UTF-32 decoders) to disable an option.
struct opt_false { enum { value = 0 }; };
1408 
// Compile-time "on" tag: exposes value == 1. Passed as a template argument
// (e.g. as opt_swap for the UTF-16/UTF-32 decoders) to enable an option.
struct opt_true { enum { value = 1 }; };
1414 
1415 // Unicode utilities
// Returns `value` with its two bytes exchanged.
inline uint16_t endian_swap(uint16_t value)
{
	const unsigned int low_byte = value & 0xff;
	const unsigned int high_byte = value >> 8;

	return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
1421 
// Returns `value` with the order of its four bytes reversed.
inline uint32_t endian_swap(uint32_t value)
{
	uint32_t swapped = value >> 24;

	swapped |= (value >> 8) & 0xff00;
	swapped |= (value << 8) & 0xff0000;
	swapped |= value << 24;

	return swapped;
}
1426 
1428  {
1429  typedef size_t value_type;
1430 
1431  static value_type low(value_type result, uint32_t ch)
1432  {
1433  // U+0000..U+007F
1434  if (ch < 0x80) return result + 1;
1435  // U+0080..U+07FF
1436  else if (ch < 0x800) return result + 2;
1437  // U+0800..U+FFFF
1438  else return result + 3;
1439  }
1440 
1441  static value_type high(value_type result, uint32_t)
1442  {
1443  // U+10000..U+10FFFF
1444  return result + 4;
1445  }
1446  };
1447 
1449  {
1450  typedef uint8_t* value_type;
1451 
1452  static value_type low(value_type result, uint32_t ch)
1453  {
1454  // U+0000..U+007F
1455  if (ch < 0x80)
1456  {
1457  *result = static_cast<uint8_t>(ch);
1458  return result + 1;
1459  }
1460  // U+0080..U+07FF
1461  else if (ch < 0x800)
1462  {
1463  result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
1464  result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1465  return result + 2;
1466  }
1467  // U+0800..U+FFFF
1468  else
1469  {
1470  result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
1471  result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1472  result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1473  return result + 3;
1474  }
1475  }
1476 
1477  static value_type high(value_type result, uint32_t ch)
1478  {
1479  // U+10000..U+10FFFF
1480  result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
1481  result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
1482  result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1483  result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1484  return result + 4;
1485  }
1486 
1487  static value_type any(value_type result, uint32_t ch)
1488  {
1489  return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1490  }
1491  };
1492 
1494  {
1495  typedef size_t value_type;
1496 
1497  static value_type low(value_type result, uint32_t)
1498  {
1499  return result + 1;
1500  }
1501 
1502  static value_type high(value_type result, uint32_t)
1503  {
1504  return result + 2;
1505  }
1506  };
1507 
1509  {
1510  typedef uint16_t* value_type;
1511 
1512  static value_type low(value_type result, uint32_t ch)
1513  {
1514  *result = static_cast<uint16_t>(ch);
1515 
1516  return result + 1;
1517  }
1518 
1519  static value_type high(value_type result, uint32_t ch)
1520  {
1521  uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
1522  uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
1523 
1524  result[0] = static_cast<uint16_t>(0xD800 + msh);
1525  result[1] = static_cast<uint16_t>(0xDC00 + lsh);
1526 
1527  return result + 2;
1528  }
1529 
1530  static value_type any(value_type result, uint32_t ch)
1531  {
1532  return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1533  }
1534  };
1535 
1537  {
1538  typedef size_t value_type;
1539 
1540  static value_type low(value_type result, uint32_t)
1541  {
1542  return result + 1;
1543  }
1544 
1545  static value_type high(value_type result, uint32_t)
1546  {
1547  return result + 1;
1548  }
1549  };
1550 
1552  {
1553  typedef uint32_t* value_type;
1554 
1555  static value_type low(value_type result, uint32_t ch)
1556  {
1557  *result = ch;
1558 
1559  return result + 1;
1560  }
1561 
1562  static value_type high(value_type result, uint32_t ch)
1563  {
1564  *result = ch;
1565 
1566  return result + 1;
1567  }
1568 
1569  static value_type any(value_type result, uint32_t ch)
1570  {
1571  *result = ch;
1572 
1573  return result + 1;
1574  }
1575  };
1576 
1578  {
1579  typedef uint8_t* value_type;
1580 
1581  static value_type low(value_type result, uint32_t ch)
1582  {
1583  *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
1584 
1585  return result + 1;
1586  }
1587 
1588  static value_type high(value_type result, uint32_t ch)
1589  {
1590  (void)ch;
1591 
1592  *result = '?';
1593 
1594  return result + 1;
1595  }
1596  };
1597 
1599  {
1600  typedef uint8_t type;
1601 
// Decodes `size` bytes of UTF-8 starting at `data` and feeds every decoded code point to Traits:
// Traits::low for code points taken from 1-3 byte sequences, Traits::high for 4-byte sequences.
// `result` is threaded through those calls and the final value is returned.
// A byte that does not start a sequence accepted below (a stray continuation byte, a 11111xxx
// lead, or a lead whose continuation bytes are missing, malformed or cut off by `size`) is
// skipped one byte at a time without producing output.
// The last parameter only selects Traits; its value is unused.
template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
{
	const uint8_t utf8_byte_mask = 0x3f;

	while (size)
	{
		uint8_t lead = *data;

		// 0xxxxxxx -> U+0000..U+007F
		if (lead < 0x80)
		{
			result = Traits::low(result, lead);
			data += 1;
			size -= 1;

			// process aligned single-byte (ascii) blocks
			if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
			{
				while (size >= 4)
				{
					// load four bytes with memcpy instead of dereferencing a uint32_t pointer into the
					// byte buffer: the result is the same, but it does not violate strict aliasing rules
					uint32_t block;
					memcpy(&block, data, sizeof(block));

					// any byte with the top bit set ends the ascii run
					if ((block & 0x80808080) != 0) break;

					result = Traits::low(result, data[0]);
					result = Traits::low(result, data[1]);
					result = Traits::low(result, data[2]);
					result = Traits::low(result, data[3]);
					data += 4;
					size -= 4;
				}
			}
		}
		// 110xxxxx -> U+0080..U+07FF
		else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
		{
			result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
			data += 2;
			size -= 2;
		}
		// 1110xxxx -> U+0800-U+FFFF
		else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
		{
			result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
			data += 3;
			size -= 3;
		}
		// 11110xxx -> U+10000..U+10FFFF
		else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
		{
			result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
			data += 4;
			size -= 4;
		}
		// 10xxxxxx or 11111xxx -> invalid
		else
		{
			data += 1;
			size -= 1;
		}
	}

	return result;
}
1663  };
1664 
1665  template <typename opt_swap> struct utf16_decoder
1666  {
1667  typedef uint16_t type;
1668 
1669  template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1670  {
1671  while (size)
1672  {
1673  uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1674 
1675  // U+0000..U+D7FF
1676  if (lead < 0xD800)
1677  {
1678  result = Traits::low(result, lead);
1679  data += 1;
1680  size -= 1;
1681  }
1682  // U+E000..U+FFFF
1683  else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1684  {
1685  result = Traits::low(result, lead);
1686  data += 1;
1687  size -= 1;
1688  }
1689  // surrogate pair lead
1690  else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1691  {
1692  uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1693 
1694  if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1695  {
1696  result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1697  data += 2;
1698  size -= 2;
1699  }
1700  else
1701  {
1702  data += 1;
1703  size -= 1;
1704  }
1705  }
1706  else
1707  {
1708  data += 1;
1709  size -= 1;
1710  }
1711  }
1712 
1713  return result;
1714  }
1715  };
1716 
1717  template <typename opt_swap> struct utf32_decoder
1718  {
1719  typedef uint32_t type;
1720 
1721  template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1722  {
1723  while (size)
1724  {
1725  uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1726 
1727  // U+0000..U+FFFF
1728  if (lead < 0x10000)
1729  {
1730  result = Traits::low(result, lead);
1731  data += 1;
1732  size -= 1;
1733  }
1734  // U+10000..U+10FFFF
1735  else
1736  {
1737  result = Traits::high(result, lead);
1738  data += 1;
1739  size -= 1;
1740  }
1741  }
1742 
1743  return result;
1744  }
1745  };
1746 
1748  {
1749  typedef uint8_t type;
1750 
// Feeds each of the `size` input bytes to Traits::low as a code point equal to the byte value.
// `result` is threaded through the calls and its final value is returned.
// The last parameter only selects Traits; its value is unused.
template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
{
	for (size_t i = 0; i < size; ++i)
		result = Traits::low(result, data[i]);

	return result;
}
1762  };
1763 
1764  template <size_t size> struct wchar_selector;
1765 
1766  template <> struct wchar_selector<2>
1767  {
1768  typedef uint16_t type;
1772  };
1773 
1774  template <> struct wchar_selector<4>
1775  {
1776  typedef uint32_t type;
1780  };
1781 
1784 
1786  {
1787  typedef wchar_t type;
1788 
1789  template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1790  {
1792 
1793  return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1794  }
1795  };
1796 
1797 #ifdef PUGIXML_WCHAR_MODE
1798  PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1799  {
1800  for (size_t i = 0; i < length; ++i)
1801  result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1802  }
1803 #endif
1805 
1808  {
1809  ct_parse_pcdata = 1, // \0, &, \r, <
1810  ct_parse_attr = 2, // \0, &, \r, ', "
1811  ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
1812  ct_space = 8, // \r, \n, space, tab
1813  ct_parse_cdata = 16, // \0, ], >, \r
1814  ct_parse_comment = 32, // \0, -, >, \r
1815  ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
1816  ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
1817  };
1818 
// Classification table indexed by character code (0..255): each entry is the bitwise OR of the
// ct_* flags that apply to that character. It is consulted through PUGI__IS_CHARTYPE.
// Every entry from 128 upwards is 192 (ct_symbol | ct_start_symbol).
1819  static const unsigned char chartype_table[256] =
1820  {
1821  55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
1822  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
1823  8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
1824  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
1825  0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
1826  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
1827  0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
1828  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
1829 
1830  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
1831  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1832  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1833  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1834  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1835  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1836  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1837  192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
1838  };
1839 
1841  {
1842  ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
1843  ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
1844  ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
1845  ctx_digit = 8, // 0-9
1846  ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
1847  };
1848 
// Classification table indexed by character code (0..255): each entry is the bitwise OR of the
// ctx_* flags that apply to that character. It is consulted through PUGI__IS_CHARTYPEX.
// Every entry from 128 upwards is 20 (ctx_start_symbol | ctx_symbol).
1849  static const unsigned char chartypex_table[256] =
1850  {
1851  3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
1852  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
1853  0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
1854  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
1855 
1856  0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
1857  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
1858  0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
1859  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
1860 
1861  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
1862  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1863  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1864  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1865  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1866  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1867  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1868  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
1869  };
1870 
// PUGI__IS_CHARTYPE_IMPL(c, ct, table) yields a non-zero value when character `c` has at least
// one of the flags in `ct` set in `table`, and zero otherwise.
// With PUGIXML_WCHAR_MODE every character code of 128 or more shares the entry table[128];
// otherwise the character is converted to unsigned char and used as the index directly.
1871 #ifdef PUGIXML_WCHAR_MODE
1872  #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
1873 #else
1874  #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
1875 #endif
1876 
// Convenience wrappers bound to chartype_table (ct_* flags) and chartypex_table (ctx_* flags).
1877  #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
1878  #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1879 
1881  {
1882  unsigned int ui = 1;
1883 
1884  return *reinterpret_cast<unsigned char*>(&ui) == 1;
1885  }
1886 
1888  {
1889  PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1890 
1891  if (sizeof(wchar_t) == 2)
1893  else
1895  }
1896 
1897  PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
1898  {
1899  #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
1900  #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
1901 
1902  // check if we have a non-empty XML declaration
1903  if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
1904  return false;
1905 
1906  // scan XML declaration until the encoding field
1907  for (size_t i = 6; i + 1 < size; ++i)
1908  {
1909  // declaration can not contain ? in quoted values
1910  if (data[i] == '?')
1911  return false;
1912 
1913  if (data[i] == 'e' && data[i + 1] == 'n')
1914  {
1915  size_t offset = i;
1916 
1917  // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
1920 
1921  // S? = S?
1923  PUGI__SCANCHAR('=');
1925 
1926  // the only two valid delimiters are ' and "
1927  uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
1928 
1929  PUGI__SCANCHAR(delimiter);
1930 
1931  size_t start = offset;
1932 
1933  out_encoding = data + offset;
1934 
1936 
1937  out_length = offset - start;
1938 
1939  PUGI__SCANCHAR(delimiter);
1940 
1941  return true;
1942  }
1943  }
1944 
1945  return false;
1946 
1947  #undef PUGI__SCANCHAR
1948  #undef PUGI__SCANCHARTYPE
1949  }
1950 
1951  PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1952  {
1953  // skip encoding autodetection if input buffer is too small
1954  if (size < 4) return encoding_utf8;
1955 
1956  uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1957 
1958  // look for BOM in first few bytes
1959  if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1960  if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1961  if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1962  if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1963  if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1964 
1965  // look for <, <? or <?xm in various encodings
1966  if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1967  if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1968  if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1969  if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1970 
1971  // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1972  if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1973  if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1974 
1975  // no known BOM detected; parse declaration
1976  const uint8_t* enc = 0;
1977  size_t enc_length = 0;
1978 
1979  if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
1980  {
1981  // iso-8859-1 (case-insensitive)
1982  if (enc_length == 10
1983  && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
1984  && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
1985  && enc[8] == '-' && enc[9] == '1')
1986  return encoding_latin1;
1987 
1988  // latin1 (case-insensitive)
1989  if (enc_length == 6
1990  && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
1991  && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
1992  && enc[5] == '1')
1993  return encoding_latin1;
1994  }
1995 
1996  return encoding_utf8;
1997  }
1998 
1999  PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
2000  {
2001  // replace wchar encoding with utf implementation
2002  if (encoding == encoding_wchar) return get_wchar_encoding();
2003 
2004  // replace utf16 encoding with utf16 with specific endianness
2006 
2007  // replace utf32 encoding with utf32 with specific endianness
2009 
2010  // only do autodetection if no explicit encoding is requested
2011  if (encoding != encoding_auto) return encoding;
2012 
2013  // try to guess encoding (based on XML specification, Appendix F.1)
2014  const uint8_t* data = static_cast<const uint8_t*>(contents);
2015 
2016  return guess_buffer_encoding(data, size);
2017  }
2018 
2019  PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2020  {
2021  size_t length = size / sizeof(char_t);
2022 
2023  if (is_mutable)
2024  {
2025  out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2026  out_length = length;
2027  }
2028  else
2029  {
2030  char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2031  if (!buffer) return false;
2032 
2033  if (contents)
2034  memcpy(buffer, contents, length * sizeof(char_t));
2035  else
2036  assert(length == 0);
2037 
2038  buffer[length] = 0;
2039 
2040  out_buffer = buffer;
2041  out_length = length + 1;
2042  }
2043 
2044  return true;
2045  }
2046 
2047 #ifdef PUGIXML_WCHAR_MODE
2048  PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2049  {
2050  return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2051  (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
2052  }
2053 
2054  PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2055  {
2056  const char_t* data = static_cast<const char_t*>(contents);
2057  size_t length = size / sizeof(char_t);
2058 
2059  if (is_mutable)
2060  {
2061  char_t* buffer = const_cast<char_t*>(data);
2062 
2063  convert_wchar_endian_swap(buffer, data, length);
2064 
2065  out_buffer = buffer;
2066  out_length = length;
2067  }
2068  else
2069  {
2070  char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2071  if (!buffer) return false;
2072 
2073  convert_wchar_endian_swap(buffer, data, length);
2074  buffer[length] = 0;
2075 
2076  out_buffer = buffer;
2077  out_length = length + 1;
2078  }
2079 
2080  return true;
2081  }
2082 
2083  template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2084  {
2085  const typename D::type* data = static_cast<const typename D::type*>(contents);
2086  size_t data_length = size / sizeof(typename D::type);
2087 
2088  // first pass: get length in wchar_t units
2089  size_t length = D::process(data, data_length, 0, wchar_counter());
2090 
2091  // allocate buffer of suitable length
2092  char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2093  if (!buffer) return false;
2094 
2095  // second pass: convert utf16 input to wchar_t
2096  wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2097  wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
2098 
2099  assert(oend == obegin + length);
2100  *oend = 0;
2101 
2102  out_buffer = buffer;
2103  out_length = length + 1;
2104 
2105  return true;
2106  }
2107 
2108  PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2109  {
2110  // get native encoding
2111  xml_encoding wchar_encoding = get_wchar_encoding();
2112 
2113  // fast path: no conversion required
2114  if (encoding == wchar_encoding)
2115  return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2116 
2117  // only endian-swapping is required
2118  if (need_endian_swap_utf(encoding, wchar_encoding))
2119  return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2120 
2121  // source encoding is utf8
2122  if (encoding == encoding_utf8)
2123  return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2124 
2125  // source encoding is utf16
2126  if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2127  {
2129 
2130  return (native_encoding == encoding) ?
2131  convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2132  convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2133  }
2134 
2135  // source encoding is utf32
2136  if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2137  {
2139 
2140  return (native_encoding == encoding) ?
2141  convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2142  convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2143  }
2144 
2145  // source encoding is latin1
2146  if (encoding == encoding_latin1)
2147  return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2148 
2149  assert(false && "Invalid encoding");
2150  return false;
2151  }
2152 #else
2153  template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2154  {
2155  const typename D::type* data = static_cast<const typename D::type*>(contents);
2156  size_t data_length = size / sizeof(typename D::type);
2157 
2158  // first pass: get length in utf8 units
2159  size_t length = D::process(data, data_length, 0, utf8_counter());
2160 
2161  // allocate buffer of suitable length
2162  char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2163  if (!buffer) return false;
2164 
2165  // second pass: convert utf16 input to utf8
2166  uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2167  uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2168 
2169  assert(oend == obegin + length);
2170  *oend = 0;
2171 
2172  out_buffer = buffer;
2173  out_length = length + 1;
2174 
2175  return true;
2176  }
2177 
// Returns the number of leading bytes of `data` whose value is at most 127,
// i.e. the index of the first byte above 127, or `size` when there is none.
PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
{
	size_t prefix = 0;

	while (prefix < size && data[prefix] <= 127)
		++prefix;

	return prefix;
}
2186 
2187  PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2188  {
2189  const uint8_t* data = static_cast<const uint8_t*>(contents);
2190  size_t data_length = size;
2191 
2192  // get size of prefix that does not need utf8 conversion
2193  size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2194  assert(prefix_length <= data_length);
2195 
2196  const uint8_t* postfix = data + prefix_length;
2197  size_t postfix_length = data_length - prefix_length;
2198 
2199  // if no conversion is needed, just return the original buffer
2200  if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2201 
2202  // first pass: get length in utf8 units
2203  size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2204 
2205  // allocate buffer of suitable length
2206  char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2207  if (!buffer) return false;
2208 
2209  // second pass: convert latin1 input to utf8
2210  memcpy(buffer, data, prefix_length);
2211 
2212  uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2213  uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2214 
2215  assert(oend == obegin + length);
2216  *oend = 0;
2217 
2218  out_buffer = buffer;
2219  out_length = length + 1;
2220 
2221  return true;
2222  }
2223 
// Converts an input buffer from the given source encoding into a char_t buffer, dispatching on
// the encoding value: utf8 goes straight to get_mutable_buffer, utf16/utf32 go to
// convert_buffer_generic with a matching decoder, latin1 goes to convert_buffer_latin1.
//  out_buffer / out_length - outputs filled in by the selected helper
//  contents / size         - input data
//  is_mutable              - forwarded to the helpers that can hand back the input buffer
// Returns the helper's result; for any other encoding value the assert fires and false is returned.
2224  PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2225  {
2226  // fast path: no conversion required
2227  if (encoding == encoding_utf8)
2228  return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2229 
2230  // source encoding is utf16
2231  if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2232  {
// NOTE(review): native_encoding is used below but its declaration (listing line 2233) is not
// present in this listing; presumably it is the host-endian utf16 encoding - confirm.
2234 
// decoder<opt_false> when the source matches native_encoding, decoder<opt_true> otherwise
// (presumably opt_true selects byte swapping - confirm against the decoder definition)
2235  return (native_encoding == encoding) ?
2236  convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2237  convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2238  }
2239 
2240  // source encoding is utf32
2241  if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2242  {
// NOTE(review): same as above - the native_encoding declaration (listing line 2243) is absent here.
2244 
2245  return (native_encoding == encoding) ?
2246  convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2247  convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2248  }
2249 
2250  // source encoding is latin1
2251  if (encoding == encoding_latin1)
2252  return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2253 
2254  assert(false && "Invalid encoding");
2255  return false;
2256  }
2257 #endif
2258 
2259  PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2260  {
2261  // get length in utf8 characters
2262  return wchar_decoder::process(str, length, 0, utf8_counter());
2263  }
2264 
2265  PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2266  {
2267  // convert to utf8
2268  uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2269  uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2270 
2271  assert(begin + size == end);
2272  (void)!end;
2273  (void)!size;
2274  }
2275 
2276 #ifndef PUGIXML_NO_STL
// Converts the wide string [str, str + length) to a utf8 std::string in two passes:
// as_utf8_begin computes the output size, as_utf8_end fills the resized string.
// An empty result skips the second pass, so &result[0] is never taken on an empty string.
2277  PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2278  {
2279  // first pass: get length in utf8 characters
2280  size_t size = as_utf8_begin(str, length);
2281 
2282  // allocate resulting string
// NOTE(review): the declaration of `result` (listing line 2283) is not present in this listing;
// given the return type it is presumably a std::string - confirm against the full source.
2284  result.resize(size);
2285 
2286  // second pass: convert to utf8
2287  if (size > 0) as_utf8_end(&result[0], size, str, length);
2288 
2289  return result;
2290  }
2291 
// Converts the utf8 byte sequence [str, str + size) to a wide string in two passes:
// the first pass counts the output in wchar_t units, the second writes into the resized string.
2292  PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2293  {
2294  const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2295 
2296  // first pass: get length in wchar_t units
2297  size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2298 
2299  // allocate resulting string
2300  std::basic_string<wchar_t> result;
2301  result.resize(length);
2302 
2303  // second pass: convert to wchar_t
2304  if (length > 0)
2305  {
2306  wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
// NOTE(review): the statement that defines `end` (listing line 2307) is not present in this
// listing; presumably it is the decoder call that writes the output starting at begin - confirm.
2308 
2309  assert(begin + length == end);
2310  (void)!end;
2311  }
2312 
2313  return result;
2314  }
2315 #endif
2316 
2317  template <typename Header>
2318  inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2319  {
2320  // never reuse shared memory
2321  if (header & xml_memory_page_contents_shared_mask) return false;
2322 
2323  size_t target_length = strlength(target);
2324 
2325  // always reuse document buffer memory if possible
2326  if ((header & header_mask) == 0) return target_length >= length;
2327 
2328  // reuse heap memory if waste is not too great
2329  const size_t reuse_threshold = 32;
2330 
2331  return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
2332  }
2333 
2334  template <typename String, typename Header>
2335  PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2336  {
2337  if (source_length == 0)
2338  {
2339  // empty string and null pointer are equivalent, so just deallocate old memory
2340  xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2341 
2342  if (header & header_mask) alloc->deallocate_string(dest);
2343 
2344  // mark the string as not allocated
2345  dest = 0;
2346  header &= ~header_mask;
2347 
2348  return true;
2349  }
2350  else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2351  {
2352  // we can reuse old buffer, so just copy the new data (including zero terminator)
2353  memcpy(dest, source, source_length * sizeof(char_t));
2354  dest[source_length] = 0;
2355 
2356  return true;
2357  }
2358  else
2359  {
2360  xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2361 
2362  if (!alloc->reserve()) return false;
2363 
2364  // allocate new buffer
2365  char_t* buf = alloc->allocate_string(source_length + 1);
2366  if (!buf) return false;
2367 
2368  // copy the string (including zero terminator)
2369  memcpy(buf, source, source_length * sizeof(char_t));
2370  buf[source_length] = 0;
2371 
2372  // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2373  if (header & header_mask) alloc->deallocate_string(dest);
2374 
2375  // the string is now allocated, so set the flag
2376  dest = buf;
2377  header |= header_mask;
2378 
2379  return true;
2380  }
2381  }
2382 
// Bookkeeping for deleting characters from a buffer in place. `size` is the total number of
// characters removed so far and `end` marks the end of the most recent gap; the text that
// follows a gap is shifted left by `size` only when the next gap is pushed or on flush.
// NOTE(review): this listing omits the declaration of the `end` member (listing line 2385) and
// the signature line of the flush member (listing line 2411; it is called as g.flush(s)
// elsewhere in this file) - confirm both against the full source.
2383  struct gap
2384  {
2386  size_t size;
2387 
2388  gap(): end(0), size(0)
2389  {
2390  }
2391 
2392  // Push new gap, move s count bytes further (skipping the gap).
2393  // Collapse previous gap.
// s is taken by reference and is advanced past the new gap; count is in char_t units.
2394  void push(char_t*& s, size_t count)
2395  {
2396  if (end) // there was a gap already; collapse it
2397  {
2398  // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2399  assert(s >= end);
// the length argument is a byte count, hence the casts to char*
2400  memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2401  }
2402 
2403  s += count; // end of current gap
2404 
2405  // "merge" two gaps
2406  end = s;
2407  size += count;
2408  }
2409 
2410  // Collapse all gaps, return past-the-end pointer
2412  {
2413  if (end)
2414  {
2415  // Move [old_gap_end, current_pos) to [old_gap_start, ...)
2416  assert(s >= end);
2417  memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2418 
// the compacted text ends `size` characters before s
2419  return s - size;
2420  }
// no gap was ever pushed: nothing moved, s is already the end
2421  else return s;
2422  }
2423  };
2424 
2426  {
2427  char_t* stre = s + 1;
2428 
2429  switch (*stre)
2430  {
2431  case '#': // &#...
2432  {
2433  unsigned int ucsc = 0;
2434 
2435  if (stre[1] == 'x') // &#x... (hex code)
2436  {
2437  stre += 2;
2438 
2439  char_t ch = *stre;
2440 
2441  if (ch == ';') return stre;
2442 
2443  for (;;)
2444  {
2445  if (static_cast<unsigned int>(ch - '0') <= 9)
2446  ucsc = 16 * ucsc + (ch - '0');
2447  else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
2448  ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
2449  else if (ch == ';')
2450  break;
2451  else // cancel
2452  return stre;
2453 
2454  ch = *++stre;
2455  }
2456 
2457  ++stre;
2458  }
2459  else // &#... (dec code)
2460  {
2461  char_t ch = *++stre;
2462 
2463  if (ch == ';') return stre;
2464 
2465  for (;;)
2466  {
2467  if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
2468  ucsc = 10 * ucsc + (ch - '0');
2469  else if (ch == ';')
2470  break;
2471  else // cancel
2472  return stre;
2473 
2474  ch = *++stre;
2475  }
2476 
2477  ++stre;
2478  }
2479 
2480  #ifdef PUGIXML_WCHAR_MODE
2481  s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
2482  #else
2483  s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
2484  #endif
2485 
2486  g.push(s, stre - s);
2487  return stre;
2488  }
2489 
2490  case 'a': // &a
2491  {
2492  ++stre;
2493 
2494  if (*stre == 'm') // &am
2495  {
2496  if (*++stre == 'p' && *++stre == ';') // &amp;
2497  {
2498  *s++ = '&';
2499  ++stre;
2500 
2501  g.push(s, stre - s);
2502  return stre;
2503  }
2504  }
2505  else if (*stre == 'p') // &ap
2506  {
2507  if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
2508  {
2509  *s++ = '\'';
2510  ++stre;
2511 
2512  g.push(s, stre - s);
2513  return stre;
2514  }
2515  }
2516  break;
2517  }
2518 
2519  case 'g': // &g
2520  {
2521  if (*++stre == 't' && *++stre == ';') // &gt;
2522  {
2523  *s++ = '>';
2524  ++stre;
2525 
2526  g.push(s, stre - s);
2527  return stre;
2528  }
2529  break;
2530  }
2531 
2532  case 'l': // &l
2533  {
2534  if (*++stre == 't' && *++stre == ';') // &lt;
2535  {
2536  *s++ = '<';
2537  ++stre;
2538 
2539  g.push(s, stre - s);
2540  return stre;
2541  }
2542  break;
2543  }
2544 
2545  case 'q': // &q
2546  {
2547  if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
2548  {
2549  *s++ = '"';
2550  ++stre;
2551 
2552  g.push(s, stre - s);
2553  return stre;
2554  }
2555  break;
2556  }
2557 
2558  default:
2559  break;
2560  }
2561 
2562  return stre;
2563  }
2564 
2565  // Parser utilities
// These macros rely on names from the scope they are expanded in: s (current position),
// endch, optmsk, cursor, alloc, ch, error_offset and error_status.
// PUGI__ENDSWITH: true if c equals e, or if c is the null terminator and endch equals e.
2566  #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
// PUGI__SKIPWS: advances s past characters classified as ct_space.
2567  #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
// PUGI__OPTSET: non-zero when any bit of OPT is set in optmsk.
2568  #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
// PUGI__PUSHNODE: appends a new child of the given type to cursor and makes it the cursor;
// reports status_out_of_memory if append_new_node returns null.
2569  #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
// PUGI__POPNODE: moves the cursor to its parent.
2570  #define PUGI__POPNODE() { cursor = cursor->parent; }
// PUGI__SCANFOR: advances s until X holds or the null terminator is reached.
2571  #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
// PUGI__SCANWHILE: advances s while X holds (X itself must stop at the terminator).
2572  #define PUGI__SCANWHILE(X) { while (X) ++s; }
// PUGI__SCANWHILE_UNROLL: like PUGI__SCANWHILE, but X must be written in terms of the local `ss`
// (the character under test); four characters are tested per loop iteration.
2573  #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
// PUGI__ENDSEG: saves the current character in ch, overwrites it with 0 and steps past it.
2574  #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
// PUGI__THROW_ERROR: records the error position and status, then returns a null char_t* from the
// enclosing function (so it is only usable in functions returning char_t*).
2575  #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
// PUGI__CHECK_ERROR: reports err at m if s is at the null terminator.
2576  #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2577 
2579  {
2580  gap g;
2581 
2582  while (true)
2583  {
2585 
2586  if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2587  {
2588  *s++ = '\n'; // replace first one with 0x0a
2589 
2590  if (*s == '\n') g.push(s, 1);
2591  }
2592  else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
2593  {
2594  *g.flush(s) = 0;
2595 
2596  return s + (s[2] == '>' ? 3 : 2);
2597  }
2598  else if (*s == 0)
2599  {
2600  return 0;
2601  }
2602  else ++s;
2603  }
2604  }
2605 
2607  {
2608  gap g;
2609 
2610  while (true)
2611  {
2613 
2614  if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2615  {
2616  *s++ = '\n'; // replace first one with 0x0a
2617 
2618  if (*s == '\n') g.push(s, 1);
2619  }
2620  else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
2621  {
2622  *g.flush(s) = 0;
2623 
2624  return s + 1;
2625  }
2626  else if (*s == 0)
2627  {
2628  return 0;
2629  }
2630  else ++s;
2631  }
2632  }
2633 
2634  typedef char_t* (*strconv_pcdata_t)(char_t*);
2635 
// In-place PCDATA converter selected by three compile-time flags (each provides ::value):
//  opt_trim   - strip trailing whitespace from the value
//  opt_eol    - turn '\r' and "\r\n" into '\n'
//  opt_escape - expand '&...' references through strconv_escape
// parse() converts the text starting at s, zero-terminates the converted value and returns the
// position just after the terminating '<', or the position of the null terminator when the
// buffer ends before a '<' is found.
2636  template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2637  {
2638  static char_t* parse(char_t* s)
2639  {
2640  gap g;
2641 
// start of the value; trimming never moves the end before this point
2642  char_t* begin = s;
2643 
2644  while (true)
2645  {
// NOTE(review): the first statement of the loop body (listing line 2646) is not present in this
// listing; presumably it skips ahead to the next character that needs handling - confirm.
2647 
2648  if (*s == '<') // PCDATA ends here
2649  {
// close any pending gap so `end` is the real end of the converted text
2650  char_t* end = g.flush(s);
2651 
2652  if (opt_trim::value)
2653  while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2654  --end;
2655 
2656  *end = 0;
2657 
2658  return s + 1;
2659  }
2660  else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2661  {
2662  *s++ = '\n'; // replace first one with 0x0a
2663 
// the '\n' of a "\r\n" pair is dropped by recording a one-character gap
2664  if (*s == '\n') g.push(s, 1);
2665  }
2666  else if (opt_escape::value && *s == '&')
2667  {
2668  s = strconv_escape(s, g);
2669  }
2670  else if (*s == 0)
2671  {
// end of buffer: finish the value the same way as for '<', but do not step past the terminator
2672  char_t* end = g.flush(s);
2673 
2674  if (opt_trim::value)
2675  while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2676  --end;
2677 
2678  *end = 0;
2679 
2680  return s;
2681  }
2682  else ++s;
2683  }
2684  }
2685  };
2686 
2688  {
2689  PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2690 
2691  switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim)
2692  {
2701  default: assert(false); return 0; // should not get here
2702  }
2703  }
2704 
2705  typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
2706 
// In-place attribute value converters. opt_escape::value enables expansion of '&...' references
// through strconv_escape. All four functions share one contract: s points just after the opening
// quote, end_quote is the quote character to stop at; the converted value is zero-terminated in
// place and the position after the closing quote is returned, or 0 if the null terminator is
// reached before the closing quote.
//  parse_wnorm  - drops leading/trailing whitespace and collapses inner whitespace runs to one space
//  parse_wconv  - replaces every whitespace character with a space ("\r\n" becomes a single space)
//  parse_eol    - turns '\r' and "\r\n" into '\n'
//  parse_simple - no whitespace processing
// NOTE(review): in each function the first statement of the while loop is not present in this
// listing (listing lines 2726, 2767, 2803, 2835); presumably each skips ahead to the next
// character that needs handling - confirm against the full source.
2707  template <typename opt_escape> struct strconv_attribute_impl
2708  {
2709  static char_t* parse_wnorm(char_t* s, char_t end_quote)
2710  {
2711  gap g;
2712 
2713  // trim leading whitespaces
2714  if (PUGI__IS_CHARTYPE(*s, ct_space))
2715  {
2716  char_t* str = s;
2717 
2718  do ++str;
2719  while (PUGI__IS_CHARTYPE(*str, ct_space));
2720 
// record the whole leading whitespace run as a gap; s ends up on the first non-space character
2721  g.push(s, str - s);
2722  }
2723 
2724  while (true)
2725  {
2727 
2728  if (*s == end_quote)
2729  {
2730  char_t* str = g.flush(s);
2731 
// writes the terminator, then keeps zeroing backwards over trailing whitespace
2732  do *str-- = 0;
2733  while (PUGI__IS_CHARTYPE(*str, ct_space));
2734 
2735  return s + 1;
2736  }
2737  else if (PUGI__IS_CHARTYPE(*s, ct_space))
2738  {
2739  *s++ = ' ';
2740 
// any further whitespace after the first one is removed via a gap
2741  if (PUGI__IS_CHARTYPE(*s, ct_space))
2742  {
2743  char_t* str = s + 1;
2744  while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2745 
2746  g.push(s, str - s);
2747  }
2748  }
2749  else if (opt_escape::value && *s == '&')
2750  {
2751  s = strconv_escape(s, g);
2752  }
2753  else if (!*s)
2754  {
2755  return 0;
2756  }
2757  else ++s;
2758  }
2759  }
2760 
2761  static char_t* parse_wconv(char_t* s, char_t end_quote)
2762  {
2763  gap g;
2764 
2765  while (true)
2766  {
2768 
2769  if (*s == end_quote)
2770  {
2771  *g.flush(s) = 0;
2772 
2773  return s + 1;
2774  }
2775  else if (PUGI__IS_CHARTYPE(*s, ct_space))
2776  {
2777  if (*s == '\r')
2778  {
2779  *s++ = ' ';
2780 
// "\r\n" counts as one whitespace: the '\n' is removed via a gap
2781  if (*s == '\n') g.push(s, 1);
2782  }
2783  else *s++ = ' ';
2784  }
2785  else if (opt_escape::value && *s == '&')
2786  {
2787  s = strconv_escape(s, g);
2788  }
2789  else if (!*s)
2790  {
2791  return 0;
2792  }
2793  else ++s;
2794  }
2795  }
2796 
2797  static char_t* parse_eol(char_t* s, char_t end_quote)
2798  {
2799  gap g;
2800 
2801  while (true)
2802  {
2804 
2805  if (*s == end_quote)
2806  {
2807  *g.flush(s) = 0;
2808 
2809  return s + 1;
2810  }
2811  else if (*s == '\r')
2812  {
2813  *s++ = '\n';
2814 
2815  if (*s == '\n') g.push(s, 1);
2816  }
2817  else if (opt_escape::value && *s == '&')
2818  {
2819  s = strconv_escape(s, g);
2820  }
2821  else if (!*s)
2822  {
2823  return 0;
2824  }
2825  else ++s;
2826  }
2827  }
2828 
2829  static char_t* parse_simple(char_t* s, char_t end_quote)
2830  {
2831  gap g;
2832 
2833  while (true)
2834  {
2836 
2837  if (*s == end_quote)
2838  {
2839  *g.flush(s) = 0;
2840 
2841  return s + 1;
2842  }
2843  else if (opt_escape::value && *s == '&')
2844  {
2845  s = strconv_escape(s, g);
2846  }
2847  else if (!*s)
2848  {
2849  return 0;
2850  }
2851  else ++s;
2852  }
2853  }
2854  };
2855 
2857  {
2859 
2860  switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
2861  {
2878  default: assert(false); return 0; // should not get here
2879  }
2880  }
2881 
2882  inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2883  {
2884  xml_parse_result result;
2885  result.status = status;
2886  result.offset = offset;
2887 
2888  return result;
2889  }
2890 
2891  struct xml_parser
2892  {
2894  char_t* error_offset;
2896 
2898  {
2899  }
2900 
2901  // DOCTYPE consists of nested sections of the following possible types:
2902  // <!-- ... -->, <? ... ?>, "...", '...'
2903  // <![...]]>
2904  // <!...>
2905  // First group can not contain nested groups
2906  // Second group can contain nested groups of the same type
2907  // Third group can contain all other groups
// Skips one non-nesting DOCTYPE item starting at s: a quoted string, a '<? ... ?>' section or a
// '<!-- ... -->' comment, and returns the position just after it.
// NOTE(review): the statement following each PUGI__SCANFOR (listing lines 2915, 2924, 2932) and
// the statement at listing line 2936 are not present in this listing; the caller checks the
// result for null, so these are presumably the error exits for an unterminated or unrecognized
// item - confirm against the full source.
2908  char_t* parse_doctype_primitive(char_t* s)
2909  {
2910  if (*s == '"' || *s == '\'')
2911  {
2912  // quoted string
// remember which quote opened the string and scan for the same one
2913  char_t ch = *s++;
2914  PUGI__SCANFOR(*s == ch);
2916 
2917  s++;
2918  }
2919  else if (s[0] == '<' && s[1] == '?')
2920  {
2921  // <? ... ?>
2922  s += 2;
2923  PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2925 
2926  s += 2;
2927  }
2928  else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2929  {
// <!-- ... -->
2930  s += 4;
2931  PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2933 
2934  s += 3;
2935  }
2937 
2938  return s;
2939  }
2940 
// Skips a '<![ ... ]]>' section of a DOCTYPE, including sections of the same kind nested inside
// it. s must point at the opening '<![' (asserted). Returns the position just after the ']]>'
// that closes the outermost section.
// NOTE(review): the statement executed when the buffer ends before the section is closed
// (listing line 2969) is not present in this listing; the caller checks the result for null, so
// it is presumably an error exit - confirm against the full source.
2941  char_t* parse_doctype_ignore(char_t* s)
2942  {
// number of nested '<![' sections currently open inside the outermost one
2943  size_t depth = 0;
2944 
2945  assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2946  s += 3;
2947 
2948  while (*s)
2949  {
2950  if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2951  {
2952  // nested ignore section
2953  s += 3;
2954  depth++;
2955  }
2956  else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2957  {
2958  // ignore section end
2959  s += 3;
2960 
// depth 0 means this ']]>' closes the section we were called for
2961  if (depth == 0)
2962  return s;
2963 
2964  depth--;
2965  }
2966  else s++;
2967  }
2968 
2970  }
2971 
// Scans a '<!...>' DOCTYPE group, including nested '<!...>' groups, '<![...]]>' sections and
// primitive items (strings, '<?...?>', comments).
//  s     - points at the group start: '<' (or a 0 in its place) followed by '!' (asserted)
//  endch - the original last character of the buffer, which the parser replaced with 0
// Returns the position of the '>' that closes the outermost group (the '>' is not consumed), or
// the position of the null terminator when the buffer ends at depth 0 and endch is '>'.
// Reports status_bad_doctype otherwise, and returns null when a nested helper returns null.
2972  char_t* parse_doctype_group(char_t* s, char_t endch)
2973  {
// number of nested '<!' groups currently open inside the outermost one
2974  size_t depth = 0;
2975 
2976  assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
2977  s += 2;
2978 
2979  while (*s)
2980  {
// '<!' not followed by '-' : either a '<![' section or a nested control group
// ('<!-' falls through to the primitive branch below)
2981  if (s[0] == '<' && s[1] == '!' && s[2] != '-')
2982  {
2983  if (s[2] == '[')
2984  {
2985  // ignore
2986  s = parse_doctype_ignore(s);
2987  if (!s) return s;
2988  }
2989  else
2990  {
2991  // some control group
2992  s += 2;
2993  depth++;
2994  }
2995  }
2996  else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
2997  {
2998  // unknown tag (forbidden), or some primitive group
2999  s = parse_doctype_primitive(s);
3000  if (!s) return s;
3001  }
3002  else if (*s == '>')
3003  {
// the outermost '>' is left for the caller; inner ones are consumed here
3004  if (depth == 0)
3005  return s;
3006 
3007  depth--;
3008  s++;
3009  }
3010  else s++;
3011  }
3012 
// end of buffer: only acceptable if every nested group is closed and the character that was
// replaced by the terminator was the closing '>'
3013  if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
3014 
3015  return s;
3016  }
3017 
// Parses a '<!...' construct with s pointing at the '!': a comment ('<!--'), a CDATA section
// ('<![CDATA[') or a DOCTYPE declaration.
//  cursor - node that receives any node created here (taken by value, so the caller's cursor is unchanged)
//  optmsk - parse option mask tested through PUGI__OPTSET
//  endch  - the original last character of the buffer, which the parser replaced with 0
// Returns the position after the construct, or null after an error has been recorded through
// PUGI__THROW_ERROR.
// NOTE(review): a number of lines are not present in this listing - among them the conditions
// in front of the braces that follow listing lines 3030, 3036, 3063 and 3110, the statements after
// the PUGI__SCANFOR calls, and the branches at listing lines 3055, 3095 and 3122. Read this
// function against the full source before changing it.
3018  char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
3019  {
3020  // parse node contents, starting with exclamation mark
3021  ++s;
3022 
3023  if (*s == '-') // '<!-...'
3024  {
3025  ++s;
3026 
3027  if (*s == '-') // '<!--...'
3028  {
3029  ++s;
3030 
3032  {
3033  PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
3034  cursor->value = s; // Save the offset.
3035  }
3036 
3038  {
3039  s = strconv_comment(s, endch);
3040 
3041  if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
3042  }
3043  else
3044  {
3045  // Scan for terminating '-->'.
3046  PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
3048 
3050  *s = 0; // Zero-terminate this segment at the first terminating '-'.
3051 
// s[2] is 0 instead of '>' when the '>' was the buffer's last character
3052  s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
3053  }
3054  }
3056  }
3057  else if (*s == '[')
3058  {
3059  // '<![CDATA[...'
3060  if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
3061  {
3062  ++s;
3063 
3065  {
3066  PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
3067  cursor->value = s; // Save the offset.
3068 
3069  if (PUGI__OPTSET(parse_eol))
3070  {
3071  s = strconv_cdata(s, endch);
3072 
3073  if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
3074  }
3075  else
3076  {
3077  // Scan for terminating ']]>'.
3078  PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3080 
3081  *s++ = 0; // Zero-terminate this segment.
3082  }
3083  }
3084  else // Flagged for discard, but we still have to scan for the terminator.
3085  {
3086  // Scan for terminating ']]>'.
3087  PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3089 
3090  ++s;
3091  }
3092 
3093  s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
3094  }
3096  }
3097  else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
3098  {
// step back to the '<' so that parse_doctype_group sees the whole '<!DOCTYPE' group
3099  s -= 2;
3100 
// a DOCTYPE is only accepted when the cursor has no parent
3101  if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
3102 
// 9 is the length of "<!DOCTYPE": mark is the first character after the keyword
3103  char_t* mark = s + 9;
3104 
3105  s = parse_doctype_group(s, endch);
3106  if (!s) return s;
3107 
3108  assert((*s == 0 && endch == '>') || *s == '>');
3109  if (*s) *s++ = 0;
3110 
3112  {
3113  while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
3114 
3116 
3117  cursor->value = mark;
3118  }
3119  }
3120  else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
3121  else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
3123 
3124  return s;
3125  }
3126 
// Parses a '<?...' construct with s pointing at the '?': either the XML declaration (target
// "xml" in any letter case) or a processing instruction.
//  ref_cursor - current node, passed by reference; it is written back at the end. For a
//               declaration with attributes it is left pointing at the new declaration node and
//               s is returned at the start of the attribute text (see the comment near listing
//               line 3189).
//  optmsk     - parse option mask; parse_declaration / parse_pi decide whether a node is created
//  endch      - the original last character of the buffer, which the parser replaced with 0
// Returns the position to continue parsing from, or null after an error has been recorded
// through PUGI__THROW_ERROR.
// NOTE(review): several lines are not present in this listing - the statements that scan the
// target name (listing lines 3139-3142), the node creation in both branches (3154, 3158), the
// statements after the PUGI__SCANFOR calls (3182, 3210) and the branch at 3204. Read this
// function against the full source before changing it.
3127  char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
3128  {
3129  // load into registers
3130  xml_node_struct* cursor = ref_cursor;
3131  char_t ch = 0;
3132 
3133  // parse node contents, starting with question mark
3134  ++s;
3135 
3136  // read PI target
3137  char_t* target = s;
3138 
3140 
3143 
3144  // determine node type; stricmp / strcasecmp is not portable
// (c | ' ') maps an ASCII upper-case letter to lower case; target + 3 == s requires the target
// to be exactly three characters long
3145  bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
3146 
3147  if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
3148  {
3149  if (declaration)
3150  {
3151  // disallow non top-level declarations
3152  if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
3153 
3155  }
3156  else
3157  {
3159  }
3160 
3161  cursor->name = target;
3162 
// terminates the target name in place; ch receives the character that followed it
3163  PUGI__ENDSEG();
3164 
3165  // parse value/attributes
3166  if (ch == '?')
3167  {
3168  // empty node
3169  if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
// *s is 0 rather than '>' when the '>' was the buffer's last character; do not step past it then
3170  s += (*s == '>');
3171 
3172  PUGI__POPNODE();
3173  }
3174  else if (PUGI__IS_CHARTYPE(ch, ct_space))
3175  {
3176  PUGI__SKIPWS();
3177 
3178  // scan for tag end
3179  char_t* value = s;
3180 
3181  PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3183 
3184  if (declaration)
3185  {
3186  // replace ending ? with / so that 'element' terminates properly
3187  *s = '/';
3188 
3189  // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
3190  s = value;
3191  }
3192  else
3193  {
3194  // store value and step over >
3195  cursor->value = value;
3196 
3197  PUGI__POPNODE();
3198 
3199  PUGI__ENDSEG();
3200 
3201  s += (*s == '>');
3202  }
3203  }
3205  }
3206  else
3207  {
// node not requested by the option mask: skip to the end of the construct without creating anything
3208  // scan for tag end
3209  PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3211 
3212  s += (s[1] == '>' ? 2 : 1);
3213  }
3214 
3215  // store from registers
3216  ref_cursor = cursor;
3217 
3218  return s;
3219  }
3220 
// Main parsing loop: walks the zero-terminated text starting at s and builds nodes under root.
//  root   - node that receives the parsed top-level nodes; parsing must end with cursor == root
//  optmsk - parse option mask; also selects the attribute and PCDATA converters
//  endch  - the original last character of the buffer, which the caller replaced with 0; the
//           branches that test it accept the terminator in place of that character
// Handles start tags with attributes, end tags, '<?' (parse_question), '<!' (parse_exclamation)
// and PCDATA. Returns the final position, or null after an error has been recorded through
// PUGI__THROW_ERROR (or propagated from a helper).
// Control flow uses two labels: LOC_TAG (s is just after a '<') and LOC_ATTRIBUTES (attribute
// list of the node at cursor).
// NOTE(review): a number of lines are not present in this listing, mostly statements that follow
// the visible checks (for example listing lines 3260, 3292-3296, 3313, 3325, 3332, 3345, 3354,
// 3364, 3395, 3408, 3412 and 3418). Read this function against the full source before changing it.
3221  char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
3222  {
3223  strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
3224  strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
3225 
3226  char_t ch = 0;
3227  xml_node_struct* cursor = root;
3228  char_t* mark = s;
3229 
3230  while (*s != 0)
3231  {
3232  if (*s == '<')
3233  {
3234  ++s;
3235 
3236  LOC_TAG:
3237  if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
3238  {
3239  PUGI__PUSHNODE(node_element); // Append a new node to the tree.
3240 
3241  cursor->name = s;
3242 
3243  PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3244  PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3245 
3246  if (ch == '>')
3247  {
3248  // end of tag
3249  }
3250  else if (PUGI__IS_CHARTYPE(ch, ct_space))
3251  {
3252  LOC_ATTRIBUTES:
3253  while (true)
3254  {
3255  PUGI__SKIPWS(); // Eat any whitespace.
3256 
3257  if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
3258  {
3259  xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
3261 
3262  a->name = s; // Save the offset.
3263 
3264  PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3265  PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3266 
// whitespace between the attribute name and '=' is allowed: fetch the next significant character
3267  if (PUGI__IS_CHARTYPE(ch, ct_space))
3268  {
3269  PUGI__SKIPWS(); // Eat any whitespace.
3270 
3271  ch = *s;
3272  ++s;
3273  }
3274 
3275  if (ch == '=') // '<... #=...'
3276  {
3277  PUGI__SKIPWS(); // Eat any whitespace.
3278 
3279  if (*s == '"' || *s == '\'') // '<... #="...'
3280  {
3281  ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
3282  ++s; // Step over the quote.
3283  a->value = s; // Save the offset.
3284 
3285  s = strconv_attribute(s, ch);
3286 
3287  if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
3288 
3289  // After this line the loop continues from the start;
3290  // Whitespaces, / and > are ok, symbols and EOF are wrong,
3291  // everything else will be detected
3293  }
3295  }
3297  }
3298  else if (*s == '/')
3299  {
3300  ++s;
3301 
3302  if (*s == '>')
3303  {
3304  PUGI__POPNODE();
3305  s++;
3306  break;
3307  }
3308  else if (*s == 0 && endch == '>')
3309  {
3310  PUGI__POPNODE();
3311  break;
3312  }
3314  }
3315  else if (*s == '>')
3316  {
3317  ++s;
3318 
3319  break;
3320  }
3321  else if (*s == 0 && endch == '>')
3322  {
3323  break;
3324  }
3326  }
3327 
3328  // !!!
3329  }
3330  else if (ch == '/') // '<#.../'
3331  {
3333 
3334  PUGI__POPNODE(); // Pop.
3335 
3336  s += (*s == '>');
3337  }
3338  else if (ch == 0)
3339  {
3340  // we stepped over null terminator, backtrack & handle closing tag
3341  --s;
3342 
3343  if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
3344  }
3346  }
3347  else if (*s == '/')
3348  {
3349  ++s;
3350 
3351  mark = s;
3352 
// the closing tag is compared character by character against the name of the current node
3353  char_t* name = cursor->name;
3355 
3356  while (PUGI__IS_CHARTYPE(*s, ct_symbol))
3357  {
3358  if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3359  }
3360 
// leftover characters in name: the closing tag is shorter than the opening one
3361  if (*name)
3362  {
3363  if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
3365  }
3366 
3367  PUGI__POPNODE(); // Pop.
3368 
3369  PUGI__SKIPWS();
3370 
3371  if (*s == 0)
3372  {
3373  if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3374  }
3375  else
3376  {
3377  if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3378  ++s;
3379  }
3380  }
3381  else if (*s == '?') // '<?...'
3382  {
3383  s = parse_question(s, cursor, optmsk, endch);
3384  if (!s) return s;
3385 
3386  assert(cursor);
// parse_question leaves the cursor on a declaration node when its attributes still have to be parsed
3387  if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
3388  }
3389  else if (*s == '!') // '<!...'
3390  {
3391  s = parse_exclamation(s, cursor, optmsk, endch);
3392  if (!s) return s;
3393  }
3394  else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
3396  }
3397  else
3398  {
3399  mark = s; // Save this offset while searching for a terminator.
3400 
3401  PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3402 
3403  if (*s == '<' || !*s)
3404  {
3405  // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
3406  assert(mark != s);
3407 
3409  {
3410  continue;
3411  }
3413  {
3414  if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
3415  }
3416  }
3417 
// rewind to the saved start so that the PCDATA value includes the whitespace skipped above
3419  s = mark;
3420 
3421  if (cursor->parent || PUGI__OPTSET(parse_fragment))
3422  {
3423  if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
3424  {
3425  cursor->value = s; // Save the offset.
3426  }
3427  else
3428  {
3429  PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
3430 
3431  cursor->value = s; // Save the offset.
3432 
3433  PUGI__POPNODE(); // Pop since this is a standalone.
3434  }
3435 
3436  s = strconv_pcdata(s);
3437 
3438  if (!*s) break;
3439  }
3440  else
3441  {
// text directly under the root of a non-fragment document is skipped, not stored
3442  PUGI__SCANFOR(*s == '<'); // '...<'
3443  if (!*s) break;
3444 
3445  ++s;
3446  }
3447 
3448  // We're after '<'
3449  goto LOC_TAG;
3450  }
3451  }
3452 
3453  // check that last tag is closed
3454  if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
3455 
3456  return s;
3457  }
3458 
3459  #ifdef PUGIXML_WCHAR_MODE
3460  static char_t* parse_skip_bom(char_t* s)
3461  {
3462  unsigned int bom = 0xfeff;
3463  return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
3464  }
3465  #else
3466  static char_t* parse_skip_bom(char_t* s)
3467  {
3468  return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
3469  }
3470  #endif
3471 
3472  static bool has_element_node_siblings(xml_node_struct* node)
3473  {
3474  while (node)
3475  {
3476  if (PUGI__NODETYPE(node) == node_element) return true;
3477 
3478  node = node->next_sibling;
3479  }
3480 
3481  return false;
3482  }
3483 
// Entry point of the parser: parses buffer[0 .. length) in place and attaches the resulting
// nodes to root.
//  buffer / length - text to parse; the last character is overwritten with 0 (its original
//                    value is passed on as endch)
//  xmldoc          - document structure, used as the allocator for the parser
//  root            - node that receives the parsed nodes
//  optmsk          - parse option mask
// Returns the parse status together with an error offset relative to buffer.
3484  static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3485  {
3486  // early-out for empty documents
3487  if (length == 0)
// NOTE(review): the statement guarded by this check (listing line 3488) is not present in this
// listing; per the comment above it is an early return - confirm its value against the full source.
3489 
3490  // get last child of the root before parsing
// remembered so that only children added by this parse are examined afterwards
3491  xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3492 
3493  // create parser on stack
3494  xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3495 
3496  // save last character and make buffer zero-terminated (speeds up parsing)
3497  char_t endch = buffer[length - 1];
3498  buffer[length - 1] = 0;
3499 
3500  // skip BOM to make sure it does not end up as part of parse output
3501  char_t* buffer_data = parse_skip_bom(buffer);
3502 
3503  // perform actual parsing
// the return value is not used; the outcome is read from parser.error_status / error_offset below
3504  parser.parse_tree(buffer_data, root, optmsk, endch);
3505 
3506  xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3507  assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3508 
3509  if (result)
3510  {
3511  // since we removed last character, we have to handle the only possible false positive (stray <)
3512  if (endch == '<')
3513  return make_parse_result(status_unrecognized_tag, length - 1);
3514 
3515  // check if there are any element nodes parsed
3516  xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3517 
// outside of fragment mode at least one element must have been added under root
3518  if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3519  return make_parse_result(status_no_document_element, length - 1);
3520  }
3521  else
3522  {
3523  // roll back offset if it occurs on a null terminator in the source buffer
3524  if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3525  result.offset--;
3526  }
3527 
3528  return result;
3529  }
3530  };
3531 
3532  // Output facilities
3534  {
3535  #ifdef PUGIXML_WCHAR_MODE
3536  return get_wchar_encoding();
3537  #else
3538  return encoding_utf8;
3539  #endif
3540  }
3541 
3543  {
3544  // replace wchar encoding with utf implementation
3545  if (encoding == encoding_wchar) return get_wchar_encoding();
3546 
3547  // replace utf16 encoding with utf16 with specific endianness
3549 
3550  // replace utf32 encoding with utf32 with specific endianness
3552 
3553  // only do autodetection if no explicit encoding is requested
3554  if (encoding != encoding_auto) return encoding;
3555 
3556  // assume utf8 encoding
3557  return encoding_utf8;
3558  }
3559 
3560  template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
3561  {
3562  PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3563 
3564  typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3565 
3566  return static_cast<size_t>(end - dest) * sizeof(*dest);
3567  }
3568 
3569  template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3570  {
3571  PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3572 
3573  typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3574 
3575  if (opt_swap)
3576  {
3577  for (typename T::value_type i = dest; i != end; ++i)
3578  *i = endian_swap(*i);
3579  }
3580 
3581  return static_cast<size_t>(end - dest) * sizeof(*dest);
3582  }
3583 
3584 #ifdef PUGIXML_WCHAR_MODE
3585  PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3586  {
3587  if (length < 1) return 0;
3588 
3589  // discard last character if it's the lead of a surrogate pair
3590  return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
3591  }
3592 
3593  PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3594  {
3595  // only endian-swapping is required
3596  if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3597  {
3598  convert_wchar_endian_swap(r_char, data, length);
3599 
3600  return length * sizeof(char_t);
3601  }
3602 
3603  // convert to utf8
3604  if (encoding == encoding_utf8)
3605  return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3606 
3607  // convert to utf16
3608  if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3609  {
3611 
3612  return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3613  }
3614 
3615  // convert to utf32
3616  if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3617  {
3619 
3620  return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3621  }
3622 
3623  // convert to latin1
3624  if (encoding == encoding_latin1)
3625  return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3626 
3627  assert(false && "Invalid encoding");
3628  return 0;
3629  }
3630 #else
3631  PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3632  {
3633  if (length < 5) return 0;
3634 
3635  for (size_t i = 1; i <= 4; ++i)
3636  {
3637  uint8_t ch = static_cast<uint8_t>(data[length - i]);
3638 
3639  // either a standalone character or a leading one
3640  if ((ch & 0xc0) != 0x80) return length - i;
3641  }
3642 
3643  // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
3644  return length;
3645  }
3646 
3647  PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3648  {
3649  if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3650  {
3652 
3653  return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3654  }
3655 
3656  if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3657  {
3659 
3660  return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3661  }
3662 
3663  if (encoding == encoding_latin1)
3664  return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3665 
3666  assert(false && "Invalid encoding");
3667  return 0;
3668  }
3669 #endif
3670 
3672  {
3674  xml_buffered_writer& operator=(const xml_buffered_writer&);
3675 
3676  public:
3677  xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
3678  {
3680  }
3681 
3682  size_t flush()
3683  {
3684  flush(buffer, bufsize);
3685  bufsize = 0;
3686  return 0;
3687  }
3688 
3689  void flush(const char_t* data, size_t size)
3690  {
3691  if (size == 0) return;
3692 
3693  // fast path, just write data
3694  if (encoding == get_write_native_encoding())
3695  writer.write(data, size * sizeof(char_t));
3696  else
3697  {
3698  // convert chunk
3699  size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3700  assert(result <= sizeof(scratch));
3701 
3702  // write data
3703  writer.write(scratch.data_u8, result);
3704  }
3705  }
3706 
3707  void write_direct(const char_t* data, size_t length)
3708  {
3709  // flush the remaining buffer contents
3710  flush();
3711 
3712  // handle large chunks
3713  if (length > bufcapacity)
3714  {
3715  if (encoding == get_write_native_encoding())
3716  {
3717  // fast path, can just write data chunk
3718  writer.write(data, length * sizeof(char_t));
3719  return;
3720  }
3721 
3722  // need to convert in suitable chunks
3723  while (length > bufcapacity)
3724  {
3725  // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3726  // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3727  size_t chunk_size = get_valid_length(data, bufcapacity);
3728  assert(chunk_size);
3729 
3730  // convert chunk and write
3731  flush(data, chunk_size);
3732 
3733  // iterate
3734  data += chunk_size;
3735  length -= chunk_size;
3736  }
3737 
3738  // small tail is copied below
3739  bufsize = 0;
3740  }
3741 
3742  memcpy(buffer + bufsize, data, length * sizeof(char_t));
3743  bufsize += length;
3744  }
3745 
3746  void write_buffer(const char_t* data, size_t length)
3747  {
3748  size_t offset = bufsize;
3749 
3750  if (offset + length <= bufcapacity)
3751  {
3752  memcpy(buffer + offset, data, length * sizeof(char_t));
3753  bufsize = offset + length;
3754  }
3755  else
3756  {
3757  write_direct(data, length);
3758  }
3759  }
3760 
3761  void write_string(const char_t* data)
3762  {
3763  // write the part of the string that fits in the buffer
3764  size_t offset = bufsize;
3765 
3766  while (*data && offset < bufcapacity)
3767  buffer[offset++] = *data++;
3768 
3769  // write the rest
3770  if (offset < bufcapacity)
3771  {
3772  bufsize = offset;
3773  }
3774  else
3775  {
3776  // backtrack a bit if we have split the codepoint
3777  size_t length = offset - bufsize;
3778  size_t extra = length - get_valid_length(data - length, length);
3779 
3780  bufsize = offset - extra;
3781 
3782  write_direct(data - extra, strlength(data) + extra);
3783  }
3784  }
3785 
3786  void write(char_t d0)
3787  {
3788  size_t offset = bufsize;
3789  if (offset > bufcapacity - 1) offset = flush();
3790 
3791  buffer[offset + 0] = d0;
3792  bufsize = offset + 1;
3793  }
3794 
3795  void write(char_t d0, char_t d1)
3796  {
3797  size_t offset = bufsize;
3798  if (offset > bufcapacity - 2) offset = flush();
3799 
3800  buffer[offset + 0] = d0;
3801  buffer[offset + 1] = d1;
3802  bufsize = offset + 2;
3803  }
3804 
3805  void write(char_t d0, char_t d1, char_t d2)
3806  {
3807  size_t offset = bufsize;
3808  if (offset > bufcapacity - 3) offset = flush();
3809 
3810  buffer[offset + 0] = d0;
3811  buffer[offset + 1] = d1;
3812  buffer[offset + 2] = d2;
3813  bufsize = offset + 3;
3814  }
3815 
3816  void write(char_t d0, char_t d1, char_t d2, char_t d3)
3817  {
3818  size_t offset = bufsize;
3819  if (offset > bufcapacity - 4) offset = flush();
3820 
3821  buffer[offset + 0] = d0;
3822  buffer[offset + 1] = d1;
3823  buffer[offset + 2] = d2;
3824  buffer[offset + 3] = d3;
3825  bufsize = offset + 4;
3826  }
3827 
3828  void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3829  {
3830  size_t offset = bufsize;
3831  if (offset > bufcapacity - 5) offset = flush();
3832 
3833  buffer[offset + 0] = d0;
3834  buffer[offset + 1] = d1;
3835  buffer[offset + 2] = d2;
3836  buffer[offset + 3] = d3;
3837  buffer[offset + 4] = d4;
3838  bufsize = offset + 5;
3839  }
3840 
3841  void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3842  {
3843  size_t offset = bufsize;
3844  if (offset > bufcapacity - 6) offset = flush();
3845 
3846  buffer[offset + 0] = d0;
3847  buffer[offset + 1] = d1;
3848  buffer[offset + 2] = d2;
3849  buffer[offset + 3] = d3;
3850  buffer[offset + 4] = d4;
3851  buffer[offset + 5] = d5;
3852  bufsize = offset + 6;
3853  }
3854 
3855  // utf8 maximum expansion: x4 (-> utf32)
3856  // utf16 maximum expansion: x2 (-> utf32)
3857  // utf32 maximum expansion: x1
3858  enum
3859  {
3861  #ifdef PUGIXML_MEMORY_OUTPUT_STACK
3862  PUGIXML_MEMORY_OUTPUT_STACK
3863  #else
3864  10240
3865  #endif
3866  ,
3868  };
3869 
3870  char_t buffer[bufcapacity];
3871 
3872  union
3873  {
3874  uint8_t data_u8[4 * bufcapacity];
3875  uint16_t data_u16[2 * bufcapacity];
3878  } scratch;
3879 
3880  xml_writer& writer;
3881  size_t bufsize;
3883  };
3884 
3886  {
3887  while (*s)
3888  {
3889  const char_t* prev = s;
3890 
3891  // While *s is a usual symbol
3893 
3894  writer.write_buffer(prev, static_cast<size_t>(s - prev));
3895 
3896  switch (*s)
3897  {
3898  case 0: break;
3899  case '&':
3900  writer.write('&', 'a', 'm', 'p', ';');
3901  ++s;
3902  break;
3903  case '<':
3904  writer.write('&', 'l', 't', ';');
3905  ++s;
3906  break;
3907  case '>':
3908  writer.write('&', 'g', 't', ';');
3909  ++s;
3910  break;
3911  case '"':
3912  writer.write('&', 'q', 'u', 'o', 't', ';');
3913  ++s;
3914  break;
3915  default: // s is not a usual symbol
3916  {
3917  unsigned int ch = static_cast<unsigned int>(*s++);
3918  assert(ch < 32);
3919 
3920  writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3921  }
3922  }
3923  }
3924  }
3925 
3926  PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3927  {
3928  if (flags & format_no_escapes)
3929  writer.write_string(s);
3930  else
3931  text_output_escaped(writer, s, type);
3932  }
3933 
3935  {
3936  do
3937  {
3938  writer.write('<', '!', '[', 'C', 'D');
3939  writer.write('A', 'T', 'A', '[');
3940 
3941  const char_t* prev = s;
3942 
3943  // look for ]]> sequence - we can't output it as is since it terminates CDATA
3944  while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3945 
3946  // skip ]] if we stopped at ]]>, > will go to the next CDATA section
3947  if (*s) s += 2;
3948 
3949  writer.write_buffer(prev, static_cast<size_t>(s - prev));
3950 
3951  writer.write(']', ']', '>');
3952  }
3953  while (*s);
3954  }
3955 
3956  PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3957  {
3958  switch (indent_length)
3959  {
3960  case 1:
3961  {
3962  for (unsigned int i = 0; i < depth; ++i)
3963  writer.write(indent[0]);
3964  break;
3965  }
3966 
3967  case 2:
3968  {
3969  for (unsigned int i = 0; i < depth; ++i)
3970  writer.write(indent[0], indent[1]);
3971  break;
3972  }
3973 
3974  case 3:
3975  {
3976  for (unsigned int i = 0; i < depth; ++i)
3977  writer.write(indent[0], indent[1], indent[2]);
3978  break;
3979  }
3980 
3981  case 4:
3982  {
3983  for (unsigned int i = 0; i < depth; ++i)
3984  writer.write(indent[0], indent[1], indent[2], indent[3]);
3985  break;
3986  }
3987 
3988  default:
3989  {
3990  for (unsigned int i = 0; i < depth; ++i)
3991  writer.write_buffer(indent, indent_length);
3992  }
3993  }
3994  }
3995 
3997  {
3998  writer.write('<', '!', '-', '-');
3999 
4000  while (*s)
4001  {
4002  const char_t* prev = s;
4003 
4004  // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4005  while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
4006 
4007  writer.write_buffer(prev, static_cast<size_t>(s - prev));
4008 
4009  if (*s)
4010  {
4011  assert(*s == '-');
4012 
4013  writer.write('-', ' ');
4014  ++s;
4015  }
4016  }
4017 
4018  writer.write('-', '-', '>');
4019  }
4020 
4022  {
4023  while (*s)
4024  {
4025  const char_t* prev = s;
4026 
4027  // look for ?> sequence - we can't output it since ?> terminates PI
4028  while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
4029 
4030  writer.write_buffer(prev, static_cast<size_t>(s - prev));
4031 
4032  if (*s)
4033  {
4034  assert(s[0] == '?' && s[1] == '>');
4035 
4036  writer.write('?', ' ', '>');
4037  s += 2;
4038  }
4039  }
4040  }
4041 
4042  PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4043  {
4044  const char_t* default_name = PUGIXML_TEXT(":anonymous");
4045 
4046  for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4047  {
4049  {
4050  writer.write('\n');
4051 
4052  text_output_indent(writer, indent, indent_length, depth + 1);
4053  }
4054  else
4055  {
4056  writer.write(' ');
4057  }
4058 
4059  writer.write_string(a->name ? a->name + 0 : default_name);
4060  writer.write('=', '"');
4061 
4062  if (a->value)
4063  text_output(writer, a->value, ctx_special_attr, flags);
4064 
4065  writer.write('"');
4066  }
4067  }
4068 
4069  PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4070  {
4071  const char_t* default_name = PUGIXML_TEXT(":anonymous");
4072  const char_t* name = node->name ? node->name + 0 : default_name;
4073 
4074  writer.write('<');
4075  writer.write_string(name);
4076 
4077  if (node->first_attribute)
4078  node_output_attributes(writer, node, indent, indent_length, flags, depth);
4079 
4080  // element nodes can have value if parse_embed_pcdata was used
4081  if (!node->value)
4082  {
4083  if (!node->first_child)
4084  {
4085  if (flags & format_no_empty_element_tags)
4086  {
4087  writer.write('>', '<', '/');
4088  writer.write_string(name);
4089  writer.write('>');
4090 
4091  return false;
4092  }
4093  else
4094  {
4095  if ((flags & format_raw) == 0)
4096  writer.write(' ');
4097 
4098  writer.write('/', '>');
4099 
4100  return false;
4101  }
4102  }
4103  else
4104  {
4105  writer.write('>');
4106 
4107  return true;
4108  }
4109  }
4110  else
4111  {
4112  writer.write('>');
4113 
4114  text_output(writer, node->value, ctx_special_pcdata, flags);
4115 
4116  if (!node->first_child)
4117  {
4118  writer.write('<', '/');
4119  writer.write_string(name);
4120  writer.write('>');
4121 
4122  return false;
4123  }
4124  else
4125  {
4126  return true;
4127  }
4128  }
4129  }
4130 
4131  PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4132  {
4133  const char_t* default_name = PUGIXML_TEXT(":anonymous");
4134  const char_t* name = node->name ? node->name + 0 : default_name;
4135 
4136  writer.write('<', '/');
4137  writer.write_string(name);
4138  writer.write('>');
4139  }
4140 
4141  PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4142  {
4143  const char_t* default_name = PUGIXML_TEXT(":anonymous");
4144 
4145  switch (PUGI__NODETYPE(node))
4146  {
4147  case node_pcdata:
4148  text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4149  break;
4150 
4151  case node_cdata:
4152  text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4153  break;
4154 
4155  case node_comment:
4156  node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4157  break;
4158 
4159  case node_pi:
4160  writer.write('<', '?');
4161  writer.write_string(node->name ? node->name + 0 : default_name);
4162 
4163  if (node->value)
4164  {
4165  writer.write(' ');
4166  node_output_pi_value(writer, node->value);
4167  }
4168 
4169  writer.write('?', '>');
4170  break;
4171 
4172  case node_declaration:
4173  writer.write('<', '?');
4174  writer.write_string(node->name ? node->name + 0 : default_name);
4175  node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4176  writer.write('?', '>');
4177  break;
4178 
4179  case node_doctype:
4180  writer.write('<', '!', 'D', 'O', 'C');
4181  writer.write('T', 'Y', 'P', 'E');
4182 
4183  if (node->value)
4184  {
4185  writer.write(' ');
4186  writer.write_string(node->value);
4187  }
4188 
4189  writer.write('>');
4190  break;
4191 
4192  default:
4193  assert(false && "Invalid node type");
4194  }
4195  }
4196 
// Bit flags used by node_output to remember what has to be written before the
// next node: a newline, indentation, both or neither.
// NOTE(review): the enum name and enumerators are reconstructed from their use
// as combinable bit flags in node_output - confirm against upstream.
enum indent_flags_t
{
	indent_newline = 1,
	indent_indent = 2
};
4202 
4203  PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
4204  {
4205  size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
4206  unsigned int indent_flags = indent_indent;
4207 
4208  xml_node_struct* node = root;
4209 
4210  do
4211  {
4212  assert(node);
4213 
4214  // begin writing current node
4215  if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
4216  {
4217  node_output_simple(writer, node, flags);
4218 
4219  indent_flags = 0;
4220  }
4221  else
4222  {
4223  if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4224  writer.write('\n');
4225 
4226  if ((indent_flags & indent_indent) && indent_length)
4227  text_output_indent(writer, indent, indent_length, depth);
4228 
4229  if (PUGI__NODETYPE(node) == node_element)
4230  {
4231  indent_flags = indent_newline | indent_indent;
4232 
4233  if (node_output_start(writer, node, indent, indent_length, flags, depth))
4234  {
4235  // element nodes can have value if parse_embed_pcdata was used
4236  if (node->value)
4237  indent_flags = 0;
4238 
4239  node = node->first_child;
4240  depth++;
4241  continue;
4242  }
4243  }
4244  else if (PUGI__NODETYPE(node) == node_document)
4245  {
4246  indent_flags = indent_indent;
4247 
4248  if (node->first_child)
4249  {
4250  node = node->first_child;
4251  continue;
4252  }
4253  }
4254  else
4255  {
4256  node_output_simple(writer, node, flags);
4257 
4258  indent_flags = indent_newline | indent_indent;
4259  }
4260  }
4261 
4262  // continue to the next node
4263  while (node != root)
4264  {
4265  if (node->next_sibling)
4266  {
4267  node = node->next_sibling;
4268  break;
4269  }
4270 
4271  node = node->parent;
4272 
4273  // write closing node
4274  if (PUGI__NODETYPE(node) == node_element)
4275  {
4276  depth--;
4277 
4278  if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4279  writer.write('\n');
4280 
4281  if ((indent_flags & indent_indent) && indent_length)
4282  text_output_indent(writer, indent, indent_length, depth);
4283 
4284  node_output_end(writer, node);
4285 
4286  indent_flags = indent_newline | indent_indent;
4287  }
4288  }
4289  }
4290  while (node != root);
4291 
4292  if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4293  writer.write('\n');
4294  }
4295 
4296  PUGI__FN bool has_declaration(xml_node_struct* node)
4297  {
4298  for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4299  {
4301 
4302  if (type == node_declaration) return true;
4303  if (type == node_element) return false;
4304  }
4305 
4306  return false;
4307  }
4308 
4309  PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4310  {
4311  for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4312  if (a == attr)
4313  return true;
4314 
4315  return false;
4316  }
4317 
4319  {
4320  return parent == node_element || parent == node_declaration;
4321  }
4322 
4324  {
4325  if (parent != node_document && parent != node_element) return false;
4326  if (child == node_document || child == node_null) return false;
4327  if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4328 
4329  return true;
4330  }
4331 
4332  PUGI__FN bool allow_move(xml_node parent, xml_node child)
4333  {
4334  // check that child can be a child of parent
4335  if (!allow_insert_child(parent.type(), child.type()))
4336  return false;
4337 
4338  // check that node is not moved between documents
4339  if (parent.root() != child.root())
4340  return false;
4341 
4342  // check that new parent is not in the child subtree
4343  xml_node cur = parent;
4344 
4345  while (cur)
4346  {
4347  if (cur == child)
4348  return false;
4349 
4350  cur = cur.parent();
4351  }
4352 
4353  return true;
4354  }
4355 
4356  template <typename String, typename Header>
4357  PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4358  {
4359  assert(!dest && (header & header_mask) == 0);
4360 
4361  if (source)
4362  {
4363  if (alloc && (source_header & header_mask) == 0)
4364  {
4365  dest = source;
4366 
4367  // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4368  header |= xml_memory_page_contents_shared_mask;
4369  source_header |= xml_memory_page_contents_shared_mask;
4370  }
4371  else
4372  strcpy_insitu(dest, header, header_mask, source, strlength(source));
4373  }
4374  }
4375 
4376  PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4377  {
4378  node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4379  node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4380 
4381  for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4382  {
4383  xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4384 
4385  if (da)
4386  {
4387  node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4388  node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4389  }
4390  }
4391  }
4392 
4393  PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
4394  {
4395  xml_allocator& alloc = get_allocator(dn);
4396  xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
4397 
4398  node_copy_contents(dn, sn, shared_alloc);
4399 
4400  xml_node_struct* dit = dn;
4401  xml_node_struct* sit = sn->first_child;
4402 
4403  while (sit && sit != sn)
4404  {
4405  if (sit != dn)
4406  {
4407  xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
4408 
4409  if (copy)
4410  {
4411  node_copy_contents(copy, sit, shared_alloc);
4412 
4413  if (sit->first_child)
4414  {
4415  dit = copy;
4416  sit = sit->first_child;
4417  continue;
4418  }
4419  }
4420  }
4421 
4422  // continue to the next node
4423  do
4424  {
4425  if (sit->next_sibling)
4426  {
4427  sit = sit->next_sibling;
4428  break;
4429  }
4430 
4431  sit = sit->parent;
4432  dit = dit->parent;
4433  }
4434  while (sit != sn);
4435  }
4436  }
4437 
4438  PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4439  {
4440  xml_allocator& alloc = get_allocator(da);
4441  xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4442 
4443  node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4444  node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4445  }
4446 
4447  inline bool is_text_node(xml_node_struct* node)
4448  {
4450 
4451  return type == node_pcdata || type == node_cdata;
4452  }
4453 
4454  // get value with conversion functions
4455  template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos)
4456  {
4457  U result = 0;
4458  const char_t* s = value;
4459 
4460  while (PUGI__IS_CHARTYPE(*s, ct_space))
4461  s++;
4462 
4463  bool negative = (*s == '-');
4464 
4465  s += (*s == '+' || *s == '-');
4466 
4467  bool overflow = false;
4468 
4469  if (s[0] == '0' && (s[1] | ' ') == 'x')
4470  {
4471  s += 2;
4472 
4473  // since overflow detection relies on length of the sequence skip leading zeros
4474  while (*s == '0')
4475  s++;
4476 
4477  const char_t* start = s;
4478 
4479  for (;;)
4480  {
4481  if (static_cast<unsigned>(*s - '0') < 10)
4482  result = result * 16 + (*s - '0');
4483  else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
4484  result = result * 16 + ((*s | ' ') - 'a' + 10);
4485  else
4486  break;
4487 
4488  s++;
4489  }
4490 
4491  size_t digits = static_cast<size_t>(s - start);
4492 
4493  overflow = digits > sizeof(U) * 2;
4494  }
4495  else
4496  {
4497  // since overflow detection relies on length of the sequence skip leading zeros
4498  while (*s == '0')
4499  s++;
4500 
4501  const char_t* start = s;
4502 
4503  for (;;)
4504  {
4505  if (static_cast<unsigned>(*s - '0') < 10)
4506  result = result * 10 + (*s - '0');
4507  else
4508  break;
4509 
4510  s++;
4511  }
4512 
4513  size_t digits = static_cast<size_t>(s - start);
4514 
4515  PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
4516 
4517  const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
4518  const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
4519  const size_t high_bit = sizeof(U) * 8 - 1;
4520 
4521  overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
4522  }
4523 
4524  if (negative)
4525  return (overflow || result > minneg) ? 0 - minneg : 0 - result;
4526  else
4527  return (overflow || result > maxpos) ? maxpos : result;
4528  }
4529 
4530  PUGI__FN int get_value_int(const char_t* value)
4531  {
4532  return string_to_integer<unsigned int>(value, 0 - static_cast<unsigned int>(INT_MIN), INT_MAX);
4533  }
4534 
4535  PUGI__FN unsigned int get_value_uint(const char_t* value)
4536  {
4537  return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4538  }
4539 
4540  PUGI__FN double get_value_double(const char_t* value)
4541  {
4542  #ifdef PUGIXML_WCHAR_MODE
4543  return wcstod(value, 0);
4544  #else
4545  return strtod(value, 0);
4546  #endif
4547  }
4548 
4549  PUGI__FN float get_value_float(const char_t* value)
4550  {
4551  #ifdef PUGIXML_WCHAR_MODE
4552  return static_cast<float>(wcstod(value, 0));
4553  #else
4554  return static_cast<float>(strtod(value, 0));
4555  #endif
4556  }
4557 
4558  PUGI__FN bool get_value_bool(const char_t* value)
4559  {
4560  // only look at first char
4561  char_t first = *value;
4562 
4563  // 1*, t* (true), T* (True), y* (yes), Y* (YES)
4564  return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4565  }
4566 
4567 #ifdef PUGIXML_HAS_LONG_LONG
4568  PUGI__FN long long get_value_llong(const char_t* value)
4569  {
4570  return string_to_integer<unsigned long long>(value, 0 - static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
4571  }
4572 
4573  PUGI__FN unsigned long long get_value_ullong(const char_t* value)
4574  {
4575  return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
4576  }
4577 #endif
4578 
4579  template <typename U> PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
4580  {
4581  char_t* result = end - 1;
4582  U rest = negative ? 0 - value : value;
4583 
4584  do
4585  {
4586  *result-- = static_cast<char_t>('0' + (rest % 10));
4587  rest /= 10;
4588  }
4589  while (rest);
4590 
4591  assert(result >= begin);
4592  (void)begin;
4593 
4594  *result = '-';
4595 
4596  return result + !negative;
4597  }
4598 
4599  // set value with conversion functions
4600  template <typename String, typename Header>
4601  PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4602  {
4603  #ifdef PUGIXML_WCHAR_MODE
4604  char_t wbuf[128];
4605  assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4606 
4607  size_t offset = 0;
4608  for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4609 
4610  return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4611  #else
4612  return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4613  #endif
4614  }
4615 
4616  template <typename U, typename String, typename Header>
4617  PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
4618  {
4619  char_t buf[64];
4620  char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4621  char_t* begin = integer_to_string(buf, end, value, negative);
4622 
4623  return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4624  }
4625 
4626  template <typename String, typename Header>
4627  PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
4628  {
4629  char buf[128];
4630  sprintf(buf, "%.9g", value);
4631 
4632  return set_value_ascii(dest, header, header_mask, buf);
4633  }
4634 
4635  template <typename String, typename Header>
4636  PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
4637  {
4638  char buf[128];
4639  sprintf(buf, "%.17g", value);
4640 
4641  return set_value_ascii(dest, header, header_mask, buf);
4642  }
4643 
4644  template <typename String, typename Header>
4645  PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
4646  {
4647  return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
4648  }
4649 
4650  PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4651  {
4652  // check input buffer
4653  if (!contents && size) return make_parse_result(status_io_error);
4654 
4655  // get actual encoding
4656  xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4657 
4658  // get private buffer
4659  char_t* buffer = 0;
4660  size_t length = 0;
4661 
4662  if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4663 
4664  // delete original buffer if we performed a conversion
4665  if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4666 
4667  // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4668  if (own || buffer != contents) *out_buffer = buffer;
4669 
4670  // store buffer for offset_debug
4671  doc->buffer = buffer;
4672 
4673  // parse
4674  xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4675 
4676  // remember encoding
4677  res.encoding = buffer_encoding;
4678 
4679  return res;
4680  }
4681 
4682  // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
4683  PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4684  {
4685  #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
4686  // there are 64-bit versions of fseek/ftell, let's use them
4687  typedef __int64 length_type;
4688 
4689  _fseeki64(file, 0, SEEK_END);
4690  length_type length = _ftelli64(file);
4691  _fseeki64(file, 0, SEEK_SET);
4692  #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4693  // there are 64-bit versions of fseek/ftell, let's use them
4694  typedef off64_t length_type;
4695 
4696  fseeko64(file, 0, SEEK_END);
4697  length_type length = ftello64(file);
4698  fseeko64(file, 0, SEEK_SET);
4699  #else
4700  // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4701  typedef long length_type;
4702 
4703  fseek(file, 0, SEEK_END);
4704  length_type length = ftell(file);
4705  fseek(file, 0, SEEK_SET);
4706  #endif
4707 
4708  // check for I/O errors
4709  if (length < 0) return status_io_error;
4710 
4711  // check for overflow
4712  size_t result = static_cast<size_t>(length);
4713 
4714  if (static_cast<length_type>(result) != length) return status_out_of_memory;
4715 
4716  // finalize
4717  out_result = result;
4718 
4719  return status_ok;
4720  }
4721 
4722  // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
4723  PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4724  {
4725  // We only need to zero-terminate if encoding conversion does not do it for us
4726  #ifdef PUGIXML_WCHAR_MODE
4727  xml_encoding wchar_encoding = get_wchar_encoding();
4728 
4729  if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4730  {
4731  size_t length = size / sizeof(char_t);
4732 
4733  static_cast<char_t*>(buffer)[length] = 0;
4734  return (length + 1) * sizeof(char_t);
4735  }
4736  #else
4737  if (encoding == encoding_utf8)
4738  {
4739  static_cast<char*>(buffer)[size] = 0;
4740  return size + 1;
4741  }
4742  #endif
4743 
4744  return size;
4745  }
4746 
4747  PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4748  {
4749  if (!file) return make_parse_result(status_file_not_found);
4750 
4751  // get file size (can result in I/O errors)
4752  size_t size = 0;
4753  xml_parse_status size_status = get_file_size(file, size);
4754  if (size_status != status_ok) return make_parse_result(size_status);
4755 
4756  size_t max_suffix_size = sizeof(char_t);
4757 
4758  // allocate buffer for the whole file
4759  char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4760  if (!contents) return make_parse_result(status_out_of_memory);
4761 
4762  // read file in memory
4763  size_t read_size = fread(contents, 1, size, file);
4764 
4765  if (read_size != size)
4766  {
4767  xml_memory::deallocate(contents);
4769  }
4770 
4771  xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4772 
4773  return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4774  }
4775 
4776  PUGI__FN void close_file(FILE* file)
4777  {
4778  fclose(file);
4779  }
4780 
4781 #ifndef PUGIXML_NO_STL
4782  template <typename T> struct xml_stream_chunk
4783  {
4785  {
4786  void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4787  if (!memory) return 0;
4788 
4789  return new (memory) xml_stream_chunk();
4790  }
4791 
4792  static void destroy(xml_stream_chunk* chunk)
4793  {
4794  // free chunk chain
4795  while (chunk)
4796  {
4797  xml_stream_chunk* next_ = chunk->next;
4798 
4799  xml_memory::deallocate(chunk);
4800 
4801  chunk = next_;
4802  }
4803  }
4804 
4805  xml_stream_chunk(): next(0), size(0)
4806  {
4807  }
4808 
4810  size_t size;
4811 
4812  T data[xml_memory_page_size / sizeof(T)];
4813  };
4814 
4815  template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4816  {
4818 
4819  // read file to a chunk list
4820  size_t total = 0;
4821  xml_stream_chunk<T>* last = 0;
4822 
4823  while (!stream.eof())
4824  {
4825  // allocate new chunk
4827  if (!chunk) return status_out_of_memory;
4828 
4829  // append chunk to list
4830  if (last) last = last->next = chunk;
4831  else chunks.data = last = chunk;
4832 
4833  // read data to chunk
4834  stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4835  chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4836 
4837  // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4838  if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4839 
4840  // guard against huge files (chunk size is small enough to make this overflow check work)
4841  if (total + chunk->size < total) return status_out_of_memory;
4842  total += chunk->size;
4843  }
4844 
4845  size_t max_suffix_size = sizeof(char_t);
4846 
4847  // copy chunk list to a contiguous buffer
4848  char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4849  if (!buffer) return status_out_of_memory;
4850 
4851  char* write = buffer;
4852 
4853  for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4854  {
4855  assert(write + chunk->size <= buffer + total);
4856  memcpy(write, chunk->data, chunk->size);
4857  write += chunk->size;
4858  }
4859 
4860  assert(write == buffer + total);
4861 
4862  // return buffer
4863  *out_buffer = buffer;
4864  *out_size = total;
4865 
4866  return status_ok;
4867  }
4868 
4869  template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4870  {
4871  // get length of remaining data in stream
4872  typename std::basic_istream<T>::pos_type pos = stream.tellg();
4873  stream.seekg(0, std::ios::end);
4874  std::streamoff length = stream.tellg() - pos;
4875  stream.seekg(pos);
4876 
4877  if (stream.fail() || pos < 0) return status_io_error;
4878 
4879  // guard against huge files
4880  size_t read_length = static_cast<size_t>(length);
4881 
4882  if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4883 
4884  size_t max_suffix_size = sizeof(char_t);
4885 
4886  // read stream data into memory (guard against stream exceptions with buffer holder)
4887  auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4888  if (!buffer.data) return status_out_of_memory;
4889 
4890  stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4891 
4892  // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4893  if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4894 
4895  // return buffer
4896  size_t actual_length = static_cast<size_t>(stream.gcount());
4897  assert(actual_length <= read_length);
4898 
4899  *out_buffer = buffer.release();
4900  *out_size = actual_length * sizeof(T);
4901 
4902  return status_ok;
4903  }
4904 
4905  template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4906  {
4907  void* buffer = 0;
4908  size_t size = 0;
4909  xml_parse_status status = status_ok;
4910 
4911  // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4912  if (stream.fail()) return make_parse_result(status_io_error);
4913 
4914  // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4915  if (stream.tellg() < 0)
4916  {
4917  stream.clear(); // clear error flags that could be set by a failing tellg
4918  status = load_stream_data_noseek(stream, &buffer, &size);
4919  }
4920  else
4921  status = load_stream_data_seek(stream, &buffer, &size);
4922 
4923  if (status != status_ok) return make_parse_result(status);
4924 
4925  xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4926 
4927  return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4928  }
4929 #endif
4930 
4931 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
4932  PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4933  {
4934  return _wfopen(path, mode);
4935  }
4936 #else
4937  PUGI__FN char* convert_path_heap(const wchar_t* str)
4938  {
4939  assert(str);
4940 
4941  // first pass: get length in utf8 characters
4942  size_t length = strlength_wide(str);
4943  size_t size = as_utf8_begin(str, length);
4944 
4945  // allocate resulting string
4946  char* result = static_cast<char*>(xml_memory::allocate(size + 1));
4947  if (!result) return 0;
4948 
4949  // second pass: convert to utf8
4950  as_utf8_end(result, size, str, length);
4951 
4952  // zero-terminate
4953  result[size] = 0;
4954 
4955  return result;
4956  }
4957 
4958  PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4959  {
4960  // there is no standard function to open wide paths, so our best bet is to try utf8 path
4961  char* path_utf8 = convert_path_heap(path);
4962  if (!path_utf8) return 0;
4963 
4964  // convert mode to ASCII (we mirror _wfopen interface)
4965  char mode_ascii[4] = {0};
4966  for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
4967 
4968  // try to open the utf8 path
4969  FILE* result = fopen(path_utf8, mode_ascii);
4970 
4971  // free dummy buffer
4972  xml_memory::deallocate(path_utf8);
4973 
4974  return result;
4975  }
4976 #endif
4977 
4978  PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
4979  {
4980  if (!file) return false;
4981 
4982  xml_writer_file writer(file);
4983  doc.save(writer, indent, flags, encoding);
4984 
4985  return ferror(file) == 0;
4986  }
4987 
4989  {
4990  xml_node_struct* node;
4991  char_t* name;
4992 
4993  name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
4994  {
4995  node->name = 0;
4996  }
4997 
4999  {
5000  node->name = name;
5001  }
5002  };
5004 
5005 OIIO_NAMESPACE_BEGIN namespace pugi
5006 {
5007  PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
5008  {
5009  }
5010 
5011  PUGI__FN void xml_writer_file::write(const void* data, size_t size)
5012  {
5013  size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5014  (void)!result; // unfortunately we can't do proper error handling here
5015  }
5016 
5017 #ifndef PUGIXML_NO_STL
5018  PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
5019  {
5020  }
5021 
5022  PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
5023  {
5024  }
5025 
5026  PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5027  {
5028  if (narrow_stream)
5029  {
5030  assert(!wide_stream);
5031  narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5032  }
5033  else
5034  {
5035  assert(wide_stream);
5036  assert(size % sizeof(wchar_t) == 0);
5037 
5038  wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5039  }
5040  }
5041 #endif
5042 
5044  {
5045  }
5046 
5048  {
5049  }
5050 
5052  {
5053  return _depth;
5054  }
5055 
5057  {
5058  return true;
5059  }
5060 
5062  {
5063  return true;
5064  }
5065 
5067  {
5068  }
5069 
5071  {
5072  }
5073 
5074  PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
5075  {
5076  }
5077 
5078  PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5079  {
5080  return _attr ? unspecified_bool_xml_attribute : 0;
5081  }
5082 
5084  {
5085  return !_attr;
5086  }
5087 
5089  {
5090  return (_attr == r._attr);
5091  }
5092 
5094  {
5095  return (_attr != r._attr);
5096  }
5097 
5099  {
5100  return (_attr < r._attr);
5101  }
5102 
5104  {
5105  return (_attr > r._attr);
5106  }
5107 
5109  {
5110  return (_attr <= r._attr);
5111  }
5112 
5114  {
5115  return (_attr >= r._attr);
5116  }
5117 
5119  {
5120  return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
5121  }
5122 
5124  {
5125  return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
5126  }
5127 
5128  PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5129  {
5130  return (_attr && _attr->value) ? _attr->value + 0 : def;
5131  }
5132 
5133  PUGI__FN int xml_attribute::as_int(int def) const
5134  {
5135  return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5136  }
5137 
5138  PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5139  {
5140  return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5141  }
5142 
5143  PUGI__FN double xml_attribute::as_double(double def) const
5144  {
5145  return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5146  }
5147 
5148  PUGI__FN float xml_attribute::as_float(float def) const
5149  {
5150  return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5151  }
5152 
5153  PUGI__FN bool xml_attribute::as_bool(bool def) const
5154  {
5155  return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5156  }
5157 
5158 #ifdef PUGIXML_HAS_LONG_LONG
5159  PUGI__FN long long xml_attribute::as_llong(long long def) const
5160  {
5161  return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
5162  }
5163 
5164  PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5165  {
5166  return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
5167  }
5168 #endif
5169 
5171  {
5172  return !_attr;
5173  }
5174 
5175  PUGI__FN const char_t* xml_attribute::name() const
5176  {
5177  return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
5178  }
5179 
5180  PUGI__FN const char_t* xml_attribute::value() const
5181  {
5182  return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
5183  }
5184 
5186  {
5187  return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5188  }
5189 
5191  {
5192  return _attr;
5193  }
5194 
5196  {
5197  set_value(rhs);
5198  return *this;
5199  }
5200 
5202  {
5203  set_value(rhs);
5204  return *this;
5205  }
5206 
5208  {
5209  set_value(rhs);
5210  return *this;
5211  }
5212 
5214  {
5215  set_value(rhs);
5216  return *this;
5217  }
5218 
5220  {
5221  set_value(rhs);
5222  return *this;
5223  }
5224 
5226  {
5227  set_value(rhs);
5228  return *this;
5229  }
5230 
5232  {
5233  set_value(rhs);
5234  return *this;
5235  }
5236 
5238  {
5239  set_value(rhs);
5240  return *this;
5241  }
5242 
5243 #ifdef PUGIXML_HAS_LONG_LONG
5245  {
5246  set_value(rhs);
5247  return *this;
5248  }
5249 
5250  PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
5251  {
5252  set_value(rhs);
5253  return *this;
5254  }
5255 #endif
5256 
5257  PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5258  {
5259  if (!_attr) return false;
5260 
5261  return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5262  }
5263 
5264  PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5265  {
5266  if (!_attr) return false;
5267 
5268  return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5269  }
5270 
5272  {
5273  if (!_attr) return false;
5274 
5275  return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5276  }
5277 
5278  PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5279  {
5280  if (!_attr) return false;
5281 
5282  return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5283  }
5284 
5286  {
5287  if (!_attr) return false;
5288 
5289  return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5290  }
5291 
5292  PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5293  {
5294  if (!_attr) return false;
5295 
5296  return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5297  }
5298 
5300  {
5301  if (!_attr) return false;
5302 
5303  return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5304  }
5305 
5307  {
5308  if (!_attr) return false;
5309 
5310  return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5311  }
5312 
5314  {
5315  if (!_attr) return false;
5316 
5317  return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5318  }
5319 
5320 #ifdef PUGIXML_HAS_LONG_LONG
5321  PUGI__FN bool xml_attribute::set_value(long long rhs)
5322  {
5323  if (!_attr) return false;
5324 
5325  return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5326  }
5327 
5328  PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5329  {
5330  if (!_attr) return false;
5331 
5332  return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5333  }
5334 #endif
5335 
5336 #ifdef __BORLANDC__
5337  PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
5338  {
5339  return (bool)lhs && rhs;
5340  }
5341 
5342  PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
5343  {
5344  return (bool)lhs || rhs;
5345  }
5346 #endif
5347 
5349  {
5350  }
5351 
5353  {
5354  }
5355 
5356  PUGI__FN static void