MezzanineEngine 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
xml.cpp
1 // © Copyright 2010 - 2014 BlackTopp Studios Inc.
2 /* This file is part of The Mezzanine Engine.
3 
4  The Mezzanine Engine is free software: you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation, either version 3 of the License, or
7  (at your option) any later version.
8 
9  The Mezzanine Engine is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License
15  along with The Mezzanine Engine. If not, see <http://www.gnu.org/licenses/>.
16 */
17 /* The original authors have included a copy of the license specified above in the
18  'Docs' folder. See 'gpl.txt'
19 */
20 /* We welcome the use of the Mezzanine engine to anyone, including companies who wish to
21  Build professional software and charge for their product.
22 
23  However there are some practical restrictions, so if your project involves
24  any of the following you should contact us and we will try to work something
25  out:
26  - DRM or Copy Protection of any kind(except Copyrights)
27  - Software Patents You Do Not Wish to Freely License
28  - Any Kind of Linking to Non-GPL licensed Works
29  - Are Currently In Violation of Another Copyright Holder's GPL License
30  - If You want to change our code and not add a few hundred MB of stuff to
31  your distribution
32 
33  These and other limitations could cause serious legal problems if you ignore
34  them, so it is best to simply contact us or the Free Software Foundation, if
35  you have any questions.
36 
37  Joseph Toppi - toppij@gmail.com
38  John Blackwood - makoenergy02@gmail.com
39 */
40 
41 /// @cond DontDocumentInternal
42 
43 /*
44  * pugixml parser - version 1.2
45  * --------------------------------------------------------
46  * Copyright © 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
47  * Report bugs and download new versions at http://pugixml.org/
48  *
49  * This library is distributed under the MIT License. See notice at the end
50  * of this file.
51  *
52  * This work is based on the pugxml parser, which is:
53  * Copyright © 2003, by Kristen Wegner (kristen@tima.net)
54  */
55 
56 #ifndef SOURCE_XML_CPP
57 #define SOURCE_XML_CPP
58 
59 #ifndef SWIG
60  #include "XML/xml.h"
61 #endif
62 #include "exception.h"
63 
64 
65 #include <stdlib.h>
66 #include <stdio.h>
67 #include <string.h>
68 #include <assert.h>
69 #include <wchar.h>
70 
71 
72 #include <math.h>
73 #include <float.h>
74 
75 
76 #include <istream>
77 #include <ostream>
78 #include <string>
79 
80 
81 // For placement new
82 #include <new>
83 
84 #ifdef _MSC_VER
85 # pragma warning(push)
86 # pragma warning(disable: 4127) // conditional expression is constant
87 # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
88 # pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
89 # pragma warning(disable: 4702) // unreachable code
90 # pragma warning(disable: 4996) // this function or variable may be unsafe
91 # pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
92 #endif
93 
94 #ifdef __INTEL_COMPILER
95 # pragma warning(disable: 177) // function was declared but never referenced
96 # pragma warning(disable: 279) // controlling expression is constant
97 # pragma warning(disable: 1478 1786) // function was declared "deprecated"
98 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
99 #endif
100 
101 #ifdef __SNC__
102 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
103 # pragma diag_suppress=178 // function was declared but never referenced
104 # pragma diag_suppress=237 // controlling expression is constant
105 #endif
106 
107 // Inlining controls
108 #if defined(_MSC_VER) && _MSC_VER >= 1300
109 # define PUGI__NO_INLINE __declspec(noinline)
110 #elif defined(__GNUC__)
111 # define PUGI__NO_INLINE __attribute__((noinline))
112 #else
113 # define PUGI__NO_INLINE
114 #endif
115 
116 // Simple static assertion
117 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
118 
119 // Digital Mars C++ bug workaround for passing a char loaded from memory via the stack
120 #ifdef __DMC__
121 # define PUGI__DMC_VOLATILE volatile
122 #else
123 # define PUGI__DMC_VOLATILE
124 #endif
125 
126 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
127 #if defined(_MSC_VER) && !defined(__S3E__)
128 # define PUGI__MSVC_CRT_VERSION _MSC_VER
129 #endif
130 
131 #ifdef XML_HEADER_ONLY
132 # define PUGI__NS_BEGIN namespace XML { namespace internal {
133 # define PUGI__NS_END } }
134 # define PUGI__FN inline
135 # define PUGI__FN_NO_INLINE inline
136 #else
137 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
138 # define PUGI__NS_BEGIN namespace XML { namespace internal {
139 # define PUGI__NS_END } }
140 # else
141 # define PUGI__NS_BEGIN namespace XML { namespace internal { namespace {
142 # define PUGI__NS_END } } }
143 # endif
144 # define PUGI__FN
145 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE
146 #endif
147 
148 // uintptr_t
149 #if !defined(_MSC_VER) || _MSC_VER >= 1600
150 # include <stdint.h>
151 #else
152 # ifndef _UINTPTR_T_DEFINED
153 // No native uintptr_t in MSVC6 and in some WinCE versions
154 typedef size_t uintptr_t;
155 #define _UINTPTR_T_DEFINED
156 # endif
157 PUGI__NS_BEGIN
158  typedef unsigned __int8 uint8_t;
159  typedef unsigned __int16 uint16_t;
160  typedef unsigned __int32 uint32_t;
161 PUGI__NS_END
162 #endif
163 
164 namespace Mezzanine {
165 // Memory allocation
166 
167 PUGI__NS_BEGIN
168  PUGI__FN void* default_allocate(size_t size)
169  {
170  return malloc(size);
171  }
172 
173  PUGI__FN void default_deallocate(void* ptr)
174  {
175  free(ptr);
176  }
177 
178  template <typename T>
179  struct MemoryManagement_function_storage
180  {
181  static AllocationFunction allocate;
182  static DeAllocationFunction deallocate;
183  };
184 
185  template <typename T> AllocationFunction MemoryManagement_function_storage<T>::allocate = default_allocate;
186  template <typename T> DeAllocationFunction MemoryManagement_function_storage<T>::deallocate = default_deallocate;
187 
188  typedef MemoryManagement_function_storage<int> Memory;
189 PUGI__NS_END
190 
191 // String utilities
192 PUGI__NS_BEGIN
193  // Get string length
194  PUGI__FN size_t strlength(const Char8* s)
195  {
196  assert(s);
197 
198  return strlen(s);
199  }
200 
201  // Compare two strings
202  PUGI__FN bool strequal(const Char8* src, const Char8* dst)
203  {
204  assert(src && dst);
205 
206  return strcmp(src, dst) == 0;
207 
208  }
209 
210  // Compare lhs with [rhs_begin, rhs_end)
211  PUGI__FN bool strequalrange(const Char8* lhs, const Char8* rhs, size_t count)
212  {
213  for (size_t i = 0; i < count; ++i)
214  if (lhs[i] != rhs[i])
215  return false;
216 
217  return lhs[count] == 0;
218  }
219 
220 PUGI__NS_END
221 
222 // auto_ptr-like buffer holder for exception recovery
223 PUGI__NS_BEGIN
/// @brief Scope guard that owns a raw buffer together with the routine that frees it.
///
/// On destruction the deleter is invoked on the buffer unless ownership was
/// taken back with release(). The holder is non-copyable: a copy would make
/// two destructors run the deleter on the same buffer.
struct buffer_holder
{
    void* data;                 ///< Owned buffer, or 0 when nothing is owned.
    void (*deleter)(void*);     ///< Called with @ref data when the holder dies while still owning it.

    /// @param data_ Buffer to guard (may be 0, in which case the deleter is never called).
    /// @param deleter_ Routine used to free the buffer.
    buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)
    {
    }

    /// Frees the buffer if it is still owned.
    ~buffer_holder()
    {
        if (data) deleter(data);
    }

    /// @brief Gives up ownership.
    /// @return The previously owned buffer; the holder no longer frees it.
    void* release()
    {
        void* Result = data;
        data = 0;
        return Result;
    }

private:
    // Declared but intentionally never defined: copying an owner would lead to a double free.
    buffer_holder(const buffer_holder&);
    buffer_holder& operator=(const buffer_holder&);
};
245 PUGI__NS_END
246 
247 
248 PUGI__NS_BEGIN
// Capacity compared against the running byte counter of the current page.
// XML_MEMORY_PAGE_SIZE overrides the built-in default of 32768 when defined.
#ifdef XML_MEMORY_PAGE_SIZE
static const size_t MemoryPage_size = XML_MEMORY_PAGE_SIZE;
#else
static const size_t MemoryPage_size = 32768;
#endif

// Pages are aligned to this many bytes, which leaves the low bits of a page
// address free to carry flags inside a node/attribute header word.
static const uintptr_t MemoryPage_alignment = 32;

// Clears the flag bits of a header word, leaving the page address.
static const uintptr_t MemoryPage_pointer_mask = ~(MemoryPage_alignment - 1);

// Header flag: the Name string was obtained from the allocator and must be released.
static const uintptr_t MemoryPage_Name_allocated_mask = 16;

// Header flag: the Value string was obtained from the allocator and must be released.
static const uintptr_t MemoryPage_Value_allocated_mask = 8;

// Low three bits of a header word.
// NOTE(review): presumably the node type (stored as Type - 1) - confirm against users of this mask.
static const uintptr_t MemoryPage_type_mask = 7;
262 
struct Allocator;

/// @brief Header placed at the start of every block managed by the allocator.
///
/// The usable bytes of the page begin at @ref data; the declared array length of
/// one is only a placeholder for the storage that follows the header.
struct MemoryPage
{
    /// @brief Turns raw storage into a page with every bookkeeping field zeroed.
    /// @param memory Storage for the page, or 0.
    /// @return @p memory viewed as a page, or 0 when @p memory is 0.
    static MemoryPage* construct(void* memory)
    {
        MemoryPage* const fresh = static_cast<MemoryPage*>(memory);

        if (fresh == 0) return 0; // redundant for current callers, kept as in the original

        fresh->allocator = 0;
        fresh->memory = 0;
        fresh->prev = 0;
        fresh->next = 0;
        fresh->busy_size = 0;
        fresh->freed_size = 0;

        return fresh;
    }

    Allocator* allocator;   ///< Allocator that owns the page.

    void* memory;           ///< Unaligned pointer originally returned by the allocation hook.

    MemoryPage* prev;       ///< Previous page in the list.
    MemoryPage* next;       ///< Next page in the list.

    size_t busy_size;       ///< Bytes handed out from this page.
    size_t freed_size;      ///< Bytes returned to this page.

    char data[1];           ///< First byte of the page payload.
};
295 
/// @brief Bookkeeping record stored immediately in front of every allocator-owned string.
struct MemoryString_header
{
    /// Distance of this header from the start of its page's data area.
    uint16_t page_Offset;

    /// Size of the whole string allocation; 0 means the string occupies the whole page.
    uint16_t full_size;
};
301 
302  struct Allocator {
303  Allocator(MemoryPage* GetRoot): _GetRoot(GetRoot), _busy_size(GetRoot->busy_size)
304  {
305  }
306 
307  MemoryPage* allocate_page(size_t data_size)
308  {
309  size_t size = offsetof(MemoryPage, data) + data_size;
310 
311  // allocate block with some alignment, leaving memory for worst-case padding
312  void* memory = Memory::allocate(size + MemoryPage_alignment);
313  if (!memory) return 0;
314 
315  // align upwards to page boundary
316  void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (MemoryPage_alignment - 1)) & ~(MemoryPage_alignment - 1));
317 
318  // prepare page structure
319  MemoryPage* page = MemoryPage::construct(page_memory);
320 
321  page->memory = memory;
322  page->allocator = _GetRoot->allocator;
323 
324  return page;
325  }
326 
327  static void deallocate_page(MemoryPage* page)
328  {
329  Memory::deallocate(page->memory);
330  }
331 
332  void* allocate_memory_oob(size_t size, MemoryPage*& out_page);
333 
334  void* allocate_memory(size_t size, MemoryPage*& out_page)
335  {
336  if (_busy_size + size > MemoryPage_size) return allocate_memory_oob(size, out_page);
337 
338  void* buf = _GetRoot->data + _busy_size;
339 
340  _busy_size += size;
341 
342  out_page = _GetRoot;
343 
344  return buf;
345  }
346 
347  void deallocate_memory(void* ptr, size_t size, MemoryPage* page)
348  {
349  if (page == _GetRoot) page->busy_size = _busy_size;
350 
351  assert(ptr >= page->data && ptr < page->data + page->busy_size);
352  (void)!ptr;
353 
354  page->freed_size += size;
355  assert(page->freed_size <= page->busy_size);
356 
357  if (page->freed_size == page->busy_size)
358  {
359  if (page->next == 0)
360  {
361  assert(_GetRoot == page);
362 
363  // top page freed, just Reset sizes
364  page->busy_size = page->freed_size = 0;
365  _busy_size = 0;
366  }
367  else
368  {
369  assert(_GetRoot != page);
370  assert(page->prev);
371 
372  // remove from the list
373  page->prev->next = page->next;
374  page->next->prev = page->prev;
375 
376  // deallocate
377  deallocate_page(page);
378  }
379  }
380  }
381 
382  Char8* allocate_string(size_t length)
383  {
384  // allocate memory for string and header block
385  size_t size = sizeof(MemoryString_header) + length * sizeof(Char8);
386 
387  // round size up to pointer alignment boundary
388  size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
389 
390  MemoryPage* page;
391  MemoryString_header* header = static_cast<MemoryString_header*>(allocate_memory(full_size, page));
392 
393  if (!header) return 0;
394 
395  // setup header
396  ptrdiff_t page_Offset = reinterpret_cast<char*>(header) - page->data;
397 
398  assert(page_Offset >= 0 && page_Offset < (1 << 16));
399  header->page_Offset = static_cast<uint16_t>(page_Offset);
400 
401  // full_size == 0 for large strings that occupy the whole page
402  assert(full_size < (1 << 16) || (page->busy_size == full_size && page_Offset == 0));
403  header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
404 
405  // round-trip through void* to avoid 'cast increases required alignment of target Type' warning
406  // header is guaranteed a pointer-sized alignment, which should be enough for char_t
407  return static_cast<Char8*>(static_cast<void*>(header + 1));
408  }
409 
410  void deallocate_string(Char8* string)
411  {
412  // this function casts pointers through void* to avoid 'cast increases required alignment of target Type' warnings
413  // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
414 
415  // get header
416  MemoryString_header* header = static_cast<MemoryString_header*>(static_cast<void*>(string)) - 1;
417 
418  // deallocate
419  size_t page_Offset = offsetof(MemoryPage, data) + header->page_Offset;
420  MemoryPage* page = reinterpret_cast<MemoryPage*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_Offset));
421 
422  // if full_size == 0 then this string occupies the whole page
423  size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
424 
425  deallocate_memory(header, full_size, page);
426  }
427 
428  MemoryPage* _GetRoot;
429  size_t _busy_size;
430  };
431 
432  PUGI__FN_NO_INLINE void* Allocator::allocate_memory_oob(size_t size, MemoryPage*& out_page)
433  {
434  const size_t large_allocation_threshold = MemoryPage_size / 4;
435 
436  MemoryPage* page = allocate_page(size <= large_allocation_threshold ? MemoryPage_size : size);
437  out_page = page;
438 
439  if (!page) return 0;
440 
441  if (size <= large_allocation_threshold)
442  {
443  _GetRoot->busy_size = _busy_size;
444 
445  // insert page at the end of linked list
446  page->prev = _GetRoot;
447  _GetRoot->next = page;
448  _GetRoot = page;
449 
450  _busy_size = size;
451  }
452  else
453  {
454  // insert page before the end of linked list, so that it is deleted as soon as possible
455  // the last page is not deleted even if it's empty (see deallocate_memory)
456  assert(_GetRoot->prev);
457 
458  page->prev = _GetRoot->prev;
459  page->next = _GetRoot;
460 
461  _GetRoot->prev->next = page;
462  _GetRoot->prev = page;
463  }
464 
465  // allocate inside page
466  page->busy_size = size;
467 
468  return page->data;
469  }
470 PUGI__NS_END
471 
472 namespace XML
473 {
474  //// A 'Name=Value' XML GetAttribute structure.
475  struct AttributeStruct
476  {
477  //// Default ctor
478  AttributeStruct(internal::MemoryPage* page): header(reinterpret_cast<uintptr_t>(page)), Name(0), Value(0), prev_attribute_c(0), GetNextAttribute(0)
479  {
480  }
481 
482  uintptr_t header;
483 
484  Char8* Name; ////< Pointer to GetAttribute Name.
485  Char8* Value; ////< Pointer to GetAttribute Value.
486 
487  AttributeStruct* prev_attribute_c; ////< Previous GetAttribute (cyclic list)
488  AttributeStruct* GetNextAttribute; ////< Next attribute
489  };
490 
491  //// An XML document tree node.
492  struct NodeStruct
493  {
494  //// Default ctor
495  //// \param Type - node type
496  NodeStruct(internal::MemoryPage* page, NodeType Type): header(reinterpret_cast<uintptr_t>(page) | (Type - 1)), GetParent(0), Name(0), Value(0), GetFirstChild(0), prev_sibling_c(0), GetNextSibling(0), GetFirstAttribute(0)
497  {
498  }
499 
500  uintptr_t header;
501 
502  NodeStruct* GetParent; ////< Pointer to GetParent
503 
504  Char8* Name; ////< Pointer to element Name.
505  Char8* Value; ////< Pointer to any associated string data.
506 
507  NodeStruct* GetFirstChild; ////< First GetChild
508 
509  NodeStruct* prev_sibling_c; ////< Left brother (cyclic list)
510  NodeStruct* GetNextSibling; ////< Right brother
511 
512  AttributeStruct* GetFirstAttribute; ////< First attribute
513  };
514 }
515 
516 PUGI__NS_BEGIN
517  struct DocumentStruct: public NodeStruct, public Allocator
518  {
519  DocumentStruct(MemoryPage* page): NodeStruct(page, NodeDocument), Allocator(page), buffer(0)
520  {
521  }
522 
523  const Char8* buffer;
524  };
525 
526  inline Allocator& GetAllocator(const NodeStruct* node)
527  {
528  assert(node);
529 
530  return *reinterpret_cast<MemoryPage*>(node->header & MemoryPage_pointer_mask)->allocator;
531  }
532 PUGI__NS_END
533 
534 // Low-level DOM operations
535 PUGI__NS_BEGIN
536  inline AttributeStruct* allocate_attribute(Allocator& alloc)
537  {
538  MemoryPage* page;
539  void* memory = alloc.allocate_memory(sizeof(AttributeStruct), page);
540 
541  return new (memory) AttributeStruct(page);
542  }
543 
544  inline NodeStruct* allocate_node(Allocator& alloc, NodeType Type)
545  {
546  MemoryPage* page;
547  void* memory = alloc.allocate_memory(sizeof(NodeStruct), page);
548 
549  return new (memory) NodeStruct(page, Type);
550  }
551 
552  inline void destroy_attribute(AttributeStruct* a, Allocator& alloc)
553  {
554  uintptr_t header = a->header;
555 
556  if (header & internal::MemoryPage_Name_allocated_mask) alloc.deallocate_string(a->Name);
557  if (header & internal::MemoryPage_Value_allocated_mask) alloc.deallocate_string(a->Value);
558 
559  alloc.deallocate_memory(a, sizeof(AttributeStruct), reinterpret_cast<MemoryPage*>(header & MemoryPage_pointer_mask));
560  }
561 
562  inline void destroy_node(NodeStruct* n, Allocator& alloc)
563  {
564  uintptr_t header = n->header;
565 
566  if (header & internal::MemoryPage_Name_allocated_mask) alloc.deallocate_string(n->Name);
567  if (header & internal::MemoryPage_Value_allocated_mask) alloc.deallocate_string(n->Value);
568 
569  for (AttributeStruct* attr = n->GetFirstAttribute; attr; )
570  {
571  AttributeStruct* next = attr->GetNextAttribute;
572 
573  destroy_attribute(attr, alloc);
574 
575  attr = next;
576  }
577 
578  for (NodeStruct* GetChild = n->GetFirstChild; GetChild; )
579  {
580  NodeStruct* next = GetChild->GetNextSibling;
581 
582  destroy_node(GetChild, alloc);
583 
584  GetChild = next;
585  }
586 
587  alloc.deallocate_memory(n, sizeof(NodeStruct), reinterpret_cast<MemoryPage*>(header & MemoryPage_pointer_mask));
588  }
589 
590  PUGI__FN_NO_INLINE NodeStruct* AppendNode(NodeStruct* node, Allocator& alloc, NodeType Type = NodeElement)
591  {
592  NodeStruct* GetChild = allocate_node(alloc, Type);
593  if (!GetChild) return 0;
594 
595  GetChild->GetParent = node;
596 
597  NodeStruct* GetFirstChild = node->GetFirstChild;
598 
599  if (GetFirstChild)
600  {
601  NodeStruct* GetLastChild = GetFirstChild->prev_sibling_c;
602 
603  GetLastChild->GetNextSibling = GetChild;
604  GetChild->prev_sibling_c = GetLastChild;
605  GetFirstChild->prev_sibling_c = GetChild;
606  }
607  else
608  {
609  node->GetFirstChild = GetChild;
610  GetChild->prev_sibling_c = GetChild;
611  }
612 
613  return GetChild;
614  }
615 
616  PUGI__FN_NO_INLINE AttributeStruct* AppendAttribute_ll(NodeStruct* node, Allocator& alloc)
617  {
618  AttributeStruct* a = allocate_attribute(alloc);
619  if (!a) return 0;
620 
621  AttributeStruct* GetFirstAttribute = node->GetFirstAttribute;
622 
623  if (GetFirstAttribute)
624  {
625  AttributeStruct* GetLastAttribute = GetFirstAttribute->prev_attribute_c;
626 
627  GetLastAttribute->GetNextAttribute = a;
628  a->prev_attribute_c = GetLastAttribute;
629  GetFirstAttribute->prev_attribute_c = a;
630  }
631  else
632  {
633  node->GetFirstAttribute = a;
634  a->prev_attribute_c = a;
635  }
636 
637  return a;
638  }
639 PUGI__NS_END
640 
641 // Helper classes for code generation
642 PUGI__NS_BEGIN
/// @brief Compile-time "off" switch for template options (Value is 0).
struct opt_false
{
    enum
    {
        Value = 0
    };
};
647 
/// @brief Compile-time "on" switch for template options (Value is 1).
struct opt_true
{
    enum
    {
        Value = 1
    };
};
652 PUGI__NS_END
653 
654 // Unicode utilities
655 PUGI__NS_BEGIN
/// @brief Reverses the byte order of a 16-bit value.
inline uint16_t endian_swap(uint16_t Value)
{
    const unsigned int low_byte = Value & 0xffu;
    const unsigned int high_byte = static_cast<unsigned int>(Value) >> 8;

    return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
660 
/// @brief Reverses the byte order of a 32-bit value.
inline uint32_t endian_swap(uint32_t Value)
{
    const uint32_t lowest_to_top = (Value & 0xff) << 24;
    const uint32_t second_up = (Value & 0xff00) << 8;
    const uint32_t third_down = (Value & 0xff0000) >> 8;
    const uint32_t top_to_lowest = Value >> 24;

    return lowest_to_top | second_up | third_down | top_to_lowest;
}
665 
/// @brief Decoder sink that only counts how many UTF-8 bytes the decoded code points need.
struct utf8_counter
{
    typedef size_t value_type;

    /// @brief Adds the encoded length of a code point below U+10000.
    static value_type low(value_type Result, uint32_t ch)
    {
        // U+0000..U+007F -> 1 byte, U+0080..U+07FF -> 2 bytes, U+0800..U+FFFF -> 3 bytes
        const value_type needed = (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;

        return Result + needed;
    }

    /// @brief Adds the encoded length of a code point in U+10000..U+10FFFF (always 4 bytes).
    static value_type high(value_type Result, uint32_t)
    {
        return Result + 4;
    }
};
686 
/// @brief Decoder sink that writes code points as UTF-8 bytes.
///
/// Every function stores at the given position and returns the position just past
/// the bytes it wrote.
struct utf8_WriterInstance
{
    typedef uint8_t* value_type;

    /// @brief Encodes a code point below U+10000 using one to three bytes.
    static value_type low(value_type Result, uint32_t ch)
    {
        // U+0000..U+007F: the byte is the code point
        if (ch < 0x80)
        {
            Result[0] = static_cast<uint8_t>(ch);
            return Result + 1;
        }

        // U+0080..U+07FF: 110xxxxx 10xxxxxx
        if (ch < 0x800)
        {
            Result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
            Result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
            return Result + 2;
        }

        // U+0800..U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx
        Result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
        Result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        Result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
        return Result + 3;
    }

    /// @brief Encodes a code point in U+10000..U+10FFFF using four bytes.
    static value_type high(value_type Result, uint32_t ch)
    {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        Result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
        Result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
        Result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        Result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
        return Result + 4;
    }

    /// @brief Encodes any code point by dispatching to low() or high().
    static value_type any(value_type Result, uint32_t ch)
    {
        if (ch < 0x10000) return low(Result, ch);

        return high(Result, ch);
    }
};
731 
/// @brief Decoder sink that only counts how many UTF-16 units the decoded code points need.
struct utf16_counter
{
    typedef size_t value_type;

    /// @brief A code point below U+10000 takes one unit.
    static value_type low(value_type Result, uint32_t)
    {
        return ++Result;
    }

    /// @brief A code point from U+10000 upwards takes two units (a surrogate pair).
    static value_type high(value_type Result, uint32_t)
    {
        Result += 2;
        return Result;
    }
};
746 
/// @brief Decoder sink that writes code points as UTF-16 units.
///
/// Every function stores at the given position and returns the position just past
/// the units it wrote.
struct utf16_WriterInstance
{
    typedef uint16_t* value_type;

    /// @brief Stores a code point below U+10000 as a single unit.
    static value_type low(value_type Result, uint32_t ch)
    {
        Result[0] = static_cast<uint16_t>(ch);

        return Result + 1;
    }

    /// @brief Stores a code point from U+10000 upwards as a surrogate pair.
    static value_type high(value_type Result, uint32_t ch)
    {
        // 20-bit offset from U+10000, split into two 10-bit halves
        const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);
        const uint32_t upper_half = offset >> 10;
        const uint32_t lower_half = offset & 0x3ff;

        Result[0] = static_cast<uint16_t>(0xD800 + upper_half);
        Result[1] = static_cast<uint16_t>(0xDC00 + lower_half);

        return Result + 2;
    }

    /// @brief Stores any code point by dispatching to low() or high().
    static value_type any(value_type Result, uint32_t ch)
    {
        if (ch < 0x10000) return low(Result, ch);

        return high(Result, ch);
    }
};
774 
/// @brief Decoder sink that only counts UTF-32 units: every code point takes exactly one.
struct utf32_counter
{
    typedef size_t value_type;

    /// @brief Counts one unit for a code point below U+10000.
    static value_type low(value_type Result, uint32_t)
    {
        return ++Result;
    }

    /// @brief Counts one unit for a code point from U+10000 upwards.
    static value_type high(value_type Result, uint32_t)
    {
        return ++Result;
    }
};
789 
/// @brief Decoder sink that writes code points as UTF-32 units.
///
/// All three entry points behave the same: store the code point unchanged and
/// return the position just past it.
struct utf32_WriterInstance
{
    typedef uint32_t* value_type;

    /// @brief Stores a code point below U+10000.
    static value_type low(value_type Result, uint32_t ch)
    {
        *Result++ = ch;
        return Result;
    }

    /// @brief Stores a code point from U+10000 upwards.
    static value_type high(value_type Result, uint32_t ch)
    {
        *Result++ = ch;
        return Result;
    }

    /// @brief Stores any code point.
    static value_type any(value_type Result, uint32_t ch)
    {
        *Result++ = ch;
        return Result;
    }
};
815 
/// @brief Decoder sink that writes code points as Latin-1 bytes, substituting '?'
///        for anything above 255.
struct latin1_WriterInstance
{
    typedef uint8_t* value_type;

    /// @brief Stores a code point below U+10000, or '?' when it exceeds 255.
    static value_type low(value_type Result, uint32_t ch)
    {
        if (ch > 255)
            *Result = '?';
        else
            *Result = static_cast<uint8_t>(ch);

        return Result + 1;
    }

    /// @brief Code points from U+10000 upwards never fit; always stores '?'.
    static value_type high(value_type Result, uint32_t ch)
    {
        (void)ch;

        Result[0] = '?';

        return Result + 1;
    }
};
836 
837  template <size_t size> struct wchar_selector;
838 
839  template <> struct wchar_selector<2>
840  {
841  typedef uint16_t Type;
842  typedef utf16_counter counter;
843  typedef utf16_WriterInstance WriterInstance;
844  };
845 
846  template <> struct wchar_selector<4>
847  {
848  typedef uint32_t Type;
849  typedef utf32_counter counter;
850  typedef utf32_WriterInstance WriterInstance;
851  };
852 
853  typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
854  typedef wchar_selector<sizeof(wchar_t)>::WriterInstance wchar_WriterInstance;
855 
856  template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
857  {
858  static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type Result)
859  {
860  const uint8_t utf8_byte_mask = 0x3f;
861 
862  while (size)
863  {
864  uint8_t lead = *data;
865 
866  // 0xxxxxxx -> U+0000..U+007F
867  if (lead < 0x80)
868  {
869  Result = Traits::low(Result, lead);
870  data += 1;
871  size -= 1;
872 
873  // process aligned single-byte (ascii) blocks
874  if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
875  {
876  // round-trip through void* to silence 'cast increases required alignment of target Type' warnings
877  while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
878  {
879  Result = Traits::low(Result, data[0]);
880  Result = Traits::low(Result, data[1]);
881  Result = Traits::low(Result, data[2]);
882  Result = Traits::low(Result, data[3]);
883  data += 4;
884  size -= 4;
885  }
886  }
887  }
888  // 110xxxxx -> U+0080..U+07FF
889  else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
890  {
891  Result = Traits::low(Result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
892  data += 2;
893  size -= 2;
894  }
895  // 1110xxxx -> U+0800-U+FFFF
896  else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
897  {
898  Result = Traits::low(Result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
899  data += 3;
900  size -= 3;
901  }
902  // 11110xxx -> U+10000..U+10FFFF
903  else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
904  {
905  Result = Traits::high(Result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
906  data += 4;
907  size -= 4;
908  }
909  // 10xxxxxx or 11111xxx -> invalid
910  else
911  {
912  data += 1;
913  size -= 1;
914  }
915  }
916 
917  return Result;
918  }
919 
920  static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type Result)
921  {
922  const uint16_t* end = data + size;
923 
924  while (data < end)
925  {
926  uint16_t lead = opt_swap::Value ? endian_swap(*data) : *data;
927 
928  // U+0000..U+D7FF
929  if (lead < 0xD800)
930  {
931  Result = Traits::low(Result, lead);
932  data += 1;
933  }
934  // U+E000..U+FFFF
935  else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
936  {
937  Result = Traits::low(Result, lead);
938  data += 1;
939  }
940  // surrogate pair lead
941  else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
942  {
943  uint16_t next = opt_swap::Value ? endian_swap(data[1]) : data[1];
944 
945  if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
946  {
947  Result = Traits::high(Result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
948  data += 2;
949  }
950  else
951  {
952  data += 1;
953  }
954  }
955  else
956  {
957  data += 1;
958  }
959  }
960 
961  return Result;
962  }
963 
964  static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type Result)
965  {
966  const uint32_t* end = data + size;
967 
968  while (data < end)
969  {
970  uint32_t lead = opt_swap::Value ? endian_swap(*data) : *data;
971 
972  // U+0000..U+FFFF
973  if (lead < 0x10000)
974  {
975  Result = Traits::low(Result, lead);
976  data += 1;
977  }
978  // U+10000..U+10FFFF
979  else
980  {
981  Result = Traits::high(Result, lead);
982  data += 1;
983  }
984  }
985 
986  return Result;
987  }
988 
989  static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type Result)
990  {
991  for (size_t i = 0; i < size; ++i)
992  {
993  Result = Traits::low(Result, data[i]);
994  }
995 
996  return Result;
997  }
998 
999  static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type Result)
1000  {
1001  return decode_utf16_block(data, size, Result);
1002  }
1003 
1004  static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type Result)
1005  {
1006  return decode_utf32_block(data, size, Result);
1007  }
1008 
1009  static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type Result)
1010  {
1011  return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::Type*>(data), size, Result);
1012  }
1013  };
1014 
1015  template <typename T> PUGI__FN void convert_utf_endian_swap(T* Result, const T* data, size_t length)
1016  {
1017  for (size_t i = 0; i < length; ++i) Result[i] = endian_swap(data[i]);
1018  }
1019 
1020 PUGI__NS_END
1021 
1022 PUGI__NS_BEGIN
/// @brief Bit flags stored per character in charCollectionTypeable.
enum charCollectionType
{
    ct_ParsePcdata  = 1 << 0,   // \0, &, \r, <
    ct_ParseAttr    = 1 << 1,   // \0, &, \r, ', "
    ct_ParseAttrWs  = 1 << 2,   // \0, &, \r, ', ", \n, tab
    ct_space        = 1 << 3,   // \r, \n, space, tab
    ct_ParseCdata   = 1 << 4,   // \0, ], >, \r
    ct_ParseComment = 1 << 5,   // \0, -, >, \r
    ct_symbol       = 1 << 6,   // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
    ct_start_symbol = 1 << 7    // Any symbol > 127, a-z, A-Z, _, :
};
1034 
// Per-byte classification table: entry N is the OR of the charCollectionType
// flags that apply to the character with code N.
static const unsigned char charCollectionTypeable[256] =
{
    // 0-15
    55,  0,   0,   0,   0,   0,   0,   0,   0,   12,  12,  0,   0,   63,  0,   0,
    // 16-31
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    // 32-47
    8,   0,   6,   0,   0,   0,   7,   6,   0,   0,   0,   0,   0,   96,  64,  0,
    // 48-63
    64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  192, 0,   1,   0,   48,  0,
    // 64-79
    0,   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    // 80-95
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0,   0,   16,  0,   192,
    // 96-111
    0,   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    // 112-127
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0,   0,   0,   0,   0,

    // 128-255: every entry is ct_symbol | ct_start_symbol
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
1055 
/// @brief Bit flags stored per character in charTypex_table.
enum charTypex_t
{
    ctx_special_pcdata = 1 << 0,    // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
    ctx_special_attr   = 1 << 1,    // Any symbol >= 0 and < 32 (except \t), &, <, >, "
    ctx_start_symbol   = 1 << 2,    // Any symbol > 127, a-z, A-Z, _
    ctx_digit          = 1 << 3,    // 0-9
    ctx_symbol         = 1 << 4     // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
1064 
// Per-byte classification table: entry N is the OR of the charTypex_t flags
// that apply to the character with code N.
static const unsigned char charTypex_table[256] =
{
    // 0-15
    3,  3,  3,  3,  3,  3,  3,  3,  3,  0,  2,  3,  3,  2,  3,  3,
    // 16-31
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    // 32-47
    0,  0,  2,  0,  0,  0,  3,  0,  0,  0,  0,  0,  0,  16, 16, 0,
    // 48-63
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0,  0,  3,  0,  3,  0,

    // 64-79
    0,  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    // 80-95
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0,  0,  0,  0,  20,
    // 96-111
    0,  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    // 112-127
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0,  0,  0,  0,  0,

    // 128-255: every entry is ctx_start_symbol | ctx_symbol
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
};
1086 
// Character class tests. The character is converted to unsigned char before it
// is used as an index, so negative char values cannot index out of bounds.
// The result is the masked table entry (non-zero when any requested flag is
// set), not a normalized bool.
1087  #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
1088 
// PUGI__IS_CHARTYPE tests ct_* flags, PUGI__IS_CHARTYPEX tests ctx_* flags.
1089  #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, charCollectionTypeable)
1090  #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, charTypex_table)
1091 
1092  PUGI__FN bool is_little_endian()
1093  {
1094  unsigned int ui = 1;
1095 
1096  return *reinterpret_cast<unsigned char*>(&ui) == 1;
1097  }
1098 
1099  PUGI__FN Encoding GetWchar_DocumentEncoding()
1100  {
1101  PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1102 
1103  if (sizeof(wchar_t) == 2)
1104  return is_little_endian() ? EncodingUTF16LE : EncodingUTF16BE;
1105  else
1106  return is_little_endian() ? EncodingUTF32LE : EncodingUTF32BE;
1107  }
1108 
1109  PUGI__FN Encoding guess_buffer_DocumentEncoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
1110  {
1111  // look for BOM in first few bytes
1112  if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return EncodingUTF32BE;
1113  if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return EncodingUTF32LE;
1114  if (d0 == 0xfe && d1 == 0xff) return EncodingUTF16BE;
1115  if (d0 == 0xff && d1 == 0xfe) return EncodingUTF16LE;
1116  if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return EncodingUTF8;
1117 
1118  // look for <, <? or <?xm in various DocumentEncodings
1119  if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return EncodingUTF32BE;
1120  if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return EncodingUTF32LE;
1121  if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return EncodingUTF16BE;
1122  if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return EncodingUTF16LE;
1123  if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return EncodingUTF8;
1124 
1125  // look for utf16 < followed by node Name (this may fail, but is better than utf8 since it's zero terminated so early)
1126  if (d0 == 0 && d1 == 0x3c) return EncodingUTF16BE;
1127  if (d0 == 0x3c && d1 == 0) return EncodingUTF16LE;
1128 
1129  // no known BOM detected, assume utf8
1130  return EncodingUTF8;
1131  }
1132 
1133  PUGI__FN Encoding GetBuffer_DocumentEncoding(Encoding DocumentEncoding, const void* contents, size_t size)
1134  {
1135  // replace wchar DocumentEncoding with utf implementation
1136  if (DocumentEncoding == Encodingwchar_t) return GetWchar_DocumentEncoding();
1137 
1138  // replace utf16 DocumentEncoding with utf16 with specific endianness
1139  if (DocumentEncoding == EncodingUTF16) return is_little_endian() ? EncodingUTF16LE : EncodingUTF16BE;
1140 
1141  // replace utf32 DocumentEncoding with utf32 with specific endianness
1142  if (DocumentEncoding == EncodingUTF32) return is_little_endian() ? EncodingUTF32LE : EncodingUTF32BE;
1143 
1144  // only do autodetection if no explicit DocumentEncoding is requested
1145  if (DocumentEncoding != EncodingAuto) return DocumentEncoding;
1146 
1147  // skip DocumentEncoding autodetection if input buffer is too small
1148  if (size < 4) return EncodingUTF8;
1149 
1150  // try to guess DocumentEncoding (based on XML specification, Appendix F.1)
1151  const uint8_t* data = static_cast<const uint8_t*>(contents);
1152 
1153  PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1154 
1155  return guess_buffer_DocumentEncoding(d0, d1, d2, d3);
1156  }
1157 
1158  PUGI__FN bool GetMutable_buffer(Char8*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1159  {
1160  if (is_mutable)
1161  {
1162  out_buffer = static_cast<Char8*>(const_cast<void*>(contents));
1163  }
1164  else
1165  {
1166  void* buffer = Memory::allocate(size > 0 ? size : 1);
1167  if (!buffer) return false;
1168 
1169  memcpy(buffer, contents, size);
1170 
1171  out_buffer = static_cast<Char8*>(buffer);
1172  }
1173 
1174  out_length = size / sizeof(Char8);
1175 
1176  return true;
1177  }
1178 
1179  template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(Char8*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1180  {
1181  const uint16_t* data = static_cast<const uint16_t*>(contents);
1182  size_t length = size / sizeof(uint16_t);
1183 
1184  // first pass: get length in utf8 units
1185  out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, length, 0);
1186 
1187  // allocate buffer of suitable length
1188  out_buffer = static_cast<Char8*>(Memory::allocate((out_length > 0 ? out_length : 1) * sizeof(Char8)));
1189  if (!out_buffer) return false;
1190 
1191  // second pass: convert utf16 input to utf8
1192  uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1193  uint8_t* out_end = utf_decoder<utf8_WriterInstance, opt_swap>::decode_utf16_block(data, length, out_begin);
1194 
1195  assert(out_end == out_begin + out_length);
1196  (void)!out_end;
1197 
1198  return true;
1199  }
1200 
1201  template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(Char8*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
1202  {
1203  const uint32_t* data = static_cast<const uint32_t*>(contents);
1204  size_t length = size / sizeof(uint32_t);
1205 
1206  // first pass: get length in utf8 units
1207  out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, length, 0);
1208 
1209  // allocate buffer of suitable length
1210  out_buffer = static_cast<Char8*>(Memory::allocate((out_length > 0 ? out_length : 1) * sizeof(Char8)));
1211  if (!out_buffer) return false;
1212 
1213  // second pass: convert utf32 input to utf8
1214  uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1215  uint8_t* out_end = utf_decoder<utf8_WriterInstance, opt_swap>::decode_utf32_block(data, length, out_begin);
1216 
1217  assert(out_end == out_begin + out_length);
1218  (void)!out_end;
1219 
1220  return true;
1221  }
1222 
1223  PUGI__FN size_t GetLatin1_7bit_prefix_length(const uint8_t* data, size_t size)
1224  {
1225  for (size_t i = 0; i < size; ++i)
1226  if (data[i] > 127)
1227  return i;
1228 
1229  return size;
1230  }
1231 
1232  PUGI__FN bool convert_buffer_latin1(Char8*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1233  {
1234  const uint8_t* data = static_cast<const uint8_t*>(contents);
1235 
1236  // get size of prefix that does not need utf8 conversion
1237  size_t prefix_length = GetLatin1_7bit_prefix_length(data, size);
1238  assert(prefix_length <= size);
1239 
1240  const uint8_t* postfix = data + prefix_length;
1241  size_t postfix_length = size - prefix_length;
1242 
1243  // if no conversion is needed, just return the original buffer
1244  if (postfix_length == 0) return GetMutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1245 
1246  // first pass: get length in utf8 units
1247  out_length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);
1248 
1249  // allocate buffer of suitable length
1250  out_buffer = static_cast<Char8*>(Memory::allocate((out_length > 0 ? out_length : 1) * sizeof(Char8)));
1251  if (!out_buffer) return false;
1252 
1253  // second pass: convert latin1 input to utf8
1254  memcpy(out_buffer, data, prefix_length);
1255 
1256  uint8_t* out_begin = reinterpret_cast<uint8_t*>(out_buffer);
1257  uint8_t* out_end = utf_decoder<utf8_WriterInstance>::decode_latin1_block(postfix, postfix_length, out_begin + prefix_length);
1258 
1259  assert(out_end == out_begin + out_length);
1260  (void)!out_end;
1261 
1262  return true;
1263  }
1264 
1265  PUGI__FN bool convert_buffer(Char8*& out_buffer, size_t& out_length, Encoding DocumentEncoding, const void* contents, size_t size, bool is_mutable)
1266  {
1267  // fast Path: no conversion required
1268  if (DocumentEncoding == EncodingUTF8) return GetMutable_buffer(out_buffer, out_length, contents, size, is_mutable);
1269 
1270  // source DocumentEncoding is utf16
1271  if (DocumentEncoding == EncodingUTF16BE || DocumentEncoding == EncodingUTF16LE)
1272  {
1273  Encoding native_DocumentEncoding = is_little_endian() ? EncodingUTF16LE : EncodingUTF16BE;
1274 
1275  return (native_DocumentEncoding == DocumentEncoding) ?
1276  convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
1277  convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
1278  }
1279 
1280  // source DocumentEncoding is utf32
1281  if (DocumentEncoding == EncodingUTF32BE || DocumentEncoding == EncodingUTF32LE)
1282  {
1283  Encoding native_DocumentEncoding = is_little_endian() ? EncodingUTF32LE : EncodingUTF32BE;
1284 
1285  return (native_DocumentEncoding == DocumentEncoding) ?
1286  convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
1287  convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
1288  }
1289 
1290  // source DocumentEncoding is latin1
1291  if (DocumentEncoding == EncodingLatin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
1292 
1293  assert(!"Invalid DocumentEncoding");
1294  return false;
1295  }
1296 
1297 
1298  PUGI__FN size_t AsUtf8_begin(const wchar_t* str, size_t length)
1299  {
1300  // get length in utf8 characters
1301  return utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);
1302  }
1303 
1304  PUGI__FN void AsUtf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
1305  {
1306  // convert to utf8
1307  uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
1308  uint8_t* end = utf_decoder<utf8_WriterInstance>::decode_wchar_block(str, length, begin);
1309 
1310  assert(begin + size == end);
1311  (void)!end;
1312 
1313  // zero-terminate
1314  buffer[size] = 0;
1315  }
1316 
1317 
1318  PUGI__FN std::string AsUtf8_impl(const wchar_t* str, size_t length)
1319  {
1320  // first pass: get length in utf8 characters
1321  size_t size = AsUtf8_begin(str, length);
1322 
1323  // allocate Resulting string
1324  std::string Result;
1325  Result.resize(size);
1326 
1327  // second pass: convert to utf8
1328  if (size > 0) AsUtf8_end(&Result[0], size, str, length);
1329 
1330  return Result;
1331  }
1332 
1333  PUGI__FN std::basic_string<wchar_t> AsWide_impl(const char* str, size_t size)
1334  {
1335  const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
1336 
1337  // first pass: get length in wchar_t units
1338  size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
1339 
1340  // allocate Resulting string
1341  std::basic_string<wchar_t> Result;
1342  Result.resize(length);
1343 
1344  // second pass: convert to wchar_t
1345  if (length > 0)
1346  {
1347  wchar_WriterInstance::value_type begin = reinterpret_cast<wchar_WriterInstance::value_type>(&Result[0]);
1348  wchar_WriterInstance::value_type end = utf_decoder<wchar_WriterInstance>::decode_utf8_block(data, size, begin);
1349 
1350  assert(begin + length == end);
1351  (void)!end;
1352  }
1353 
1354  return Result;
1355  }
1356 
1357 
1358  inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, Char8* target)
1359  {
1360  assert(target);
1361  size_t tarGetLength = strlength(target);
1362 
1363  // always reuse document buffer memory if possible
1364  if (!allocated) return tarGetLength >= length;
1365 
1366  // reuse heap memory if waste is not too great
1367  const size_t reuse_threshold = 32;
1368 
1369  return tarGetLength >= length && (tarGetLength < reuse_threshold || tarGetLength - length < tarGetLength / 2);
1370  }
1371 
1372  PUGI__FN bool strcpy_insitu(Char8*& dest, uintptr_t& header, uintptr_t header_mask, const Char8* source)
1373  {
1374  size_t source_length = strlength(source);
1375 
1376  if (source_length == 0)
1377  {
1378  // empty string and null pointer are equivalent, so just deallocate old memory
1379  Allocator* alloc = reinterpret_cast<MemoryPage*>(header & MemoryPage_pointer_mask)->allocator;
1380 
1381  if (header & header_mask) alloc->deallocate_string(dest);
1382 
1383  // mark the string as not allocated
1384  dest = 0;
1385  header &= ~header_mask;
1386 
1387  return true;
1388  }
1389  else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest))
1390  {
1391  // we can reuse old buffer, so just copy the new data (including zero terminator)
1392  memcpy(dest, source, (source_length + 1) * sizeof(Char8));
1393 
1394  return true;
1395  }
1396  else
1397  {
1398  Allocator* alloc = reinterpret_cast<MemoryPage*>(header & MemoryPage_pointer_mask)->allocator;
1399 
1400  // allocate new buffer
1401  Char8* buf = alloc->allocate_string(source_length + 1);
1402  if (!buf) return false;
1403 
1404  // copy the string (including zero terminator)
1405  memcpy(buf, source, (source_length + 1) * sizeof(Char8));
1406 
1407  // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
1408  if (header & header_mask) alloc->deallocate_string(dest);
1409 
1410  // the string is now allocated, so set the flag
1411  dest = buf;
1412  header |= header_mask;
1413 
1414  return true;
1415  }
1416  }
1417 
1418  struct gap
1419  {
1420  Char8* end;
1421  size_t size;
1422 
1423  gap(): end(0), size(0)
1424  {
1425  }
1426 
1427  // Push new gap, move s count bytes further (skipping the gap).
1428  // Collapse previous gap.
1429  void push(Char8*& s, size_t count)
1430  {
1431  if (end) // there was a gap already; collapse it
1432  {
1433  // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
1434  assert(s >= end);
1435  memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1436  }
1437 
1438  s += count; // end of current gap
1439 
1440  // "merge" two gaps
1441  end = s;
1442  size += count;
1443  }
1444 
1445  // Collapse all gaps, return past-the-end pointer
1446  Char8* flush(Char8* s)
1447  {
1448  if (end)
1449  {
1450  // Move [old_gap_end, current_pos) to [old_gap_start, ...)
1451  assert(s >= end);
1452  memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
1453 
1454  return s - size;
1455  }
1456  else return s;
1457  }
1458  };
1459 
// Decodes one character or entity reference in place.
//
// s  points at the '&' that starts the reference (the callers in this file
//    invoke it only when *s == '&')
// g  gap tracker of the caller; receives the characters freed by the replacement
//
// Recognized forms are decimal (&#NNN;) and hexadecimal (&#xHHH;) character
// references and the named entities &amp; &apos; &gt; &lt; &quot;.
// On success the replacement is written at s, the rest of the reference is
// registered as a hole in g, and the pointer just past the ';' is returned.
// When the text is not a recognized reference nothing is modified and the
// returned pointer is the position where matching stopped, so the caller
// continues scanning from there.
1460  PUGI__FN Char8* strconv_escape(Char8* s, gap& g)
1461  {
1462  Char8* stre = s + 1;
1463 
1464  switch (*stre)
1465  {
1466  case '#': // &#...
1467  {
1468  unsigned int ucsc = 0;
1469 
1470  if (stre[1] == 'x') // &#x... (hex code)
1471  {
1472  stre += 2;
1473 
1474  Char8 ch = *stre;
1475 
// a reference without any digits is left untouched
1476  if (ch == ';') return stre;
1477 
1478  for (;;)
1479  {
// the unsigned comparison tests for '0'..'9' in one step: characters
// below '0' wrap around to large values
1480  if (static_cast<unsigned int>(ch - '0') <= 9)
1481  ucsc = 16 * ucsc + (ch - '0');
// ch | ' ' sets bit 0x20, which maps 'A'..'F' onto 'a'..'f'
1482  else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
1483  ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
1484  else if (ch == ';')
1485  break;
1486  else // cancel
1487  return stre;
1488 
1489  ch = *++stre;
1490  }
1491 
// step over the ';'
1492  ++stre;
1493  }
1494  else // &#... (dec code)
1495  {
1496  Char8 ch = *++stre;
1497 
// a reference without any digits is left untouched
1498  if (ch == ';') return stre;
1499 
1500  for (;;)
1501  {
1502  if (static_cast<unsigned int>(ch - '0') <= 9)
1503  ucsc = 10 * ucsc + (ch - '0');
1504  else if (ch == ';')
1505  break;
1506  else // cancel
1507  return stre;
1508 
1509  ch = *++stre;
1510  }
1511 
// step over the ';'
1512  ++stre;
1513  }
1514 
1515 
// write the code point at s; the writer presumably returns the position
// after the bytes it emitted (confirm against utf8_WriterInstance::any)
1516  s = reinterpret_cast<Char8*>(utf8_WriterInstance::any(reinterpret_cast<uint8_t*>(s), ucsc));
1517 
1518 
// everything between the written bytes and the end of the reference is now unused
1519  g.push(s, stre - s);
1520  return stre;
1521  }
1522 
1523  case 'a': // &a
1524  {
1525  ++stre;
1526 
1527  if (*stre == 'm') // &am
1528  {
1529  if (*++stre == 'p' && *++stre == ';') // &amp;
1530  {
1531  *s++ = '&';
1532  ++stre;
1533 
1534  g.push(s, stre - s);
1535  return stre;
1536  }
1537  }
1538  else if (*stre == 'p') // &ap
1539  {
1540  if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
1541  {
1542  *s++ = '\'';
1543  ++stre;
1544 
1545  g.push(s, stre - s);
1546  return stre;
1547  }
1548  }
1549  break;
1550  }
1551 
1552  case 'g': // &g
1553  {
1554  if (*++stre == 't' && *++stre == ';') // &gt;
1555  {
1556  *s++ = '>';
1557  ++stre;
1558 
1559  g.push(s, stre - s);
1560  return stre;
1561  }
1562  break;
1563  }
1564 
1565  case 'l': // &l
1566  {
1567  if (*++stre == 't' && *++stre == ';') // &lt;
1568  {
1569  *s++ = '<';
1570  ++stre;
1571 
1572  g.push(s, stre - s);
1573  return stre;
1574  }
1575  break;
1576  }
1577 
1578  case 'q': // &q
1579  {
1580  if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
1581  {
1582  *s++ = '"';
1583  ++stre;
1584 
1585  g.push(s, stre - s);
1586  return stre;
1587  }
1588  break;
1589  }
1590 
1591  default:
1592  break;
1593  }
1594 
// not a recognized reference: resume scanning where matching stopped
1595  return stre;
1596  }
1597 
1598  // Utility macro for last character handling: true when c equals e, or when c is
// the zero terminator and the variable 'endch' of the enclosing scope equals e.
// The second case lets a construct be closed by a final character that is
// carried in endch rather than stored in the buffer.
1599  #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
1600 
1601  PUGI__FN Char8* strconv_comment(Char8* s, Char8 endch)
1602  {
1603  gap g;
1604 
1605  while (true)
1606  {
1607  while (!PUGI__IS_CHARTYPE(*s, ct_ParseComment)) ++s;
1608 
1609  if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1610  {
1611  *s++ = '\n'; // replace first one with 0x0a
1612 
1613  if (*s == '\n') g.push(s, 1);
1614  }
1615  else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here
1616  {
1617  *g.flush(s) = 0;
1618 
1619  return s + (s[2] == '>' ? 3 : 2);
1620  }
1621  else if (*s == 0)
1622  {
1623  return 0;
1624  }
1625  else ++s;
1626  }
1627  }
1628 
1629  PUGI__FN Char8* strconv_cdata(Char8* s, Char8 endch)
1630  {
1631  gap g;
1632 
1633  while (true)
1634  {
1635  while (!PUGI__IS_CHARTYPE(*s, ct_ParseCdata)) ++s;
1636 
1637  if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1638  {
1639  *s++ = '\n'; // replace first one with 0x0a
1640 
1641  if (*s == '\n') g.push(s, 1);
1642  }
1643  else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here
1644  {
1645  *g.flush(s) = 0;
1646 
1647  return s + 1;
1648  }
1649  else if (*s == 0)
1650  {
1651  return 0;
1652  }
1653  else ++s;
1654  }
1655  }
1656 
1657  typedef Char8* (*strconv_pcdata_t)(Char8*);
1658 
1659  template <typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
1660  {
1661  static Char8* parse(Char8* s)
1662  {
1663  gap g;
1664 
1665  while (true)
1666  {
1667  while (!PUGI__IS_CHARTYPE(*s, ct_ParsePcdata)) ++s;
1668 
1669  if (*s == '<') // PCDATA ends here
1670  {
1671  *g.flush(s) = 0;
1672 
1673  return s + 1;
1674  }
1675  else if (opt_eol::Value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
1676  {
1677  *s++ = '\n'; // replace first one with 0x0a
1678 
1679  if (*s == '\n') g.push(s, 1);
1680  }
1681  else if (opt_escape::Value && *s == '&')
1682  {
1683  s = strconv_escape(s, g);
1684  }
1685  else if (*s == 0)
1686  {
1687  return s;
1688  }
1689  else ++s;
1690  }
1691  }
1692  };
1693 
1694  PUGI__FN strconv_pcdata_t GetStrconv_pcdata(unsigned int optmask)
1695  {
1696  PUGI__STATIC_ASSERT(ParseEscapes == 0x10 && ParseEol == 0x20);
1697 
1698  switch ((optmask >> 4) & 3) // get bitmask for flags (eol escapes)
1699  {
1700  case 0: return strconv_pcdata_impl<opt_false, opt_false>::parse;
1701  case 1: return strconv_pcdata_impl<opt_false, opt_true>::parse;
1702  case 2: return strconv_pcdata_impl<opt_true, opt_false>::parse;
1703  case 3: return strconv_pcdata_impl<opt_true, opt_true>::parse;
1704  default: return 0; // should not get here
1705  }
1706  }
1707 
1708  typedef Char8* (*strconv_attribute_t)(Char8*, Char8);
1709 
1710  template <typename opt_escape> struct strconv_attribute_impl
1711  {
1712  static Char8* ParseWnorm(Char8* s, Char8 end_quote)
1713  {
1714  gap g;
1715 
1716  // trim leading whitespaces
1717  if (PUGI__IS_CHARTYPE(*s, ct_space))
1718  {
1719  Char8* str = s;
1720 
1721  do ++str;
1722  while (PUGI__IS_CHARTYPE(*str, ct_space));
1723 
1724  g.push(s, str - s);
1725  }
1726 
1727  while (true)
1728  {
1729  while (!PUGI__IS_CHARTYPE(*s, ct_ParseAttrWs | ct_space)) ++s;
1730 
1731  if (*s == end_quote)
1732  {
1733  Char8* str = g.flush(s);
1734 
1735  do *str-- = 0;
1736  while (PUGI__IS_CHARTYPE(*str, ct_space));
1737 
1738  return s + 1;
1739  }
1740  else if (PUGI__IS_CHARTYPE(*s, ct_space))
1741  {
1742  *s++ = ' ';
1743 
1744  if (PUGI__IS_CHARTYPE(*s, ct_space))
1745  {
1746  Char8* str = s + 1;
1747  while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
1748 
1749  g.push(s, str - s);
1750  }
1751  }
1752  else if (opt_escape::Value && *s == '&')
1753  {
1754  s = strconv_escape(s, g);
1755  }
1756  else if (!*s)
1757  {
1758  return 0;
1759  }
1760  else ++s;
1761  }
1762  }
1763 
1764  static Char8* ParseWconv(Char8* s, Char8 end_quote)
1765  {
1766  gap g;
1767 
1768  while (true)
1769  {
1770  while (!PUGI__IS_CHARTYPE(*s, ct_ParseAttrWs)) ++s;
1771 
1772  if (*s == end_quote)
1773  {
1774  *g.flush(s) = 0;
1775 
1776  return s + 1;
1777  }
1778  else if (PUGI__IS_CHARTYPE(*s, ct_space))
1779  {
1780  if (*s == '\r')
1781  {
1782  *s++ = ' ';
1783 
1784  if (*s == '\n') g.push(s, 1);
1785  }
1786  else *s++ = ' ';
1787  }
1788  else if (opt_escape::Value && *s == '&')
1789  {
1790  s = strconv_escape(s, g);
1791  }
1792  else if (!*s)
1793  {
1794  return 0;
1795  }
1796  else ++s;
1797  }
1798  }
1799 
1800  static Char8* ParseEol(Char8* s, Char8 end_quote)
1801  {
1802  gap g;
1803 
1804  while (true)
1805  {
1806  while (!PUGI__IS_CHARTYPE(*s, ct_ParseAttr)) ++s;
1807 
1808  if (*s == end_quote)
1809  {
1810  *g.flush(s) = 0;
1811 
1812  return s + 1;
1813  }
1814  else if (*s == '\r')
1815  {
1816  *s++ = '\n';
1817 
1818  if (*s == '\n') g.push(s, 1);
1819  }
1820  else if (opt_escape::Value && *s == '&')
1821  {
1822  s = strconv_escape(s, g);
1823  }
1824  else if (!*s)
1825  {
1826  return 0;
1827  }
1828  else ++s;
1829  }
1830  }
1831 
1832  static Char8* ParseSimple(Char8* s, Char8 end_quote)
1833  {
1834  gap g;
1835 
1836  while (true)
1837  {
1838  while (!PUGI__IS_CHARTYPE(*s, ct_ParseAttr)) ++s;
1839 
1840  if (*s == end_quote)
1841  {
1842  *g.flush(s) = 0;
1843 
1844  return s + 1;
1845  }
1846  else if (opt_escape::Value && *s == '&')
1847  {
1848  s = strconv_escape(s, g);
1849  }
1850  else if (!*s)
1851  {
1852  return 0;
1853  }
1854  else ++s;
1855  }
1856  }
1857  };
1858 
1859  PUGI__FN strconv_attribute_t GetStrconv_attribute(unsigned int optmask)
1860  {
1861  PUGI__STATIC_ASSERT(ParseEscapes == 0x10 && ParseEol == 0x20 && ParseWconvAttribute == 0x40 && ParseWnormAttribute == 0x80);
1862 
1863  switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
1864  {
1865  case 0: return strconv_attribute_impl<opt_false>::ParseSimple;
1866  case 1: return strconv_attribute_impl<opt_true>::ParseSimple;
1869  case 4: return strconv_attribute_impl<opt_false>::ParseWconv;
1870  case 5: return strconv_attribute_impl<opt_true>::ParseWconv;
1871  case 6: return strconv_attribute_impl<opt_false>::ParseWconv;
1872  case 7: return strconv_attribute_impl<opt_true>::ParseWconv;
1873  case 8: return strconv_attribute_impl<opt_false>::ParseWnorm;
1874  case 9: return strconv_attribute_impl<opt_true>::ParseWnorm;
1875  case 10: return strconv_attribute_impl<opt_false>::ParseWnorm;
1876  case 11: return strconv_attribute_impl<opt_true>::ParseWnorm;
1877  case 12: return strconv_attribute_impl<opt_false>::ParseWnorm;
1878  case 13: return strconv_attribute_impl<opt_true>::ParseWnorm;
1879  case 14: return strconv_attribute_impl<opt_false>::ParseWnorm;
1880  case 15: return strconv_attribute_impl<opt_true>::ParseWnorm;
1881  default: return 0; // should not get here
1882  }
1883  }
1884 
1885  inline ParseResult make_ParseResult(ParseStatus Status, ptrdiff_t Offset = 0)
1886  {
1887  ParseResult Result;
1888  Result.Status = Status;
1889  Result.Offset = Offset;
1890 
1891  return Result;
1892  }
1893 
1894  struct Parser
1895  {
1896  Allocator alloc;
1897  Char8* error_Offset;
1898  ParseStatus error_Status;
1899 
1900  // Parser utilities.
1901  #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
1902  #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
1903  #define PUGI__PUSHNODE(TYPE) { cursor = AppendNode(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(StatusOutOfMemory, s); }
1904  #define PUGI__POPNODE() { cursor = cursor->GetParent; }
1905  #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
1906  #define PUGI__SCANWHILE(X) { while ((X)) ++s; }
1907  #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
1908  #define PUGI__THROW_ERROR(err, m) return error_Offset = m, error_Status = err, static_cast<Char8*>(0)
1909  #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
1910 
// Creates a parser that holds a copy of alloc_ and starts with no error
// recorded (null error offset, StatusOk).
1911  Parser(const Allocator& alloc_): alloc(alloc_), error_Offset(0), error_Status(StatusOk)
1912  {
1913  }
1914 
1915  // DOCTYPE consists of nested sections of the following possible Types:
1916  // <!-- ... -->, <? ... ?>, "...", '...'
1917  // <![...]]>
1918  // <!...>
1919  // First group can not contain nested groups
1920  // Second group can contain nested groups of the same type
1921  // Third group can contain all other groups
1922  Char8* ParseDocTypePrimitive(Char8* s)
1923  {
1924  if (*s == '"' || *s == '\'')
1925  {
1926  // quoted string
1927  Char8 ch = *s++;
1928  PUGI__SCANFOR(*s == ch);
1929  if (!*s) PUGI__THROW_ERROR(StatusBadDocType, s);
1930 
1931  s++;
1932  }
1933  else if (s[0] == '<' && s[1] == '?')
1934  {
1935  // <? ... ?>
1936  s += 2;
1937  PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
1938  if (!*s) PUGI__THROW_ERROR(StatusBadDocType, s);
1939 
1940  s += 2;
1941  }
1942  else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
1943  {
1944  s += 4;
1945  PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
1946  if (!*s) PUGI__THROW_ERROR(StatusBadDocType, s);
1947 
1948  s += 4;
1949  }
1950  else PUGI__THROW_ERROR(StatusBadDocType, s);
1951 
1952  return s;
1953  }
1954 
1955  Char8* ParseDocTypeIgnore(Char8* s)
1956  {
1957  assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
1958  s++;
1959 
1960  while (*s)
1961  {
1962  if (s[0] == '<' && s[1] == '!' && s[2] == '[')
1963  {
1964  // nested ignore section
1965  s = ParseDocTypeIgnore(s);
1966  if (!s) return s;
1967  }
1968  else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
1969  {
1970  // ignore section end
1971  s += 3;
1972 
1973  return s;
1974  }
1975  else s++;
1976  }
1977 
1978  PUGI__THROW_ERROR(StatusBadDocType, s);
1979  }
1980 
1981  Char8* ParseDocTypeGroup(Char8* s, Char8 endch, bool toplevel)
1982  {
1983  assert(s[0] == '<' && s[1] == '!');
1984  s++;
1985 
1986  while (*s)
1987  {
1988  if (s[0] == '<' && s[1] == '!' && s[2] != '-')
1989  {
1990  if (s[2] == '[')
1991  {
1992  // ignore
1993  s = ParseDocTypeIgnore(s);
1994  if (!s) return s;
1995  }
1996  else
1997  {
1998  // some control group
1999  s = ParseDocTypeGroup(s, endch, false);
2000  if (!s) return s;
2001  }
2002  }
2003  else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
2004  {
2005  // unknown tag (forbidden), or some primitive group
2006  s = ParseDocTypePrimitive(s);
2007  if (!s) return s;
2008  }
2009  else if (*s == '>')
2010  {
2011  s++;
2012 
2013  return s;
2014  }
2015  else s++;
2016  }
2017 
2018  if (!toplevel || endch != '>') PUGI__THROW_ERROR(StatusBadDocType, s);
2019 
2020  return s;
2021  }
2022 
// Parses a construct that starts with "<!": a comment, a CDATA section or a
// DOCTYPE declaration.
//
// s       points at the '!' (the '<' is at s[-1]; the DOCTYPE branch steps
//         back to it)
// cursor  node that receives the new child; passed by value, so the cursor of
//         the caller is not changed by the pushes and pops done here
// optmsk  parsing options; they decide which constructs produce nodes
// endch   character that was replaced by the zero terminator of the buffer
//
// Returns the position after the construct, or a null pointer after recording
// the error position and status in the Parser.
2023  Char8* ParseExclamation(Char8* s, NodeStruct* cursor, unsigned int optmsk, Char8 endch)
2024  {
2025  // parse node contents, starting with exclamation mark
2026  ++s;
2027 
2028  if (*s == '-') // '<!-...'
2029  {
2030  ++s;
2031 
2032  if (*s == '-') // '<!--...'
2033  {
2034  ++s;
2035 
2036  if (PUGI__OPTSET(ParseComments))
2037  {
2038  PUGI__PUSHNODE(NodeComment); // Append a new node on the tree.
2039  cursor->Value = s; // Save the Offset.
2040  }
2041 
// line endings only need to be normalized when the comment text is kept
2042  if (PUGI__OPTSET(ParseEol) && PUGI__OPTSET(ParseComments))
2043  {
2044  s = strconv_comment(s, endch);
2045 
2046  if (!s) PUGI__THROW_ERROR(StatusBadComment, cursor->Value);
2047  }
2048  else
2049  {
2050  // Scan for terminating '-->'.
2051  PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));
2052  PUGI__CHECK_ERROR(StatusBadComment, s);
2053 
2054  if (PUGI__OPTSET(ParseComments))
2055  *s = 0; // Zero-terminate this segment at the first terminating '-'.
2056 
// when the '>' was supplied through endch only two characters remain in the buffer
2057  s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
2058  }
2059  }
2060  else PUGI__THROW_ERROR(StatusBadComment, s);
2061  }
2062  else if (*s == '[')
2063  {
2064  // '<![CDATA[...'
2065  if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
2066  {
2067  ++s;
2068 
2069  if (PUGI__OPTSET(ParseCdata))
2070  {
2071  PUGI__PUSHNODE(NodeCdata); // Append a new node on the tree.
2072  cursor->Value = s; // Save the Offset.
2073 
2074  if (PUGI__OPTSET(ParseEol))
2075  {
2076  s = strconv_cdata(s, endch);
2077 
2078  if (!s) PUGI__THROW_ERROR(StatusBadCdata, cursor->Value);
2079  }
2080  else
2081  {
2082  // Scan for terminating ']]>'.
2083  PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
2084  PUGI__CHECK_ERROR(StatusBadCdata, s);
2085 
2086  *s++ = 0; // Zero-terminate this segment.
2087  }
2088  }
2089  else // Flagged for discard, but we still have to scan for the terminator.
2090  {
2091  // Scan for terminating ']]>'.
2092  PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
2093  PUGI__CHECK_ERROR(StatusBadCdata, s);
2094 
2095  ++s;
2096  }
2097 
// on every path above s is now one past the first ']' of the terminator
2098  s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
2099  }
2100  else PUGI__THROW_ERROR(StatusBadCdata, s);
2101  }
2102  else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E'))
2103  {
// back to the '<', which is where ParseDocTypeGroup expects to start
2104  s -= 2;
2105 
// a DOCTYPE is accepted only while the cursor has no parent
2106  if (cursor->GetParent) PUGI__THROW_ERROR(StatusBadDocType, s);
2107 
// position right after the nine characters of "<!DOCTYPE"
2108  Char8* mark = s + 9;
2109 
2110  s = ParseDocTypeGroup(s, endch, true);
2111  if (!s) return s;
2112 
2113  if (PUGI__OPTSET(ParseDocType))
2114  {
2115  while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
2116 
2117  PUGI__PUSHNODE(NodeDocType);
2118 
2119  cursor->Value = mark;
2120 
2121  assert((s[0] == 0 && endch == '>') || s[-1] == '>');
// terminate the value at the closing '>', or at the buffer terminator when
// the '>' was supplied through endch
2122  s[*s == 0 ? 0 : -1] = 0;
2123 
2124  PUGI__POPNODE();
2125  }
2126  }
// the input ended right after "<!": the kind of error is derived from endch
2127  else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(StatusBadComment, s);
2128  else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(StatusBadCdata, s);
2129  else PUGI__THROW_ERROR(StatusUnrecognizedTag, s);
2130 
2131  return s;
2132  }
2133 
// Parses a '<?...?>' construct: either an XML declaration or a processing instruction.
// s          - points at the '?' that follows '<'; the buffer is modified in place.
// ref_cursor - current node; copied into a local on entry and written back before the final return.
// optmsk     - parse option bits (presumably what PUGI__OPTSET tests - the macro is defined outside this view).
// endch      - character handed to ENDSWITH wherever a closing '>' is expected.
// Returns the position at which the caller continues parsing.
// NOTE(review): PUGI__THROW_ERROR is defined elsewhere; the caller in parse() treats a null return as failure - confirm.
2134  Char8* ParseQuestion(Char8* s, NodeStruct*& ref_cursor, unsigned int optmsk, Char8 endch)
2135  {
2136  // Load into registers
2137  NodeStruct* cursor = ref_cursor;
2138  Char8 ch = 0;
2139 
2140  // parse node contents, starting with question mark
2141  ++s;
2142 
2143  // read PI target
2144  Char8* target = s;
2145 
2146  if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(StatusBadProcessingInstruction, s);
2147 
2148  PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
2149  PUGI__CHECK_ERROR(StatusBadProcessingInstruction, s);
2150 
2151  // determine node Type; stricmp / strcasecmp is not portable
// OR-ing with ' ' folds ASCII upper case to lower case; 'target + 3 == s' requires the target to be exactly three characters long.
2152  bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
2153 
// A node is only created when the matching option (ParseDeclaration or ParsePi) is set; otherwise the construct is skipped in the else branch below.
2154  if (declaration ? PUGI__OPTSET(ParseDeclaration) : PUGI__OPTSET(ParsePi))
2155  {
2156  if (declaration)
2157  {
2158  // disallow non top-level declarations
2159  if (cursor->GetParent) PUGI__THROW_ERROR(StatusBadProcessingInstruction, s);
2160 
2161  PUGI__PUSHNODE(NodeDeclaration);
2162  }
2163  else
2164  {
2165  PUGI__PUSHNODE(NodePi);
2166  }
2167 
2168  cursor->Name = target;
2169 
// Per the comment on the same macro in parse(): saves the current char in 'ch', terminates the segment and steps over it.
2170  PUGI__ENDSEG();
2171 
2172  // parse Value/attributes
2173  if (ch == '?')
2174  {
2175  // empty node
2176  if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(StatusBadProcessingInstruction, s);
// Step over '>' only when it is really present.
2177  s += (*s == '>');
2178 
2179  PUGI__POPNODE();
2180  }
2181  else if (PUGI__IS_CHARTYPE(ch, ct_space))
2182  {
2183  PUGI__SKIPWS();
2184 
2185  // scan for tag end
2186  Char8* Value = s;
2187 
2188  PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
2189  PUGI__CHECK_ERROR(StatusBadProcessingInstruction, s);
2190 
2191  if (declaration)
2192  {
2193  // replace ending ? with / so that 'element' terminates properly
2194  *s = '/';
2195 
2196  // we exit from this function with cursor at NodeDeclaration, which is a signal to parse() to go to LOC_ATTRIBUTES
2197  s = Value;
2198  }
2199  else
2200  {
2201  // store Value and step over >
2202  cursor->Value = Value;
2203  PUGI__POPNODE();
2204 
2205  PUGI__ENDSEG();
2206 
2207  s += (*s == '>');
2208  }
2209  }
2210  else PUGI__THROW_ERROR(StatusBadProcessingInstruction, s);
2211  }
2212  else
2213  {
2214  // scan for tag end
2215  PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
2216  PUGI__CHECK_ERROR(StatusBadProcessingInstruction, s);
2217 
// Step over "?>" when '>' is present, otherwise over the '?' alone.
2218  s += (s[1] == '>' ? 2 : 1);
2219  }
2220 
2221  // store from registers
2222  ref_cursor = cursor;
2223 
2224  return s;
2225  }
2226 
// Main parsing loop: walks the zero-terminated buffer starting at s and attaches nodes and attributes below xmldoc.
// After a '<' the next character selects the branch: a name start symbol opens an element, '/' closes the
// current element, '?' is delegated to ParseQuestion and '!' to ParseExclamation. Text that does not start
// with '<' is handled by the PCDATA branch at the bottom of the loop.
// s      - writable, zero-terminated input; segments are terminated in place.
// xmldoc - node the parse starts from; it must be the current node again when the input ends.
// optmsk - parse option bits (selects the string converters and, presumably, what PUGI__OPTSET tests).
// endch  - accepted in several places instead of a missing '>' when the terminator is reached.
// Returns the position reached in the buffer.
// NOTE(review): PUGI__THROW_ERROR is defined outside this view - confirm how it leaves the function.
2227  Char8* parse(Char8* s, NodeStruct* xmldoc, unsigned int optmsk, Char8 endch)
2228  {
2229  strconv_attribute_t strconv_attribute = GetStrconv_attribute(optmsk);
2230  strconv_pcdata_t strconv_pcdata = GetStrconv_pcdata(optmsk);
2231 
2232  Char8 ch = 0;
2233  NodeStruct* cursor = xmldoc;
2234  Char8* mark = s;
2235 
2236  while (*s != 0)
2237  {
2238  if (*s == '<')
2239  {
2240  ++s;
2241 
// Also entered by goto from the PCDATA branch once it has stepped over a '<'.
2242  LOC_TAG:
2243  if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
2244  {
2245  PUGI__PUSHNODE(NodeElement); // Append a new node to the tree.
2246 
2247  cursor->Name = s;
2248 
2249  PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2250  PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
2251 
2252  if (ch == '>')
2253  {
2254  // end of tag
2255  }
2256  else if (PUGI__IS_CHARTYPE(ch, ct_space))
2257  {
// Also entered by goto after ParseQuestion has left the cursor on a declaration node.
2258  LOC_ATTRIBUTES:
2259  while (true)
2260  {
2261  PUGI__SKIPWS(); // Eat any whitespace.
2262 
2263  if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
2264  {
2265  AttributeStruct* a = AppendAttribute_ll(cursor, alloc); // Make space for this GetAttribute.
2266  if (!a) PUGI__THROW_ERROR(StatusOutOfMemory, s);
2267 
2268  a->Name = s; // Save the Offset.
2269 
2270  PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
2271  PUGI__CHECK_ERROR(StatusBadAttribute, s); //$ redundant, left for performance
2272 
2273  PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
2274  PUGI__CHECK_ERROR(StatusBadAttribute, s); //$ redundant, left for performance
2275 
2276  if (PUGI__IS_CHARTYPE(ch, ct_space))
2277  {
2278  PUGI__SKIPWS(); // Eat any whitespace.
2279  PUGI__CHECK_ERROR(StatusBadAttribute, s); //$ redundant, left for performance
2280 
2281  ch = *s;
2282  ++s;
2283  }
2284 
2285  if (ch == '=') // '<... #=...'
2286  {
2287  PUGI__SKIPWS(); // Eat any whitespace.
2288 
2289  if (*s == '"' || *s == '\'') // '<... #="...'
2290  {
2291  ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
2292  ++s; // Step over the quote.
2293  a->Value = s; // Save the Offset.
2294 
2295  s = strconv_attribute(s, ch);
2296 
2297  if (!s) PUGI__THROW_ERROR(StatusBadAttribute, a->Value);
2298 
2299  // After this line the loop continues from the start;
2300  // Whitespaces, / and > are ok, symbols and EOF are wrong,
2301  // everything else will be detected
2302  if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(StatusBadAttribute, s);
2303  }
2304  else PUGI__THROW_ERROR(StatusBadAttribute, s);
2305  }
2306  else PUGI__THROW_ERROR(StatusBadAttribute, s);
2307  }
2308  else if (*s == '/')
2309  {
2310  ++s;
2311 
2312  if (*s == '>')
2313  {
2314  PUGI__POPNODE();
2315  s++;
2316  break;
2317  }
2318  else if (*s == 0 && endch == '>')
2319  {
2320  PUGI__POPNODE();
2321  break;
2322  }
2323  else PUGI__THROW_ERROR(StatusBadStartElement, s);
2324  }
2325  else if (*s == '>')
2326  {
2327  ++s;
2328 
2329  break;
2330  }
2331  else if (*s == 0 && endch == '>')
2332  {
2333  break;
2334  }
2335  else PUGI__THROW_ERROR(StatusBadStartElement, s);
2336  }
2337 
2338  // !!!
2339  }
2340  else if (ch == '/') // '<#.../'
2341  {
2342  if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(StatusBadStartElement, s);
2343 
2344  PUGI__POPNODE(); // Pop.
2345 
2346  s += (*s == '>');
2347  }
2348  else if (ch == 0)
2349  {
2350  // we stepped over null terminator, backtrack & handle closing tag
2351  --s;
2352 
2353  if (endch != '>') PUGI__THROW_ERROR(StatusBadStartElement, s);
2354  }
2355  else PUGI__THROW_ERROR(StatusBadStartElement, s);
2356  }
2357  else if (*s == '/')
2358  {
2359  ++s;
2360 
// Closing tag: its name must match the name of the current node character by character.
2361  Char8* Name = cursor->Name;
2362  if (!Name) PUGI__THROW_ERROR(StatusEndElementMismatch, s);
2363 
2364  while (PUGI__IS_CHARTYPE(*s, ct_symbol))
2365  {
2366  if (*s++ != *Name++) PUGI__THROW_ERROR(StatusEndElementMismatch, s);
2367  }
2368 
// Characters left in Name mean the closing tag was shorter than the opening name.
2369  if (*Name)
2370  {
2371  if (*s == 0 && Name[0] == endch && Name[1] == 0) PUGI__THROW_ERROR(StatusBadEndElement, s);
2372  else PUGI__THROW_ERROR(StatusEndElementMismatch, s);
2373  }
2374 
2375  PUGI__POPNODE(); // Pop.
2376 
2377  PUGI__SKIPWS();
2378 
2379  if (*s == 0)
2380  {
2381  if (endch != '>') PUGI__THROW_ERROR(StatusBadEndElement, s);
2382  }
2383  else
2384  {
2385  if (*s != '>') PUGI__THROW_ERROR(StatusBadEndElement, s);
2386  ++s;
2387  }
2388  }
2389  else if (*s == '?') // '<?...'
2390  {
2391  s = ParseQuestion(s, cursor, optmsk, endch);
2392  if (!s) return s;
2393 
2394  assert(cursor);
// A cursor left on a declaration node means its attributes still have to be parsed.
2395  if ((cursor->header & MemoryPage_type_mask) + 1 == NodeDeclaration) goto LOC_ATTRIBUTES;
2396  }
2397  else if (*s == '!') // '<!...'
2398  {
2399  s = ParseExclamation(s, cursor, optmsk, endch);
2400  if (!s) return s;
2401  }
2402  else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(StatusBadProcessingInstruction, s);
2403  else PUGI__THROW_ERROR(StatusUnrecognizedTag, s);
2404  }
2405  else
2406  {
2407  mark = s; // Save this Offset while searching for a terminator.
2408 
2409  PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
2410 
2411  if (*s == '<')
2412  {
2413  // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
2414  assert(mark != s);
2415 
// Whitespace-only text is dropped (continue) unless one of the ParseWsPcdata options keeps it.
2416  if (!PUGI__OPTSET(ParseWsPcdata | ParseWsPcdata_single))
2417  {
2418  continue;
2419  }
2420  else if (PUGI__OPTSET(ParseWsPcdata_single))
2421  {
2422  if (s[1] != '/' || cursor->GetFirstChild) continue;
2423  }
2424  }
2425 
// Rewind to the start of the text so leading whitespace is part of the PCDATA.
2426  s = mark;
2427 
2428  if (cursor->GetParent)
2429  {
2430  PUGI__PUSHNODE(NodePcdata); // Append a new node on the tree.
2431  cursor->Value = s; // Save the Offset.
2432 
2433  s = strconv_pcdata(s);
2434 
2435  PUGI__POPNODE(); // Pop since this is a standalone.
2436 
2437  if (!*s) break;
2438  }
2439  else
2440  {
// The cursor has no parent here: no PCDATA node is created and the text is skipped up to the next '<'.
2441  PUGI__SCANFOR(*s == '<'); // '...<'
2442  if (!*s) break;
2443 
2444  ++s;
2445  }
2446 
2447  // We're after '<'
2448  goto LOC_TAG;
2449  }
2450  }
2451 
2452  // check that last tag is closed
2453  if (cursor != xmldoc) PUGI__THROW_ERROR(StatusEndElementMismatch, s);
2454 
2455  return s;
2456  }
2457 
2458  static ParseResult parse(Char8* buffer, size_t length, NodeStruct* GetRoot, unsigned int optmsk)
2459  {
2460  DocumentStruct* xmldoc = static_cast<DocumentStruct*>(GetRoot);
2461 
2462  // store buffer for OffSetDebug
2463  xmldoc->buffer = buffer;
2464 
2465  // early-out for empty documents
2466  if (length == 0) return make_ParseResult(StatusOk);
2467 
2468  // create parser on stack
2469  Parser parser(*xmldoc);
2470 
2471  // Save last character and make buffer zero-terminated (speeds up parsing)
2472  Char8 endch = buffer[length - 1];
2473  buffer[length - 1] = 0;
2474 
2475  // perform actual parsing
2476  parser.parse(buffer, xmldoc, optmsk, endch);
2477 
2478  ParseResult Result = make_ParseResult(parser.error_Status, parser.error_Offset ? parser.error_Offset - buffer : 0);
2479  assert(Result.Offset >= 0 && static_cast<size_t>(Result.Offset) <= length);
2480 
2481  // update allocator state
2482  *static_cast<Allocator*>(xmldoc) = parser.alloc;
2483 
2484  // since we removed last character, we have to handle the only possible false positive
2485  if (Result && endch == '<')
2486  {
2487  // there's no possible well-formed document with < at the end
2488  return make_ParseResult(StatusUnrecognizedTag, length);
2489  }
2490 
2491  return Result;
2492  }
2493  };
2494 
2495  // Output facilities
2496  PUGI__FN Encoding GetWrite_native_DocumentEncoding()
2497  {
2498  return EncodingUTF8;
2499  }
2500 
2501  PUGI__FN Encoding GetWrite_DocumentEncoding(Encoding DocumentEncoding)
2502  {
2503  // replace wchar DocumentEncoding with utf implementation
2504  if (DocumentEncoding == Encodingwchar_t) return GetWchar_DocumentEncoding();
2505 
2506  // replace utf16 DocumentEncoding with utf16 with specific endianness
2507  if (DocumentEncoding == EncodingUTF16) return is_little_endian() ? EncodingUTF16LE : EncodingUTF16BE;
2508 
2509  // replace utf32 DocumentEncoding with utf32 with specific endianness
2510  if (DocumentEncoding == EncodingUTF32) return is_little_endian() ? EncodingUTF32LE : EncodingUTF32BE;
2511 
2512  // only do autodetection if no explicit DocumentEncoding is requested
2513  if (DocumentEncoding != EncodingAuto) return DocumentEncoding;
2514 
2515  // assume utf8 DocumentEncoding
2516  return EncodingUTF8;
2517  }
2518 
2519  PUGI__FN size_t GetValid_length(const Char8* data, size_t length)
2520  {
2521  assert(length > 4);
2522 
2523  for (size_t i = 1; i <= 4; ++i)
2524  {
2525  uint8_t ch = static_cast<uint8_t>(data[length - i]);
2526 
2527  // either a standalone character or a leading one
2528  if ((ch & 0xc0) != 0x80) return length - i;
2529  }
2530 
2531  // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
2532  return length;
2533  }
2534 
2535  PUGI__FN size_t convert_buffer(Char8* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const Char8* data, size_t length, Encoding DocumentEncoding)
2536  {
2537  if (DocumentEncoding == EncodingUTF16BE || DocumentEncoding == EncodingUTF16LE)
2538  {
2539  uint16_t* dest = r_u16;
2540 
2541  // convert to native utf16
2542  uint16_t* end = utf_decoder<utf16_WriterInstance>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2543 
2544  // swap if necessary
2545  Encoding native_DocumentEncoding = is_little_endian() ? EncodingUTF16LE : EncodingUTF16BE;
2546 
2547  if (native_DocumentEncoding != DocumentEncoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2548 
2549  return static_cast<size_t>(end - dest) * sizeof(uint16_t);
2550  }
2551 
2552  if (DocumentEncoding == EncodingUTF32BE || DocumentEncoding == EncodingUTF32LE)
2553  {
2554  uint32_t* dest = r_u32;
2555 
2556  // convert to native utf32
2557  uint32_t* end = utf_decoder<utf32_WriterInstance>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2558 
2559  // swap if necessary
2560  Encoding native_DocumentEncoding = is_little_endian() ? EncodingUTF32LE : EncodingUTF32BE;
2561 
2562  if (native_DocumentEncoding != DocumentEncoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
2563 
2564  return static_cast<size_t>(end - dest) * sizeof(uint32_t);
2565  }
2566 
2567  if (DocumentEncoding == EncodingLatin1)
2568  {
2569  uint8_t* dest = r_u8;
2570  uint8_t* end = utf_decoder<latin1_WriterInstance>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
2571 
2572  return static_cast<size_t>(end - dest);
2573  }
2574 
2575  assert(!"Invalid DocumentEncoding");
2576  return 0;
2577  }
2578 
2579 
2580  class BufferedWriter
2581  {
2582  BufferedWriter(const BufferedWriter&);
2583  BufferedWriter& operator=(const BufferedWriter&);
2584 
2585  public:
2586  BufferedWriter(Writer& WriterInstance_, Encoding user_DocumentEncoding): WriterInstance(WriterInstance_), bufsize(0), DocumentEncoding(GetWrite_DocumentEncoding(user_DocumentEncoding))
2587  {
2588  PUGI__STATIC_ASSERT(bufcapacity >= 8);
2589  }
2590 
2591  ~BufferedWriter()
2592  {
2593  flush();
2594  }
2595 
2596  void flush()
2597  {
2598  flush(buffer, bufsize);
2599  bufsize = 0;
2600  }
2601 
2602  void flush(const Char8* data, size_t size)
2603  {
2604  if (size == 0) return;
2605 
2606  // fast Path, just Write data
2607  if (DocumentEncoding == GetWrite_native_DocumentEncoding())
2608  WriterInstance.Write(data, size * sizeof(Char8));
2609  else
2610  {
2611  // convert chunk
2612  size_t Result = convert_buffer(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, DocumentEncoding);
2613  assert(Result <= sizeof(scratch));
2614 
2615  // Write data
2616  WriterInstance.Write(scratch.data_u8, Result);
2617  }
2618  }
2619 
2620  void Write(const Char8* data, size_t length)
2621  {
2622  if (bufsize + length > bufcapacity)
2623  {
2624  // flush the remaining buffer contents
2625  flush();
2626 
2627  // handle large chunks
2628  if (length > bufcapacity)
2629  {
2630  if (DocumentEncoding == GetWrite_native_DocumentEncoding())
2631  {
2632  // fast Path, can just Write data chunk
2633  WriterInstance.Write(data, length * sizeof(Char8));
2634  return;
2635  }
2636 
2637  // need to convert in suitable chunks
2638  while (length > bufcapacity)
2639  {
2640  // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
2641  // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
2642  size_t chunk_size = GetValid_length(data, bufcapacity);
2643 
2644  // convert chunk and Write
2645  flush(data, chunk_size);
2646 
2647  // iterate
2648  data += chunk_size;
2649  length -= chunk_size;
2650  }
2651 
2652  // small tail is copied below
2653  bufsize = 0;
2654  }
2655  }
2656 
2657  memcpy(buffer + bufsize, data, length * sizeof(Char8));
2658  bufsize += length;
2659  }
2660 
2661  void Write(const Char8* data)
2662  {
2663  Write(data, strlength(data));
2664  }
2665 
2666  void Write(Char8 d0)
2667  {
2668  if (bufsize + 1 > bufcapacity) flush();
2669 
2670  buffer[bufsize + 0] = d0;
2671  bufsize += 1;
2672  }
2673 
2674  void Write(Char8 d0, Char8 d1)
2675  {
2676  if (bufsize + 2 > bufcapacity) flush();
2677 
2678  buffer[bufsize + 0] = d0;
2679  buffer[bufsize + 1] = d1;
2680  bufsize += 2;
2681  }
2682 
2683  void Write(Char8 d0, Char8 d1, Char8 d2)
2684  {
2685  if (bufsize + 3 > bufcapacity) flush();
2686 
2687  buffer[bufsize + 0] = d0;
2688  buffer[bufsize + 1] = d1;
2689  buffer[bufsize + 2] = d2;
2690  bufsize += 3;
2691  }
2692 
2693  void Write(Char8 d0, Char8 d1, Char8 d2, Char8 d3)
2694  {
2695  if (bufsize + 4 > bufcapacity) flush();
2696 
2697  buffer[bufsize + 0] = d0;
2698  buffer[bufsize + 1] = d1;
2699  buffer[bufsize + 2] = d2;
2700  buffer[bufsize + 3] = d3;
2701  bufsize += 4;
2702  }
2703 
2704  void Write(Char8 d0, Char8 d1, Char8 d2, Char8 d3, Char8 d4)
2705  {
2706  if (bufsize + 5 > bufcapacity) flush();
2707 
2708  buffer[bufsize + 0] = d0;
2709  buffer[bufsize + 1] = d1;
2710  buffer[bufsize + 2] = d2;
2711  buffer[bufsize + 3] = d3;
2712  buffer[bufsize + 4] = d4;
2713  bufsize += 5;
2714  }
2715 
2716  void Write(Char8 d0, Char8 d1, Char8 d2, Char8 d3, Char8 d4, Char8 d5)
2717  {
2718  if (bufsize + 6 > bufcapacity) flush();
2719 
2720  buffer[bufsize + 0] = d0;
2721  buffer[bufsize + 1] = d1;
2722  buffer[bufsize + 2] = d2;
2723  buffer[bufsize + 3] = d3;
2724  buffer[bufsize + 4] = d4;
2725  buffer[bufsize + 5] = d5;
2726  bufsize += 6;
2727  }
2728 
2729  // utf8 maximum expansion: x4 (-> utf32)
2730  // utf16 maximum expansion: x2 (-> utf32)
2731  // utf32 maximum expansion: x1
2732  enum
2733  {
2734  bufcapacitybytes =
2735  #ifdef XML_MEMORY_OUTPUT_STACK
2736  XML_MEMORY_OUTPUT_STACK
2737  #else
2738  10240
2739  #endif
2740  ,
2741  bufcapacity = bufcapacitybytes / (sizeof(Char8) + 4)
2742  };
2743 
2744  Char8 buffer[bufcapacity];
2745 
2746  union
2747  {
2748  uint8_t data_u8[4 * bufcapacity];
2749  uint16_t data_u16[2 * bufcapacity];
2750  uint32_t data_u32[bufcapacity];
2751  Char8 data_char[bufcapacity];
2752  } scratch;
2753 
2754  Writer& WriterInstance;
2755  size_t bufsize;
2756  Encoding DocumentEncoding;
2757  };
2758 
2759  PUGI__FN void text_output_escaped(BufferedWriter& WriterInstance, const Char8* s, charTypex_t Type)
2760  {
2761  while (*s)
2762  {
2763  const Char8* prev = s;
2764 
2765  // While *s is a usual symbol
2766  while (!PUGI__IS_CHARTYPEX(*s, Type)) ++s;
2767 
2768  WriterInstance.Write(prev, static_cast<size_t>(s - prev));
2769 
2770  switch (*s)
2771  {
2772  case 0: break;
2773  case '&':
2774  WriterInstance.Write('&', 'a', 'm', 'p', ';');
2775  ++s;
2776  break;
2777  case '<':
2778  WriterInstance.Write('&', 'l', 't', ';');
2779  ++s;
2780  break;
2781  case '>':
2782  WriterInstance.Write('&', 'g', 't', ';');
2783  ++s;
2784  break;
2785  case '"':
2786  WriterInstance.Write('&', 'q', 'u', 'o', 't', ';');
2787  ++s;
2788  break;
2789  default: // s is not a usual symbol
2790  {
2791  unsigned int ch = static_cast<unsigned int>(*s++);
2792  assert(ch < 32);
2793 
2794  WriterInstance.Write('&', '#', static_cast<Char8>((ch / 10) + '0'), static_cast<Char8>((ch % 10) + '0'), ';');
2795  }
2796  }
2797  }
2798  }
2799 
2800  PUGI__FN void text_output(BufferedWriter& WriterInstance, const Char8* s, charTypex_t Type, unsigned int flags)
2801  {
2802  if (flags & FormatNoEscapes)
2803  WriterInstance.Write(s);
2804  else
2805  text_output_escaped(WriterInstance, s, Type);
2806  }
2807 
2808  PUGI__FN void text_output_cdata(BufferedWriter& WriterInstance, const Char8* s)
2809  {
2810  do
2811  {
2812  WriterInstance.Write('<', '!', '[', 'C', 'D');
2813  WriterInstance.Write('A', 'T', 'A', '[');
2814 
2815  const Char8* prev = s;
2816 
2817  // look for ]]> sequence - we can't output it as is since it terminates CDATA
2818  while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
2819 
2820  // skip ]] if we stopped at ]]>, > will go to the next CDATA section
2821  if (*s) s += 2;
2822 
2823  WriterInstance.Write(prev, static_cast<size_t>(s - prev));
2824 
2825  WriterInstance.Write(']', ']', '>');
2826  }
2827  while (*s);
2828  }
2829 
2830  PUGI__FN void NodeOutput_attributes(BufferedWriter& WriterInstance, const Node& node, unsigned int flags)
2831  {
2832  const Char8* default_Name = ":anonymous";
2833 
2834  for (Attribute a = node.GetFirstAttribute(); a; a = a.GetNextAttribute())
2835  {
2836  WriterInstance.Write(' ');
2837  WriterInstance.Write(a.Name()[0] ? a.Name() : default_Name);
2838  WriterInstance.Write('=', '"');
2839 
2840  text_output(WriterInstance, a.Value(), ctx_special_attr, flags);
2841 
2842  WriterInstance.Write('"');
2843  }
2844  }
2845 
2846  PUGI__FN void NodeOutput(BufferedWriter& WriterInstance, const Node& node, const Char8* indent, unsigned int flags, unsigned int Depth)
2847  {
2848  const Char8* default_Name = ":anonymous";
2849 
2850  if ((flags & FormatIndent) != 0 && (flags & FormatRaw) == 0)
2851  for (unsigned int i = 0; i < Depth; ++i) WriterInstance.Write(indent);
2852 
2853  switch (node.Type())
2854  {
2855  case NodeDocument:
2856  {
2857  for (Node n = node.GetFirstChild(); n; n = n.GetNextSibling())
2858  NodeOutput(WriterInstance, n, indent, flags, Depth);
2859  break;
2860  }
2861 
2862  case NodeElement:
2863  {
2864  const Char8* Name = node.Name()[0] ? node.Name() : default_Name;
2865 
2866  WriterInstance.Write('<');
2867  WriterInstance.Write(Name);
2868 
2869  NodeOutput_attributes(WriterInstance, node, flags);
2870 
2871  if (flags & FormatRaw)
2872  {
2873  if (!node.GetFirstChild())
2874  WriterInstance.Write(' ', '/', '>');
2875  else
2876  {
2877  WriterInstance.Write('>');
2878 
2879  for (Node n = node.GetFirstChild(); n; n = n.GetNextSibling())
2880  NodeOutput(WriterInstance, n, indent, flags, Depth + 1);
2881 
2882  WriterInstance.Write('<', '/');
2883  WriterInstance.Write(Name);
2884  WriterInstance.Write('>');
2885  }
2886  }
2887  else if (!node.GetFirstChild())
2888  WriterInstance.Write(' ', '/', '>', '\n');
2889  else if (node.GetFirstChild() == node.GetLastChild() && (node.GetFirstChild().Type() == NodePcdata || node.GetFirstChild().Type() == NodeCdata))
2890  {
2891  WriterInstance.Write('>');
2892 
2893  if (node.GetFirstChild().Type() == NodePcdata)
2894  text_output(WriterInstance, node.GetFirstChild().Value(), ctx_special_pcdata, flags);
2895  else
2896  text_output_cdata(WriterInstance, node.GetFirstChild().Value());
2897 
2898  WriterInstance.Write('<', '/');
2899  WriterInstance.Write(Name);
2900  WriterInstance.Write('>', '\n');
2901  }
2902  else
2903  {
2904  WriterInstance.Write('>', '\n');
2905 
2906  for (Node n = node.GetFirstChild(); n; n = n.GetNextSibling())
2907  NodeOutput(WriterInstance, n, indent, flags, Depth + 1);
2908 
2909  if ((flags & FormatIndent) != 0 && (flags & FormatRaw) == 0)
2910  for (unsigned int i = 0; i < Depth; ++i) WriterInstance.Write(indent);
2911 
2912  WriterInstance.Write('<', '/');
2913  WriterInstance.Write(Name);
2914  WriterInstance.Write('>', '\n');
2915  }
2916 
2917  break;
2918  }
2919 
2920  case NodePcdata:
2921  text_output(WriterInstance, node.Value(), ctx_special_pcdata, flags);
2922  if ((flags & FormatRaw) == 0) WriterInstance.Write('\n');
2923  break;
2924 
2925  case NodeCdata:
2926  text_output_cdata(WriterInstance, node.Value());
2927  if ((flags & FormatRaw) == 0) WriterInstance.Write('\n');
2928  break;
2929 
2930  case NodeComment:
2931  WriterInstance.Write('<', '!', '-', '-');
2932  WriterInstance.Write(node.Value());
2933  WriterInstance.Write('-', '-', '>');
2934  if ((flags & FormatRaw) == 0) WriterInstance.Write('\n');
2935  break;
2936 
2937  case NodePi:
2938  case NodeDeclaration:
2939  WriterInstance.Write('<', '?');
2940  WriterInstance.Write(node.Name()[0] ? node.Name() : default_Name);
2941 
2942  if (node.Type() == NodeDeclaration)
2943  {
2944  NodeOutput_attributes(WriterInstance, node, flags);
2945  }
2946  else if (node.Value()[0])
2947  {
2948  WriterInstance.Write(' ');
2949  WriterInstance.Write(node.Value());
2950  }
2951 
2952  WriterInstance.Write('?', '>');
2953  if ((flags & FormatRaw) == 0) WriterInstance.Write('\n');
2954  break;
2955 
2956  case NodeDocType:
2957  WriterInstance.Write('<', '!', 'D', 'O', 'C');
2958  WriterInstance.Write('T', 'Y', 'P', 'E');
2959 
2960  if (node.Value()[0])
2961  {
2962  WriterInstance.Write(' ');
2963  WriterInstance.Write(node.Value());
2964  }
2965 
2966  WriterInstance.Write('>');
2967  if ((flags & FormatRaw) == 0) WriterInstance.Write('\n');
2968  break;
2969 
2970  default:
2971  assert(!"Invalid node Type");
2972  }
2973  }
2974 
2975  inline bool hAsDeclaration(const Node& node)
2976  {
2977  for (Node GetChild = node.GetFirstChild(); GetChild; GetChild = GetChild.GetNextSibling())
2978  {
2979  NodeType Type = GetChild.Type();
2980 
2981  if (Type == NodeDeclaration) return true;
2982  if (Type == NodeElement) return false;
2983  }
2984 
2985  return false;
2986  }
2987 
2988  inline bool allow_InsertChild(NodeType GetParent, NodeType GetChild)
2989  {
2990  if (GetParent != NodeDocument && GetParent != NodeElement) return false;
2991  if (GetChild == NodeDocument || GetChild == NodeNull) return false;
2992  if (GetParent != NodeDocument && (GetChild == NodeDeclaration || GetChild == NodeDocType)) return false;
2993 
2994  return true;
2995  }
2996 
2997  PUGI__FN void recursive_copy_skip(Node& dest, const Node& source, const Node& skip)
2998  {
2999  assert(dest.Type() == source.Type());
3000 
3001  switch (source.Type())
3002  {
3003  case NodeElement:
3004  {
3005  dest.SetName(source.Name());
3006 
3007  for (Attribute a = source.GetFirstAttribute(); a; a = a.GetNextAttribute())
3008  dest.AppendAttribute(a.Name()).SetValue(a.Value());
3009 
3010  for (Node c = source.GetFirstChild(); c; c = c.GetNextSibling())
3011  {
3012  if (c == skip) continue;
3013 
3014  Node cc = dest.AppendChild(c.Type());
3015  assert(cc);
3016 
3017  recursive_copy_skip(cc, c, skip);
3018  }
3019 
3020  break;
3021  }
3022 
3023  case NodePcdata:
3024  case NodeCdata:
3025  case NodeComment:
3026  case NodeDocType:
3027  dest.SetValue(source.Value());
3028  break;
3029 
3030  case NodePi:
3031  dest.SetName(source.Name());
3032  dest.SetValue(source.Value());
3033  break;
3034 
3035  case NodeDeclaration:
3036  {
3037  dest.SetName(source.Name());
3038 
3039  for (Attribute a = source.GetFirstAttribute(); a; a = a.GetNextAttribute())
3040  dest.AppendAttribute(a.Name()).SetValue(a.Value());
3041 
3042  break;
3043  }
3044 
3045  default:
3046  assert(!"Invalid node Type");
3047  }
3048  }
3049 
3050  inline bool is_text_node(NodeStruct* node)
3051  {
3052  NodeType Type = static_cast<NodeType>((node->header & internal::MemoryPage_type_mask) + 1);
3053 
3054  return Type == NodePcdata || Type == NodeCdata;
3055  }
3056 
3057  // get Value with conversion functions
3058  PUGI__FN int GetValue_int(const Char8* Value, int def)
3059  {
3060  if (!Value) return def;
3061 
3062  return static_cast<int>(strtol(Value, 0, 10));
3063  }
3064 
3065  PUGI__FN unsigned int GetValue_uint(const Char8* Value, unsigned int def)
3066  {
3067  if (!Value) return def;
3068 
3069  return static_cast<unsigned int>(strtoul(Value, 0, 10));
3070  }
3071 
3072  PUGI__FN double GetValue_double(const Char8* Value, double def)
3073  {
3074  if (!Value) return def;
3075 
3076  return strtod(Value, 0);
3077  }
3078 
3079  PUGI__FN float GetValue_float(const Char8* Value, float def)
3080  {
3081  if (!Value) return def;
3082 
3083  return static_cast<float>(strtod(Value, 0));
3084  }
3085 
3086  PUGI__FN bool GetValue_bool(const Char8* Value, bool def)
3087  {
3088  if (!Value) return def;
3089 
3090  // only look at first char
3091  Char8 first = *Value;
3092 
3093  // 1*, t* (true), T* (True), y* (yes), Y* (YES)
3094  return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
3095  }
3096 
3097  // set Value with conversion functions
3098  PUGI__FN bool SetValue_buffer(Char8*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])
3099  {
3100  return strcpy_insitu(dest, header, header_mask, buf);
3101  }
3102 
3103  PUGI__FN bool SetValue_convert(Char8*& dest, uintptr_t& header, uintptr_t header_mask, int Value)
3104  {
3105  char buf[128];
3106  sprintf(buf, "%d", Value);
3107 
3108  return SetValue_buffer(dest, header, header_mask, buf);
3109  }
3110 
3111  PUGI__FN bool SetValue_convert(Char8*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int Value)
3112  {
3113  char buf[128];
3114  sprintf(buf, "%u", Value);
3115 
3116  return SetValue_buffer(dest, header, header_mask, buf);
3117  }
3118 
3119  PUGI__FN bool SetValue_convert(Char8*& dest, uintptr_t& header, uintptr_t header_mask, double Value)
3120  {
3121  char buf[128];
3122  sprintf(buf, "%g", Value);
3123 
3124  return SetValue_buffer(dest, header, header_mask, buf);
3125  }
3126 
3127  PUGI__FN bool SetValue_convert(Char8*& dest, uintptr_t& header, uintptr_t header_mask, bool Value)
3128  {
3129  return strcpy_insitu(dest, header, header_mask, Value ? "true" : "false");
3130  }
3131 
3132  // we need to get length of entire file to Load it in memory; the only (relatively) sane way to do it is via seek/tell trick
3133  PUGI__FN ParseStatus GetFile_size(FILE* file, size_t& out_Result)
3134  {
3135  // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
3136  typedef long length_type;
3137 
3138  fseek(file, 0, SEEK_END);
3139  length_type length = ftell(file);
3140  fseek(file, 0, SEEK_SET);
3141 
3142  // check for I/O errors
3143  if (length < 0) return StatusIOError;
3144 
3145  // check for overflow
3146  size_t Result = static_cast<size_t>(length);
3147 
3148  if (static_cast<length_type>(Result) != length) return StatusOutOfMemory;
3149 
3150  // finalize
3151  out_Result = Result;
3152 
3153  return StatusOk;
3154  }
3155 
3156  PUGI__FN
3157  ParseResult LoadDataStreamImpl(Document& doc, Mezzanine::Resource::DataStream& stream, unsigned int options, Encoding DocumentEncoding)
3158  {
3159  // Copying mostly from the function below, a lot of what they try to do is not applicable with data streams since they already do it to some extent.
3160  size_t pos = stream.GetStreamPosition();
3161  size_t length = stream.GetSize() - pos;
3162 
3163  if (pos < 0) return make_ParseResult(StatusIOError);
3164 
3165  buffer_holder buffer(Memory::allocate(stream.GetSize() > 0 ? length : 1), Memory::deallocate);
3166  if (!buffer.data) return make_ParseResult(StatusOutOfMemory);
3167 
3168  size_t actual_length = stream.Read(buffer.data, length);
3169  assert(actual_length <= length);
3170 
3171  return doc.LoadBufferInplaceOwn(buffer.release(), actual_length, options, DocumentEncoding);
3172  }
3173 
3174  PUGI__FN ParseResult LoadFileImpl(Document& doc, FILE* file, unsigned int options, Encoding DocumentEncoding)
3175  {
3176  if (!file) return make_ParseResult(StatusFileNotFound);
3177 
3178  // get file size (can Result in I/O errors)
3179  size_t size = 0;
3180  ParseStatus size_Status = GetFile_size(file, size);
3181 
3182  if (size_Status != StatusOk)
3183  {
3184  fclose(file);
3185  return make_ParseResult(size_Status);
3186  }
3187 
3188  // allocate buffer for the whole file
3189  char* contents = static_cast<char*>(Memory::allocate(size > 0 ? size : 1));
3190 
3191  if (!contents)
3192  {
3193  fclose(file);
3194  return make_ParseResult(StatusOutOfMemory);
3195  }
3196 
3197  // read file in memory
3198  size_t read_size = fread(contents, 1, size, file);
3199  fclose(file);
3200 
3201  if (read_size != size)
3202  {
3203  Memory::deallocate(contents);
3204  return make_ParseResult(StatusIOError);
3205  }
3206 
3207  return doc.LoadBufferInplaceOwn(contents, size, options, DocumentEncoding);
3208  }
3209 
3210  template <typename T> struct StreamChunk
3211  {
3212  static StreamChunk* create()
3213  {
3214  void* memory = Memory::allocate(sizeof(StreamChunk));
3215 
3216  return new (memory) StreamChunk();
3217  }
3218 
3219  static void destroy(void* ptr)
3220  {
3221  StreamChunk* chunk = static_cast<StreamChunk*>(ptr);
3222 
3223  // free chunk chain
3224  while (chunk)
3225  {
3226  StreamChunk* next = chunk->next;
3227  Memory::deallocate(chunk);
3228  chunk = next;
3229  }
3230  }
3231 
3232  StreamChunk(): next(0), size(0)
3233  {
3234  }
3235 
3236  StreamChunk* next;
3237  size_t size;
3238 
3239  T data[MemoryPage_size / sizeof(T)];
3240  };
3241 
3242  template <typename T> PUGI__FN ParseStatus LoadStreamDataNoseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
3243  {
3244  buffer_holder chunks(0, StreamChunk<T>::destroy);
3245 
3246  // read file to a chunk list
3247  size_t total = 0;
3248  StreamChunk<T>* last = 0;
3249 
3250  while (!stream.eof())
3251  {
3252  // allocate new chunk
3253  StreamChunk<T>* chunk = StreamChunk<T>::create();
3254  if (!chunk) return StatusOutOfMemory;
3255 
3256  // append chunk to list
3257  if (last) last = last->next = chunk;
3258  else chunks.data = last = chunk;
3259 
3260  // read data to chunk
3261  stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
3262  chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
3263 
3264  // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
3265  if (stream.bad() || (!stream.eof() && stream.fail())) return StatusIOError;
3266 
3267  // guard against huge files (chunk size is small enough to make this overflow check work)
3268  if (total + chunk->size < total) return StatusOutOfMemory;
3269  total += chunk->size;
3270  }
3271 
3272  // copy chunk list to a contiguous buffer
3273  char* buffer = static_cast<char*>(Memory::allocate(total));
3274  if (!buffer) return StatusOutOfMemory;
3275 
3276  char* Write = buffer;
3277 
3278  for (StreamChunk<T>* chunk = static_cast<StreamChunk<T>*>(chunks.data); chunk; chunk = chunk->next)
3279  {
3280  assert(Write + chunk->size <= buffer + total);
3281  memcpy(Write, chunk->data, chunk->size);
3282  Write += chunk->size;
3283  }
3284 
3285  assert(Write == buffer + total);
3286 
3287  // return buffer
3288  *out_buffer = buffer;
3289  *out_size = total;
3290 
3291  return StatusOk;
3292  }
3293 
3294  template <typename T> PUGI__FN ParseStatus LoadStreamDataSeek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
3295  {
3296  // get length of remaining data in stream
3297  typename std::basic_istream<T>::pos_type pos = stream.tellg();
3298  stream.seekg(0, std::ios::end);
3299  std::streamoff length = stream.tellg() - pos;
3300  stream.seekg(pos);
3301 
3302  if (stream.fail() || pos < 0) return StatusIOError;
3303 
3304  // guard against huge files
3305  size_t read_length = static_cast<size_t>(length);
3306 
3307  if (static_cast<std::streamsize>(read_length) != length || length < 0) return StatusOutOfMemory;
3308 
3309  // read stream data into memory (guard against stream exceptions with buffer holder)
3310  buffer_holder buffer(Memory::allocate((read_length > 0 ? read_length : 1) * sizeof(T)), Memory::deallocate);
3311  if (!buffer.data) return StatusOutOfMemory;
3312 
3313  stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
3314 
3315  // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
3316  if (stream.bad() || (!stream.eof() && stream.fail())) return StatusIOError;
3317 
3318  // return buffer
3319  size_t actual_length = static_cast<size_t>(stream.gcount());
3320  assert(actual_length <= read_length);
3321 
3322  *out_buffer = buffer.release();
3323  *out_size = actual_length * sizeof(T);
3324 
3325  return StatusOk;
3326  }
3327 
3328  template <typename T> PUGI__FN ParseResult LoadStreamImpl(Document& doc, std::basic_istream<T>& stream, unsigned int options, Encoding DocumentEncoding)
3329  {
3330  void* buffer = 0;
3331  size_t size = 0;
3332 
3333  // Load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
3334  ParseStatus Status = (stream.tellg() < 0) ? LoadStreamDataNoseek(stream, &buffer, &size) : LoadStreamDataSeek(stream, &buffer, &size);
3335  if (Status != StatusOk) return make_ParseResult(Status);
3336 
3337  return doc.LoadBufferInplaceOwn(buffer, size, options, DocumentEncoding);
3338  }
3339 
3340 
3341 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && !defined(__STRICT_ANSI__))
3342  PUGI__FN FILE* open_file_wide(const wchar_t* Path, const wchar_t* mode)
3343  {
3344  return _wfopen(Path, mode);
3345  }
3346 #else
3347  PUGI__FN char* convert_Path_heap(const wchar_t* str)
3348  {
3349  assert(str);
3350 
3351  // first pass: get length in utf8 characters
3352  size_t length = wcslen(str);
3353  size_t size = AsUtf8_begin(str, length);
3354 
3355  // allocate Resulting string
3356  char* Result = static_cast<char*>(Memory::allocate(size + 1));
3357  if (!Result) return 0;
3358 
3359  // second pass: convert to utf8
3360  AsUtf8_end(Result, size, str, length);
3361 
3362  return Result;
3363  }
3364 
3365  PUGI__FN FILE* open_file_wide(const wchar_t* Path, const wchar_t* mode)
3366  {
3367  // there is no standard function to open wide Paths, so our best bet is to try utf8 Path
3368  char* Path_utf8 = convert_Path_heap(Path);
3369  if (!Path_utf8) return 0;
3370 
3371  // convert mode to ASCII (we mirror _wfopen interface)
3372  char mode_ascii[4] = {0};
3373  for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
3374 
3375  // try to open the utf8 Path
3376  FILE* Result = fopen(Path_utf8, mode_ascii);
3377 
3378  // free dummy buffer
3379  Memory::deallocate(Path_utf8);
3380 
3381  return Result;
3382  }
3383 #endif
3384 
3385  PUGI__FN bool SaveFileImpl(const Document& doc, FILE* file, const Char8* indent, unsigned int flags, Encoding DocumentEncoding)
3386  {
3387  if (!file) return false;
3388 
3389  WriterFile WriterInstance(file);
3390  doc.Save(WriterInstance, indent, flags, DocumentEncoding);
3391 
3392  int Result = ferror(file);
3393 
3394  fclose(file);
3395 
3396  return Result == 0;
3397  }
3398 PUGI__NS_END
3399 
3400 namespace XML
3401 {
3402  #ifndef SWIG_SAFE
3403  PUGI__FN WriterFile::WriterFile(void* FilePtr): TargetFile(FilePtr)
3404  {
3405  }
3406 
3407  PUGI__FN void WriterFile::Write(const void* data, size_t size)
3408  {
3409  size_t Result = fwrite(data, 1, size, static_cast<FILE*>(TargetFile));
3410  (void)!Result; // unfortunately we can't do proper error handling here
3411  }
3412 
3413 
3414  PUGI__FN WriterStream::WriterStream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
3415  {
3416  }
3417 
3418  PUGI__FN WriterStream::WriterStream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
3419  {
3420  }
3421 
3422  PUGI__FN void WriterStream::Write(const void* data, size_t size)
3423  {
3424  if (narrow_stream)
3425  {
3426  assert(!wide_stream);
3427  narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
3428  }
3429  else
3430  {
3431  assert(wide_stream);
3432  assert(size % sizeof(wchar_t) == 0);
3433 
3434  wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
3435  }
3436  }
3437  #endif //SWIG_SAFE
3438 
3439  PUGI__FN TreeWalker::TreeWalker(): TraversalDepth(0)
3440  {
3441  }
3442 
3443  PUGI__FN TreeWalker::~TreeWalker()
3444  {
3445  }
3446 
3447  PUGI__FN int TreeWalker::Depth() const
3448  {
3449  return TraversalDepth;
3450  }
3451 
3452  PUGI__FN bool TreeWalker::OnTraversalBegin(Node&)
3453  {
3454  return true;
3455  }
3456 
3457  PUGI__FN bool TreeWalker::OnTraversalEnd(Node&)
3458  {
3459  return true;
3460  }
3461 
3462  PUGI__FN Attribute::Attribute(): AttributeData(0)
3463  {
3464  }
3465 
3466  PUGI__FN Attribute::Attribute(AttributeStruct* attr): AttributeData(attr)
3467  {
3468  }
3469 
3470  PUGI__FN static void unspecified_bool_Attribute(Attribute***)
3471  {
3472  }
3473 
3474  PUGI__FN Attribute::operator Attribute::unspecified_bool_type() const
3475  {
3476  return AttributeData ? unspecified_bool_Attribute : 0;
3477  }
3478 
3479  PUGI__FN bool Attribute::operator!() const
3480  {
3481  return !AttributeData;
3482  }
3483 
3484  PUGI__FN bool Attribute::operator==(const Attribute& r) const
3485  {
3486  return (AttributeData == r.AttributeData);
3487  }
3488 
3489  PUGI__FN bool Attribute::operator!=(const Attribute& r) const
3490  {
3491  return (AttributeData != r.AttributeData);
3492  }
3493 
3494  PUGI__FN bool Attribute::operator<(const Attribute& r) const
3495  {
3496  return (AttributeData < r.AttributeData);
3497  }
3498 
3499  PUGI__FN bool Attribute::operator>(const Attribute& r) const
3500  {
3501  return (AttributeData > r.AttributeData);
3502  }
3503 
3504  PUGI__FN bool Attribute::operator<=(const Attribute& r) const
3505  {
3506  return (AttributeData <= r.AttributeData);
3507  }
3508 
3509  PUGI__FN bool Attribute::operator>=(const Attribute& r) const
3510  {
3511  return (AttributeData >= r.AttributeData);
3512  }
3513 
3514  PUGI__FN Attribute Attribute::GetNextAttribute() const
3515  {
3516  return AttributeData ? Attribute(AttributeData->GetNextAttribute) : Attribute();
3517  }
3518 
3519  PUGI__FN Attribute Attribute::GetPreviousAttribute() const
3520  {
3521  return AttributeData && AttributeData->prev_attribute_c->GetNextAttribute ? Attribute(AttributeData->prev_attribute_c) : Attribute();
3522  }
3523 
3524  PUGI__FN const Char8* Attribute::AsString(const Char8* def) const
3525  {
3526  return (AttributeData && AttributeData->Value) ? AttributeData->Value : def;
3527  }
3528 
3529  PUGI__FN int Attribute::AsInt(int def) const
3530  {
3531  return internal::GetValue_int(AttributeData ? AttributeData->Value : 0, def);
3532  }
3533 
3534  PUGI__FN unsigned int Attribute::AsUint(unsigned int def) const
3535  {
3536  return internal::GetValue_uint(AttributeData ? AttributeData->Value : 0, def);
3537  }
3538 
3539  PUGI__FN double Attribute::AsDouble(double def) const
3540  {
3541  return internal::GetValue_double(AttributeData ? AttributeData->Value : 0, def);
3542  }
3543 
3544  PUGI__FN Whole Attribute::AsWhole(Whole def) const
3545  { return (AttributeData ? ToWhole(AttributeData->Value) : def); }
3546 
3547  PUGI__FN Integer Attribute::AsInteger(Integer def) const
3548  { return (AttributeData ? ToInteger(AttributeData->Value) : def); }
3549 
3550  PUGI__FN Real Attribute::AsReal(Real def) const
3551  { return (AttributeData ? ToReal(AttributeData->Value) : def); }
3552 
3553  PUGI__FN float Attribute::AsFloat(float def) const
3554  {
3555  return internal::GetValue_float(AttributeData ? AttributeData->Value : 0, def);
3556  }
3557 
3558  PUGI__FN bool Attribute::AsBool(bool def) const
3559  {
3560  return internal::GetValue_bool(AttributeData ? AttributeData->Value : 0, def);
3561  }
3562 
3563  PUGI__FN bool Attribute::Empty() const
3564  {
3565  return !AttributeData;
3566  }
3567 
3568  PUGI__FN const Char8* Attribute::Name() const
3569  {
3570  return (AttributeData && AttributeData->Name) ? AttributeData->Name : "";
3571  }
3572 
3573  PUGI__FN const Char8* Attribute::Value() const
3574  {
3575  return (AttributeData && AttributeData->Value) ? AttributeData->Value : "";
3576  }
3577 
3578  PUGI__FN size_t Attribute::HashValue() const
3579  {
3580  return static_cast<size_t>(reinterpret_cast<uintptr_t>(AttributeData) / sizeof(AttributeStruct));
3581  }
3582 
3583  PUGI__FN AttributeStruct* Attribute::InternalObject() const
3584  {
3585  return AttributeData;
3586  }
3587 
3588  PUGI__FN Attribute& Attribute::operator=(const Char8* rhs)
3589  {
3590  SetValue(rhs);
3591  return *this;
3592  }
3593 
3594  PUGI__FN Attribute& Attribute::operator=(int rhs)
3595  {
3596  SetValue(rhs);
3597  return *this;
3598  }
3599 
3600  PUGI__FN Attribute& Attribute::operator=(unsigned int rhs)
3601  {
3602  SetValue(rhs);
3603  return *this;
3604  }
3605 
3606  PUGI__FN Attribute& Attribute::operator=(double rhs)
3607  {
3608  SetValue(rhs);
3609  return *this;
3610  }
3611 
3612  PUGI__FN Attribute& Attribute::operator=(bool rhs)
3613  {
3614  SetValue(rhs);
3615  return *this;
3616  }
3617 
3618  PUGI__FN bool Attribute::SetName(const Char8* rhs)
3619  {
3620  if (!AttributeData) return false;
3621 
3622  return internal::strcpy_insitu(AttributeData->Name, AttributeData->header, internal::MemoryPage_Name_allocated_mask, rhs);
3623  }
3624 
3625  PUGI__FN bool Attribute::SetValue(const Char8* rhs)
3626  {
3627  if (!AttributeData) return false;
3628 
3629  return internal::strcpy_insitu(AttributeData->Value, AttributeData->header, internal::MemoryPage_Value_allocated_mask, rhs);
3630  }
3631 
3632  PUGI__FN bool Attribute::SetValue(int rhs)
3633  {
3634  if (!AttributeData) return false;
3635 
3636  return internal::SetValue_convert(AttributeData->Value, AttributeData->header, internal::MemoryPage_Value_allocated_mask, rhs);
3637  }
3638 
3639  PUGI__FN bool Attribute::SetValue(unsigned int rhs)
3640  {
3641  if (!AttributeData) return false;
3642 
3643  return internal::SetValue_convert(AttributeData->Value, AttributeData->header, internal::MemoryPage_Value_allocated_mask, rhs);
3644  }
3645 
3646  PUGI__FN bool Attribute::SetValue(double rhs)
3647  {
3648  if (!AttributeData) return false;
3649 
3650  return internal::SetValue_convert(AttributeData->Value, AttributeData->header, internal::MemoryPage_Value_allocated_mask, rhs);
3651  }
3652 
3653  PUGI__FN bool Attribute::SetValue(bool rhs)
3654  {
3655  if (!AttributeData) return false;
3656 
3657  return internal::SetValue_convert(AttributeData->Value, AttributeData->header, internal::MemoryPage_Value_allocated_mask, rhs);
3658  }
3659 
3660 #ifdef __BORLANDC__
3661  PUGI__FN bool operator&&(const Attribute& lhs, bool rhs)
3662  {
3663  return (bool)lhs && rhs;
3664  }
3665 
3666  PUGI__FN bool operator||(const Attribute& lhs, bool rhs)
3667  {
3668  return (bool)lhs || rhs;
3669  }
3670 #endif
3671 
3672  PUGI__FN Node::Node(): NodeData(0)
3673  {
3674  }
3675 
3676  PUGI__FN Node::~Node()
3677  {
3678  }
3679 
3680  PUGI__FN Node::Node(NodeStruct* p): NodeData(p)
3681  {
3682  }
3683 
3684  PUGI__FN static void unspecified_bool_Node(Node***)
3685  {
3686  }
3687 
3688  PUGI__FN Node::operator Node::unspecified_bool_type() const
3689  {
3690  return NodeData ? unspecified_bool_Node : 0;
3691  }
3692 
3693  PUGI__FN bool Node::operator!() const
3694  {
3695  return !NodeData;
3696  }
3697 
3698  PUGI__FN Node::iterator Node::begin() const
3699  {
3700  return iterator(NodeData ? NodeData->GetFirstChild : 0, NodeData);
3701  }
3702 
3703  PUGI__FN Node::iterator Node::end() const
3704  {
3705  return iterator(0, NodeData);
3706  }
3707 
3709  {
3710  return attribute_iterator(NodeData ? NodeData->GetFirstAttribute : 0, NodeData);
3711  }
3712 
3714  {
3715  return attribute_iterator(0, NodeData);
3716  }
3717 
3718  PUGI__FN ObjectRange<NodeIterator> Node::GetChildren() const
3719  {
3720  return ObjectRange<NodeIterator>(begin(), end());
3721  }
3722 
3723  PUGI__FN ObjectRange<NamedNodeIterator> Node::GetChildren(const Char8* Name_) const
3724  {
3725  return ObjectRange<NamedNodeIterator>(NamedNodeIterator(GetChild(Name_), Name_), NamedNodeIterator());
3726  }
3727 
3728  PUGI__FN ObjectRange<AttributeIterator> Node::attributes() const
3729  {
3730  return ObjectRange<AttributeIterator>(attributes_begin(), attributes_end());
3731  }
3732 
3733  PUGI__FN bool Node::operator==(const Node& r) const
3734  {
3735  return (NodeData == r.NodeData);
3736  }
3737 
3738  PUGI__FN bool Node::operator!=(const Node& r) const
3739  {
3740  return (NodeData != r.NodeData);
3741  }
3742 
3743  PUGI__FN bool Node::operator<(const Node& r) const
3744  {
3745  return (NodeData < r.NodeData);
3746  }
3747 
3748  PUGI__FN bool Node::operator>(const Node& r) const
3749  {
3750  return (NodeData > r.NodeData);
3751  }
3752 
3753  PUGI__FN bool Node::operator<=(const Node& r) const
3754  {
3755  return (NodeData <= r.NodeData);
3756  }
3757 
3758  PUGI__FN bool Node::operator>=(const Node& r) const
3759  {
3760  return (NodeData >= r.NodeData);
3761  }
3762 
3763  PUGI__FN bool Node::Empty() const
3764  {
3765  return !NodeData;
3766  }
3767 
3768  PUGI__FN const Char8* Node::Name() const
3769  {
3770  return (NodeData && NodeData->Name) ? NodeData->Name : "";
3771  }
3772 
3773  PUGI__FN NodeType Node::Type() const
3774  {
3775  return NodeData ? static_cast<NodeType>((NodeData->header & internal::MemoryPage_type_mask) + 1) : NodeNull;
3776  }
3777 
3778  PUGI__FN const Char8* Node::Value() const
3779  {
3780  return (NodeData && NodeData->Value) ? NodeData->Value : "";
3781  }
3782 
3783  PUGI__FN Node Node::GetChild(const Char8* Name_) const
3784  {
3785  if (!NodeData) return Node();
3786 
3787  for (NodeStruct* i = NodeData->GetFirstChild; i; i = i->GetNextSibling)
3788  if (i->Name && internal::strequal(Name_, i->Name)) return Node(i);
3789 
3790  return Node();
3791  }
3792 
3793  PUGI__FN Attribute Node::GetAttribute(const Char8* Name_) const
3794  {
3795  if (!NodeData) return Attribute();
3796 
3797  for (AttributeStruct* i = NodeData->GetFirstAttribute; i; i = i->GetNextAttribute)
3798  if (i->Name && internal::strequal(Name_, i->Name))
3799  return Attribute(i);
3800 
3801  return Attribute();
3802  }
3803 
3804  PUGI__FN Node Node::GetNextSibling(const Char8* Name_) const
3805  {
3806  if (!NodeData) return Node();
3807 
3808  for (NodeStruct* i = NodeData->GetNextSibling; i; i = i->GetNextSibling)
3809  if (i->Name && internal::strequal(Name_, i->Name)) return Node(i);
3810 
3811  return Node();
3812  }
3813 
3814  PUGI__FN Node Node::GetNextSibling() const
3815  {
3816  if (!NodeData) return Node();
3817 
3818  if (NodeData->GetNextSibling) return Node(NodeData->GetNextSibling);
3819  else return Node();
3820  }
3821 
3822  PUGI__FN Node Node::GetPreviousSibling(const Char8* Name_) const
3823  {
3824  if (!NodeData) return Node();
3825 
3826  for (NodeStruct* i = NodeData->prev_sibling_c; i->GetNextSibling; i = i->prev_sibling_c)
3827  if (i->Name && internal::strequal(Name_, i->Name)) return Node(i);
3828 
3829  return Node();
3830  }
3831 
3832  PUGI__FN Node Node::GetPreviousSibling() const
3833  {
3834  if (!NodeData) return Node();
3835 
3836  if (NodeData->prev_sibling_c->GetNextSibling) return Node(NodeData->prev_sibling_c);
3837  else return Node();
3838  }
3839 
3840  PUGI__FN Node Node::GetParent() const
3841  {
3842  return NodeData ? Node(NodeData->GetParent) : Node();
3843  }
3844 
3845  PUGI__FN Node Node::GetRoot() const
3846  {
3847  if (!NodeData) return Node();
3848 
3849  internal::MemoryPage* page = reinterpret_cast<internal::MemoryPage*>(NodeData->header & internal::MemoryPage_pointer_mask);
3850 
3851  return Node(static_cast<internal::DocumentStruct*>(page->allocator));
3852  }
3853 
3854  PUGI__FN NodeText Node::GetText() const
3855  {
3856  return NodeText(NodeData);
3857  }
3858 
3859  PUGI__FN const Char8* Node::GetChildValue() const
3860  {
3861  if (!NodeData) return "";
3862 
3863  for (NodeStruct* i = NodeData->GetFirstChild; i; i = i->GetNextSibling)
3864  if (i->Value && internal::is_text_node(i))
3865  return i->Value;
3866 
3867  return "";
3868  }
3869 
3870  PUGI__FN const Char8* Node::GetChildValue(const Char8* Name_) const
3871  {
3872  return GetChild(Name_).GetChildValue();
3873  }
3874 
3875  PUGI__FN Attribute Node::GetFirstAttribute() const
3876  {
3877  return NodeData ? Attribute(NodeData->GetFirstAttribute) : Attribute();
3878  }
3879 
3880  PUGI__FN Attribute Node::GetLastAttribute() const
3881  {
3882  return NodeData && NodeData->GetFirstAttribute ? Attribute(NodeData->GetFirstAttribute->prev_attribute_c) : Attribute();
3883  }
3884 
3885  PUGI__FN Node Node::GetFirstChild() const
3886  {
3887  return NodeData ? Node(NodeData->GetFirstChild) : Node();
3888  }
3889 
3890  PUGI__FN Node Node::GetLastChild() const
3891  {
3892  return NodeData && NodeData->GetFirstChild ? Node(NodeData->GetFirstChild->prev_sibling_c) : Node();
3893  }
3894 
3895  PUGI__FN bool Node::SetName(const Char8* rhs)
3896  {
3897  switch (Type())
3898  {
3899  case NodePi:
3900  case NodeDeclaration:
3901  case NodeElement:
3902  return internal::strcpy_insitu(NodeData->Name, NodeData->header, internal::MemoryPage_Name_allocated_mask, rhs);
3903 
3904  default:
3905  return false;
3906  }
3907  }
3908 
3909  PUGI__FN bool Node::SetValue(const Char8* rhs)
3910  {
3911  switch (Type())
3912  {
3913  case NodePi:
3914  case NodeCdata:
3915  case NodePcdata:
3916  case NodeComment:
3917  case NodeDocType:
3918  return internal::strcpy_insitu(NodeData->Value, NodeData->header, internal::MemoryPage_Value_allocated_mask, rhs);
3919 
3920  default:
3921  return false;
3922  }
3923  }
3924 
3925  PUGI__FN Attribute Node::AppendAttribute(const Char8* Name_)
3926  {
3927  if (Type() != NodeElement && Type() != NodeDeclaration) return Attribute();
3928 
3929  Attribute a(internal::AppendAttribute_ll(NodeData, internal::GetAllocator(NodeData)));
3930  a.SetName(Name_);
3931 
3932  return a;
3933  }
3934 
3935  PUGI__FN Attribute Node::PrependAttribute(const Char8* Name_)
3936  {
3937  if (Type() != NodeElement && Type() != NodeDeclaration) return Attribute();
3938 
3939  Attribute a(internal::allocate_attribute(internal::GetAllocator(NodeData)));
3940  if (!a) return Attribute();
3941 
3942  a.SetName(Name_);
3943 
3944  AttributeStruct* head = NodeData->GetFirstAttribute;
3945 
3946  if (head)
3947  {
3948  a.AttributeData->prev_attribute_c = head->prev_attribute_c;
3949  head->prev_attribute_c = a.AttributeData;
3950  }
3951  else
3952  a.AttributeData->prev_attribute_c = a.AttributeData;
3953 
3954  a.AttributeData->GetNextAttribute = head;
3955  NodeData->GetFirstAttribute = a.AttributeData;
3956 
3957  return a;
3958  }
3959 
3960  PUGI__FN Attribute Node::InsertAttributeBefore(const Char8* Name_, const Attribute& attr)
3961  {
3962  if ((Type() != NodeElement && Type() != NodeDeclaration) || attr.Empty()) return Attribute();
3963 
3964  // check that GetAttribute belongs to *this
3965  AttributeStruct* cur = attr.AttributeData;
3966 
3967  while (cur->prev_attribute_c->GetNextAttribute) cur = cur->prev_attribute_c;
3968 
3969  if (cur != NodeData->GetFirstAttribute) return Attribute();
3970 
3971  Attribute a(internal::allocate_attribute(internal::GetAllocator(NodeData)));
3972  if (!a) return Attribute();
3973 
3974  a.SetName(Name_);
3975 
3976  if (attr.AttributeData->prev_attribute_c->GetNextAttribute)
3977  attr.AttributeData->prev_attribute_c->GetNextAttribute = a.AttributeData;
3978  else
3979  NodeData->GetFirstAttribute = a.AttributeData;
3980 
3981  a.AttributeData->prev_attribute_c = attr.AttributeData->prev_attribute_c;
3982  a.AttributeData->GetNextAttribute = attr.AttributeData;
3983  attr.AttributeData->prev_attribute_c = a.AttributeData;
3984 
3985  return a;
3986  }
3987 
3988  PUGI__FN Attribute Node::InsertAttributeAfter(const Char8* Name_, const Attribute& attr)
3989  {
3990  if ((Type() != NodeElement && Type() != NodeDeclaration) || attr.Empty()) return Attribute();
3991 
3992  // check that GetAttribute belongs to *this
3993  AttributeStruct* cur = attr.AttributeData;
3994 
3995  while (cur->prev_attribute_c->GetNextAttribute) cur = cur->prev_attribute_c;
3996 
3997  if (cur != NodeData->GetFirstAttribute) return Attribute();
3998 
3999  Attribute a(internal::allocate_attribute(internal::GetAllocator(NodeData)));
4000  if (!a) return Attribute();
4001 
4002  a.SetName(Name_);
4003 
4004  if (attr.AttributeData->GetNextAttribute)
4005  attr.AttributeData->GetNextAttribute->prev_attribute_c = a.AttributeData;
4006  else
4007  NodeData->GetFirstAttribute->prev_attribute_c = a.AttributeData;
4008 
4009  a.AttributeData->GetNextAttribute = attr.AttributeData->GetNextAttribute;
4010  a.AttributeData->prev_attribute_c = attr.AttributeData;
4011  attr.AttributeData->GetNextAttribute = a.AttributeData;
4012 
4013  return a;
4014  }
4015 
4016  PUGI__FN Attribute Node::AppendCopy(const Attribute& proto)
4017  {
4018  if (!proto) return Attribute();
4019 
4020  Attribute Result = AppendAttribute(proto.Name());
4021  Result.SetValue(proto.Value());
4022 
4023  return Result;
4024  }
4025 
4026  PUGI__FN Attribute Node::PrependCopy(const Attribute& proto)
4027  {
4028  if (!proto) return Attribute();
4029 
4030  Attribute Result = PrependAttribute(proto.Name());
4031  Result.SetValue(proto.Value());
4032 
4033  return Result;
4034  }
4035 
4036  PUGI__FN Attribute Node::InsertCopyAfter(const Attribute& proto, const Attribute& attr)
4037  {
4038  if (!proto) return Attribute();
4039 
4040  Attribute Result = InsertAttributeAfter(proto.Name(), attr);
4041  Result.SetValue(proto.Value());
4042 
4043  return Result;
4044  }
4045 
4046  PUGI__FN Attribute Node::InsertCopyBefore(const Attribute& proto, const Attribute& attr)
4047  {
4048  if (!proto) return Attribute();
4049 
4050  Attribute Result = InsertAttributeBefore(proto.Name(), attr);
4051  Result.SetValue(proto.Value());
4052 
4053  return Result;
4054  }
4055 
4056  PUGI__FN Node Node::AppendChild(NodeType Type_)
4057  {
4058  if (!internal::allow_InsertChild(this->Type(), Type_)) return Node();
4059 
4060  Node n(internal::AppendNode(NodeData, internal::GetAllocator(NodeData), Type_));
4061 
4062  if (Type_ == NodeDeclaration) n.SetName("xml");
4063 
4064  return n;
4065  }
4066 
4067  PUGI__FN Node Node::PrependChild(NodeType Type_)
4068  {
4069  if (!internal::allow_InsertChild(this->Type(), Type_)) return Node();
4070 
4071  Node n(internal::allocate_node(internal::GetAllocator(NodeData), Type_));
4072  if (!n) return Node();
4073 
4074  n.NodeData->GetParent = NodeData;
4075 
4076  NodeStruct* head = NodeData->GetFirstChild;
4077 
4078  if (head)
4079  {
4080  n.NodeData->prev_sibling_c = head->prev_sibling_c;
4081  head->prev_sibling_c = n.NodeData;
4082  }
4083  else
4084  n.NodeData->prev_sibling_c = n.NodeData;
4085 
4086  n.NodeData->GetNextSibling = head;
4087  NodeData->GetFirstChild = n.NodeData;
4088 
4089  if (Type_ == NodeDeclaration) n.SetName("xml");
4090 
4091  return n;
4092  }
4093 
4094  PUGI__FN Node Node::InsertChildBefore(NodeType Type_, const Node& node)
4095  {
4096  if (!internal::allow_InsertChild(this->Type(), Type_)) return Node();
4097  if (!node.NodeData || node.NodeData->GetParent != NodeData) return Node();
4098 
4099  Node n(internal::allocate_node(internal::GetAllocator(NodeData), Type_));
4100  if (!n) return Node();
4101 
4102  n.NodeData->GetParent = NodeData;
4103 
4104  if (node.NodeData->prev_sibling_c->GetNextSibling)
4105  node.NodeData->prev_sibling_c->GetNextSibling = n.NodeData;
4106  else
4107  NodeData->GetFirstChild = n.NodeData;
4108 
4109  n.NodeData->prev_sibling_c = node.NodeData->prev_sibling_c;
4110  n.NodeData->GetNextSibling = node.NodeData;
4111  node.NodeData->prev_sibling_c = n.NodeData;
4112 
4113  if (Type_ == NodeDeclaration) n.SetName("xml");
4114 
4115  return n;
4116  }
4117 
4118  PUGI__FN Node Node::InsertChildAfter(NodeType Type_, const Node& node)
4119  {
4120  if (!internal::allow_InsertChild(this->Type(), Type_)) return Node();
4121  if (!node.NodeData || node.NodeData->GetParent != NodeData) return Node();
4122 
4123  Node n(internal::allocate_node(internal::GetAllocator(NodeData), Type_));
4124  if (!n) return Node();
4125 
4126  n.NodeData->GetParent = NodeData;
4127 
4128  if (node.NodeData->GetNextSibling)
4129  node.NodeData->GetNextSibling->prev_sibling_c = n.NodeData;
4130  else
4131  NodeData->GetFirstChild->prev_sibling_c = n.NodeData;
4132 
4133  n.NodeData->GetNextSibling = node.NodeData->GetNextSibling;
4134  n.NodeData->prev_sibling_c = node.NodeData;
4135  node.NodeData->GetNextSibling = n.NodeData;
4136 
4137  if (Type_ == NodeDeclaration) n.SetName("xml");
4138 
4139  return n;
4140  }
4141 
4142  PUGI__FN Node Node::AppendChild(const Char8* Name_)
4143  {
4144  Node Result = AppendChild(NodeElement);
4145 
4146  Result.SetName(Name_);
4147 
4148  return Result;
4149  }
4150 
4151  PUGI__FN Node Node::PrependChild(const Char8* Name_)
4152  {
4153  Node Result = PrependChild(NodeElement);
4154 
4155  Result.SetName(Name_);
4156 
4157  return Result;
4158  }
4159 
4160  PUGI__FN Node Node::InsertChildAfter(const Char8* Name_, const Node& node)
4161  {
4162  Node Result = InsertChildAfter(NodeElement, node);
4163 
4164  Result.SetName(Name_);
4165 
4166  return Result;
4167  }
4168 
4169  PUGI__FN Node Node::InsertChildBefore(const Char8* Name_, const Node& node)
4170  {
4171  Node Result = InsertChildBefore(NodeElement, node);
4172 
4173  Result.SetName(Name_);
4174 
4175  return Result;
4176  }
4177 
4178  PUGI__FN Node Node::AppendCopy(const Node& proto)
4179  {
4180  Node Result = AppendChild(proto.Type());
4181 
4182  if (Result) internal::recursive_copy_skip(Result, proto, Result);
4183 
4184  return Result;
4185  }
4186 
4187  PUGI__FN Node Node::PrependCopy(const Node& proto)
4188  {
4189  Node Result = PrependChild(proto.Type());
4190 
4191  if (Result) internal::recursive_copy_skip(Result, proto, Result);
4192 
4193  return Result;
4194  }
4195 
4196  PUGI__FN Node Node::InsertCopyAfter(const Node& proto, const Node& node)
4197  {
4198  Node Result = InsertChildAfter(proto.Type(), node);
4199 
4200  if (Result) internal::recursive_copy_skip(Result, proto, Result);
4201 
4202  return Result;
4203  }
4204 
4205  PUGI__FN Node Node::InsertCopyBefore(const Node& proto, const Node& node)
4206  {
4207  Node Result = InsertChildBefore(proto.Type(), node);
4208 
4209  if (Result) internal::recursive_copy_skip(Result, proto, Result);
4210 
4211  return Result;
4212  }
4213 
4214  PUGI__FN bool Node::RemoveAttribute(const Char8* Name_)
4215  {
4216  return RemoveAttribute(GetAttribute(Name_));
4217  }
4218 
4219  PUGI__FN bool Node::RemoveAttribute(const Attribute& a)
4220  {
4221  if (!NodeData || !a.AttributeData) return false;
4222 
4223  // check that GetAttribute belongs to *this
4224  AttributeStruct* attr = a.AttributeData;
4225 
4226  while (attr->prev_attribute_c->GetNextAttribute) attr = attr->prev_attribute_c;
4227 
4228  if (attr != NodeData->GetFirstAttribute) return false;
4229 
4230  if (a.AttributeData->GetNextAttribute) a.AttributeData->GetNextAttribute->prev_attribute_c = a.AttributeData->prev_attribute_c;
4231  else if (NodeData->GetFirstAttribute) NodeData->GetFirstAttribute->prev_attribute_c = a.AttributeData->prev_attribute_c;
4232 
4233  if (a.AttributeData->prev_attribute_c->GetNextAttribute) a.AttributeData->prev_attribute_c->GetNextAttribute = a.AttributeData->GetNextAttribute;
4234  else NodeData->GetFirstAttribute = a.AttributeData->GetNextAttribute;
4235 
4236  internal::destroy_attribute(a.AttributeData, internal::GetAllocator(NodeData));
4237 
4238  return true;
4239  }
4240 
4241  PUGI__FN bool Node::RemoveChild(const Char8* Name_)
4242  {
4243  return RemoveChild(GetChild(Name_));
4244  }
4245 
4246  PUGI__FN bool Node::RemoveChild(const Node& n)
4247  {
4248  if (!NodeData || !n.NodeData || n.NodeData->GetParent != NodeData) return false;
4249 
4250  if (n.NodeData->GetNextSibling) n.NodeData->GetNextSibling->prev_sibling_c = n.NodeData->prev_sibling_c;
4251  else if (NodeData->GetFirstChild) NodeData->GetFirstChild->prev_sibling_c = n.NodeData->prev_sibling_c;
4252 
4253  if (n.NodeData->prev_sibling_c->GetNextSibling) n.NodeData->prev_sibling_c->GetNextSibling = n.NodeData->GetNextSibling;
4254  else NodeData->GetFirstChild = n.NodeData->GetNextSibling;
4255 
4256  internal::destroy_node(n.NodeData, internal::GetAllocator(NodeData));
4257 
4258  return true;
4259  }
4260 
4261  PUGI__FN Node Node::FindChildbyAttribute(const Char8* Name_, const Char8* AttrName, const Char8* AttrValue) const
4262  {
4263  if (!NodeData) return Node();
4264 
4265  for (NodeStruct* i = NodeData->GetFirstChild; i; i = i->GetNextSibling)
4266  if (i->Name && internal::strequal(Name_, i->Name))
4267  {
4268  for (AttributeStruct* a = i->GetFirstAttribute; a; a = a->GetNextAttribute)
4269  if (internal::strequal(AttrName, a->Name) && internal::strequal(AttrValue, a->Value))
4270  return Node(i);
4271  }
4272 
4273  return Node();
4274  }
4275 
4276  PUGI__FN Node Node::FindChildbyAttribute(const Char8* AttrName, const Char8* AttrValue) const
4277  {
4278  if (!NodeData) return Node();
4279 
4280  for (NodeStruct* i = NodeData->GetFirstChild; i; i = i->GetNextSibling)
4281  for (AttributeStruct* a = i->GetFirstAttribute; a; a = a->GetNextAttribute)
4282  if (internal::strequal(AttrName, a->Name) && internal::strequal(AttrValue, a->Value))
4283  return Node(i);
4284 
4285  return Node();
4286  }
4287 
4288  PUGI__FN String Node::Path(Char8 delimiter) const
4289  {
4290  Node cursor = *this; // Make a copy.
4291 
4292  String Result = cursor.Name();
4293 
4294  while (cursor.GetParent())
4295  {
4296  cursor = cursor.GetParent();
4297 
4298  String temp = cursor.Name();
4299  temp += delimiter;
4300  temp += Result;
4301  Result.swap(temp);
4302  }
4303 
4304  return Result;
4305  }
4306 
4307  PUGI__FN Node Node::FirstElementByPath(const Char8* Path_, Char8 delimiter) const
4308  {
4309  Node found = *this; // Current search context.
4310 
4311  if (!NodeData || !Path_ || !Path_[0]) return found;
4312 
4313  if (Path_[0] == delimiter)
4314  {
4315  // Absolute Path; e.g. '/foo/bar'
4316  found = found.GetRoot();
4317  ++Path_;
4318  }
4319 
4320  const Char8* Path_segment = Path_;
4321 
4322  while (*Path_segment == delimiter) ++Path_segment;
4323 
4324  const Char8* Path_segment_end = Path_segment;
4325 
4326  while (*Path_segment_end && *Path_segment_end != delimiter) ++Path_segment_end;
4327 
4328  if (Path_segment == Path_segment_end) return found;
4329 
4330  const Char8* NextSegment = Path_segment_end;
4331 
4332  while (*NextSegment == delimiter) ++NextSegment;
4333 
4334  if (*Path_segment == '.' && Path_segment + 1 == Path_segment_end)
4335  return found.FirstElementByPath(NextSegment, delimiter);
4336  else if (*Path_segment == '.' && *(Path_segment+1) == '.' && Path_segment + 2 == Path_segment_end)
4337  return found.GetParent().FirstElementByPath(NextSegment, delimiter);
4338  else
4339  {
4340  for (NodeStruct* j = found.NodeData->GetFirstChild; j; j = j->GetNextSibling)
4341  {
4342  if (j->Name && internal::strequalrange(j->Name, Path_segment, static_cast<size_t>(Path_segment_end - Path_segment)))
4343  {
4344  Node subsearch = Node(j).FirstElementByPath(NextSegment, delimiter);
4345 
4346  if (subsearch) return subsearch;
4347  }
4348  }
4349 
4350  return Node();
4351  }
4352  }
4353 
4354  PUGI__FN bool Node::Traverse(TreeWalker& walker)
4355  {
4356  walker.TraversalDepth = -1;
4357 
4358  Node arg_begin = *this;
4359  if (!walker.OnTraversalBegin(arg_begin)) return false;
4360 
4361  Node cur = GetFirstChild();
4362 
4363  if (cur)
4364  {
4365  ++walker.TraversalDepth;
4366 
4367  do
4368  {
4369  Node arg_for_each = cur;
4370  if (!walker.OnEachNode(arg_for_each))
4371  return false;
4372 
4373  if (cur.GetFirstChild())
4374  {
4375  ++walker.TraversalDepth;
4376  cur = cur.GetFirstChild();
4377  }
4378  else if (cur.GetNextSibling())
4379  cur = cur.GetNextSibling();
4380  else
4381  {
4382  // Borland C++ workaround
4383  while (!cur.GetNextSibling() && cur != *this && !cur.GetParent().Empty())
4384  {
4385  --walker.TraversalDepth;
4386  cur = cur.GetParent();
4387  }
4388 
4389  if (cur != *this)
4390  cur = cur.GetNextSibling();
4391  }
4392  }
4393  while (cur && cur != *this);
4394  }
4395 
4396  assert(walker.TraversalDepth == -1);
4397 
4398  Node arg_end = *this;
4399  return walker.OnTraversalEnd(arg_end);
4400  }
4401 
4402  PUGI__FN size_t Node::HashValue() const
4403  {
4404  return static_cast<size_t>(reinterpret_cast<uintptr_t>(NodeData) / sizeof(NodeStruct));
4405  }
4406 
4407  PUGI__FN NodeStruct* Node::InternalObject() const
4408  {
4409  return NodeData;
4410  }
4411 
4412  PUGI__FN void Node::Print(Writer& WriterInstance, const Char8* indent, unsigned int flags, Encoding DocumentEncoding, unsigned int Depth) const
4413  {
4414  if (!NodeData) return;
4415 
4416  internal::BufferedWriter buffered_WriterInstance(WriterInstance, DocumentEncoding);
4417 
4418  internal::NodeOutput(buffered_WriterInstance, *this, indent, flags, Depth);
4419  }
4420 
4421  PUGI__FN void Node::Print(std::basic_ostream<char, std::char_traits<char> >& stream, const Char8* indent, unsigned int flags, Encoding DocumentEncoding, unsigned int Depth) const
4422  {
4423  WriterStream WriterInstance(stream);
4424 
4425  Print(WriterInstance, indent, flags, DocumentEncoding, Depth);
4426  }
4427 
4428  PUGI__FN void Node::Print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const Char8* indent, unsigned int flags, unsigned int Depth) const
4429  {
4430  WriterStream WriterInstance(stream);
4431 
4432  Print(WriterInstance, indent, flags, Encodingwchar_t, Depth);
4433  }
4434 
4435  PUGI__FN ptrdiff_t Node::OffSetDebug() const
4436  {
4437  NodeStruct* r = GetRoot().NodeData;
4438 
4439  if (!r) return -1;
4440 
4441  const Char8* buffer = static_cast<internal::DocumentStruct*>(r)->buffer;
4442 
4443  if (!buffer) return -1;
4444 
4445  switch (Type())
4446  {
4447  case NodeDocument:
4448  return 0;
4449 
4450  case NodeElement:
4451  case NodeDeclaration:
4452  case NodePi:
4453  return (NodeData->header & internal::MemoryPage_Name_allocated_mask) ? -1 : NodeData->Name - buffer;
4454 
4455  case NodePcdata:
4456  case NodeCdata:
4457  case NodeComment:
4458  case NodeDocType:
4459  return (NodeData->header & internal::MemoryPage_Value_allocated_mask) ? -1 : NodeData->Value - buffer;
4460 
4461  default:
4462  return -1;
4463  }
4464  }
4465 
4466 #ifdef __BORLANDC__
4467  PUGI__FN bool operator&&(const Node& lhs, bool rhs)
4468  {
4469  return (bool)lhs && rhs;
4470  }
4471 
4472  PUGI__FN bool operator||(const Node& lhs, bool rhs)
4473  {
4474  return (bool)lhs || rhs;
4475  }
4476 #endif
4477 
4478  PUGI__FN NodeText::NodeText(NodeStruct* OtherRoot): RootNode(OtherRoot)
4479  {
4480  }
4481 
4482  PUGI__FN NodeStruct* NodeText::Data() const
4483  {
4484  if (!RootNode || internal::is_text_node(RootNode)) return RootNode;
4485 
4486  for (NodeStruct* node = RootNode->GetFirstChild; node; node = node->GetNextSibling)
4487  if (internal::is_text_node(node))
4488  return node;
4489 
4490  return 0;
4491  }
4492 
4493  PUGI__FN NodeStruct* NodeText::DataNew()
4494  {
4495  NodeStruct* d = Data();
4496  if (d) return d;
4497 
4498  return Node(RootNode).AppendChild(NodePcdata).InternalObject();
4499  }
4500 
4501  PUGI__FN NodeText::NodeText(): RootNode(0)
4502  {
4503  }
4504 
4505  PUGI__FN static void unspecified_bool_Text(NodeText***)
4506  {
4507  }
4508 
4509  PUGI__FN NodeText::operator NodeText::unspecified_bool_type() const
4510  {
4511  return Data() ? unspecified_bool_Text : 0;
4512  }
4513 
4514  PUGI__FN bool NodeText::operator!() const
4515  {
4516  return !Data();
4517  }
4518 
4519  PUGI__FN bool NodeText::Empty() const
4520  {
4521  return Data() == 0;
4522  }
4523 
4524  PUGI__FN const Char8* NodeText::GetString() const
4525  {
4526  NodeStruct* d = Data();
4527 
4528  return (d && d->Value) ? d->Value : "";
4529  }
4530 
4531  PUGI__FN const Char8* NodeText::AsString(const Char8* def) const
4532  {
4533  NodeStruct* d = Data();
4534 
4535  return (d && d->Value) ? d->Value : def;
4536  }
4537 
4538  PUGI__FN int NodeText::AsInt(int def) const
4539  {
4540  NodeStruct* d = Data();
4541 
4542  return internal::GetValue_int(d ? d->Value : 0, def);
4543  }
4544 
4545  PUGI__FN unsigned int NodeText::AsUint(unsigned int def) const
4546  {
4547  NodeStruct* d = Data();
4548 
4549  return internal::GetValue_uint(d ? d->Value : 0, def);
4550  }
4551 
4552  PUGI__FN double NodeText::AsDouble(double def) const
4553  {
4554  NodeStruct* d = Data();
4555 
4556  return internal::GetValue_double(d ? d->Value : 0, def);
4557  }
4558 
4559  PUGI__FN float NodeText::AsFloat(float def) const
4560  {
4561  NodeStruct* d = Data();
4562 
4563  return internal::GetValue_float(d ? d->Value : 0, def);
4564  }
4565 
4566  PUGI__FN Real NodeText::AsReal(Real def) const
4567  {
4568  return AsFloat(def);
4569  }
4570 
4571  PUGI__FN Whole NodeText::AsWhole(Whole def) const
4572  {
4573  return AsUint(def);
4574  }
4575 
4576  PUGI__FN Integer NodeText::AsInteger(Integer def) const
4577  {
4578  return AsInt(def);
4579  }
4580 
4581  PUGI__FN bool NodeText::AsBool(bool def) const
4582  {
4583  NodeStruct* d = Data();
4584 
4585  return internal::GetValue_bool(d ? d->Value : 0, def);
4586  }
4587 
4588  PUGI__FN bool NodeText::Set(const Char8* rhs)
4589  {
4590  NodeStruct* dn = DataNew();
4591 
4592  return dn ? internal::strcpy_insitu(dn->Value, dn->header, internal::MemoryPage_Value_allocated_mask, rhs) : false;
4593  }
4594 
4595  PUGI__FN bool NodeText::Set(int rhs)
4596  {
4597  NodeStruct* dn = DataNew();
4598 
4599  return dn ? internal::SetValue_convert(dn->Value, dn->header, internal::MemoryPage_Value_allocated_mask, rhs) : false;
4600  }
4601 
4602  PUGI__FN bool NodeText::Set(unsigned int rhs)
4603  {
4604  NodeStruct* dn = DataNew();
4605 
4606  return dn ? internal::SetValue_convert(dn->Value, dn->header, internal::MemoryPage_Value_allocated_mask, rhs) : false;
4607  }
4608 
4609  PUGI__FN bool NodeText::Set(double rhs)
4610  {
4611  NodeStruct* dn = DataNew();
4612 
4613  return dn ? internal::SetValue_convert(dn->Value, dn->header, internal::MemoryPage_Value_allocated_mask, rhs) : false;
4614  }
4615 
4616  PUGI__FN bool NodeText::Set(bool rhs)
4617  {
4618  NodeStruct* dn = DataNew();
4619 
4620  return dn ? internal::SetValue_convert(dn->Value, dn->header, internal::MemoryPage_Value_allocated_mask, rhs) : false;
4621  }
4622 
4623  PUGI__FN NodeText& NodeText::operator=(const Char8* rhs)
4624  {
4625  Set(rhs);
4626  return *this;
4627  }
4628 
4629  PUGI__FN NodeText& NodeText::operator=(int rhs)
4630  {
4631  Set(rhs);
4632  return *this;
4633  }
4634 
4635  PUGI__FN NodeText& NodeText::operator=(unsigned int rhs)
4636  {
4637  Set(rhs);
4638  return *this;
4639  }
4640 
4641  PUGI__FN NodeText& NodeText::operator=(double rhs)
4642  {
4643  Set(rhs);
4644  return *this;
4645  }
4646 
4647  PUGI__FN NodeText& NodeText::operator=(bool rhs)
4648  {
4649  Set(rhs);
4650  return *this;
4651  }
4652 
4653  PUGI__FN Node NodeText::data() const
4654  {
4655  return Node(Data());
4656  }
4657 
4658 #ifdef __BORLANDC__
4659  PUGI__FN bool operator&&(const Text& lhs, bool rhs)
4660  {
4661  return (bool)lhs && rhs;
4662  }
4663 
4664  PUGI__FN bool operator||(const Text& lhs, bool rhs)
4665  {
4666  return (bool)lhs || rhs;
4667  }
4668 #endif
4669 
4670  PUGI__FN NodeIterator::NodeIterator()
4671  {
4672  }
4673 
4674  PUGI__FN NodeIterator::NodeIterator(const Node& node): TargetNode(node), ParentNode(node.GetParent())
4675  {
4676  }
4677 
4678  PUGI__FN NodeIterator::NodeIterator(NodeStruct* ref, NodeStruct* ParentNode): TargetNode(ref), ParentNode(ParentNode)
4679  {
4680  }
4681 
4682  PUGI__FN bool NodeIterator::operator==(const NodeIterator& rhs) const
4683  {
4684  return TargetNode.NodeData == rhs.TargetNode.NodeData && ParentNode.NodeData == rhs.ParentNode.NodeData;
4685  }
4686 
4687  PUGI__FN bool NodeIterator::operator!=(const NodeIterator& rhs) const
4688  {
4689  return TargetNode.NodeData != rhs.TargetNode.NodeData || ParentNode.NodeData != rhs.ParentNode.NodeData;
4690  }
4691 
4692  PUGI__FN Node& NodeIterator::operator*() const
4693  {
4694  assert(TargetNode.NodeData);
4695  return TargetNode;
4696  }
4697 
4698  PUGI__FN Node* NodeIterator::operator->() const
4699  {
4700  assert(TargetNode.NodeData);
4701  return const_cast<Node*>(&TargetNode); // BCC32 workaround
4702  }
4703 
4704  PUGI__FN const NodeIterator& NodeIterator::operator++()
4705  {
4706  assert(TargetNode.NodeData);
4707  TargetNode.NodeData = TargetNode.NodeData->GetNextSibling;
4708  return *this;
4709  }
4710 
4711  PUGI__FN NodeIterator NodeIterator::operator++(int)
4712  {
4713  NodeIterator temp = *this;
4714  ++*this;
4715  return temp;
4716  }
4717 
4718  PUGI__FN const NodeIterator& NodeIterator::operator--()
4719  {
4720  TargetNode = TargetNode.NodeData ? TargetNode.GetPreviousSibling() : ParentNode.GetLastChild();
4721  return *this;
4722  }
4723 
4724  PUGI__FN NodeIterator NodeIterator::operator--(int)
4725  {
4726  NodeIterator temp = *this;
4727  --*this;
4728  return temp;
4729  }
4730 
4732  {
4733  }
4734 
4735  PUGI__FN AttributeIterator::AttributeIterator(const Attribute& attr, const Node& GetParent): TargetAttribute(attr), ParentNode(GetParent)
4736  {
4737  }
4738 
4739  PUGI__FN AttributeIterator::AttributeIterator(AttributeStruct* ref, NodeStruct* GetParent): TargetAttribute(ref), ParentNode(GetParent)
4740  {
4741  }
4742 
4743  PUGI__FN bool AttributeIterator::operator==(const AttributeIterator& rhs) const
4744  {
4745  return TargetAttribute.AttributeData == rhs.TargetAttribute.AttributeData && ParentNode.NodeData == rhs.ParentNode.NodeData;
4746  }
4747 
4748  PUGI__FN bool AttributeIterator::operator!=(const AttributeIterator& rhs) const
4749  {
4750  return TargetAttribute.AttributeData != rhs.TargetAttribute.AttributeData || ParentNode.NodeData != rhs.ParentNode.NodeData;
4751  }
4752 
4753  PUGI__FN Attribute& AttributeIterator::operator*() const
4754  {
4755  assert(TargetAttribute.AttributeData);
4756  return TargetAttribute;
4757  }
4758 
4759  PUGI__FN Attribute* AttributeIterator::operator->() const
4760  {
4761  assert(TargetAttribute.AttributeData);
4762  return const_cast<Attribute*>(&TargetAttribute); // BCC32 workaround
4763  }
4764 
4765  PUGI__FN const AttributeIterator& AttributeIterator::operator++()
4766  {
4767  assert(TargetAttribute.AttributeData);
4768  TargetAttribute.AttributeData = TargetAttribute.AttributeData->GetNextAttribute;
4769  return *this;
4770  }
4771 
4772  PUGI__FN AttributeIterator AttributeIterator::operator++(int)
4773  {
4774  AttributeIterator temp = *this;
4775  ++*this;
4776  return temp;
4777  }
4778 
4779  PUGI__FN const AttributeIterator& AttributeIterator::operator--()
4780  {
4781  TargetAttribute = TargetAttribute.AttributeData ? TargetAttribute.GetPreviousAttribute() : ParentNode.GetLastAttribute();
4782  return *this;
4783  }
4784 
4785  PUGI__FN AttributeIterator AttributeIterator::operator--(int)
4786  {
4787  AttributeIterator temp = *this;
4788  --*this;
4789  return temp;
4790  }
4791 
4792  PUGI__FN NamedNodeIterator::NamedNodeIterator(): TargetName(0)
4793  {
4794  }
4795 
4796  PUGI__FN NamedNodeIterator::NamedNodeIterator(const Node& node, const Char8* Name): TargetNode(node), TargetName(Name)
4797  {
4798  }
4799 
4800  PUGI__FN bool NamedNodeIterator::operator==(const NamedNodeIterator& rhs) const
4801  {
4802  return TargetNode == rhs.TargetNode;
4803  }
4804 
4805  PUGI__FN bool NamedNodeIterator::operator!=(const NamedNodeIterator& rhs) const
4806  {
4807  return TargetNode != rhs.TargetNode;
4808  }
4809 
4810  PUGI__FN Node& NamedNodeIterator::operator*() const
4811  {
4812  assert(TargetNode.NodeData);
4813  return TargetNode;
4814  }
4815 
4816  PUGI__FN Node* NamedNodeIterator::operator->() const
4817  {
4818  assert(TargetNode.NodeData);
4819  return const_cast<Node*>(&TargetNode); // BCC32 workaround
4820  }
4821 
4822  PUGI__FN const NamedNodeIterator& NamedNodeIterator::operator++()
4823  {
4824  assert(TargetNode.NodeData);
4825  TargetNode = TargetNode.GetNextSibling(TargetName);
4826  return *this;
4827  }
4828 
4829  PUGI__FN NamedNodeIterator NamedNodeIterator::operator++(int)
4830  {
4831  NamedNodeIterator temp = *this;
4832  ++*this;
4833  return temp;
4834  }
4835 
4836  PUGI__FN ParseResult::ParseResult(): Status(StatusInternalError), Offset(0), DocumentEncoding(EncodingAuto)
4837  {
4838  }
4839 
4840  PUGI__FN ParseResult::operator bool() const
4841  {
4842  return Status == StatusOk;
4843  }
4844 
4845  PUGI__FN const char* ParseResult::Description() const
4846  {
4847  switch (Status)
4848  {
4849  case StatusOk: return "No error";
4850 
4851  case StatusFileNotFound: return "File was not found";
4852  case StatusIOError: return "Error reading from file/stream";
4853  case StatusOutOfMemory: return "Could not allocate memory";
4854  case StatusInternalError: return "Internal error occurred";
4855 
4856  case StatusUnrecognizedTag: return "Could not determine tag Type";
4857 
4858  case StatusBadProcessingInstruction: return "Error parsing document declaration/processing instruction";
4859  case StatusBadComment: return "Error parsing comment";
4860  case StatusBadCdata: return "Error parsing CDATA section";
4861  case StatusBadDocType: return "Error parsing document Type declaration";
4862  case StatusBadPcdata: return "Error parsing PCDATA section";
4863  case StatusBadStartElement: return "Error parsing start element tag";
4864  case StatusBadAttribute: return "Error parsing element GetAttribute";
4865  case StatusBadEndElement: return "Error parsing end element tag";
4866  case StatusEndElementMismatch: return "Start-end tags mismatch";
4867 
4868  default: return "Unknown error";
4869  }
4870  }
4871 
4872  PUGI__FN Document::Document(): _buffer(0)
4873  {
4874  create();
4875  }
4876 
4877  PUGI__FN Document::~Document()
4878  {
4879  destroy();
4880  }
4881 
4882  PUGI__FN void Document::Reset()
4883  {
4884  destroy();
4885  create();
4886  }
4887 
4888  PUGI__FN void Document::Reset(const Document& proto)
4889  {
4890  Reset();
4891 
4892  for (Node cur = proto.GetFirstChild(); cur; cur = cur.GetNextSibling())
4893  AppendCopy(cur);
4894  }
4895 
4896  PUGI__FN void Document::create()
4897  {
4898  // initialize sentinel page
4899  PUGI__STATIC_ASSERT(offsetof(internal::MemoryPage, data) + sizeof(internal::DocumentStruct) + internal::MemoryPage_alignment <= sizeof(_memory));
4900 
4901  // align upwards to page boundary
4902  void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (internal::MemoryPage_alignment - 1)) & ~(internal::MemoryPage_alignment - 1));
4903 
4904  // prepare page structure
4905  internal::MemoryPage* page = internal::MemoryPage::construct(page_memory);
4906 
4907  page->busy_size = internal::MemoryPage_size;
4908 
4909  // allocate new GetRoot
4910  NodeData = new (page->data) internal::DocumentStruct(page);
4911  NodeData->prev_sibling_c = NodeData;
4912 
4913  // setup sentinel page
4914  page->allocator = static_cast<internal::DocumentStruct*>(NodeData);
4915  }
4916 
4917  PUGI__FN ParseResult Document::Load(Resource::DataStream& stream, unsigned int options, Encoding DocumentEncoding)
4918  {
4919  Reset();
4920  return internal::LoadDataStreamImpl(*this, stream, options, DocumentEncoding);
4921  }
4922 
4923  PUGI__FN void Document::Save(Resource::DataStream& stream, const Char8* indent, unsigned int flags, Encoding DocumentEncoding) const
4924  {
4925  XMLStreamWrapper WriterInstance(&stream);
4926  Save(WriterInstance, indent, flags, DocumentEncoding);
4927  }
4928 
4929  PUGI__FN void Document::destroy()
4930  {
4931  // destroy static storage
4932  if (_buffer)
4933  {
4934  internal::Memory::deallocate(_buffer);
4935  _buffer = 0;
4936  }
4937 
4938  // destroy dynamic storage, leave sentinel page (it's in static memory)
4939  if (NodeData)
4940  {
4941  internal::MemoryPage* GetRoot_page = reinterpret_cast<internal::MemoryPage*>(NodeData->header & internal::MemoryPage_pointer_mask);
4942  assert(GetRoot_page && !GetRoot_page->prev && !GetRoot_page->memory);
4943 
4944  // destroy all pages
4945  for (internal::MemoryPage* page = GetRoot_page->next; page; )
4946  {
4947  internal::MemoryPage* next = page->next;
4948 
4949  internal::Allocator::deallocate_page(page);
4950 
4951  page = next;
4952  }
4953 
4954  // cleanup GetRoot page
4955  GetRoot_page->allocator = 0;
4956  GetRoot_page->next = 0;
4957  GetRoot_page->busy_size = GetRoot_page->freed_size = 0;
4958 
4959  NodeData = 0;
4960  }
4961  }
4962 
4963  PUGI__FN ParseResult Document::Load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, Encoding DocumentEncoding)
4964  {
4965  Reset();
4966 
4967  return internal::LoadStreamImpl(*this, stream, options, DocumentEncoding);
4968  }
4969 
4970  PUGI__FN ParseResult Document::Load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
4971  {
4972  Reset();
4973 
4974  return internal::LoadStreamImpl(*this, stream, options, Encodingwchar_t);
4975  }
4976 
4977  PUGI__FN ParseResult Document::Load(const Char8* contents, unsigned int options)
4978  {
4979  // Force native DocumentEncoding (skip autodetection)
4980  Encoding DocumentEncoding = EncodingUTF8;
4981 
4982  return LoadBuffer(contents, internal::strlength(contents) * sizeof(Char8), options, DocumentEncoding);
4983  }
4984 
4985  PUGI__FN ParseResult Document::LoadFile(const char* Path_, unsigned int options, Encoding DocumentEncoding)
4986  {
4987  Reset();
4988 
4989  FILE* file = fopen(Path_, "rb");
4990 
4991  return internal::LoadFileImpl(*this, file, options, DocumentEncoding);
4992  }
4993 
4994  PUGI__FN ParseResult Document::LoadFile(const wchar_t* Path_, unsigned int options, Encoding DocumentEncoding)
4995  {
4996  Reset();
4997 
4998  FILE* file = internal::open_file_wide(Path_, L"rb");
4999 
5000  return internal::LoadFileImpl(*this, file, options, DocumentEncoding);
5001  }
5002 
5003  PUGI__FN ParseResult Document::LoadBufferImpl(void* contents, size_t size, unsigned int options, Encoding DocumentEncoding, bool is_mutable, bool own)
5004  {
5005  Reset();
5006 
5007  // check input buffer
5008  assert(contents || size == 0);
5009 
5010  // get actual DocumentEncoding
5011  Encoding buffer_DocumentEncoding = internal::GetBuffer_DocumentEncoding(DocumentEncoding, contents, size);
5012 
5013  // get private buffer
5014  Char8* buffer = 0;
5015  size_t length = 0;
5016 
5017  if (!internal::convert_buffer(buffer, length, buffer_DocumentEncoding, contents, size, is_mutable)) return internal::make_ParseResult(StatusOutOfMemory);
5018 
5019  // delete original buffer if we performed a conversion
5020  if (own && buffer != contents && contents) internal::Memory::deallocate(contents);
5021 
5022  // parse
5023  ParseResult res = internal::Parser::parse(buffer, length, NodeData, options);
5024 
5025  // remember DocumentEncoding
5026  res.DocumentEncoding = buffer_DocumentEncoding;
5027 
5028  // grab onto buffer if it's our buffer, user is responsible for deallocating contens himself
5029  if (own || buffer != contents) _buffer = buffer;
5030 
5031  return res;
5032  }
5033 
5034  PUGI__FN ParseResult Document::LoadBuffer(const void* contents, size_t size, unsigned int options, Encoding DocumentEncoding)
5035  {
5036  return LoadBufferImpl(const_cast<void*>(contents), size, options, DocumentEncoding, false, false);
5037  }
5038 
5039  PUGI__FN ParseResult Document::LoadBufferInplace(void* contents, size_t size, unsigned int options, Encoding DocumentEncoding)
5040  {
5041  return LoadBufferImpl(contents, size, options, DocumentEncoding, true, false);
5042  }
5043 
5044  PUGI__FN ParseResult Document::LoadBufferInplaceOwn(void* contents, size_t size, unsigned int options, Encoding DocumentEncoding)
5045  {
5046  return LoadBufferImpl(contents, size, options, DocumentEncoding, true, true);
5047  }
5048 
5049  PUGI__FN void Document::Save(Writer& WriterInstance, const Char8* indent, unsigned int flags, Encoding DocumentEncoding) const
5050  {
5051  internal::BufferedWriter buffered_WriterInstance(WriterInstance, DocumentEncoding);
5052 
5053  if ((flags & FormatWriteBom) && DocumentEncoding != EncodingLatin1)
5054  {
5055  // BOM always represents the codepoint U+FEFF, so just Write it in native DocumentEncoding
5056  buffered_WriterInstance.Write('\xef', '\xbb', '\xbf');
5057  }
5058 
5059  if (!(flags & FormatNoDeclaration) && !internal::hAsDeclaration(*this))
5060  {
5061  buffered_WriterInstance.Write("<?xml version=\"1.0\"");
5062  if (DocumentEncoding == EncodingLatin1) buffered_WriterInstance.Write(" DocumentEncoding=\"ISO-8859-1\"");
5063  buffered_WriterInstance.Write('?', '>');
5064  if (!(flags & FormatRaw)) buffered_WriterInstance.Write('\n');
5065  }
5066 
5067  internal::NodeOutput(buffered_WriterInstance, *this, indent, flags, 0);
5068  }
5069 
5070  PUGI__FN void Document::Save(std::basic_ostream<char, std::char_traits<char> >& stream, const Char8* indent, unsigned int flags, Encoding DocumentEncoding) const
5071  {
5072  WriterStream WriterInstance(stream);
5073 
5074  Save(WriterInstance, indent, flags, DocumentEncoding);
5075  }
5076 
5077  PUGI__FN void Document::Save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const Char8* indent, unsigned int flags) const
5078  {
5079  WriterStream WriterInstance(stream);
5080 
5081  Save(WriterInstance, indent, flags, Encodingwchar_t);
5082  }
5083 
5084  PUGI__FN bool Document::SaveFile(const char* Path_, const Char8* indent, unsigned int flags, Encoding DocumentEncoding) const
5085  {
5086  FILE* file = fopen(Path_, (flags & FormatSaveFileText) ? "w" : "wb");
5087  return internal::SaveFileImpl(*this, file, indent, flags, DocumentEncoding);
5088  }
5089 
5090  PUGI__FN bool Document::SaveFile(const wchar_t* Path_, const Char8* indent, unsigned int flags, Encoding DocumentEncoding) const
5091  {
5092  FILE* file = internal::open_file_wide(Path_, (flags & FormatSaveFileText) ? L"w" : L"wb");
5093  return internal::SaveFileImpl(*this, file, indent, flags, DocumentEncoding);
5094  }
5095 
5096  PUGI__FN Node Document::DocumentElement() const
5097  {
5098  for (NodeStruct* i = NodeData->GetFirstChild; i; i = i->GetNextSibling)
5099  if ((i->header & internal::MemoryPage_type_mask) + 1 == NodeElement)
5100  return Node(i);
5101 
5102  return Node();
5103  }
5104 
5105  PUGI__FN std::string MEZZ_LIB AsUtf8(const wchar_t* str)
5106  {
5107  assert(str);
5108 
5109  return internal::AsUtf8_impl(str, wcslen(str));
5110  }
5111 
5112  PUGI__FN std::string MEZZ_LIB AsUtf8(const std::basic_string<wchar_t>& str)
5113  {
5114  return internal::AsUtf8_impl(str.c_str(), str.size());
5115  }
5116 
5117  PUGI__FN std::basic_string<wchar_t> MEZZ_LIB AsWide(const char* str)
5118  {
5119  assert(str);
5120 
5121  return internal::AsWide_impl(str, strlen(str));
5122  }
5123 
5124  PUGI__FN std::basic_string<wchar_t> MEZZ_LIB AsWide(const std::string& str)
5125  {
5126  return internal::AsWide_impl(str.c_str(), str.size());
5127  }
5128 
5129 
5131  {
5132  internal::Memory::allocate = allocate;
5133  internal::Memory::deallocate = deallocate;
5134  }
5135 
5137  {
5138  return internal::Memory::allocate;
5139  }
5140 
5142  {
5143  return internal::Memory::deallocate;
5144  }
5145 }
5146 
5147 
5148 
5149 // STL replacements
5150 PUGI__NS_BEGIN
/// Function object: returns the result of `lhs == rhs`.
struct equal_to
{
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        const bool Outcome = (lhs == rhs);
        return Outcome;
    }
};
5158 
/// Function object: returns the result of `lhs != rhs`.
struct not_equal_to
{
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        const bool Outcome = (lhs != rhs);
        return Outcome;
    }
};
5166 
/// Function object: returns the result of `lhs < rhs`.
struct less
{
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        const bool Outcome = (lhs < rhs);
        return Outcome;
    }
};
5174 
/// Function object: returns the result of `lhs <= rhs`.
struct less_equal
{
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        const bool Outcome = (lhs <= rhs);
        return Outcome;
    }
};
5182 
/// Exchange two values through a temporary copy (copy-construct, then two assignments).
template <typename T>
void swap(T& lhs, T& rhs)
{
    T Held = lhs;

    lhs = rhs;
    rhs = Held;
}
5189 
/// Return the position of the first element for which pred(element, current best) never holds
/// for any later element, i.e. the minimum under @p pred. The scan starts at begin + 1, so the
/// range is expected to hold at least one element.
template <typename I, typename Pred>
I min_element(I begin, I end, const Pred& pred)
{
    I Best = begin;
    I Cursor = begin + 1;

    while (Cursor != end)
    {
        if (pred(*Cursor, *Best)) Best = Cursor;
        ++Cursor;
    }

    return Best;
}
5200 
/// Reverse the range [begin, end) in place by swapping elements from both ends inwards.
template <typename I>
void reverse(I begin, I end)
{
    while (begin + 1 < end)
    {
        --end;
        swap(*begin, *end);
        ++begin;
    }
}
5205 
/// Collapse runs of consecutive equal elements in [begin, end), keeping the first of each run.
/// Returns the position one past the last element kept.
template <typename I>
I unique(I begin, I end)
{
    // Fast path: skip the leading part that already has no adjacent duplicates.
    while (begin + 1 < end && *begin != *(begin + 1)) begin++;

    if (begin == end) return begin;

    // Kept points at the last element written so far; begin scans ahead of it.
    I Kept = begin;
    begin++;

    for (; begin != end; begin++)
    {
        if (*begin != *Kept)
        {
            ++Kept;
            *Kept = *begin;
        }
    }

    // Kept points at a live element, so the new end is one past it.
    return Kept + 1;
}
5228 
/// Copy [begin, end) so that it ends at @p target, walking from the last element to the first.
template <typename I>
void copy_backwards(I begin, I end, I target)
{
    while (begin != end)
    {
        --end;
        --target;
        *target = *end;
    }
}
5233 
/// Insertion-sort the non-empty range [begin, end) under @p pred. The trailing T* argument is
/// unused at run time; it only lets the element type T be deduced.
template <typename I, typename Pred, typename T>
void insertion_sort(I begin, I end, const Pred& pred, T*)
{
    assert(begin != end);

    for (I Cursor = begin + 1; Cursor != end; ++Cursor)
    {
        T Pending = *Cursor;

        if (pred(Pending, *begin))
        {
            // Smaller than everything sorted so far: shift the sorted part up by one and put it in front.
            copy_backwards(begin, Cursor, Cursor + 1);
            *begin = Pending;
            continue;
        }

        // Otherwise slide a gap backwards; *begin stops the scan because pred(Pending, *begin) is false.
        I Gap = Cursor;

        while (pred(Pending, *(Gap - 1)))
        {
            *Gap = *(Gap - 1);
            Gap--;
        }

        // Drop the element into the gap.
        *Gap = Pending;
    }
}
5264 
// std variant for elements with ==
/// Three-way partition of [begin, end) around the value at @p middle: elements ordered before it
/// by @p pred, elements equal to it (operator==), and the rest. The bounds of the equal range
/// are written to *out_eqbeg and *out_eqend.
template <typename I, typename Pred>
void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
{
    I EqualFirst = middle;
    I EqualLast = middle + 1;

    // Grow the equal range over neighbours that already match the pivot.
    while (EqualFirst != begin && *(EqualFirst - 1) == *EqualFirst) --EqualFirst;
    while (EqualLast != end && *EqualLast == *EqualFirst) ++EqualLast;

    // Unprocessed elements lie in [begin, LessEnd) and [GreaterBegin, end).
    I LessEnd = EqualFirst;
    I GreaterBegin = EqualLast;

    while (true)
    {
        // Scan right for an element that belongs on the left; absorb equals on the way.
        for (; GreaterBegin != end; ++GreaterBegin)
            if (!pred(*EqualFirst, *GreaterBegin))
            {
                if (*GreaterBegin == *EqualFirst) swap(*GreaterBegin, *EqualLast++);
                else break;
            }

        // Scan left for an element that belongs on the right; absorb equals on the way.
        for (; LessEnd != begin; --LessEnd)
            if (!pred(*(LessEnd - 1), *EqualFirst))
            {
                if (*EqualFirst == *(LessEnd - 1)) swap(*(LessEnd - 1), *--EqualFirst);
                else break;
            }

        // Both scans exhausted: report the equal range.
        if (GreaterBegin == end && LessEnd == begin)
        {
            *out_eqbeg = EqualFirst;
            *out_eqend = EqualLast;
            return;
        }

        // Make room for the misplaced element by shifting the equal range.
        if (GreaterBegin == end)
        {
            if (--LessEnd != --EqualFirst) swap(*LessEnd, *EqualFirst);
            swap(*EqualFirst, *--EqualLast);
        }
        else if (LessEnd == begin)
        {
            if (EqualLast != GreaterBegin) swap(*EqualFirst, *EqualLast);
            ++EqualLast;
            swap(*GreaterBegin++, *EqualFirst++);
        }
        else swap(*GreaterBegin++, *--LessEnd);
    }
}
5318 
/// Order the three referenced elements under @p pred with at most three swaps, so that the
/// median ends up at @p middle.
template <typename I, typename Pred>
void median3(I first, I middle, I last, const Pred& pred)
{
    if (pred(*middle, *first))
    {
        swap(*middle, *first);
    }

    if (pred(*last, *middle))
    {
        swap(*last, *middle);
    }

    if (pred(*middle, *first))
    {
        swap(*middle, *first);
    }
}
5325 
/// Move a pivot estimate to @p middle: median of three when last - first <= 40, otherwise a
/// median of nine built from four median3() passes.
template <typename I, typename Pred>
void median(I first, I middle, I last, const Pred& pred)
{
    if (last - first > 40)
    {
        // median of nine
        size_t Stride = (last - first + 1) / 8;

        median3(first, first + Stride, first + 2 * Stride, pred);
        median3(middle - Stride, middle, middle + Stride, pred);
        median3(last - 2 * Stride, last - Stride, last, pred);
        median3(first + Stride, middle, last - Stride, pred);
        return;
    }

    // median of three for small chunks
    median3(first, middle, last, pred);
}
5344 
/// Sort [begin, end) under @p pred: ranges longer than 32 elements are partitioned three ways,
/// one side being sorted by a recursive call and the other by continuing the loop; whatever
/// remains is finished with insertion_sort().
template <typename I, typename Pred>
void sort(I begin, I end, const Pred& pred)
{
    // sort large chunks
    while (end - begin > 32)
    {
        // move a pivot estimate to the middle position
        I Pivot = begin + (end - begin) / 2;
        median(begin, Pivot, end - 1, pred);

        // partition in three chunks (< = >)
        I EqualFirst, EqualLast;
        partition(begin, Pivot, end, pred, &EqualFirst, &EqualLast);

        // recurse into one side, keep looping on the other
        const bool LeftIsLarger = (EqualFirst - begin > end - EqualLast);

        if (LeftIsLarger)
        {
            sort(EqualLast, end, pred);
            end = EqualFirst;
        }
        else
        {
            sort(begin, EqualFirst, pred);
            begin = EqualLast;
        }
    }

    // insertion sort small chunk
    if (begin != end) insertion_sort(begin, end, pred, &*begin);
}
5374 PUGI__NS_END
5375 
5376 // Allocator used for AST and evaluation stacks
5377 PUGI__NS_BEGIN
// One page of XPath allocator memory: a link to the next page followed by the raw storage.
// The storage size is XML_MEMORY_XPATH_PAGE_SIZE when that macro is defined, 4096 bytes otherwise.
struct XPathMemoryBlock
{
    XPathMemoryBlock* next;

#ifdef XML_MEMORY_XPATH_PAGE_SIZE
    char data[XML_MEMORY_XPATH_PAGE_SIZE];
#else
    char data[4096];
#endif
};
5390 
5391  class XPathAllocator
5392  {
5393  XPathMemoryBlock* _GetRoot;
5394  size_t _GetRoot_size;
5395 
5396  public:
5397 
5398 
5399  XPathAllocator(XPathMemoryBlock* GetRoot, size_t GetRoot_size = 0): _GetRoot(GetRoot), _GetRoot_size(GetRoot_size)
5400  {
5401 
5402  }
5403 
5404  void* allocate_nothrow(size_t size)
5405  {
5406  const size_t block_capacity = sizeof(_GetRoot->data);
5407 
5408  // align size so that we're able to store pointers in subsequent blocks
5409  size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5410 
5411  if (_GetRoot_size + size <= block_capacity)
5412  {
5413  void* buf = _GetRoot->data + _GetRoot_size;
5414  _GetRoot_size += size;
5415  return buf;
5416  }
5417  else
5418  {
5419  size_t block_data_size = (size > block_capacity) ? size : block_capacity;
5420  size_t block_size = block_data_size + offsetof(XPathMemoryBlock, data);
5421 
5422  XPathMemoryBlock* block = static_cast<XPathMemoryBlock*>(Memory::allocate(block_size));
5423  if (!block) return 0;
5424 
5425  block->next = _GetRoot;
5426 
5427  _GetRoot = block;
5428  _GetRoot_size = size;
5429 
5430  return block->data;
5431  }
5432  }
5433 
5434  void* allocate(size_t size)
5435  {
5436  void* Result = allocate_nothrow(size);
5437 
5438  if (!Result)
5439  {
5440  throw std::bad_alloc();
5441  }
5442 
5443  return Result;
5444  }
5445 
5446  void* reallocate(void* ptr, size_t old_size, size_t new_size)
5447  {
5448  // align size so that we're able to store pointers in subsequent blocks
5449  old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5450  new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
5451 
5452  // we can only reallocate the last object
5453  assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _GetRoot->data + _GetRoot_size);
5454 
5455  // adjust GetRoot size so that we have not allocated the object at all
5456  bool only_object = (_GetRoot_size == old_size);
5457 
5458  if (ptr) _GetRoot_size -= old_size;
5459 
5460  // allocate a new version (this will obviously reuse the memory if possible)
5461  void* Result = allocate(new_size);
5462  assert(Result);
5463 
5464  // we have a new block
5465  if (Result != ptr && ptr)
5466  {
5467  // copy old data
5468  assert(new_size > old_size);
5469  memcpy(Result, ptr, old_size);
5470 
5471  // free the previous page if it had no other objects
5472  if (only_object)
5473  {
5474  assert(_GetRoot->data == Result);
5475  assert(_GetRoot->next);
5476 
5477  XPathMemoryBlock* next = _GetRoot->next->next;
5478 
5479  if (next)
5480  {
5481  // deallocate the whole page, unless it was the first one
5482  Memory::deallocate(_GetRoot->next);
5483  _GetRoot->next = next;
5484  }
5485  }
5486  }
5487 
5488  return Result;
5489  }
5490 
5491  void revert(const XPathAllocator& state)
5492  {
5493  // free all new pages
5494  XPathMemoryBlock* cur = _GetRoot;
5495 
5496  while (cur != state._GetRoot)
5497  {
5498  XPathMemoryBlock* next = cur->next;
5499 
5500  Memory::deallocate(cur);
5501 
5502  cur = next;
5503  }
5504 
5505  // restore state
5506  _GetRoot = state._GetRoot;
5507  _GetRoot_size = state._GetRoot_size;
5508  }
5509 
5510  void release()
5511  {
5512  XPathMemoryBlock* cur = _GetRoot;
5513  assert(cur);
5514 
5515  while (cur->next)
5516  {
5517  XPathMemoryBlock* next = cur->next;
5518 
5519  Memory::deallocate(cur);
5520 
5521  cur = next;
5522  }
5523  }
5524  };
5525 
5526  struct XPathAllocatorCapture
5527  {
5528  XPathAllocatorCapture(XPathAllocator* alloc): _target(alloc), _state(*alloc)
5529  {
5530  }
5531 
5532  ~XPathAllocatorCapture()
5533  {
5534  _target->revert(_state);
5535  }
5536 
5537  XPathAllocator* _target;
5538  XPathAllocator _state;
5539  };
5540 
5541  struct XPathStack
5542  {
5543  XPathAllocator* Result;
5544  XPathAllocator* temp;
5545  };
5546 
5547  struct XPathStackData
5548  {
5549  XPathMemoryBlock blocks[2];
5550  XPathAllocator Result;
5551  XPathAllocator temp;
5552  XPathStack stack;
5553 
5554  XPathStackData(): Result(blocks + 0), temp(blocks + 1)
5555  {
5556  blocks[0].next = blocks[1].next = 0;
5557 
5558  stack.Result = &Result;
5559  stack.temp = &temp;
5560 
5561  }
5562 
5563  ~XPathStackData()
5564  {
5565  Result.release();
5566  temp.release();
5567  }
5568  };
5569 PUGI__NS_END
5570 
5571 // String class
5572 PUGI__NS_BEGIN
5573  class XPathString
5574  {
5575  const Char8* _buffer;
5576  bool _uses_heap;
5577 
5578  static Char8* duplicate_string(const Char8* string, size_t length, XPathAllocator* alloc)
5579  {
5580  Char8* Result = static_cast<Char8*>(alloc->allocate((length + 1) * sizeof(Char8)));
5581  assert(Result);
5582 
5583  memcpy(Result, string, length * sizeof(Char8));
5584  Result[length] = 0;
5585 
5586  return Result;
5587  }
5588 
5589  static Char8* duplicate_string(const Char8* string, XPathAllocator* alloc)
5590  {
5591  return duplicate_string(string, strlength(string), alloc);
5592  }
5593 
5594  public:
5595  XPathString(): _buffer(""), _uses_heap(false)
5596  {
5597  }
5598 
5599  explicit XPathString(const Char8* str, XPathAllocator* alloc)
5600  {
5601  bool empty_ = (*str == 0);
5602 
5603  _buffer = empty_ ? "" : duplicate_string(str, alloc);
5604  _uses_heap = !empty_;
5605  }
5606 
5607  explicit XPathString(const Char8* str, bool use_heap): _buffer(str), _uses_heap(use_heap)
5608  {
5609  }
5610 
5611  XPathString(const Char8* begin, const Char8* end, XPathAllocator* alloc)
5612  {
5613  assert(begin <= end);
5614 
5615  bool empty_ = (begin == end);
5616 
5617  _buffer = empty_ ? "" : duplicate_string(begin, static_cast<size_t>(end - begin), alloc);
5618  _uses_heap = !empty_;
5619  }
5620 
5621  void append(const XPathString& o, XPathAllocator* alloc)
5622  {
5623  // skip empty sources
5624  if (!*o._buffer) return;
5625 
5626  // fast append for constant empty target and constant source
5627  if (!*_buffer && !_uses_heap && !o._uses_heap)
5628  {
5629  _buffer = o._buffer;
5630  }
5631  else
5632  {
5633  // need to make heap copy
5634  size_t tarGetLength = strlength(_buffer);
5635  size_t source_length = strlength(o._buffer);
5636  size_t Result_length = tarGetLength + source_length;
5637 
5638  // allocate new buffer
5639  Char8* Result = static_cast<Char8*>(alloc->reallocate(_uses_heap ? const_cast<Char8*>(_buffer) : 0, (tarGetLength + 1) * sizeof(Char8), (Result_length + 1) * sizeof(Char8)));
5640  assert(Result);
5641 
5642  // append first string to the new buffer in case there was no reallocation
5643  if (!_uses_heap) memcpy(Result, _buffer, tarGetLength * sizeof(Char8));
5644 
5645  // append second string to the new buffer
5646  memcpy(Result + tarGetLength, o._buffer, source_length * sizeof(Char8));
5647  Result[Result_length] = 0;
5648 
5649  // finalize
5650  _buffer = Result;
5651  _uses_heap = true;
5652  }
5653  }
5654 
5655  const Char8* c_str() const
5656  {
5657  return _buffer;
5658  }
5659 
5660  size_t length() const
5661  {
5662  return strlength(_buffer);
5663  }
5664 
5665  Char8* data(XPathAllocator* alloc)
5666  {
5667  // make private heap copy
5668  if (!_uses_heap)
5669  {
5670  _buffer = duplicate_string(_buffer, alloc);
5671  _uses_heap = true;
5672  }
5673 
5674  return const_cast<Char8*>(_buffer);
5675  }
5676 
5677  bool Empty() const
5678  {
5679  return *_buffer == 0;
5680  }
5681 
5682  bool operator==(const XPathString& o) const
5683  {
5684  return strequal(_buffer, o._buffer);
5685  }
5686 
5687  bool operator!=(const XPathString& o) const
5688  {
5689  return !strequal(_buffer, o._buffer);
5690  }
5691 
5692  bool uses_heap() const
5693  {
5694  return _uses_heap;
5695  }
5696  };
5697 
5698  PUGI__FN XPathString XPathStringConst(const Char8* str)
5699  {
5700  return XPathString(str, false);
5701  }
5702 PUGI__NS_END
5703 
5704 PUGI__NS_BEGIN
5705  PUGI__FN bool starts_with(const Char8* string, const Char8* pattern)
5706  {
5707  while (*pattern && *string == *pattern)
5708  {
5709  string++;
5710  pattern++;
5711  }
5712 
5713  return *pattern == 0;
5714  }
5715 
5716  PUGI__FN const Char8* FindChar(const Char8* s, Char8 c)
5717  {
5718  return strchr(s, c);
5719  }
5720 
5721  PUGI__FN const Char8* FindSubstring(const Char8* s, const Char8* p)
5722  {
5723  return strstr(s, p);
5724  }
5725 
5726  // Converts symbol to lower case, if it is an ASCII one
5727  PUGI__FN Char8 tolower_ascii(Char8 ch)
5728  {
5729  return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<Char8>(ch | ' ') : ch;
5730  }
5731 
5732  PUGI__FN XPathString string_Value(const XPathNode& na, XPathAllocator* alloc)
5733  {
5734  if (na.GetAttribute())
5735  return XPathStringConst(na.GetAttribute().Value());
5736  else
5737  {
5738  const Node& n = na.GetNode();
5739 
5740  switch (n.Type())
5741  {
5742  case NodePcdata:
5743  case NodeCdata:
5744  case NodeComment:
5745  case NodePi:
5746  return XPathStringConst(n.Value());
5747 
5748  case NodeDocument:
5749  case NodeElement:
5750  {
5751  XPathString Result;
5752 
5753  Node cur = n.GetFirstChild();
5754 
5755  while (cur && cur != n)
5756  {
5757  if (cur.Type() == NodePcdata || cur.Type() == NodeCdata)
5758  Result.append(XPathStringConst(cur.Value()), alloc);
5759 
5760  if (cur.GetFirstChild())
5761  cur = cur.GetFirstChild();
5762  else if (cur.GetNextSibling())
5763  cur = cur.GetNextSibling();
5764  else
5765  {
5766  while (!cur.GetNextSibling() && cur != n)
5767  cur = cur.GetParent();
5768 
5769  if (cur != n) cur = cur.GetNextSibling();
5770  }
5771  }
5772 
5773  return Result;
5774  }
5775 
5776  default:
5777  return XPathString();
5778  }
5779  }
5780  }
5781 
5782  PUGI__FN unsigned int NodeHeight(Node n)
5783  {
5784  unsigned int Result = 0;
5785 
5786  while (n)
5787  {
5788  ++Result;
5789  n = n.GetParent();
5790  }
5791 
5792  return Result;
5793  }
5794 
5795  PUGI__FN bool NodeIs_before(Node ln, unsigned int lh, Node rn, unsigned int rh)
5796  {
5797  // normalize heights
5798  for (unsigned int i = rh; i < lh; i++) ln = ln.GetParent();
5799  for (unsigned int j = lh; j < rh; j++) rn = rn.GetParent();
5800 
5801  // one node is the ancestor of the other
5802  if (ln == rn) return lh < rh;
5803 
5804  // find common ancestor
5805  while (ln.GetParent() != rn.GetParent())
5806  {
5807  ln = ln.GetParent();
5808  rn = rn.GetParent();
5809  }
5810 
5811  // there is no common ancestor (the shared GetParent is null), nodes are from different documents
5812  if (!ln.GetParent()) return ln < rn;
5813 
5814  // determine sibling order
5815  for (; ln; ln = ln.GetNextSibling())
5816  if (ln == rn)
5817  return true;
5818 
5819  return false;
5820  }
5821 
5822  PUGI__FN bool NodeIs_ancestor(Node GetParent, Node node)
5823  {
5824  while (node && node != GetParent) node = node.GetParent();
5825 
5826  return GetParent && node == GetParent;
5827  }
5828 
5829  PUGI__FN const void* document_order(const XPathNode& xnode)
5830  {
5831  NodeStruct* node = xnode.GetNode().InternalObject();
5832 
5833  if (node)
5834  {
5835  if (node->Name && (node->header & MemoryPage_Name_allocated_mask) == 0) return node->Name;
5836  if (node->Value && (node->header & MemoryPage_Value_allocated_mask) == 0) return node->Value;
5837  return 0;
5838  }
5839 
5840  AttributeStruct* attr = xnode.GetAttribute().InternalObject();
5841 
5842  if (attr)
5843  {
5844  if ((attr->header & MemoryPage_Name_allocated_mask) == 0) return attr->Name;
5845  if ((attr->header & MemoryPage_Value_allocated_mask) == 0) return attr->Value;
5846  return 0;
5847  }
5848 
5849  return 0;
5850  }
5851 
5852  struct document_order_comparator
5853  {
5854  bool operator()(const XPathNode& lhs, const XPathNode& rhs) const
5855  {
5856  // optimized document order based check
5857  const void* lo = document_order(lhs);
5858  const void* ro = document_order(rhs);
5859 
5860  if (lo && ro) return lo < ro;
5861 
5862  // slow comparison
5863  Node ln = lhs.GetNode(), rn = rhs.GetNode();
5864 
5865  // compare attributes
5866  if (lhs.GetAttribute() && rhs.GetAttribute())
5867  {
5868  // shared GetParent
5869  if (lhs.GetParent() == rhs.GetParent())
5870  {
5871  // determine sibling order
5872  for (Attribute a = lhs.GetAttribute(); a; a = a.GetNextAttribute())
5873  if (a == rhs.GetAttribute())
5874  return true;
5875 
5876  return false;
5877  }
5878 
5879  // compare GetAttribute GetParents
5880  ln = lhs.GetParent();
5881  rn = rhs.GetParent();
5882  }
5883  else if (lhs.GetAttribute())
5884  {
5885  // attributes go after the GetParent element
5886  if (lhs.GetParent() == rhs.GetNode()) return false;
5887 
5888  ln = lhs.GetParent();
5889  }
5890  else if (rhs.GetAttribute())
5891  {
5892  // attributes go after the GetParent element
5893  if (rhs.GetParent() == lhs.GetNode()) return true;
5894 
5895  rn = rhs.GetParent();
5896  }
5897 
5898  if (ln == rn) return false;
5899 
5900  unsigned int lh = NodeHeight(ln);
5901  unsigned int rh = NodeHeight(rn);
5902 
5903  return NodeIs_before(ln, lh, rn, rh);
5904  }
5905  };
5906 
5907  struct duplicate_comparator
5908  {
5909  bool operator()(const XPathNode& lhs, const XPathNode& rhs) const
5910  {
5911  if (lhs.GetAttribute()) return rhs.GetAttribute() ? lhs.GetAttribute() < rhs.GetAttribute() : true;
5912  else return rhs.GetAttribute() ? false : lhs.GetNode() < rhs.GetNode();
5913  }
5914  };
5915 
5916  PUGI__FN double gen_nan()
5917  {
5918  #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
5919  union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
5920  u[0].i = 0x7fc00000;
5921  return u[0].f;
5922  #else
5923  // fallback
5924  const volatile double zero = 0.0;
5925  return zero / zero;
5926  #endif
5927  }
5928 
5929  PUGI__FN bool is_nan(double Value)
5930  {
5931  #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
5932  return !!_isnan(Value);
5933  #elif defined(fpclassify) && defined(FP_NAN)
5934  return fpclassify(Value) == FP_NAN;
5935  #else
5936  // fallback
5937  const volatile double v = Value;
5938  return v != v;
5939  #endif
5940  }
5941 
5942  PUGI__FN const Char8* convert_number_to_string_special(double Value)
5943  {
5944  #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
5945  if (_finite(Value)) return (Value == 0) ? "0" : 0;
5946  if (_isnan(Value)) return "NaN";
5947  return Value > 0 ? "Infinity" : "-Infinity";
5948  #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
5949  switch (fpclassify(Value))
5950  {
5951  case FP_NAN:
5952  return "NaN";
5953 
5954  case FP_INFINITE:
5955  return Value > 0 ? "Infinity" : "-Infinity";
5956 
5957  case FP_ZERO:
5958  return "0";
5959 
5960  default:
5961  return 0;
5962  }
5963  #else
5964  // fallback
5965  const volatile double v = Value;
5966 
5967  if (v == 0) return "0";
5968  if (v != v) return "NaN";
5969  if (v * 2 == v) return Value > 0 ? "Infinity" : "-Infinity";
5970  return 0;
5971  #endif
5972  }
5973 
5974  PUGI__FN bool convert_number_to_boolean(double Value)
5975  {
5976  return (Value != 0 && !is_nan(Value));
5977  }
5978 
5979  PUGI__FN void truncate_zeros(char* begin, char* end)
5980  {
5981  while (begin != end && end[-1] == '0') end--;
5982 
5983  *end = 0;
5984  }
5985 
5986  // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
5987 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
5988  PUGI__FN void convert_number_to_mantissa_exponent(double Value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
5989  {
5990  // get base Values
5991  int sign, exponent;
5992  _ecvt_s(buffer, buffer_size, Value, DBL_DIG + 1, &exponent, &sign);
5993 
5994  // truncate redundant zeros
5995  truncate_zeros(buffer, buffer + strlen(buffer));
5996 
5997  // fill Results
5998  *out_mantissa = buffer;
5999  *out_exponent = exponent;
6000  }
6001 #else
6002  PUGI__FN void convert_number_to_mantissa_exponent(double Value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
6003  {
6004  // get a scientific notation Value with IEEE DBL_DIG decimals
6005  sprintf(buffer, "%.*e", DBL_DIG, Value);
6006  assert(strlen(buffer) < buffer_size);
6007  (void)!buffer_size;
6008 
6009  // get the exponent (possibly negative)
6010  char* exponent_string = strchr(buffer, 'e');
6011  assert(exponent_string);
6012 
6013  int exponent = atoi(exponent_string + 1);
6014 
6015  // extract mantissa string: skip sign
6016  char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
6017  assert(mantissa[0] != '0' && mantissa[1] == '.');
6018 
6019  // divide mantissa by 10 to eliminate integer part
6020  mantissa[1] = mantissa[0];
6021  mantissa++;
6022  exponent++;
6023 
6024  // remove extra mantissa digits and zero-terminate mantissa
6025  truncate_zeros(mantissa, exponent_string);
6026 
6027  // fill Results
6028  *out_mantissa = mantissa;
6029  *out_exponent = exponent;
6030  }
6031 #endif
6032 
6033  PUGI__FN XPathString convert_number_to_string(double Value, XPathAllocator* alloc)
6034  {
6035  // try special number conversion
6036  const Char8* special = convert_number_to_string_special(Value);
6037  if (special) return XPathStringConst(special);
6038 
6039  // get mantissa + exponent form
6040  char mantissa_buffer[64];
6041 
6042  char* mantissa;
6043  int exponent;
6044  convert_number_to_mantissa_exponent(Value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
6045 
6046  // make the number!
6047  Char8 Result[512];
6048  Char8* s = Result;
6049 
6050  // sign
6051  if (Value < 0) *s++ = '-';
6052 
6053  // integer part
6054  if (exponent <= 0)
6055  {
6056  *s++ = '0';
6057  }
6058  else
6059  {
6060  while (exponent > 0)
6061  {
6062  assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
6063  *s++ = *mantissa ? *mantissa++ : '0';
6064  exponent--;
6065  }
6066  }
6067 
6068  // fractional part
6069  if (*mantissa)
6070  {
6071  // decimal point
6072  *s++ = '.';
6073 
6074  // extra zeroes from negative exponent
6075  while (exponent < 0)
6076  {
6077  *s++ = '0';
6078  exponent++;
6079  }
6080 
6081  // extra mantissa digits
6082  while (*mantissa)
6083  {
6084  assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
6085  *s++ = *mantissa++;
6086  }
6087  }
6088 
6089  // zero-terminate
6090  assert(s < Result + sizeof(Result) / sizeof(Result[0]));
6091  *s = 0;
6092 
6093  return XPathString(Result, alloc);
6094  }
6095 
6096  PUGI__FN bool check_Stringo_number_format(const Char8* string)
6097  {
6098  // parse leading whitespace
6099  while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
6100 
6101  // parse sign
6102  if (*string == '-') ++string;
6103 
6104  if (!*string) return false;
6105 
6106  // if there is no integer part, there should be a decimal part with at least one digit
6107  if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
6108 
6109  // parse integer part
6110  while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
6111 
6112  // parse decimal part
6113  if (*string == '.')
6114  {
6115  ++string;
6116 
6117  while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
6118  }
6119 
6120  // parse trailing whitespace
6121  while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
6122 
6123  return *string == 0;
6124  }
6125 
6126  PUGI__FN double convert_Stringo_number(const Char8* string)
6127  {
6128  // check string format
6129  if (!check_Stringo_number_format(string)) return gen_nan();
6130 
6131  // parse string
6132  return atof(string);
6133  }
6134 
6135  PUGI__FN bool convert_Stringo_number(const Char8* begin, const Char8* end, double* out_Result)
6136  {
6137  Char8 buffer[32];
6138 
6139  size_t length = static_cast<size_t>(end - begin);
6140  Char8* scratch = buffer;
6141 
6142  if (length >= sizeof(buffer) / sizeof(buffer[0]))
6143  {
6144  // need to make dummy on-heap copy
6145  scratch = static_cast<Char8*>(Memory::allocate((length + 1) * sizeof(Char8)));
6146  if (!scratch) return false;
6147  }
6148 
6149  // copy string to zero-terminated buffer and perform conversion
6150  memcpy(scratch, begin, length * sizeof(Char8));
6151  scratch[length] = 0;
6152 
6153  *out_Result = convert_Stringo_number(scratch);
6154 
6155  // free dummy buffer
6156  if (scratch != buffer) Memory::deallocate(scratch);
6157 
6158  return true;
6159  }
6160 
6161  PUGI__FN double round_nearest(double Value)
6162  {
6163  return floor(Value + 0.5);
6164  }
6165 
6166  PUGI__FN double round_nearest_nzero(double Value)
6167  {
6168  // same as round_nearest, but returns -0 for [-0.5, -0]
6169  // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
6170  return (Value >= -0.5 && Value <= 0) ? ceil(Value) : floor(Value + 0.5);
6171  }
6172 
6173  PUGI__FN const Char8* qualified_Name(const XPathNode& node)
6174  {
6175  return node.GetAttribute() ? node.GetAttribute().Name() : node.GetNode().Name();
6176  }
6177 
6178  PUGI__FN const Char8* local_Name(const XPathNode& node)
6179  {
6180  const Char8* Name = qualified_Name(node);
6181  const Char8* p = FindChar(Name, ':');
6182 
6183  return p ? p + 1 : Name;
6184  }
6185 
6186  struct namespace_uri_predicate
6187  {
6188  const Char8* prefix;
6189  size_t prefix_length;
6190 
6191  namespace_uri_predicate(const Char8* Name)
6192  {
6193  const Char8* pos = FindChar(Name, ':');
6194 
6195  prefix = pos ? Name : 0;
6196  prefix_length = pos ? static_cast<size_t>(pos - Name) : 0;
6197  }
6198 
6199  bool operator()(const Attribute& a) const
6200  {
6201  const Char8* Name = a.Name();
6202 
6203  if (!starts_with(Name, "xmlns")) return false;
6204 
6205  return prefix ? Name[5] == ':' && strequalrange(Name + 6, prefix, prefix_length) : Name[5] == 0;
6206  }
6207  };
6208 
6209  PUGI__FN const Char8* namespace_uri(const Node& node)
6210  {
6211  namespace_uri_predicate pred = node.Name();
6212 
6213  Node p = node;
6214 
6215  while (p)
6216  {
6217  Attribute a = p.FindAttribute(pred);
6218 
6219  if (a) return a.Value();
6220 
6221  p = p.GetParent();
6222  }
6223 
6224  return "";
6225  }
6226 
6227  PUGI__FN const Char8* namespace_uri(const Attribute& attr, const Node& GetParent)
6228  {
6229  namespace_uri_predicate pred = attr.Name();
6230 
6231  // Default namespace does not apply to attributes
6232  if (!pred.prefix) return "";
6233 
6234  Node p = GetParent;
6235 
6236  while (p)
6237  {
6238  Attribute a = p.FindAttribute(pred);
6239 
6240  if (a) return a.Value();
6241 
6242  p = p.GetParent();
6243  }
6244 
6245  return "";
6246  }
6247 
6248  PUGI__FN const Char8* namespace_uri(const XPathNode& node)
6249  {
6250  return node.GetAttribute() ? namespace_uri(node.GetAttribute(), node.GetParent()) : namespace_uri(node.GetNode());
6251  }
6252 
6253  PUGI__FN void normalize_space(Char8* buffer)
6254  {
6255  Char8* Write = buffer;
6256 
6257  for (Char8* it = buffer; *it; )
6258  {
6259  Char8 ch = *it++;
6260 
6261  if (PUGI__IS_CHARTYPE(ch, ct_space))
6262  {
6263  // replace whitespace sequence with single space
6264  while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
6265 
6266  // avoid leading spaces
6267  if (Write != buffer) *Write++ = ' ';
6268  }
6269  else *Write++ = ch;
6270  }
6271 
6272  // remove trailing space
6273  if (Write != buffer && PUGI__IS_CHARTYPE(Write[-1], ct_space)) Write--;
6274 
6275  // zero-terminate
6276  *Write = 0;
6277  }
6278 
6279  PUGI__FN void translate(Char8* buffer, const Char8* from, const Char8* to)
6280  {
6281  size_t to_length = strlength(to);
6282 
6283  Char8* Write = buffer;
6284 
6285  while (*buffer)
6286  {
6287  PUGI__DMC_VOLATILE Char8 ch = *buffer++;
6288 
6289  const Char8* pos = FindChar(from, ch);
6290 
6291  if (!pos)
6292  *Write++ = ch; // do not process
6293  else if (static_cast<size_t>(pos - from) < to_length)
6294  *Write++ = to[pos - from]; // replace
6295  }
6296 
6297  // zero-terminate
6298  *Write = 0;
6299  }
6300 
6301  struct XPathVariableBoolean: XPathVariable
6302  {
6303  XPathVariableBoolean(): Value(false)
6304  {
6305  }
6306 
6307  bool Value;
6308  Char8 Name[1];
6309  };
6310 
6311  struct XPathVariableNumber: XPathVariable
6312  {
6313  XPathVariableNumber(): Value(0)
6314  {
6315  }
6316 
6317  double Value;
6318  Char8 Name[1];
6319  };
6320 
6321  struct XPathVariableString: XPathVariable
6322  {
6323  XPathVariableString(): Value(0)
6324  {
6325  }
6326 
6327  ~XPathVariableString()
6328  {
6329  if (Value) Memory::deallocate(Value);
6330  }
6331 
6332  Char8* Value;
6333  Char8 Name[1];
6334  };
6335 
6336  struct XPathVariableNodeSet: XPathVariable
6337  {
6338  XPathNodeSet Value;
6339  Char8 Name[1];
6340  };
6341 
6342  static const XPathNodeSet dummy_NodeSet;
6343 
6344  PUGI__FN unsigned int hash_string(const Char8* str)
6345  {
6346  // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
6347  unsigned int Result = 0;
6348 
6349  while (*str)
6350  {
6351  Result += static_cast<unsigned int>(*str++);
6352  Result += Result << 10;
6353  Result ^= Result >> 6;
6354  }
6355 
6356  Result += Result << 3;
6357  Result ^= Result >> 11;
6358  Result += Result << 15;
6359 
6360  return Result;
6361  }
6362 
6363  template <typename T> PUGI__FN T* new_XPathVariable(const Char8* Name)
6364  {
6365  size_t length = strlength(Name);
6366  if (length == 0) return 0; // empty variable names are invalid
6367 
6368  // $$ we can't use offsetof(T, Name) because T is non-POD, so we just allocate additional length characters
6369  void* memory = Memory::allocate(sizeof(T) + length * sizeof(Char8));
6370  if (!memory) return 0;
6371 
6372  T* Result = new (memory) T();
6373 
6374  memcpy(Result->Name, Name, (length + 1) * sizeof(Char8));
6375 
6376  return Result;
6377  }
6378 
6379  PUGI__FN XPathVariable* new_XPathVariable(XPathValueType Type, const Char8* Name)
6380  {
6381  switch (Type)
6382  {
6383  case XPathTypeNodeSet:
6384  return new_XPathVariable<XPathVariableNodeSet>(Name);
6385 
6386  case XPathTypeNumber:
6387  return new_XPathVariable<XPathVariableNumber>(Name);
6388 
6389  case XPathTypeString:
6390  return new_XPathVariable<XPathVariableString>(Name);
6391 
6392  case XPathTypeBoolean:
6393  return new_XPathVariable<XPathVariableBoolean>(Name);
6394 
6395  default:
6396  return 0;
6397  }
6398  }
6399 
6400  template <typename T> PUGI__FN void delete_XPathVariable(T* var)
6401  {
6402  var->~T();
6403  Memory::deallocate(var);
6404  }
6405 
6406  PUGI__FN void delete_XPathVariable(XPathValueType Type, XPathVariable* var)
6407  {
6408  switch (Type)
6409  {
6410  case XPathTypeNodeSet:
6411  delete_XPathVariable(static_cast<XPathVariableNodeSet*>(var));
6412  break;
6413 
6414  case XPathTypeNumber:
6415  delete_XPathVariable(static_cast<XPathVariableNumber*>(var));
6416  break;
6417 
6418  case XPathTypeString:
6419  delete_XPathVariable(static_cast<XPathVariableString*>(var));
6420  break;
6421 
6422  case XPathTypeBoolean:
6423  delete_XPathVariable(static_cast<XPathVariableBoolean*>(var));
6424  break;
6425 
6426  default:
6427  assert(!"Invalid variable Type");
6428  }
6429  }
6430 
6431  PUGI__FN XPathVariable* GetVariable(XPathVariableSet* set, const Char8* begin, const Char8* end)
6432  {
6433  Char8 buffer[32];
6434 
6435  size_t length = static_cast<size_t>(end - begin);
6436  Char8* scratch = buffer;
6437 
6438  if (length >= sizeof(buffer) / sizeof(buffer[0]))
6439  {
6440  // need to make dummy on-heap copy
6441  scratch = static_cast<Char8*>(Memory::allocate((length + 1) * sizeof(Char8)));
6442  if (!scratch) return 0;
6443  }
6444 
6445  // copy string to zero-terminated buffer and perform lookup
6446  memcpy(scratch, begin, length * sizeof(Char8));
6447  scratch[length] = 0;
6448 
6449  XPathVariable* Result = set->Get(scratch);
6450 
6451  // free dummy buffer
6452  if (scratch != buffer) Memory::deallocate(scratch);
6453 
6454  return Result;
6455  }
6456 PUGI__NS_END
6457 
6458 // Internal node set class
6459 PUGI__NS_BEGIN
6460  PUGI__FN XPathNodeSet::CollectionType XPathSort(XPathNode* begin, XPathNode* end, XPathNodeSet::CollectionType Type, bool rev)
6461  {
6462  XPathNodeSet::CollectionType order = rev ? XPathNodeSet::TypeSortedReverse : XPathNodeSet::TypeSorted;
6463 
6464  if (Type == XPathNodeSet::TypeUnsorted)
6465  {
6466  sort(begin, end, document_order_comparator());
6467 
6468  Type = XPathNodeSet::TypeSorted;
6469  }
6470 
6471  if (Type != order) reverse(begin, end);
6472 
6473  return order;
6474  }
6475 
6476  PUGI__FN XPathNode XPathFirst(const XPathNode* begin, const XPathNode* end, XPathNodeSet::CollectionType Type)
6477  {
6478  if (begin == end) return XPathNode();
6479 
6480  switch (Type)
6481  {
6482  case XPathNodeSet::TypeSorted:
6483  return *begin;
6484 
6485  case XPathNodeSet::TypeSortedReverse:
6486  return *(end - 1);
6487 
6488  case XPathNodeSet::TypeUnsorted:
6489  return *min_element(begin, end, document_order_comparator());
6490 
6491  default:
6492  assert(!"Invalid node set Type");
6493  return XPathNode();
6494  }
6495  }
6496 
6497  class XPathNodeSet_raw
6498  {
6499  XPathNodeSet::CollectionType _type;
6500 
6501  XPathNode* _begin;
6502  XPathNode* _end;
6503  XPathNode* _eos;
6504 
6505  public:
6506  XPathNodeSet_raw(): _type(XPathNodeSet::TypeUnsorted), _begin(0), _end(0), _eos(0)
6507  {
6508  }
6509 
6510  XPathNode* begin() const
6511  {
6512  return _begin;
6513  }
6514 
6515  XPathNode* end() const
6516  {
6517  return _end;
6518  }
6519 
6520  bool Empty() const
6521  {
6522  return _begin == _end;
6523  }
6524 
6525  size_t size() const
6526  {
6527  return static_cast<size_t>(_end - _begin);
6528  }
6529 
6530  XPathNode first() const
6531  {
6532  return XPathFirst(_begin, _end, _type);
6533  }
6534 
6535  void push_back(const XPathNode& node, XPathAllocator* alloc)
6536  {
6537  if (_end == _eos)
6538  {
6539  size_t capacity = static_cast<size_t>(_eos - _begin);
6540 
6541  // get new capacity (1.5x rule)
6542  size_t new_capacity = capacity + capacity / 2 + 1;
6543 
6544  // reallocate the old array or allocate a new one
6545  XPathNode* data = static_cast<XPathNode*>(alloc->reallocate(_begin, capacity * sizeof(XPathNode), new_capacity * sizeof(XPathNode)));
6546  assert(data);
6547 
6548  // finalize
6549  _begin = data;
6550  _end = data + capacity;
6551  _eos = data + new_capacity;
6552  }
6553 
6554  *_end++ = node;
6555  }
6556 
6557  void append(const XPathNode* begin_, const XPathNode* end_, XPathAllocator* alloc)
6558  {
6559  size_t size_ = static_cast<size_t>(_end - _begin);
6560  size_t capacity = static_cast<size_t>(_eos - _begin);
6561  size_t count = static_cast<size_t>(end_ - begin_);
6562 
6563  if (size_ + count > capacity)
6564  {
6565  // reallocate the old array or allocate a new one
6566  XPathNode* data = static_cast<XPathNode*>(alloc->reallocate(_begin, capacity * sizeof(XPathNode), (size_ + count) * sizeof(XPathNode)));
6567  assert(data);
6568 
6569  // finalize
6570  _begin = data;
6571  _end = data + size_;
6572  _eos = data + size_ + count;
6573  }
6574 
6575  memcpy(_end, begin_, count * sizeof(XPathNode));
6576  _end += count;
6577  }
6578 
6579  void sort_do()
6580  {
6581  _type = XPathSort(_begin, _end, _type, false);
6582  }
6583 
6584  void truncate(XPathNode* pos)
6585  {
6586  assert(_begin <= pos && pos <= _end);
6587 
6588  _end = pos;
6589  }
6590 
6591  void RemoveDuplicates()
6592  {
6593  if (_type == XPathNodeSet::TypeUnsorted)
6594  sort(_begin, _end, duplicate_comparator());
6595 
6596  _end = unique(_begin, _end);
6597  }
6598 
6599  XPathNodeSet::CollectionType Type() const
6600  {
6601  return _type;
6602  }
6603 
6604  void SetType(XPathNodeSet::CollectionType Value)
6605  {
6606  _type = Value;
6607  }
6608  };
6609 PUGI__NS_END
6610 
6611 PUGI__NS_BEGIN
6612  struct XPathContext
6613  {
6614  XPathNode n;
6615  size_t position, size;
6616 
6617  XPathContext(const XPathNode& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
6618  {
6619  }
6620  };
6621 
// Lexeme kinds produced by XPathLexer. The trailing comment shows the input text that
// yields each kind; lex_none marks input the lexer could not recognize.
enum lexeme_t
{
    lex_none = 0,           // unrecognized input
    lex_equal,              // =
    lex_not_equal,          // !=
    lex_less,               // <
    lex_greater,            // >
    lex_less_or_equal,      // <=
    lex_greater_or_equal,   // >=
    lex_plus,               // +
    lex_minus,              // -
    lex_multiply,           // *
    lex_union,              // |
    lex_var_ref,            // $name or $prefix:name
    lex_open_brace,         // (
    lex_close_brace,        // )
    lex_quoted_string,      // "..." or '...'
    lex_number,             // digits with an optional fractional part, or .digits
    lex_slash,              // /
    lex_double_slash,       // //
    lex_open_square_brace,  // [
    lex_close_square_brace, // ]
    lex_string,             // name, prefix:name or prefix:*
    lex_comma,              // ,
    lex_axis_attribute,     // @
    lex_dot,                // .
    lex_double_dot,         // ..
    lex_double_colon,       // ::
    lex_eof                 // end of the query string
};
6652 
6653  struct XPathLexerString
6654  {
6655  const Char8* begin;
6656  const Char8* end;
6657 
6658  XPathLexerString(): begin(0), end(0)
6659  {
6660  }
6661 
6662  bool operator==(const Char8* other) const
6663  {
6664  size_t length = static_cast<size_t>(end - begin);
6665 
6666  return strequalrange(other, begin, length);
6667  }
6668  };
6669 
// Tokenizer for XPath query strings.
// The constructor scans the first lexeme; every call to next() scans the following one.
// The current lexeme is exposed through current(), current_pos() and contents().
6670  class XPathLexer
6671  {
 // position in the query just past the current lexeme (where the next scan starts)
6672  const Char8* _cur;
 // position where the current lexeme starts (after leading whitespace)
6673  const Char8* _cur_lexeme_pos;
 // text range of the current lexeme; only set for variable references, numbers, names and quoted strings
6674  XPathLexerString _cur_lexeme_contents;
6675 
 // kind of the current lexeme
6676  lexeme_t _cur_lexeme;
6677 
6678  public:
 // Starts lexing at `query` and immediately scans the first lexeme.
6679  explicit XPathLexer(const Char8* query): _cur(query)
6680  {
6681  next();
6682  }
6683 
 // Returns the current scan position (_cur).
6684  const Char8* state() const
6685  {
6686  return _cur;
6687  }
6688 
 // Scans one lexeme starting at _cur and updates the current-lexeme members.
 // Leading ct_space characters are skipped first. Input that cannot be recognized yields
 // lex_none; in most of those branches the scan position is not advanced past the
 // offending character.
6689  void next()
6690  {
6691  const Char8* cur = _cur;
6692 
6693  while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
6694 
6695  // Save lexeme position for error reporting
6696  _cur_lexeme_pos = cur;
6697 
6698  switch (*cur)
6699  {
 // terminating zero: end of the query, position is not advanced
6700  case 0:
6701  _cur_lexeme = lex_eof;
6702  break;
6703 
6704  case '>':
6705  if (*(cur+1) == '=')
6706  {
6707  cur += 2;
6708  _cur_lexeme = lex_greater_or_equal;
6709  }
6710  else
6711  {
6712  cur += 1;
6713  _cur_lexeme = lex_greater;
6714  }
6715  break;
6716 
6717  case '<':
6718  if (*(cur+1) == '=')
6719  {
6720  cur += 2;
6721  _cur_lexeme = lex_less_or_equal;
6722  }
6723  else
6724  {
6725  cur += 1;
6726  _cur_lexeme = lex_less;
6727  }
6728  break;
6729 
 // '!' is only recognized as part of "!="
6730  case '!':
6731  if (*(cur+1) == '=')
6732  {
6733  cur += 2;
6734  _cur_lexeme = lex_not_equal;
6735  }
6736  else
6737  {
6738  _cur_lexeme = lex_none;
6739  }
6740  break;
6741 
6742  case '=':
6743  cur += 1;
6744  _cur_lexeme = lex_equal;
6745 
6746  break;
6747 
6748  case '+':
6749  cur += 1;
6750  _cur_lexeme = lex_plus;
6751 
6752  break;
6753 
6754  case '-':
6755  cur += 1;
6756  _cur_lexeme = lex_minus;
6757 
6758  break;
6759 
6760  case '*':
6761  cur += 1;
6762  _cur_lexeme = lex_multiply;
6763 
6764  break;
6765 
6766  case '|':
6767  cur += 1;
6768  _cur_lexeme = lex_union;
6769 
6770  break;
6771 
 // variable reference: '$' followed by a name or prefix:name; contents exclude the '$'
6772  case '$':
6773  cur += 1;
6774 
6775  if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
6776  {
6777  _cur_lexeme_contents.begin = cur;
6778 
6779  while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
6780 
6781  if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
6782  {
6783  cur++; // :
6784 
6785  while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
6786  }
6787 
6788  _cur_lexeme_contents.end = cur;
6789 
6790  _cur_lexeme = lex_var_ref;
6791  }
6792  else
6793  {
6794  _cur_lexeme = lex_none;
6795  }
6796 
6797  break;
6798 
6799  case '(':
6800  cur += 1;
6801  _cur_lexeme = lex_open_brace;
6802 
6803  break;
6804 
6805  case ')':
6806  cur += 1;
6807  _cur_lexeme = lex_close_brace;
6808 
6809  break;
6810 
6811  case '[':
6812  cur += 1;
6813  _cur_lexeme = lex_open_square_brace;
6814 
6815  break;
6816 
6817  case ']':
6818  cur += 1;
6819  _cur_lexeme = lex_close_square_brace;
6820 
6821  break;
6822 
6823  case ',':
6824  cur += 1;
6825  _cur_lexeme = lex_comma;
6826 
6827  break;
6828 
6829  case '/':
6830  if (*(cur+1) == '/')
6831  {
6832  cur += 2;
6833  _cur_lexeme = lex_double_slash;
6834  }
6835  else
6836  {
6837  cur += 1;
6838  _cur_lexeme = lex_slash;
6839  }
6840  break;
6841 
 // '.' is "..", the start of a number such as ".5", or a single dot
6842  case '.':
6843  if (*(cur+1) == '.')
6844  {
6845  cur += 2;
6846  _cur_lexeme = lex_double_dot;
6847  }
6848  else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
6849  {
6850  _cur_lexeme_contents.begin = cur; // .
6851 
6852  ++cur;
6853 
6854  while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
6855 
6856  _cur_lexeme_contents.end = cur;
6857 
6858  _cur_lexeme = lex_number;
6859  }
6860  else
6861  {
6862  cur += 1;
6863  _cur_lexeme = lex_dot;
6864  }
6865  break;
6866 
6867  case '@':
6868  cur += 1;
6869  _cur_lexeme = lex_axis_attribute;
6870 
6871  break;
6872 
 // quoted string: contents are the characters between the quotes, taken verbatim
 // (the scan simply runs to the matching quote); a missing closing quote yields lex_none
6873  case '"':
6874  case '\'':
6875  {
6876  Char8 terminator = *cur;
6877 
6878  ++cur;
6879 
6880  _cur_lexeme_contents.begin = cur;
6881  while (*cur && *cur != terminator) cur++;
6882  _cur_lexeme_contents.end = cur;
6883 
6884  if (!*cur)
6885  _cur_lexeme = lex_none;
6886  else
6887  {
6888  cur += 1;
6889  _cur_lexeme = lex_quoted_string;
6890  }
6891 
6892  break;
6893  }
6894 
 // a lone ':' is not a lexeme; only "::" is recognized here
6895  case ':':
6896  if (*(cur+1) == ':')
6897  {
6898  cur += 2;
6899  _cur_lexeme = lex_double_colon;
6900  }
6901  else
6902  {
6903  _cur_lexeme = lex_none;
6904  }
6905  break;
6906 
 // everything else: a number starting with a digit, or a name
6907  default:
6908  if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
6909  {
6910  _cur_lexeme_contents.begin = cur;
6911 
6912  while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
6913 
 // optional fractional part; a trailing '.' with no digits after it is still consumed
6914  if (*cur == '.')
6915  {
6916  cur++;
6917 
6918  while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
6919  }
6920 
6921  _cur_lexeme_contents.end = cur;
6922 
6923  _cur_lexeme = lex_number;
6924  }
6925  else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
6926  {
6927  _cur_lexeme_contents.begin = cur;
6928 
6929  while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
6930 
 // a ':' is only consumed when it continues the name ("prefix:*" or "prefix:name");
 // otherwise it is left for the next lexeme (e.g. the "::" after an axis name)
6931  if (cur[0] == ':')
6932  {
6933  if (cur[1] == '*') // namespace test ncName:*
6934  {
6935  cur += 2; // :*
6936  }
6937  else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
6938  {
6939  cur++; // :
6940 
6941  while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
6942  }
6943  }
6944 
6945  _cur_lexeme_contents.end = cur;
6946 
6947  _cur_lexeme = lex_string;
6948  }
6949  else
6950  {
6951  _cur_lexeme = lex_none;
6952  }
6953  }
6954 
 // commit the new scan position
6955  _cur = cur;
6956  }
6957 
 // Returns the kind of the current lexeme.
6958  lexeme_t current() const
6959  {
6960  return _cur_lexeme;
6961  }
6962 
 // Returns the position in the query where the current lexeme starts.
6963  const Char8* current_pos() const
6964  {
6965  return _cur_lexeme_pos;
6966  }
6967 
 // Returns the text range of the current lexeme.
 // Only valid for lexemes that carry text; this is asserted.
6968  const XPathLexerString& contents() const
6969  {
6970  assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
6971 
6972  return _cur_lexeme_contents;
6973  }
6974  };
6975 
// Kinds of XPath AST nodes. The comments use the XPath spelling of each construct;
// "left", "right" and "third" refer to the operand sub-expressions.
enum ast_type_t
{
    // boolean and comparison operators
    ast_op_or,                  // left or right
    ast_op_and,                 // left and right
    ast_op_equal,               // left = right
    ast_op_not_equal,           // left != right
    ast_op_less,                // left < right
    ast_op_greater,             // left > right
    ast_op_less_or_equal,       // left <= right
    ast_op_greater_or_equal,    // left >= right

    // arithmetic operators
    ast_op_add,                 // left + right
    ast_op_subtract,            // left - right
    ast_op_multiply,            // left * right
    ast_op_divide,              // left div right
    ast_op_mod,                 // left mod right
    ast_op_negate,              // - left (unary minus)

    // node set operators
    ast_op_union,               // left | right
    ast_predicate,              // apply predicate to set; next points to next predicate
    ast_filter,                 // select * from left where right
    ast_filter_posinv,          // select * from left where right; proximity position invariant

    // leaf values
    ast_string_constant,        // string constant
    ast_number_constant,        // number constant
    ast_variable,               // variable

    // function calls; a numeric suffix is the number of arguments
    ast_func_last,              // last()
    ast_func_position,          // position()
    ast_func_count,             // count(left)
    ast_func_id,                // id(left)
    ast_func_local_Name_0,      // local-name()
    ast_func_local_Name_1,      // local-name(left)
    ast_func_namespace_uri_0,   // namespace-uri()
    ast_func_namespace_uri_1,   // namespace-uri(left)
    ast_func_Name_0,            // name()
    ast_func_Name_1,            // name(left)
    ast_func_string_0,          // string()
    ast_func_string_1,          // string(left)
    ast_func_concat,            // concat(left, right, siblings)
    ast_func_starts_with,       // starts-with(left, right)
    ast_func_contains,          // contains(left, right)
    ast_func_substring_before,  // substring-before(left, right)
    ast_func_substring_after,   // substring-after(left, right)
    ast_func_substring_2,       // substring(left, right)
    ast_func_substring_3,       // substring(left, right, third)
    ast_func_string_length_0,   // string-length()
    ast_func_string_length_1,   // string-length(left)
    ast_func_normalize_space_0, // normalize-space()
    ast_func_normalize_space_1, // normalize-space(left)
    ast_func_translate,         // translate(left, right, third)
    ast_func_boolean,           // boolean(left)
    ast_func_not,               // not(left)
    ast_func_true,              // true()
    ast_func_false,             // false()
    ast_func_lang,              // lang(left)
    ast_func_number_0,          // number()
    ast_func_number_1,          // number(left)
    ast_func_sum,               // sum(left)
    ast_func_floor,             // floor(left)
    ast_func_ceiling,           // ceiling(left)
    ast_func_round,             // round(left)

    // location steps
    ast_step,                   // process set left with step
    ast_step_GetRoot            // select root node
};
7037 
// XPath axes a location step can be evaluated along.
enum axis_t
{
    axis_ancestor,           // ancestor::
    axis_ancestor_or_self,   // ancestor-or-self::
    axis_attribute,          // attribute::
    axis_GetChild,           // child::
    axis_descendant,         // descendant::
    axis_descendant_or_self, // descendant-or-self::
    axis_following,          // following::
    axis_following_sibling,  // following-sibling::
    axis_namespace,          // namespace::
    axis_GetParent,          // parent::
    axis_preceding,          // preceding::
    axis_preceding_sibling,  // preceding-sibling::
    axis_self                // self::
};
7054 
// Node tests a location step can apply to the nodes found along its axis.
enum nodetest_t
{
    nodetest_none,            // no test
    nodetest_Name,            // match by name
    nodetest_type_node,       // node()
    nodetest_type_comment,    // comment()
    nodetest_type_pi,         // processing-instruction()
    nodetest_type_text,       // text()
    nodetest_pi,              // processing-instruction with a given target
    nodetest_all,             // *
    nodetest_all_in_namespace // prefix:*
};
7067 
7068  template <axis_t N> struct axis_to_type
7069  {
7070  static const axis_t axis;
7071  };
7072 
7073  template <axis_t N> const axis_t axis_to_type<N>::axis = N;
7074 
7075  class XPathAstNode
7076  {
// Data members of an AST node. The enum-valued fields are stored as char; the constructors
// fill them with static_cast<char> from ast_type_t / XPathValueType / axis_t / nodetest_t.
7077  private:
7078  // node type
7079  char _type;
 // value type the node evaluates to (an XPathValueType)
7080  char _retType;
7081 
7082  // for ast_step / ast_predicate
7083  char _axis;
7084  char _test;
7085 
7086  // tree node structure
7087  XPathAstNode* _left;
7088  XPathAstNode* _right;
 // next node in a chain (e.g. the next predicate of a step)
7089  XPathAstNode* _next;
7090 
 // payload; which member is meaningful depends on _type
7091  union
7092  {
7093  // Value for ast_string_constant
7094  const Char8* string;
7095  // Value for ast_number_constant
7096  double number;
7097  // variable for ast_variable
7098  XPathVariable* variable;
7099  // node test for ast_step (node Name/namespace/node Type/pi target)
7100  const Char8* nodetest;
7101  } _data;
7102 
 // copy construction and assignment are declared private here (no definition visible in this chunk)
7103  XPathAstNode(const XPathAstNode&);
7104  XPathAstNode& operator=(const XPathAstNode&);
7105 
7106  template <class Comp> static bool compare_eq(XPathAstNode* lhs, XPathAstNode* rhs, const XPathContext& c, const XPathStack& stack, const Comp& comp)
7107  {
7108  XPathValueType lt = lhs->retType(), rt = rhs->retType();
7109 
7110  if (lt != XPathTypeNodeSet && rt != XPathTypeNodeSet)
7111  {
7112  if (lt == XPathTypeBoolean || rt == XPathTypeBoolean)
7113  return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
7114  else if (lt == XPathTypeNumber || rt == XPathTypeNumber)
7115  return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
7116  else if (lt == XPathTypeString || rt == XPathTypeString)
7117  {
7118  XPathAllocatorCapture cr(stack.Result);
7119 
7120  XPathString ls = lhs->eval_string(c, stack);
7121  XPathString rs = rhs->eval_string(c, stack);
7122 
7123  return comp(ls, rs);
7124  }
7125  }
7126  else if (lt == XPathTypeNodeSet && rt == XPathTypeNodeSet)
7127  {
7128  XPathAllocatorCapture cr(stack.Result);
7129 
7130  XPathNodeSet_raw ls = lhs->eval_NodeSet(c, stack);
7131  XPathNodeSet_raw rs = rhs->eval_NodeSet(c, stack);
7132 
7133  for (const XPathNode* li = ls.begin(); li != ls.end(); ++li)
7134  for (const XPathNode* ri = rs.begin(); ri != rs.end(); ++ri)
7135  {
7136  XPathAllocatorCapture cri(stack.Result);
7137 
7138  if (comp(string_Value(*li, stack.Result), string_Value(*ri, stack.Result)))
7139  return true;
7140  }
7141 
7142  return false;
7143  }
7144  else
7145  {
7146  if (lt == XPathTypeNodeSet)
7147  {
7148  swap(lhs, rhs);
7149  swap(lt, rt);
7150  }
7151 
7152  if (lt == XPathTypeBoolean)
7153  return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
7154  else if (lt == XPathTypeNumber)
7155  {
7156  XPathAllocatorCapture cr(stack.Result);
7157 
7158  double l = lhs->eval_number(c, stack);
7159  XPathNodeSet_raw rs = rhs->eval_NodeSet(c, stack);
7160 
7161  for (const XPathNode* ri = rs.begin(); ri != rs.end(); ++ri)
7162  {
7163  XPathAllocatorCapture cri(stack.Result);
7164 
7165  if (comp(l, convert_Stringo_number(string_Value(*ri, stack.Result).c_str())))
7166  return true;
7167  }
7168 
7169  return false;
7170  }
7171  else if (lt == XPathTypeString)
7172  {
7173  XPathAllocatorCapture cr(stack.Result);
7174 
7175  XPathString l = lhs->eval_string(c, stack);
7176  XPathNodeSet_raw rs = rhs->eval_NodeSet(c, stack);
7177 
7178  for (const XPathNode* ri = rs.begin(); ri != rs.end(); ++ri)
7179  {
7180  XPathAllocatorCapture cri(stack.Result);
7181 
7182  if (comp(l, string_Value(*ri, stack.Result)))
7183  return true;
7184  }
7185 
7186  return false;
7187  }
7188  }
7189 
7190  assert(!"Wrong Types");
7191  return false;
7192  }
7193 
7194  template <class Comp> static bool compare_rel(XPathAstNode* lhs, XPathAstNode* rhs, const XPathContext& c, const XPathStack& stack, const Comp& comp)
7195  {
7196  XPathValueType lt = lhs->retType(), rt = rhs->retType();
7197 
7198  if (lt != XPathTypeNodeSet && rt != XPathTypeNodeSet)
7199  return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
7200  else if (lt == XPathTypeNodeSet && rt == XPathTypeNodeSet)
7201  {
7202  XPathAllocatorCapture cr(stack.Result);
7203 
7204  XPathNodeSet_raw ls = lhs->eval_NodeSet(c, stack);
7205  XPathNodeSet_raw rs = rhs->eval_NodeSet(c, stack);
7206 
7207  for (const XPathNode* li = ls.begin(); li != ls.end(); ++li)
7208  {
7209  XPathAllocatorCapture cri(stack.Result);
7210 
7211  double l = convert_Stringo_number(string_Value(*li, stack.Result).c_str());
7212 
7213  for (const XPathNode* ri = rs.begin(); ri != rs.end(); ++ri)
7214  {
7215  XPathAllocatorCapture crii(stack.Result);
7216 
7217  if (comp(l, convert_Stringo_number(string_Value(*ri, stack.Result).c_str())))
7218  return true;
7219  }
7220  }
7221 
7222  return false;
7223  }
7224  else if (lt != XPathTypeNodeSet && rt == XPathTypeNodeSet)
7225  {
7226  XPathAllocatorCapture cr(stack.Result);
7227 
7228  double l = lhs->eval_number(c, stack);
7229  XPathNodeSet_raw rs = rhs->eval_NodeSet(c, stack);
7230 
7231  for (const XPathNode* ri = rs.begin(); ri != rs.end(); ++ri)
7232  {
7233  XPathAllocatorCapture cri(stack.Result);
7234 
7235  if (comp(l, convert_Stringo_number(string_Value(*ri, stack.Result).c_str())))
7236  return true;
7237  }
7238 
7239  return false;
7240  }
7241  else if (lt == XPathTypeNodeSet && rt != XPathTypeNodeSet)
7242  {
7243  XPathAllocatorCapture cr(stack.Result);
7244 
7245  XPathNodeSet_raw ls = lhs->eval_NodeSet(c, stack);
7246  double r = rhs->eval_number(c, stack);
7247 
7248  for (const XPathNode* li = ls.begin(); li != ls.end(); ++li)
7249  {
7250  XPathAllocatorCapture cri(stack.Result);
7251 
7252  if (comp(convert_Stringo_number(string_Value(*li, stack.Result).c_str()), r))
7253  return true;
7254  }
7255 
7256  return false;
7257  }
7258  else
7259  {
7260  assert(!"Wrong Types");
7261  return false;
7262  }
7263  }
7264 
7265  void apply_predicate(XPathNodeSet_raw& ns, size_t first, XPathAstNode* expr, const XPathStack& stack)
7266  {
7267  assert(ns.size() >= first);
7268 
7269  size_t i = 1;
7270  size_t size = ns.size() - first;
7271 
7272  XPathNode* last = ns.begin() + first;
7273 
7274  // RemoveIf... or well, sort of
7275  for (XPathNode* it = last; it != ns.end(); ++it, ++i)
7276  {
7277  XPathContext c(*it, i, size);
7278 
7279  if (expr->retType() == XPathTypeNumber)
7280  {
7281  if (expr->eval_number(c, stack) == i)
7282  *last++ = *it;
7283  }
7284  else if (expr->eval_boolean(c, stack))
7285  *last++ = *it;
7286  }
7287 
7288  ns.truncate(last);
7289  }
7290 
7291  void apply_predicates(XPathNodeSet_raw& ns, size_t first, const XPathStack& stack)
7292  {
7293  if (ns.size() == first) return;
7294 
7295  for (XPathAstNode* pred = _right; pred; pred = pred->_next)
7296  {
7297  apply_predicate(ns, first, pred->_left, stack);
7298  }
7299  }
7300 
7301  void step_push(XPathNodeSet_raw& ns, const Attribute& a, const Node& GetParent, XPathAllocator* alloc)
7302  {
7303  if (!a) return;
7304 
7305  const Char8* Name = a.Name();
7306 
7307  // There are no GetAttribute nodes corresponding to attributes that declare namespaces
7308  // That is, "xmlns:..." or "xmlns"
7309  if (starts_with(Name, "xmlns") && (Name[5] == 0 || Name[5] == ':')) return;
7310 
7311  switch (_test)
7312  {
7313  case nodetest_Name:
7314  if (strequal(Name, _data.nodetest)) ns.push_back(XPathNode(a, GetParent), alloc);
7315  break;
7316 
7317  case nodetest_type_node:
7318  case nodetest_all:
7319  ns.push_back(XPathNode(a, GetParent), alloc);
7320  break;
7321 
7322  case nodetest_all_in_namespace:
7323  if (starts_with(Name, _data.nodetest))
7324  ns.push_back(XPathNode(a, GetParent), alloc);
7325  break;
7326 
7327  default:
7328  ;
7329  }
7330  }
7331 
7332  void step_push(XPathNodeSet_raw& ns, const Node& n, XPathAllocator* alloc)
7333  {
7334  if (!n) return;
7335 
7336  switch (_test)
7337  {
7338  case nodetest_Name:
7339  if (n.Type() == NodeElement && strequal(n.Name(), _data.nodetest)) ns.push_back(n, alloc);
7340  break;
7341 
7342  case nodetest_type_node:
7343  ns.push_back(n, alloc);
7344  break;
7345 
7346  case nodetest_type_comment:
7347  if (n.Type() == NodeComment)
7348  ns.push_back(n, alloc);
7349  break;
7350 
7351  case nodetest_type_text:
7352  if (n.Type() == NodePcdata || n.Type() == NodeCdata)
7353  ns.push_back(n, alloc);
7354  break;
7355 
7356  case nodetest_type_pi:
7357  if (n.Type() == NodePi)
7358  ns.push_back(n, alloc);
7359  break;
7360 
7361  case nodetest_pi:
7362  if (n.Type() == NodePi && strequal(n.Name(), _data.nodetest))
7363  ns.push_back(n, alloc);
7364  break;
7365 
7366  case nodetest_all:
7367  if (n.Type() == NodeElement)
7368  ns.push_back(n, alloc);
7369  break;
7370 
7371  case nodetest_all_in_namespace:
7372  if (n.Type() == NodeElement && starts_with(n.Name(), _data.nodetest))
7373  ns.push_back(n, alloc);
7374  break;
7375 
7376  default:
7377  assert(!"Unknown axis");
7378  }
7379  }
7380 
// Collects into `ns` the nodes reachable from node `n` along the axis encoded in the tag
// type T (T::axis). Every candidate goes through step_push, which applies the node test.
// All traversals are iterative (no recursion). The namespace axis has no case here and
// ends up in the default assert.
7381  template <class T> void step_fill(XPathNodeSet_raw& ns, const Node& n, XPathAllocator* alloc, T)
7382  {
7383  const axis_t axis = T::axis;
7384 
7385  switch (axis)
7386  {
7387  case axis_attribute:
7388  {
7389  for (Attribute a = n.GetFirstAttribute(); a; a = a.GetNextAttribute())
7390  step_push(ns, a, n, alloc);
7391 
7392  break;
7393  }
7394 
7395  case axis_GetChild:
7396  {
7397  for (Node c = n.GetFirstChild(); c; c = c.GetNextSibling())
7398  step_push(ns, c, alloc);
7399 
7400  break;
7401  }
7402 
7403  case axis_descendant:
7404  case axis_descendant_or_self:
7405  {
7406  if (axis == axis_descendant_or_self)
7407  step_push(ns, n, alloc);
7408 
 // walk the subtree below n: a node is pushed before its children are visited
7409  Node cur = n.GetFirstChild();
7410 
7411  while (cur && cur != n)
7412  {
7413  step_push(ns, cur, alloc);
7414 
7415  if (cur.GetFirstChild())
7416  cur = cur.GetFirstChild();
7417  else if (cur.GetNextSibling())
7418  cur = cur.GetNextSibling();
7419  else
7420  {
 // no child and no sibling: climb until an ancestor has a next sibling, stopping at n
7421  while (!cur.GetNextSibling() && cur != n)
7422  cur = cur.GetParent();
7423 
7424  if (cur != n) cur = cur.GetNextSibling();
7425  }
7426  }
7427 
7428  break;
7429  }
7430 
7431  case axis_following_sibling:
7432  {
7433  for (Node c = n.GetNextSibling(); c; c = c.GetNextSibling())
7434  step_push(ns, c, alloc);
7435 
7436  break;
7437  }
7438 
7439  case axis_preceding_sibling:
7440  {
 // siblings are visited from the nearest one backwards
7441  for (Node c = n.GetPreviousSibling(); c; c = c.GetPreviousSibling())
7442  step_push(ns, c, alloc);
7443 
7444  break;
7445  }
7446 
7447  case axis_following:
7448  {
7449  Node cur = n;
7450 
7451  // exit from this node so that we don't include descendants
7452  while (cur && !cur.GetNextSibling()) cur = cur.GetParent();
7453  cur = cur.GetNextSibling();
7454 
 // cur may be an empty handle here; step_push ignores empty handles and the loop
 // then leaves through the break below
7455  for (;;)
7456  {
7457  step_push(ns, cur, alloc);
7458 
7459  if (cur.GetFirstChild())
7460  cur = cur.GetFirstChild();
7461  else if (cur.GetNextSibling())
7462  cur = cur.GetNextSibling();
7463  else
7464  {
7465  while (cur && !cur.GetNextSibling()) cur = cur.GetParent();
7466  cur = cur.GetNextSibling();
7467 
7468  if (!cur) break;
7469  }
7470  }
7471 
7472  break;
7473  }
7474 
7475  case axis_preceding:
7476  {
7477  Node cur = n;
7478 
 // start from the closest previous sibling of n or of one of its ancestors
7479  while (cur && !cur.GetPreviousSibling()) cur = cur.GetParent();
7480  cur = cur.GetPreviousSibling();
7481 
 // backwards walk: descend to the last child first, push leaves, then push
 // parents on the way back up unless they are ancestors of n
7482  for (;;)
7483  {
7484  if (cur.GetLastChild())
7485  cur = cur.GetLastChild();
7486  else
7487  {
7488  // leaf node, can't be ancestor
7489  step_push(ns, cur, alloc);
7490 
7491  if (cur.GetPreviousSibling())
7492  cur = cur.GetPreviousSibling();
7493  else
7494  {
7495  do
7496  {
7497  cur = cur.GetParent();
7498  if (!cur) break;
7499 
7500  if (!NodeIs_ancestor(cur, n)) step_push(ns, cur, alloc);
7501  }
7502  while (!cur.GetPreviousSibling());
7503 
7504  cur = cur.GetPreviousSibling();
7505 
7506  if (!cur) break;
7507  }
7508  }
7509  }
7510 
7511  break;
7512  }
7513 
7514  case axis_ancestor:
7515  case axis_ancestor_or_self:
7516  {
7517  if (axis == axis_ancestor_or_self)
7518  step_push(ns, n, alloc);
7519 
 // follow the parent chain upwards until it runs out
7520  Node cur = n.GetParent();
7521 
7522  while (cur)
7523  {
7524  step_push(ns, cur, alloc);
7525 
7526  cur = cur.GetParent();
7527  }
7528 
7529  break;
7530  }
7531 
7532  case axis_self:
7533  {
7534  step_push(ns, n, alloc);
7535 
7536  break;
7537  }
7538 
7539  case axis_GetParent:
7540  {
7541  if (n.GetParent()) step_push(ns, n.GetParent(), alloc);
7542 
7543  break;
7544  }
7545 
7546  default:
7547  assert(!"Unimplemented axis");
7548  }
7549  }
7550 
// Collects into `ns` the nodes reachable along axis T::axis when the context node is the
// attribute `a` of node `p`. Only the axes listed below are handled; any other axis ends
// up in the default assert. `v` is the axis tag object, passed on when delegating to the
// node overload.
7551  template <class T> void step_fill(XPathNodeSet_raw& ns, const Attribute& a, const Node& p, XPathAllocator* alloc, T v)
7552  {
7553  const axis_t axis = T::axis;
7554 
7555  switch (axis)
7556  {
7557  case axis_ancestor:
7558  case axis_ancestor_or_self:
7559  {
7560  if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node Type test
7561  step_push(ns, a, p, alloc);
7562 
 // the owning node p is the attribute's first ancestor
7563  Node cur = p;
7564 
7565  while (cur)
7566  {
7567  step_push(ns, cur, alloc);
7568 
7569  cur = cur.GetParent();
7570  }
7571 
7572  break;
7573  }
7574 
 // nothing is traversed below an attribute, so both axes reduce to the attribute itself
7575  case axis_descendant_or_self:
7576  case axis_self:
7577  {
7578  if (_test == nodetest_type_node) // reject attributes based on principal node Type test
7579  step_push(ns, a, p, alloc);
7580 
7581  break;
7582  }
7583 
7584  case axis_following:
7585  {
7586  Node cur = p;
7587 
 // unlike the node overload, the walk starts inside p: the children of the owning
 // node are visited too. cur is advanced before it is pushed, so p itself is skipped.
7588  for (;;)
7589  {
7590  if (cur.GetFirstChild())
7591  cur = cur.GetFirstChild();
7592  else if (cur.GetNextSibling())
7593  cur = cur.GetNextSibling();
7594  else
7595  {
7596  while (cur && !cur.GetNextSibling()) cur = cur.GetParent();
7597  cur = cur.GetNextSibling();
7598 
7599  if (!cur) break;
7600  }
7601 
7602  step_push(ns, cur, alloc);
7603  }
7604 
7605  break;
7606  }
7607 
7608  case axis_GetParent:
7609  {
7610  step_push(ns, p, alloc);
7611 
7612  break;
7613  }
7614 
7615  case axis_preceding:
7616  {
7617  // preceding:: axis does not include GetAttribute nodes and GetAttribute ancestors (they are the same as GetParent's ancestors), so we can reuse node preceding
7618  step_fill(ns, p, alloc, v);
7619  break;
7620  }
7621 
7622  default:
7623  assert(!"Unimplemented axis");
7624  }
7625  }
7626 
7627  template <class T> XPathNodeSet_raw step_do(const XPathContext& c, const XPathStack& stack, T v)
7628  {
7629  const axis_t axis = T::axis;
7630  bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_GetParent || axis == axis_preceding || axis == axis_self);
7631 
7632  XPathNodeSet_raw ns;
7633  ns.SetType((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? XPathNodeSet::TypeSortedReverse : XPathNodeSet::TypeSorted);
7634 
7635  if (_left)
7636  {
7637  XPathNodeSet_raw s = _left->eval_NodeSet(c, stack);
7638 
7639  // self axis preserves the original order
7640  if (axis == axis_self) ns.SetType(s.Type());
7641 
7642  for (const XPathNode* it = s.begin(); it != s.end(); ++it)
7643  {
7644  size_t size = ns.size();
7645 
7646  // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
7647  if (axis != axis_self && size != 0) ns.SetType(XPathNodeSet::TypeUnsorted);
7648 
7649  if (it->GetNode())
7650  step_fill(ns, it->GetNode(), stack.Result, v);
7651  else if (attributes)
7652  step_fill(ns, it->GetAttribute(), it->GetParent(), stack.Result, v);
7653 
7654  apply_predicates(ns, size, stack);
7655  }
7656  }
7657  else
7658  {
7659  if (c.n.GetNode())
7660  step_fill(ns, c.n.GetNode(), stack.Result, v);
7661  else if (attributes)
7662  step_fill(ns, c.n.GetAttribute(), c.n.GetParent(), stack.Result, v);
7663 
7664  apply_predicates(ns, 0, stack);
7665  }
7666 
7667  // GetChild, GetAttribute and self axes always generate unique set of nodes
7668  // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
7669  if (axis != axis_GetChild && axis != axis_attribute && axis != axis_self && ns.Type() == XPathNodeSet::TypeUnsorted)
7670  ns.RemoveDuplicates();
7671 
7672  return ns;
7673  }
7674 
7675  public:
7676  XPathAstNode(ast_type_t Type, XPathValueType retType_, const Char8* Value):
7677  _type(static_cast<char>(Type)), _retType(static_cast<char>(retType_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
7678  {
7679  assert(Type == ast_string_constant);
7680  _data.string = Value;
7681  }
7682 
7683  XPathAstNode(ast_type_t Type, XPathValueType retType_, double Value):
7684  _type(static_cast<char>(Type)), _retType(static_cast<char>(retType_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
7685  {
7686  assert(Type == ast_number_constant);
7687  _data.number = Value;
7688  }
7689 
7690  XPathAstNode(ast_type_t Type, XPathValueType retType_, XPathVariable* Value):
7691  _type(static_cast<char>(Type)), _retType(static_cast<char>(retType_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
7692  {
7693  assert(Type == ast_variable);
7694  _data.variable = Value;
7695  }
7696 
7697  XPathAstNode(ast_type_t Type, XPathValueType retType_, XPathAstNode* left = 0, XPathAstNode* right = 0):
7698  _type(static_cast<char>(Type)), _retType(static_cast<char>(retType_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
7699  {
7700  }
7701 
7702  XPathAstNode(ast_type_t Type, XPathAstNode* left, axis_t axis, nodetest_t test, const Char8* contents):
7703  _type(static_cast<char>(Type)), _retType(XPathTypeNodeSet), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
7704  {
7705  _data.nodetest = contents;
7706  }
7707 
7708  void SetNext(XPathAstNode* Value)
7709  {
7710  _next = Value;
7711  }
7712 
7713  void SetRight(XPathAstNode* Value)
7714  {
7715  _right = Value;
7716  }
7717 
// Evaluates this node as a boolean.
// c     - evaluation context (context node, position, size).
// stack - allocator stack; temporaries are allocated from stack.Result and released by
//         the XPathAllocatorCapture scopes below.
// Node types that naturally produce a boolean are handled directly; everything else is
// evaluated according to its return type and converted in the default branch.
7718  bool eval_boolean(const XPathContext& c, const XPathStack& stack)
7719  {
7720  switch (_type)
7721  {
 // || and && short-circuit, so the right operand may not be evaluated
7722  case ast_op_or:
7723  return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
7724 
7725  case ast_op_and:
7726  return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
7727 
7728  case ast_op_equal:
7729  return compare_eq(_left, _right, c, stack, equal_to());
7730 
7731  case ast_op_not_equal:
7732  return compare_eq(_left, _right, c, stack, not_equal_to());
7733 
7734  case ast_op_less:
7735  return compare_rel(_left, _right, c, stack, less());
7736 
 // "greater" comparisons reuse less / less_equal with the operands swapped
7737  case ast_op_greater:
7738  return compare_rel(_right, _left, c, stack, less());
7739 
7740  case ast_op_less_or_equal:
7741  return compare_rel(_left, _right, c, stack, less_equal());
7742 
7743  case ast_op_greater_or_equal:
7744  return compare_rel(_right, _left, c, stack, less_equal());
7745 
7746  case ast_func_starts_with:
7747  {
7748  XPathAllocatorCapture cr(stack.Result);
7749 
7750  XPathString lr = _left->eval_string(c, stack);
7751  XPathString rr = _right->eval_string(c, stack);
7752 
7753  return starts_with(lr.c_str(), rr.c_str());
7754  }
7755 
7756  case ast_func_contains:
7757  {
7758  XPathAllocatorCapture cr(stack.Result);
7759 
7760  XPathString lr = _left->eval_string(c, stack);
7761  XPathString rr = _right->eval_string(c, stack);
7762 
7763  return FindSubstring(lr.c_str(), rr.c_str()) != 0;
7764  }
7765 
7766  case ast_func_boolean:
7767  return _left->eval_boolean(c, stack);
7768 
7769  case ast_func_not:
7770  return !_left->eval_boolean(c, stack);
7771 
7772  case ast_func_true:
7773  return true;
7774 
7775  case ast_func_false:
7776  return false;
7777 
 // lang(): looks at the xml:lang attribute of the nearest node (context node or
 // ancestor) that has one; false for attribute context nodes
7778  case ast_func_lang:
7779  {
7780  if (c.n.GetAttribute()) return false;
7781 
7782  XPathAllocatorCapture cr(stack.Result);
7783 
7784  XPathString lang = _left->eval_string(c, stack);
7785 
7786  for (Node n = c.n.GetNode(); n; n = n.GetParent())
7787  {
7788  Attribute a = n.GetAttribute("xml:lang");
7789 
7790  if (a)
7791  {
7792  const Char8* Value = a.Value();
7793 
7794  // strnicmp / strncasecmp is not portable
 // the requested language must be a case-insensitive (ASCII) prefix of the attribute value
7795  for (const Char8* lit = lang.c_str(); *lit; ++lit)
7796  {
7797  if (tolower_ascii(*lit) != tolower_ascii(*Value)) return false;
7798  ++Value;
7799  }
7800 
 // the prefix must be the whole value or be followed by a '-' suffix
7801  return *Value == 0 || *Value == '-';
7802  }
7803  }
7804 
7805  return false;
7806  }
7807 
7808  case ast_variable:
7809  {
7810  assert(_retType == _data.variable->Type());
7811 
7812  if (_retType == XPathTypeBoolean)
7813  return _data.variable->GetBoolean();
7814 
7815  // fallthrough to Type conversion
7816  }
7817 
 // not a boolean-producing node: evaluate by return type and convert
7818  default:
7819  {
7820  switch (_retType)
7821  {
7822  case XPathTypeNumber:
7823  return convert_number_to_boolean(eval_number(c, stack));
7824 
 // a string is true when it is not empty
7825  case XPathTypeString:
7826  {
7827  XPathAllocatorCapture cr(stack.Result);
7828 
7829  return !eval_string(c, stack).Empty();
7830  }
7831 
 // a node set is true when it is not empty
7832  case XPathTypeNodeSet:
7833  {
7834  XPathAllocatorCapture cr(stack.Result);
7835 
7836  return !eval_NodeSet(c, stack).Empty();
7837  }
7838 
7839  default:
7840  assert(!"Wrong expression for return Type boolean");
7841  return false;
7842  }
7843  }
7844  }
7845  }
7846 
7847  double eval_number(const XPathContext& c, const XPathStack& stack)
7848  {
7849  switch (_type)
7850  {
7851  case ast_op_add:
7852  return _left->eval_number(c, stack) + _right->eval_number(c, stack);
7853 
7854  case ast_op_subtract:
7855  return _left->eval_number(c, stack) - _right->eval_number(c, stack);
7856 
7857  case ast_op_multiply:
7858  return _left->eval_number(c, stack) * _right->eval_number(c, stack);
7859 
7860  case ast_op_divide:
7861  return _left->eval_number(c, stack) / _right->eval_number(c, stack);
7862 
7863  case ast_op_mod:
7864  return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
7865 
7866  case ast_op_negate:
7867  return -_left->eval_number(c, stack);
7868 
7869  case ast_number_constant:
7870  return _data.number;
7871 
7872  case ast_func_last:
7873  return static_cast<double>(c.size);
7874 
7875  case ast_func_position:
7876  return static_cast<double>(c.position);
7877 
7878  case ast_func_count:
7879  {
7880  XPathAllocatorCapture cr(stack.Result);
7881 
7882  return static_cast<double>(_left->eval_NodeSet(c, stack).size());
7883  }
7884 
7885  case ast_func_string_length_0:
7886  {
7887  XPathAllocatorCapture cr(stack.Result);
7888 
7889  return static_cast<double>(string_Value(c.n, stack.Result).length());
7890  }
7891 
7892  case ast_func_string_length_1:
7893  {
7894  XPathAllocatorCapture cr(stack.Result);
7895 
7896  return static_cast<double>(_left->eval_string(c, stack).length());
7897  }
7898 
7899  case ast_func_number_0:
7900  {
7901  XPathAllocatorCapture cr(stack.Result);
7902 
7903  return convert_Stringo_number(string_Value(c.n, stack.Result).c_str());
7904  }
7905 
7906  case ast_func_number_1:
7907  return _left->eval_number(c, stack);
7908 
7909  case ast_func_sum:
7910  {
7911  XPathAllocatorCapture cr(stack.Result);
7912 
7913  double r = 0;
7914 
7915  XPathNodeSet_raw ns = _left->eval_NodeSet(c, stack);
7916 
7917  for (const XPathNode* it = ns.begin(); it != ns.end(); ++it)
7918  {
7919  XPathAllocatorCapture cri(stack.Result);
7920 
7921  r += convert_Stringo_number(string_Value(*it, stack.Result).c_str());
7922  }
7923 
7924  return r;
7925  }
7926 
7927  case ast_func_floor:
7928  {
7929  double r = _left->eval_number(c, stack);
7930 
7931  return r == r ? floor(r) : r;
7932  }
7933 
7934  case ast_func_ceiling:
7935  {
7936  double r = _left->eval_number(c, stack);
7937 
7938  return r == r ? ceil(r) : r;
7939  }
7940 
7941  case ast_func_round:
7942  return round_nearest_nzero(_left->eval_number(c, stack));
7943 
7944  case ast_variable:
7945  {
7946  assert(_retType == _data.variable->Type());
7947 
7948  if (_retType == XPathTypeNumber)
7949  return _data.variable->GetNumber();
7950 
7951  // fallthrough to Type conversion
7952  }
7953 
7954  default:
7955  {
7956  switch (_retType)
7957  {
7958  case XPathTypeBoolean:
7959  return eval_boolean(c, stack) ? 1 : 0;
7960 
7961  case XPathTypeString:
7962  {
7963  XPathAllocatorCapture cr(stack.Result);
7964 
7965  return convert_Stringo_number(eval_string(c, stack).c_str());
7966  }
7967 
7968  case XPathTypeNodeSet:
7969  {
7970  XPathAllocatorCapture cr(stack.Result);
7971 
7972  return convert_Stringo_number(eval_string(c, stack).c_str());
7973  }
7974 
7975  default:
7976  assert(!"Wrong expression for return Type number");
7977  return 0;
7978  }
7979 
7980  }
7981  }
7982  }
7983 
7984  XPathString eval_string_concat(const XPathContext& c, const XPathStack& stack)
7985  {
7986  assert(_type == ast_func_concat);
7987 
7988  XPathAllocatorCapture ct(stack.temp);
7989 
7990  // count the string number
7991  size_t count = 1;
7992  for (XPathAstNode* nc = _right; nc; nc = nc->_next) count++;
7993 
7994  // gather all strings
7995  XPathString static_buffer[4];
7996  XPathString* buffer = static_buffer;
7997 
7998  // allocate on-heap for large concats
7999  if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
8000  {
8001  buffer = static_cast<XPathString*>(stack.temp->allocate(count * sizeof(XPathString)));
8002  assert(buffer);
8003  }
8004 
8005  // evaluate all strings to temporary stack
8006  XPathStack swapped_stack = {stack.temp, stack.Result};
8007 
8008  buffer[0] = _left->eval_string(c, swapped_stack);
8009 
8010  size_t pos = 1;
8011  for (XPathAstNode* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
8012  assert(pos == count);
8013 
8014  // get total length
8015  size_t length = 0;
8016  for (size_t i = 0; i < count; ++i) length += buffer[i].length();
8017 
8018  // create final string
8019  Char8* Result = static_cast<Char8*>(stack.Result->allocate((length + 1) * sizeof(Char8)));
8020  assert(Result);
8021 
8022  Char8* ri = Result;
8023 
8024  for (size_t j = 0; j < count; ++j)
8025  for (const Char8* bi = buffer[j].c_str(); *bi; ++bi)
8026  *ri++ = *bi;
8027 
8028  *ri = 0;
8029 
8030  return XPathString(Result, true);
8031  }
8032 
8033  XPathString eval_string(const XPathContext& c, const XPathStack& stack)
8034  {
8035  switch (_type)
8036  {
8037  case ast_string_constant:
8038  return XPathStringConst(_data.string);
8039 
8040  case ast_func_local_Name_0:
8041  {
8042  XPathNode na = c.n;
8043 
8044  return XPathStringConst(local_Name(na));
8045  }
8046 
8047  case ast_func_local_Name_1:
8048  {
8049  XPathAllocatorCapture cr(stack.Result);
8050 
8051  XPathNodeSet_raw ns = _left->eval_NodeSet(c, stack);
8052  XPathNode na = ns.first();
8053 
8054  return XPathStringConst(local_Name(na));
8055  }
8056 
8057  case ast_func_Name_0:
8058  {
8059  XPathNode na = c.n;
8060 
8061  return XPathStringConst(qualified_Name(na));
8062  }
8063 
8064  case ast_func_Name_1:
8065  {
8066  XPathAllocatorCapture cr(stack.Result);
8067 
8068  XPathNodeSet_raw ns = _left->eval_NodeSet(c, stack);
8069  XPathNode na = ns.first();
8070 
8071  return XPathStringConst(qualified_Name(na));
8072  }
8073 
8074  case ast_func_namespace_uri_0:
8075  {
8076  XPathNode na = c.n;
8077 
8078  return XPathStringConst(namespace_uri(na));
8079  }
8080 
8081  case ast_func_namespace_uri_1:
8082  {
8083  XPathAllocatorCapture cr(stack.Result);
8084 
8085  XPathNodeSet_raw ns = _left->eval_NodeSet(c, stack);
8086  XPathNode na = ns.first();
8087 
8088  return XPathStringConst(namespace_uri(na));
8089  }
8090 
8091  case ast_func_string_0:
8092  return string_Value(c.n, stack.Result);
8093 
8094  case ast_func_string_1:
8095  return _left->eval_string(c, stack);
8096 
8097  case ast_func_concat:
8098  return eval_string_concat(c, stack);
8099 
8100  case ast_func_substring_before:
8101  {
8102  XPathAllocatorCapture cr(stack.temp);
8103 
8104  XPathStack swapped_stack = {stack.temp, stack.Result};
8105 
8106  XPathString s = _left->eval_string(c, swapped_stack);
8107  XPathString p = _right->eval_string(c, swapped_stack);
8108 
8109  const Char8* pos = FindSubstring(s.c_str(), p.c_str());
8110 
8111  return pos ? XPathString(s.c_str(), pos, stack.Result) : XPathString();
8112  }
8113 
8114  case ast_func_substring_after:
8115  {
8116  XPathAllocatorCapture cr(stack.temp);
8117 
8118  XPathStack swapped_stack = {stack.temp, stack.Result};
8119 
8120  XPathString s = _left->eval_string(c, swapped_stack);
8121  XPathString p = _right->eval_string(c, swapped_stack);
8122 
8123  const Char8* pos = FindSubstring(s.c_str(), p.c_str());
8124  if (!pos) return XPathString();
8125 
8126  const Char8* Result = pos + p.length();
8127 
8128  return s.uses_heap() ? XPathString(Result, stack.Result) : XPathStringConst(Result);
8129  }
8130 
8131  case ast_func_substring_2:
8132  {
8133  XPathAllocatorCapture cr(stack.temp);
8134 
8135  XPathStack swapped_stack = {stack.temp, stack.Result};
8136 
8137  XPathString s = _left->eval_string(c, swapped_stack);
8138  size_t s_length = s.length();
8139 
8140  double first = round_nearest(_right->eval_number(c, stack));
8141 
8142  if (is_nan(first)) return XPathString(); // NaN
8143  else if (first >= s_length + 1) return XPathString();
8144 
8145  size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
8146  assert(1 <= pos && pos <= s_length + 1);
8147 
8148  const Char8* rbegin = s.c_str() + (pos - 1);
8149 
8150  return s.uses_heap() ? XPathString(rbegin, stack.Result) : XPathStringConst(rbegin);
8151  }
8152 
8153  case ast_func_substring_3:
8154  {
8155  XPathAllocatorCapture cr(stack.temp);
8156 
8157  XPathStack swapped_stack = {stack.temp, stack.Result};
8158 
8159  XPathString s = _left->eval_string(c, swapped_stack);
8160  size_t s_length = s.length();
8161 
8162  double first = round_nearest(_right->eval_number(c, stack));
8163  double last = first + round_nearest(_right->_next->eval_number(c, stack));
8164 
8165  if (is_nan(first) || is_nan(last)) return XPathString();
8166  else if (first >= s_length + 1) return XPathString();
8167  else if (first >= last) return XPathString();
8168  else if (last < 1) return XPathString();
8169 
8170  size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
8171  size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
8172 
8173  assert(1 <= pos && pos <= end && end <= s_length + 1);
8174  const Char8* rbegin = s.c_str() + (pos - 1);
8175  const Char8* rend = s.c_str() + (end - 1);
8176 
8177  return (end == s_length + 1 && !s.uses_heap()) ? XPathStringConst(rbegin) : XPathString(rbegin, rend, stack.Result);
8178  }
8179 
8180  case ast_func_normalize_space_0:
8181  {
8182  XPathString s = string_Value(c.n, stack.Result);
8183 
8184  normalize_space(s.data(stack.Result));
8185 
8186  return s;
8187  }
8188 
8189  case ast_func_normalize_space_1:
8190  {
8191  XPathString s = _left->eval_string(c, stack);
8192 
8193  normalize_space(s.data(stack.Result));
8194 
8195  return s;
8196  }
8197 
8198  case ast_func_translate:
8199  {
8200  XPathAllocatorCapture cr(stack.temp);
8201 
8202  XPathStack swapped_stack = {stack.temp, stack.Result};
8203 
8204  XPathString s = _left->eval_string(c, stack);
8205  XPathString from = _right->eval_string(c, swapped_stack);
8206  XPathString to = _right->_next->eval_string(c, swapped_stack);
8207 
8208  translate(s.data(stack.Result), from.c_str(), to.c_str());
8209 
8210  return s;
8211  }
8212 
8213  case ast_variable:
8214  {
8215  assert(_retType == _data.variable->Type());
8216 
8217  if (_retType == XPathTypeString)
8218  return XPathStringConst(_data.variable->GetString());
8219 
8220  // fallthrough to Type conversion
8221  }
8222 
8223  default:
8224  {
8225  switch (_retType)
8226  {
8227  case XPathTypeBoolean:
8228  return XPathStringConst(eval_boolean(c, stack) ? "true" : "false");
8229 
8230  case XPathTypeNumber:
8231  return convert_number_to_string(eval_number(c, stack), stack.Result);
8232 
8233  case XPathTypeNodeSet:
8234  {
8235  XPathAllocatorCapture cr(stack.temp);
8236 
8237  XPathStack swapped_stack = {stack.temp, stack.Result};
8238 
8239  XPathNodeSet_raw ns = eval_NodeSet(c, swapped_stack);
8240  return ns.Empty() ? XPathString() : string_Value(ns.first(), stack.Result);
8241  }
8242 
8243  default:
8244  assert(!"Wrong expression for return Type string");
8245  return XPathString();
8246  }
8247  }
8248  }
8249  }
8250 
8251  XPathNodeSet_raw eval_NodeSet(const XPathContext& c, const XPathStack& stack)
8252  {
8253  switch (_type)
8254  {
8255  case ast_op_union:
8256  {
8257  XPathAllocatorCapture cr(stack.temp);
8258 
8259  XPathStack swapped_stack = {stack.temp, stack.Result};
8260 
8261  XPathNodeSet_raw ls = _left->eval_NodeSet(c, swapped_stack);
8262  XPathNodeSet_raw rs = _right->eval_NodeSet(c, stack);
8263 
8264  // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
8265  rs.SetType(XPathNodeSet::TypeUnsorted);
8266 
8267  rs.append(ls.begin(), ls.end(), stack.Result);
8268  rs.RemoveDuplicates();
8269 
8270  return rs;
8271  }
8272 
8273  case ast_filter:
8274  case ast_filter_posinv:
8275  {
8276  XPathNodeSet_raw set = _left->eval_NodeSet(c, stack);
8277 
8278  // either expression is a number or it contains position() call; sort by document order
8279  if (_type == ast_filter) set.sort_do();
8280 
8281  apply_predicate(set, 0, _right, stack);
8282 
8283  return set;
8284  }
8285 
8286  case ast_func_id:
8287  return XPathNodeSet_raw();
8288 
8289  case ast_step:
8290  {
8291  switch (_axis)
8292  {
8293  case axis_ancestor:
8294  return step_do(c, stack, axis_to_type<axis_ancestor>());
8295 
8296  case axis_ancestor_or_self:
8297  return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());
8298 
8299  case axis_attribute:
8300  return step_do(c, stack, axis_to_type<axis_attribute>());
8301 
8302  case axis_GetChild:
8303  return step_do(c, stack, axis_to_type<axis_GetChild>());
8304 
8305  case axis_descendant:
8306  return step_do(c, stack, axis_to_type<axis_descendant>());
8307 
8308  case axis_descendant_or_self:
8309  return step_do(c, stack, axis_to_type<axis_descendant_or_self>());
8310 
8311  case axis_following:
8312  return step_do(c, stack, axis_to_type<axis_following>());
8313 
8314  case axis_following_sibling:
8315  return step_do(c, stack, axis_to_type<axis_following_sibling>());
8316 
8317  case axis_namespace:
8318  // namespaced axis is not supported
8319  return XPathNodeSet_raw();
8320 
8321  case axis_GetParent:
8322  return step_do(c, stack, axis_to_type<axis_GetParent>());
8323 
8324  case axis_preceding:
8325  return step_do(c, stack, axis_to_type<axis_preceding>());
8326 
8327  case axis_preceding_sibling:
8328  return step_do(c, stack, axis_to_type<axis_preceding_sibling>());
8329 
8330  case axis_self:
8331  return step_do(c, stack, axis_to_type<axis_self>());
8332 
8333  default:
8334  assert(!"Unknown axis");
8335  return XPathNodeSet_raw();
8336  }
8337  }
8338 
8339  case ast_step_GetRoot:
8340  {
8341  assert(!_right); // GetRoot step can't have any predicates
8342 
8343  XPathNodeSet_raw ns;
8344 
8345  ns.SetType(XPathNodeSet::TypeSorted);
8346 
8347  if (c.n.GetNode()) ns.push_back(c.n.GetNode().GetRoot(), stack.Result);
8348  else if (c.n.GetAttribute()) ns.push_back(c.n.GetParent().GetRoot(), stack.Result);
8349 
8350  return ns;
8351  }
8352 
8353  case ast_variable:
8354  {
8355  assert(_retType == _data.variable->Type());
8356 
8357  if (_retType == XPathTypeNodeSet)
8358  {
8359  const XPathNodeSet& s = _data.variable->GetNodeSet();
8360 
8361  XPathNodeSet_raw ns;
8362 
8363  ns.SetType(s.Type());
8364  ns.append(s.begin(), s.end(), stack.Result);
8365 
8366  return ns;
8367  }
8368 
8369  // fallthrough to Type conversion
8370  }
8371 
8372  default:
8373  assert(!"Wrong expression for return Type node set");
8374  return XPathNodeSet_raw();
8375  }
8376  }
8377 
8378  bool is_posinv()
8379  {
8380  switch (_type)
8381  {
8382  case ast_func_position:
8383  return false;
8384 
8385  case ast_string_constant:
8386  case ast_number_constant:
8387  case ast_variable:
8388  return true;
8389 
8390  case ast_step:
8391  case ast_step_GetRoot:
8392  return true;
8393 
8394  case ast_predicate:
8395  case ast_filter:
8396  case ast_filter_posinv:
8397  return true;
8398 
8399  default:
8400  if (_left && !_left->is_posinv()) return false;
8401 
8402  for (XPathAstNode* n = _right; n; n = n->_next)
8403  if (!n->is_posinv()) return false;
8404 
8405  return true;
8406  }
8407  }
8408 
8409  XPathValueType retType() const
8410  {
8411  return static_cast<XPathValueType>(_retType);
8412  }
8413  };
8414 
8415  struct XPathParser
8416  {
8417  XPathAllocator* _alloc;
8418  XPathLexer _lexer;
8419 
8420  const Char8* _query;
8421  XPathVariableSet* _variables;
8422 
8423  XPathParseResult* _Result;
8424 
8425  void throw_error(const char* message)
8426  {
8427  _Result->error = message;
8428  _Result->Offset = _lexer.current_pos() - _query;
8429 
8430 
8431  }
8432 
// Signals an out-of-memory condition by throwing std::bad_alloc.
void throw_error_oom()
{
    std::bad_alloc out_of_memory;

    throw out_of_memory;
}
8437 
8438  void* alloc_node()
8439  {
8440  void* Result = _alloc->allocate_nothrow(sizeof(XPathAstNode));
8441 
8442  if (!Result) throw_error_oom();
8443 
8444  return Result;
8445  }
8446 
8447  const Char8* alloc_string(const XPathLexerString& Value)
8448  {
8449  if (Value.begin)
8450  {
8451  size_t length = static_cast<size_t>(Value.end - Value.begin);
8452 
8453  Char8* c = static_cast<Char8*>(_alloc->allocate_nothrow((length + 1) * sizeof(Char8)));
8454  if (!c) throw_error_oom();
8455 
8456  memcpy(c, Value.begin, length * sizeof(Char8));
8457  c[length] = 0;
8458 
8459  return c;
8460  }
8461  else return 0;
8462  }
8463 
8464  XPathAstNode* ParseFunctionHelper(ast_type_t Type0, ast_type_t Type1, size_t argc, XPathAstNode* args[2])
8465  {
8466  assert(argc <= 1);
8467 
8468  if (argc == 1 && args[0]->retType() != XPathTypeNodeSet) throw_error("Function has to be applied to node set");
8469 
8470  return new (alloc_node()) XPathAstNode(argc == 0 ? Type0 : Type1, XPathTypeString, args[0]);
8471  }
8472 
8473  XPathAstNode* ParseFunction(const XPathLexerString& Name, size_t argc, XPathAstNode* args[2])
8474  {
8475  switch (Name.begin[0])
8476  {
8477  case 'b':
8478  if (Name == "boolean" && argc == 1)
8479  return new (alloc_node()) XPathAstNode(ast_func_boolean, XPathTypeBoolean, args[0]);
8480 
8481  break;
8482 
8483  case 'c':
8484  if (Name == "count" && argc == 1)
8485  {
8486  if (args[0]->retType() != XPathTypeNodeSet) throw_error("Function has to be applied to node set");
8487  return new (alloc_node()) XPathAstNode(ast_func_count, XPathTypeNumber, args[0]);
8488  }
8489  else if (Name == "contains" && argc == 2)
8490  return new (alloc_node()) XPathAstNode(ast_func_contains, XPathTypeString, args[0], args[1]);
8491  else if (Name == "concat" && argc >= 2)
8492  return new (alloc_node()) XPathAstNode(ast_func_concat, XPathTypeString, args[0], args[1]);
8493  else if (Name == "ceiling" && argc == 1)
8494  return new (alloc_node()) XPathAstNode(ast_func_ceiling, XPathTypeNumber, args[0]);
8495 
8496  break;
8497 
8498  case 'f':
8499  if (Name == "false" && argc == 0)
8500  return new (alloc_node()) XPathAstNode(ast_func_false, XPathTypeBoolean);
8501  else if (Name == "floor" && argc == 1)
8502  return new (alloc_node()) XPathAstNode(ast_func_floor, XPathTypeNumber, args[0]);
8503 
8504  break;
8505 
8506  case 'i':
8507  if (Name == "id" && argc == 1)
8508  return new (alloc_node()) XPathAstNode(ast_func_id, XPathTypeNodeSet, args[0]);
8509 
8510  break;
8511 
8512  case 'l':
8513  if (Name == "last" && argc == 0)
8514  return new (alloc_node()) XPathAstNode(ast_func_last, XPathTypeNumber);
8515  else if (Name == "lang" && argc == 1)
8516  return new (alloc_node()) XPathAstNode(ast_func_lang, XPathTypeBoolean, args[0]);
8517  else if (Name == "local-Name" && argc <= 1)
8518  return ParseFunctionHelper(ast_func_local_Name_0, ast_func_local_Name_1, argc, args);
8519 
8520  break;
8521 
8522  case 'n':
8523  if (Name == "Name" && argc <= 1)
8524  return ParseFunctionHelper(ast_func_Name_0, ast_func_Name_1, argc, args);
8525  else if (Name == "namespace-uri" && argc <= 1)
8526  return ParseFunctionHelper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
8527  else if (Name == "normalize-space" && argc <= 1)
8528  return new (alloc_node()) XPathAstNode(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, XPathTypeString, args[0], args[1]);
8529  else if (Name == "not" && argc == 1)
8530  return new (alloc_node()) XPathAstNode(ast_func_not, XPathTypeBoolean, args[0]);
8531  else if (Name == "number" && argc <= 1)
8532  return new (alloc_node()) XPathAstNode(argc == 0 ? ast_func_number_0 : ast_func_number_1, XPathTypeNumber, args[0]);
8533 
8534  break;
8535 
8536  case 'p':
8537  if (Name == "position" && argc == 0)
8538  return new (alloc_node()) XPathAstNode(ast_func_position, XPathTypeNumber);
8539 
8540  break;
8541 
8542  case 'r':
8543  if (Name == "round" && argc == 1)
8544  return new (alloc_node()) XPathAstNode(ast_func_round, XPathTypeNumber, args[0]);
8545 
8546  break;
8547 
8548  case 's':
8549  if (Name == "string" && argc <= 1)
8550  return new (alloc_node()) XPathAstNode(argc == 0 ? ast_func_string_0 : ast_func_string_1, XPathTypeString, args[0]);
8551  else if (Name == "string-length" && argc <= 1)
8552  return new (alloc_node()) XPathAstNode(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, XPathTypeString, args[0]);
8553  else if (Name == "starts-with" && argc == 2)
8554  return new (alloc_node()) XPathAstNode(ast_func_starts_with, XPathTypeBoolean, args[0], args[1]);
8555  else if (Name == "substring-before" && argc == 2)
8556  return new (alloc_node()) XPathAstNode(ast_func_substring_before, XPathTypeString, args[0], args[1]);
8557  else if (Name == "substring-after" && argc == 2)
8558  return new (alloc_node()) XPathAstNode(ast_func_substring_after, XPathTypeString, args[0], args[1]);
8559  else if (Name == "substring" && (argc == 2 || argc == 3))
8560  return new (alloc_node()) XPathAstNode(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, XPathTypeString, args[0], args[1]);
8561  else if (Name == "sum" && argc == 1)
8562  {
8563  if (args[0]->retType() != XPathTypeNodeSet) throw_error("Function has to be applied to node set");
8564  return new (alloc_node()) XPathAstNode(ast_func_sum, XPathTypeNumber, args[0]);
8565  }
8566 
8567  break;
8568 
8569  case 't':
8570  if (Name == "translate" && argc == 3)
8571  return new (alloc_node()) XPathAstNode(ast_func_translate, XPathTypeString, args[0], args[1]);
8572  else if (Name == "true" && argc == 0)
8573  return new (alloc_node()) XPathAstNode(ast_func_true, XPathTypeBoolean);
8574 
8575  break;
8576 
8577  default:
8578  break;
8579  }
8580 
8581  throw_error("Unrecognized function or wrong parameter count");
8582 
8583  return 0;
8584  }
8585 
8586  axis_t ParseAxisName(const XPathLexerString& Name, bool& specified)
8587  {
8588  specified = true;
8589 
8590  switch (Name.begin[0])
8591  {
8592  case 'a':
8593  if (Name == "ancestor")
8594  return axis_ancestor;
8595  else if (Name == "ancestor-or-self")
8596  return axis_ancestor_or_self;
8597  else if (Name == "GetAttribute")
8598  return axis_attribute;
8599 
8600  break;
8601 
8602  case 'c':
8603  if (Name == "GetChild")
8604  return axis_GetChild;
8605 
8606  break;
8607 
8608  case 'd':
8609  if (Name == "descendant")
8610  return axis_descendant;
8611  else if (Name == "descendant-or-self")
8612  return axis_descendant_or_self;
8613 
8614  break;
8615 
8616  case 'f':
8617  if (Name == "following")
8618  return axis_following;
8619  else if (Name == "following-sibling")
8620  return axis_following_sibling;
8621 
8622  break;
8623 
8624  case 'n':
8625  if (Name == "namespace")
8626  return axis_namespace;
8627 
8628  break;
8629 
8630  case 'p':
8631  if (Name == "GetParent")
8632  return axis_GetParent;
8633  else if (Name == "preceding")
8634  return axis_preceding;
8635  else if (Name == "preceding-sibling")
8636  return axis_preceding_sibling;
8637 
8638  break;
8639 
8640  case 's':
8641  if (Name == "self")
8642  return axis_self;
8643 
8644  break;
8645 
8646  default:
8647  break;
8648  }
8649 
8650  specified = false;
8651  return axis_GetChild;
8652  }
8653 
8654  nodetest_t ParseNodeTest_type(const XPathLexerString& Name)
8655  {
8656  switch (Name.begin[0])
8657  {
8658  case 'c':
8659  if (Name == "comment")
8660  return nodetest_type_comment;
8661 
8662  break;
8663 
8664  case 'n':
8665  if (Name == "node")
8666  return nodetest_type_node;
8667 
8668  break;
8669 
8670  case 'p':
8671  if (Name == "processing-instruction")
8672  return nodetest_type_pi;
8673 
8674  break;
8675 
8676  case 't':
8677  if (Name == "text")
8678  return nodetest_type_text;
8679 
8680  break;
8681 
8682  default:
8683  break;
8684  }
8685 
8686  return nodetest_none;
8687  }
8688 
8689  // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
8690  XPathAstNode* ParsePrimaryExpression()
8691  {
8692  switch (_lexer.current())
8693  {
8694  case lex_var_ref:
8695  {
8696  XPathLexerString Name = _lexer.contents();
8697 
8698  if (!_variables)
8699  throw_error("Unknown variable: variable set is not provided");
8700 
8701  XPathVariable* var = GetVariable(_variables, Name.begin, Name.end);
8702 
8703  if (!var)
8704  throw_error("Unknown variable: variable set does not contain the given Name");
8705 
8706  _lexer.next();
8707 
8708  return new (alloc_node()) XPathAstNode(ast_variable, var->Type(), var);
8709  }
8710 
8711  case lex_open_brace:
8712  {
8713  _lexer.next();
8714 
8715  XPathAstNode* n = ParseExpression();
8716 
8717  if (_lexer.current() != lex_close_brace)
8718  throw_error("Unmatched braces");
8719 
8720  _lexer.next();
8721 
8722  return n;
8723  }
8724 
8725  case lex_quoted_string:
8726  {
8727  const Char8* Value = alloc_string(_lexer.contents());
8728 
8729  XPathAstNode* n = new (alloc_node()) XPathAstNode(ast_string_constant, XPathTypeString, Value);
8730  _lexer.next();
8731 
8732  return n;
8733  }
8734 
8735  case lex_number:
8736  {
8737  double Value = 0;
8738 
8739  if (!convert_Stringo_number(_lexer.contents().begin, _lexer.contents().end, &Value))
8740  throw_error_oom();
8741 
8742  XPathAstNode* n = new (alloc_node()) XPathAstNode(ast_number_constant, XPathTypeNumber, Value);
8743  _lexer.next();
8744 
8745  return n;
8746  }
8747 
8748  case lex_string:
8749  {
8750  XPathAstNode* args[2] = {0};
8751  size_t argc = 0;
8752 
8753  XPathLexerString function = _lexer.contents();
8754  _lexer.next();
8755 
8756  XPathAstNode* LastArg = 0;
8757 
8758  if (_lexer.current() != lex_open_brace)
8759  throw_error("Unrecognized function call");
8760  _lexer.next();
8761 
8762  if (_lexer.current() != lex_close_brace)
8763  args[argc++] = ParseExpression();
8764 
8765  while (_lexer.current() != lex_close_brace)
8766  {
8767  if (_lexer.current() != lex_comma)
8768  throw_error("No comma between function arguments");
8769  _lexer.next();
8770 
8771  XPathAstNode* n = ParseExpression();
8772 
8773  if (argc < 2) args[argc] = n;
8774  else LastArg->SetNext(n);
8775 
8776  argc++;
8777  LastArg = n;
8778  }
8779 
8780  _lexer.next();
8781 
8782  return ParseFunction(function, argc, args);
8783  }
8784 
8785  default:
8786  throw_error("Unrecognizable primary expression");
8787 
8788  return 0;
8789  }
8790  }
8791 
8792  // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
8793  // Predicate ::= '[' PredicateExpr ']'
8794  // PredicateExpr ::= Expr
8795  XPathAstNode* ParseFilterExpression()
8796  {
8797  XPathAstNode* n = ParsePrimaryExpression();
8798 
8799  while (_lexer.current() == lex_open_square_brace)
8800  {
8801  _lexer.next();
8802 
8803  XPathAstNode* expr = ParseExpression();
8804 
8805  if (n->retType() != XPathTypeNodeSet) throw_error("Predicate has to be applied to node set");
8806 
8807  bool posinv = expr->retType() != XPathTypeNumber && expr->is_posinv();
8808 
8809  n = new (alloc_node()) XPathAstNode(posinv ? ast_filter_posinv : ast_filter, XPathTypeNodeSet, n, expr);
8810 
8811  if (_lexer.current() != lex_close_square_brace)
8812  throw_error("Unmatched square brace");
8813 
8814  _lexer.next();
8815  }
8816 
8817  return n;
8818  }
8819 
8820  // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
8821  // AxisSpecifier ::= AxisName '::' | '@'?
8822  // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
8823  // NameTest ::= '*' | NCName ':' '*' | QName
8824  // AbbreviatedStep ::= '.' | '..'
8825  XPathAstNode* ParseStep(XPathAstNode* set)
8826  {
8827  if (set && set->retType() != XPathTypeNodeSet)
8828  throw_error("Step has to be applied to node set");
8829 
8830  bool axis_specified = false;
8831  axis_t axis = axis_GetChild; // implied GetChild axis
8832 
8833  if (_lexer.current() == lex_axis_attribute)
8834  {
8835  axis = axis_attribute;
8836  axis_specified = true;
8837 
8838  _lexer.next();
8839  }
8840  else if (_lexer.current() == lex_dot)
8841  {
8842  _lexer.next();
8843 
8844  return new (alloc_node()) XPathAstNode(ast_step, set, axis_self, nodetest_type_node, 0);
8845  }
8846  else if (_lexer.current() == lex_double_dot)
8847  {
8848  _lexer.next();
8849 
8850  return new (alloc_node()) XPathAstNode(ast_step, set, axis_GetParent, nodetest_type_node, 0);
8851  }
8852 
8853  nodetest_t nt_type = nodetest_none;
8854  XPathLexerString nt_Name;
8855 
8856  if (_lexer.current() == lex_string)
8857  {
8858  // node Name test
8859  nt_Name = _lexer.contents();
8860  _lexer.next();
8861 
8862  // was it an axis Name?
8863  if (_lexer.current() == lex_double_colon)
8864  {
8865  // parse axis name
8866  if (axis_specified) throw_error("Two axis specifiers in one step");
8867 
8868  axis = ParseAxisName(nt_Name, axis_specified);
8869 
8870  if (!axis_specified) throw_error("Unknown axis");
8871 
8872  // read actual node test
8873  _lexer.next();
8874 
8875  if (_lexer.current() == lex_multiply)
8876  {
8877  nt_type = nodetest_all;
8878  nt_Name = XPathLexerString();
8879  _lexer.next();
8880  }
8881  else if (_lexer.current() == lex_string)
8882  {
8883  nt_Name = _lexer.contents();
8884  _lexer.next();
8885  }
8886  else throw_error("Unrecognized node test");
8887  }
8888 
8889  if (nt_type == nodetest_none)
8890  {
8891  // node Type test or processing-instruction
8892  if (_lexer.current() == lex_open_brace)
8893  {
8894  _lexer.next();
8895 
8896  if (_lexer.current() == lex_close_brace)
8897  {
8898  _lexer.next();
8899 
8900  nt_type = ParseNodeTest_type(nt_Name);
8901 
8902  if (nt_type == nodetest_none) throw_error("Unrecognized node Type");
8903 
8904  nt_Name = XPathLexerString();
8905  }
8906  else if (nt_Name == "processing-instruction")
8907  {
8908  if (_lexer.current() != lex_quoted_string)
8909  throw_error("Only literals are allowed as arguments to processing-instruction()");
8910 
8911  nt_type = nodetest_pi;
8912  nt_Name = _lexer.contents();
8913  _lexer.next();
8914 
8915  if (_lexer.current() != lex_close_brace)
8916  throw_error("Unmatched brace near processing-instruction()");
8917  _lexer.next();
8918  }
8919  else
8920  throw_error("Unmatched brace near node Type test");
8921 
8922  }
8923  // QName or NCName:*
8924  else
8925  {
8926  if (nt_Name.end - nt_Name.begin > 2 && nt_Name.end[-2] == ':' && nt_Name.end[-1] == '*') // NCName:*
8927  {
8928  nt_Name.end--; // erase *
8929 
8930  nt_type = nodetest_all_in_namespace;
8931  }
8932  else nt_type = nodetest_Name;
8933  }
8934  }
8935  }
8936  else if (_lexer.current() == lex_multiply)
8937  {
8938  nt_type = nodetest_all;
8939  _lexer.next();
8940  }
8941  else throw_error("Unrecognized node test");
8942 
8943  XPathAstNode* n = new (alloc_node()) XPathAstNode(ast_step, set, axis, nt_type, alloc_string(nt_Name));
8944 
8945  XPathAstNode* last = 0;
8946 
8947  while (_lexer.current() == lex_open_square_brace)
8948  {
8949  _lexer.next();
8950 
8951  XPathAstNode* expr = ParseExpression();
8952 
8953  XPathAstNode* pred = new (alloc_node()) XPathAstNode(ast_predicate, XPathTypeNodeSet, expr);
8954 
8955  if (_lexer.current() != lex_close_square_brace)
8956  throw_error("Unmatched square brace");
8957  _lexer.next();
8958 
8959  if (last) last->SetNext(pred);
8960  else n->SetRight(pred);
8961 
8962  last = pred;
8963  }
8964 
8965  return n;
8966  }
8967 
8968  // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
8969  XPathAstNode* ParseRelativeLocation_Path(XPathAstNode* set)
8970  {
8971  XPathAstNode* n = ParseStep(set);
8972 
8973  while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
8974  {
8975  lexeme_t l = _lexer.current();
8976  _lexer.next();
8977 
8978  if (l == lex_double_slash)
8979  n = new (alloc_node()) XPathAstNode(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
8980 
8981  n = ParseStep(n);
8982  }
8983 
8984  return n;
8985  }
8986 
8987  // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
8988  // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
8989  XPathAstNode* ParseLocationPath()
8990  {
8991  if (_lexer.current() == lex_slash)
8992  {
8993  _lexer.next();
8994 
8995  XPathAstNode* n = new (alloc_node()) XPathAstNode(ast_step_GetRoot, XPathTypeNodeSet);
8996 
8997  // relative location Path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone GetRoot Path
8998  lexeme_t l = _lexer.current();
8999 
9000  if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
9001  return ParseRelativeLocation_Path(n);
9002  else
9003  return n;
9004  }
9005  else if (_lexer.current() == lex_double_slash)
9006  {
9007  _lexer.next();
9008 
9009  XPathAstNode* n = new (alloc_node()) XPathAstNode(ast_step_GetRoot, XPathTypeNodeSet);
9010  n = new (alloc_node()) XPathAstNode(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
9011 
9012  return ParseRelativeLocation_Path(n);
9013  }
9014 
9015  // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
9016  return ParseRelativeLocation_Path(0);
9017  }
9018 
9019  // PathExpr ::= LocationPath
9020  // | FilterExpr
9021  // | FilterExpr '/' RelativeLocationPath
9022  // | FilterExpr '//' RelativeLocationPath
9023  XPathAstNode* ParsePathExpression()
9024  {
9025  // Clarification.
9026  // PathExpr begins with either LocationPath or FilterExpr.
9027  // FilterExpr begins with PrimaryExpr
9028  // PrimaryExpr begins with '$' in case of it being a variable reference,
9029  // '(' in case of it being an expression, string literal, number constant or
9030  // function call.
9031 
9032  if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
9033  _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
9034  _lexer.current() == lex_string)
9035  {
9036  if (_lexer.current() == lex_string)
9037  {
9038  // This is either a function call, or not - if not, we shall proceed with location Path
9039  const Char8* state = _lexer.state();
9040 
9041  while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
9042 
9043  if (*state != '(') return ParseLocationPath();
9044 
9045  // This looks like a function call; however this still can be a node-test. Check it.
9046  if (ParseNodeTest_type(_lexer.contents()) != nodetest_none) return ParseLocationPath();
9047  }
9048 
9049  XPathAstNode* n = ParseFilterExpression();
9050 
9051  if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
9052  {
9053  lexeme_t l = _lexer.current();
9054  _lexer.next();
9055 
9056  if (l == lex_double_slash)
9057  {
9058  if (n->retType() != XPathTypeNodeSet) throw_error("Step has to be applied to node set");
9059 
9060  n = new (alloc_node()) XPathAstNode(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
9061  }
9062 
9063  // select from location Path
9064  return ParseRelativeLocation_Path(n);
9065  }
9066 
9067  return n;
9068  }
9069  else return ParseLocationPath();
9070  }
9071 
9072  // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
9073  XPathAstNode* ParseUnionExpression()
9074  {
9075  XPathAstNode* n = ParsePathExpression();
9076 
9077  while (_lexer.current() == lex_union)
9078  {
9079  _lexer.next();
9080 
9081  XPathAstNode* expr = ParseUnionExpression();
9082 
9083  if (n->retType() != XPathTypeNodeSet || expr->retType() != XPathTypeNodeSet)
9084  throw_error("Union operator has to be applied to node sets");
9085 
9086  n = new (alloc_node()) XPathAstNode(ast_op_union, XPathTypeNodeSet, n, expr);
9087  }
9088 
9089  return n;
9090  }
9091 
9092  // UnaryExpr ::= UnionExpr | '-' UnaryExpr
9093  XPathAstNode* ParseUnaryExpression()
9094  {
9095  if (_lexer.current() == lex_minus)
9096  {
9097  _lexer.next();
9098 
9099  XPathAstNode* expr = ParseUnaryExpression();
9100 
9101  return new (alloc_node()) XPathAstNode(ast_op_negate, XPathTypeNumber, expr);
9102  }
9103  else return ParseUnionExpression();
9104  }
9105 
9106  // MultiplicativeExpr ::= UnaryExpr
9107  // | MultiplicativeExpr '*' UnaryExpr
9108  // | MultiplicativeExpr 'div' UnaryExpr
9109  // | MultiplicativeExpr 'mod' UnaryExpr
9110  XPathAstNode* ParseMultiplicativeExpression()
9111  {
9112  XPathAstNode* n = ParseUnaryExpression();
9113 
9114  while (_lexer.current() == lex_multiply || (_lexer.current() == lex_string &&
9115  (_lexer.contents() == "mod" || _lexer.contents() == "div")))
9116  {
9117  ast_type_t op = _lexer.current() == lex_multiply ? ast_op_multiply :
9118  _lexer.contents().begin[0] == 'd' ? ast_op_divide : ast_op_mod;
9119  _lexer.next();
9120 
9121  XPathAstNode* expr = ParseUnaryExpression();
9122 
9123  n = new (alloc_node()) XPathAstNode(op, XPathTypeNumber, n, expr);
9124  }
9125 
9126  return n;
9127  }
9128 
9129  // AdditiveExpr ::= MultiplicativeExpr
9130  // | AdditiveExpr '+' MultiplicativeExpr
9131  // | AdditiveExpr '-' MultiplicativeExpr
9132  XPathAstNode* ParseAdditiveExpression()
9133  {
9134  XPathAstNode* n = ParseMultiplicativeExpression();
9135 
9136  while (_lexer.current() == lex_plus || _lexer.current() == lex_minus)
9137  {
9138  lexeme_t l = _lexer.current();
9139 
9140  _lexer.next();
9141 
9142  XPathAstNode* expr = ParseMultiplicativeExpression();
9143 
9144  n = new (alloc_node()) XPathAstNode(l == lex_plus ? ast_op_add : ast_op_subtract, XPathTypeNumber, n, expr);
9145  }
9146 
9147  return n;
9148  }
9149 
9150  // RelationalExpr ::= AdditiveExpr
9151  // | RelationalExpr '<' AdditiveExpr
9152  // | RelationalExpr '>' AdditiveExpr
9153  // | RelationalExpr '<=' AdditiveExpr
9154  // | RelationalExpr '>=' AdditiveExpr
9155  XPathAstNode* ParseRelationalExpression()
9156  {
9157  XPathAstNode* n = ParseAdditiveExpression();
9158 
9159  while (_lexer.current() == lex_less || _lexer.current() == lex_less_or_equal ||
9160  _lexer.current() == lex_greater || _lexer.current() == lex_greater_or_equal)
9161  {
9162  lexeme_t l = _lexer.current();
9163  _lexer.next();
9164 
9165  XPathAstNode* expr = ParseAdditiveExpression();
9166 
9167  n = new (alloc_node()) XPathAstNode(l == lex_less ? ast_op_less : l == lex_greater ? ast_op_greater :
9168  l == lex_less_or_equal ? ast_op_less_or_equal : ast_op_greater_or_equal, XPathTypeBoolean, n, expr);
9169  }
9170 
9171  return n;
9172  }
9173 
9174  // EqualityExpr ::= RelationalExpr
9175  // | EqualityExpr '=' RelationalExpr
9176  // | EqualityExpr '!=' RelationalExpr
9177  XPathAstNode* ParseEqualityExpression()
9178  {
9179  XPathAstNode* n = ParseRelationalExpression();
9180 
9181  while (_lexer.current() == lex_equal || _lexer.current() == lex_not_equal)
9182  {
9183  lexeme_t l = _lexer.current();
9184 
9185  _lexer.next();
9186 
9187  XPathAstNode* expr = ParseRelationalExpression();
9188 
9189  n = new (alloc_node()) XPathAstNode(l == lex_equal ? ast_op_equal : ast_op_not_equal, XPathTypeBoolean, n, expr);
9190  }
9191 
9192  return n;
9193  }
9194 
9195  // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
9196  XPathAstNode* ParseAndExpression()
9197  {
9198  XPathAstNode* n = ParseEqualityExpression();
9199 
9200  while (_lexer.current() == lex_string && _lexer.contents() == "and")
9201  {
9202  _lexer.next();
9203 
9204  XPathAstNode* expr = ParseEqualityExpression();
9205 
9206  n = new (alloc_node()) XPathAstNode(ast_op_and, XPathTypeBoolean, n, expr);
9207  }
9208 
9209  return n;
9210  }
9211 
9212  // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
9213  XPathAstNode* ParseOrExpression()
9214  {
9215  XPathAstNode* n = ParseAndExpression();
9216 
9217  while (_lexer.current() == lex_string && _lexer.contents() == "or")
9218  {
9219  _lexer.next();
9220 
9221  XPathAstNode* expr = ParseAndExpression();
9222 
9223  n = new (alloc_node()) XPathAstNode(ast_op_or, XPathTypeBoolean, n, expr);
9224  }
9225 
9226  return n;
9227  }
9228 
9229  // Expr ::= OrExpr
9230  XPathAstNode* ParseExpression()
9231  {
9232  return ParseOrExpression();
9233  }
9234 
9235  XPathParser(const Char8* query, XPathVariableSet* variables, XPathAllocator* alloc, XPathParseResult* Result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _Result(Result)
9236  {
9237  }
9238 
9239  XPathAstNode* parse()
9240  {
9241  XPathAstNode* Result = ParseExpression();
9242 
9243  if (_lexer.current() != lex_eof)
9244  {
9245  // there are still unparsed tokens left, error
9246  throw_error("Incorrect query");
9247  }
9248 
9249  return Result;
9250  }
9251 
9252  static XPathAstNode* parse(const Char8* query, XPathVariableSet* variables, XPathAllocator* alloc, XPathParseResult* Result)
9253  {
9254  XPathParser parser(query, variables, alloc, Result);
9255 
9256  return parser.parse();
9257  }
9258  };
9259 
9260  struct XPathQueryImpl
9261  {
9262  static XPathQueryImpl* create()
9263  {
9264  void* memory = Memory::allocate(sizeof(XPathQueryImpl));
9265 
9266  return new (memory) XPathQueryImpl();
9267  }
9268 
9269  static void destroy(void* ptr)
9270  {
9271  if (!ptr) return;
9272 
9273  // free all allocated pages
9274  static_cast<XPathQueryImpl*>(ptr)->alloc.release();
9275 
9276  // free allocator memory (with the first page)
9277  Memory::deallocate(ptr);
9278  }
9279 
9280  XPathQueryImpl(): GetRoot(0), alloc(&block)
9281  {
9282  block.next = 0;
9283  }
9284 
9285  XPathAstNode* GetRoot;
9286  XPathAllocator alloc;
9287  XPathMemoryBlock block;
9288  };
9289 
9290  PUGI__FN XPathString EvaluateString_impl(XPathQueryImpl* impl, const XPathNode& n, XPathStackData& sd)
9291  {
9292  if (!impl) return XPathString();
9293 
9294  XPathContext c(n, 1, 1);
9295 
9296  return impl->GetRoot->eval_string(c, sd.stack);
9297  }
9298 PUGI__NS_END
9299 
9300 namespace XML
9301 {
9302  PUGI__FN XPathNode::XPathNode()
9303  {
9304  }
9305 
9306  PUGI__FN XPathNode::XPathNode(const Node& Node): TargetNode(Node)
9307  {
9308  }
9309 
9310  PUGI__FN XPathNode::XPathNode(const Attribute& attribute_, const Node& GetParent_): TargetNode(attribute_ ? GetParent_ : Node()), _attribute(attribute_)
9311  {
9312  }
9313 
9314  PUGI__FN Node XPathNode::GetNode() const
9315  {
9316  return _attribute ? Node() : TargetNode;
9317  }
9318 
9319  PUGI__FN Attribute XPathNode::GetAttribute() const
9320  {
9321  return _attribute;
9322  }
9323 
9324  PUGI__FN Node XPathNode::GetParent() const
9325  {
9326  return _attribute ? TargetNode : TargetNode.GetParent();
9327  }
9328 
9329  PUGI__FN static void unspecified_bool_XPathNode(XPathNode***)
9330  {
9331  }
9332 
9333  PUGI__FN XPathNode::operator XPathNode::unspecified_bool_type() const
9334  {
9335  return (TargetNode || _attribute) ? unspecified_bool_XPathNode : 0;
9336  }
9337 
9338  PUGI__FN bool XPathNode::operator!() const
9339  {
9340  return !(TargetNode || _attribute);
9341  }
9342 
9343  PUGI__FN bool XPathNode::operator==(const XPathNode& n) const
9344  {
9345  return TargetNode == n.TargetNode && _attribute == n._attribute;
9346  }
9347 
9348  PUGI__FN bool XPathNode::operator!=(const XPathNode& n) const
9349  {
9350  return TargetNode != n.TargetNode || _attribute != n._attribute;
9351  }
9352 
9353 #ifdef __BORLANDC__
// Logical AND of an XPathNode and a bool; only compiled when __BORLANDC__ is defined.
// lhs is converted to bool with a cast and then combined with rhs.
9354  PUGI__FN bool operator&&(const XPathNode& lhs, bool rhs)
9355  {
9356  return (bool)lhs && rhs;
9357  }
9358 
// Logical OR of an XPathNode and a bool; only compiled when __BORLANDC__ is defined.
// lhs is converted to bool with a cast and then combined with rhs.
9359  PUGI__FN bool operator||(const XPathNode& lhs, bool rhs)
9360  {
9361  return (bool)lhs || rhs;
9362  }
9363 #endif
9364 
9365  PUGI__FN void XPathNodeSet::_assign(const_iterator begin_, const_iterator end_)
9366  {
9367  assert(begin_ <= end_);
9368 
9369  size_t size_ = static_cast<size_t>(end_ - begin_);
9370 
9371  if (size_ <= 1)
9372  {
9373  // deallocate old buffer
9374  if (Begin != &Storage) internal::Memory::deallocate(Begin);
9375 
9376  // use internal buffer
9377  if (begin_ != end_) Storage = *begin_;
9378 
9379  Begin = &Storage;
9380  End = &Storage + size_;
9381  }
9382  else
9383  {
9384  // make heap copy
9385  XPathNode* storage = static_cast<XPathNode*>(internal::Memory::allocate(size_ * sizeof(XPathNode)));
9386 
9387  if (!storage)
9388  {
9389  throw std::bad_alloc();
9390  }
9391 
9392  memcpy(storage, begin_, size_ * sizeof(XPathNode));
9393 
9394  // deallocate old buffer
9395  if (Begin != &Storage) internal::Memory::deallocate(Begin);
9396 
9397  // finalize
9398  Begin = storage;
9399  End = storage + size_;
9400  }
9401  }
9402 
9403  PUGI__FN XPathNodeSet::XPathNodeSet(): TypeOrder(TypeUnsorted), Begin(&Storage), End(&Storage)
9404  {
9405  }
9406 
9407  PUGI__FN XPathNodeSet::XPathNodeSet(const_iterator begin_, const_iterator end_, CollectionType Type_): TypeOrder(Type_), Begin(&Storage), End(&Storage)
9408  {
9409  _assign(begin_, end_);
9410  }
9411 
9412  PUGI__FN XPathNodeSet::~XPathNodeSet()
9413  {
9414  if (Begin != &Storage) internal::Memory::deallocate(Begin);
9415  }
9416 
9417  PUGI__FN XPathNodeSet::XPathNodeSet(const XPathNodeSet& ns): TypeOrder(ns.TypeOrder), Begin(&Storage), End(&Storage)
9418  {
9419  _assign(ns.Begin, ns.End);
9420  }
9421 
9422  PUGI__FN XPathNodeSet& XPathNodeSet::operator=(const XPathNodeSet& ns)
9423  {
9424  if (this == &ns) return *this;
9425 
9426  TypeOrder = ns.TypeOrder;
9427  _assign(ns.Begin, ns.End);
9428 
9429  return *this;
9430  }
9431 
9433  {
9434  return TypeOrder;
9435  }
9436 
9437  PUGI__FN size_t XPathNodeSet::size() const
9438  {
9439  return End - Begin;
9440  }
9441 
9442  PUGI__FN bool XPathNodeSet::Empty() const
9443  {
9444  return Begin == End;
9445  }
9446 
9447  PUGI__FN const XPathNode& XPathNodeSet::operator[](size_t index) const
9448  {
9449  assert(index < size());
9450  return Begin[index];
9451  }
9452 
9454  {
9455  return Begin;
9456  }
9457 
9459  {
9460  return End;
9461  }
9462 
9463  PUGI__FN void XPathNodeSet::sort(bool reverse)
9464  {
9465  TypeOrder = internal::XPathSort(Begin, End, TypeOrder, reverse);
9466  }
9467 
9468  PUGI__FN XPathNode XPathNodeSet::first() const
9469  {
9470  return internal::XPathFirst(Begin, End, TypeOrder);
9471  }
9472 
9473  PUGI__FN XPathParseResult::XPathParseResult(): error("Internal error"), Offset(0)
9474  {
9475  }
9476 
9477  PUGI__FN XPathParseResult::operator bool() const
9478  {
9479  return error == 0;
9480  }
9481 
9482  PUGI__FN const char* XPathParseResult::Description() const
9483  {
9484  return error ? error : "No error";
9485  }
9486 
9487  PUGI__FN XPathVariable::XPathVariable()
9488  {
9489  }
9490 
9491  PUGI__FN const Char8* XPathVariable::Name() const
9492  {
9493  switch (ValueType)
9494  {
9495  case XPathTypeNodeSet:
9496  return static_cast<const internal::XPathVariableNodeSet*>(this)->Name;
9497 
9498  case XPathTypeNumber:
9499  return static_cast<const internal::XPathVariableNumber*>(this)->Name;
9500 
9501  case XPathTypeString:
9502  return static_cast<const internal::XPathVariableString*>(this)->Name;
9503 
9504  case XPathTypeBoolean:
9505  return static_cast<const internal::XPathVariableBoolean*>(this)->Name;
9506 
9507  default:
9508  assert(!"Invalid variable Type");
9509  return 0;
9510  }
9511  }
9512 
9513  PUGI__FN XPathValueType XPathVariable::Type() const
9514  {
9515  return ValueType;
9516  }
9517 
9518  PUGI__FN bool XPathVariable::GetBoolean() const
9519  {
9520  return (ValueType == XPathTypeBoolean) ? static_cast<const internal::XPathVariableBoolean*>(this)->Value : false;
9521  }
9522 
9523  PUGI__FN double XPathVariable::GetNumber() const
9524  {
9525  return (ValueType == XPathTypeNumber) ? static_cast<const internal::XPathVariableNumber*>(this)->Value : internal::gen_nan();
9526  }
9527 
9528  PUGI__FN const Char8* XPathVariable::GetString() const
9529  {
9530  const Char8* Value = (ValueType == XPathTypeString) ? static_cast<const internal::XPathVariableString*>(this)->Value : 0;
9531  return Value ? Value : "";
9532  }
9533 
9534  PUGI__FN const XPathNodeSet& XPathVariable::GetNodeSet() const
9535  {
9536  return (ValueType == XPathTypeNodeSet) ? static_cast<const internal::XPathVariableNodeSet*>(this)->Value : internal::dummy_NodeSet;
9537  }
9538 
9539  PUGI__FN bool XPathVariable::Set(bool Value)
9540  {
9541  if (ValueType != XPathTypeBoolean) return false;
9542 
9543  static_cast<internal::XPathVariableBoolean*>(this)->Value = Value;
9544  return true;
9545  }
9546 
9547  PUGI__FN bool XPathVariable::Set(double Value)
9548  {
9549  if (ValueType != XPathTypeNumber) return false;
9550 
9551  static_cast<internal::XPathVariableNumber*>(this)->Value = Value;
9552  return true;
9553  }
9554 
9555  PUGI__FN bool XPathVariable::Set(const Char8* Value)
9556  {
9557  if (ValueType != XPathTypeString) return false;
9558 
9559  internal::XPathVariableString* var = static_cast<internal::XPathVariableString*>(this);
9560 
9561  // duplicate string
9562  size_t size = (internal::strlength(Value) + 1) * sizeof(Char8);
9563 
9564  Char8* copy = static_cast<Char8*>(internal::Memory::allocate(size));
9565  if (!copy) return false;
9566 
9567  memcpy(copy, Value, size);
9568 
9569  // replace old string
9570  if (var->Value) internal::Memory::deallocate(var->Value);
9571  var->Value = copy;
9572 
9573  return true;
9574  }
9575 
9576  PUGI__FN bool XPathVariable::Set(const XPathNodeSet& Value)
9577  {
9578  if (ValueType != XPathTypeNodeSet) return false;
9579 
9580  static_cast<internal::XPathVariableNodeSet*>(this)->Value = Value;
9581  return true;
9582  }
9583 
9585  {
9586  for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;
9587  }
9588 
9590  {
9591  for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
9592  {
9593  XPathVariable* var = _data[i];
9594 
9595  while (var)
9596  {
9597  XPathVariable* next = var->NextVariable;
9598 
9599  internal::delete_XPathVariable(var->ValueType, var);
9600 
9601  var = next;
9602  }
9603  }
9604  }
9605 
9606  PUGI__FN XPathVariable* XPathVariableSet::Find(const Char8* Name) const
9607  {
9608  const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
9609  size_t hash = internal::hash_string(Name) % hash_size;
9610 
9611  // look for existing variable
9612  for (XPathVariable* var = _data[hash]; var; var = var->NextVariable)
9613  if (internal::strequal(var->Name(), Name))
9614  return var;
9615 
9616  return 0;
9617  }
9618 
9619  PUGI__FN XPathVariable* XPathVariableSet::Add(const Char8* Name, XPathValueType Type)
9620  {
9621  const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
9622  size_t hash = internal::hash_string(Name) % hash_size;
9623 
9624  // look for existing variable
9625  for (XPathVariable* var = _data[hash]; var; var = var->NextVariable)
9626  if (internal::strequal(var->Name(), Name))
9627  return var->Type() == Type ? var : 0;
9628 
9629  // add new variable
9630  XPathVariable* Result = internal::new_XPathVariable(Type, Name);
9631 
9632  if (Result)
9633  {
9634  Result->ValueType = Type;
9635  Result->NextVariable = _data[hash];
9636 
9637  _data[hash] = Result;
9638  }
9639 
9640  return Result;
9641  }
9642 
9643  PUGI__FN bool XPathVariableSet::Set(const Char8* Name, bool Value)
9644  {
9645  XPathVariable* var = Add(Name, XPathTypeBoolean);
9646  return var ? var->Set(Value) : false;
9647  }
9648 
9649  PUGI__FN bool XPathVariableSet::Set(const Char8* Name, double Value)
9650  {
9651  XPathVariable* var = Add(Name, XPathTypeNumber);
9652  return var ? var->Set(Value) : false;
9653  }
9654 
9655  PUGI__FN bool XPathVariableSet::Set(const Char8* Name, const Char8* Value)
9656  {
9657  XPathVariable* var = Add(Name, XPathTypeString);
9658  return var ? var->Set(Value) : false;
9659  }
9660 
9661  PUGI__FN bool XPathVariableSet::Set(const Char8* Name, const XPathNodeSet& Value)
9662  {
9663  XPathVariable* var = Add(Name, XPathTypeNodeSet);
9664  return var ? var->Set(Value) : false;
9665  }
9666 
9667  PUGI__FN XPathVariable* XPathVariableSet::Get(const Char8* Name)
9668  {
9669  return Find(Name);
9670  }
9671 
9672  PUGI__FN const XPathVariable* XPathVariableSet::Get(const Char8* Name) const
9673  {
9674  return Find(Name);
9675  }
9676 
9677  PUGI__FN XPathQuery::XPathQuery(const Char8* query, XPathVariableSet* variables): QueryImplementation(0)
9678  {
9679  internal::XPathQueryImpl* qimpl = internal::XPathQueryImpl::create();
9680 
9681  if (!qimpl)
9682  {
9683  throw std::bad_alloc();
9684  }
9685  else
9686  {
9687  internal::buffer_holder impl_holder(qimpl, internal::XPathQueryImpl::destroy);
9688 
9689  qimpl->GetRoot = internal::XPathParser::parse(query, variables, &qimpl->alloc, &ResultCache);
9690 
9691  if (qimpl->GetRoot)
9692  {
9693  QueryImplementation = static_cast<internal::XPathQueryImpl*>(impl_holder.release());
9694  ResultCache.error = 0;
9695  }
9696  }
9697  }
9698 
9699  PUGI__FN XPathQuery::~XPathQuery()
9700  {
9701  internal::XPathQueryImpl::destroy(QueryImplementation);
9702  }
9703 
9704  PUGI__FN XPathValueType XPathQuery::ReturnType() const
9705  {
9706  if (!QueryImplementation) return XPathTypeNone;
9707 
9708  return static_cast<internal::XPathQueryImpl*>(QueryImplementation)->GetRoot->retType();
9709  }
9710 
9711  PUGI__FN bool XPathQuery::EvaluateBoolean(const XPathNode& n) const
9712  {
9713  if (!QueryImplementation) return false;
9714 
9715  internal::XPathContext c(n, 1, 1);
9716  internal::XPathStackData sd;
9717 
9718  return static_cast<internal::XPathQueryImpl*>(QueryImplementation)->GetRoot->eval_boolean(c, sd.stack);
9719  }
9720 
9721  PUGI__FN double XPathQuery::EvaluateNumber(const XPathNode& n) const
9722  {
9723  if (!QueryImplementation) return internal::gen_nan();
9724 
9725  internal::XPathContext c(n, 1, 1);
9726  internal::XPathStackData sd;
9727 
9728  return static_cast<internal::XPathQueryImpl*>(QueryImplementation)->GetRoot->eval_number(c, sd.stack);
9729  }
9730 
9731  PUGI__FN String XPathQuery::EvaluateString(const XPathNode& n) const
9732  {
9733  internal::XPathStackData sd;
9734 
9735  return internal::EvaluateString_impl(static_cast<internal::XPathQueryImpl*>(QueryImplementation), n, sd).c_str();
9736  }
9737 
9738  PUGI__FN size_t XPathQuery::EvaluateString(Char8* buffer, size_t capacity, const XPathNode& n) const
9739  {
9740  internal::XPathStackData sd;
9741 
9742  internal::XPathString r = internal::EvaluateString_impl(static_cast<internal::XPathQueryImpl*>(QueryImplementation), n, sd);
9743 
9744  size_t full_size = r.length() + 1;
9745 
9746  if (capacity > 0)
9747  {
9748  size_t size = (full_size < capacity) ? full_size : capacity;
9749  assert(size > 0);
9750 
9751  memcpy(buffer, r.c_str(), (size - 1) * sizeof(Char8));
9752  buffer[size - 1] = 0;
9753  }
9754 
9755  return full_size;
9756  }
9757 
9758  PUGI__FN XPathNodeSet XPathQuery::EvaluateNodeSet(const XPathNode& n) const
9759  {
9760  if (!QueryImplementation) return XPathNodeSet();
9761 
9762  internal::XPathAstNode* GetRoot = static_cast<internal::XPathQueryImpl*>(QueryImplementation)->GetRoot;
9763 
9764  if (GetRoot->retType() != XPathTypeNodeSet)
9765  {
9766  XPathParseResult res;
9767  res.error = "Expression does not evaluate to node set";
9768 
9769  String ErrorMessage(String(res.Description()) + "\nError:" + res.error + "\nAt Offset: " + ToString(res.Offset));
9770 
9771  MEZZ_EXCEPTION(Exception::SYNTAX_ERROR_EXCEPTION_XPATH,(ErrorMessage.c_str()));
9772  }
9773 
9774  internal::XPathContext c(n, 1, 1);
9775  internal::XPathStackData sd;
9776 
9777 
9778  internal::XPathNodeSet_raw r = GetRoot->eval_NodeSet(c, sd.stack);
9779 
9780  return XPathNodeSet(r.begin(), r.end(), r.Type());
9781  }
9782 
9783  PUGI__FN const XPathParseResult& XPathQuery::Result() const
9784  {
9785  return ResultCache;
9786  }
9787 
9788  PUGI__FN static void unspecified_bool_XPathQuery(XPathQuery***)
9789  {
9790  }
9791 
9792  PUGI__FN XPathQuery::operator XPathQuery::unspecified_bool_type() const
9793  {
9794  return QueryImplementation ? unspecified_bool_XPathQuery : 0;
9795  }
9796 
9797  PUGI__FN bool XPathQuery::operator!() const
9798  {
9799  return !QueryImplementation;
9800  }
9801 
9802  PUGI__FN XPathNode Node::FindSingleNode(const Char8* query, XPathVariableSet* variables) const
9803  {
9804  XPathQuery q(query, variables);
9805  return FindSingleNode(q);
9806  }
9807 
9808  PUGI__FN XPathNode Node::FindSingleNode(const XPathQuery& query) const
9809  {
9810  XPathNodeSet s = query.EvaluateNodeSet(*this);
9811  return s.Empty() ? XPathNode() : s.first();
9812  }
9813 
9814  PUGI__FN XPathNodeSet Node::FindNodes(const Char8* query, XPathVariableSet* variables) const
9815  {
9816  XPathQuery q(query, variables);
9817  return FindNodes(q);
9818  }
9819 
9820  PUGI__FN XPathNodeSet Node::FindNodes(const XPathQuery& query) const
9821  {
9822  return query.EvaluateNodeSet(*this);
9823  }
9824 }
9825 
9826 #ifdef __BORLANDC__
9827 # pragma option pop
9828 #endif
9829 
9830 // Intel C++ does not properly keep warning state for function templates,
9831 // so popping warning state at the end of translation unit leads to warnings in the middle.
9832 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
9833 # pragma warning(pop)
9834 #endif
9835 
9836 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
9837 #undef PUGI__NO_INLINE
9838 #undef PUGI__STATIC_ASSERT
9839 #undef PUGI__DMC_VOLATILE
9840 #undef PUGI__MSVC_CRT_VERSION
9841 #undef PUGI__NS_BEGIN
9842 #undef PUGI__NS_END
9843 #undef PUGI__FN
9844 #undef PUGI__FN_NO_INLINE
9845 #undef PUGI__IS_CHARTYPE_IMPL
9846 #undef PUGI__IS_CHARTYPE
9847 #undef PUGI__IS_CHARTYPEX
9848 #undef PUGI__SKIPWS
9849 #undef PUGI__OPTSET
9850 #undef PUGI__PUSHNODE
9851 #undef PUGI__POPNODE
9852 #undef PUGI__SCANFOR
9853 #undef PUGI__SCANWHILE
9854 #undef PUGI__ENDSEG
9855 #undef PUGI__THROW_ERROR
9856 #undef PUGI__CHECK_ERROR
9857 } // Mezzanine namespace
9858 
9859 #endif
9860 
9861 /*
9862  * Copyright (c) 2006-2012 Arseny Kapoulkine
9863  *
9864  * Permission is hereby granted, free of charge, to any person
9865  * obtaining a copy of this software and associated documentation
9866  * files (the "Software"), to deal in the Software without
9867  * restriction, including without limitation the rights to use,
9868  * copy, modify, merge, publish, distribute, sublicense, and/or sell
9869  * copies of the Software, and to permit persons to whom the
9870  * Software is furnished to do so, subject to the following
9871  * conditions:
9872  *
9873  * The above copyright notice and this permission notice shall be
9874  * included in all copies or substantial portions of the Software.
9875  *
9876  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
9877  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
9878  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
9879  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
9880  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
9881  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
9882  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
9883  * OTHER DEALINGS IN THE SOFTWARE.
9884  */
9885 
9886 /// @endcond
9887