From 1ed25ae12700816c348a039c4f761fca69d63250 Mon Sep 17 00:00:00 2001
From: plc-user <74435298+plc-user@users.noreply.github.com>
Date: Mon, 3 Oct 2022 11:13:05 +0200
Subject: [PATCH] rewrite: from Pascal to C++
---
QET_ElementScaler.cbp | 35 +
README.md | 53 +-
.../QET_ElementScaler.lpi | 0
.../QET_ElementScaler.lpr | 0
_old_pascal-version/README.md | 35 +
_old_pascal-version/scale.cmd | 37 +
_old_pascal-version/scale.sh | 35 +
compile.cmd | 7 +
compile.sh | 7 +
inc/pugixml/pugiconfig.hpp | 77 +
inc/pugixml/pugixml.cpp | 13134 ++++++++++++++++
inc/pugixml/pugixml.hpp | 1503 ++
main.cpp | 654 +
main.h | 221 +
scale.cmd | 31 +-
scale.sh | 33 +-
16 files changed, 15816 insertions(+), 46 deletions(-)
create mode 100644 QET_ElementScaler.cbp
rename QET_ElementScaler.lpi => _old_pascal-version/QET_ElementScaler.lpi (100%)
rename QET_ElementScaler.lpr => _old_pascal-version/QET_ElementScaler.lpr (100%)
create mode 100644 _old_pascal-version/README.md
create mode 100644 _old_pascal-version/scale.cmd
create mode 100644 _old_pascal-version/scale.sh
create mode 100644 compile.cmd
create mode 100755 compile.sh
create mode 100644 inc/pugixml/pugiconfig.hpp
create mode 100644 inc/pugixml/pugixml.cpp
create mode 100644 inc/pugixml/pugixml.hpp
create mode 100644 main.cpp
create mode 100644 main.h
mode change 100644 => 100755 scale.sh
diff --git a/QET_ElementScaler.cbp b/QET_ElementScaler.cbp
new file mode 100644
index 0000000..53073a9
--- /dev/null
+++ b/QET_ElementScaler.cbp
@@ -0,0 +1,35 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/README.md b/README.md
index 4c0f6a1..ef7ec18 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,10 @@
# QET_ElementScaler
QET_ElementScaler is a commandline-tool to scale QElectroTech-Elements with constant factor(s) and save the changed data to a renamed file.
-With the optional parameter "-o" or "--stdout" the scaled element is written to stdout and no new file is created.
-The optional parameter "-i" or "--stdin" is used to read the input-data from stdin and the output is forced to stdout.
+With the optional parameter "-o" or "--stdout" the changed data is written to stdout and no new file is created.
-It compiles with Lazarus 2.2.2 and FreePascal 3.2.2 on Debian/GNU Linux (unstable) and ReactOS (0.4.15-dev-4888).
+
+It compiles with GCC with C++17 activated on Debian/GNU Linux (unstable) and ReactOS (0.4.15-dev-5075).
The ReactOS-Build can also be used with Win in a cmd-window.
QET_ElementScaler can process one file at a time.
@@ -13,15 +13,44 @@ If you want to edit all files in a directory, you need to call QET_ElementScaler
You do not want to (or can't) compile the FreePascal program yourself? Download the executable file suitable for your operating system from the releases-page and use that.
Hint:
Get familiar with the command line before using this software.
-General support for using the command line for the various systems cannot be provided here.
+General support for using the command line for the various systems cannot be provided here.
+
usage:
-QET_ElementScaler [-i] [-o] <file> <scaling-factor>
-(as used in Batch-File "scale.cmd" and shell-script "scale.sh")
+QET_ElementScaler [options] <file> <scaling-factor>
+(as used in Batch-File "scale.cmd" and shell-script "scale.sh")
or:
-QET_ElementScaler [-i] [-o] [-x FactorForX] [-y FactorForY] -f FILENAME
+QET_ElementScaler [options] [-x FactorForX] [-y FactorForY] -f FILENAME
+
+
+
+
+ available options:
+ -i | --stdin input-data is read from stdin, a given filename is
+ ignored and scaled element will be written to stdout
+ -o | --stdout output will be written to stdout
+ -x VALUE or
+ --factorx VALUE factor for x-values (x, rx, width, ...)
+ -y VALUE or
+ --factory VALUE factor for y-values (y, ry, height, ...)
+ -f FILENAME or
+ --file FILENAME the file that will be used
+ -h | --help show this help
+
+
+
+ there are also some "long-opt"-only options:
+ "--RemoveAllTerminals" removes all terminals from the element
+ (useful for front-views or "thumbnails")
+ "--FlipPolyHorizontal" flips ALL Lines and polygons horizontally
+ (useful during creation of elements)
+ "--FlipPolyVertical" flips ALL Lines and polygons vertically
+ (useful during creation of elements)
+ "--OverwriteOriginal" the original file is replaced by scaled one
+ (CAUTION: Be careful with this option!)
+
examples to use data from stdin:
@@ -30,6 +59,12 @@ QET_ElementScaler -i 2.0 < ElementToScale.elmt > ScaledElement.elmt <
or use a pipe like this
cat ElementToScale.elmt | QET_ElementScaler -i 2.0 | OtherSoftwareBinary
-extended mode and long-options are also possible:
-QET_ElementScaler --stdin -x 2 -y 3 < ElementToScale.elmt > ScaledElement.elmt
+extended mode is also possible:
+QET_ElementScaler -i -x 2 -y 3 < ElementToScale.elmt > ScaledElement.elmt
+
+
+
+QET_ElementScaler uses Arseny Kapoulkine's "pugixml" (https://pugixml.org)
+to handle the content of the Element-File.
+Thank you, Arseny!
diff --git a/QET_ElementScaler.lpi b/_old_pascal-version/QET_ElementScaler.lpi
similarity index 100%
rename from QET_ElementScaler.lpi
rename to _old_pascal-version/QET_ElementScaler.lpi
diff --git a/QET_ElementScaler.lpr b/_old_pascal-version/QET_ElementScaler.lpr
similarity index 100%
rename from QET_ElementScaler.lpr
rename to _old_pascal-version/QET_ElementScaler.lpr
diff --git a/_old_pascal-version/README.md b/_old_pascal-version/README.md
new file mode 100644
index 0000000..4c0f6a1
--- /dev/null
+++ b/_old_pascal-version/README.md
@@ -0,0 +1,35 @@
+# QET_ElementScaler
+
+QET_ElementScaler is a commandline-tool to scale QElectroTech-Elements with constant factor(s) and save the changed data to a renamed file.
+With the optional parameter "-o" or "--stdout" the scaled element is written to stdout and no new file is created.
+The optional parameter "-i" or "--stdin" is used to read the input-data from stdin and the output is forced to stdout.
+
+It compiles with Lazarus 2.2.2 and FreePascal 3.2.2 on Debian/GNU Linux (unstable) and ReactOS (0.4.15-dev-4888).
+The ReactOS-Build can also be used with Win in a cmd-window.
+
+QET_ElementScaler can process one file at a time.
+If you want to edit all files in a directory, you need to call QET_ElementScaler from a batch-, cmd- or shell-script-file such as the ones available here.
+
+You do not want to (or can't) compile the FreePascal program yourself? Download the executable file suitable for your operating system from the releases-page and use that.
+Hint:
+Get familiar with the command line before using this software.
+General support for using the command line for the various systems cannot be provided here.
+
+
+usage:
+QET_ElementScaler [-i] [-o] <file> <scaling-factor>
+(as used in Batch-File "scale.cmd" and shell-script "scale.sh")
+
+or:
+QET_ElementScaler [-i] [-o] [-x FactorForX] [-y FactorForY] -f FILENAME
+
+
+examples to use data from stdin:
+QET_ElementScaler -i 2.0 < ElementToScale.elmt > ScaledElement.elmt
+
+or use a pipe like this
+cat ElementToScale.elmt | QET_ElementScaler -i 2.0 | OtherSoftwareBinary
+
+extended mode and long-options are also possible:
+QET_ElementScaler --stdin -x 2 -y 3 < ElementToScale.elmt > ScaledElement.elmt
+
diff --git a/_old_pascal-version/scale.cmd b/_old_pascal-version/scale.cmd
new file mode 100644
index 0000000..1d4691b
--- /dev/null
+++ b/_old_pascal-version/scale.cmd
@@ -0,0 +1,37 @@
+@echo skaliere QET - Element-Dateien ...
+
+@echo off & setlocal
+
+rem Wo liegt das Programm?
+rem Where does the Program lie?
+set "ProgPfad=C:\Tools\QET_ElementScaler"
+
+rem Wo liegen die zu skalierenden Element-Dateien?
+rem Path to the directory of element-files to be scaled:
+set "DataPfad=Z:\win\Elemente"
+
+rem Welcher Faktor soll angewendet werden?
+set "Skalierung=0.9"
+
+
+rem ------ Ab hier nichts mehr ändern! ------
+rem ------ No changes below this line! ------
+
+
+rem Findet rekursiv alle passenden Dateien:
+@for /r "%DataPfad%" %%i in ("*.elmt") do (
+ rem gefundene Dateien bearbeiten:
+ "%ProgPfad%\QET_ElementScaler.exe" "%%i" %Skalierung%
+ )
+
+
+rem Rekursiv den XML-Header entfernen:
+@for /r "%DataPfad%" %%i in ("*.SCALED.elmt") do (
+ rem gefundene Dateien bearbeiten:
+ more +1 < "%%i" > "%%i.txt"
+ rem und wieder umbenennen:
+ move /Y "%%i.txt" "%%i"
+ )
+
+
+@echo + + + Fertig! + + +
diff --git a/_old_pascal-version/scale.sh b/_old_pascal-version/scale.sh
new file mode 100644
index 0000000..6278800
--- /dev/null
+++ b/_old_pascal-version/scale.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+# scales QET - Element-files in the directory and all sub-dirs
+# by the factor given here:
+
+Skalierung="0.5"
+SkalierProg="/usr/local/bin/QET_ElementScaler"
+
+# um Leerzeichen in Dateinamen verarbeiten zu können:
+# Originalzustand merken:
+OFS=$IFS
+# Einstellen:
+IFS="
+"
+
+for i in `find . -name "*.elmt"` ; do
+ echo "processing $i"
+ # Grafik skalieren:
+ "$SkalierProg" "$i" "$Skalierung"
+ done
+
+for i in `find . -name "*.SCALED.elmt"` ; do
+ echo "processing $i"
+ # "xml version" entfernen
+ grep -v -i "xml version" "$i" > /tmp/tempfile.elmt
+ # Dateiname wieder herstellen
+ cat /tmp/tempfile.elmt > "$i"
+ done
+
+# aufräumen:
+rm -r /tmp/tempfile.elmt
+
+# Leerzeichen in Dateinamen - wieder Originalzustand:
+IFS=$OFS
+# FERTIG!
diff --git a/compile.cmd b/compile.cmd
new file mode 100644
index 0000000..2dbe2ff
--- /dev/null
+++ b/compile.cmd
@@ -0,0 +1,7 @@
+rem compile with GCC in "path"
+
+g++.exe -Wall -O2 -std=c++17 -c inc\pugixml\pugixml.cpp -o obj\inc\pugixml\pugixml.o
+g++.exe -Wall -O2 -std=c++17 -c main.cpp -o obj\main.o
+g++.exe -o QET_ElementScaler.exe obj\inc\pugixml\pugixml.o obj\main.o -s
+
+pause
diff --git a/compile.sh b/compile.sh
new file mode 100755
index 0000000..dd423ed
--- /dev/null
+++ b/compile.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+# compile QET_ElementScaler
+
+g++ -Wall -O2 std=c++17 -c inc/pugixml/pugixml.cpp -o obj/inc/pugixml/pugixml.o
+g++ -Wall -O2 std=c++17 -c main.cpp -o obj/main.o
+g++ -o QET_ElementScaler obj/inc/pugixml/pugixml.o obj/main.o -s
diff --git a/inc/pugixml/pugiconfig.hpp b/inc/pugixml/pugiconfig.hpp
new file mode 100644
index 0000000..0713b0e
--- /dev/null
+++ b/inc/pugixml/pugiconfig.hpp
@@ -0,0 +1,77 @@
+/**
+ * pugixml parser - version 1.12
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2022, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Report bugs and download new versions at https://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
+ */
+
+#ifndef HEADER_PUGICONFIG_HPP
+#define HEADER_PUGICONFIG_HPP
+
+// Uncomment this to enable wchar_t mode
+// #define PUGIXML_WCHAR_MODE
+
+// Uncomment this to enable compact mode
+// #define PUGIXML_COMPACT
+
+// Uncomment this to disable XPath
+// #define PUGIXML_NO_XPATH
+
+// Uncomment this to disable STL
+// #define PUGIXML_NO_STL
+
+// Uncomment this to disable exceptions
+// #define PUGIXML_NO_EXCEPTIONS
+
+// Set this to control attributes for public classes/functions, i.e.:
+// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
+// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
+// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
+// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
+
+// Tune these constants to adjust memory-related behavior
+// #define PUGIXML_MEMORY_PAGE_SIZE 32768
+// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
+// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
+
+// Tune this constant to adjust max nesting for XPath queries
+// #define PUGIXML_XPATH_DEPTH_LIMIT 1024
+
+// Uncomment this to switch to header-only version
+// #define PUGIXML_HEADER_ONLY
+
+// Uncomment this to enable long long support
+// #define PUGIXML_HAS_LONG_LONG
+
+#endif
+
+/**
+ * Copyright (c) 2006-2022 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
diff --git a/inc/pugixml/pugixml.cpp b/inc/pugixml/pugixml.cpp
new file mode 100644
index 0000000..dce9a13
--- /dev/null
+++ b/inc/pugixml/pugixml.cpp
@@ -0,0 +1,13134 @@
+/**
+ * pugixml parser - version 1.12
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2022, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Report bugs and download new versions at https://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
+ */
+
+#ifndef SOURCE_PUGIXML_CPP
+#define SOURCE_PUGIXML_CPP
+
+#include "pugixml.hpp"
+
+#include
+#include
+#include
+#include
+#include
+
+#ifdef PUGIXML_WCHAR_MODE
+# include
+#endif
+
+#ifndef PUGIXML_NO_XPATH
+# include
+# include
+#endif
+
+#ifndef PUGIXML_NO_STL
+# include
+# include
+# include
+#endif
+
+// For placement new
+#include
+
+#ifdef _MSC_VER
+# pragma warning(push)
+# pragma warning(disable: 4127) // conditional expression is constant
+# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
+# pragma warning(disable: 4702) // unreachable code
+# pragma warning(disable: 4996) // this function or variable may be unsafe
+#endif
+
+#if defined(_MSC_VER) && defined(__c2__)
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
+#endif
+
+#ifdef __INTEL_COMPILER
+# pragma warning(disable: 177) // function was declared but never referenced
+# pragma warning(disable: 279) // controlling expression is constant
+# pragma warning(disable: 1478 1786) // function was declared "deprecated"
+# pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
+#endif
+
+#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
+# pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
+#endif
+
+#ifdef __BORLANDC__
+# pragma option push
+# pragma warn -8008 // condition is always false
+# pragma warn -8066 // unreachable code
+#endif
+
+#ifdef __SNC__
+// Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
+# pragma diag_suppress=178 // function was declared but never referenced
+# pragma diag_suppress=237 // controlling expression is constant
+#endif
+
+#ifdef __TI_COMPILER_VERSION__
+# pragma diag_suppress 179 // function was declared but never referenced
+#endif
+
+// Inlining controls
+#if defined(_MSC_VER) && _MSC_VER >= 1300
+# define PUGI__NO_INLINE __declspec(noinline)
+#elif defined(__GNUC__)
+# define PUGI__NO_INLINE __attribute__((noinline))
+#else
+# define PUGI__NO_INLINE
+#endif
+
+// Branch weight controls
+#if defined(__GNUC__) && !defined(__c2__)
+# define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
+#else
+# define PUGI__UNLIKELY(cond) (cond)
+#endif
+
+// Simple static assertion
+#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
+
+// Digital Mars C++ bug workaround for passing char loaded from memory via stack
+#ifdef __DMC__
+# define PUGI__DMC_VOLATILE volatile
+#else
+# define PUGI__DMC_VOLATILE
+#endif
+
+// Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings
+#if defined(__clang__) && defined(__has_attribute)
+# if __has_attribute(no_sanitize)
+# define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
+# else
+# define PUGI__UNSIGNED_OVERFLOW
+# endif
+#else
+# define PUGI__UNSIGNED_OVERFLOW
+#endif
+
+// Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
+#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
+using std::memcpy;
+using std::memmove;
+using std::memset;
+#endif
+
+// Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations
+#if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
+# define LLONG_MIN (-LLONG_MAX - 1LL)
+# define LLONG_MAX __LONG_LONG_MAX__
+# define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
+#endif
+
+// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
+#if defined(_MSC_VER) && !defined(__S3E__) && !defined(_WIN32_WCE)
+# define PUGI__MSVC_CRT_VERSION _MSC_VER
+#elif defined(_WIN32_WCE)
+# define PUGI__MSVC_CRT_VERSION 1310 // MSVC7.1
+#endif
+
+// Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size.
+#if __cplusplus >= 201103
+# define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
+#elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
+# define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
+#else
+# define PUGI__SNPRINTF sprintf
+#endif
+
+// We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat.
+#ifdef PUGIXML_HEADER_ONLY
+# define PUGI__NS_BEGIN namespace pugi { namespace impl {
+# define PUGI__NS_END } }
+# define PUGI__FN inline
+# define PUGI__FN_NO_INLINE inline
+#else
+# if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
+# define PUGI__NS_BEGIN namespace pugi { namespace impl {
+# define PUGI__NS_END } }
+# else
+# define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
+# define PUGI__NS_END } } }
+# endif
+# define PUGI__FN
+# define PUGI__FN_NO_INLINE PUGI__NO_INLINE
+#endif
+
+// uintptr_t
+#if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
+namespace pugi
+{
+# ifndef _UINTPTR_T_DEFINED
+ typedef size_t uintptr_t;
+# endif
+
+ typedef unsigned __int8 uint8_t;
+ typedef unsigned __int16 uint16_t;
+ typedef unsigned __int32 uint32_t;
+}
+#else
+# include
+#endif
+
+// Memory allocation
+PUGI__NS_BEGIN
+ PUGI__FN void* default_allocate(size_t size)
+ {
+ return malloc(size);
+ }
+
+ PUGI__FN void default_deallocate(void* ptr)
+ {
+ free(ptr);
+ }
+
+ template
+ struct xml_memory_management_function_storage
+ {
+ static allocation_function allocate;
+ static deallocation_function deallocate;
+ };
+
+ // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
+ // Without a template<> we'll get multiple definitions of the same static
+ template allocation_function xml_memory_management_function_storage::allocate = default_allocate;
+ template deallocation_function xml_memory_management_function_storage::deallocate = default_deallocate;
+
+ typedef xml_memory_management_function_storage xml_memory;
+PUGI__NS_END
+
+// String utilities
+PUGI__NS_BEGIN
+ // Get string length
+ PUGI__FN size_t strlength(const char_t* s)
+ {
+ assert(s);
+
+ #ifdef PUGIXML_WCHAR_MODE
+ return wcslen(s);
+ #else
+ return strlen(s);
+ #endif
+ }
+
+ // Compare two strings
+ PUGI__FN bool strequal(const char_t* src, const char_t* dst)
+ {
+ assert(src && dst);
+
+ #ifdef PUGIXML_WCHAR_MODE
+ return wcscmp(src, dst) == 0;
+ #else
+ return strcmp(src, dst) == 0;
+ #endif
+ }
+
+ // Compare lhs with [rhs_begin, rhs_end)
+ PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
+ {
+ for (size_t i = 0; i < count; ++i)
+ if (lhs[i] != rhs[i])
+ return false;
+
+ return lhs[count] == 0;
+ }
+
+ // Get length of wide string, even if CRT lacks wide character support
+ PUGI__FN size_t strlength_wide(const wchar_t* s)
+ {
+ assert(s);
+
+ #ifdef PUGIXML_WCHAR_MODE
+ return wcslen(s);
+ #else
+ const wchar_t* end = s;
+ while (*end) end++;
+ return static_cast(end - s);
+ #endif
+ }
+PUGI__NS_END
+
+// auto_ptr-like object for exception recovery
+PUGI__NS_BEGIN
+ template struct auto_deleter
+ {
+ typedef void (*D)(T*);
+
+ T* data;
+ D deleter;
+
+ auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
+ {
+ }
+
+ ~auto_deleter()
+ {
+ if (data) deleter(data);
+ }
+
+ T* release()
+ {
+ T* result = data;
+ data = 0;
+ return result;
+ }
+ };
+PUGI__NS_END
+
+#ifdef PUGIXML_COMPACT
+PUGI__NS_BEGIN
+ class compact_hash_table
+ {
+ public:
+ compact_hash_table(): _items(0), _capacity(0), _count(0)
+ {
+ }
+
+ void clear()
+ {
+ if (_items)
+ {
+ xml_memory::deallocate(_items);
+ _items = 0;
+ _capacity = 0;
+ _count = 0;
+ }
+ }
+
+ void* find(const void* key)
+ {
+ if (_capacity == 0) return 0;
+
+ item_t* item = get_item(key);
+ assert(item);
+ assert(item->key == key || (item->key == 0 && item->value == 0));
+
+ return item->value;
+ }
+
+ void insert(const void* key, void* value)
+ {
+ assert(_capacity != 0 && _count < _capacity - _capacity / 4);
+
+ item_t* item = get_item(key);
+ assert(item);
+
+ if (item->key == 0)
+ {
+ _count++;
+ item->key = key;
+ }
+
+ item->value = value;
+ }
+
+ bool reserve(size_t extra = 16)
+ {
+ if (_count + extra >= _capacity - _capacity / 4)
+ return rehash(_count + extra);
+
+ return true;
+ }
+
+ private:
+ struct item_t
+ {
+ const void* key;
+ void* value;
+ };
+
+ item_t* _items;
+ size_t _capacity;
+
+ size_t _count;
+
+ bool rehash(size_t count);
+
+ item_t* get_item(const void* key)
+ {
+ assert(key);
+ assert(_capacity > 0);
+
+ size_t hashmod = _capacity - 1;
+ size_t bucket = hash(key) & hashmod;
+
+ for (size_t probe = 0; probe <= hashmod; ++probe)
+ {
+ item_t& probe_item = _items[bucket];
+
+ if (probe_item.key == key || probe_item.key == 0)
+ return &probe_item;
+
+ // hash collision, quadratic probing
+ bucket = (bucket + probe + 1) & hashmod;
+ }
+
+ assert(false && "Hash table is full"); // unreachable
+ return 0;
+ }
+
+ static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key)
+ {
+ unsigned int h = static_cast(reinterpret_cast(key) & 0xffffffff);
+
+ // MurmurHash3 32-bit finalizer
+ h ^= h >> 16;
+ h *= 0x85ebca6bu;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35u;
+ h ^= h >> 16;
+
+ return h;
+ }
+ };
+
+ PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
+ {
+ size_t capacity = 32;
+ while (count >= capacity - capacity / 4)
+ capacity *= 2;
+
+ compact_hash_table rt;
+ rt._capacity = capacity;
+ rt._items = static_cast(xml_memory::allocate(sizeof(item_t) * capacity));
+
+ if (!rt._items)
+ return false;
+
+ memset(rt._items, 0, sizeof(item_t) * capacity);
+
+ for (size_t i = 0; i < _capacity; ++i)
+ if (_items[i].key)
+ rt.insert(_items[i].key, _items[i].value);
+
+ if (_items)
+ xml_memory::deallocate(_items);
+
+ _capacity = capacity;
+ _items = rt._items;
+
+ assert(_count == rt._count);
+
+ return true;
+ }
+
+PUGI__NS_END
+#endif
+
+PUGI__NS_BEGIN
+#ifdef PUGIXML_COMPACT
+ static const uintptr_t xml_memory_block_alignment = 4;
+#else
+ static const uintptr_t xml_memory_block_alignment = sizeof(void*);
+#endif
+
+ // extra metadata bits
+ static const uintptr_t xml_memory_page_contents_shared_mask = 64;
+ static const uintptr_t xml_memory_page_name_allocated_mask = 32;
+ static const uintptr_t xml_memory_page_value_allocated_mask = 16;
+ static const uintptr_t xml_memory_page_type_mask = 15;
+
+ // combined masks for string uniqueness
+ static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
+ static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
+
+#ifdef PUGIXML_COMPACT
+ #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
+ #define PUGI__GETPAGE_IMPL(header) (header).get_page()
+#else
+ #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast(object) - reinterpret_cast(page)) << 8) | (flags))
+ // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
+ #define PUGI__GETPAGE_IMPL(header) static_cast(const_cast(static_cast(reinterpret_cast(&header) - (header >> 8))))
+#endif
+
+ #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
+ #define PUGI__NODETYPE(n) static_cast((n)->header & impl::xml_memory_page_type_mask)
+
+ struct xml_allocator;
+
+ struct xml_memory_page
+ {
+ static xml_memory_page* construct(void* memory)
+ {
+ xml_memory_page* result = static_cast(memory);
+
+ result->allocator = 0;
+ result->prev = 0;
+ result->next = 0;
+ result->busy_size = 0;
+ result->freed_size = 0;
+
+ #ifdef PUGIXML_COMPACT
+ result->compact_string_base = 0;
+ result->compact_shared_parent = 0;
+ result->compact_page_marker = 0;
+ #endif
+
+ return result;
+ }
+
+ xml_allocator* allocator;
+
+ xml_memory_page* prev;
+ xml_memory_page* next;
+
+ size_t busy_size;
+ size_t freed_size;
+
+ #ifdef PUGIXML_COMPACT
+ char_t* compact_string_base;
+ void* compact_shared_parent;
+ uint32_t* compact_page_marker;
+ #endif
+ };
+
+ static const size_t xml_memory_page_size =
+ #ifdef PUGIXML_MEMORY_PAGE_SIZE
+ (PUGIXML_MEMORY_PAGE_SIZE)
+ #else
+ 32768
+ #endif
+ - sizeof(xml_memory_page);
+
+ struct xml_memory_string_header
+ {
+ uint16_t page_offset; // offset from page->data
+ uint16_t full_size; // 0 if string occupies whole page
+ };
+
+ struct xml_allocator
+ {
+ xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
+ {
+ #ifdef PUGIXML_COMPACT
+ _hash = 0;
+ #endif
+ }
+
+ xml_memory_page* allocate_page(size_t data_size)
+ {
+ size_t size = sizeof(xml_memory_page) + data_size;
+
+ // allocate block with some alignment, leaving memory for worst-case padding
+ void* memory = xml_memory::allocate(size);
+ if (!memory) return 0;
+
+ // prepare page structure
+ xml_memory_page* page = xml_memory_page::construct(memory);
+ assert(page);
+
+ assert(this == _root->allocator);
+ page->allocator = this;
+
+ return page;
+ }
+
+ static void deallocate_page(xml_memory_page* page)
+ {
+ xml_memory::deallocate(page);
+ }
+
+ void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
+
+ void* allocate_memory(size_t size, xml_memory_page*& out_page)
+ {
+ if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
+ return allocate_memory_oob(size, out_page);
+
+ void* buf = reinterpret_cast(_root) + sizeof(xml_memory_page) + _busy_size;
+
+ _busy_size += size;
+
+ out_page = _root;
+
+ return buf;
+ }
+
+ #ifdef PUGIXML_COMPACT
+ void* allocate_object(size_t size, xml_memory_page*& out_page)
+ {
+ void* result = allocate_memory(size + sizeof(uint32_t), out_page);
+ if (!result) return 0;
+
+ // adjust for marker
+ ptrdiff_t offset = static_cast(result) - reinterpret_cast(out_page->compact_page_marker);
+
+ if (PUGI__UNLIKELY(static_cast(offset) >= 256 * xml_memory_block_alignment))
+ {
+ // insert new marker
+ uint32_t* marker = static_cast(result);
+
+ *marker = static_cast(reinterpret_cast(marker) - reinterpret_cast(out_page));
+ out_page->compact_page_marker = marker;
+
+ // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
+ // this will make sure deallocate_memory correctly tracks the size
+ out_page->freed_size += sizeof(uint32_t);
+
+ return marker + 1;
+ }
+ else
+ {
+ // roll back uint32_t part
+ _busy_size -= sizeof(uint32_t);
+
+ return result;
+ }
+ }
+ #else
+ void* allocate_object(size_t size, xml_memory_page*& out_page)
+ {
+ return allocate_memory(size, out_page);
+ }
+ #endif
+
+ void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
+ {
+ if (page == _root) page->busy_size = _busy_size;
+
+ assert(ptr >= reinterpret_cast(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast(page) + sizeof(xml_memory_page) + page->busy_size);
+ (void)!ptr;
+
+ page->freed_size += size;
+ assert(page->freed_size <= page->busy_size);
+
+ if (page->freed_size == page->busy_size)
+ {
+ if (page->next == 0)
+ {
+ assert(_root == page);
+
+ // top page freed, just reset sizes
+ page->busy_size = 0;
+ page->freed_size = 0;
+
+ #ifdef PUGIXML_COMPACT
+ // reset compact state to maximize efficiency
+ page->compact_string_base = 0;
+ page->compact_shared_parent = 0;
+ page->compact_page_marker = 0;
+ #endif
+
+ _busy_size = 0;
+ }
+ else
+ {
+ assert(_root != page);
+ assert(page->prev);
+
+ // remove from the list
+ page->prev->next = page->next;
+ page->next->prev = page->prev;
+
+ // deallocate
+ deallocate_page(page);
+ }
+ }
+ }
+
+ char_t* allocate_string(size_t length)
+ {
+ static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
+
+ PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
+
+ // allocate memory for string and header block
+ size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
+
+ // round size up to block alignment boundary
+ size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
+
+ xml_memory_page* page;
+ xml_memory_string_header* header = static_cast(allocate_memory(full_size, page));
+
+ if (!header) return 0;
+
+ // setup header
+ ptrdiff_t page_offset = reinterpret_cast(header) - reinterpret_cast(page) - sizeof(xml_memory_page);
+
+ assert(page_offset % xml_memory_block_alignment == 0);
+ assert(page_offset >= 0 && static_cast(page_offset) < max_encoded_offset);
+ header->page_offset = static_cast(static_cast(page_offset) / xml_memory_block_alignment);
+
+ // full_size == 0 for large strings that occupy the whole page
+ assert(full_size % xml_memory_block_alignment == 0);
+ assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
+ header->full_size = static_cast(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
+
+ // round-trip through void* to avoid 'cast increases required alignment of target type' warning
+ // header is guaranteed a pointer-sized alignment, which should be enough for char_t
+ return static_cast(static_cast(header + 1));
+ }
+
+ void deallocate_string(char_t* string)
+ {
+ // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
+ // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
+
+ // get header
+ xml_memory_string_header* header = static_cast(static_cast(string)) - 1;
+ assert(header);
+
+ // deallocate
+ size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
+ xml_memory_page* page = reinterpret_cast(static_cast(reinterpret_cast(header) - page_offset));
+
+ // if full_size == 0 then this string occupies the whole page
+ size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
+
+ deallocate_memory(header, full_size, page);
+ }
+
+ bool reserve()
+ {
+ #ifdef PUGIXML_COMPACT
+ return _hash->reserve();
+ #else
+ return true;
+ #endif
+ }
+
+ xml_memory_page* _root;
+ size_t _busy_size;
+
+ #ifdef PUGIXML_COMPACT
+ compact_hash_table* _hash;
+ #endif
+ };
+
+ PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
+ {
+ const size_t large_allocation_threshold = xml_memory_page_size / 4;
+
+ xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
+ out_page = page;
+
+ if (!page) return 0;
+
+ if (size <= large_allocation_threshold)
+ {
+ _root->busy_size = _busy_size;
+
+ // insert page at the end of linked list
+ page->prev = _root;
+ _root->next = page;
+ _root = page;
+
+ _busy_size = size;
+ }
+ else
+ {
+ // insert page before the end of linked list, so that it is deleted as soon as possible
+ // the last page is not deleted even if it's empty (see deallocate_memory)
+ assert(_root->prev);
+
+ page->prev = _root->prev;
+ page->next = _root;
+
+ _root->prev->next = page;
+ _root->prev = page;
+
+ page->busy_size = size;
+ }
+
+ return reinterpret_cast(page) + sizeof(xml_memory_page);
+ }
+PUGI__NS_END
+
+#ifdef PUGIXML_COMPACT
+PUGI__NS_BEGIN
+ static const uintptr_t compact_alignment_log2 = 2;
+ static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
+
+ class compact_header
+ {
+ public:
+ compact_header(xml_memory_page* page, unsigned int flags)
+ {
+ PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
+
+ ptrdiff_t offset = (reinterpret_cast(this) - reinterpret_cast(page->compact_page_marker));
+ assert(offset % compact_alignment == 0 && static_cast(offset) < 256 * compact_alignment);
+
+ _page = static_cast(offset >> compact_alignment_log2);
+ _flags = static_cast(flags);
+ }
+
+ void operator&=(uintptr_t mod)
+ {
+ _flags &= static_cast(mod);
+ }
+
+ void operator|=(uintptr_t mod)
+ {
+ _flags |= static_cast(mod);
+ }
+
+ uintptr_t operator&(uintptr_t mod) const
+ {
+ return _flags & mod;
+ }
+
+ xml_memory_page* get_page() const
+ {
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ const char* page_marker = reinterpret_cast(this) - (_page << compact_alignment_log2);
+ const char* page = page_marker - *reinterpret_cast(static_cast(page_marker));
+
+ return const_cast(reinterpret_cast(static_cast(page)));
+ }
+
+ private:
+ unsigned char _page;
+ unsigned char _flags;
+ };
+
+ PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
+ {
+ const compact_header* header = reinterpret_cast(static_cast(object) - header_offset);
+
+ return header->get_page();
+ }
+
+ template PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
+ {
+ return static_cast(compact_get_page(object, header_offset)->allocator->_hash->find(object));
+ }
+
+ template PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
+ {
+ compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
+ }
+
+ template class compact_pointer
+ {
+ public:
+ compact_pointer(): _data(0)
+ {
+ }
+
+ void operator=(const compact_pointer& rhs)
+ {
+ *this = rhs + 0;
+ }
+
+ void operator=(T* value)
+ {
+ if (value)
+ {
+ // value is guaranteed to be compact-aligned; 'this' is not
+ // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
+ // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
+ // compensate for arithmetic shift rounding for negative values
+ ptrdiff_t diff = reinterpret_cast(value) - reinterpret_cast(this);
+ ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
+
+ if (static_cast(offset) <= 253)
+ _data = static_cast(offset + 1);
+ else
+ {
+ compact_set_value(this, value);
+
+ _data = 255;
+ }
+ }
+ else
+ _data = 0;
+ }
+
+ operator T*() const
+ {
+ if (_data)
+ {
+ if (_data < 255)
+ {
+ uintptr_t base = reinterpret_cast(this) & ~(compact_alignment - 1);
+
+ return reinterpret_cast(base + (_data - 1 + start) * compact_alignment);
+ }
+ else
+ return compact_get_value(this);
+ }
+ else
+ return 0;
+ }
+
+ T* operator->() const
+ {
+ return *this;
+ }
+
+ private:
+ unsigned char _data;
+ };
+
+ template class compact_pointer_parent
+ {
+ public:
+ compact_pointer_parent(): _data(0)
+ {
+ }
+
+ void operator=(const compact_pointer_parent& rhs)
+ {
+ *this = rhs + 0;
+ }
+
+ void operator=(T* value)
+ {
+ if (value)
+ {
+ // value is guaranteed to be compact-aligned; 'this' is not
+ // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
+ // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
+ // compensate for arithmetic shift behavior for negative values
+ ptrdiff_t diff = reinterpret_cast(value) - reinterpret_cast(this);
+ ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
+
+ if (static_cast(offset) <= 65533)
+ {
+ _data = static_cast(offset + 1);
+ }
+ else
+ {
+ xml_memory_page* page = compact_get_page(this, header_offset);
+
+ if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
+ page->compact_shared_parent = value;
+
+ if (page->compact_shared_parent == value)
+ {
+ _data = 65534;
+ }
+ else
+ {
+ compact_set_value(this, value);
+
+ _data = 65535;
+ }
+ }
+ }
+ else
+ {
+ _data = 0;
+ }
+ }
+
+ operator T*() const
+ {
+ if (_data)
+ {
+ if (_data < 65534)
+ {
+ uintptr_t base = reinterpret_cast(this) & ~(compact_alignment - 1);
+
+ return reinterpret_cast(base + (_data - 1 - 65533) * compact_alignment);
+ }
+ else if (_data == 65534)
+ return static_cast(compact_get_page(this, header_offset)->compact_shared_parent);
+ else
+ return compact_get_value(this);
+ }
+ else
+ return 0;
+ }
+
+ T* operator->() const
+ {
+ return *this;
+ }
+
+ private:
+ uint16_t _data;
+ };
+
+ template class compact_string
+ {
+ public:
+ compact_string(): _data(0)
+ {
+ }
+
+ void operator=(const compact_string& rhs)
+ {
+ *this = rhs + 0;
+ }
+
+ void operator=(char_t* value)
+ {
+ if (value)
+ {
+ xml_memory_page* page = compact_get_page(this, header_offset);
+
+ if (PUGI__UNLIKELY(page->compact_string_base == 0))
+ page->compact_string_base = value;
+
+ ptrdiff_t offset = value - page->compact_string_base;
+
+ if (static_cast(offset) < (65535 << 7))
+ {
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ uint16_t* base = reinterpret_cast(static_cast(reinterpret_cast(this) - base_offset));
+
+ if (*base == 0)
+ {
+ *base = static_cast((offset >> 7) + 1);
+ _data = static_cast((offset & 127) + 1);
+ }
+ else
+ {
+ ptrdiff_t remainder = offset - ((*base - 1) << 7);
+
+ if (static_cast(remainder) <= 253)
+ {
+ _data = static_cast(remainder + 1);
+ }
+ else
+ {
+ compact_set_value(this, value);
+
+ _data = 255;
+ }
+ }
+ }
+ else
+ {
+ compact_set_value(this, value);
+
+ _data = 255;
+ }
+ }
+ else
+ {
+ _data = 0;
+ }
+ }
+
+ operator char_t*() const
+ {
+ if (_data)
+ {
+ if (_data < 255)
+ {
+ xml_memory_page* page = compact_get_page(this, header_offset);
+
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ const uint16_t* base = reinterpret_cast(static_cast(reinterpret_cast(this) - base_offset));
+ assert(*base);
+
+ ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
+
+ return page->compact_string_base + offset;
+ }
+ else
+ {
+ return compact_get_value(this);
+ }
+ }
+ else
+ return 0;
+ }
+
+ private:
+ unsigned char _data;
+ };
+PUGI__NS_END
+#endif
+
+#ifdef PUGIXML_COMPACT
+namespace pugi
+{
+ struct xml_attribute_struct
+ {
+ xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
+ {
+ PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
+ }
+
+ impl::compact_header header;
+
+ uint16_t namevalue_base;
+
+ impl::compact_string<4, 2> name;
+ impl::compact_string<5, 3> value;
+
+ impl::compact_pointer prev_attribute_c;
+ impl::compact_pointer next_attribute;
+ };
+
+ struct xml_node_struct
+ {
+ xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
+ {
+ PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
+ }
+
+ impl::compact_header header;
+
+ uint16_t namevalue_base;
+
+ impl::compact_string<4, 2> name;
+ impl::compact_string<5, 3> value;
+
+ impl::compact_pointer_parent parent;
+
+ impl::compact_pointer first_child;
+
+ impl::compact_pointer prev_sibling_c;
+ impl::compact_pointer next_sibling;
+
+ impl::compact_pointer first_attribute;
+ };
+}
+#else
+namespace pugi
+{
+ struct xml_attribute_struct
+ {
+ xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
+ {
+ header = PUGI__GETHEADER_IMPL(this, page, 0);
+ }
+
+ uintptr_t header;
+
+ char_t* name;
+ char_t* value;
+
+ xml_attribute_struct* prev_attribute_c;
+ xml_attribute_struct* next_attribute;
+ };
+
+ struct xml_node_struct
+ {
+ xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
+ {
+ header = PUGI__GETHEADER_IMPL(this, page, type);
+ }
+
+ uintptr_t header;
+
+ char_t* name;
+ char_t* value;
+
+ xml_node_struct* parent;
+
+ xml_node_struct* first_child;
+
+ xml_node_struct* prev_sibling_c;
+ xml_node_struct* next_sibling;
+
+ xml_attribute_struct* first_attribute;
+ };
+}
+#endif
+
+PUGI__NS_BEGIN
+ struct xml_extra_buffer
+ {
+ char_t* buffer;
+ xml_extra_buffer* next;
+ };
+
+ struct xml_document_struct: public xml_node_struct, public xml_allocator
+ {
+ xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
+ {
+ }
+
+ const char_t* buffer;
+
+ xml_extra_buffer* extra_buffers;
+
+ #ifdef PUGIXML_COMPACT
+ compact_hash_table hash;
+ #endif
+ };
+
+ template inline xml_allocator& get_allocator(const Object* object)
+ {
+ assert(object);
+
+ return *PUGI__GETPAGE(object)->allocator;
+ }
+
+ template inline xml_document_struct& get_document(const Object* object)
+ {
+ assert(object);
+
+ return *static_cast(PUGI__GETPAGE(object)->allocator);
+ }
+PUGI__NS_END
+
+// Low-level DOM operations
+PUGI__NS_BEGIN
+ inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
+ {
+ xml_memory_page* page;
+ void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
+ if (!memory) return 0;
+
+ return new (memory) xml_attribute_struct(page);
+ }
+
+ inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
+ {
+ xml_memory_page* page;
+ void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
+ if (!memory) return 0;
+
+ return new (memory) xml_node_struct(page, type);
+ }
+
+ inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
+ {
+ if (a->header & impl::xml_memory_page_name_allocated_mask)
+ alloc.deallocate_string(a->name);
+
+ if (a->header & impl::xml_memory_page_value_allocated_mask)
+ alloc.deallocate_string(a->value);
+
+ alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
+ }
+
+ inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
+ {
+ if (n->header & impl::xml_memory_page_name_allocated_mask)
+ alloc.deallocate_string(n->name);
+
+ if (n->header & impl::xml_memory_page_value_allocated_mask)
+ alloc.deallocate_string(n->value);
+
+ for (xml_attribute_struct* attr = n->first_attribute; attr; )
+ {
+ xml_attribute_struct* next = attr->next_attribute;
+
+ destroy_attribute(attr, alloc);
+
+ attr = next;
+ }
+
+ for (xml_node_struct* child = n->first_child; child; )
+ {
+ xml_node_struct* next = child->next_sibling;
+
+ destroy_node(child, alloc);
+
+ child = next;
+ }
+
+ alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
+ }
+
+ inline void append_node(xml_node_struct* child, xml_node_struct* node)
+ {
+ child->parent = node;
+
+ xml_node_struct* head = node->first_child;
+
+ if (head)
+ {
+ xml_node_struct* tail = head->prev_sibling_c;
+
+ tail->next_sibling = child;
+ child->prev_sibling_c = tail;
+ head->prev_sibling_c = child;
+ }
+ else
+ {
+ node->first_child = child;
+ child->prev_sibling_c = child;
+ }
+ }
+
+ inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
+ {
+ child->parent = node;
+
+ xml_node_struct* head = node->first_child;
+
+ if (head)
+ {
+ child->prev_sibling_c = head->prev_sibling_c;
+ head->prev_sibling_c = child;
+ }
+ else
+ child->prev_sibling_c = child;
+
+ child->next_sibling = head;
+ node->first_child = child;
+ }
+
+ inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
+ {
+ xml_node_struct* parent = node->parent;
+
+ child->parent = parent;
+
+ xml_node_struct* next = node->next_sibling;
+
+ if (next)
+ next->prev_sibling_c = child;
+ else
+ parent->first_child->prev_sibling_c = child;
+
+ child->next_sibling = next;
+ child->prev_sibling_c = node;
+
+ node->next_sibling = child;
+ }
+
+ inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
+ {
+ xml_node_struct* parent = node->parent;
+
+ child->parent = parent;
+
+ xml_node_struct* prev = node->prev_sibling_c;
+
+ if (prev->next_sibling)
+ prev->next_sibling = child;
+ else
+ parent->first_child = child;
+
+ child->prev_sibling_c = prev;
+ child->next_sibling = node;
+
+ node->prev_sibling_c = child;
+ }
+
+ inline void remove_node(xml_node_struct* node)
+ {
+ xml_node_struct* parent = node->parent;
+
+ xml_node_struct* next = node->next_sibling;
+ xml_node_struct* prev = node->prev_sibling_c;
+
+ if (next)
+ next->prev_sibling_c = prev;
+ else
+ parent->first_child->prev_sibling_c = prev;
+
+ if (prev->next_sibling)
+ prev->next_sibling = next;
+ else
+ parent->first_child = next;
+
+ node->parent = 0;
+ node->prev_sibling_c = 0;
+ node->next_sibling = 0;
+ }
+
+ inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+ {
+ xml_attribute_struct* head = node->first_attribute;
+
+ if (head)
+ {
+ xml_attribute_struct* tail = head->prev_attribute_c;
+
+ tail->next_attribute = attr;
+ attr->prev_attribute_c = tail;
+ head->prev_attribute_c = attr;
+ }
+ else
+ {
+ node->first_attribute = attr;
+ attr->prev_attribute_c = attr;
+ }
+ }
+
+ inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+ {
+ xml_attribute_struct* head = node->first_attribute;
+
+ if (head)
+ {
+ attr->prev_attribute_c = head->prev_attribute_c;
+ head->prev_attribute_c = attr;
+ }
+ else
+ attr->prev_attribute_c = attr;
+
+ attr->next_attribute = head;
+ node->first_attribute = attr;
+ }
+
+ inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
+ {
+ xml_attribute_struct* next = place->next_attribute;
+
+ if (next)
+ next->prev_attribute_c = attr;
+ else
+ node->first_attribute->prev_attribute_c = attr;
+
+ attr->next_attribute = next;
+ attr->prev_attribute_c = place;
+ place->next_attribute = attr;
+ }
+
+ inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
+ {
+ xml_attribute_struct* prev = place->prev_attribute_c;
+
+ if (prev->next_attribute)
+ prev->next_attribute = attr;
+ else
+ node->first_attribute = attr;
+
+ attr->prev_attribute_c = prev;
+ attr->next_attribute = place;
+ place->prev_attribute_c = attr;
+ }
+
+ inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+ {
+ xml_attribute_struct* next = attr->next_attribute;
+ xml_attribute_struct* prev = attr->prev_attribute_c;
+
+ if (next)
+ next->prev_attribute_c = prev;
+ else
+ node->first_attribute->prev_attribute_c = prev;
+
+ if (prev->next_attribute)
+ prev->next_attribute = next;
+ else
+ node->first_attribute = next;
+
+ attr->prev_attribute_c = 0;
+ attr->next_attribute = 0;
+ }
+
+ PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
+ {
+ if (!alloc.reserve()) return 0;
+
+ xml_node_struct* child = allocate_node(alloc, type);
+ if (!child) return 0;
+
+ append_node(child, node);
+
+ return child;
+ }
+
+ PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
+ {
+ if (!alloc.reserve()) return 0;
+
+ xml_attribute_struct* attr = allocate_attribute(alloc);
+ if (!attr) return 0;
+
+ append_attribute(attr, node);
+
+ return attr;
+ }
+PUGI__NS_END
+
+// Helper classes for code generation
+PUGI__NS_BEGIN
+ struct opt_false
+ {
+ enum { value = 0 };
+ };
+
+ struct opt_true
+ {
+ enum { value = 1 };
+ };
+PUGI__NS_END
+
+// Unicode utilities
+PUGI__NS_BEGIN
+ inline uint16_t endian_swap(uint16_t value)
+ {
+ return static_cast(((value & 0xff) << 8) | (value >> 8));
+ }
+
+ inline uint32_t endian_swap(uint32_t value)
+ {
+ return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
+ }
+
+ struct utf8_counter
+ {
+ typedef size_t value_type;
+
+ static value_type low(value_type result, uint32_t ch)
+ {
+ // U+0000..U+007F
+ if (ch < 0x80) return result + 1;
+ // U+0080..U+07FF
+ else if (ch < 0x800) return result + 2;
+ // U+0800..U+FFFF
+ else return result + 3;
+ }
+
+ static value_type high(value_type result, uint32_t)
+ {
+ // U+10000..U+10FFFF
+ return result + 4;
+ }
+ };
+
+ struct utf8_writer
+ {
+ typedef uint8_t* value_type;
+
+ static value_type low(value_type result, uint32_t ch)
+ {
+ // U+0000..U+007F
+ if (ch < 0x80)
+ {
+ *result = static_cast(ch);
+ return result + 1;
+ }
+ // U+0080..U+07FF
+ else if (ch < 0x800)
+ {
+ result[0] = static_cast(0xC0 | (ch >> 6));
+ result[1] = static_cast(0x80 | (ch & 0x3F));
+ return result + 2;
+ }
+ // U+0800..U+FFFF
+ else
+ {
+ result[0] = static_cast(0xE0 | (ch >> 12));
+ result[1] = static_cast(0x80 | ((ch >> 6) & 0x3F));
+ result[2] = static_cast(0x80 | (ch & 0x3F));
+ return result + 3;
+ }
+ }
+
+ static value_type high(value_type result, uint32_t ch)
+ {
+ // U+10000..U+10FFFF
+ result[0] = static_cast(0xF0 | (ch >> 18));
+ result[1] = static_cast(0x80 | ((ch >> 12) & 0x3F));
+ result[2] = static_cast(0x80 | ((ch >> 6) & 0x3F));
+ result[3] = static_cast(0x80 | (ch & 0x3F));
+ return result + 4;
+ }
+
+ static value_type any(value_type result, uint32_t ch)
+ {
+ return (ch < 0x10000) ? low(result, ch) : high(result, ch);
+ }
+ };
+
+ struct utf16_counter
+ {
+ typedef size_t value_type;
+
+ static value_type low(value_type result, uint32_t)
+ {
+ return result + 1;
+ }
+
+ static value_type high(value_type result, uint32_t)
+ {
+ return result + 2;
+ }
+ };
+
+ struct utf16_writer
+ {
+ typedef uint16_t* value_type;
+
+ static value_type low(value_type result, uint32_t ch)
+ {
+ *result = static_cast(ch);
+
+ return result + 1;
+ }
+
+ static value_type high(value_type result, uint32_t ch)
+ {
+ uint32_t msh = static_cast(ch - 0x10000) >> 10;
+ uint32_t lsh = static_cast(ch - 0x10000) & 0x3ff;
+
+ result[0] = static_cast(0xD800 + msh);
+ result[1] = static_cast(0xDC00 + lsh);
+
+ return result + 2;
+ }
+
+ static value_type any(value_type result, uint32_t ch)
+ {
+ return (ch < 0x10000) ? low(result, ch) : high(result, ch);
+ }
+ };
+
+ struct utf32_counter
+ {
+ typedef size_t value_type;
+
+ static value_type low(value_type result, uint32_t)
+ {
+ return result + 1;
+ }
+
+ static value_type high(value_type result, uint32_t)
+ {
+ return result + 1;
+ }
+ };
+
+ struct utf32_writer
+ {
+ typedef uint32_t* value_type;
+
+ static value_type low(value_type result, uint32_t ch)
+ {
+ *result = ch;
+
+ return result + 1;
+ }
+
+ static value_type high(value_type result, uint32_t ch)
+ {
+ *result = ch;
+
+ return result + 1;
+ }
+
+ static value_type any(value_type result, uint32_t ch)
+ {
+ *result = ch;
+
+ return result + 1;
+ }
+ };
+
+ struct latin1_writer
+ {
+ typedef uint8_t* value_type;
+
+ static value_type low(value_type result, uint32_t ch)
+ {
+ *result = static_cast(ch > 255 ? '?' : ch);
+
+ return result + 1;
+ }
+
+ static value_type high(value_type result, uint32_t ch)
+ {
+ (void)ch;
+
+ *result = '?';
+
+ return result + 1;
+ }
+ };
+
+ struct utf8_decoder
+ {
+ typedef uint8_t type;
+
+ template static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
+ {
+ const uint8_t utf8_byte_mask = 0x3f;
+
+ while (size)
+ {
+ uint8_t lead = *data;
+
+ // 0xxxxxxx -> U+0000..U+007F
+ if (lead < 0x80)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ size -= 1;
+
+ // process aligned single-byte (ascii) blocks
+ if ((reinterpret_cast(data) & 3) == 0)
+ {
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ while (size >= 4 && (*static_cast(static_cast(data)) & 0x80808080) == 0)
+ {
+ result = Traits::low(result, data[0]);
+ result = Traits::low(result, data[1]);
+ result = Traits::low(result, data[2]);
+ result = Traits::low(result, data[3]);
+ data += 4;
+ size -= 4;
+ }
+ }
+ }
+ // 110xxxxx -> U+0080..U+07FF
+ else if (static_cast(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
+ {
+ result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
+ data += 2;
+ size -= 2;
+ }
+ // 1110xxxx -> U+0800-U+FFFF
+ else if (static_cast(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
+ {
+ result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
+ data += 3;
+ size -= 3;
+ }
+ // 11110xxx -> U+10000..U+10FFFF
+ else if (static_cast(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
+ {
+ result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
+ data += 4;
+ size -= 4;
+ }
+ // 10xxxxxx or 11111xxx -> invalid
+ else
+ {
+ data += 1;
+ size -= 1;
+ }
+ }
+
+ return result;
+ }
+ };
+
+ template struct utf16_decoder
+ {
+ typedef uint16_t type;
+
+ template static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
+ {
+ while (size)
+ {
+ uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
+
+ // U+0000..U+D7FF
+ if (lead < 0xD800)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ size -= 1;
+ }
+ // U+E000..U+FFFF
+ else if (static_cast(lead - 0xE000) < 0x2000)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ size -= 1;
+ }
+ // surrogate pair lead
+ else if (static_cast(lead - 0xD800) < 0x400 && size >= 2)
+ {
+ uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
+
+ if (static_cast(next - 0xDC00) < 0x400)
+ {
+ result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
+ data += 2;
+ size -= 2;
+ }
+ else
+ {
+ data += 1;
+ size -= 1;
+ }
+ }
+ else
+ {
+ data += 1;
+ size -= 1;
+ }
+ }
+
+ return result;
+ }
+ };
+
+ template struct utf32_decoder
+ {
+ typedef uint32_t type;
+
+ template static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
+ {
+ while (size)
+ {
+ uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
+
+ // U+0000..U+FFFF
+ if (lead < 0x10000)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ size -= 1;
+ }
+ // U+10000..U+10FFFF
+ else
+ {
+ result = Traits::high(result, lead);
+ data += 1;
+ size -= 1;
+ }
+ }
+
+ return result;
+ }
+ };
+
+ struct latin1_decoder
+ {
+ typedef uint8_t type;
+
+ template static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
+ {
+ while (size)
+ {
+ result = Traits::low(result, *data);
+ data += 1;
+ size -= 1;
+ }
+
+ return result;
+ }
+ };
+
+ template struct wchar_selector;
+
+ template <> struct wchar_selector<2>
+ {
+ typedef uint16_t type;
+ typedef utf16_counter counter;
+ typedef utf16_writer writer;
+ typedef utf16_decoder decoder;
+ };
+
+ template <> struct wchar_selector<4>
+ {
+ typedef uint32_t type;
+ typedef utf32_counter counter;
+ typedef utf32_writer writer;
+ typedef utf32_decoder decoder;
+ };
+
+ typedef wchar_selector::counter wchar_counter;
+ typedef wchar_selector::writer wchar_writer;
+
+ struct wchar_decoder
+ {
+ typedef wchar_t type;
+
+ template static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
+ {
+ typedef wchar_selector::decoder decoder;
+
+ return decoder::process(reinterpret_cast(data), size, result, traits);
+ }
+ };
+
+#ifdef PUGIXML_WCHAR_MODE
+ PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
+ {
+ for (size_t i = 0; i < length; ++i)
+ result[i] = static_cast(endian_swap(static_cast::type>(data[i])));
+ }
+#endif
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+ enum chartype_t
+ {
+ ct_parse_pcdata = 1, // \0, &, \r, <
+ ct_parse_attr = 2, // \0, &, \r, ', "
+ ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
+ ct_space = 8, // \r, \n, space, tab
+ ct_parse_cdata = 16, // \0, ], >, \r
+ ct_parse_comment = 32, // \0, -, >, \r
+ ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
+ ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
+ };
+
+ static const unsigned char chartype_table[256] =
+ {
+ 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
+ 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
+ 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
+ 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
+
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
+ };
+
+ enum chartypex_t
+ {
+ ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
+ ctx_special_attr = 2, // Any symbol >= 0 and < 32, &, <, ", '
+ ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
+ ctx_digit = 8, // 0-9
+ ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
+ };
+
+ static const unsigned char chartypex_table[256] =
+ {
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, // 0-15
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
+ 0, 0, 2, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 1, 0, // 48-63
+
+ 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
+ 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
+
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+ };
+
+#ifdef PUGIXML_WCHAR_MODE
+ #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast(c) < 128 ? table[static_cast(c)] : table[128]) & (ct))
+#else
+ #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast(c)] & (ct))
+#endif
+
+ #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
+ #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
+
+ PUGI__FN bool is_little_endian()
+ {
+ unsigned int ui = 1;
+
+ return *reinterpret_cast(&ui) == 1;
+ }
+
+ PUGI__FN xml_encoding get_wchar_encoding()
+ {
+ PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
+
+ if (sizeof(wchar_t) == 2)
+ return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+ else
+ return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+ }
+
+ PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
+ {
+ #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
+ #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
+
+ // check if we have a non-empty XML declaration
+ if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
+ return false;
+
+ // scan XML declaration until the encoding field
+ for (size_t i = 6; i + 1 < size; ++i)
+ {
+ // declaration can not contain ? in quoted values
+ if (data[i] == '?')
+ return false;
+
+ if (data[i] == 'e' && data[i + 1] == 'n')
+ {
+ size_t offset = i;
+
+ // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
+ PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
+ PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');
+
+ // S? = S?
+ PUGI__SCANCHARTYPE(ct_space);
+ PUGI__SCANCHAR('=');
+ PUGI__SCANCHARTYPE(ct_space);
+
+ // the only two valid delimiters are ' and "
+ uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
+
+ PUGI__SCANCHAR(delimiter);
+
+ size_t start = offset;
+
+ out_encoding = data + offset;
+
+ PUGI__SCANCHARTYPE(ct_symbol);
+
+ out_length = offset - start;
+
+ PUGI__SCANCHAR(delimiter);
+
+ return true;
+ }
+ }
+
+ return false;
+
+ #undef PUGI__SCANCHAR
+ #undef PUGI__SCANCHARTYPE
+ }
+
+ PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
+ {
+ // skip encoding autodetection if input buffer is too small
+ if (size < 4) return encoding_utf8;
+
+ uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
+
+ // look for BOM in first few bytes
+ if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
+ if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
+ if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
+ if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
+ if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
+
+ // look for <, or (contents);
+
+ return guess_buffer_encoding(data, size);
+ }
+
+ PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+ {
+ size_t length = size / sizeof(char_t);
+
+ if (is_mutable)
+ {
+ out_buffer = static_cast(const_cast(contents));
+ out_length = length;
+ }
+ else
+ {
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+
+ if (contents)
+ memcpy(buffer, contents, length * sizeof(char_t));
+ else
+ assert(length == 0);
+
+ buffer[length] = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1;
+ }
+
+ return true;
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
+ {
+ return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
+ (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
+ }
+
+ PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+ {
+ const char_t* data = static_cast(contents);
+ size_t length = size / sizeof(char_t);
+
+ if (is_mutable)
+ {
+ char_t* buffer = const_cast(data);
+
+ convert_wchar_endian_swap(buffer, data, length);
+
+ out_buffer = buffer;
+ out_length = length;
+ }
+ else
+ {
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+
+ convert_wchar_endian_swap(buffer, data, length);
+ buffer[length] = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1;
+ }
+
+ return true;
+ }
+
+ template PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
+ {
+ const typename D::type* data = static_cast(contents);
+ size_t data_length = size / sizeof(typename D::type);
+
+ // first pass: get length in wchar_t units
+ size_t length = D::process(data, data_length, 0, wchar_counter());
+
+ // allocate buffer of suitable length
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+
+ // second pass: convert utf16 input to wchar_t
+ wchar_writer::value_type obegin = reinterpret_cast(buffer);
+ wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
+
+ assert(oend == obegin + length);
+ *oend = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1;
+
+ return true;
+ }
+
+ PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+ {
+ // get native encoding
+ xml_encoding wchar_encoding = get_wchar_encoding();
+
+ // fast path: no conversion required
+ if (encoding == wchar_encoding)
+ return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+ // only endian-swapping is required
+ if (need_endian_swap_utf(encoding, wchar_encoding))
+ return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
+
+ // source encoding is utf8
+ if (encoding == encoding_utf8)
+ return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
+
+ // source encoding is utf16
+ if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+ {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+ return (native_encoding == encoding) ?
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()) :
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder());
+ }
+
+ // source encoding is utf32
+ if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+ {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+ return (native_encoding == encoding) ?
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()) :
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder());
+ }
+
+ // source encoding is latin1
+ if (encoding == encoding_latin1)
+ return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
+
+ assert(false && "Invalid encoding"); // unreachable
+ return false;
+ }
+#else
+ template PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
+ {
+ const typename D::type* data = static_cast(contents);
+ size_t data_length = size / sizeof(typename D::type);
+
+ // first pass: get length in utf8 units
+ size_t length = D::process(data, data_length, 0, utf8_counter());
+
+ // allocate buffer of suitable length
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+
+ // second pass: convert utf16 input to utf8
+ uint8_t* obegin = reinterpret_cast(buffer);
+ uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
+
+ assert(oend == obegin + length);
+ *oend = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1;
+
+ return true;
+ }
+
+ PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
+ {
+ for (size_t i = 0; i < size; ++i)
+ if (data[i] > 127)
+ return i;
+
+ return size;
+ }
+
+ PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+ {
+ const uint8_t* data = static_cast(contents);
+ size_t data_length = size;
+
+ // get size of prefix that does not need utf8 conversion
+ size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
+ assert(prefix_length <= data_length);
+
+ const uint8_t* postfix = data + prefix_length;
+ size_t postfix_length = data_length - prefix_length;
+
+ // if no conversion is needed, just return the original buffer
+ if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+ // first pass: get length in utf8 units
+ size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
+
+ // allocate buffer of suitable length
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+
+ // second pass: convert latin1 input to utf8
+ memcpy(buffer, data, prefix_length);
+
+ uint8_t* obegin = reinterpret_cast(buffer);
+ uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
+
+ assert(oend == obegin + length);
+ *oend = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1;
+
+ return true;
+ }
+
+ PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+ {
+ // fast path: no conversion required
+ if (encoding == encoding_utf8)
+ return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+ // source encoding is utf16
+ if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+ {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+ return (native_encoding == encoding) ?
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()) :
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder());
+ }
+
+ // source encoding is utf32
+ if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+ {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+ return (native_encoding == encoding) ?
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()) :
+ convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder());
+ }
+
+ // source encoding is latin1
+ if (encoding == encoding_latin1)
+ return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
+
+ assert(false && "Invalid encoding"); // unreachable
+ return false;
+ }
+#endif
+
+ PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
+ {
+ // get length in utf8 characters
+ return wchar_decoder::process(str, length, 0, utf8_counter());
+ }
+
+ PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
+ {
+ // convert to utf8
+ uint8_t* begin = reinterpret_cast(buffer);
+ uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
+
+ assert(begin + size == end);
+ (void)!end;
+ (void)!size;
+ }
+
+#ifndef PUGIXML_NO_STL
+ PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
+ {
+ // first pass: get length in utf8 characters
+ size_t size = as_utf8_begin(str, length);
+
+ // allocate resulting string
+ std::string result;
+ result.resize(size);
+
+ // second pass: convert to utf8
+ if (size > 0) as_utf8_end(&result[0], size, str, length);
+
+ return result;
+ }
+
+ PUGI__FN std::basic_string as_wide_impl(const char* str, size_t size)
+ {
+ const uint8_t* data = reinterpret_cast(str);
+
+ // first pass: get length in wchar_t units
+ size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
+
+ // allocate resulting string
+ std::basic_string result;
+ result.resize(length);
+
+ // second pass: convert to wchar_t
+ if (length > 0)
+ {
+ wchar_writer::value_type begin = reinterpret_cast(&result[0]);
+ wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
+
+ assert(begin + length == end);
+ (void)!end;
+ }
+
+ return result;
+ }
+#endif
+
+ template
+ inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
+ {
+ // never reuse shared memory
+ if (header & xml_memory_page_contents_shared_mask) return false;
+
+ size_t target_length = strlength(target);
+
+ // always reuse document buffer memory if possible
+ if ((header & header_mask) == 0) return target_length >= length;
+
+ // reuse heap memory if waste is not too great
+ const size_t reuse_threshold = 32;
+
+ return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
+ }
+
+ template
+ PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
+ {
+ if (source_length == 0)
+ {
+ // empty string and null pointer are equivalent, so just deallocate old memory
+ xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
+
+ if (header & header_mask) alloc->deallocate_string(dest);
+
+ // mark the string as not allocated
+ dest = 0;
+ header &= ~header_mask;
+
+ return true;
+ }
+ else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
+ {
+ // we can reuse old buffer, so just copy the new data (including zero terminator)
+ memcpy(dest, source, source_length * sizeof(char_t));
+ dest[source_length] = 0;
+
+ return true;
+ }
+ else
+ {
+ xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
+
+ if (!alloc->reserve()) return false;
+
+ // allocate new buffer
+ char_t* buf = alloc->allocate_string(source_length + 1);
+ if (!buf) return false;
+
+ // copy the string (including zero terminator)
+ memcpy(buf, source, source_length * sizeof(char_t));
+ buf[source_length] = 0;
+
+ // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
+ if (header & header_mask) alloc->deallocate_string(dest);
+
+ // the string is now allocated, so set the flag
+ dest = buf;
+ header |= header_mask;
+
+ return true;
+ }
+ }
+
+ struct gap
+ {
+ char_t* end;
+ size_t size;
+
+ gap(): end(0), size(0)
+ {
+ }
+
+ // Push new gap, move s count bytes further (skipping the gap).
+ // Collapse previous gap.
+ void push(char_t*& s, size_t count)
+ {
+ if (end) // there was a gap already; collapse it
+ {
+ // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
+ assert(s >= end);
+ memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end));
+ }
+
+ s += count; // end of current gap
+
+ // "merge" two gaps
+ end = s;
+ size += count;
+ }
+
+ // Collapse all gaps, return past-the-end pointer
+ char_t* flush(char_t* s)
+ {
+ if (end)
+ {
+ // Move [old_gap_end, current_pos) to [old_gap_start, ...)
+ assert(s >= end);
+ memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end));
+
+ return s - size;
+ }
+ else return s;
+ }
+ };
+
+ PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
+ {
+ char_t* stre = s + 1;
+
+ switch (*stre)
+ {
+ case '#': // ...
+ {
+ unsigned int ucsc = 0;
+
+ if (stre[1] == 'x') // ... (hex code)
+ {
+ stre += 2;
+
+ char_t ch = *stre;
+
+ if (ch == ';') return stre;
+
+ for (;;)
+ {
+ if (static_cast(ch - '0') <= 9)
+ ucsc = 16 * ucsc + (ch - '0');
+ else if (static_cast((ch | ' ') - 'a') <= 5)
+ ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
+ else if (ch == ';')
+ break;
+ else // cancel
+ return stre;
+
+ ch = *++stre;
+ }
+
+ ++stre;
+ }
+ else // ... (dec code)
+ {
+ char_t ch = *++stre;
+
+ if (ch == ';') return stre;
+
+ for (;;)
+ {
+ if (static_cast(ch - '0') <= 9)
+ ucsc = 10 * ucsc + (ch - '0');
+ else if (ch == ';')
+ break;
+ else // cancel
+ return stre;
+
+ ch = *++stre;
+ }
+
+ ++stre;
+ }
+
+ #ifdef PUGIXML_WCHAR_MODE
+ s = reinterpret_cast(wchar_writer::any(reinterpret_cast(s), ucsc));
+ #else
+ s = reinterpret_cast(utf8_writer::any(reinterpret_cast(s), ucsc));
+ #endif
+
+ g.push(s, stre - s);
+ return stre;
+ }
+
+ case 'a': // &a
+ {
+ ++stre;
+
+ if (*stre == 'm') // &am
+ {
+ if (*++stre == 'p' && *++stre == ';') // &
+ {
+ *s++ = '&';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ }
+ else if (*stre == 'p') // &ap
+ {
+ if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // '
+ {
+ *s++ = '\'';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ }
+ break;
+ }
+
+ case 'g': // &g
+ {
+ if (*++stre == 't' && *++stre == ';') // >
+ {
+ *s++ = '>';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ case 'l': // &l
+ {
+ if (*++stre == 't' && *++stre == ';') // <
+ {
+ *s++ = '<';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ case 'q': // &q
+ {
+ if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // "
+ {
+ *s++ = '"';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ return stre;
+ }
+
+ // Parser utilities
+ #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
+ #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
+ #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
+ #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
+ #define PUGI__POPNODE() { cursor = cursor->parent; }
+ #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
+ #define PUGI__SCANWHILE(X) { while (X) ++s; }
+ #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
+ #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
+ #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast(0)
+ #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
+
+ PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
+ {
+ gap g;
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
+
+ if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
+ {
+ *s++ = '\n'; // replace first one with 0x0a
+
+ if (*s == '\n') g.push(s, 1);
+ }
+ else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
+ {
+ *g.flush(s) = 0;
+
+ return s + (s[2] == '>' ? 3 : 2);
+ }
+ else if (*s == 0)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+
+ PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
+ {
+ gap g;
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
+
+ if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
+ {
+ *s++ = '\n'; // replace first one with 0x0a
+
+ if (*s == '\n') g.push(s, 1);
+ }
+ else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
+ {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ }
+ else if (*s == 0)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+
+ typedef char_t* (*strconv_pcdata_t)(char_t*);
+
+ template struct strconv_pcdata_impl
+ {
+ static char_t* parse(char_t* s)
+ {
+ gap g;
+
+ char_t* begin = s;
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
+
+ if (*s == '<') // PCDATA ends here
+ {
+ char_t* end = g.flush(s);
+
+ if (opt_trim::value)
+ while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
+ --end;
+
+ *end = 0;
+
+ return s + 1;
+ }
+ else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
+ {
+ *s++ = '\n'; // replace first one with 0x0a
+
+ if (*s == '\n') g.push(s, 1);
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (*s == 0)
+ {
+ char_t* end = g.flush(s);
+
+ if (opt_trim::value)
+ while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
+ --end;
+
+ *end = 0;
+
+ return s;
+ }
+ else ++s;
+ }
+ }
+ };
+
+ PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
+ {
+ PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
+
+ switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above
+ {
+ case 0: return strconv_pcdata_impl::parse;
+ case 1: return strconv_pcdata_impl::parse;
+ case 2: return strconv_pcdata_impl::parse;
+ case 3: return strconv_pcdata_impl::parse;
+ case 4: return strconv_pcdata_impl::parse;
+ case 5: return strconv_pcdata_impl::parse;
+ case 6: return strconv_pcdata_impl::parse;
+ case 7: return strconv_pcdata_impl::parse;
+ default: assert(false); return 0; // unreachable
+ }
+ }
+
+ typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
+
+ template struct strconv_attribute_impl
+ {
+ static char_t* parse_wnorm(char_t* s, char_t end_quote)
+ {
+ gap g;
+
+ // trim leading whitespaces
+ if (PUGI__IS_CHARTYPE(*s, ct_space))
+ {
+ char_t* str = s;
+
+ do ++str;
+ while (PUGI__IS_CHARTYPE(*str, ct_space));
+
+ g.push(s, str - s);
+ }
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
+
+ if (*s == end_quote)
+ {
+ char_t* str = g.flush(s);
+
+ do *str-- = 0;
+ while (PUGI__IS_CHARTYPE(*str, ct_space));
+
+ return s + 1;
+ }
+ else if (PUGI__IS_CHARTYPE(*s, ct_space))
+ {
+ *s++ = ' ';
+
+ if (PUGI__IS_CHARTYPE(*s, ct_space))
+ {
+ char_t* str = s + 1;
+ while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
+
+ g.push(s, str - s);
+ }
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (!*s)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+
+ static char_t* parse_wconv(char_t* s, char_t end_quote)
+ {
+ gap g;
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
+
+ if (*s == end_quote)
+ {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ }
+ else if (PUGI__IS_CHARTYPE(*s, ct_space))
+ {
+ if (*s == '\r')
+ {
+ *s++ = ' ';
+
+ if (*s == '\n') g.push(s, 1);
+ }
+ else *s++ = ' ';
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (!*s)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+
+ static char_t* parse_eol(char_t* s, char_t end_quote)
+ {
+ gap g;
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
+
+ if (*s == end_quote)
+ {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ }
+ else if (*s == '\r')
+ {
+ *s++ = '\n';
+
+ if (*s == '\n') g.push(s, 1);
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (!*s)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+
+ static char_t* parse_simple(char_t* s, char_t end_quote)
+ {
+ gap g;
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
+
+ if (*s == end_quote)
+ {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (!*s)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+ };
+
+ PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
+ {
+ PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
+
+ switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above
+ {
+ case 0: return strconv_attribute_impl::parse_simple;
+ case 1: return strconv_attribute_impl::parse_simple;
+ case 2: return strconv_attribute_impl::parse_eol;
+ case 3: return strconv_attribute_impl::parse_eol;
+ case 4: return strconv_attribute_impl::parse_wconv;
+ case 5: return strconv_attribute_impl::parse_wconv;
+ case 6: return strconv_attribute_impl::parse_wconv;
+ case 7: return strconv_attribute_impl::parse_wconv;
+ case 8: return strconv_attribute_impl::parse_wnorm;
+ case 9: return strconv_attribute_impl::parse_wnorm;
+ case 10: return strconv_attribute_impl::parse_wnorm;
+ case 11: return strconv_attribute_impl::parse_wnorm;
+ case 12: return strconv_attribute_impl::parse_wnorm;
+ case 13: return strconv_attribute_impl::parse_wnorm;
+ case 14: return strconv_attribute_impl::parse_wnorm;
+ case 15: return strconv_attribute_impl::parse_wnorm;
+ default: assert(false); return 0; // unreachable
+ }
+ }
+
+ inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
+ {
+ xml_parse_result result;
+ result.status = status;
+ result.offset = offset;
+
+ return result;
+ }
+
+ struct xml_parser
+ {
+ xml_allocator* alloc;
+ char_t* error_offset;
+ xml_parse_status error_status;
+
+ xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
+ {
+ }
+
+ // DOCTYPE consists of nested sections of the following possible types:
+ // , ... ?>, "...", '...'
+ //
+ //
+ // First group can not contain nested groups
+ // Second group can contain nested groups of the same type
+ // Third group can contain all other groups
+ char_t* parse_doctype_primitive(char_t* s)
+ {
+ if (*s == '"' || *s == '\'')
+ {
+ // quoted string
+ char_t ch = *s++;
+ PUGI__SCANFOR(*s == ch);
+ if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ s++;
+ }
+ else if (s[0] == '<' && s[1] == '?')
+ {
+ // ... ?>
+ s += 2;
+ PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
+ if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ s += 2;
+ }
+ else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
+ {
+ s += 4;
+ PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
+ if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ s += 3;
+ }
+ else PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ return s;
+ }
+
+ char_t* parse_doctype_ignore(char_t* s)
+ {
+ size_t depth = 0;
+
+ assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
+ s += 3;
+
+ while (*s)
+ {
+ if (s[0] == '<' && s[1] == '!' && s[2] == '[')
+ {
+ // nested ignore section
+ s += 3;
+ depth++;
+ }
+ else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
+ {
+ // ignore section end
+ s += 3;
+
+ if (depth == 0)
+ return s;
+
+ depth--;
+ }
+ else s++;
+ }
+
+ PUGI__THROW_ERROR(status_bad_doctype, s);
+ }
+
+ char_t* parse_doctype_group(char_t* s, char_t endch)
+ {
+ size_t depth = 0;
+
+ assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
+ s += 2;
+
+ while (*s)
+ {
+ if (s[0] == '<' && s[1] == '!' && s[2] != '-')
+ {
+ if (s[2] == '[')
+ {
+ // ignore
+ s = parse_doctype_ignore(s);
+ if (!s) return s;
+ }
+ else
+ {
+ // some control group
+ s += 2;
+ depth++;
+ }
+ }
+ else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
+ {
+ // unknown tag (forbidden), or some primitive group
+ s = parse_doctype_primitive(s);
+ if (!s) return s;
+ }
+ else if (*s == '>')
+ {
+ if (depth == 0)
+ return s;
+
+ depth--;
+ s++;
+ }
+ else s++;
+ }
+
+ if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ return s;
+ }
+
+ char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
+ {
+ // parse node contents, starting with exclamation mark
+ ++s;
+
+ if (*s == '-') // 'value = s; // Save the offset.
+ }
+
+ if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
+ {
+ s = strconv_comment(s, endch);
+
+ if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
+ }
+ else
+ {
+ // Scan for terminating '-->'.
+ PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
+ PUGI__CHECK_ERROR(status_bad_comment, s);
+
+ if (PUGI__OPTSET(parse_comments))
+ *s = 0; // Zero-terminate this segment at the first terminating '-'.
+
+ s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
+ }
+ }
+ else PUGI__THROW_ERROR(status_bad_comment, s);
+ }
+ else if (*s == '[')
+ {
+ // 'value = s; // Save the offset.
+
+ if (PUGI__OPTSET(parse_eol))
+ {
+ s = strconv_cdata(s, endch);
+
+ if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
+ }
+ else
+ {
+ // Scan for terminating ']]>'.
+ PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
+ PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+ *s++ = 0; // Zero-terminate this segment.
+ }
+ }
+ else // Flagged for discard, but we still have to scan for the terminator.
+ {
+ // Scan for terminating ']]>'.
+ PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
+ PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+ ++s;
+ }
+
+ s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
+ }
+ else PUGI__THROW_ERROR(status_bad_cdata, s);
+ }
+ else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
+ {
+ s -= 2;
+
+ if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+ char_t* mark = s + 9;
+
+ s = parse_doctype_group(s, endch);
+ if (!s) return s;
+
+ assert((*s == 0 && endch == '>') || *s == '>');
+ if (*s) *s++ = 0;
+
+ if (PUGI__OPTSET(parse_doctype))
+ {
+ while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
+
+ PUGI__PUSHNODE(node_doctype);
+
+ cursor->value = mark;
+ }
+ }
+ else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
+ else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
+ else PUGI__THROW_ERROR(status_unrecognized_tag, s);
+
+ return s;
+ }
+
+ char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
+ {
+ // load into registers
+ xml_node_struct* cursor = ref_cursor;
+ char_t ch = 0;
+
+ // parse node contents, starting with question mark
+ ++s;
+
+ // read PI target
+ char_t* target = s;
+
+ if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
+
+ PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ // determine node type; stricmp / strcasecmp is not portable
+ bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
+
+ if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
+ {
+ if (declaration)
+ {
+ // disallow non top-level declarations
+ if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
+
+ PUGI__PUSHNODE(node_declaration);
+ }
+ else
+ {
+ PUGI__PUSHNODE(node_pi);
+ }
+
+ cursor->name = target;
+
+ PUGI__ENDSEG();
+
+ // parse value/attributes
+ if (ch == '?')
+ {
+ // empty node
+ if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
+ s += (*s == '>');
+
+ PUGI__POPNODE();
+ }
+ else if (PUGI__IS_CHARTYPE(ch, ct_space))
+ {
+ PUGI__SKIPWS();
+
+ // scan for tag end
+ char_t* value = s;
+
+ PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ if (declaration)
+ {
+ // replace ending ? with / so that 'element' terminates properly
+ *s = '/';
+
+ // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
+ s = value;
+ }
+ else
+ {
+ // store value and step over >
+ cursor->value = value;
+
+ PUGI__POPNODE();
+
+ PUGI__ENDSEG();
+
+ s += (*s == '>');
+ }
+ }
+ else PUGI__THROW_ERROR(status_bad_pi, s);
+ }
+ else
+ {
+ // scan for tag end
+ PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ s += (s[1] == '>' ? 2 : 1);
+ }
+
+ // store from registers
+ ref_cursor = cursor;
+
+ return s;
+ }
+
+ char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
+ {
+ strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
+ strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
+
+ char_t ch = 0;
+ xml_node_struct* cursor = root;
+ char_t* mark = s;
+
+ while (*s != 0)
+ {
+ if (*s == '<')
+ {
+ ++s;
+
+ LOC_TAG:
+ if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
+ {
+ PUGI__PUSHNODE(node_element); // Append a new node to the tree.
+
+ cursor->name = s;
+
+ PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
+ PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+
+ if (ch == '>')
+ {
+ // end of tag
+ }
+ else if (PUGI__IS_CHARTYPE(ch, ct_space))
+ {
+ LOC_ATTRIBUTES:
+ while (true)
+ {
+ PUGI__SKIPWS(); // Eat any whitespace.
+
+ if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
+ {
+ xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
+ if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
+
+ a->name = s; // Save the offset.
+
+ PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
+ PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+
+ if (PUGI__IS_CHARTYPE(ch, ct_space))
+ {
+ PUGI__SKIPWS(); // Eat any whitespace.
+
+ ch = *s;
+ ++s;
+ }
+
+ if (ch == '=') // '<... #=...'
+ {
+ PUGI__SKIPWS(); // Eat any whitespace.
+
+ if (*s == '"' || *s == '\'') // '<... #="...'
+ {
+ ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
+ ++s; // Step over the quote.
+ a->value = s; // Save the offset.
+
+ s = strconv_attribute(s, ch);
+
+ if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
+
+ // After this line the loop continues from the start;
+ // Whitespaces, / and > are ok, symbols and EOF are wrong,
+ // everything else will be detected
+ if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
+ }
+ else PUGI__THROW_ERROR(status_bad_attribute, s);
+ }
+ else PUGI__THROW_ERROR(status_bad_attribute, s);
+ }
+ else if (*s == '/')
+ {
+ ++s;
+
+ if (*s == '>')
+ {
+ PUGI__POPNODE();
+ s++;
+ break;
+ }
+ else if (*s == 0 && endch == '>')
+ {
+ PUGI__POPNODE();
+ break;
+ }
+ else PUGI__THROW_ERROR(status_bad_start_element, s);
+ }
+ else if (*s == '>')
+ {
+ ++s;
+
+ break;
+ }
+ else if (*s == 0 && endch == '>')
+ {
+ break;
+ }
+ else PUGI__THROW_ERROR(status_bad_start_element, s);
+ }
+
+ // !!!
+ }
+ else if (ch == '/') // '<#.../'
+ {
+ if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
+
+ PUGI__POPNODE(); // Pop.
+
+ s += (*s == '>');
+ }
+ else if (ch == 0)
+ {
+ // we stepped over null terminator, backtrack & handle closing tag
+ --s;
+
+ if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
+ }
+ else PUGI__THROW_ERROR(status_bad_start_element, s);
+ }
+ else if (*s == '/')
+ {
+ ++s;
+
+ mark = s;
+
+ char_t* name = cursor->name;
+ if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
+
+ while (PUGI__IS_CHARTYPE(*s, ct_symbol))
+ {
+ if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
+ }
+
+ if (*name)
+ {
+ if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
+ else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
+ }
+
+ PUGI__POPNODE(); // Pop.
+
+ PUGI__SKIPWS();
+
+ if (*s == 0)
+ {
+ if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+ }
+ else
+ {
+ if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+ ++s;
+ }
+ }
+ else if (*s == '?') // '...'
+ {
+ s = parse_question(s, cursor, optmsk, endch);
+ if (!s) return s;
+
+ assert(cursor);
+ if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
+ }
+ else if (*s == '!') // 'first_child) continue;
+ }
+ }
+
+ if (!PUGI__OPTSET(parse_trim_pcdata))
+ s = mark;
+
+ if (cursor->parent || PUGI__OPTSET(parse_fragment))
+ {
+ if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
+ {
+ cursor->value = s; // Save the offset.
+ }
+ else
+ {
+ PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
+
+ cursor->value = s; // Save the offset.
+
+ PUGI__POPNODE(); // Pop since this is a standalone.
+ }
+
+ s = strconv_pcdata(s);
+
+ if (!*s) break;
+ }
+ else
+ {
+ PUGI__SCANFOR(*s == '<'); // '...<'
+ if (!*s) break;
+
+ ++s;
+ }
+
+ // We're after '<'
+ goto LOC_TAG;
+ }
+ }
+
+ // check that last tag is closed
+ if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+
+ return s;
+ }
+
+ #ifdef PUGIXML_WCHAR_MODE
+ static char_t* parse_skip_bom(char_t* s)
+ {
+ unsigned int bom = 0xfeff;
+ return (s[0] == static_cast(bom)) ? s + 1 : s;
+ }
+ #else
+ static char_t* parse_skip_bom(char_t* s)
+ {
+ return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
+ }
+ #endif
+
+ static bool has_element_node_siblings(xml_node_struct* node)
+ {
+ while (node)
+ {
+ if (PUGI__NODETYPE(node) == node_element) return true;
+
+ node = node->next_sibling;
+ }
+
+ return false;
+ }
+
+ static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
+ {
+ // early-out for empty documents
+ if (length == 0)
+ return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
+
+ // get last child of the root before parsing
+ xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
+
+ // create parser on stack
+ xml_parser parser(static_cast(xmldoc));
+
+ // save last character and make buffer zero-terminated (speeds up parsing)
+ char_t endch = buffer[length - 1];
+ buffer[length - 1] = 0;
+
+ // skip BOM to make sure it does not end up as part of parse output
+ char_t* buffer_data = parse_skip_bom(buffer);
+
+ // perform actual parsing
+ parser.parse_tree(buffer_data, root, optmsk, endch);
+
+ xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
+ assert(result.offset >= 0 && static_cast(result.offset) <= length);
+
+ if (result)
+ {
+ // since we removed last character, we have to handle the only possible false positive (stray <)
+ if (endch == '<')
+ return make_parse_result(status_unrecognized_tag, length - 1);
+
+ // check if there are any element nodes parsed
+ xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
+
+ if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
+ return make_parse_result(status_no_document_element, length - 1);
+ }
+ else
+ {
+ // roll back offset if it occurs on a null terminator in the source buffer
+ if (result.offset > 0 && static_cast(result.offset) == length - 1 && endch == 0)
+ result.offset--;
+ }
+
+ return result;
+ }
+ };
+
+ // Output facilities
+ PUGI__FN xml_encoding get_write_native_encoding()
+ {
+ #ifdef PUGIXML_WCHAR_MODE
+ return get_wchar_encoding();
+ #else
+ return encoding_utf8;
+ #endif
+ }
+
+ PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
+ {
+ // replace wchar encoding with utf implementation
+ if (encoding == encoding_wchar) return get_wchar_encoding();
+
+ // replace utf16 encoding with utf16 with specific endianness
+ if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+ // replace utf32 encoding with utf32 with specific endianness
+ if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+ // only do autodetection if no explicit encoding is requested
+ if (encoding != encoding_auto) return encoding;
+
+ // assume utf8 encoding
+ return encoding_utf8;
+ }
+
+ template PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
+ {
+ PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
+
+ typename T::value_type end = D::process(reinterpret_cast(data), length, dest, T());
+
+ return static_cast(end - dest) * sizeof(*dest);
+ }
+
+ template PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
+ {
+ PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
+
+ typename T::value_type end = D::process(reinterpret_cast(data), length, dest, T());
+
+ if (opt_swap)
+ {
+ for (typename T::value_type i = dest; i != end; ++i)
+ *i = endian_swap(*i);
+ }
+
+ return static_cast(end - dest) * sizeof(*dest);
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+ {
+ if (length < 1) return 0;
+
+ // discard last character if it's the lead of a surrogate pair
+ return (sizeof(wchar_t) == 2 && static_cast(static_cast(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
+ }
+
+ PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+ {
+ // only endian-swapping is required
+ if (need_endian_swap_utf(encoding, get_wchar_encoding()))
+ {
+ convert_wchar_endian_swap(r_char, data, length);
+
+ return length * sizeof(char_t);
+ }
+
+ // convert to utf8
+ if (encoding == encoding_utf8)
+ return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
+
+ // convert to utf16
+ if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+ {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+ return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
+ }
+
+ // convert to utf32
+ if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+ {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+ return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
+ }
+
+ // convert to latin1
+ if (encoding == encoding_latin1)
+ return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
+
+ assert(false && "Invalid encoding"); // unreachable
+ return 0;
+ }
+#else
+ PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+ {
+ if (length < 5) return 0;
+
+ for (size_t i = 1; i <= 4; ++i)
+ {
+ uint8_t ch = static_cast(data[length - i]);
+
+ // either a standalone character or a leading one
+ if ((ch & 0xc0) != 0x80) return length - i;
+ }
+
+ // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
+ return length;
+ }
+
+ PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+ {
+ if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+ {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+ return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
+ }
+
+ if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+ {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+ return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
+ }
+
+ if (encoding == encoding_latin1)
+ return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
+
+ assert(false && "Invalid encoding"); // unreachable
+ return 0;
+ }
+#endif
+
+ class xml_buffered_writer
+ {
+ xml_buffered_writer(const xml_buffered_writer&);
+ xml_buffered_writer& operator=(const xml_buffered_writer&);
+
+ public:
+ xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
+ {
+ PUGI__STATIC_ASSERT(bufcapacity >= 8);
+ }
+
+ size_t flush()
+ {
+ flush(buffer, bufsize);
+ bufsize = 0;
+ return 0;
+ }
+
+ void flush(const char_t* data, size_t size)
+ {
+ if (size == 0) return;
+
+ // fast path, just write data
+ if (encoding == get_write_native_encoding())
+ writer.write(data, size * sizeof(char_t));
+ else
+ {
+ // convert chunk
+ size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
+ assert(result <= sizeof(scratch));
+
+ // write data
+ writer.write(scratch.data_u8, result);
+ }
+ }
+
+ void write_direct(const char_t* data, size_t length)
+ {
+ // flush the remaining buffer contents
+ flush();
+
+ // handle large chunks
+ if (length > bufcapacity)
+ {
+ if (encoding == get_write_native_encoding())
+ {
+ // fast path, can just write data chunk
+ writer.write(data, length * sizeof(char_t));
+ return;
+ }
+
+ // need to convert in suitable chunks
+ while (length > bufcapacity)
+ {
+ // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
+ // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
+ size_t chunk_size = get_valid_length(data, bufcapacity);
+ assert(chunk_size);
+
+ // convert chunk and write
+ flush(data, chunk_size);
+
+ // iterate
+ data += chunk_size;
+ length -= chunk_size;
+ }
+
+ // small tail is copied below
+ bufsize = 0;
+ }
+
+ memcpy(buffer + bufsize, data, length * sizeof(char_t));
+ bufsize += length;
+ }
+
+ void write_buffer(const char_t* data, size_t length)
+ {
+ size_t offset = bufsize;
+
+ if (offset + length <= bufcapacity)
+ {
+ memcpy(buffer + offset, data, length * sizeof(char_t));
+ bufsize = offset + length;
+ }
+ else
+ {
+ write_direct(data, length);
+ }
+ }
+
+ void write_string(const char_t* data)
+ {
+ // write the part of the string that fits in the buffer
+ size_t offset = bufsize;
+
+ while (*data && offset < bufcapacity)
+ buffer[offset++] = *data++;
+
+ // write the rest
+ if (offset < bufcapacity)
+ {
+ bufsize = offset;
+ }
+ else
+ {
+ // backtrack a bit if we have split the codepoint
+ size_t length = offset - bufsize;
+ size_t extra = length - get_valid_length(data - length, length);
+
+ bufsize = offset - extra;
+
+ write_direct(data - extra, strlength(data) + extra);
+ }
+ }
+
+ void write(char_t d0)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 1) offset = flush();
+
+ buffer[offset + 0] = d0;
+ bufsize = offset + 1;
+ }
+
+ void write(char_t d0, char_t d1)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 2) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ bufsize = offset + 2;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 3) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ bufsize = offset + 3;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 4) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ buffer[offset + 3] = d3;
+ bufsize = offset + 4;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 5) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ buffer[offset + 3] = d3;
+ buffer[offset + 4] = d4;
+ bufsize = offset + 5;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 6) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ buffer[offset + 3] = d3;
+ buffer[offset + 4] = d4;
+ buffer[offset + 5] = d5;
+ bufsize = offset + 6;
+ }
+
+ // utf8 maximum expansion: x4 (-> utf32)
+ // utf16 maximum expansion: x2 (-> utf32)
+ // utf32 maximum expansion: x1
+ enum
+ {
+ bufcapacitybytes =
+ #ifdef PUGIXML_MEMORY_OUTPUT_STACK
+ PUGIXML_MEMORY_OUTPUT_STACK
+ #else
+ 10240
+ #endif
+ ,
+ bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
+ };
+
+ char_t buffer[bufcapacity];
+
+ union
+ {
+ uint8_t data_u8[4 * bufcapacity];
+ uint16_t data_u16[2 * bufcapacity];
+ uint32_t data_u32[bufcapacity];
+ char_t data_char[bufcapacity];
+ } scratch;
+
+ xml_writer& writer;
+ size_t bufsize;
+ xml_encoding encoding;
+ };
+
+ PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
+ {
+ while (*s)
+ {
+ const char_t* prev = s;
+
+ // While *s is a usual symbol
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
+
+ writer.write_buffer(prev, static_cast(s - prev));
+
+ switch (*s)
+ {
+ case 0: break;
+ case '&':
+ writer.write('&', 'a', 'm', 'p', ';');
+ ++s;
+ break;
+ case '<':
+ writer.write('&', 'l', 't', ';');
+ ++s;
+ break;
+ case '>':
+ writer.write('&', 'g', 't', ';');
+ ++s;
+ break;
+ case '"':
+ if (flags & format_attribute_single_quote)
+ writer.write('"');
+ else
+ writer.write('&', 'q', 'u', 'o', 't', ';');
+ ++s;
+ break;
+ case '\'':
+ if (flags & format_attribute_single_quote)
+ writer.write('&', 'a', 'p', 'o', 's', ';');
+ else
+ writer.write('\'');
+ ++s;
+ break;
+ default: // s is not a usual symbol
+ {
+ unsigned int ch = static_cast(*s++);
+ assert(ch < 32);
+
+ if (!(flags & format_skip_control_chars))
+ writer.write('&', '#', static_cast((ch / 10) + '0'), static_cast((ch % 10) + '0'), ';');
+ }
+ }
+ }
+ }
+
+ PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
+ {
+ if (flags & format_no_escapes)
+ writer.write_string(s);
+ else
+ text_output_escaped(writer, s, type, flags);
+ }
+
+ PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
+ {
+ do
+ {
+ writer.write('<', '!', '[', 'C', 'D');
+ writer.write('A', 'T', 'A', '[');
+
+ const char_t* prev = s;
+
+ // look for ]]> sequence - we can't output it as is since it terminates CDATA
+ while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
+
+ // skip ]] if we stopped at ]]>, > will go to the next CDATA section
+ if (*s) s += 2;
+
+ writer.write_buffer(prev, static_cast(s - prev));
+
+ writer.write(']', ']', '>');
+ }
+ while (*s);
+ }
+
+ PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
+ {
+ switch (indent_length)
+ {
+ case 1:
+ {
+ for (unsigned int i = 0; i < depth; ++i)
+ writer.write(indent[0]);
+ break;
+ }
+
+ case 2:
+ {
+ for (unsigned int i = 0; i < depth; ++i)
+ writer.write(indent[0], indent[1]);
+ break;
+ }
+
+ case 3:
+ {
+ for (unsigned int i = 0; i < depth; ++i)
+ writer.write(indent[0], indent[1], indent[2]);
+ break;
+ }
+
+ case 4:
+ {
+ for (unsigned int i = 0; i < depth; ++i)
+ writer.write(indent[0], indent[1], indent[2], indent[3]);
+ break;
+ }
+
+ default:
+ {
+ for (unsigned int i = 0; i < depth; ++i)
+ writer.write_buffer(indent, indent_length);
+ }
+ }
+ }
+
+ PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
+ {
+ writer.write('<', '!', '-', '-');
+
+ while (*s)
+ {
+ const char_t* prev = s;
+
+ // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
+ while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
+
+ writer.write_buffer(prev, static_cast(s - prev));
+
+ if (*s)
+ {
+ assert(*s == '-');
+
+ writer.write('-', ' ');
+ ++s;
+ }
+ }
+
+ writer.write('-', '-', '>');
+ }
+
+ PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
+ {
+ while (*s)
+ {
+ const char_t* prev = s;
+
+ // look for ?> sequence - we can't output it since ?> terminates PI
+ while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
+
+ writer.write_buffer(prev, static_cast