diff --git a/externals/coda-oss/ReleaseNotes.md b/externals/coda-oss/ReleaseNotes.md index d9aa5247b..8838f1089 100644 --- a/externals/coda-oss/ReleaseNotes.md +++ b/externals/coda-oss/ReleaseNotes.md @@ -11,9 +11,11 @@ ``` # coda-oss Release Notes -## Release 2022-??-?? +## [Release 2022-08-02](https://github.com/mdaus/coda-oss/releases/tag/2022-08-02) * remove *Expat* and *libXML* modules and support in **xml.lite**; only *Xerces* was actively used. -* **xml.lite** now uses UTF-8 internally and is no longer tries to preserve incorrect behavior. +* fix `waf` to work-around FIPS warning because of insecure *md5* hashing. +* tweak `str::EncodedStringView` and `str::EncodedString` for + [future XML changes](https://github.com/mdaus/coda-oss/tree/feature/always-write-xml-as-utf8). ## [Release 2022-06-29](https://github.com/mdaus/coda-oss/releases/tag/2022-06-29) * remove **modules/drivers/boost** as it was empty (and unused); diff --git a/externals/coda-oss/modules/c++/config/include/config/Version.h b/externals/coda-oss/modules/c++/config/include/config/Version.h index 75bb473d9..0bb1009fc 100644 --- a/externals/coda-oss/modules/c++/config/include/config/Version.h +++ b/externals/coda-oss/modules/c++/config/include/config/Version.h @@ -42,12 +42,12 @@ static_assert(CODA_OSS_MAKE_VERSION_MMPB(9999, 9999, 9999, 9999) <= UINT64_MAX, // Do this ala C++ ... we don't currently have major/minor/patch //#define CODA_OSS_VERSION_ 20210910L // c.f. __cplusplus -#define CODA_OSS_VERSION_ 2022 ## 0006 ## 0029 ## 0000 ## L +#define CODA_OSS_VERSION_ 2022 ## 0008 ## 0002 ## 0000 ## L // Use the same macros other projects might want to use; overkill for us. 
#define CODA_OSS_VERSION_MAJOR 2022 -#define CODA_OSS_VERSION_MINOR 6 -#define CODA_OSS_VERSION_PATCH 29 +#define CODA_OSS_VERSION_MINOR 8 +#define CODA_OSS_VERSION_PATCH 2 #define CODA_OSS_VERSION_BUILD 0 #define CODA_OSS_VERSION CODA_OSS_MAKE_VERSION_MMPB(CODA_OSS_VERSION_MAJOR, CODA_OSS_VERSION_MINOR, CODA_OSS_VERSION_PATCH, CODA_OSS_VERSION_BUILD) diff --git a/externals/coda-oss/modules/c++/io/include/io/ReadUtils.h b/externals/coda-oss/modules/c++/io/include/io/ReadUtils.h index 7bf079323..4d16a06d9 100644 --- a/externals/coda-oss/modules/c++/io/include/io/ReadUtils.h +++ b/externals/coda-oss/modules/c++/io/include/io/ReadUtils.h @@ -42,7 +42,7 @@ namespace io */ void readFileContents(const std::string& pathname, std::vector& buffer); -void readFileContents(const sys::filesystem::path& pathname, std::vector& buffer); +void readFileContents(const coda_oss::filesystem::path& pathname, std::vector& buffer); /*! * Reads the contents of a file into a string. The file is assumed to be a diff --git a/externals/coda-oss/modules/c++/io/source/ReadUtils.cpp b/externals/coda-oss/modules/c++/io/source/ReadUtils.cpp index 4f83c7e0a..b85fbfdd5 100644 --- a/externals/coda-oss/modules/c++/io/source/ReadUtils.cpp +++ b/externals/coda-oss/modules/c++/io/source/ReadUtils.cpp @@ -40,7 +40,7 @@ void readFileContents(const std::string& pathname, { readFileContents_(pathname, buffer); } -void readFileContents(const sys::filesystem::path& pathname, std::vector& buffer) +void readFileContents(const coda_oss::filesystem::path& pathname, std::vector& buffer) { readFileContents_(pathname, buffer); } diff --git a/externals/coda-oss/modules/c++/str/include/str/EncodedString.h b/externals/coda-oss/modules/c++/str/include/str/EncodedString.h index b726225cb..0dd6655d2 100644 --- a/externals/coda-oss/modules/c++/str/include/str/EncodedString.h +++ b/externals/coda-oss/modules/c++/str/include/str/EncodedString.h @@ -52,18 +52,13 @@ class CODA_OSS_API EncodedString final // We can do most 
everything through the view, so keep one around. EncodedStringView v_; - const std::string& string() const - { - return s_; - } - // No "public" operator=() for these; this class is mostly for storage and/or conversion, // not extensive manipulation. Create a new instance and assign/move that. void assign(coda_oss::u8string::const_pointer); void assign(str::W1252string::const_pointer); public: - EncodedString() = default; + EncodedString(); ~EncodedString() = default; EncodedString(const EncodedString&); EncodedString& operator=(const EncodedString&); @@ -134,7 +129,7 @@ class CODA_OSS_API EncodedString final { static const std::string& string(const EncodedString& es) // for unit-testing { - return es.string(); + return es.s_; } }; }; diff --git a/externals/coda-oss/modules/c++/str/include/str/EncodedStringView.h b/externals/coda-oss/modules/c++/str/include/str/EncodedStringView.h index 6c0b98350..5f5056f74 100644 --- a/externals/coda-oss/modules/c++/str/include/str/EncodedStringView.h +++ b/externals/coda-oss/modules/c++/str/include/str/EncodedStringView.h @@ -52,7 +52,11 @@ class CODA_OSS_API EncodedStringView final // Since we only support two encodings--UTF-8 (native on Linux) and Windows-1252 // (native on Windows)--both of which are 8-bits, a simple "bool" flag will do. coda_oss::span mString; - static constexpr bool mNativeIsUtf8 = details::Platform == details::PlatformType::Linux ? true : false; + #if _WIN32 + static constexpr bool mNativeIsUtf8 = false; // Windows-1252 + #else + static constexpr bool mNativeIsUtf8 = true; // !_WIN32, assume Linux + #endif bool mIsUtf8 = mNativeIsUtf8; // Want to create an EncodedString from EncodedStringView. The public interface @@ -89,7 +93,6 @@ class CODA_OSS_API EncodedStringView final // Convert (perhaps) whatever we're looking at to UTF-8 coda_oss::u8string u8string() const; // c.f. std::filesystem::path::u8string() - std::string& toUtf8(std::string&) const; // std::string is encoded as UTF-8, always. 
// Convert whatever we're looking at to UTF-16 or UTF-32 std::u16string u16string() const; // c.f. std::filesystem::path::u8string() @@ -111,34 +114,34 @@ class CODA_OSS_API EncodedStringView final { return mIsUtf8 ? cast(c_str()) : nullptr; } + str::W1252string::const_pointer c_w1252str() const + { + return mIsUtf8 ? nullptr : cast(c_str()); + } size_t size() const { return mString.size(); } // Input is encoded as specified on all platforms. - static EncodedStringView fromUtf8(const std::string& s) + static EncodedStringView fromUtf8(const std::string& utf8) { - return EncodedStringView(str::c_str(s)); + return EncodedStringView(str::c_str(utf8)); } - static EncodedStringView fromUtf8(std::string::const_pointer p) + static EncodedStringView fromUtf8(std::string::const_pointer pUtf8) { - return EncodedStringView(str::cast(p)); + return EncodedStringView(str::cast(pUtf8)); } - static EncodedStringView fromWindows1252(const std::string& s) + static EncodedStringView fromWindows1252(const std::string& w1252) { - return EncodedStringView(str::c_str(s)); + return EncodedStringView(str::c_str(w1252)); } - static EncodedStringView fromWindows1252(std::string::const_pointer p) + static EncodedStringView fromWindows1252(std::string::const_pointer pW1252) { - return EncodedStringView(str::cast(p)); + return EncodedStringView(str::cast(pW1252)); } - std::string asUtf8() const - { - std::string retval; - return toUtf8(retval); - } + std::string asUtf8() const; std::string asWindows1252() const; bool operator_eq(const EncodedStringView&) const; diff --git a/externals/coda-oss/modules/c++/str/include/str/Encoding.h b/externals/coda-oss/modules/c++/str/include/str/Encoding.h index 0cace45b2..8fcdeb5c4 100644 --- a/externals/coda-oss/modules/c++/str/include/str/Encoding.h +++ b/externals/coda-oss/modules/c++/str/include/str/Encoding.h @@ -37,27 +37,6 @@ #include "gsl/gsl.h" #include "config/Exports.h" -// This can be useful for code that will compile on all platforms, but needs 
-// different platform-specific behavior. This avoids the use of more #ifdefs -// (no preprocessor) and also squelches compiler-warnings about unused local -// functions. -namespace str { namespace details // YOU should be using sys::PlatformType -{ -enum class PlatformType -{ - Windows, - Linux, - // MacOS -}; - -#if _WIN32 -constexpr auto Platform = PlatformType::Windows; -#else -constexpr auto Platform = PlatformType::Linux; -#endif -} } - - namespace str { template @@ -83,17 +62,6 @@ inline typename TBasicStringT::const_pointer c_str(const std::basic_string; // https://en.cppreference.com/w/cpp/string -CODA_OSS_API coda_oss::u8string fromWindows1252(std::string::const_pointer, size_t); // std::string is Windows-1252 **ON ALL PLATFORMS** -inline coda_oss::u8string fromWindows1252(std::string::const_pointer s) -{ - return fromWindows1252(s, gsl::narrow(strlen(s))); -} -CODA_OSS_API coda_oss::u8string fromUtf8(std::string::const_pointer, size_t); // std::string is UTF-8 **ON ALL PLATFORMS** -inline coda_oss::u8string fromUtf8(std::string::const_pointer s) -{ - return fromUtf8(s, gsl::narrow(strlen(s))); -} - // With some older C++ compilers, uint16_t may be used instead of char16_t :-( using ui16string = std::basic_string; // ui = UInt16_t @@ -110,76 +78,39 @@ static_assert(!std::is_same::value, "wchar_t should not be the // When the encoding is important, we want to "traffic" in coda_oss::u8string (UTF-8), not // str::W1252string (Windows-1252) or std::string (unknown). Make it easy to get those from other encodings. 
-CODA_OSS_API coda_oss::u8string to_u8string(std::string::const_pointer, size_t); // std::string is Windows-1252 or UTF-8 depending on platform CODA_OSS_API coda_oss::u8string to_u8string(str::W1252string::const_pointer, size_t); inline coda_oss::u8string to_u8string(coda_oss::u8string::const_pointer s, size_t sz) { return coda_oss::u8string(s, sz); } -CODA_OSS_API coda_oss::u8string to_u8string(std::wstring::const_pointer, size_t); // std::wstring is UTF-16 or UTF-32 depending on platform -// UTF-16 is typically uses on Windows (where it is std::wstring::value_type); -// Linux preferred UTF-32. +// UTF-16 is typically uses on Windows (where it is std::wstring::value_type); Linux prefers UTF-32. CODA_OSS_API coda_oss::u8string to_u8string(std::u16string::const_pointer, size_t); + CODA_OSS_API std::u16string to_u16string(coda_oss::u8string::const_pointer, size_t); +str::ui16string to_ui16string(coda_oss::u8string::const_pointer, size_t); +std::u16string to_u16string(str::W1252string::const_pointer, size_t); +str::ui16string to_ui16string(str::W1252string::const_pointer, size_t); // UTF-32 is convenient because each code-point is a single 32-bit integer. // It's typically std::wstring::value_type on Linux, but NOT Windows. 
CODA_OSS_API coda_oss::u8string to_u8string(std::u32string::const_pointer, size_t); CODA_OSS_API std::u32string to_u32string(coda_oss::u8string::const_pointer, size_t); +std::u32string to_u32string(str::W1252string::const_pointer, size_t); template inline coda_oss::u8string to_u8string(const std::basic_string& s) { return to_u8string(s.c_str(), s.size()); } -template -inline std::u16string to_u16string(const std::basic_string& s) -{ - return to_u16string(s.c_str(), s.size()); -} -template -inline std::u32string to_u32string(const std::basic_string& s) -{ - return to_u32string(s.c_str(), s.size()); -} -namespace details // YOU should use EncodedStringView -{ -coda_oss::u8string to_u8string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */); -std::u16string to_u16string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */); -ui16string to_ui16string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */); // older C++ compilers -std::u32string to_u32string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */); -std::wstring to_wstring(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */); - -std::string& to_u8string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */, std::string&); // encoding is lost -std::string& to_u8string(std::u16string::const_pointer, size_t, std::string&); // encoding is lost -std::string& to_u8string(std::u32string::const_pointer, size_t, std::string&); // encoding is lost - -str::W1252string to_w1252string(std::string::const_pointer, size_t); // std::string is Windows-1252 or UTF-8 depending on platform -str::W1252string to_w1252string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? 
*/); -CODA_OSS_API str::W1252string to_w1252string(coda_oss::u8string::const_pointer, size_t); -inline str::W1252string to_w1252string(str::W1252string::const_pointer s, size_t sz) -{ - return str::W1252string(s, sz); -} +CODA_OSS_API str::W1252string to_w1252string(coda_oss::u8string::const_pointer p, size_t sz); -std::string to_native(coda_oss::u8string::const_pointer, size_t); // std::string is Windows-1252 or UTF-8 depending on platform -std::string to_native(str::W1252string::const_pointer s, size_t sz); // std::string is Windows-1252 or UTF-8 depending on platform -inline std::string to_native(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) // std::string is Windows-1252 or UTF-8 depending on platform -{ - return is_utf8 ? to_native(cast(s), sz) - : to_native(cast(s), sz); -} -inline std::string to_native(std::string::const_pointer s, size_t sz) -{ - return std::string(s, sz); -} -template -inline std::string to_native(const std::basic_string& s) +namespace details // YOU should use EncodedStringView { - return to_native(s.c_str(), s.size()); -} +void w1252to8(str::W1252string::const_pointer p, size_t sz, std::string&); // encoding is lost +void utf16to8(std::u16string::const_pointer, size_t, std::string&); // encoding is lost +void utf8to1252(coda_oss::u8string::const_pointer p, size_t sz, std::string&); // encoding is lost } } diff --git a/externals/coda-oss/modules/c++/str/source/EncodedString.cpp b/externals/coda-oss/modules/c++/str/source/EncodedString.cpp index c87d46093..1a43d926f 100644 --- a/externals/coda-oss/modules/c++/str/source/EncodedString.cpp +++ b/externals/coda-oss/modules/c++/str/source/EncodedString.cpp @@ -30,7 +30,7 @@ void str::EncodedString::assign(coda_oss::u8string::const_pointer s) using char_t = std::remove_pointer::type; // avoid copy-paste error using string_t = std::basic_string::type>; s_ = cast(s); // copy - v_ = EncodedStringView(c_str(s_)); + v_ = EncodedStringView(str::c_str(s_)); } void 
str::EncodedString::assign(str::W1252string::const_pointer s) @@ -38,7 +38,7 @@ void str::EncodedString::assign(str::W1252string::const_pointer s) using char_t = std::remove_pointer::type; // avoid copy-paste error using string_t = std::basic_string::type>; s_ = cast(s); // copy - v_ = EncodedStringView(c_str(s_)); // avoid copy-paste error + v_ = EncodedStringView(str::c_str(s_)); // avoid copy-paste error } static str::EncodedStringView make_EncodedStringView(const std::string& s, bool isUtf8) @@ -52,8 +52,9 @@ static str::EncodedStringView make_EncodedStringView(const std::string& s, bool return str::EncodedStringView(str::c_str(s)); } -str::EncodedString::EncodedString(std::string::const_pointer s) : s_(s) /*copy*/, v_ (s_) { } -str::EncodedString::EncodedString(const std::string& s) : s_(s) /*copy*/, v_ (s_) { } +str::EncodedString::EncodedString(std::string::const_pointer s) : s_(s) /*copy*/, v_(s_) { } +str::EncodedString::EncodedString(const std::string& s) : s_(s) /*copy*/, v_(s_) { } +str::EncodedString::EncodedString() : EncodedString(""){ } str::EncodedString::EncodedString(coda_oss::u8string::const_pointer s) { @@ -70,9 +71,21 @@ str::EncodedString::EncodedString(const str::W1252string& s) : EncodedString(s.c str::EncodedString::EncodedString(const std::u16string& s) : EncodedString(to_u8string(s)) { } str::EncodedString::EncodedString(const std::u32string& s) : EncodedString(to_u8string(s)) { } -str::EncodedString::EncodedString(std::wstring::const_pointer s) : EncodedString(to_u8string(s, wcslen(s))) { } -str::EncodedString::EncodedString(const std::wstring& s) : EncodedString(to_u8string(s)) { } +static inline coda_oss::u8string to_u8string_(std::wstring::const_pointer p_, size_t sz) // std::wstring is UTF-16 or UTF-32 depending on platform +{ + const auto p = + // Need to use #ifdef's because str::cast() checks to be sure the sizes are correct. 
+ #if _WIN32 + str::cast(p_); // std::wstring is UTF-16 on Windows + #endif + #if !_WIN32 + str::cast(p_); // std::wstring is UTF-32 on Linux + #endif + return str::to_u8string(p, sz); +} +str::EncodedString::EncodedString(std::wstring::const_pointer s) : EncodedString(to_u8string_(s, wcslen(s))) { } +str::EncodedString::EncodedString(const std::wstring& s) : EncodedString(to_u8string_(s.c_str(), s.size())) { } // create from a view str::EncodedString& str::EncodedString::operator=(const EncodedStringView& v) diff --git a/externals/coda-oss/modules/c++/str/source/EncodedStringView.cpp b/externals/coda-oss/modules/c++/str/source/EncodedStringView.cpp index 008c5cb73..bb3293dd5 100644 --- a/externals/coda-oss/modules/c++/str/source/EncodedStringView.cpp +++ b/externals/coda-oss/modules/c++/str/source/EncodedStringView.cpp @@ -34,6 +34,66 @@ #include "str/Encoding.h" #include "str/EncodedString.h" +enum class PlatformType +{ + Windows, + Linux, + // MacOS +}; + +#if _WIN32 +static auto Platform = PlatformType::Windows; +#else +static auto Platform = PlatformType::Linux; +#endif + +inline std::u16string to_u16string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) +{ + if (is_utf8) + { + return str::to_u16string(str::cast(s), sz); + } + return str::to_u16string(str::cast(s), sz); +} +inline str::ui16string to_ui16string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? 
*/) +{ + if (is_utf8) + { + return str::to_ui16string(str::cast(s), sz); + } + return str::to_ui16string(str::cast(s), sz); +} + +static std::string to_native(coda_oss::u8string::const_pointer p, size_t sz) +{ + if (Platform == PlatformType::Windows) + { + std::string retval; + str::details::utf8to1252(p, sz, retval); + return retval; + } + if (Platform == PlatformType::Linux) + { + return str::cast(p); // copy + } + throw std::logic_error("Unknown platform."); +} + +static std::string to_native(str::W1252string::const_pointer p, size_t sz) +{ + if (Platform == PlatformType::Windows) + { + return str::cast(p); // copy + } + if (Platform == PlatformType::Linux) + { + std::string retval; + str::details::w1252to8(p, sz, retval); + return retval; + } + throw std::logic_error("Unknown platform."); +} + template inline coda_oss::span make_span(const CharT* s) { @@ -56,40 +116,66 @@ str::EncodedStringView::EncodedStringView(const str::W1252string& s) : mString(m std::string str::EncodedStringView::native() const { - return str::details::to_native(mString.data(), mString.size(), mIsUtf8); + const auto s = mString.data(); + const auto sz = mString.size(); + return mIsUtf8 ? to_native(str::cast(s), sz) + : to_native(str::cast(s), sz); } coda_oss::u8string str::EncodedStringView::u8string() const { - return str::details::to_u8string(mString.data(), mString.size(), mIsUtf8); + return mIsUtf8 ? 
+ str::cast(mString.data()) : // copy + str::to_u8string(str::cast(mString.data()), mString.size()); } -std::string& str::EncodedStringView::toUtf8(std::string& result) const +std::string str::EncodedStringView::asUtf8() const { - return str::details::to_u8string(mString.data(), mString.size(), mIsUtf8, result); + const auto result = u8string(); + return str::c_str(result); // cast & copy } std::u16string str::EncodedStringView::u16string() const { - return str::details::to_u16string(mString.data(), mString.size(), mIsUtf8); + return ::to_u16string(mString.data(), mString.size(), mIsUtf8); } str::ui16string str::EncodedStringView::ui16string_() const { - return str::details::to_ui16string(mString.data(), mString.size(), mIsUtf8); + return ::to_ui16string(mString.data(), mString.size(), mIsUtf8); } +inline std::u32string to_u32string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) +{ + if (is_utf8) + { + return str::to_u32string(str::cast(s), sz); + } + return str::to_u32string(str::cast(s), sz); +} std::u32string str::EncodedStringView::u32string() const { - return str::details::to_u32string(mString.data(), mString.size(), mIsUtf8); + return ::to_u32string(mString.data(), mString.size(), mIsUtf8); } + std::wstring str::EncodedStringView::wstring() const // UTF-16 on Windows, UTF-32 on Linux { - return str::details::to_wstring(mString.data(), mString.size(), mIsUtf8); + const auto p = mString.data(); + const auto sz = mString.size(); + const auto s = + // Need to use #ifdef's because str::cast() checks to be sure the sizes are correct. + #if _WIN32 + ::to_u16string(p, sz, mIsUtf8); // std::wstring is UTF-16 on Windows + #endif + #if !_WIN32 + ::to_u32string(p, sz, mIsUtf8); // std::wstring is UTF-32 on Linux + #endif + return str::c_str(s); // copy } - str::W1252string str::EncodedStringView::w1252string() const { - return str::details::to_w1252string(mString.data(), mString.size(), mIsUtf8); + return mIsUtf8 ? 
+ str::to_w1252string(str::cast(mString.data()), mString.size()) : + str::cast(mString.data()); // copy } std::string str::EncodedStringView::asWindows1252() const { diff --git a/externals/coda-oss/modules/c++/str/source/Encoding.cpp b/externals/coda-oss/modules/c++/str/source/Encoding.cpp index 65c42a213..19cb02d9c 100644 --- a/externals/coda-oss/modules/c++/str/source/Encoding.cpp +++ b/externals/coda-oss/modules/c++/str/source/Encoding.cpp @@ -34,6 +34,7 @@ #include "str/Manip.h" #include "str/Convert.h" #include "str/utf8.h" +#include "str/EncodedStringView.h" // Need to look up characters from \x80 (EURO SIGN) to \x9F (LATIN CAPITAL LETTER Y WITH DIAERESIS) // in a map: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT @@ -145,13 +146,41 @@ static void fromWindows1252_(str::W1252string::value_type ch, std::basic_string< append(result, replacement_character); } template -void windows1252_to_string(str::W1252string::const_pointer p, size_t sz, std::basic_string& result) +void windows1252_to_string_(str::W1252string::const_pointer p, size_t sz, std::basic_string& result) { for (size_t i = 0; i < sz; i++) { fromWindows1252_(p[i], result); } } +template +inline TReturn to_Tstring(str::W1252string::const_pointer p, size_t sz) +{ + TReturn retval; + windows1252_to_string_(p, sz, retval); + return retval; +} + +inline void windows1252_to_string(str::W1252string::const_pointer p, size_t sz, coda_oss::u8string& result) +{ + windows1252_to_string_(p, sz, result); +} +void str::details::w1252to8(str::W1252string::const_pointer p, size_t sz, std::string& result) +{ + result = to_Tstring(p, sz); +} +std::u16string str::to_u16string(str::W1252string::const_pointer p, size_t sz) +{ + return to_Tstring(p, sz); +} +str::ui16string str::to_ui16string(str::W1252string::const_pointer p, size_t sz) +{ + return to_Tstring(p, sz); +} +std::u32string str::to_u32string(str::W1252string::const_pointer p, size_t sz) +{ + return to_Tstring(p, sz); +} template 
std::map kv_to_vk(const std::map& kv) @@ -223,6 +252,16 @@ static void utf8to1252(coda_oss::u8string::const_pointer p, size_t sz, std::basi } } } +void str::details::utf8to1252(coda_oss::u8string::const_pointer p, size_t sz, std::string& result) +{ + ::utf8to1252(p, sz, result); +} +str::W1252string str::to_w1252string(coda_oss::u8string::const_pointer p, size_t sz) +{ + str::W1252string retval; + utf8to1252(p, sz, retval); + return retval; +} struct back_inserter final { @@ -260,10 +299,9 @@ coda_oss::u8string str::to_u8string(std::u16string::const_pointer p, size_t sz) utf8::utf8to16(begin, begin+result.size(), std::back_inserter(utf16line)); */ } -std::string& str::details::to_u8string(std::u16string::const_pointer p, size_t sz, std::string& result) +void str::details::utf16to8(std::u16string::const_pointer p, size_t sz, std::string& result) { utf8::utf16to8(p, p + sz, std::back_inserter(result)); - return result; } std::u16string str::to_u16string(coda_oss::u8string::const_pointer p_, size_t sz) @@ -273,6 +311,13 @@ std::u16string str::to_u16string(coda_oss::u8string::const_pointer p_, size_t sz utf8::utf8to16(p, p + sz, std::back_inserter(retval)); return retval; } +str::ui16string str::to_ui16string(coda_oss::u8string::const_pointer p_, size_t sz) +{ + auto p = str::cast(p_); + str::ui16string retval; + utf8::utf8to16(p, p + sz, std::back_inserter(retval)); + return retval; +} std::u32string str::to_u32string(coda_oss::u8string::const_pointer p_, size_t sz) { @@ -288,171 +333,16 @@ coda_oss::u8string str::to_u8string(std::u32string::const_pointer p, size_t sz) utf8::utf32to8(p, p + sz, back_inserter(retval)); return retval; } -std::string& str::details::to_u8string(std::u32string::const_pointer p, size_t sz, std::string& result) -{ - utf8::utf32to8(p, p + sz, std::back_inserter(result)); - return result; -} coda_oss::u8string str::to_u8string(W1252string::const_pointer p, size_t sz) { coda_oss::u8string retval; - windows1252_to_string(p, sz, retval); + 
::windows1252_to_string(p, sz, retval); return retval; } -std::string& str::details::to_u8string(std::string::const_pointer p, size_t sz, bool is_utf8 /* is 'p' UTF-8? */, std::string& result) -{ - if (is_utf8) - { - result = p; // copy - } - else - { - windows1252_to_string(cast(p), sz, result); - } - return result; -} - -coda_oss::u8string str::details::to_u8string(std::string::const_pointer p, size_t sz, bool is_utf8 /* is 'p' UTF-8? */) -{ - return is_utf8 ? - cast(p) : // copy - to_u8string(cast(p), sz); -} -coda_oss::u8string str::to_u8string(std::string::const_pointer p, size_t sz) -{ - auto platform = details::Platform; // "conditional expression is constant" - return details::to_u8string(p, sz, platform == details::PlatformType::Linux); // std::string is UTF-8 on Linux -} - -template -static inline TReturn to_16string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - TReturn retval; - if (is_utf8) - { - auto p_ = str::cast(s); - auto p = str::cast(p_); - utf8::utf8to16(p, p + sz, std::back_inserter(retval)); - } - else - { - windows1252_to_string(str::cast(s), sz, retval); - } - return retval; -} -std::u16string str::details::to_u16string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - return to_16string(s, sz, is_utf8); -} -str::ui16string str::details::to_ui16string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - return to_16string(s, sz, is_utf8); -} - -std::u32string str::details::to_u32string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - if (is_utf8) - { - return str::to_u32string(cast(s), sz); - } - - std::u32string retval; - windows1252_to_string(cast(s), sz, retval); - return retval; -} -std::wstring str::details::to_wstring(std::string::const_pointer p, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - const auto s = - // Need to use #ifdef's because str::cast() checks to be sure the sizes are correct. 
- #if _WIN32 - to_u16string(p, sz, is_utf8); // std::wstring is UTF-16 on Windows - #endif - #if !_WIN32 - to_u32string(p, sz, is_utf8); // std::wstring is UTF-32 on Linux - #endif - return str::c_str(s); // copy -} - -coda_oss::u8string str::to_u8string(std::wstring::const_pointer p_, size_t sz) // std::wstring is UTF-16 or UTF-32 depending on platform -{ - const auto p = - // Need to use #ifdef's because str::cast() checks to be sure the sizes are correct. - #if _WIN32 - str::cast(p_); // std::wstring is UTF-16 on Windows - #endif - #if !_WIN32 - str::cast(p_); // std::wstring is UTF-32 on Linux - #endif - return to_u8string(p, sz); -} - -str::W1252string str::details::to_w1252string(coda_oss::u8string::const_pointer p, size_t sz) -{ - str::W1252string retval; - utf8to1252(p, sz, retval); - return retval; -} - -str::W1252string str::details::to_w1252string(std::string::const_pointer p, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - return is_utf8 ? - to_w1252string(cast(p), sz) : - cast(p); // copy -} -str::W1252string str::details::to_w1252string(std::string::const_pointer p, size_t sz) -{ - auto platform = details::Platform; // "conditional expression is constant" - return to_w1252string(p, sz, platform == details::PlatformType::Linux); // std::string is UTF-8 on Linux -} - -std::string str::details::to_native(coda_oss::u8string::const_pointer p, size_t sz) -{ - auto platform = str::details::Platform; // "conditional expression is constant" - if (platform == str::details::PlatformType::Windows) - { - std::string retval; - utf8to1252(p, sz, retval); - return retval; - } - if (platform == str::details::PlatformType::Linux) - { - auto retval = cast(p); - return retval != nullptr ? 
retval /* copy */ : ""; - } - throw std::logic_error("Unknown platform."); -} - -std::string str::details::to_native(W1252string::const_pointer p, size_t sz) -{ - auto platform = details::Platform; // "conditional expression is constant" - if (platform == details::PlatformType::Windows) - { - auto retval = cast(p); - return retval != nullptr ? retval /* copy */ : ""; - } - if (platform == details::PlatformType::Linux) - { - std::string retval; - windows1252_to_string(p, sz, retval); - return retval; - } - throw std::logic_error("Unknown platform."); -} - -coda_oss::u8string str::fromWindows1252(std::string::const_pointer p, size_t sz) -{ - return to_u8string(cast(p), sz); -} - -coda_oss::u8string str::fromUtf8(std::string::const_pointer p, size_t sz) -{ - return to_u8string(cast(p), sz); -} - template <> std::string str::toString(const coda_oss::u8string& utf8) { - return str::details::to_native(utf8); + return str::EncodedStringView(utf8).native(); } diff --git a/externals/coda-oss/modules/c++/str/unittests/test_base_convert.cpp b/externals/coda-oss/modules/c++/str/unittests/test_base_convert.cpp index 9e8774f73..ce581c6c6 100644 --- a/externals/coda-oss/modules/c++/str/unittests/test_base_convert.cpp +++ b/externals/coda-oss/modules/c++/str/unittests/test_base_convert.cpp @@ -1,442 +1,434 @@ -/* ========================================================================= - * This file is part of str-c++ - * ========================================================================= - * - * (C) Copyright 2004 - 2014, MDA Information Systems LLC - * - * str-c++ is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; If not, - * see . - * - */ - -#include - -#include -#include -#include -#include - -#include "coda_oss/CPlusPlus.h" - -#include -#include -#include - -#include "TestCase.h" - -// It seems that a macro is better than a utility routine, see https://github.com/tahonermann/char8_t-remediation -// C++20 changed the type of u8 to char8_t* https://en.cppreference.com/w/cpp/language/string_literal -// Not putting this everywhere because (1) well, it's a macro, and (2) it's mostly -// only test code that uses string literals. -#if CODA_OSS_cpp20 -#define U8(ch) u8##ch -#define U8s(s) u8##s -#else -#define U8(ch) static_cast(ch) -#define U8s(s) static_cast(static_cast(s)) -#endif - -static std::string to_string(const std::u8string& value) -{ - return str::c_str(value); // copy -} -static std::string to_string(const std::u32string& s) -{ - return to_string(str::to_u8string(s)); -} - -TEST_CASE(testConvert) -{ - TEST_ASSERT_EQ(str::toType("0x3BC7", 16), (long long) 0x3BC7); - TEST_ASSERT_EQ(str::toType("1101", 2), (long long) 13); - TEST_ASSERT_EQ(str::toType("231", 5), (long long) 66); - TEST_ASSERT_EQ(str::toType("0xFFFFFFFFFFFFFFFF", 16), - (unsigned long long) 0xFFFFFFFFFFFFFFFF); - TEST_ASSERT_EQ(str::toType("-10", 10), - (unsigned long long) -10); - TEST_ASSERT_EQ(str::toType("13", 4), (short) 7); -} - -TEST_CASE(testBadConvert) -{ - TEST_EXCEPTION(str::toType("Not a number", 10)); - TEST_EXCEPTION(str::toType("0xFFFFFFFFFFFFFFFF", 16)); - TEST_EXCEPTION(str::toType("0xFFFFF", 16)); -} - -TEST_CASE(testEightBitIntToString) -{ - TEST_ASSERT_EQ(str::toString(1), "1"); - TEST_ASSERT_EQ(str::toString(2), "2"); - 
TEST_ASSERT_EQ(str::toString(-2), "-2"); -} - -TEST_CASE(testCharToString) -{ - TEST_ASSERT_EQ(str::toString('a'), "a"); - TEST_ASSERT_EQ(str::toString(65), "A"); -} - -static inline std::u8string fromWindows1252(const std::string& s) -{ - // s is Windows-1252 on ALL platforms - return str::fromWindows1252(s.c_str(), s.size()); -} - -template -static inline constexpr std::u32string::value_type U(TChar ch) -{ - return static_cast(ch); -} - -TEST_CASE(test_string_to_u8string_ascii) -{ - { - const std::string input = "|\x00"; // ASCII, "|" - const auto actual = fromWindows1252(input); - const std::u8string expected{U8('|')}; // '\x00' is the end of the string in C/C++ - TEST_ASSERT_EQ(actual, expected); - } - constexpr uint8_t start_of_heading = 0x01; - constexpr uint8_t delete_character = 0x7f; - for (uint8_t ch = start_of_heading; ch <= delete_character; ch++) // ASCII - { - const std::string input { '|', static_cast(ch), '|'}; - const auto actual = fromWindows1252(input); - const std::u8string expected8{U8('|'), U8(ch), U8('|')}; - TEST_ASSERT_EQ(actual, expected8); - const std::u32string expected{U'|', U(ch), U'|'}; - TEST_ASSERT_EQ(to_string(actual), to_string(expected)); - } -} - -TEST_CASE(test_string_to_u8string_windows_1252) -{ - // Windows-1252 only characters must be mapped to UTF-8 - { - const std::string input = "|\x80|"; // Windows-1252, "|�|" - const auto actual = fromWindows1252(input); - const std::u8string expected8{U8s("|\xE2\x82\xAC|")}; // UTF-8, "|�|" - TEST_ASSERT_EQ(actual, expected8); - const std::u32string expected{U'|', 0x20AC, U'|'}; // UTF-32, "|�|" - TEST_ASSERT_EQ(to_string(actual), to_string(expected)); - } - { - const std::string input = "|\x9F|"; // Windows-1252, "|�|" - const auto actual = fromWindows1252(input); - const std::u8string expected8{U8s("|\xC5\xB8|")}; // UTF-8, "|�|" - TEST_ASSERT_EQ(actual, expected8); - const std::u32string expected{U'|', 0x0178, U'|'}; // UTF-32, "|�|" - TEST_ASSERT_EQ(to_string(actual), 
to_string(expected)); - } - { - const std::vector undefined{ '\x81', '\x8d', '\x8f', '\x90', '\x9d' }; - for (const auto& ch : undefined) - { - const std::string input{'|', ch, '|'}; - const auto actual = fromWindows1252(input); - static const std::u8string expected8{U8s("|\xEF\xBF\xBD|")}; // UTF-8, "||" - TEST_ASSERT_EQ(actual, expected8); - const std::u32string expected{U'|', 0xfffd, U'|'}; // UTF-32, "||" - TEST_ASSERT_EQ(to_string(actual), to_string(expected)); - } - } - { - // http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT - const std::vector windows1252_characters{ - //0x80, // EURO SIGN - //0x82, // SINGLE LOW-9 QUOTATION MARK - //0x83, // LATIN SMALL LETTER F WITH HOOK - //0x84, // DOUBLE LOW-9 QUOTATION MARK - //0x85, // HORIZONTAL ELLIPSIS - //0x86, // DAGGER - //0x87, // DOUBLE DAGGER - //0x88, // MODIFIER LETTER CIRCUMFLEX ACCENT - //0x89, // PER MILLE SIGN - //0x8A, // LATIN CAPITAL LETTER S WITH CARON - //0x8B, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK - //0x8C, // LATIN CAPITAL LIGATURE OE - 0x8E, // LATIN CAPITAL LETTER Z WITH CARON - 0x91, // LEFT SINGLE QUOTATION MARK - 0x92, // RIGHT SINGLE QUOTATION MARK - 0x93, // LEFT DOUBLE QUOTATION MARK - 0x94, // RIGHT DOUBLE QUOTATION MARK - 0x95, // BULLET - 0x96, // EN DASH - 0x97, // EM DASH - 0x98, // SMALL TILDE - 0x99, // TRADE MARK SIGN - 0x9A, // LATIN SMALL LETTER S WITH CARON - 0x9B, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x9C, // LATIN SMALL LIGATURE OE - 0x9E, // LATIN SMALL LETTER Z WITH CARON - 0x9F}; // LATIN CAPITAL LETTER Y WITH DIAERESIS - for (const auto& ch : windows1252_characters) - { - const std::string input_ { '|', static_cast(ch), '|'}; - const str::W1252string input(str::c_str(input_)); - const auto actual = to_u8string(input); - - // No "expected" to test against as the UTF-8 values for these Windows-1252 characters - // are mapped one-by-one. 
However, we can test that UTF-8 to Windows-1252 - // works as that walks through a UTF-8 string which can have 1-, 2-, 3- and 4-bytes - // for a single code-point. - const str::W1252string w1252 = str::details::to_w1252string(actual.data(), actual.size()); - TEST_ASSERT(input == w1252); - - // Can't compare the values with == because TEST_ASSERT_EQ() - // wants to do toString() and that doesn't work on Linux as the encoding - // is wrong (see above). - //const std::string w1252_ = str::c_str(w1252); - //TEST_ASSERT_EQ(input_, w1252_); - const str::EncodedStringView inputView(input); - const str::EncodedStringView w1252View(w1252); - TEST_ASSERT_EQ(inputView, w1252View); - } - } -} - -TEST_CASE(test_string_to_u8string_iso8859_1) -{ - constexpr uint8_t nobreak_space = 0xa0; - constexpr uint8_t latin_small_letter_y_with_diaeresis = 0xff; // '�' - for (uint32_t ch = nobreak_space; ch <= latin_small_letter_y_with_diaeresis; ch++) // ISO8859-1 - { - const std::string input_ { '|', static_cast(ch), '|'}; - const str::W1252string input(str::c_str(input_)); - const auto actual = to_u8string(input); - const std::u32string expected{U'|', U(ch), U'|'}; - TEST_ASSERT_EQ(to_string(actual), to_string(expected)); - - // Can't compare the values with == because TEST_ASSERT_EQ() - // wants to do toString() and that doesn't work on Linux as the encoding - // is wrong (see above). 
- //std::string actual_; - //str::details::toString(actual.c_str(), actual_); - //TEST_ASSERT_EQ(input_, actual_); - const str::EncodedStringView inputView(input); - const str::EncodedStringView actualView(actual); - TEST_ASSERT_EQ(inputView, actualView); - } -} - -template -static void test_change_case_(const std::string& testName, const TString& lower, const TString& upper) -{ - auto s = upper; - str::lower(s); - TEST_ASSERT(s == lower); - s = lower; - str::upper(s); - TEST_ASSERT(s == upper); - - s = upper; - str::upper(s); - TEST_ASSERT(s == upper); - s = lower; - str::lower(s); - TEST_ASSERT(s == lower); -} -TEST_CASE(test_change_case) -{ - const std::string ABC = "ABC"; - const std::string abc = "abc"; - test_change_case_(testName, abc, ABC); - - //const std::wstring ABC_w = L"ABC"; - //const std::wstring abc_w = L"abc"; - //test_change_case_(testName, abc_w, ABC_w); - - //// Yes, this can really come up, "non classifi�" is French (Canadian) for "unclassified". - //const std::string DEF_1252{'D', '\xc9', 'F'}; // "D�F" Windows-1252 - //const auto DEF8 = fromWindows1252(DEF_1252); - - //const std::string def_1252{'d', '\xe9', 'f'}; // "d�f" Windows-1252 - //const auto def8 = fromWindows1252(def_1252); - - ////test_change_case_(testName, def, DEF); - //test_change_case_(testName, def_1252, DEF_1252); -} - -// https://en.wikipedia.org/wiki/%C3%89#Character_mappings -static const str::EncodedString& classificationText_utf_8() -{ - static const str::EncodedString retval(str::cast("A\xc3\x89IOU")); // UTF-8 "A�IOU" - return retval; - } -static const str::EncodedString& classificationText_iso8859_1() -{ - static const str::EncodedString retval(str::cast("A\xc9IOU")); // ISO8859-1 "A�IOU" - return retval; - } -// UTF-16 on Windows, UTF-32 on Linux -static const wchar_t* classificationText_wide_() { return L"A\xc9IOU"; } // UTF-8 "A�IOU" -static str::EncodedString classificationText_wide() { return str::EncodedString(classificationText_wide_()); } -static std::string 
classificationText_platform() { return - sys::Platform == sys::PlatformType::Linux ? classificationText_utf_8().native() : classificationText_iso8859_1().native(); } - -TEST_CASE(test_u8string_to_string) -{ - { - const auto utf8 = classificationText_utf_8().u8string(); - const str::EncodedStringView utf8View(utf8); - const auto actual = utf8View.native(); - TEST_ASSERT_EQ(classificationText_platform(), actual); - } - { - const auto utf8 = classificationText_iso8859_1().u8string(); - const str::EncodedStringView utf8View(utf8); - const auto actual = utf8View.native(); - TEST_ASSERT_EQ(classificationText_platform(), actual); - } -} - -TEST_CASE(test_u8string_to_u16string) -{ - #if _WIN32 - const auto actual = classificationText_utf_8().u16string(); - const std::wstring s = str::c_str(actual); // Windows: std::wstring == std::u16string - TEST_ASSERT(classificationText_wide_() == s); // _EQ wants to do toString() - #endif - - TEST_ASSERT_EQ(classificationText_wide(), classificationText_utf_8()); - TEST_ASSERT_EQ(classificationText_wide(), classificationText_iso8859_1()); - - TEST_ASSERT(classificationText_wide().u16string() == classificationText_utf_8().u16string()); // _EQ wants to do toString() - TEST_ASSERT(classificationText_wide().u16string() == classificationText_iso8859_1().u16string()); // _EQ wants to do toString() -} - -TEST_CASE(test_u8string_to_u32string) -{ - #if !_WIN32 - const auto actual = classificationText_utf_8().u32string(); - const std::wstring s = str::c_str(actual); // Linux: std::wstring == std::u32string - TEST_ASSERT(classificationText_wide_() == s); // _EQ wants to do toString() - #endif - - TEST_ASSERT_EQ(classificationText_wide(), classificationText_utf_8()); - TEST_ASSERT_EQ(classificationText_wide(), classificationText_iso8859_1()); - - TEST_ASSERT(classificationText_wide().u32string() == classificationText_utf_8().u32string()); // _EQ wants to do toString() - TEST_ASSERT(classificationText_wide().u32string() == 
classificationText_iso8859_1().u32string()); // _EQ wants to do toString() -} - -static void test_EncodedStringView_(const std::string& testName, - const str::EncodedStringView& utf_8_view, const str::EncodedStringView& iso8859_1_view) -{ - (void)testName; - TEST_ASSERT_EQ(iso8859_1_view, iso8859_1_view); - TEST_ASSERT_EQ(utf_8_view, utf_8_view); - TEST_ASSERT_EQ(iso8859_1_view, utf_8_view); - TEST_ASSERT_EQ(utf_8_view, iso8859_1_view); - - TEST_ASSERT_EQ(iso8859_1_view.native(), utf_8_view.native()); - const auto native = classificationText_platform(); - TEST_ASSERT_EQ(iso8859_1_view.native(), native); - TEST_ASSERT_EQ(utf_8_view.native(), native); - - TEST_ASSERT(utf_8_view == classificationText_utf_8()); - TEST_ASSERT_EQ(utf_8_view, classificationText_utf_8()); - TEST_ASSERT(iso8859_1_view == classificationText_utf_8()); - TEST_ASSERT_EQ(iso8859_1_view, classificationText_utf_8()); - TEST_ASSERT(iso8859_1_view.u8string() == utf_8_view.u8string()); - - const auto expected = str::EncodedString::details::string(classificationText_utf_8()); - { - std::string buf; - const auto& actual = utf_8_view.toUtf8(buf); - TEST_ASSERT_EQ(actual, expected); - } - { - std::string buf; - const auto& actual = iso8859_1_view.toUtf8(buf); - TEST_ASSERT_EQ(actual, expected); - } -} -TEST_CASE(test_EncodedStringView) -{ - str::EncodedStringView esv; - auto copy(esv); - copy = esv; // assignment - - { - auto utf_8_view(classificationText_utf_8().view()); - auto iso8859_1_view(classificationText_iso8859_1().view()); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - - utf_8_view = classificationText_iso8859_1().view(); - iso8859_1_view = classificationText_utf_8().view(); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - } - { - auto utf_8_view = classificationText_utf_8().view(); - auto iso8859_1_view = classificationText_iso8859_1().view(); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - - utf_8_view = 
classificationText_iso8859_1().view(); - iso8859_1_view = classificationText_utf_8().view(); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - } - { - str::EncodedStringView utf_8_view; - utf_8_view = classificationText_iso8859_1().view(); - str::EncodedStringView iso8859_1_view; - iso8859_1_view = classificationText_utf_8().view(); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - } -} - -TEST_CASE(test_EncodedString) -{ - str::EncodedString es; - TEST_ASSERT_TRUE(es.native().empty()); - { - str::EncodedString es_copy(es); // copy - TEST_ASSERT_TRUE(es_copy.native().empty()); - } - es = str::EncodedString("abc"); // assignment - TEST_ASSERT_EQ(es.native(), "abc"); - { - str::EncodedString es_copy(es); // copy, again; this time w/o default content - TEST_ASSERT_EQ(es_copy.native(), "abc"); - } - - str::EncodedString abc(es); // copy, for use below - TEST_ASSERT_EQ(abc.native(), "abc"); - - str::EncodedString es2; - es = std::move(es2); // move assignment - TEST_ASSERT_TRUE(es.native().empty()); - str::EncodedString abc_(abc); // copy - es = std::move(abc_); // move assignment, w/o default content - TEST_ASSERT_EQ(es.native(), "abc"); - - str::EncodedString es3(std::move(abc)); // move constructor - TEST_ASSERT_EQ(es3.native(), "abc"); -} -TEST_MAIN( - TEST_CHECK(testConvert); - TEST_CHECK(testBadConvert); - TEST_CHECK(testEightBitIntToString); - TEST_CHECK(testCharToString); - TEST_CHECK(test_string_to_u8string_ascii); - TEST_CHECK(test_string_to_u8string_windows_1252); - TEST_CHECK(test_string_to_u8string_iso8859_1); - TEST_CHECK(test_change_case); - TEST_CHECK(test_u8string_to_string); - TEST_CHECK(test_u8string_to_u16string); - TEST_CHECK(test_u8string_to_u32string); - TEST_CHECK(test_EncodedStringView); - TEST_CHECK(test_EncodedString); +/* ========================================================================= + * This file is part of str-c++ + * ========================================================================= + * + 
* (C) Copyright 2004 - 2014, MDA Information Systems LLC + * + * str-c++ is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; If not, + * see . + * + */ + +#include + +#include +#include +#include + +#include +#include +#include + +#include "TestCase.h" + +inline static void test_assert_eq(const std::string& testName, const std::u8string& actual, const std::u8string& expected) +{ + TEST_ASSERT_EQ(actual, expected); +} +inline static void test_assert_eq(const std::string& testName, const std::u8string& actual, const std::u32string& expected) +{ + test_assert_eq(testName, actual, str::to_u8string(expected)); +} + +TEST_CASE(testConvert) +{ + TEST_ASSERT_EQ(str::toType("0x3BC7", 16), (long long) 0x3BC7); + TEST_ASSERT_EQ(str::toType("1101", 2), (long long) 13); + TEST_ASSERT_EQ(str::toType("231", 5), (long long) 66); + TEST_ASSERT_EQ(str::toType("0xFFFFFFFFFFFFFFFF", 16), + (unsigned long long) 0xFFFFFFFFFFFFFFFF); + TEST_ASSERT_EQ(str::toType("-10", 10), + (unsigned long long) -10); + TEST_ASSERT_EQ(str::toType("13", 4), (short) 7); +} + +TEST_CASE(testBadConvert) +{ + TEST_EXCEPTION(str::toType("Not a number", 10)); + TEST_EXCEPTION(str::toType("0xFFFFFFFFFFFFFFFF", 16)); + TEST_EXCEPTION(str::toType("0xFFFFF", 16)); +} + +TEST_CASE(testEightBitIntToString) +{ + TEST_ASSERT_EQ(str::toString(1), "1"); + TEST_ASSERT_EQ(str::toString(2), "2"); + TEST_ASSERT_EQ(str::toString(-2), "-2"); +} + 
+TEST_CASE(testCharToString) +{ + TEST_ASSERT_EQ(str::toString('a'), "a"); + TEST_ASSERT_EQ(str::toString(65), "A"); +} + +static std::u8string fromWindows1252(const std::string& s) +{ + // s is Windows-1252 on ALL platforms + return str::EncodedStringView::fromWindows1252(s).u8string(); +} + +template +static constexpr std::u8string::value_type cast8(T ch) +{ + static_assert(sizeof(std::u8string::value_type) == sizeof(char), "sizeof(Char8_T) != sizeof(char)"); + return static_cast(ch); +} +template +static inline constexpr std::u32string::value_type U(TChar ch) +{ + return static_cast(ch); +} + +TEST_CASE(test_string_to_u8string_ascii) +{ + { + const std::string input = "|\x00"; // ASCII, "|" + const auto actual = fromWindows1252(input); + const std::u8string expected{cast8('|')}; // '\x00' is the end of the string in C/C++ + TEST_ASSERT_EQ(actual, expected); + } + constexpr uint8_t start_of_heading = 0x01; + constexpr uint8_t delete_character = 0x7f; + for (uint8_t ch = start_of_heading; ch <= delete_character; ch++) // ASCII + { + const std::string input { '|', static_cast(ch), '|'}; + const auto actual = fromWindows1252(input); + const std::u8string expected8{cast8('|'), cast8(ch), cast8('|')}; + test_assert_eq(testName, actual, expected8); + const std::u32string expected{U'|', U(ch), U'|'}; + test_assert_eq(testName, actual, expected); + } +} + +TEST_CASE(test_string_to_u8string_windows_1252) +{ + // Windows-1252 only characters must be mapped to UTF-8 + { + const std::string input = "|\x80|"; // Windows-1252, "|�|" + const auto actual = fromWindows1252(input); + const std::u8string expected8{cast8('|'), cast8('\xE2'), cast8('\x82'), cast8('\xAC'), cast8('|')}; // UTF-8, "|�|" + test_assert_eq(testName, actual, expected8); + const std::u32string expected{U"|\U000020AC|"}; // UTF-32, "|�|" + test_assert_eq(testName, actual, expected); + } + { + const std::string input = "|\x9F|"; // Windows-1252, "|�|" + const auto actual = fromWindows1252(input); + const 
std::u8string expected8{cast8('|'), cast8('\xC5'), cast8('\xB8'), cast8('|')}; // UTF-8, "|�|" + test_assert_eq(testName, actual, expected8); + const std::u32string expected{U"|\U00000178|"}; // UTF-32, "|�|" + test_assert_eq(testName, actual, expected); + } + { + const std::vector undefined{ '\x81', '\x8d', '\x8f', '\x90', '\x9d' }; + for (const auto& ch : undefined) + { + const std::string input{'|', ch, '|'}; + const auto actual = fromWindows1252(input); + static const std::u8string expected8{cast8('|'), cast8('\xEF'), cast8('\xBF'), cast8('\xBD'), cast8('|')}; // UTF-8, "||" + test_assert_eq(testName, actual, expected8); + const std::u32string expected{U"|\U0000fffd|"}; // UTF-32, "||" + test_assert_eq(testName, actual, expected); + } + } + { + // http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT + const std::vector windows1252_characters{ + //0x80, // EURO SIGN + //0x82, // SINGLE LOW-9 QUOTATION MARK + //0x83, // LATIN SMALL LETTER F WITH HOOK + //0x84, // DOUBLE LOW-9 QUOTATION MARK + //0x85, // HORIZONTAL ELLIPSIS + //0x86, // DAGGER + //0x87, // DOUBLE DAGGER + //0x88, // MODIFIER LETTER CIRCUMFLEX ACCENT + //0x89, // PER MILLE SIGN + //0x8A, // LATIN CAPITAL LETTER S WITH CARON + //0x8B, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK + //0x8C, // LATIN CAPITAL LIGATURE OE + 0x8E, // LATIN CAPITAL LETTER Z WITH CARON + 0x91, // LEFT SINGLE QUOTATION MARK + 0x92, // RIGHT SINGLE QUOTATION MARK + 0x93, // LEFT DOUBLE QUOTATION MARK + 0x94, // RIGHT DOUBLE QUOTATION MARK + 0x95, // BULLET + 0x96, // EN DASH + 0x97, // EM DASH + 0x98, // SMALL TILDE + 0x99, // TRADE MARK SIGN + 0x9A, // LATIN SMALL LETTER S WITH CARON + 0x9B, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x9C, // LATIN SMALL LIGATURE OE + 0x9E, // LATIN SMALL LETTER Z WITH CARON + 0x9F}; // LATIN CAPITAL LETTER Y WITH DIAERESIS + for (const auto& ch : windows1252_characters) + { + const std::string input_ { '|', static_cast(ch), '|'}; + const str::W1252string 
input(str::c_str(input_)); + const auto actual = to_u8string(input); + + // No "expected" to test against as the UTF-8 values for these Windows-1252 characters + // are mapped one-by-one. However, we can test that UTF-8 to Windows-1252 + // works as that walks through a UTF-8 string which can have 1-, 2-, 3- and 4-bytes + // for a single code-point. + const auto w1252 = str::EncodedStringView::details::w1252string(str::EncodedStringView(actual)); + TEST_ASSERT(input == w1252); + + // Can't compare the values with == because TEST_ASSERT_EQ() + // wants to do toString() and that doesn't work on Linux as the encoding + // is wrong (see above). + //const std::string w1252_ = str::c_str(w1252); + //TEST_ASSERT_EQ(input_, w1252_); + const str::EncodedStringView inputView(input); + const str::EncodedStringView w1252View(w1252); + TEST_ASSERT_EQ(inputView, w1252View); + } + } +} + +TEST_CASE(test_string_to_u8string_iso8859_1) +{ + constexpr uint8_t nobreak_space = 0xa0; + constexpr uint8_t latin_small_letter_y_with_diaeresis = 0xff; // '�' + for (uint32_t ch = nobreak_space; ch <= latin_small_letter_y_with_diaeresis; ch++) // ISO8859-1 + { + const std::string input_ { '|', static_cast(ch), '|'}; + const str::W1252string input(str::c_str(input_)); + const auto actual = to_u8string(input); + const std::u32string expected{U'|', U(ch), U'|'}; + test_assert_eq(testName, actual, expected); + + // Can't compare the values with == because TEST_ASSERT_EQ() + // wants to do toString() and that doesn't work on Linux as the encoding + // is wrong (see above). 
+ //std::string actual_; + //str::details::toString(actual.c_str(), actual_); + //TEST_ASSERT_EQ(input_, actual_); + const str::EncodedStringView inputView(input); + const str::EncodedStringView actualView(actual); + TEST_ASSERT_EQ(inputView, actualView); + } +} + +template +static void test_change_case_(const std::string& testName, const TString& lower, const TString& upper) +{ + auto s = upper; + str::lower(s); + TEST_ASSERT(s == lower); + s = lower; + str::upper(s); + TEST_ASSERT(s == upper); + + s = upper; + str::upper(s); + TEST_ASSERT(s == upper); + s = lower; + str::lower(s); + TEST_ASSERT(s == lower); +} +TEST_CASE(test_change_case) +{ + const std::string ABC = "ABC"; + const std::string abc = "abc"; + test_change_case_(testName, abc, ABC); + + //const std::wstring ABC_w = L"ABC"; + //const std::wstring abc_w = L"abc"; + //test_change_case_(testName, abc_w, ABC_w); + + //// Yes, this can really come up, "non classifi�" is French (Canadian) for "unclassified". + //const std::string DEF_1252{'D', '\xc9', 'F'}; // "D�F" Windows-1252 + //const auto DEF8 = fromWindows1252(DEF_1252); + + //const std::string def_1252{'d', '\xe9', 'f'}; // "d�f" Windows-1252 + //const auto def8 = fromWindows1252(def_1252); + + ////test_change_case_(testName, def, DEF); + //test_change_case_(testName, def_1252, DEF_1252); +} + +// https://en.wikipedia.org/wiki/%C3%89#Character_mappings +static const str::EncodedString& classificationText_utf_8() +{ + static const str::EncodedString retval(str::cast("A\xc3\x89IOU")); // UTF-8 "A�IOU" + return retval; + } +static const str::EncodedString& classificationText_iso8859_1() +{ + static const str::EncodedString retval(str::cast("A\xc9IOU")); // ISO8859-1 "A�IOU" + return retval; + } +// UTF-16 on Windows, UTF-32 on Linux +static const wchar_t* classificationText_wide_() { return L"A\xc9IOU"; } // UTF-8 "A�IOU" +static str::EncodedString classificationText_wide() { return str::EncodedString(classificationText_wide_()); } +static std::string 
classificationText_platform() { return + sys::Platform == sys::PlatformType::Linux ? classificationText_utf_8().native() : classificationText_iso8859_1().native(); } + +TEST_CASE(test_u8string_to_string) +{ + { + const auto utf8 = classificationText_utf_8().u8string(); + const str::EncodedStringView utf8View(utf8); + const auto actual = utf8View.native(); + TEST_ASSERT_EQ(classificationText_platform(), actual); + } + { + const auto utf8 = classificationText_iso8859_1().u8string(); + const str::EncodedStringView utf8View(utf8); + const auto actual = utf8View.native(); + TEST_ASSERT_EQ(classificationText_platform(), actual); + } +} + +TEST_CASE(test_u8string_to_u16string) +{ + #if _WIN32 + const auto actual = classificationText_utf_8().u16string(); + const std::wstring s = str::c_str(actual); // Windows: std::wstring == std::u16string + TEST_ASSERT(classificationText_wide_() == s); // _EQ wants to do toString() + #endif + + TEST_ASSERT_EQ(classificationText_wide(), classificationText_utf_8()); + TEST_ASSERT_EQ(classificationText_wide(), classificationText_iso8859_1()); + + TEST_ASSERT(classificationText_wide().u16string() == classificationText_utf_8().u16string()); // _EQ wants to do toString() + TEST_ASSERT(classificationText_wide().u16string() == classificationText_iso8859_1().u16string()); // _EQ wants to do toString() +} + +TEST_CASE(test_u8string_to_u32string) +{ + #if !_WIN32 + const auto actual = classificationText_utf_8().u32string(); + const std::wstring s = str::c_str(actual); // Linux: std::wstring == std::u32string + TEST_ASSERT(classificationText_wide_() == s); // _EQ wants to do toString() + #endif + + TEST_ASSERT_EQ(classificationText_wide(), classificationText_utf_8()); + TEST_ASSERT_EQ(classificationText_wide(), classificationText_iso8859_1()); + + TEST_ASSERT(classificationText_wide().u32string() == classificationText_utf_8().u32string()); // _EQ wants to do toString() + TEST_ASSERT(classificationText_wide().u32string() == 
classificationText_iso8859_1().u32string()); // _EQ wants to do toString() +} + +static void test_EncodedStringView_(const std::string& testName, + const str::EncodedStringView& utf_8_view, const str::EncodedStringView& iso8859_1_view) +{ + (void)testName; + TEST_ASSERT_EQ(iso8859_1_view, iso8859_1_view); + TEST_ASSERT_EQ(utf_8_view, utf_8_view); + TEST_ASSERT_EQ(iso8859_1_view, utf_8_view); + TEST_ASSERT_EQ(utf_8_view, iso8859_1_view); + + TEST_ASSERT_EQ(iso8859_1_view.native(), utf_8_view.native()); + const auto native = classificationText_platform(); + TEST_ASSERT_EQ(iso8859_1_view.native(), native); + TEST_ASSERT_EQ(utf_8_view.native(), native); + + TEST_ASSERT(utf_8_view == classificationText_utf_8()); + TEST_ASSERT_EQ(utf_8_view, classificationText_utf_8()); + TEST_ASSERT(iso8859_1_view == classificationText_utf_8()); + TEST_ASSERT_EQ(iso8859_1_view, classificationText_utf_8()); + TEST_ASSERT(iso8859_1_view.u8string() == utf_8_view.u8string()); + + const auto expected = str::EncodedString::details::string(classificationText_utf_8()); + { + const auto actual = utf_8_view.asUtf8(); + TEST_ASSERT_EQ(actual, expected); + } + { + const auto actual = iso8859_1_view.asUtf8(); + TEST_ASSERT_EQ(actual, expected); + } +} +TEST_CASE(test_EncodedStringView) +{ + str::EncodedStringView esv; + auto copy(esv); + copy = esv; // assignment + + { + auto utf_8_view(classificationText_utf_8().view()); + auto iso8859_1_view(classificationText_iso8859_1().view()); + test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); + + utf_8_view = classificationText_iso8859_1().view(); + iso8859_1_view = classificationText_utf_8().view(); + test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); + } + { + auto utf_8_view = classificationText_utf_8().view(); + auto iso8859_1_view = classificationText_iso8859_1().view(); + test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); + + utf_8_view = classificationText_iso8859_1().view(); + iso8859_1_view = 
classificationText_utf_8().view(); + test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); + } + { + str::EncodedStringView utf_8_view; + utf_8_view = classificationText_iso8859_1().view(); + str::EncodedStringView iso8859_1_view; + iso8859_1_view = classificationText_utf_8().view(); + test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); + } +} + +TEST_CASE(test_EncodedString) +{ + str::EncodedString es; + TEST_ASSERT_TRUE(es.empty()); + TEST_ASSERT_TRUE(es.native().empty()); + { + str::EncodedString es_copy(es); // copy + TEST_ASSERT_TRUE(es_copy.empty()); + TEST_ASSERT_TRUE(es_copy.native().empty()); + } + es = str::EncodedString("abc"); // assignment + TEST_ASSERT_EQ(es.native(), "abc"); + { + str::EncodedString es_copy(es); // copy, again; this time w/o default content + TEST_ASSERT_EQ(es_copy.native(), "abc"); + } + + str::EncodedString abc(es); // copy, for use below + TEST_ASSERT_EQ(abc.native(), "abc"); + + str::EncodedString es2; + es = std::move(es2); // move assignment + TEST_ASSERT_TRUE(es.empty()); + TEST_ASSERT_TRUE(es.native().empty()); + str::EncodedString abc_(abc); // copy + es = std::move(abc_); // move assignment, w/o default content + TEST_ASSERT_EQ(es.native(), "abc"); + + str::EncodedString es3(std::move(abc)); // move constructor + TEST_ASSERT_EQ(es3.native(), "abc"); +} +TEST_MAIN( + TEST_CHECK(testConvert); + TEST_CHECK(testBadConvert); + TEST_CHECK(testEightBitIntToString); + TEST_CHECK(testCharToString); + TEST_CHECK(test_string_to_u8string_ascii); + TEST_CHECK(test_string_to_u8string_windows_1252); + TEST_CHECK(test_string_to_u8string_iso8859_1); + TEST_CHECK(test_change_case); + TEST_CHECK(test_u8string_to_string); + TEST_CHECK(test_u8string_to_u16string); + TEST_CHECK(test_u8string_to_u32string); + TEST_CHECK(test_EncodedStringView); + TEST_CHECK(test_EncodedString); ) \ No newline at end of file diff --git a/externals/coda-oss/modules/c++/sys/unittests/test_byte_swap.cpp 
b/externals/coda-oss/modules/c++/sys/unittests/test_byte_swap.cpp index 095bc40e3..fd2ee6f5e 100644 --- a/externals/coda-oss/modules/c++/sys/unittests/test_byte_swap.cpp +++ b/externals/coda-oss/modules/c++/sys/unittests/test_byte_swap.cpp @@ -31,10 +31,10 @@ TEST_CASE(testEndianness) { - /*const*/ auto native = coda_oss::endian::native; // "const" causes "conditional expression is constant." + /*const*/ auto native = std::endian::native; // "const" causes "conditional expression is constant." - if (native == coda_oss::endian::big) { } - else if (native == coda_oss::endian::little) { } + if (native == std::endian::big) { } + else if (native == std::endian::little) { } else { TEST_FAIL("Mixed-endian not supported!"); @@ -42,7 +42,7 @@ TEST_CASE(testEndianness) const bool isBigEndianSystem = sys::isBigEndianSystem(); - if (native == coda_oss::endian::big) + if (native == std::endian::big) { TEST_ASSERT(isBigEndianSystem); } @@ -50,7 +50,7 @@ TEST_CASE(testEndianness) { TEST_ASSERT(!isBigEndianSystem); } - if (native == coda_oss::endian::little) + if (native == std::endian::little) { TEST_ASSERT(!isBigEndianSystem); } @@ -62,11 +62,11 @@ TEST_CASE(testEndianness) if (isBigEndianSystem) { - TEST_ASSERT(native == coda_oss::endian::big); + TEST_ASSERT(native == std::endian::big); } else { - TEST_ASSERT(native == coda_oss::endian::little); + TEST_ASSERT(native == std::endian::little); } } diff --git a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/ContentHandler.h b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/ContentHandler.h index 50729c782..763a94981 100644 --- a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/ContentHandler.h +++ b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/ContentHandler.h @@ -95,6 +95,10 @@ class ContentHandler virtual bool vcharacters(const void/*XMLCh*/*, size_t /*length*/) // avoid XMLCh, it's specific to Xerces { return false; /* continue on to existing characters()*/ } /* =0 would break existing code 
*/ + virtual bool call_vcharacters() const // =0 would break existing code + { + return false; // don't call vcharacters(const void*) + } /*! * Receive notification of the beginning of an element. diff --git a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/Document.h b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/Document.h index cb0a53553..29bb84ea9 100644 --- a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/Document.h +++ b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/Document.h @@ -38,9 +38,7 @@ #include -#include #include "coda_oss/string.h" -#include "coda_oss/memory.h" #include "xml/lite/Element.h" #include "xml/lite/QName.h" @@ -56,41 +54,33 @@ namespace lite * Use the Document to access the Element nodes contained within. * The DocumentParser will build a tree that you can use. */ -struct Document final +class Document { +public: //! Constructor Document(Element* rootNode = nullptr, bool own = true) : mRootNode(rootNode), mOwnRoot(own) { } - explicit Document(std::unique_ptr&& rootNode) : // implicitly own=true - Document(rootNode.release(), true /*own*/) - { - } /*! * Destroy the xml tree. This deletes the nodes if they exist * Careful, this may delete your copy if you are not careful */ - ~Document() + virtual ~Document() { destroy(); } - std::unique_ptr& clone(std::unique_ptr& doc) const + virtual Document* clone() const { - doc = coda_oss::make_unique(); + Document* doc = new Document(); - auto cloneRoot = coda_oss::make_unique(); + Element* cloneRoot = new Element(); cloneRoot->clone(*mRootNode); - doc->setRootElement(std::move(cloneRoot)); + doc->setRootElement(cloneRoot); return doc; } - Document* clone() const - { - std::unique_ptr doc; - return clone(doc).release(); - } /*! 
* Factory-type method for creating a new Element @@ -99,9 +89,21 @@ struct Document final * \param characterData The character data (if any) * \return A new element */ - Element *createElement(const std::string & qname, const std::string & uri, std::string characterData = ""); - std::unique_ptr createElement(const xml::lite::QName&, const std::string& characterData) const; - std::unique_ptr createElement(const xml::lite::QName&, const coda_oss::u8string& characterData) const; + virtual Element *createElement(const std::string & qname, + const std::string & uri, + std::string characterData = ""); + #ifndef SWIG // SWIG doesn't like unique_ptr or StringEncoding + Element* createElement(const std::string& qname, + const std::string & uri, + const std::string& characterData, StringEncoding) const; + Element* createElement(const std::string& qname, + const std::string& uri, + const coda_oss::u8string& characterData) const; + std::unique_ptr createElement(const xml::lite::QName& qname, const std::string& characterData) const; + std::unique_ptr createElement(const xml::lite::QName& qname, + const std::string& characterData, StringEncoding) const; + #endif // SWIG + /*! * Blanket destructor. This thing deletes everything @@ -116,13 +118,13 @@ struct Document final * \param element Element to add * \param underThis Element to add element to */ - void insert(Element * element, Element * underThis); + virtual void insert(Element * element, Element * underThis); /*! * Remove an element from the tree, starting at the root * \param toDelete The node to delete (This DOES do deletion) */ - void remove(Element * toDelete); + virtual void remove(Element * toDelete); /*! * Remove an element from the tree, starting at the second param @@ -131,17 +133,13 @@ struct Document final * be an optimization depending on the task, so I allow it to remain * public */ - void remove(Element * toDelete, Element * fromHere); + virtual void remove(Element * toDelete, Element * fromHere); /*! 
* Sets the internal root element * \param element The node to set. */ void setRootElement(Element * element, bool own = true); - void setRootElement(std::unique_ptr&& element) // implicitly own=true - { - setRootElement(element.release(), true /*own*/); - } /*! * Retrieves the internal root element @@ -153,18 +151,17 @@ struct Document final mOwnRoot = false; return mRootNode; } - std::unique_ptr& getRootElement(std::unique_ptr& rootNode) // implicitly steal=true - { - rootNode.reset(getRootElement(true /*steal*/)); - return rootNode; - } + Element *getRootElement() const { return mRootNode; } -private: +protected: + //! Copy constructor Document(const Document&); + + //! Assignment operator Document& operator=(const Document&); //! The root node element diff --git a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/Element.h b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/Element.h index e721698bd..657ce12af 100644 --- a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/Element.h +++ b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/Element.h @@ -27,16 +27,15 @@ #include #include #include // std::nothrow_t -#include #include #include #include -#include #include "xml/lite/XMLException.h" #include "xml/lite/Attributes.h" #include "xml/lite/QName.h" #include "sys/Conf.h" +#include "coda_oss/optional.h" #include "mem/SharedPtr.h" /*! @@ -60,9 +59,19 @@ namespace lite * This class stores all of the element information about an XML * document. */ -struct Element final +class Element { - Element() = default; + Element(const std::string& qname, const std::string& uri, std::nullptr_t) : + mParent(nullptr), mName(uri, qname) + { + } + +public: + //! Default constructor + Element() : + mParent(nullptr) + { + } /*! 
* Constructor taking the namespace prefix and the local name @@ -70,23 +79,37 @@ struct Element final * \param uri The uri of the object * \param characterData The character data (if any) */ - explicit Element(const std::string& qname, const std::string& uri = "", const std::string& characterData = "") : - mName(uri, qname) + Element(const std::string& qname, const std::string& uri = "", + const std::string& characterData = "") : + Element(qname, uri, nullptr) { setCharacterData(characterData); } - Element(const xml::lite::QName& qname, const coda_oss::u8string& characterData) : - mName(qname.getName(), qname.getUri().value) + #ifndef SWIG // SWIG doesn't like unique_ptr or StringEncoding + Element(const std::string& qname, const std::string& uri, + const std::string& characterData, StringEncoding encoding) : + Element(qname, uri, nullptr) + { + setCharacterData(characterData, encoding); + } + Element(const std::string& qname, const std::string& uri, + const coda_oss::u8string& characterData) : + Element(qname, uri, nullptr) { setCharacterData(characterData); } + // StringEncoding is assumed based on the platform: Windows-1252 or UTF-8. static std::unique_ptr create(const std::string& qname, const std::string& uri = "", const std::string& characterData = ""); + static std::unique_ptr create(const std::string& qname, const xml::lite::Uri& uri, const std::string& characterData = ""); static std::unique_ptr create(const xml::lite::QName&, const std::string& characterData = ""); static std::unique_ptr create(const xml::lite::QName&, const coda_oss::u8string&); + // Encoding of "characterData" is always UTF-8 + static std::unique_ptr createU8(const xml::lite::QName&, const std::string& characterData = ""); + #endif // SWIG //! 
Destructor - ~Element() + virtual ~Element() { destroyChildren(); } @@ -95,14 +118,14 @@ struct Element final void destroyChildren(); // use clone() to duplicate an Element - #if !(defined(SWIG) || defined(SWIGPYTHON) || defined(HAVE_PYTHON_H)) // SWIG needs these - //private: // encoded as part of the C++ name mangling by some compilers - #endif +#if !(defined(SWIG) || defined(SWIGPYTHON) || defined(HAVE_PYTHON_H)) // SWIG needs these +//private: // encoded as part of the C++ name mangling by some compilers +#endif Element(const Element&); Element& operator=(const Element&); - #if !(defined(SWIG) || defined(SWIGPYTHON) || defined(HAVE_PYTHON_H)) - public: - #endif +#if !(defined(SWIG) || defined(SWIGPYTHON) || defined(HAVE_PYTHON_H)) +public: +#endif Element(Element&&) = default; Element& operator=(Element&&) = default; @@ -267,17 +290,21 @@ struct Element final * \todo Add format capability */ void print(io::OutputStream& stream) const; + + // This is another slightly goofy routine to maintain backwards compatibility. + // XML documents must be properly (UTF-8, UTF-16 or UTF-32). The legacy + // print() routine (above) can write documents with a Windows-1252 encoding + // as the string is just copied to the output. + // + // The only valid setting for StringEncoding is Utf8; but defaulting that + // could change behavior on Windows. void prettyPrint(io::OutputStream& stream, const std::string& formatter = " ") const; - - // Outputs (presumablly to the console) using the **NATIVE** encoding. - // For most XML processing, **THIS IS WRONG** as output should - // always be UTF-8. However, for displaying XML on the console in Windows, - // the native (Windows-1252) encoding will work better as "special" characters - // will be displayed. 
- void consoleOutput_(io::OutputStream& stream) const; // be sure OutputStream is the console, not a file - void prettyConsoleOutput_(io::OutputStream& stream, // be sure OutputStream is the console, not a file + #ifndef SWIG // SWIG doesn't like unique_ptr or StringEncoding + void print(io::OutputStream& stream, StringEncoding /*=Utf8*/) const; + void prettyPrint(io::OutputStream& stream, StringEncoding /*=Utf8*/, const std::string& formatter = " ") const; + #endif // SWIG /*! * Determines if a child element exists @@ -302,21 +329,33 @@ struct Element final * Returns the character data of this element. * \return the charater data */ - std::string getCharacterData() const; - coda_oss::u8string& getCharacterData(coda_oss::u8string& result) const; + std::string getCharacterData() const + { + return mCharacterData; + } + #ifndef SWIG // SWIG doesn't like unique_ptr or StringEncoding + const coda_oss::optional& getEncoding() const + { + return mEncoding; + } + const coda_oss::optional& getCharacterData(std::string& result) const + { + result = getCharacterData(); + return getEncoding(); + } + void getCharacterData(coda_oss::u8string& result) const; + #endif // SWIG /*! * Sets the character data for this element. * \param characters The data to add to this element */ - void setCharacterData(const std::string&); - void setCharacterData(coda_oss::u8string s) - { - // See Item #41 in "Effective Modern C++" by Scott Meyers. - // std::basic_string is "cheap to move" and "always copied" - // into mCharacterData. - mCharacterData = std::move(s); - } + void setCharacterData(const std::string& characters); + #ifndef SWIG // SWIG doesn't like unique_ptr or StringEncoding + void setCharacterData_(const std::string& characters, const StringEncoding*); + void setCharacterData(const std::string& characters, StringEncoding); + void setCharacterData(const coda_oss::u8string& characters); + #endif // SWIG /*! * Sets the local name for this element. 
@@ -443,7 +482,8 @@ struct Element final mParent = parent; } -private: +protected: + void changePrefix(Element* element, const std::string& prefix, const std::string& uri); @@ -452,18 +492,28 @@ struct Element final const std::string& prefix, const std::string& uri); - void depthPrint(io::OutputStream& stream, int depth, const std::string& formatter, bool isConsoleOutput = false) const; + void depthPrint(io::OutputStream& stream, int depth, + const std::string& formatter) const; + void depthPrint(io::OutputStream& stream, StringEncoding, int depth, + const std::string& formatter) const; - Element* mParent = nullptr; + Element* mParent; //! The children of this element std::vector mChildren; xml::lite::QName mName; //! The attributes for this element xml::lite::Attributes mAttributes; - coda_oss::u8string mCharacterData; + //! The character data ... + std::string mCharacterData; + + private: + // ... and how that data is encoded + coda_oss::optional mEncoding; + void depthPrint(io::OutputStream& stream, bool utf8, int depth, + const std::string& formatter) const; }; -Element& add(const xml::lite::QName&, const std::string& value, Element& parent); +extern Element& add(const xml::lite::QName&, const std::string& value, Element& parent); #ifndef SWIG // The (old) version of SWIG we're using doesn't like certain C++11 features. 
diff --git a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/MinidomHandler.h b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/MinidomHandler.h index 8dc3ddc4d..a3b225134 100644 --- a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/MinidomHandler.h +++ b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/MinidomHandler.h @@ -47,11 +47,7 @@ #include #include -#include "coda_oss/string.h" -#include "coda_oss/memory.h" -#include "str/EncodedString.h" -#include "str/EncodedStringView.h" #include "XMLReader.h" #include "io/StandardStreams.h" #include "Document.h" @@ -72,13 +68,14 @@ namespace lite struct MinidomHandler final : public ContentHandler { //! Constructor. Uses default document - MinidomHandler() + MinidomHandler() : + mDocument(nullptr), mOwnDocument(true), mPreserveCharData(false) { - setDocument(coda_oss::make_unique()); + setDocument(new Document()); } //! Destructor - ~ MinidomHandler() + virtual ~ MinidomHandler() { setDocument(nullptr, true); } @@ -87,22 +84,22 @@ struct MinidomHandler final : public ContentHandler MinidomHandler(MinidomHandler&&) = default; MinidomHandler& operator=(MinidomHandler&&) = default; - void setDocument(Document *newDocument, bool own = true); + virtual void setDocument(Document *newDocument, bool own = true); void setDocument(std::unique_ptr&&); // own = true /** * Retrieves the Document. 
* @param steal if specified, ownership will be given up (if owned) */ - Document *getDocument(bool steal = false) + virtual Document *getDocument(bool steal = false) { if (steal) mOwnDocument = false; return mDocument; } - std::unique_ptr& getDocument(std::unique_ptr&); // steal = true + void getDocument(std::unique_ptr&); // steal = true - Document *getDocument() const + virtual Document *getDocument() const { return mDocument; } @@ -114,8 +111,10 @@ struct MinidomHandler final : public ContentHandler * \param value The value of the char data * \param length The length of the char data */ - void characters(const char* value, int length) override; + virtual void characters(const char* value, int length) override; + bool vcharacters(const void /*XMLCh*/*, size_t length) override; + bool call_vcharacters() const override; /*! * This method is fired when a new tag is entered. @@ -128,10 +127,18 @@ struct MinidomHandler final : public ContentHandler * \param qname The qname * \param atts The attributes */ - void startElement(const std::string & uri, + virtual void startElement(const std::string & uri, const std::string & localName, const std::string & qname, - const Attributes & atts) override; + const Attributes & atts); + + /*! + * We want to push only the proper amount of bytes + * to the node when we start writing. Here we chew + * up the pieces we take as we are taking them. + * \return The chracter data for the node + */ + virtual std::string adjustCharacterData(); /*! * Handles the actual popping of the node off the node @@ -141,40 +148,51 @@ struct MinidomHandler final : public ContentHandler * \param localName The local name * \param qname The qname */ - void endElement(const std::string & uri, + virtual void endElement(const std::string & uri, const std::string & localName, - const std::string & qname) override; + const std::string & qname); + + virtual void clear(); - void clear(); + /*! 
+ * Trim the white space off the back and front of a string + * \param s String to trim + */ + static void trim(std::string & s); /*! * If set to true, whitespaces will be preserved in the parsed * character data. Otherwise, it will be trimmed. */ - void preserveCharacterData(bool preserve); + virtual void preserveCharacterData(bool preserve); -private: /*! - * We want to push only the proper amount of bytes - * to the node when we start writing. Here we chew - * up the pieces we take as we are taking them. - * \return The chracter data for the node - */ - coda_oss::u8string adjustCharacterData(); - - /*! - * Trim the white space off the back and front of a string - * \param s String to trim + * If set to true, how std::string values are encoded will be set. + * + * This is a bit goofy to preserve existing behavior: on *ix, + * XML containing non-ASCII data is lost (it turns into + * Windows-1252 on Windows). + * + * When set, there won't be any change on Windows. However, + * on *ix, std::string will be encoding as UTF-8 thus preserving + * the non-ASCII data. 
*/ - static void trim(coda_oss::u8string& s); + virtual void storeEncoding(bool value); + bool storeEncoding() const; - coda_oss::u8string currentCharacterData; +protected: + std::string currentCharacterData; std::stack bytesForElement; std::stack nodeStack; - Document* mDocument = nullptr; - bool mOwnDocument = true; - bool mPreserveCharData = false; - void characters(coda_oss::u8string&&); + Document *mDocument; + bool mOwnDocument; + bool mPreserveCharData; + + private: + void characters(const char* value, int length, const StringEncoding*); + void call_characters(const std::string&, StringEncoding); + std::shared_ptr mpEncoding; + bool mStoreEncoding = false; }; } } diff --git a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/MinidomParser.h b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/MinidomParser.h index 19da9ced1..d25148755 100644 --- a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/MinidomParser.h +++ b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/MinidomParser.h @@ -56,15 +56,17 @@ namespace lite * bloat of the spec. It was inspired by python's xml.dom.minidom * module. */ -struct MinidomParser final +struct MinidomParser { /*! * Constructor. Set our SAX ContentHandler. */ - explicit MinidomParser(bool storeEncoding = true); + MinidomParser(bool storeEncoding = false); // see MinidomHandler::storeEncoding() //! Destructor. 
- ~MinidomParser() = default; + virtual ~MinidomParser() + { + } MinidomParser(const MinidomParser&) = delete; MinidomParser& operator=(const MinidomParser&) = delete; @@ -77,25 +79,27 @@ struct MinidomParser final * \param is This is the input stream to feed the parser * \param size This is the size of the stream to feed the parser */ - void parse(io::InputStream& is, int size = io::InputStream::IS_END); - void parse(io::InputStream& is, const void*pInitialEncoding, const void* pFallbackEncoding, - int size = io::InputStream::IS_END); + virtual void parse(io::InputStream& is, int size = io::InputStream::IS_END); + #ifndef SWIG // SWIG doesn't like unique_ptr or StringEncoding + virtual void parse(io::InputStream& is, StringEncoding, int size = io::InputStream::IS_END); + #endif // SWIG /*! * This clears the MinidomHandler, killing its underlying Document * tree. The Document node is preserved, however -- it must * be explicitly reset to another document to change element type. */ - void clear(); + virtual void clear(); /*! * Return a pointer to the document. Note that its a reference * so you dont get to keep it. * \return Pointer to document. */ - Document *getDocument() const; - Document *getDocument(bool steal = false); - std::unique_ptr& getDocument(std::unique_ptr&); // steal = true + virtual Document *getDocument() const; + + virtual Document *getDocument(bool steal = false); + void getDocument(std::unique_ptr&); // steal = true /*! * Reader accessor @@ -122,15 +126,20 @@ struct MinidomParser final * * \param newDocument The new document. */ - void setDocument(Document * newDocument, bool own = true); + virtual void setDocument(Document * newDocument, bool own = true); void setDocument(std::unique_ptr&&); // own = true /*! * @see MinidomHandler::preserveCharacterData */ - void preserveCharacterData(bool preserve); + virtual void preserveCharacterData(bool preserve); + + /*! 
+ * @see MinidomHandler::storeEncoding + */ + virtual void storeEncoding(bool preserve); -private: +protected: MinidomHandler mHandler; XMLReader mReader; }; diff --git a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/QName.h b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/QName.h index f4b9fbc4f..d9304a746 100644 --- a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/QName.h +++ b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/QName.h @@ -49,6 +49,32 @@ namespace xml namespace lite { + /*! + * \class StringEncoding + * \brief Specifies how std::string is encoded by MinidomParser. + * + * This is needed because our use of Xerces generates different + * results on Windows/Linux, and changing things might break existing + * code. + * + * On Windows, the UTF-16 strings (internal to Xerces) are converted + * to std::strings with Windows-1252 (more-or-less ISO8859-1) encoding; + * this allows Western European languages to be displayed. On *ix, + * UTF-8 is the norm ... + */ +#ifndef SWIG // SWIG doesn't like unique_ptr or StringEncoding +enum class StringEncoding +{ + Windows1252 // more-or-less ISO8859-1, https://en.wikipedia.org/wiki/Windows-1252 + , Utf8 +}; +constexpr auto PlatformEncoding = sys::Platform == sys::PlatformType::Windows + ? xml::lite::StringEncoding::Windows1252 + : xml::lite::StringEncoding::Utf8; +// Could do the same for std::wstring, but there isn't any code needing it right now. +// Probably better to use std::u16string and std::u32string anyway. +#endif + /*! * \class QName * \brief A Qualified name (includes the namespace stuff) @@ -68,7 +94,7 @@ namespace lite struct Uri final // help prevent mixups with std::string { Uri() = default; - explicit Uri(const std::string& v); + Uri(const std::string& v); std::string value; bool empty() const { @@ -118,7 +144,7 @@ class QName final * Constructor taking just the local name (no namespace). * \param lName Just the local name of the object. 
*/ - explicit QName(const std::string& lName) + QName(const std::string& lName) { setName(lName); } diff --git a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/ValidatorInterface.h b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/ValidatorInterface.h index b7fc57787..8eccf673f 100644 --- a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/ValidatorInterface.h +++ b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/ValidatorInterface.h @@ -130,22 +130,18 @@ class ValidatorInterface * \param xmlID Identifier for this input xml within the error log * \param errors Object for returning errors found (errors are appended) */ - template - bool vallidateT(io::InputStream& xml, - TStringStream&& oss, - const std::string& xmlID, - std::vector& errors) const + bool validate(io::InputStream& xml, + const std::string& xmlID, + std::vector& errors) const { // convert to std::string + io::StringStream oss; xml.streamTo(oss); return validate(oss.stream().str(), xmlID, errors); } - bool validate(io::InputStream& xml, + bool validate(io::InputStream& xml, StringEncoding, const std::string& xmlID, - std::vector& errors) const - { - return vallidateT(xml, io::StringStream(), xmlID, errors); - } + std::vector& errors) const; /*! 
* Validation against the internal schema pool @@ -158,7 +154,7 @@ class ValidatorInterface std::vector& errors) const { // convert to stream - io::U8StringStream oss; + io::StringStream oss; xml->print(oss); return validate(oss.stream().str(), xmlID, errors); } @@ -172,8 +168,14 @@ class ValidatorInterface virtual bool validate(const std::string& xml, const std::string& xmlID, std::vector& errors) const = 0; - virtual bool validate(const coda_oss::u8string&, const std::string& /*xmlID*/, std::vector&) const = 0; - virtual bool validate(const str::W1252string&, const std::string& /*xmlID*/, std::vector&) const = 0; + virtual bool validate(const coda_oss::u8string&, const std::string& /*xmlID*/, std::vector&) const // =0 would cause existing code to break + { + return true; // i.e., an error + } + virtual bool validate(const str::W1252string&, const std::string& /*xmlID*/, std::vector&) const // =0 would cause existing code to break + { + return true; // i.e., an error + } }; inline std::ostream& operator<< (std::ostream& out, diff --git a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/ValidatorXerces.h b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/ValidatorXerces.h index b78a15272..37c0ac9b8 100644 --- a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/ValidatorXerces.h +++ b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/ValidatorXerces.h @@ -29,7 +29,6 @@ #include #include -#include #include #include @@ -100,6 +99,7 @@ struct ValidationErrorHandler : public xercesc::DOMErrorHandler class ValidatorXerces : public ValidatorInterface { XercesContext mCtxt; //! 
this must be the first member listed + bool mLegacyStringConversion = true; // use existing code for XMLCh* conversion public: @@ -138,7 +138,8 @@ class ValidatorXerces : public ValidatorInterface bool validate(const str::W1252string&, const std::string& xmlID, std::vector&) const override; private: - bool validate_(const coda_oss::u8string& xml, + template + bool validate_(const std::basic_string& xml, bool legacyStringConversion, const std::string& xmlID, std::vector& errors) const; diff --git a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/XMLReaderInterface.h b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/XMLReaderInterface.h index c0540b8cb..e94dbfc4f 100644 --- a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/XMLReaderInterface.h +++ b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/XMLReaderInterface.h @@ -25,6 +25,7 @@ #include #include "XMLException.h" +#include "Element.h" // StringEncoding namespace xml { diff --git a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/XMLReaderXerces.h b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/XMLReaderXerces.h index beca1fa5c..5fc3dc751 100644 --- a/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/XMLReaderXerces.h +++ b/externals/coda-oss/modules/c++/xml.lite/include/xml/lite/XMLReaderXerces.h @@ -56,7 +56,7 @@ namespace lite * the Expat C Parser underneath, and wiring it to * generic event calls, via the content handler. */ -class XMLReaderXerces final : public XMLReaderInterface +class XMLReaderXerces : public XMLReaderInterface { XercesContext mCtxt; //! this must be the first member listed std::unique_ptr mNative; @@ -69,7 +69,10 @@ class XMLReaderXerces final : public XMLReaderInterface XMLReaderXerces(); //! Destructor. 
- ~XMLReaderXerces() = default; + ~XMLReaderXerces() + { + } + XMLReaderXerces(const XMLReaderXerces&) = delete; XMLReaderXerces& operator=(const XMLReaderXerces&) = delete; @@ -100,8 +103,9 @@ class XMLReaderXerces final : public XMLReaderInterface void parse(io::InputStream& is, int size = io::InputStream::IS_END); void parse(bool storeEncoding, io::InputStream& is, int size = io::InputStream::IS_END); - void parse(io::InputStream& is, const void*pInitialEncoding, const void* pFallbackEncoding, - int size = io::InputStream::IS_END); + #ifndef SWIG // SWIG doesn't like unique_ptr or StringEncoding + void parse(bool storeEncoding, io::InputStream& is, StringEncoding, int size = io::InputStream::IS_END); + #endif // SWIG //! Method to create an xml reader void create(); @@ -111,9 +115,10 @@ class XMLReaderXerces final : public XMLReaderInterface std::string getDriverName() const { return "xerces"; } - static const void* getWindows1252Encoding(); - private: + void parse(bool storeEncoding, io::InputStream& is, const StringEncoding*, int size); + void parse(bool storeEncoding, const std::vector&, const StringEncoding* pEncoding); + void write(const void*, size_t) override { throw xml::lite::XMLException(Ctxt("I'm not sure how you got here!")); diff --git a/externals/coda-oss/modules/c++/xml.lite/source/Document.cpp b/externals/coda-oss/modules/c++/xml.lite/source/Document.cpp index 20fec58da..f4ce03915 100644 --- a/externals/coda-oss/modules/c++/xml.lite/source/Document.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/source/Document.cpp @@ -22,8 +22,6 @@ #include "xml/lite/Document.h" -#include - void xml::lite::Document::setRootElement(Element * element, bool own) { destroy(); @@ -49,41 +47,60 @@ void xml::lite::Document::remove(Element * toDelete) remove(toDelete, mRootNode); } -static std::unique_ptr newElement(const std::string& qname, const std::string& uri) +static +xml::lite::Element * +newElement(const std::string& qname, const std::string& uri) { - 
std::unique_ptr elem(new xml::lite::Element()); + auto elem = new xml::lite::Element(); elem->setQName(qname); //std::cout << "qname: " << qname << std::endl; elem->setUri(uri); return elem; } -static std::unique_ptrnewElement(const xml::lite::QName& qname) +static xml::lite::Element* newElement(const xml::lite::QName& qname) { return newElement(qname.getName(), qname.getAssociatedUri()); } - -xml::lite::Element* xml::lite::Document::createElement(const std::string& qname, const std::string& uri, +xml::lite::Element* xml::lite::Document::createElement(const std::string& qname, + const std::string& uri, std::string characterData) { auto elem = newElement(qname, uri); elem->setCharacterData(characterData); - return elem.release(); + return elem; +} +xml::lite::Element* xml::lite::Document::createElement(const std::string& qname, + const std::string& uri, + const std::string& characterData, StringEncoding encoding) const +{ + auto elem = newElement(qname, uri); + elem->setCharacterData(characterData, encoding); + return elem; } -std::unique_ptr xml::lite::Document::createElement(const QName& qname, +xml::lite::Element* xml::lite::Document::createElement(const std::string& qname, + const std::string& uri, const coda_oss::u8string& characterData) const { - auto elem = newElement(qname); + auto elem = newElement(qname, uri); elem->setCharacterData(characterData); return elem; } -std::unique_ptr xml::lite::Document::createElement(const QName& qname, + +std::unique_ptr xml::lite::Document::createElement(const xml::lite::QName& qname, const std::string& characterData) const { - auto elem = newElement(qname); + std::unique_ptr elem(newElement(qname)); elem->setCharacterData(characterData); return elem; } +std::unique_ptr xml::lite::Document::createElement(const xml::lite::QName& qname, + const std::string& characterData, StringEncoding encoding) const +{ + std::unique_ptr elem(newElement(qname)); + elem->setCharacterData(characterData, encoding); + return elem; +} void 
xml::lite::Document::insert(xml::lite::Element * element, xml::lite::Element * underThis) diff --git a/externals/coda-oss/modules/c++/xml.lite/source/Element.cpp b/externals/coda-oss/modules/c++/xml.lite/source/Element.cpp index 911befe40..5904d86c4 100644 --- a/externals/coda-oss/modules/c++/xml.lite/source/Element.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/source/Element.cpp @@ -20,11 +20,8 @@ * */ -#include - #include #include -#include #include "xml/lite/Element.h" #include @@ -33,17 +30,29 @@ #include #include +constexpr auto PlatformEncoding = sys::Platform == sys::PlatformType::Windows + ? xml::lite::StringEncoding::Windows1252 + : xml::lite::StringEncoding::Utf8; + std::unique_ptr xml::lite::Element::create(const std::string& qname, const std::string& uri, const std::string& characterData) { - return coda_oss::make_unique(qname, uri, characterData); + return coda_oss::make_unique(qname, uri, characterData, PlatformEncoding); +} +std::unique_ptr xml::lite::Element::create(const std::string& qname, const Uri& uri, const std::string& characterData) +{ + return create(qname, uri.value, characterData); } std::unique_ptr xml::lite::Element::create(const QName& qname, const std::string& characterData) { - return create(qname.getName(), qname.getUri().value, characterData); + return create(qname.getName(), qname.getUri(), characterData); } std::unique_ptr xml::lite::Element::create(const QName& qname, const coda_oss::u8string& characterData) { - return coda_oss::make_unique(qname, characterData); + return coda_oss::make_unique(qname.getName(), qname.getUri().value, characterData); +} +std::unique_ptr xml::lite::Element::createU8(const QName& qname, const std::string& characterData) +{ + return create(qname, str::EncodedStringView(characterData).u8string()); } xml::lite::Element::Element(const xml::lite::Element& node) @@ -56,9 +65,11 @@ xml::lite::Element& xml::lite::Element::operator=(const xml::lite::Element& node { mName = node.mName; mCharacterData = 
node.mCharacterData; + mEncoding = node.mEncoding; mAttributes = node.mAttributes; mChildren = node.mChildren; mParent = node.mParent; + mEncoding = node.mEncoding; } return *this; } @@ -234,6 +245,10 @@ void xml::lite::Element::print(io::OutputStream& stream) const { depthPrint(stream, 0, ""); } +void xml::lite::Element::print(io::OutputStream& stream, StringEncoding encoding) const +{ + depthPrint(stream, encoding, 0, ""); +} void xml::lite::Element::prettyPrint(io::OutputStream& stream, const std::string& formatter) const @@ -241,50 +256,96 @@ void xml::lite::Element::prettyPrint(io::OutputStream& stream, depthPrint(stream, 0, formatter); stream.writeln(""); } - -void xml::lite::Element::consoleOutput_(io::OutputStream& stream) const -{ - depthPrint(stream, 0, "", true /*isConsoleOutput*/); -} -void xml::lite::Element::prettyConsoleOutput_(io::OutputStream& stream, +void xml::lite::Element::prettyPrint(io::OutputStream& stream, StringEncoding encoding, const std::string& formatter) const { - depthPrint(stream, 0, formatter, true /*isConsoleOutput*/); + depthPrint(stream, encoding, 0, formatter); stream.writeln(""); } - -std::string xml::lite::Element::getCharacterData() const +static xml::lite::StringEncoding getEncoding_(const coda_oss::optional& encoding) { - return str::EncodedStringView(mCharacterData).native(); + if (encoding.has_value()) + { + if (encoding == xml::lite::StringEncoding::Utf8) { } + else if (encoding == xml::lite::StringEncoding::Windows1252) { } + else + { + throw std::logic_error("Unknown encoding."); + } + return *encoding; + } + + // don't know the encoding ... 
assume a default based on the platform + return PlatformEncoding; } -coda_oss::u8string& xml::lite::Element::getCharacterData(coda_oss::u8string& result) const + +void xml::lite::Element::getCharacterData(coda_oss::u8string& result) const { - result = mCharacterData; - return result; + const auto encoding = ::getEncoding_(this->getEncoding()); + + str::EncodedStringView view; + if (encoding == xml::lite::StringEncoding::Utf8) + { + view = str::EncodedStringView(str::c_str(mCharacterData)); + } + else if (encoding == xml::lite::StringEncoding::Windows1252) + { + view = str::EncodedStringView(str::c_str(mCharacterData)); + } + else + { + throw std::logic_error("getCharacterData(): unknown encoding"); + } + + result = view.u8string(); // copy or conversion } -static void writeCharacterData(io::OutputStream& stream, const std::u8string& characterData, bool isConsoleOutput) +static void writeCharacterData(io::OutputStream& stream, + const std::string& characterData, const coda_oss::optional& encoding_) { - if (!isConsoleOutput) + const auto encoding = getEncoding_(encoding_); + if (encoding == xml::lite::StringEncoding::Windows1252) + { + // need to convert before writing + const str::EncodedStringView view(str::c_str(characterData)); + stream.write(view.u8string()); + } + else if (encoding == xml::lite::StringEncoding::Utf8) { - stream.write(characterData); // call UTF-8 overload + // already in UTF-8, no converstion necessary + auto pUtf8 = str::c_str(characterData); + stream.write(pUtf8, characterData.length()); // call UTF-8 overload } else { - stream.write(str::EncodedStringView(characterData).native()); // write to the console using the platform native encoding + throw std::logic_error("writeCharacterData(): unknown encoding"); } } -void xml::lite::Element::depthPrint(io::OutputStream& stream, int depth, const std::string& formatter, bool isConsoleOutput) const +void xml::lite::Element::depthPrint(io::OutputStream& stream, + int depth, + const std::string& 
formatter) const +{ + // XML must be stored in UTF-8 (or UTF-16/32), in particular, not + // Windows-1252. However, existing code did this, so preserve current behavior. + depthPrint(stream, false /*utf8*/, depth, formatter); +} +void xml::lite::Element::depthPrint(io::OutputStream& stream, StringEncoding encoding, + int depth, + const std::string& formatter) const +{ + if (encoding != StringEncoding::Utf8) + { + throw std::invalid_argument("'encoding' must be UTF-8"); + } + // THIS IS CORRECT, but may break existing code; so it must be explicitly requested. + depthPrint(stream, true /*utf8*/, depth, formatter); +} +void xml::lite::Element::depthPrint(io::OutputStream& stream, bool utf8, + int depth, + const std::string& formatter) const { - // XML must be stored in UTF-8 (or UTF-16/32), in particular, not Windows-1252. - // - // Except for a special exception for writing to the console: UTF-8 won't display well on Windows - // and Windows-1252 won't display nicely on Linux. Of course, "console output" is a bit - // iffy since both Windows and Linux support redirection ... so the user could still generate - // a bad XML file. - std::string prefix = ""; for (int i = 0; i < depth; ++i) prefix += formatter; @@ -304,21 +365,31 @@ void xml::lite::Element::depthPrint(io::OutputStream& stream, int depth, const s acc += std::string("\""); } - if (mCharacterData.empty() && mChildren.empty()) + if (mCharacterData.empty()&& mChildren.empty()) { //simple type - just end it here stream.write(acc + "/" + rBrack); } else { - stream.write(acc + rBrack); - writeCharacterData(stream, mCharacterData, isConsoleOutput); + stream.write(acc + rBrack); + if (utf8) + { + // Correct behavior, but may break existing code. + writeCharacterData(stream, mCharacterData, getEncoding()); + } + else + { + // Legacy behavior, will generate incorrect XML output if there are western European + // characters in "mCharacterData". 
+ stream.write(mCharacterData); + } for (unsigned int i = 0; i < mChildren.size(); i++) { if (!formatter.empty()) stream.write("\n"); - mChildren[i]->depthPrint(stream, depth + 1, formatter, isConsoleOutput); + mChildren[i]->depthPrint(stream, utf8, depth + 1, formatter); } if (!mChildren.empty() && !formatter.empty()) @@ -452,9 +523,29 @@ void xml::lite::Element::setNamespaceURI( attr[std::string("xmlns:") + prefix] = uri; } +void xml::lite::Element::setCharacterData_(const std::string& characters, const StringEncoding* pEncoding) +{ + mCharacterData = characters; + if (pEncoding != nullptr) + { + mEncoding = *pEncoding; + } + else + { + mEncoding.reset(); + } +} void xml::lite::Element::setCharacterData(const std::string& characters) { - mCharacterData = str::EncodedStringView(characters).u8string(); + setCharacterData_(characters, nullptr /*pEncoding*/); +} +void xml::lite::Element::setCharacterData(const std::string& characters, StringEncoding encoding) +{ + setCharacterData_(characters, &encoding); +} +void xml::lite::Element::setCharacterData(const coda_oss::u8string& characters) +{ + setCharacterData(str::c_str(characters), StringEncoding::Utf8); } xml::lite::Element& xml::lite::add(const QName& qname, diff --git a/externals/coda-oss/modules/c++/xml.lite/source/MinidomHandler.cpp b/externals/coda-oss/modules/c++/xml.lite/source/MinidomHandler.cpp index fe33084b7..347de87ce 100644 --- a/externals/coda-oss/modules/c++/xml.lite/source/MinidomHandler.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/source/MinidomHandler.cpp @@ -21,14 +21,11 @@ */ #include -#include -#include #include "str/Manip.h" #include "str/Convert.h" #include "str/Encoding.h" #include "sys/OS.h" -#include "str/EncodedStringView.h" #include "xml/lite/MinidomHandler.h" @@ -46,36 +43,69 @@ void xml::lite::MinidomHandler::setDocument(std::unique_ptr&& newDocum { setDocument(newDocument.release(), true /*own*/); } -std::unique_ptr& xml::lite::MinidomHandler::getDocument(std::unique_ptr& 
pDocument) +void xml::lite::MinidomHandler::getDocument(std::unique_ptr& pDocument) { pDocument.reset(getDocument(true /*steal*/)); - return pDocument; } void xml::lite::MinidomHandler::clear() { mDocument->destroy(); - currentCharacterData.clear(); + currentCharacterData = ""; assert(bytesForElement.empty()); assert(nodeStack.empty()); } -void xml::lite::MinidomHandler::characters(std::u8string&& s) +void xml::lite::MinidomHandler::characters(const char* value, int length, const StringEncoding* pEncoding) { - // Append number of bytes added to this node's stack value - assert(bytesForElement.size()); - bytesForElement.top() += static_cast(s.length()); + if (pEncoding != nullptr) + { + if (mpEncoding != nullptr) + { + // be sure the given encoding matches any encoding already set + if (*pEncoding != *mpEncoding) + { + throw std::invalid_argument("New 'encoding' is different than value already set."); + } + } + else if (storeEncoding()) + { + mpEncoding = std::make_shared(*pEncoding); + } + } // Append new data - currentCharacterData += std::move(s); + if (length) + currentCharacterData += std::string(value, length); + + // Append number of bytes added to this node's stack value + assert(bytesForElement.size()); + bytesForElement.top() += length; } void xml::lite::MinidomHandler::characters(const char *value, int length) { - // If we're still here despite use_char() being "false" then the - // wide-character routine "failed." On Windows, that means the char* value - // is encoded as Windows-1252 (more-or-less ISO8859-1). - const str::EncodedString chars(std::string(value, length)); - characters(chars.u8string()); + const StringEncoding* pEncoding = nullptr; + if ((sys::Platform == sys::PlatformType::Windows) && call_vcharacters()) + { + // If we're still here despite use_char() being "false" then the wide-character + // routine "failed." On Windows, that means the char* value is encoded + // as Windows-1252 (more-or-less ISO8859-1). 
+ static const auto encoding = StringEncoding::Windows1252; + pEncoding = &encoding; + } + characters(value, length, pEncoding); +} + +void xml::lite::MinidomHandler::call_characters(const std::string& s, StringEncoding encoding) +{ + const auto length = static_cast(s.length()); + characters(s.c_str(), length, &encoding); +} + +bool xml::lite::MinidomHandler::call_vcharacters() const +{ + // if we're storing the encoding, get wchar_t so that we can convert + return storeEncoding(); } bool xml::lite::MinidomHandler::vcharacters(const void /*XMLCh*/* chars_, size_t length) @@ -92,8 +122,29 @@ bool xml::lite::MinidomHandler::vcharacters(const void /*XMLCh*/* chars_, size_t static_assert(sizeof(XMLCh) == sizeof(char16_t), "XMLCh should be 16-bits."); auto pChars16 = static_cast(chars_); - auto chars = str::EncodedString(std::u16string(pChars16, length)).u8string(); - characters(std::move(chars)); + std::string chars; + auto platformEncoding = xml::lite::PlatformEncoding; // "conditional expression is constant" + if (platformEncoding == xml::lite::StringEncoding::Utf8) + { + str::details::utf16to8(pChars16, length, chars); + } + else if (platformEncoding == xml::lite::StringEncoding::Windows1252) + { + // On Windows, we want std::string encoded as Windows-1252 so that + // western European characters will be displayed. We can't convert + // to UTF-8 (as above on Linux), because Windows doesn't have good + // support for displaying such strings. Using UTF-16 would be preferred + // on Windows, but all existing code uses std::string instead of std::wstring. 
+ assert(pChars16 != nullptr); // XMLCh == wchar_t == char16_t on Windows + auto pChars = static_cast(chars_); + chars = xml::lite::XercesLocalString(pChars).str(); + } + else + { + throw std::logic_error("Unknown xml::lite::StringEncoding"); + } + + call_characters(chars, platformEncoding); return true; // vcharacters() processed } @@ -113,14 +164,14 @@ void xml::lite::MinidomHandler::startElement(const std::string & uri, } // This function subtracts off the char place from the push -std::u8string xml::lite::MinidomHandler::adjustCharacterData() +std::string xml::lite::MinidomHandler::adjustCharacterData() { // Edit the string with regard to this node's char data // Get rid of what we take on char data accumulator int diff = (int) (currentCharacterData.length()) - bytesForElement.top(); - auto newCharacterData(currentCharacterData.substr( + std::string newCharacterData(currentCharacterData.substr( diff, currentCharacterData.length()) ); @@ -132,7 +183,7 @@ std::u8string xml::lite::MinidomHandler::adjustCharacterData() return newCharacterData; } -void xml::lite::MinidomHandler::trim(std::u8string& s) +void xml::lite::MinidomHandler::trim(std::string & s) { str::trim(s); } @@ -145,7 +196,7 @@ void xml::lite::MinidomHandler::endElement(const std::string & /*uri*/, xml::lite::Element * current = nodeStack.top(); nodeStack.pop(); - current->setCharacterData(adjustCharacterData()); + current->setCharacterData_(adjustCharacterData(), mpEncoding.get()); // Remove corresponding int on bytes stack bytesForElement.pop(); @@ -170,3 +221,12 @@ void xml::lite::MinidomHandler::preserveCharacterData(bool preserve) mPreserveCharData = preserve; } +void xml::lite::MinidomHandler::storeEncoding(bool value) +{ + mStoreEncoding = value; +} + +bool xml::lite::MinidomHandler::storeEncoding() const +{ + return mStoreEncoding; +} diff --git a/externals/coda-oss/modules/c++/xml.lite/source/MinidomParser.cpp b/externals/coda-oss/modules/c++/xml.lite/source/MinidomParser.cpp index 
73a64f3a4..8082915f6 100644 --- a/externals/coda-oss/modules/c++/xml.lite/source/MinidomParser.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/source/MinidomParser.cpp @@ -22,24 +22,20 @@ #include "xml/lite/MinidomParser.h" -#include - xml::lite::MinidomParser::MinidomParser(bool storeEncoding) { - if (!storeEncoding) - { - throw std::invalid_argument("'storeEncoding' is no longer used and must always be 'true'"); - } mReader.setContentHandler(&mHandler); + mHandler.storeEncoding(storeEncoding); } -void xml::lite::MinidomParser::parse(io::InputStream& is, int size) +void xml::lite::MinidomParser::parse(io::InputStream& is, + int size) { - mReader.parse(is, size); + mReader.parse(mHandler.storeEncoding(), is, size); } -void xml::lite::MinidomParser::parse(io::InputStream& is, const void*pInitialEncoding, const void* pFallbackEncoding, int size) +void xml::lite::MinidomParser::parse(io::InputStream& is, StringEncoding encoding, int size) { - mReader.parse(is, pInitialEncoding, pFallbackEncoding, size); + mReader.parse(mHandler.storeEncoding(), is, encoding, size); } void xml::lite::MinidomParser::clear() @@ -56,9 +52,9 @@ xml::lite::Document* xml::lite::MinidomParser::getDocument(bool steal) { return mHandler.getDocument(steal); } -std::unique_ptr& xml::lite::MinidomParser::getDocument(std::unique_ptr& pDocument) +void xml::lite::MinidomParser::getDocument(std::unique_ptr& pDocument) { - return mHandler.getDocument(pDocument); + mHandler.getDocument(pDocument); } void xml::lite::MinidomParser::setDocument(xml::lite::Document* newDocument, bool own) @@ -74,3 +70,8 @@ void xml::lite::MinidomParser::preserveCharacterData(bool preserve) { mHandler.preserveCharacterData(preserve); } + +void xml::lite::MinidomParser::storeEncoding(bool preserve) +{ + mHandler.storeEncoding(preserve); +} \ No newline at end of file diff --git a/externals/coda-oss/modules/c++/xml.lite/source/NamespaceStack.cpp b/externals/coda-oss/modules/c++/xml.lite/source/NamespaceStack.cpp index 
4ab047112..7e4f6ef38 100644 --- a/externals/coda-oss/modules/c++/xml.lite/source/NamespaceStack.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/source/NamespaceStack.cpp @@ -54,7 +54,7 @@ void xml::lite::NamespaceStack::getMapping(const std::string& prefix, Uri& resul { if (mMappingStack[i].first == prefix) { - result = Uri(mMappingStack[i].second); + result = mMappingStack[i].second; return; } } diff --git a/externals/coda-oss/modules/c++/xml.lite/source/UtilitiesXerces.cpp b/externals/coda-oss/modules/c++/xml.lite/source/UtilitiesXerces.cpp index ffb0f7fd7..4e2cf942b 100644 --- a/externals/coda-oss/modules/c++/xml.lite/source/UtilitiesXerces.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/source/UtilitiesXerces.cpp @@ -90,9 +90,12 @@ XercesLocalString& XercesLocalString::operator=(const XercesLocalString& rhs) void XercesContentHandler::characters(const XMLCh* const chars, const XercesSize_T length) { - if (mLiteHandler->vcharacters(chars, length)) + if (mLiteHandler->call_vcharacters()) { - return; // processed as void* + if (mLiteHandler->vcharacters(chars, length)) + { + return; // processed as void* + } } // Either use_wchar_t() is false (default, legacy behavior) or diff --git a/externals/coda-oss/modules/c++/xml.lite/source/ValidatorInterface.cpp b/externals/coda-oss/modules/c++/xml.lite/source/ValidatorInterface.cpp index d639655cc..d8c48895a 100644 --- a/externals/coda-oss/modules/c++/xml.lite/source/ValidatorInterface.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/source/ValidatorInterface.cpp @@ -27,5 +27,38 @@ #include #include +#include +#include +#include +#include + +namespace fs = std::filesystem; + #include +template +bool vallidate_(const xml::lite::ValidatorInterface& validator, + io::InputStream& xml, TStringStream&& oss, + const std::string& xmlID, std::vector& errors) +{ + xml.streamTo(oss); + return validator.validate(oss.stream().str(), xmlID, errors); +} +bool xml::lite::ValidatorInterface::validate( + io::InputStream& xml, 
StringEncoding encoding, + const std::string& xmlID, + std::vector& errors) const +{ + // convert to the correcrt std::basic_string based on "encoding" + if (encoding == StringEncoding::Utf8) + { + return vallidate_(*this, xml, io::U8StringStream(), xmlID, errors); + } + if (encoding == StringEncoding::Windows1252) + { + return vallidate_(*this, xml, io::W1252StringStream(), xmlID, errors); + } + + // this really shouldn't happen + return validate(xml, xmlID, errors); +} diff --git a/externals/coda-oss/modules/c++/xml.lite/source/ValidatorXerces.cpp b/externals/coda-oss/modules/c++/xml.lite/source/ValidatorXerces.cpp index 41968be28..9fd7d8ab5 100644 --- a/externals/coda-oss/modules/c++/xml.lite/source/ValidatorXerces.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/source/ValidatorXerces.cpp @@ -24,14 +24,11 @@ #include #include #include -#include -#include #include #include #include #include -#include namespace fs = std::filesystem; @@ -95,6 +92,11 @@ ValidatorXerces::ValidatorXerces( bool recursive) : ValidatorXerces(convert(schemaPaths), log, recursive) { + // The string conversion code in validate() doesn't work right on all platforms + // for non-ASCII characters. But changing that to be correct could break + // existing code someplace; thus, it's enabled only if using the new + // fs::path overload, std::string retains existing behavior. 
+ mLegacyStringConversion = false; } ValidatorXerces::ValidatorXerces( const std::vector& schemaPaths, @@ -207,16 +209,40 @@ inline void reset(str::EncodedStringView xmlView, std::unique_ptr; -static std::unique_ptr setStringData(xercesc::DOMLSInputImpl& input, const std::u8string& xml) + +template +static void setStringData_(xercesc::DOMLSInputImpl& input, const std::basic_string& xml, std::unique_ptr& pWString) { - // expand to the wide character data for use with xerces - std::unique_ptr retval; - reset(str::EncodedStringView(xml), retval); - input.setStringData(retval->c_str()); - return retval; + reset(str::EncodedStringView(xml), pWString); + input.setStringData(pWString->c_str()); +} +static void setStringData(xercesc::DOMLSInputImpl& input, const std::string& xml, bool legacyStringConversion, + std::unique_ptr& pXmlWide, std::unique_ptr& pWString) +{ + if (legacyStringConversion) + { + // This doesn't work right for UTF-8 or Windows-1252 + pXmlWide.reset(new XercesLocalString(xml)); // std::make_unique fails with older compilers + input.setStringData(pXmlWide->toXMLCh()); + } + else + { + setStringData_(input, xml, pWString); + } +} +inline void setStringData(xercesc::DOMLSInputImpl& input, const coda_oss::u8string& xml, bool /*legacyStringConversion*/, + std::unique_ptr&, std::unique_ptr& pWString) +{ + setStringData_(input, xml, pWString); +} +inline void setStringData(xercesc::DOMLSInputImpl& input, const str::W1252string& xml, bool /*legacyStringConversion*/, + std::unique_ptr&, std::unique_ptr& pWString) +{ + setStringData_(input, xml, pWString); } -bool ValidatorXerces::validate_(const std::u8string& xml, +template +bool ValidatorXerces::validate_(const std::basic_string& xml, bool legacyStringConversion, const std::string& xmlID, std::vector& errors) const { @@ -234,7 +260,9 @@ bool ValidatorXerces::validate_(const std::u8string& xml, xercesc::XMLPlatformUtils::fgMemoryManager); // expand to the wide character data for use with xerces - auto pWString 
= setStringData(input, xml); + std::unique_ptr pXmlWide; + std::unique_ptr pWString; + setStringData(input, xml, legacyStringConversion, pXmlWide, pWString); // validate the document mValidator->parse(&input)->release(); @@ -249,58 +277,23 @@ bool ValidatorXerces::validate_(const std::u8string& xml, return (!mErrorHandler->getErrorLog().empty()); } - -static str::EncodedStringView encodeXml(const std::string& xml) -{ - // The XML might contain contain a specific encoding, if it does; - // we want to use it, otherwise we'll corrupt the data. - - // UTF-8 is the normal case, so check it first - const std::regex reUtf8("<\?.*encoding=.*['\"]?.*utf-8.*['\"]?.*\?>", std::regex::icase); - std::cmatch m; - if (std::regex_search(xml.c_str(), m, reUtf8)) - { - return str::EncodedStringView::fromUtf8(xml); - } - - // Maybe this is is poor XML with Windows-1252 encoding :-( - const std::regex reWindows1252("<\?.*encoding=.*['\"]?.*windows-1252.*['\"]?.*\?>", std::regex::icase); - if (std::regex_search(xml.c_str(), m, reWindows1252)) - { - return str::EncodedStringView::fromWindows1252(xml); - } - - // No "... encoding= ..."; let EncodedStringView deal with it - return str::EncodedStringView(xml); -} - bool ValidatorXerces::validate(const std::string& xml, const std::string& xmlID, std::vector& errors) const { - const auto view = encodeXml(xml); - try - { - return validate(view.u8string(), xmlID, errors); - } - catch (const utf8::invalid_utf8&) { } - - // Can't process as "native" (UTF-8 on Linux, Windows-1252 on Windows). - // Must be Windows-1252 on Linux. 
- return validate(str::c_str(xml), xmlID, errors); + return validate_(xml, mLegacyStringConversion, xmlID, errors); } bool ValidatorXerces::validate(const coda_oss::u8string& xml, const std::string& xmlID, std::vector& errors) const { - return validate_(xml, xmlID, errors); + return validate_(xml, false /*legacyStringConversion*/, xmlID, errors); } bool ValidatorXerces::validate(const str::W1252string& xml, const std::string& xmlID, std::vector& errors) const { - const str::EncodedStringView xmlView(xml); - return validate(xmlView.u8string(), xmlID, errors); + return validate_(xml, false /*legacyStringConversion*/, xmlID, errors); } } diff --git a/externals/coda-oss/modules/c++/xml.lite/source/XMLReaderXerces.cpp b/externals/coda-oss/modules/c++/xml.lite/source/XMLReaderXerces.cpp index 3ecf8d79f..13ec93c03 100644 --- a/externals/coda-oss/modules/c++/xml.lite/source/XMLReaderXerces.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/source/XMLReaderXerces.cpp @@ -34,31 +34,39 @@ xml::lite::XMLReaderXerces::XMLReaderXerces() create(); } -const void* xml::lite::XMLReaderXerces::getWindows1252Encoding() +void xml::lite::XMLReaderXerces::parse(bool storeEncoding, + io::InputStream& is, const StringEncoding* pEncoding, int size) { - return XMLUni::fgWin1252EncodingString; -} + io::StringStream oss; + is.streamTo(oss, size); -static void parse(SAX2XMLReader& parser, const std::vector& buffer, const XMLCh* pEncoding) + const auto available = oss.available(); + if ( available <= 0 ) + { + throw xml::lite::XMLParseException(Ctxt("No stream available")); + } + std::vector buffer(available); + oss.read(buffer.data(), buffer.size()); + parse(storeEncoding, buffer, pEncoding); +} +void xml::lite::XMLReaderXerces::parse(bool storeEncoding, + const std::vector& buffer, const StringEncoding* pEncoding) { // Does not take ownership - MemBufInputSource memBuffer((const unsigned char*)buffer.data(), + MemBufInputSource memBuffer((const unsigned char *)buffer.data(), buffer.size(), - 
xml::lite::XMLReaderXerces::MEM_BUFFER_ID(), + XMLReaderXerces::MEM_BUFFER_ID(), false); - if (pEncoding != nullptr) + if ((pEncoding != nullptr) && (*pEncoding == StringEncoding::Windows1252)) { - memBuffer.setEncoding(pEncoding); + // The only other value is StringEncoding::Utf8 which is the default + memBuffer.setEncoding(XMLUni::fgWin1252EncodingString); } - parser.parse(memBuffer); -} -static void parse(SAX2XMLReader& parser, const std::vector& buffer, - const XMLCh* pInitialEncoding, const XMLCh* pFallbackEncoding) -{ + try { - parse(parser, buffer, pInitialEncoding); + mNative->parse(memBuffer); return; // successful parse } catch (const except::Error& e) @@ -68,41 +76,41 @@ static void parse(SAX2XMLReader& parser, const std::vector& buffer, { throw; } - - // Trying again will fail, so don't bother - if (pFallbackEncoding == pInitialEncoding) + // Caller specified an encoding; don't try calling parse() again + if (pEncoding != nullptr) + { + throw; + } + // legacy code, didn't pass storeEncoding=true to MinidomParser + if (!storeEncoding) { throw; } } - // Try again using the fallback encoding - parse(parser, buffer, pFallbackEncoding); + // If we're here, the initial parse failed and the caller did NOT specify an encoding + // (pEncoding == NULL). Since the default is UTF-8 and that failed, try again + // with Windows-1252. 
+ assert(pEncoding == nullptr); + assert(storeEncoding); + const auto windows1252 = StringEncoding::Windows1252; + parse(true /*storeEncoding*/, buffer, &windows1252); } - -void xml::lite::XMLReaderXerces::parse(io::InputStream& is, const void*pInitialEncoding_, const void* pFallbackEncoding_, int size) +void xml::lite::XMLReaderXerces::parse(io::InputStream& is, int size) { - io::StringStream oss; - is.streamTo(oss, size); - - const auto available = oss.available(); - if ( available <= 0 ) - { - throw xml::lite::XMLParseException(Ctxt("No stream available")); - } - std::vector buffer(available); - oss.read(buffer.data(), buffer.size()); - - const auto pInitialEncoding = static_cast(pInitialEncoding_); - const auto pFallbackEncoding = static_cast(pFallbackEncoding_); - ::parse(*mNative, buffer, pInitialEncoding, pFallbackEncoding); + parse(false /*storeEncoding*/, is, size); } -void xml::lite::XMLReaderXerces::parse(io::InputStream& is, int size) +void xml::lite::XMLReaderXerces::parse(bool storeEncoding, io::InputStream& is, int size) +{ + parse(storeEncoding, is, nullptr /*pEncoding*/, size); +} +void xml::lite::XMLReaderXerces::parse(bool storeEncoding, + io::InputStream& is, StringEncoding encoding, int size) { - // This will try parsing the default (UTF-8) first, then Windows1252 - parse(is, nullptr /*pInitialEncoding*/, getWindows1252Encoding(), size); + parse(storeEncoding, is, &encoding, size); } + // This function creates the parser void xml::lite::XMLReaderXerces::create() { diff --git a/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlattribute.cpp b/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlattribute.cpp index d568df3ac..c96871fc1 100644 --- a/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlattribute.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlattribute.cpp @@ -27,8 +27,7 @@ #include "xml/lite/MinidomParser.h" -static const std::string strUri = "urn:example.com"; -static const xml::lite::Uri 
uri(strUri); +static const std::string uri = "urn:example.com"; static const std::string strXml_1_ = R"( @@ -38,7 +37,7 @@ static const std::string strXml_1_ = R"( static const std::string strXml_2_ = R"(" ns:int="314" /> )"; -static const auto strXml = strXml_1_ + strUri + strXml_2_; +static const auto strXml = strXml_1_ + uri + strXml_2_; struct test_MinidomParser final { diff --git a/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlelement.cpp b/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlelement.cpp index ccbdde0a7..51cc1dae5 100644 --- a/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlelement.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlelement.cpp @@ -20,23 +20,13 @@ * */ -#include -#include "coda_oss/CPlusPlus.h" +#include + #include "io/StringStream.h" #include #include "xml/lite/MinidomParser.h" -// It seems that a macro is better than a utility routine, see https://github.com/tahonermann/char8_t-remediation -// C++20 changed the type of u8 to char8_t* https://en.cppreference.com/w/cpp/language/string_literal -// Not putting this everywhere because (1) well, it's a macro, and (2) it's mostly -// only test code that uses string literals. 
-#if CODA_OSS_cpp20 -#define U8(s) u8##s -#else -#define U8(s) static_cast(static_cast(s)) -#endif - static const std::string text = "TEXT"; static const std::string strXml1_ = R"( @@ -74,6 +64,64 @@ struct test_MinidomParser final } }; +TEST_CASE(test_CloneCopy_root_encoding) +{ + { + test_MinidomParser xmlParser; + auto& root_ = xmlParser.getRootElement(); + root_.setCharacterData("abc", xml::lite::StringEncoding::Utf8); + const auto& root = root_; + TEST_ASSERT_TRUE(root.getEncoding().has_value()); + + xml::lite::Element copy; + copy.clone(root); + copy.clearChildren(); + TEST_ASSERT_TRUE(copy.getEncoding().has_value()); + copy.setCharacterData("xyz"); + TEST_ASSERT_FALSE(copy.getEncoding().has_value()); + TEST_ASSERT_TRUE(root.getEncoding().has_value()); + + root_.setCharacterData("123"); + TEST_ASSERT_FALSE(root.getEncoding().has_value()); + } + { + test_MinidomParser xmlParser; + auto& root_ = xmlParser.getRootElement(); + root_.setCharacterData("abc", xml::lite::StringEncoding::Utf8); + const auto& root = root_; + + xml::lite::Element copy; + copy.clone(root); + copy.clearChildren(); + TEST_ASSERT_TRUE(copy.getEncoding().has_value()); + copy.setCharacterData("xyz", xml::lite::StringEncoding::Windows1252); + TEST_ASSERT_TRUE(copy.getEncoding().has_value()); + TEST_ASSERT_TRUE(root.getEncoding().has_value()); + TEST_ASSERT(*root.getEncoding() != *copy.getEncoding()); + + root_.setCharacterData("123"); + TEST_ASSERT_FALSE(root.getEncoding().has_value()); + TEST_ASSERT_TRUE(copy.getEncoding().has_value()); + } +} + +TEST_CASE(test_CloneCopy_copy_encoding) +{ + test_MinidomParser xmlParser; + auto& root_ = xmlParser.getRootElement(); + root_.setCharacterData("abc"); + const auto& root = root_; + TEST_ASSERT_FALSE(root.getEncoding().has_value()); + + xml::lite::Element copy; + copy.clone(root); + copy.clearChildren(); + TEST_ASSERT_FALSE(copy.getEncoding().has_value()); + copy.setCharacterData("xyz", xml::lite::StringEncoding::Utf8); + 
TEST_ASSERT_TRUE(copy.getEncoding().has_value()); + TEST_ASSERT_FALSE(root.getEncoding().has_value()); +} + TEST_CASE(test_getRootElement) { io::StringStream ss; @@ -110,6 +158,8 @@ TEST_CASE(test_getElementsByTagName) const auto characterData = a.getCharacterData(); TEST_ASSERT_EQ(characterData, text); + const auto encoding = a.getEncoding(); + TEST_ASSERT_FALSE(encoding.has_value()); } } @@ -348,6 +398,9 @@ TEST_CASE(test_setValue) int main(int, char**) { + TEST_CHECK(test_CloneCopy_root_encoding); + TEST_CHECK(test_CloneCopy_copy_encoding); + TEST_CHECK(test_getRootElement); TEST_CHECK(test_getElementsByTagName); TEST_CHECK(test_getElementsByTagName_duplicate); diff --git a/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlparser.cpp b/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlparser.cpp index 5312639d4..87226c5ad 100644 --- a/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlparser.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlparser.cpp @@ -29,22 +29,15 @@ #include "str/Convert.h" #include "str/Encoding.h" #include "str/EncodedString.h" -#include "coda_oss/CPlusPlus.h" -#include "sys/OS.h" #include #include "xml/lite/MinidomParser.h" #include "xml/lite/Validator.h" -// It seems that a macro is better than a utility routine, see https://github.com/tahonermann/char8_t-remediation -// C++20 changed the type of u8 to char8_t* https://en.cppreference.com/w/cpp/language/string_literal -// Not putting this everywhere because (1) well, it's a macro, and (2) it's mostly -// only test code that uses string literals. 
-#if CODA_OSS_cpp20 -#define U8(s) u8##s -#else -#define U8(s) static_cast(static_cast(s)) -#endif +static inline std::u8string fromUtf8(const std::string& utf8) +{ + return str::EncodedStringView::fromUtf8(utf8).u8string(); +} static const std::string text("TEXT"); static const std::string strXml = "" + text + ""; @@ -53,14 +46,15 @@ static const std::string strXml = "" + text + ""; static const auto iso88591Text1252 = str::EncodedStringView::details::w1252string(iso88591Text.view()); static const auto pIso88591Text_ = str::c_str(iso88591Text1252); -static const str::EncodedString utf8Text(U8("T\xc3\x89XT")); // UTF-8, "T�XT" +static const str::EncodedString utf8Text(str::cast("T\xc3\x89XT")); // UTF-8, "T�XT" static const auto utf8Text8 = utf8Text.u8string(); static const auto pUtf8Text_ = str::c_str(utf8Text8); -static const auto strUtf8Xml8 = U8("") + utf8Text8 + U8(""); +static const auto strUtf8Xml8 = fromUtf8("") + utf8Text8 + fromUtf8(""); static const std::string strUtf8Xml = str::c_str(strUtf8Xml8); -static const std::string platfromText_ = sys::Platform == sys::PlatformType::Windows ? pIso88591Text_ : pUtf8Text_; +constexpr auto PlatformEncoding = xml::lite::PlatformEncoding; +static const std::string platfromText_ = PlatformEncoding == xml::lite::StringEncoding::Utf8 ? pUtf8Text_ : pIso88591Text_; namespace fs = std::filesystem; @@ -132,11 +126,28 @@ static xml::lite::Element& testXmlUtf8_(xml::lite::MinidomParser& xmlParser) return a; } -TEST_CASE(testXmlUtf8_u8string) +TEST_CASE(testXmlUtf8Legacy) { xml::lite::MinidomParser xmlParser; const auto& a = testXmlUtf8_(xmlParser); + // This is LEGACY behavior, it is INCORRECT on Linux! 
+ const auto actual = a.getCharacterData(); + #ifdef _WIN32 + TEST_ASSERT_EQ(actual, pIso88591Text_); + #else + TEST_ASSERT_EQ(actual.length(), static_cast(4)); + #endif + + const auto encoding = a.getEncoding(); + TEST_ASSERT_FALSE(encoding.has_value()); +} + +TEST_CASE(testXmlUtf8_u8string) +{ + xml::lite::MinidomParser xmlParser(true /*storeEncoding*/); + const auto& a = testXmlUtf8_(xmlParser); + coda_oss::u8string actual; a.getCharacterData(actual); TEST_ASSERT_EQ(actual, utf8Text8); @@ -144,12 +155,23 @@ TEST_CASE(testXmlUtf8_u8string) TEST_CASE(testXmlUtf8) { - xml::lite::MinidomParser xmlParser; + xml::lite::MinidomParser xmlParser(true /*storeEncoding*/); const auto& a = testXmlUtf8_(xmlParser); auto actual = a.getCharacterData(); const auto expected = platfromText_; TEST_ASSERT_EQ(actual, expected); + + std::optional encoding; // avoid compiler warning about possible uninitialized variable + encoding = a.getEncoding(); + TEST_ASSERT_TRUE(encoding.has_value()); + TEST_ASSERT(*encoding == PlatformEncoding); + + // different getCharacterData() API + encoding = a.getCharacterData(actual); + TEST_ASSERT_EQ(actual, expected); + TEST_ASSERT_TRUE(encoding.has_value()); + TEST_ASSERT(*encoding == PlatformEncoding); } TEST_CASE(testXml_setCharacterData) @@ -158,6 +180,15 @@ TEST_CASE(testXml_setCharacterData) auto& a = testXmlUtf8_(xmlParser); a.setCharacterData(utf8Text8); + auto encoding = a.getEncoding(); + TEST_ASSERT_TRUE(encoding.has_value()); + TEST_ASSERT(encoding == xml::lite::StringEncoding::Utf8); + + std::string actual; + encoding = a.getCharacterData(actual); + TEST_ASSERT_TRUE(encoding.has_value()); + TEST_ASSERT(encoding == xml::lite::StringEncoding::Utf8); + TEST_ASSERT_EQ(actual, pUtf8Text_); } static std::string testXmlPrint_(std::string& expected, const std::string& characterData) @@ -179,82 +210,26 @@ TEST_CASE(testXmlPrintSimple) TEST_ASSERT_EQ(actual, expected); } -TEST_CASE(testXmlPrintUtf8) +TEST_CASE(testXmlPrintLegacy) { - const auto 
expected = std::string("") + pUtf8Text_ + ""; - { - xml::lite::MinidomParser xmlParser; - auto& document = getDocument(xmlParser); - - const auto s8_w1252 = str::fromWindows1252(pIso88591Text_); - const auto pRootElement = document.createElement(xml::lite::QName(xml::lite::Uri(), "root"), s8_w1252); - - io::StringStream output; - pRootElement->print(output); - const auto actual = output.stream().str(); - TEST_ASSERT_EQ(actual, expected); - } - { - xml::lite::MinidomParser xmlParser; - auto& document = getDocument(xmlParser); - - const auto pRootElement = document.createElement(xml::lite::QName(xml::lite::Uri(), "root"), utf8Text8); - - io::StringStream output; - pRootElement->print(output); - const auto actual = output.stream().str(); - TEST_ASSERT_EQ(actual, expected); - } - { - xml::lite::MinidomParser xmlParser; - auto& document = getDocument(xmlParser); - - const auto pRootElement = document.createElement(xml::lite::QName(xml::lite::Uri(), "root"), platfromText_); - - io::StringStream output; - pRootElement->print(output); - const auto actual = output.stream().str(); - TEST_ASSERT_EQ(actual, expected); - } + // This is LEGACY behavior, it generates bad XML + std::string expected; + const auto actual = testXmlPrint_(expected, pIso88591Text_); + TEST_ASSERT_EQ(actual, expected); } -TEST_CASE(testXmlConsoleOutput) +TEST_CASE(testXmlPrintUtf8) { - const auto expected = "" + platfromText_ + ""; - { - xml::lite::MinidomParser xmlParser; - auto& document = getDocument(xmlParser); - - const auto s8_w1252 = str::fromWindows1252(pIso88591Text_); - const auto pRootElement = document.createElement(xml::lite::QName(xml::lite::Uri(), "root"), s8_w1252); - - io::StringStream output; - pRootElement->consoleOutput_(output); - const auto actual = output.stream().str(); - TEST_ASSERT_EQ(actual, expected); - } - { - xml::lite::MinidomParser xmlParser; - auto& document = getDocument(xmlParser); - - const auto pRootElement = document.createElement(xml::lite::QName(xml::lite::Uri(), 
"root"), utf8Text8); - - io::StringStream output; - pRootElement->consoleOutput_(output); - const auto actual = output.stream().str(); - TEST_ASSERT_EQ(actual, expected); - } - { - xml::lite::MinidomParser xmlParser; - auto& document = getDocument(xmlParser); + xml::lite::MinidomParser xmlParser; + auto& document = getDocument(xmlParser); - const auto pRootElement = document.createElement(xml::lite::QName(xml::lite::Uri(), "root"), platfromText_); + const auto pRootElement = document.createElement(xml::lite::QName(xml::lite::Uri(), "root"), pIso88591Text_, xml::lite::StringEncoding::Windows1252); - io::StringStream output; - pRootElement->consoleOutput_(output); - const auto actual = output.stream().str(); - TEST_ASSERT_EQ(actual, expected); - } + io::StringStream output; + pRootElement->print(output, xml::lite::StringEncoding::Utf8); // write UTF-8 + const auto actual = output.stream().str(); + const auto expected = std::string("") + pUtf8Text_ + ""; + TEST_ASSERT_EQ(actual, expected); } TEST_CASE(testXmlParseAndPrintUtf8) @@ -262,19 +237,18 @@ TEST_CASE(testXmlParseAndPrintUtf8) io::StringStream input; input.stream() << strUtf8Xml; - xml::lite::MinidomParser xmlParser; + xml::lite::MinidomParser xmlParser(true /*storeEncoding*/); xmlParser.preserveCharacterData(true); xmlParser.parse(input); const auto pRootElement = getDocument(xmlParser).getRootElement(); io::StringStream output; - pRootElement->print(output); + pRootElement->print(output, xml::lite::StringEncoding::Utf8); // write UTF-8 const auto actual = output.stream().str(); TEST_ASSERT_EQ(actual, strUtf8Xml); } -static void testReadEncodedXmlFile(const std::string& testName, const std::string& xmlFile, bool preserveCharacterData, - const std::string& platformText, const std::u8string& text8) +static void testReadEncodedXmlFile(const std::string& testName, const std::string& xmlFile, bool preserveCharacterData) { const auto unittests = findRoot() / "modules" / "c++" / "xml.lite" / "unittests"; @@ -286,18 
+260,20 @@ static void testReadEncodedXmlFile(const std::string& testName, const std::strin } io::FileInputStream input(path.string()); - xml::lite::MinidomParser xmlParser; + xml::lite::MinidomParser xmlParser(true /*storeEncoding*/); xmlParser.preserveCharacterData(preserveCharacterData); xmlParser.parse(input); const auto& root = getRootElement(getDocument(xmlParser)); const auto& a = root.getElementByTagName("a", true /*recurse*/); auto characterData = a.getCharacterData(); - TEST_ASSERT_EQ(characterData, platformText); + const auto encoding = a.getEncoding(); + TEST_ASSERT(encoding == PlatformEncoding); + TEST_ASSERT_EQ(characterData, platfromText_); std::u8string u8_characterData; a.getCharacterData(u8_characterData); - TEST_ASSERT_EQ(text8, u8_characterData); + TEST_ASSERT_EQ(utf8Text8, u8_characterData); const auto& textXML = root.getElementByTagName("text", true /*recurse*/); characterData = textXML.getCharacterData(); @@ -316,16 +292,13 @@ static void testReadEncodedXmlFile(const std::string& testName, const std::strin TEST_CASE(testReadEncodedXmlFiles) { // these have "" - testReadEncodedXmlFile(testName, "encoding_utf-8.xml", true /*preserveCharacterData*/, platfromText_ , utf8Text8); - testReadEncodedXmlFile(testName, "encoding_utf-8.xml", false /*preserveCharacterData*/, platfromText_ , utf8Text8); - testReadEncodedXmlFile(testName, "encoding_windows-1252.xml", true /*preserveCharacterData*/, platfromText_ , utf8Text8); - testReadEncodedXmlFile(testName, "encoding_windows-1252.xml", false /*preserveCharacterData*/, platfromText_ , utf8Text8); - testReadEncodedXmlFile(testName, "ascii_encoding_utf-8.xml", true /*preserveCharacterData*/, text , U8("TEXT")); - testReadEncodedXmlFile(testName, "ascii_encoding_utf-8.xml", false /*preserveCharacterData*/, text , U8("TEXT")); + testReadEncodedXmlFile(testName, "encoding_utf-8.xml", true /*preserveCharacterData*/); + testReadEncodedXmlFile(testName, "encoding_utf-8.xml", false /*preserveCharacterData*/); + 
testReadEncodedXmlFile(testName, "encoding_windows-1252.xml", true /*preserveCharacterData*/); + testReadEncodedXmlFile(testName, "encoding_windows-1252.xml", false /*preserveCharacterData*/); } -static void testReadXmlFile(const std::string& testName, const std::string& xmlFile, bool preserveCharacterData, - const std::string& platformText, const std::u8string& text8) +static void testReadXmlFile(const std::string& testName, const std::string& xmlFile, bool preserveCharacterData) { const auto unittests = findRoot() / "modules" / "c++" / "xml.lite" / "unittests"; @@ -337,7 +310,7 @@ static void testReadXmlFile(const std::string& testName, const std::string& xmlF } io::FileInputStream input(path.string()); - xml::lite::MinidomParser xmlParser; + xml::lite::MinidomParser xmlParser(true /*storeEncoding*/); xmlParser.preserveCharacterData(preserveCharacterData); xmlParser.parse(input); const auto& root = getRootElement(getDocument(xmlParser)); @@ -347,11 +320,13 @@ static void testReadXmlFile(const std::string& testName, const std::string& xmlF const auto& a = *(aElements[0]); auto characterData = a.getCharacterData(); - TEST_ASSERT_EQ(characterData, platformText); + const auto encoding = a.getEncoding(); + TEST_ASSERT(encoding == PlatformEncoding); + TEST_ASSERT_EQ(characterData, platfromText_); std::u8string u8_characterData; a.getCharacterData(u8_characterData); - TEST_ASSERT_EQ(text8, u8_characterData); + TEST_ASSERT_EQ(utf8Text8, u8_characterData); const auto& textXML = root.getElementByTagName("text", true /*recurse*/); characterData = textXML.getCharacterData(); @@ -370,12 +345,10 @@ static void testReadXmlFile(const std::string& testName, const std::string& xmlF TEST_CASE(testReadXmlFiles) { // These do NOT have "" - testReadXmlFile(testName, "utf-8.xml", true /*preserveCharacterData*/, platfromText_ , utf8Text8); - testReadXmlFile(testName, "utf-8.xml", false /*preserveCharacterData*/, platfromText_ , utf8Text8); - testReadXmlFile(testName, "windows-1252.xml", 
true /*preserveCharacterData*/, platfromText_ , utf8Text8); - testReadXmlFile(testName, "windows-1252.xml", false /*preserveCharacterData*/, platfromText_ , utf8Text8); - testReadXmlFile(testName, "ascii.xml", true /*preserveCharacterData*/, text , U8("TEXT")); - testReadXmlFile(testName, "ascii.xml", false /*preserveCharacterData*/, text , U8("TEXT")); + testReadXmlFile(testName, "utf-8.xml", true /*preserveCharacterData*/); + testReadXmlFile(testName, "utf-8.xml", false /*preserveCharacterData*/); + testReadXmlFile(testName, "windows-1252.xml", true /*preserveCharacterData*/); + testReadXmlFile(testName, "windows-1252.xml", false /*preserveCharacterData*/); } static bool find_string(io::FileInputStream& stream, const std::string& s) @@ -416,11 +389,14 @@ TEST_CASE(testReadEmbeddedXml) const auto result = find_string(input, " -static void testValidateXmlFile_(const std::string& testName, const std::string& xmlFile, TStringStream* pStringStream) +static void testValidateXmlFile_(const std::string& testName, const std::string& xmlFile, const xml::lite::StringEncoding* pEncoding) { const auto unittests = findRoot() / "modules" / "c++" / "xml.lite" / "unittests"; @@ -455,8 +429,8 @@ static void testValidateXmlFile_(const std::string& testName, const std::string& io::FileInputStream fis(path); std::vector errors; - const auto result = (pStringStream == nullptr) ? validator.validate(fis, path.string() /*xmlID*/, errors) : - validator.vallidateT(fis, *pStringStream, path.string() /*xmlID*/, errors); + const auto result = (pEncoding == nullptr) ? 
validator.validate(fis, path.string() /*xmlID*/, errors) : + validator.validate(fis, *pEncoding, path.string() /*xmlID*/, errors); for (const auto& error : errors) { std::clog << error.toString() << "\n"; @@ -466,12 +440,11 @@ static void testValidateXmlFile_(const std::string& testName, const std::string& } static void testValidateXmlFile(const std::string& testName, const std::string& xmlFile) { - testValidateXmlFile_(testName, xmlFile, nullptr /*pStringStream*/); + testValidateXmlFile_(testName, xmlFile, nullptr /*pEncoding*/); } -template -static void testValidateXmlFile(const std::string& testName, const std::string& xmlFile, TStringStream&& oss) +static void testValidateXmlFile(const std::string& testName, const std::string& xmlFile, xml::lite::StringEncoding encoding) { - testValidateXmlFile_(testName, xmlFile, &oss); + testValidateXmlFile_(testName, xmlFile, &encoding); } TEST_CASE(testValidateXmlFile) { @@ -481,32 +454,79 @@ TEST_CASE(testValidateXmlFile) // legacy validate() API, new string conversion testValidateXmlFile(testName, "utf-8.xml"); testValidateXmlFile(testName, "encoding_utf-8.xml"); - testValidateXmlFile(testName, "encoding_windows-1252.xml"); - testValidateXmlFile(testName, "windows-1252.xml"); // new validate() API - testValidateXmlFile(testName, "utf-8.xml", io::U8StringStream()); - testValidateXmlFile(testName, "encoding_utf-8.xml", io::U8StringStream()); - testValidateXmlFile(testName, "windows-1252.xml", io::W1252StringStream()); - testValidateXmlFile(testName, "encoding_windows-1252.xml", io::W1252StringStream()); + testValidateXmlFile(testName, "utf-8.xml", xml::lite::StringEncoding::Utf8); + testValidateXmlFile(testName, "encoding_utf-8.xml", xml::lite::StringEncoding::Utf8); + testValidateXmlFile(testName, "windows-1252.xml", xml::lite::StringEncoding::Windows1252); + testValidateXmlFile(testName, "encoding_windows-1252.xml", xml::lite::StringEncoding::Windows1252); +} + +static void testValidateXmlFileLegacy(const std::string& 
testName, const std::string& xmlFile, bool success=true) +{ + const auto unittests = findRoot() / "modules" / "c++" / "xml.lite" / "unittests"; + + const auto xsd = unittests / "doc.xsd"; + if (!exists(xsd)) // running in "externals" of a different project + { + std::clog << "Path does not exist: '" << xsd << "'\n"; + return; + } + const auto path = unittests / xmlFile; + + const std::vector schemaPaths{xsd.parent_path().string()}; // std::string -> legacy behavior + const xml::lite::Validator validator(schemaPaths, nullptr /*log*/); + + io::FileInputStream fis(path); + std::vector errors; + const auto result = validator.validate(fis, path.string() /*xmlID*/, errors); + for (const auto& error : errors) + { + std::clog << error.toString() << "\n"; + } + if (success) + { + TEST_ASSERT_FALSE(result); + TEST_ASSERT_TRUE(errors.empty()); + } + else + { + TEST_ASSERT_TRUE(result); // errors + TEST_ASSERT_FALSE(errors.empty()); + } +} +TEST_CASE(testValidateXmlFileLegacy) +{ + // these two work on all platforms + testValidateXmlFile(testName, "ascii.xml"); + testValidateXmlFile(testName, "ascii_encoding_utf-8.xml"); + + // These are OK on Windows but fail on Linux; this is as-expected with "legacy" string conversion. + constexpr auto success = sys::Platform == sys::PlatformType::Windows ? 
true : false; + testValidateXmlFileLegacy(testName, "utf-8.xml", success); + testValidateXmlFileLegacy(testName, "encoding_utf-8.xml", success); + testValidateXmlFileLegacy(testName, "windows-1252.xml", success); + testValidateXmlFileLegacy(testName, "encoding_windows-1252.xml", success); } int main(int, char**) { TEST_CHECK(testXmlParseSimple); TEST_CHECK(testXmlPreserveCharacterData); + TEST_CHECK(testXmlUtf8Legacy); TEST_CHECK(testXmlUtf8); TEST_CHECK(testXmlUtf8_u8string); TEST_CHECK(testXml_setCharacterData); TEST_CHECK(testXmlPrintSimple); + TEST_CHECK(testXmlPrintLegacy); TEST_CHECK(testXmlParseAndPrintUtf8); TEST_CHECK(testXmlPrintUtf8); - TEST_CHECK(testXmlConsoleOutput); TEST_CHECK(testReadEncodedXmlFiles); TEST_CHECK(testReadXmlFiles); TEST_CHECK(testReadEmbeddedXml); TEST_CHECK(testValidateXmlFile); + TEST_CHECK(testValidateXmlFileLegacy); } diff --git a/externals/coda-oss/modules/python/xml.lite/source/generated/xml_lite.py b/externals/coda-oss/modules/python/xml.lite/source/generated/xml_lite.py index 2c7c33f03..8daa62bb3 100644 --- a/externals/coda-oss/modules/python/xml.lite/source/generated/xml_lite.py +++ b/externals/coda-oss/modules/python/xml.lite/source/generated/xml_lite.py @@ -80,6 +80,8 @@ def __init__(self, *args): r""" __init__(Element self) -> Element __init__(Element self, std::string const & qname, std::string const & uri="", std::string const & characterData="") -> Element + __init__(Element self, std::string const & qname, std::string const & uri, std::string const & characterData, xml::lite::StringEncoding encoding) -> Element + __init__(Element self, std::string const & qname, std::string const & uri, coda_oss::u8string const & characterData) -> Element __init__(Element self, Element element) -> Element """ _xml_lite.Element_swiginit(self, _xml_lite.new_Element(*args)) @@ -135,12 +137,14 @@ def setNamespaceURI(self, prefix: "std::string", uri: "std::string") -> "void": def _print(self, *args) -> "void": r""" _print(Element self, 
io::OutputStream & stream) + _print(Element self, io::OutputStream & stream, xml::lite::StringEncoding arg3) """ return _xml_lite.Element__print(self, *args) def prettyPrint(self, *args) -> "void": r""" prettyPrint(Element self, io::OutputStream & stream, std::string const & formatter=" ") + prettyPrint(Element self, io::OutputStream & stream, xml::lite::StringEncoding arg3, std::string const & formatter=" ") """ return _xml_lite.Element_prettyPrint(self, *args) @@ -151,15 +155,27 @@ def hasElement(self, *args) -> "bool": """ return _xml_lite.Element_hasElement(self, *args) + def getEncoding(self) -> "coda_oss::optional< xml::lite::StringEncoding > const &": + r"""getEncoding(Element self) -> coda_oss::optional< xml::lite::StringEncoding > const &""" + return _xml_lite.Element_getEncoding(self) + def getCharacterData(self, *args) -> "void": r""" getCharacterData(Element self) -> std::string + getCharacterData(Element self, std::string & result) -> coda_oss::optional< xml::lite::StringEncoding > const + getCharacterData(Element self, coda_oss::u8string & result) """ return _xml_lite.Element_getCharacterData(self, *args) + def setCharacterData_(self, characters: "std::string const &", arg3: "xml::lite::StringEncoding const *") -> "void": + r"""setCharacterData_(Element self, std::string const & characters, xml::lite::StringEncoding const * arg3)""" + return _xml_lite.Element_setCharacterData_(self, characters, arg3) + def setCharacterData(self, *args) -> "void": r""" setCharacterData(Element self, std::string const & characters) + setCharacterData(Element self, std::string const & characters, xml::lite::StringEncoding arg3) + setCharacterData(Element self, coda_oss::u8string const & characters) """ return _xml_lite.Element_setCharacterData(self, *args) @@ -227,6 +243,8 @@ def clone(self) -> "xml::lite::Document *": def createElement(self, *args) -> "xml::lite::Element *": r""" createElement(Document self, std::string const & qname, std::string const & uri, 
std::string characterData="") -> Element + createElement(Document self, std::string const & qname, std::string const & uri, std::string const & characterData, xml::lite::StringEncoding arg5) -> Element + createElement(Document self, std::string const & qname, std::string const & uri, coda_oss::u8string const & characterData) -> Element """ return _xml_lite.Document_createElement(self, *args) @@ -265,8 +283,8 @@ class MinidomParser(object): thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag") __repr__ = _swig_repr - def __init__(self, storeEncoding: "bool"=True): - r"""__init__(MinidomParser self, bool storeEncoding=True) -> MinidomParser""" + def __init__(self, storeEncoding: "bool"=False): + r"""__init__(MinidomParser self, bool storeEncoding=False) -> MinidomParser""" _xml_lite.MinidomParser_swiginit(self, _xml_lite.new_MinidomParser(storeEncoding)) __swig_destroy__ = _xml_lite.delete_MinidomParser @@ -300,6 +318,10 @@ def preserveCharacterData(self, preserve: "bool") -> "void": r"""preserveCharacterData(MinidomParser self, bool preserve)""" return _xml_lite.MinidomParser_preserveCharacterData(self, preserve) + def storeEncoding(self, preserve: "bool") -> "void": + r"""storeEncoding(MinidomParser self, bool preserve)""" + return _xml_lite.MinidomParser_storeEncoding(self, preserve) + # Register MinidomParser in _xml_lite: _xml_lite.MinidomParser_swigregister(MinidomParser) diff --git a/externals/coda-oss/modules/python/xml.lite/source/generated/xml_lite_wrap.cxx b/externals/coda-oss/modules/python/xml.lite/source/generated/xml_lite_wrap.cxx index b74e536e3..6c786cac3 100644 --- a/externals/coda-oss/modules/python/xml.lite/source/generated/xml_lite_wrap.cxx +++ b/externals/coda-oss/modules/python/xml.lite/source/generated/xml_lite_wrap.cxx @@ -2698,6 +2698,7 @@ SWIGINTERN PyObject *SWIG_PyStaticMethod_New(PyObject *SWIGUNUSEDPARM(self), PyO #define SWIGTYPE_p_std__string swig_types[5] #define 
SWIGTYPE_p_std__vectorT_xml__lite__Element_p_t swig_types[6] #define SWIGTYPE_p_coda_oss__optionalT_xml__lite__string_encoding_t swig_types[7] +#define SWIGTYPE_p_sys__U8string swig_types[8] #define SWIGTYPE_p_xml__lite__Document swig_types[9] #define SWIGTYPE_p_xml__lite__Element swig_types[10] #define SWIGTYPE_p_xml__lite__MinidomParser swig_types[11] @@ -3113,6 +3114,106 @@ SWIGINTERN PyObject *_wrap_new_Element__SWIG_3(PyObject *SWIGUNUSEDPARM(self), P } +SWIGINTERN PyObject *_wrap_new_Element__SWIG_4(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + std::string *arg1 = 0 ; + std::string *arg2 = 0 ; + std::string *arg3 = 0 ; + xml::lite::StringEncoding arg4 ; + void *argp1 = 0 ; + int res1 = 0 ; + void *argp2 = 0 ; + int res2 = 0 ; + void *argp3 = 0 ; + int res3 = 0 ; + int val4 ; + int ecode4 = 0 ; + xml::lite::Element *result = 0 ; + + if ((nobjs < 4) || (nobjs > 4)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1, SWIGTYPE_p_std__string, 0 | 0); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_Element" "', argument " "1"" of type '" "std::string const &""'"); + } + if (!argp1) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_Element" "', argument " "1"" of type '" "std::string const &""'"); + } + arg1 = reinterpret_cast< std::string * >(argp1); + res2 = SWIG_ConvertPtr(swig_obj[1], &argp2, SWIGTYPE_p_std__string, 0 | 0); + if (!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "new_Element" "', argument " "2"" of type '" "std::string const &""'"); + } + if (!argp2) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_Element" "', argument " "2"" of type '" "std::string const &""'"); + } + arg2 = reinterpret_cast< std::string * >(argp2); + res3 = SWIG_ConvertPtr(swig_obj[2], &argp3, SWIGTYPE_p_std__string, 0 | 0); + if (!SWIG_IsOK(res3)) { + 
SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "new_Element" "', argument " "3"" of type '" "std::string const &""'"); + } + if (!argp3) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_Element" "', argument " "3"" of type '" "std::string const &""'"); + } + arg3 = reinterpret_cast< std::string * >(argp3); + ecode4 = SWIG_AsVal_int(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "new_Element" "', argument " "4"" of type '" "xml::lite::StringEncoding""'"); + } + arg4 = static_cast< xml::lite::StringEncoding >(val4); + result = (xml::lite::Element *)new xml::lite::Element((std::string const &)*arg1,(std::string const &)*arg2,(std::string const &)*arg3,arg4); + resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_xml__lite__Element, SWIG_POINTER_NEW | 0 ); + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_new_Element__SWIG_5(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + std::string *arg1 = 0 ; + std::string *arg2 = 0 ; + coda_oss::u8string *arg3 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + void *argp2 = 0 ; + int res2 = 0 ; + void *argp3 = 0 ; + int res3 = 0 ; + xml::lite::Element *result = 0 ; + + if ((nobjs < 3) || (nobjs > 3)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1, SWIGTYPE_p_std__string, 0 | 0); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "new_Element" "', argument " "1"" of type '" "std::string const &""'"); + } + if (!argp1) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_Element" "', argument " "1"" of type '" "std::string const &""'"); + } + arg1 = reinterpret_cast< std::string * >(argp1); + res2 = SWIG_ConvertPtr(swig_obj[1], &argp2, SWIGTYPE_p_std__string, 0 | 0); + if (!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "new_Element" "', 
argument " "2"" of type '" "std::string const &""'"); + } + if (!argp2) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_Element" "', argument " "2"" of type '" "std::string const &""'"); + } + arg2 = reinterpret_cast< std::string * >(argp2); + res3 = SWIG_ConvertPtr(swig_obj[2], &argp3, SWIGTYPE_p_sys__U8string, 0 | 0); + if (!SWIG_IsOK(res3)) { + SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "new_Element" "', argument " "3"" of type '" "coda_oss::u8string const &""'"); + } + if (!argp3) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "new_Element" "', argument " "3"" of type '" "coda_oss::u8string const &""'"); + } + arg3 = reinterpret_cast< coda_oss::u8string * >(argp3); + result = (xml::lite::Element *)new xml::lite::Element((std::string const &)*arg1,(std::string const &)*arg2,(coda_oss::u8string const &)*arg3); + resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_xml__lite__Element, SWIG_POINTER_NEW | 0 ); + return resultobj; +fail: + return NULL; +} + + SWIGINTERN PyObject *_wrap_delete_Element(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { PyObject *resultobj = 0; xml::lite::Element *arg1 = (xml::lite::Element *) 0 ; @@ -3157,7 +3258,7 @@ SWIGINTERN PyObject *_wrap_Element_destroyChildren(PyObject *SWIGUNUSEDPARM(self } -SWIGINTERN PyObject *_wrap_new_Element__SWIG_4(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { +SWIGINTERN PyObject *_wrap_new_Element__SWIG_6(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { PyObject *resultobj = 0; xml::lite::Element *arg1 = 0 ; void *argp1 = 0 ; @@ -3205,7 +3306,7 @@ SWIGINTERN PyObject *_wrap_new_Element(PyObject *self, PyObject *args) { int res = SWIG_ConvertPtr(argv[0], 0, SWIGTYPE_p_xml__lite__Element, SWIG_POINTER_NO_NULL | 0); _v = SWIG_CheckState(res); if (_v) { - return _wrap_new_Element__SWIG_4(self, argc, argv); + return _wrap_new_Element__SWIG_6(self, argc, argv); } 
} if (argc == 2) { @@ -3220,6 +3321,22 @@ SWIGINTERN PyObject *_wrap_new_Element(PyObject *self, PyObject *args) { } } } + if (argc == 3) { + int _v; + int res = SWIG_ConvertPtr(argv[0], 0, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_ConvertPtr(argv[1], 0, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_ConvertPtr(argv[2], 0, SWIGTYPE_p_sys__U8string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + return _wrap_new_Element__SWIG_5(self, argc, argv); + } + } + } + } if (argc == 3) { int _v; int res = SWIG_ConvertPtr(argv[0], 0, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL | 0); @@ -3236,6 +3353,28 @@ SWIGINTERN PyObject *_wrap_new_Element(PyObject *self, PyObject *args) { } } } + if (argc == 4) { + int _v; + int res = SWIG_ConvertPtr(argv[0], 0, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_ConvertPtr(argv[1], 0, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_ConvertPtr(argv[2], 0, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + { + int res = SWIG_AsVal_int(argv[3], NULL); + _v = SWIG_CheckState(res); + } + if (_v) { + return _wrap_new_Element__SWIG_4(self, argc, argv); + } + } + } + } + } fail: SWIG_Python_RaiseOrModifyTypeError("Wrong number or type of arguments for overloaded function 'new_Element'.\n" @@ -3244,6 +3383,8 @@ SWIGINTERN PyObject *_wrap_new_Element(PyObject *self, PyObject *args) { " xml::lite::Element::Element(std::string const &,std::string const &,std::string const &)\n" " xml::lite::Element::Element(std::string const &,std::string const &)\n" " xml::lite::Element::Element(std::string const &)\n" + " xml::lite::Element::Element(std::string const &,std::string const &,std::string const &,xml::lite::StringEncoding)\n" + " 
xml::lite::Element::Element(std::string const &,std::string const &,coda_oss::u8string const &)\n" " xml::lite::Element::Element(xml::lite::Element const &)\n"); return 0; } @@ -5117,6 +5258,45 @@ SWIGINTERN PyObject *_wrap_Element__print__SWIG_0(PyObject *SWIGUNUSEDPARM(self) } +SWIGINTERN PyObject *_wrap_Element__print__SWIG_1(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + xml::lite::Element *arg1 = (xml::lite::Element *) 0 ; + io::OutputStream *arg2 = 0 ; + xml::lite::StringEncoding arg3 ; + void *argp1 = 0 ; + int res1 = 0 ; + void *argp2 = 0 ; + int res2 = 0 ; + int val3 ; + int ecode3 = 0 ; + + if ((nobjs < 3) || (nobjs > 3)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_xml__lite__Element, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Element__print" "', argument " "1"" of type '" "xml::lite::Element const *""'"); + } + arg1 = reinterpret_cast< xml::lite::Element * >(argp1); + res2 = SWIG_ConvertPtr(swig_obj[1], &argp2, SWIGTYPE_p_io__OutputStream, 0 ); + if (!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Element__print" "', argument " "2"" of type '" "io::OutputStream &""'"); + } + if (!argp2) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Element__print" "', argument " "2"" of type '" "io::OutputStream &""'"); + } + arg2 = reinterpret_cast< io::OutputStream * >(argp2); + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "Element__print" "', argument " "3"" of type '" "xml::lite::StringEncoding""'"); + } + arg3 = static_cast< xml::lite::StringEncoding >(val3); + ((xml::lite::Element const *)arg1)->print(*arg2,arg3); + resultobj = SWIG_Py_Void(); + return resultobj; +fail: + return NULL; +} + + SWIGINTERN PyObject *_wrap_Element__print(PyObject *self, PyObject *args) { Py_ssize_t argc; 
PyObject *argv[4] = { @@ -5139,11 +5319,32 @@ SWIGINTERN PyObject *_wrap_Element__print(PyObject *self, PyObject *args) { } } } + if (argc == 3) { + int _v; + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[0], &vptr, SWIGTYPE_p_xml__lite__Element, 0); + _v = SWIG_CheckState(res); + if (_v) { + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[1], &vptr, SWIGTYPE_p_io__OutputStream, SWIG_POINTER_NO_NULL); + _v = SWIG_CheckState(res); + if (_v) { + { + int res = SWIG_AsVal_int(argv[2], NULL); + _v = SWIG_CheckState(res); + } + if (_v) { + return _wrap_Element__print__SWIG_1(self, argc, argv); + } + } + } + } fail: SWIG_Python_RaiseOrModifyTypeError("Wrong number or type of arguments for overloaded function 'Element__print'.\n" " Possible C/C++ prototypes are:\n" - " xml::lite::Element::print(io::OutputStream &) const\n"); + " xml::lite::Element::print(io::OutputStream &) const\n" + " xml::lite::Element::print(io::OutputStream &,xml::lite::StringEncoding) const\n"); return 0; } @@ -5221,6 +5422,95 @@ SWIGINTERN PyObject *_wrap_Element_prettyPrint__SWIG_1(PyObject *SWIGUNUSEDPARM( } +SWIGINTERN PyObject *_wrap_Element_prettyPrint__SWIG_2(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + xml::lite::Element *arg1 = (xml::lite::Element *) 0 ; + io::OutputStream *arg2 = 0 ; + xml::lite::StringEncoding arg3 ; + std::string *arg4 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + void *argp2 = 0 ; + int res2 = 0 ; + int val3 ; + int ecode3 = 0 ; + void *argp4 = 0 ; + int res4 = 0 ; + + if ((nobjs < 4) || (nobjs > 4)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_xml__lite__Element, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Element_prettyPrint" "', argument " "1"" of type '" "xml::lite::Element const *""'"); + } + arg1 = reinterpret_cast< xml::lite::Element * >(argp1); + res2 = SWIG_ConvertPtr(swig_obj[1], &argp2, SWIGTYPE_p_io__OutputStream, 0 ); + if 
(!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Element_prettyPrint" "', argument " "2"" of type '" "io::OutputStream &""'"); + } + if (!argp2) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Element_prettyPrint" "', argument " "2"" of type '" "io::OutputStream &""'"); + } + arg2 = reinterpret_cast< io::OutputStream * >(argp2); + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "Element_prettyPrint" "', argument " "3"" of type '" "xml::lite::StringEncoding""'"); + } + arg3 = static_cast< xml::lite::StringEncoding >(val3); + res4 = SWIG_ConvertPtr(swig_obj[3], &argp4, SWIGTYPE_p_std__string, 0 | 0); + if (!SWIG_IsOK(res4)) { + SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "Element_prettyPrint" "', argument " "4"" of type '" "std::string const &""'"); + } + if (!argp4) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Element_prettyPrint" "', argument " "4"" of type '" "std::string const &""'"); + } + arg4 = reinterpret_cast< std::string * >(argp4); + ((xml::lite::Element const *)arg1)->prettyPrint(*arg2,arg3,(std::string const &)*arg4); + resultobj = SWIG_Py_Void(); + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_Element_prettyPrint__SWIG_3(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + xml::lite::Element *arg1 = (xml::lite::Element *) 0 ; + io::OutputStream *arg2 = 0 ; + xml::lite::StringEncoding arg3 ; + void *argp1 = 0 ; + int res1 = 0 ; + void *argp2 = 0 ; + int res2 = 0 ; + int val3 ; + int ecode3 = 0 ; + + if ((nobjs < 3) || (nobjs > 3)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_xml__lite__Element, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Element_prettyPrint" "', argument " "1"" of type '" "xml::lite::Element const 
*""'"); + } + arg1 = reinterpret_cast< xml::lite::Element * >(argp1); + res2 = SWIG_ConvertPtr(swig_obj[1], &argp2, SWIGTYPE_p_io__OutputStream, 0 ); + if (!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Element_prettyPrint" "', argument " "2"" of type '" "io::OutputStream &""'"); + } + if (!argp2) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Element_prettyPrint" "', argument " "2"" of type '" "io::OutputStream &""'"); + } + arg2 = reinterpret_cast< io::OutputStream * >(argp2); + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "Element_prettyPrint" "', argument " "3"" of type '" "xml::lite::StringEncoding""'"); + } + arg3 = static_cast< xml::lite::StringEncoding >(val3); + ((xml::lite::Element const *)arg1)->prettyPrint(*arg2,arg3); + resultobj = SWIG_Py_Void(); + return resultobj; +fail: + return NULL; +} + + SWIGINTERN PyObject *_wrap_Element_prettyPrint(PyObject *self, PyObject *args) { Py_ssize_t argc; PyObject *argv[5] = { @@ -5261,12 +5551,58 @@ SWIGINTERN PyObject *_wrap_Element_prettyPrint(PyObject *self, PyObject *args) { } } } - -fail: - SWIG_Python_RaiseOrModifyTypeError("Wrong number or type of arguments for overloaded function 'Element_prettyPrint'.\n" - " Possible C/C++ prototypes are:\n" + if (argc == 3) { + int _v; + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[0], &vptr, SWIGTYPE_p_xml__lite__Element, 0); + _v = SWIG_CheckState(res); + if (_v) { + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[1], &vptr, SWIGTYPE_p_io__OutputStream, SWIG_POINTER_NO_NULL); + _v = SWIG_CheckState(res); + if (_v) { + { + int res = SWIG_AsVal_int(argv[2], NULL); + _v = SWIG_CheckState(res); + } + if (_v) { + return _wrap_Element_prettyPrint__SWIG_3(self, argc, argv); + } + } + } + } + if (argc == 4) { + int _v; + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[0], &vptr, SWIGTYPE_p_xml__lite__Element, 0); + _v = 
SWIG_CheckState(res); + if (_v) { + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[1], &vptr, SWIGTYPE_p_io__OutputStream, SWIG_POINTER_NO_NULL); + _v = SWIG_CheckState(res); + if (_v) { + { + int res = SWIG_AsVal_int(argv[2], NULL); + _v = SWIG_CheckState(res); + } + if (_v) { + int res = SWIG_ConvertPtr(argv[3], 0, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + return _wrap_Element_prettyPrint__SWIG_2(self, argc, argv); + } + } + } + } + } + +fail: + SWIG_Python_RaiseOrModifyTypeError("Wrong number or type of arguments for overloaded function 'Element_prettyPrint'.\n" + " Possible C/C++ prototypes are:\n" " xml::lite::Element::prettyPrint(io::OutputStream &,std::string const &) const\n" - " xml::lite::Element::prettyPrint(io::OutputStream &) const\n"); + " xml::lite::Element::prettyPrint(io::OutputStream &) const\n" + " xml::lite::Element::prettyPrint(io::OutputStream &,xml::lite::StringEncoding,std::string const &) const\n" + " xml::lite::Element::prettyPrint(io::OutputStream &,xml::lite::StringEncoding) const\n"); return 0; } @@ -5415,6 +5751,91 @@ SWIGINTERN PyObject *_wrap_Element_getCharacterData__SWIG_0(PyObject *SWIGUNUSED } +SWIGINTERN PyObject *_wrap_Element_getEncoding(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + PyObject *resultobj = 0; + xml::lite::Element *arg1 = (xml::lite::Element *) 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + PyObject *swig_obj[1] ; + coda_oss::optional* result = 0; + + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_xml__lite__Element, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Element_getEncoding" "', argument " "1"" of type '" "xml::lite::Element const *""'"); + } + arg1 = reinterpret_cast< xml::lite::Element * >(argp1); + result = (coda_oss::optional< xml::lite::StringEncoding > *) &((xml::lite::Element const *)arg1)->getEncoding(); + resultobj = 
SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_coda_oss__optionalT_xml__lite__string_encoding_t, 0 | 0 ); + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_Element_getCharacterData__SWIG_1(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + xml::lite::Element *arg1 = (xml::lite::Element *) 0 ; + std::string *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + void *argp2 = 0 ; + int res2 = 0 ; + coda_oss::optional< xml::lite::StringEncoding > *result = 0 ; + + if ((nobjs < 2) || (nobjs > 2)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_xml__lite__Element, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Element_getCharacterData" "', argument " "1"" of type '" "xml::lite::Element const *""'"); + } + arg1 = reinterpret_cast< xml::lite::Element * >(argp1); + res2 = SWIG_ConvertPtr(swig_obj[1], &argp2, SWIGTYPE_p_std__string, 0 ); + if (!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Element_getCharacterData" "', argument " "2"" of type '" "std::string &""'"); + } + if (!argp2) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Element_getCharacterData" "', argument " "2"" of type '" "std::string &""'"); + } + arg2 = reinterpret_cast< std::string * >(argp2); + result = (coda_oss::optional< xml::lite::StringEncoding > *) &((xml::lite::Element const *)arg1)->getCharacterData(*arg2); + resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_coda_oss__optionalT_xml__lite__string_encoding_t, 0 | 0 ); + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_Element_getCharacterData__SWIG_2(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + xml::lite::Element *arg1 = (xml::lite::Element *) 0 ; + coda_oss::u8string *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + void *argp2 = 0 ; + int 
res2 = 0 ; + + if ((nobjs < 2) || (nobjs > 2)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_xml__lite__Element, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Element_getCharacterData" "', argument " "1"" of type '" "xml::lite::Element const *""'"); + } + arg1 = reinterpret_cast< xml::lite::Element * >(argp1); + res2 = SWIG_ConvertPtr(swig_obj[1], &argp2, SWIGTYPE_p_sys__U8string, 0 ); + if (!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Element_getCharacterData" "', argument " "2"" of type '" "coda_oss::u8string &""'"); + } + if (!argp2) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Element_getCharacterData" "', argument " "2"" of type '" "coda_oss::u8string &""'"); + } + arg2 = reinterpret_cast< coda_oss::u8string * >(argp2); + ((xml::lite::Element const *)arg1)->getCharacterData(*arg2); + resultobj = SWIG_Py_Void(); + return resultobj; +fail: + return NULL; +} + SWIGINTERN PyObject *_wrap_Element_getCharacterData(PyObject *self, PyObject *args) { Py_ssize_t argc; @@ -5433,15 +5854,85 @@ SWIGINTERN PyObject *_wrap_Element_getCharacterData(PyObject *self, PyObject *ar return _wrap_Element_getCharacterData__SWIG_0(self, argc, argv); } } + if (argc == 2) { + int _v; + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[0], &vptr, SWIGTYPE_p_xml__lite__Element, 0); + _v = SWIG_CheckState(res); + if (_v) { + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[1], &vptr, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL); + _v = SWIG_CheckState(res); + if (_v) { + return _wrap_Element_getCharacterData__SWIG_1(self, argc, argv); + } + } + } + if (argc == 2) { + int _v; + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[0], &vptr, SWIGTYPE_p_xml__lite__Element, 0); + _v = SWIG_CheckState(res); + if (_v) { + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[1], &vptr, SWIGTYPE_p_sys__U8string, SWIG_POINTER_NO_NULL); + _v = SWIG_CheckState(res); + if 
(_v) { + return _wrap_Element_getCharacterData__SWIG_2(self, argc, argv); + } + } + } fail: SWIG_Python_RaiseOrModifyTypeError("Wrong number or type of arguments for overloaded function 'Element_getCharacterData'.\n" " Possible C/C++ prototypes are:\n" - " xml::lite::Element::getCharacterData() const\n"); + " xml::lite::Element::getCharacterData() const\n" + " xml::lite::Element::getCharacterData(std::string &) const\n" + " xml::lite::Element::getCharacterData(coda_oss::u8string &) const\n"); return 0; } +SWIGINTERN PyObject *_wrap_Element_setCharacterData_(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + PyObject *resultobj = 0; + xml::lite::Element *arg1 = (xml::lite::Element *) 0 ; + std::string *arg2 = 0 ; + xml::lite::StringEncoding *arg3 = (xml::lite::StringEncoding *) 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + void *argp2 = 0 ; + int res2 = 0 ; + void *argp3 = 0 ; + int res3 = 0 ; + PyObject *swig_obj[3] ; + + if (!SWIG_Python_UnpackTuple(args, "Element_setCharacterData_", 3, 3, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_xml__lite__Element, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Element_setCharacterData_" "', argument " "1"" of type '" "xml::lite::Element *""'"); + } + arg1 = reinterpret_cast< xml::lite::Element * >(argp1); + res2 = SWIG_ConvertPtr(swig_obj[1], &argp2, SWIGTYPE_p_std__string, 0 | 0); + if (!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Element_setCharacterData_" "', argument " "2"" of type '" "std::string const &""'"); + } + if (!argp2) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Element_setCharacterData_" "', argument " "2"" of type '" "std::string const &""'"); + } + arg2 = reinterpret_cast< std::string * >(argp2); + res3 = SWIG_ConvertPtr(swig_obj[2], &argp3,SWIGTYPE_p_xml__lite__string_encoding, 0 | 0 ); + if (!SWIG_IsOK(res3)) { + SWIG_exception_fail(SWIG_ArgError(res3), "in 
method '" "Element_setCharacterData_" "', argument " "3"" of type '" "xml::lite::StringEncoding const *""'"); + } + arg3 = reinterpret_cast< xml::lite::StringEncoding * >(argp3); + (arg1)->setCharacterData_((std::string const &)*arg2,(xml::lite::StringEncoding const *)arg3); + resultobj = SWIG_Py_Void(); + return resultobj; +fail: + return NULL; +} + + SWIGINTERN PyObject *_wrap_Element_setCharacterData__SWIG_0(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { PyObject *resultobj = 0; xml::lite::Element *arg1 = (xml::lite::Element *) 0 ; @@ -5472,6 +5963,77 @@ SWIGINTERN PyObject *_wrap_Element_setCharacterData__SWIG_0(PyObject *SWIGUNUSED return NULL; } + +SWIGINTERN PyObject *_wrap_Element_setCharacterData__SWIG_1(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + xml::lite::Element *arg1 = (xml::lite::Element *) 0 ; + std::string *arg2 = 0 ; + xml::lite::StringEncoding arg3 ; + void *argp1 = 0 ; + int res1 = 0 ; + void *argp2 = 0 ; + int res2 = 0 ; + int val3 ; + int ecode3 = 0 ; + + if ((nobjs < 3) || (nobjs > 3)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_xml__lite__Element, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Element_setCharacterData" "', argument " "1"" of type '" "xml::lite::Element *""'"); + } + arg1 = reinterpret_cast< xml::lite::Element * >(argp1); + res2 = SWIG_ConvertPtr(swig_obj[1], &argp2, SWIGTYPE_p_std__string, 0 | 0); + if (!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Element_setCharacterData" "', argument " "2"" of type '" "std::string const &""'"); + } + if (!argp2) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Element_setCharacterData" "', argument " "2"" of type '" "std::string const &""'"); + } + arg2 = reinterpret_cast< std::string * >(argp2); + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + 
SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "Element_setCharacterData" "', argument " "3"" of type '" "xml::lite::StringEncoding""'"); + } + arg3 = static_cast< xml::lite::StringEncoding >(val3); + (arg1)->setCharacterData((std::string const &)*arg2,arg3); + resultobj = SWIG_Py_Void(); + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_Element_setCharacterData__SWIG_2(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + xml::lite::Element *arg1 = (xml::lite::Element *) 0 ; + coda_oss::u8string *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + void *argp2 = 0 ; + int res2 = 0 ; + + if ((nobjs < 2) || (nobjs > 2)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_xml__lite__Element, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Element_setCharacterData" "', argument " "1"" of type '" "xml::lite::Element *""'"); + } + arg1 = reinterpret_cast< xml::lite::Element * >(argp1); + res2 = SWIG_ConvertPtr(swig_obj[1], &argp2, SWIGTYPE_p_sys__U8string, 0 | 0); + if (!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Element_setCharacterData" "', argument " "2"" of type '" "coda_oss::u8string const &""'"); + } + if (!argp2) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Element_setCharacterData" "', argument " "2"" of type '" "coda_oss::u8string const &""'"); + } + arg2 = reinterpret_cast< coda_oss::u8string * >(argp2); + (arg1)->setCharacterData((coda_oss::u8string const &)*arg2); + resultobj = SWIG_Py_Void(); + return resultobj; +fail: + return NULL; +} + + SWIGINTERN PyObject *_wrap_Element_setCharacterData(PyObject *self, PyObject *args) { Py_ssize_t argc; PyObject *argv[4] = { @@ -5493,11 +6055,45 @@ SWIGINTERN PyObject *_wrap_Element_setCharacterData(PyObject *self, PyObject *ar } } } + if (argc == 2) { + int _v; + void *vptr = 0; + int res = 
SWIG_ConvertPtr(argv[0], &vptr, SWIGTYPE_p_xml__lite__Element, 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_ConvertPtr(argv[1], 0, SWIGTYPE_p_sys__U8string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + return _wrap_Element_setCharacterData__SWIG_2(self, argc, argv); + } + } + } + if (argc == 3) { + int _v; + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[0], &vptr, SWIGTYPE_p_xml__lite__Element, 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_ConvertPtr(argv[1], 0, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + { + int res = SWIG_AsVal_int(argv[2], NULL); + _v = SWIG_CheckState(res); + } + if (_v) { + return _wrap_Element_setCharacterData__SWIG_1(self, argc, argv); + } + } + } + } fail: SWIG_Python_RaiseOrModifyTypeError("Wrong number or type of arguments for overloaded function 'Element_setCharacterData'.\n" " Possible C/C++ prototypes are:\n" - " xml::lite::Element::setCharacterData(std::string const &)\n"); + " xml::lite::Element::setCharacterData(std::string const &)\n" + " xml::lite::Element::setCharacterData(std::string const &,xml::lite::StringEncoding)\n" + " xml::lite::Element::setCharacterData(coda_oss::u8string const &)\n"); return 0; } @@ -6086,6 +6682,121 @@ SWIGINTERN PyObject *_wrap_Document_createElement__SWIG_1(PyObject *SWIGUNUSEDPA } +SWIGINTERN PyObject *_wrap_Document_createElement__SWIG_2(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + xml::lite::Document *arg1 = (xml::lite::Document *) 0 ; + std::string *arg2 = 0 ; + std::string *arg3 = 0 ; + std::string *arg4 = 0 ; + xml::lite::StringEncoding arg5 ; + void *argp1 = 0 ; + int res1 = 0 ; + void *argp2 = 0 ; + int res2 = 0 ; + void *argp3 = 0 ; + int res3 = 0 ; + void *argp4 = 0 ; + int res4 = 0 ; + int val5 ; + int ecode5 = 0 ; + xml::lite::Element *result = 0 ; + + if ((nobjs < 5) || (nobjs > 5)) SWIG_fail; + res1 = 
SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_xml__lite__Document, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Document_createElement" "', argument " "1"" of type '" "xml::lite::Document *""'"); + } + arg1 = reinterpret_cast< xml::lite::Document * >(argp1); + res2 = SWIG_ConvertPtr(swig_obj[1], &argp2, SWIGTYPE_p_std__string, 0 | 0); + if (!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Document_createElement" "', argument " "2"" of type '" "std::string const &""'"); + } + if (!argp2) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Document_createElement" "', argument " "2"" of type '" "std::string const &""'"); + } + arg2 = reinterpret_cast< std::string * >(argp2); + res3 = SWIG_ConvertPtr(swig_obj[2], &argp3, SWIGTYPE_p_std__string, 0 | 0); + if (!SWIG_IsOK(res3)) { + SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "Document_createElement" "', argument " "3"" of type '" "std::string const &""'"); + } + if (!argp3) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Document_createElement" "', argument " "3"" of type '" "std::string const &""'"); + } + arg3 = reinterpret_cast< std::string * >(argp3); + res4 = SWIG_ConvertPtr(swig_obj[3], &argp4, SWIGTYPE_p_std__string, 0 | 0); + if (!SWIG_IsOK(res4)) { + SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "Document_createElement" "', argument " "4"" of type '" "std::string const &""'"); + } + if (!argp4) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Document_createElement" "', argument " "4"" of type '" "std::string const &""'"); + } + arg4 = reinterpret_cast< std::string * >(argp4); + ecode5 = SWIG_AsVal_int(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "Document_createElement" "', argument " "5"" of type '" "xml::lite::StringEncoding""'"); + } + arg5 = static_cast< 
xml::lite::StringEncoding >(val5); + result = (xml::lite::Element *)(arg1)->createElement((std::string const &)*arg2,(std::string const &)*arg3,(std::string const &)*arg4,arg5); + resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_xml__lite__Element, 0 | 0 ); + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_Document_createElement__SWIG_3(PyObject *SWIGUNUSEDPARM(self), Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + xml::lite::Document *arg1 = (xml::lite::Document *) 0 ; + std::string *arg2 = 0 ; + std::string *arg3 = 0 ; + coda_oss::u8string *arg4 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + void *argp2 = 0 ; + int res2 = 0 ; + void *argp3 = 0 ; + int res3 = 0 ; + void *argp4 = 0 ; + int res4 = 0 ; + xml::lite::Element *result = 0 ; + + if ((nobjs < 4) || (nobjs > 4)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_xml__lite__Document, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Document_createElement" "', argument " "1"" of type '" "xml::lite::Document *""'"); + } + arg1 = reinterpret_cast< xml::lite::Document * >(argp1); + res2 = SWIG_ConvertPtr(swig_obj[1], &argp2, SWIGTYPE_p_std__string, 0 | 0); + if (!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Document_createElement" "', argument " "2"" of type '" "std::string const &""'"); + } + if (!argp2) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Document_createElement" "', argument " "2"" of type '" "std::string const &""'"); + } + arg2 = reinterpret_cast< std::string * >(argp2); + res3 = SWIG_ConvertPtr(swig_obj[2], &argp3, SWIGTYPE_p_std__string, 0 | 0); + if (!SWIG_IsOK(res3)) { + SWIG_exception_fail(SWIG_ArgError(res3), "in method '" "Document_createElement" "', argument " "3"" of type '" "std::string const &""'"); + } + if (!argp3) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" 
"Document_createElement" "', argument " "3"" of type '" "std::string const &""'"); + } + arg3 = reinterpret_cast< std::string * >(argp3); + res4 = SWIG_ConvertPtr(swig_obj[3], &argp4, SWIGTYPE_p_sys__U8string, 0 | 0); + if (!SWIG_IsOK(res4)) { + SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "Document_createElement" "', argument " "4"" of type '" "coda_oss::u8string const &""'"); + } + if (!argp4) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "Document_createElement" "', argument " "4"" of type '" "coda_oss::u8string const &""'"); + } + arg4 = reinterpret_cast< coda_oss::u8string * >(argp4); + result = (xml::lite::Element *)(arg1)->createElement((std::string const &)*arg2,(std::string const &)*arg3,(coda_oss::u8string const &)*arg4); + resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_xml__lite__Element, 0 | 0 ); + return resultobj; +fail: + return NULL; +} + SWIGINTERN PyObject *_wrap_Document_createElement(PyObject *self, PyObject *args) { Py_ssize_t argc; @@ -6133,12 +6844,62 @@ SWIGINTERN PyObject *_wrap_Document_createElement(PyObject *self, PyObject *args } } } + if (argc == 4) { + int _v; + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[0], &vptr, SWIGTYPE_p_xml__lite__Document, 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_ConvertPtr(argv[1], 0, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_ConvertPtr(argv[2], 0, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_ConvertPtr(argv[3], 0, SWIGTYPE_p_sys__U8string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + return _wrap_Document_createElement__SWIG_3(self, argc, argv); + } + } + } + } + } + if (argc == 5) { + int _v; + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[0], &vptr, SWIGTYPE_p_xml__lite__Document, 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_ConvertPtr(argv[1], 
0, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_ConvertPtr(argv[2], 0, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_ConvertPtr(argv[3], 0, SWIGTYPE_p_std__string, SWIG_POINTER_NO_NULL | 0); + _v = SWIG_CheckState(res); + if (_v) { + { + int res = SWIG_AsVal_int(argv[4], NULL); + _v = SWIG_CheckState(res); + } + if (_v) { + return _wrap_Document_createElement__SWIG_2(self, argc, argv); + } + } + } + } + } + } fail: SWIG_Python_RaiseOrModifyTypeError("Wrong number or type of arguments for overloaded function 'Document_createElement'.\n" " Possible C/C++ prototypes are:\n" " xml::lite::Document::createElement(std::string const &,std::string const &,std::string)\n" - " xml::lite::Document::createElement(std::string const &,std::string const &)\n"); + " xml::lite::Document::createElement(std::string const &,std::string const &)\n" + " xml::lite::Document::createElement(std::string const &,std::string const &,std::string const &,xml::lite::StringEncoding)\n" + " xml::lite::Document::createElement(std::string const &,std::string const &,coda_oss::u8string const &)\n"); return 0; } @@ -7143,6 +7904,36 @@ SWIGINTERN PyObject *_wrap_MinidomParser_preserveCharacterData(PyObject *SWIGUNU return NULL; } + +SWIGINTERN PyObject *_wrap_MinidomParser_storeEncoding(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + PyObject *resultobj = 0; + xml::lite::MinidomParser *arg1 = (xml::lite::MinidomParser *) 0 ; + bool arg2 ; + void *argp1 = 0 ; + int res1 = 0 ; + bool val2 ; + int ecode2 = 0 ; + PyObject *swig_obj[2] ; + + if (!SWIG_Python_UnpackTuple(args, "MinidomParser_storeEncoding", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_xml__lite__MinidomParser, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "MinidomParser_storeEncoding" "', argument " "1"" of type '" 
"xml::lite::MinidomParser *""'"); + } + arg1 = reinterpret_cast< xml::lite::MinidomParser * >(argp1); + ecode2 = SWIG_AsVal_bool(swig_obj[1], &val2); + if (!SWIG_IsOK(ecode2)) { + SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "MinidomParser_storeEncoding" "', argument " "2"" of type '" "bool""'"); + } + arg2 = static_cast< bool >(val2); + (arg1)->storeEncoding(arg2); + resultobj = SWIG_Py_Void(); + return resultobj; +fail: + return NULL; +} + + SWIGINTERN PyObject *MinidomParser_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { PyObject *obj; if (!SWIG_Python_UnpackTuple(args, "swigregister", 1, 1, &obj)) return NULL; @@ -7161,6 +7952,8 @@ static PyMethodDef SwigMethods[] = { { "new_Element", _wrap_new_Element, METH_VARARGS, "\n" "Element()\n" "Element(std::string const & qname, std::string const & uri=\"\", std::string const & characterData=\"\")\n" + "Element(std::string const & qname, std::string const & uri, std::string const & characterData, xml::lite::StringEncoding encoding)\n" + "Element(std::string const & qname, std::string const & uri, coda_oss::u8string const & characterData)\n" "new_Element(Element element) -> Element\n" ""}, { "Element_clone", _wrap_Element_clone, METH_VARARGS, "Element_clone(Element self, Element element)"}, @@ -7189,19 +7982,27 @@ static PyMethodDef SwigMethods[] = { { "Element_setNamespaceURI", _wrap_Element_setNamespaceURI, METH_VARARGS, "Element_setNamespaceURI(Element self, std::string prefix, std::string uri)"}, { "Element__print", _wrap_Element__print, METH_VARARGS, "\n" "Element__print(Element self, io::OutputStream & stream)\n" + "Element__print(Element self, io::OutputStream & stream, xml::lite::StringEncoding arg3)\n" ""}, { "Element_prettyPrint", _wrap_Element_prettyPrint, METH_VARARGS, "\n" "Element_prettyPrint(Element self, io::OutputStream & stream, std::string const & formatter=\" \")\n" + "Element_prettyPrint(Element self, io::OutputStream & stream, xml::lite::StringEncoding arg3, std::string 
const & formatter=\" \")\n" ""}, { "Element_hasElement", _wrap_Element_hasElement, METH_VARARGS, "\n" "Element_hasElement(Element self, std::string const & localName) -> bool\n" "Element_hasElement(Element self, std::string const & uri, std::string const & localName) -> bool\n" ""}, + { "Element_getEncoding", _wrap_Element_getEncoding, METH_O, "Element_getEncoding(Element self) -> coda_oss::optional< xml::lite::StringEncoding > const &"}, { "Element_getCharacterData", _wrap_Element_getCharacterData, METH_VARARGS, "\n" "Element_getCharacterData(Element self) -> std::string\n" + "Element_getCharacterData(Element self, std::string & result) -> coda_oss::optional< xml::lite::StringEncoding > const\n" + "Element_getCharacterData(Element self, coda_oss::u8string & result)\n" ""}, + { "Element_setCharacterData_", _wrap_Element_setCharacterData_, METH_VARARGS, "Element_setCharacterData_(Element self, std::string const & characters, xml::lite::StringEncoding const * arg3)"}, { "Element_setCharacterData", _wrap_Element_setCharacterData, METH_VARARGS, "\n" "Element_setCharacterData(Element self, std::string const & characters)\n" + "Element_setCharacterData(Element self, std::string const & characters, xml::lite::StringEncoding arg3)\n" + "Element_setCharacterData(Element self, coda_oss::u8string const & characters)\n" ""}, { "Element_setLocalName", _wrap_Element_setLocalName, METH_VARARGS, "Element_setLocalName(Element self, std::string const & localName)"}, { "Element_getLocalName", _wrap_Element_getLocalName, METH_O, "Element_getLocalName(Element self) -> std::string"}, @@ -7223,6 +8024,8 @@ static PyMethodDef SwigMethods[] = { { "Document_clone", _wrap_Document_clone, METH_O, "Document_clone(Document self) -> Document"}, { "Document_createElement", _wrap_Document_createElement, METH_VARARGS, "\n" "Document_createElement(Document self, std::string const & qname, std::string const & uri, std::string characterData=\"\") -> Element\n" + "Document_createElement(Document self, 
std::string const & qname, std::string const & uri, std::string const & characterData, xml::lite::StringEncoding arg5) -> Element\n" + "Document_createElement(Document self, std::string const & qname, std::string const & uri, coda_oss::u8string const & characterData) -> Element\n" ""}, { "Document_destroy", _wrap_Document_destroy, METH_O, "Document_destroy(Document self)"}, { "Document_insert", _wrap_Document_insert, METH_VARARGS, "Document_insert(Document self, Element element, Element underThis)"}, @@ -7237,7 +8040,7 @@ static PyMethodDef SwigMethods[] = { ""}, { "Document_swigregister", Document_swigregister, METH_O, NULL}, { "Document_swiginit", Document_swiginit, METH_VARARGS, NULL}, - { "new_MinidomParser", _wrap_new_MinidomParser, METH_VARARGS, "MinidomParser(bool storeEncoding=True)"}, + { "new_MinidomParser", _wrap_new_MinidomParser, METH_VARARGS, "MinidomParser(bool storeEncoding=False)"}, { "delete_MinidomParser", _wrap_delete_MinidomParser, METH_O, "delete_MinidomParser(MinidomParser self)"}, { "MinidomParser_parse", _wrap_MinidomParser_parse, METH_VARARGS, "MinidomParser_parse(MinidomParser self, io::InputStream & _is, int size=io::InputStream::IS_END)"}, { "MinidomParser_clear", _wrap_MinidomParser_clear, METH_O, "MinidomParser_clear(MinidomParser self)"}, @@ -7251,6 +8054,7 @@ static PyMethodDef SwigMethods[] = { ""}, { "MinidomParser_setDocument", _wrap_MinidomParser_setDocument, METH_VARARGS, "MinidomParser_setDocument(MinidomParser self, Document newDocument, bool own=True)"}, { "MinidomParser_preserveCharacterData", _wrap_MinidomParser_preserveCharacterData, METH_VARARGS, "MinidomParser_preserveCharacterData(MinidomParser self, bool preserve)"}, + { "MinidomParser_storeEncoding", _wrap_MinidomParser_storeEncoding, METH_VARARGS, "MinidomParser_storeEncoding(MinidomParser self, bool preserve)"}, { "MinidomParser_swigregister", MinidomParser_swigregister, METH_O, NULL}, { "MinidomParser_swiginit", MinidomParser_swiginit, METH_VARARGS, NULL}, { NULL, 
NULL, 0, NULL } @@ -7270,9 +8074,12 @@ static swig_type_info _swigt__p_io__OutputStream = {"_p_io__OutputStream", "io:: static swig_type_info _swigt__p_std__nothrow_t = {"_p_std__nothrow_t", "std::nothrow_t *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_std__string = {"_p_std__string", "std::string *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_std__vectorT_xml__lite__Element_p_t = {"_p_std__vectorT_xml__lite__Element_p_t", "std::vector< xml::lite::Element * > *", 0, 0, (void*)0, 0}; +static swig_type_info _swigt__p_coda_oss__optionalT_xml__lite__string_encoding_t = {"_p_coda_oss__optionalT_xml__lite__string_encoding_t", "coda_oss::optional< xml::lite::StringEncoding > *", 0, 0, (void*)0, 0}; +static swig_type_info _swigt__p_sys__U8string = {"_p_sys__U8string", "coda_oss::u8string *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_xml__lite__Document = {"_p_xml__lite__Document", "xml::lite::Document *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_xml__lite__Element = {"_p_xml__lite__Element", "xml::lite::Element *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_xml__lite__MinidomParser = {"_p_xml__lite__MinidomParser", "xml::lite::MinidomParser *", 0, 0, (void*)0, 0}; +static swig_type_info _swigt__p_xml__lite__string_encoding = {"_p_xml__lite__string_encoding", "enum xml::lite::StringEncoding *|xml::lite::StringEncoding *", 0, 0, (void*)0, 0}; static swig_type_info *swig_type_initial[] = { &_swigt__p_XMLReader, @@ -7282,9 +8089,12 @@ static swig_type_info *swig_type_initial[] = { &_swigt__p_std__nothrow_t, &_swigt__p_std__string, &_swigt__p_std__vectorT_xml__lite__Element_p_t, + &_swigt__p_coda_oss__optionalT_xml__lite__string_encoding_t, + &_swigt__p_sys__U8string, &_swigt__p_xml__lite__Document, &_swigt__p_xml__lite__Element, &_swigt__p_xml__lite__MinidomParser, + &_swigt__p_xml__lite__string_encoding, }; static swig_cast_info _swigc__p_XMLReader[] = { {&_swigt__p_XMLReader, 0, 0, 0},{0, 0, 0, 0}}; @@ -7294,9 +8104,12 @@ 
static swig_cast_info _swigc__p_io__OutputStream[] = { {&_swigt__p_io__OutputSt static swig_cast_info _swigc__p_std__nothrow_t[] = { {&_swigt__p_std__nothrow_t, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_std__string[] = { {&_swigt__p_std__string, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_std__vectorT_xml__lite__Element_p_t[] = { {&_swigt__p_std__vectorT_xml__lite__Element_p_t, 0, 0, 0},{0, 0, 0, 0}}; +static swig_cast_info _swigc__p_coda_oss__optionalT_xml__lite__string_encoding_t[] = { {&_swigt__p_coda_oss__optionalT_xml__lite__string_encoding_t, 0, 0, 0},{0, 0, 0, 0}}; +static swig_cast_info _swigc__p_sys__U8string[] = { {&_swigt__p_sys__U8string, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_xml__lite__Document[] = { {&_swigt__p_xml__lite__Document, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_xml__lite__Element[] = { {&_swigt__p_xml__lite__Element, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_xml__lite__MinidomParser[] = { {&_swigt__p_xml__lite__MinidomParser, 0, 0, 0},{0, 0, 0, 0}}; +static swig_cast_info _swigc__p_xml__lite__string_encoding[] = { {&_swigt__p_xml__lite__string_encoding, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info *swig_cast_initial[] = { _swigc__p_XMLReader, @@ -7306,9 +8119,12 @@ static swig_cast_info *swig_cast_initial[] = { _swigc__p_std__nothrow_t, _swigc__p_std__string, _swigc__p_std__vectorT_xml__lite__Element_p_t, + _swigc__p_coda_oss__optionalT_xml__lite__string_encoding_t, + _swigc__p_sys__U8string, _swigc__p_xml__lite__Document, _swigc__p_xml__lite__Element, _swigc__p_xml__lite__MinidomParser, + _swigc__p_xml__lite__string_encoding, }; @@ -8044,6 +8860,8 @@ SWIG_init(void) { SWIG_InstallConstants(d,swig_const_table); + SWIG_Python_SetConstant(d, "string_encoding_windows_1252",SWIG_From_int(static_cast< int >(xml::lite::StringEncoding::Windows1252))); + SWIG_Python_SetConstant(d, "string_encoding_utf_8",SWIG_From_int(static_cast< int >(xml::lite::StringEncoding::Utf8))); #if 
PY_VERSION_HEX >= 0x03000000 return m; #else