Skip to content

Commit

Permalink
xml.lite uses UTF-8 from coda-oss
Browse files Browse the repository at this point in the history
  • Loading branch information
Dan Smith committed Jul 29, 2022
1 parent 32e27fc commit af27a9a
Show file tree
Hide file tree
Showing 24 changed files with 466 additions and 1,693 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,6 @@ class ContentHandler

//! Optional hook taking untyped character data plus a length. The pointer is
//! left as const void* to avoid XMLCh, which is specific to Xerces.
//! \return false, so that processing continues on to the existing characters()
virtual bool vcharacters(const void/*XMLCh*/*, size_t /*length*/)
{
    // A pure virtual (=0) would break existing code, hence this default body.
    return false;
}
//! Tells the caller whether vcharacters(const void*) should be invoked.
//! A pure virtual (=0) would break existing code, so a default is supplied.
//! \return false: don't call vcharacters(const void*)
virtual bool call_vcharacters() const
{
    const bool useVCharacters = false;
    return useVCharacters;
}

/*!
* Receive notification of the beginning of an element.
Expand Down
63 changes: 33 additions & 30 deletions externals/coda-oss/modules/c++/xml.lite/include/xml/lite/Document.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@

#include <assert.h>

#include <utility>
#include "coda_oss/string.h"
#include "coda_oss/memory.h"

#include "xml/lite/Element.h"
#include "xml/lite/QName.h"
Expand All @@ -54,33 +56,41 @@ namespace lite
* Use the Document to access the Element nodes contained within.
* The DocumentParser will build a tree that you can use.
*/
class Document
struct Document final
{
public:
//! Builds a document around \c rootNode (null by default). \c own is stored
//! as the ownership flag for that node (true by default).
Document(Element* rootNode = nullptr, bool own = true)
    : mRootNode(rootNode)
    , mOwnRoot(own)
{
}
//! Adopts the element held by \c rootNode: the pointer is released from the
//! unique_ptr and forwarded to the raw-pointer constructor with own=true.
explicit Document(std::unique_ptr<Element>&& rootNode)
    : Document(rootNode.release(), /*own=*/true)
{
}

/*!
* Destroy the xml tree. This deletes the nodes if they exist
* Careful, this may delete your copy if you are not careful
*/
virtual ~Document()
~Document()
{
destroy();
}

virtual Document* clone() const
std::unique_ptr<Document>& clone(std::unique_ptr<Document>& doc) const
{
Document* doc = new Document();
doc = coda_oss::make_unique<Document>();

Element* cloneRoot = new Element();
auto cloneRoot = coda_oss::make_unique<Element>();
cloneRoot->clone(*mRootNode);
doc->setRootElement(cloneRoot);
doc->setRootElement(std::move(cloneRoot));
return doc;
}
//! Raw-pointer flavour of clone(): the copy is produced by the unique_ptr
//! overload and then released, so the returned pointer is no longer held
//! by any smart pointer here.
Document* clone() const
{
    std::unique_ptr<Document> copy;
    clone(copy); // fills in (and returns a reference to) 'copy'
    return copy.release();
}

/*!
* Factory-type method for creating a new Element
Expand All @@ -89,21 +99,9 @@ class Document
* \param characterData The character data (if any)
* \return A new element
*/
virtual Element *createElement(const std::string & qname,
const std::string & uri,
std::string characterData = "");
#ifndef SWIG // SWIG doesn't like unique_ptr or StringEncoding
Element* createElement(const std::string& qname,
const std::string & uri,
const std::string& characterData, StringEncoding) const;
Element* createElement(const std::string& qname,
const std::string& uri,
const coda_oss::u8string& characterData) const;
std::unique_ptr<Element> createElement(const xml::lite::QName& qname, const std::string& characterData) const;
std::unique_ptr<Element> createElement(const xml::lite::QName& qname,
const std::string& characterData, StringEncoding) const;
#endif // SWIG

Element *createElement(const std::string & qname, const std::string & uri, std::string characterData = "");
std::unique_ptr<Element> createElement(const xml::lite::QName&, const std::string& characterData) const;
std::unique_ptr<Element> createElement(const xml::lite::QName&, const coda_oss::u8string& characterData) const;

/*!
* Blanket destructor. This thing deletes everything
Expand All @@ -118,13 +116,13 @@ class Document
* \param element Element to add
* \param underThis Element to add element to
*/
virtual void insert(Element * element, Element * underThis);
void insert(Element * element, Element * underThis);

/*!
* Remove an element from the tree, starting at the root
* \param toDelete The node to delete (This DOES do deletion)
*/
virtual void remove(Element * toDelete);
void remove(Element * toDelete);

/*!
* Remove an element from the tree, starting at the second param
Expand All @@ -133,13 +131,17 @@ class Document
* be an optimization depending on the task, so I allow it to remain
* public
*/
virtual void remove(Element * toDelete, Element * fromHere);
void remove(Element * toDelete, Element * fromHere);

/*!
* Sets the internal root element
* \param element The node to set.
*/
void setRootElement(Element * element, bool own = true);
void setRootElement(std::unique_ptr<Element>&& element) // implicitly own=true
{
setRootElement(element.release(), true /*own*/);
}

/*!
* Retrieves the internal root element
Expand All @@ -151,17 +153,18 @@ class Document
mOwnRoot = false;
return mRootNode;
}

std::unique_ptr<Element>& getRootElement(std::unique_ptr<Element>& rootNode) // implicitly steal=true
{
rootNode.reset(getRootElement(true /*steal*/));
return rootNode;
}
//! Read-only accessor: hands back the stored root pointer and leaves the
//! document's members untouched.
Element* getRootElement() const
{
    Element* const root = mRootNode;
    return root;
}

protected:
//! Copy constructor
private:
Document(const Document&);

//! Assignment operator
Document& operator=(const Document&);

//! The root node element
Expand Down
126 changes: 38 additions & 88 deletions externals/coda-oss/modules/c++/xml.lite/include/xml/lite/Element.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,16 @@
#include <memory>
#include <string>
#include <new> // std::nothrow_t
#include <coda_oss/string.h>

#include <io/InputStream.h>
#include <io/OutputStream.h>
#include <str/Convert.h>
#include <str/EncodedString.h>
#include "xml/lite/XMLException.h"
#include "xml/lite/Attributes.h"
#include "xml/lite/QName.h"
#include "sys/Conf.h"
#include "coda_oss/optional.h"
#include "mem/SharedPtr.h"

/*!
Expand All @@ -59,57 +60,33 @@ namespace lite
* This class stores all of the element information about an XML
* document.
*/
class Element
struct Element final
{
Element(const std::string& qname, const std::string& uri, std::nullptr_t) :
mParent(nullptr), mName(uri, qname)
{
}

public:
//! Default constructor
Element() :
mParent(nullptr)
{
}
Element() = default;

/*!
* Constructor taking the namespace prefix and the local name
* \param qname The qname of the object
* \param uri The uri of the object
* \param characterData The character data (if any)
*/
Element(const std::string& qname, const std::string& uri = "",
const std::string& characterData = "") :
Element(qname, uri, nullptr)
explicit Element(const std::string& qname, const std::string& uri = "", const std::string& characterData = "") :
mName(uri, qname)
{
setCharacterData(characterData);
}
#ifndef SWIG // SWIG doesn't like unique_ptr or StringEncoding
Element(const std::string& qname, const std::string& uri,
const std::string& characterData, StringEncoding encoding) :
Element(qname, uri, nullptr)
{
setCharacterData(characterData, encoding);
}
Element(const std::string& qname, const std::string& uri,
const coda_oss::u8string& characterData) :
Element(qname, uri, nullptr)
Element(const xml::lite::QName& qname, const coda_oss::u8string& characterData) :
mName(qname.getName(), qname.getUri().value)
{
setCharacterData(characterData);
}

// StringEncoding is assumed based on the platform: Windows-1252 or UTF-8.
static std::unique_ptr<Element> create(const std::string& qname, const std::string& uri = "", const std::string& characterData = "");
static std::unique_ptr<Element> create(const std::string& qname, const xml::lite::Uri& uri, const std::string& characterData = "");
static std::unique_ptr<Element> create(const xml::lite::QName&, const std::string& characterData = "");
static std::unique_ptr<Element> create(const xml::lite::QName&, const coda_oss::u8string&);
// Encoding of "characterData" is always UTF-8
static std::unique_ptr<Element> createU8(const xml::lite::QName&, const std::string& characterData = "");
#endif // SWIG

//! Destructor
virtual ~Element()
~Element()
{
destroyChildren();
}
Expand All @@ -118,14 +95,14 @@ class Element
void destroyChildren();

// use clone() to duplicate an Element
#if !(defined(SWIG) || defined(SWIGPYTHON) || defined(HAVE_PYTHON_H)) // SWIG needs these
//private: // encoded as part of the C++ name mangling by some compilers
#endif
#if !(defined(SWIG) || defined(SWIGPYTHON) || defined(HAVE_PYTHON_H)) // SWIG needs these
//private: // encoded as part of the C++ name mangling by some compilers
#endif
Element(const Element&);
Element& operator=(const Element&);
#if !(defined(SWIG) || defined(SWIGPYTHON) || defined(HAVE_PYTHON_H))
public:
#endif
#if !(defined(SWIG) || defined(SWIGPYTHON) || defined(HAVE_PYTHON_H))
public:
#endif

Element(Element&&) = default;
Element& operator=(Element&&) = default;
Expand Down Expand Up @@ -290,21 +267,17 @@ class Element
* \todo Add format capability
*/
void print(io::OutputStream& stream) const;

// This is another slightly goofy routine to maintain backwards compatibility.
// XML documents must be properly encoded (UTF-8, UTF-16 or UTF-32). The legacy
// print() routine (above) can write documents with a Windows-1252 encoding
// as the string is just copied to the output.
//
// The only valid setting for StringEncoding is Utf8; but defaulting that
// could change behavior on Windows.
void prettyPrint(io::OutputStream& stream,
const std::string& formatter = " ") const;
#ifndef SWIG // SWIG doesn't like unique_ptr or StringEncoding
void print(io::OutputStream& stream, StringEncoding /*=Utf8*/) const;
void prettyPrint(io::OutputStream& stream, StringEncoding /*=Utf8*/,

// Outputs (presumably to the console) using the **NATIVE** encoding.
// For most XML processing, **THIS IS WRONG** as output should
// always be UTF-8. However, for displaying XML on the console in Windows,
// the native (Windows-1252) encoding will work better as "special" characters
// will be displayed.
void consoleOutput_(io::OutputStream& stream) const; // be sure OutputStream is the console, not a file
void prettyConsoleOutput_(io::OutputStream& stream, // be sure OutputStream is the console, not a file
const std::string& formatter = " ") const;
#endif // SWIG

/*!
* Determines if a child element exists
Expand All @@ -329,33 +302,21 @@ class Element
* Returns the character data of this element.
* \return the character data
*/
std::string getCharacterData() const
{
return mCharacterData;
}
#ifndef SWIG // SWIG doesn't like unique_ptr or StringEncoding
const coda_oss::optional<StringEncoding>& getEncoding() const
{
return mEncoding;
}
const coda_oss::optional<StringEncoding>& getCharacterData(std::string& result) const
{
result = getCharacterData();
return getEncoding();
}
void getCharacterData(coda_oss::u8string& result) const;
#endif // SWIG
std::string getCharacterData() const;
coda_oss::u8string& getCharacterData(coda_oss::u8string& result) const;

/*!
* Sets the character data for this element.
* \param characters The data to add to this element
*/
void setCharacterData(const std::string& characters);
#ifndef SWIG // SWIG doesn't like unique_ptr or StringEncoding
void setCharacterData_(const std::string& characters, const StringEncoding*);
void setCharacterData(const std::string& characters, StringEncoding);
void setCharacterData(const coda_oss::u8string& characters);
#endif // SWIG
void setCharacterData(const std::string&);
//! Replaces this element's character data with \c s.
void setCharacterData(coda_oss::u8string s)
{
    // Taken by value and then moved, per Item #41 of Scott Meyers'
    // "Effective Modern C++": std::basic_string<T> is "cheap to move" and
    // is "always copied" into mCharacterData.
    mCharacterData = std::move(s);
}

/*!
* Sets the local name for this element.
Expand Down Expand Up @@ -482,8 +443,7 @@ class Element
mParent = parent;
}

protected:

private:
void changePrefix(Element* element,
const std::string& prefix,
const std::string& uri);
Expand All @@ -492,28 +452,18 @@ class Element
const std::string& prefix,
const std::string& uri);

void depthPrint(io::OutputStream& stream, int depth,
const std::string& formatter) const;
void depthPrint(io::OutputStream& stream, StringEncoding, int depth,
const std::string& formatter) const;
void depthPrint(io::OutputStream& stream, int depth, const std::string& formatter, bool isConsoleOutput = false) const;

Element* mParent;
Element* mParent = nullptr;
//! The children of this element
std::vector<Element*> mChildren;
xml::lite::QName mName;
//! The attributes for this element
xml::lite::Attributes mAttributes;
//! The character data ...
std::string mCharacterData;

private:
// ... and how that data is encoded
coda_oss::optional<StringEncoding> mEncoding;
void depthPrint(io::OutputStream& stream, bool utf8, int depth,
const std::string& formatter) const;
coda_oss::u8string mCharacterData;
};

extern Element& add(const xml::lite::QName&, const std::string& value, Element& parent);
Element& add(const xml::lite::QName&, const std::string& value, Element& parent);

#ifndef SWIG
// The (old) version of SWIG we're using doesn't like certain C++11 features.
Expand Down
Loading

0 comments on commit af27a9a

Please sign in to comment.