-
Notifications
You must be signed in to change notification settings - Fork 344
/
Copy pathXLUtilities.hpp
451 lines (413 loc) · 25 KB
/
XLUtilities.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
//
// Created by Kenneth Balslev on 24/08/2020.
//
#ifndef OPENXLSX_XLUTILITIES_HPP
#define OPENXLSX_XLUTILITIES_HPP
#include <fstream>
#include <pugixml.hpp>
#include <string> // 2024-04-25 needed for xml_node_type_string
#include <string_view> // std::string_view
#include <vector> // std::vector< std::string_view >
#include "XLConstants.hpp" // 2024-05-28 OpenXLSX::MAX_ROWS
#include "XLCellReference.hpp"
#include "XLCellValue.hpp" // OpenXLSX::XLValueType
#include "XLContentTypes.hpp" // OpenXLSX::XLContentType
#include "XLRelationships.hpp" // OpenXLSX::XLRelationshipType
#include "XLXmlParser.hpp"
namespace OpenXLSX
{
constexpr const bool XLRemoveAttributes = true; // helper variables for appendAndSetNodeAttribute, parameter removeAttributes
constexpr const bool XLKeepAttributes = false; //
/**
* @brief Get rid of compiler warnings about unused variables (-Wunused-variable) or unused parameters (-Wunusued-parameter)
* @param (unnamed) the variable / parameter which is intentionally unused (e.g. for function stubs)
*/
template<class T> void ignore( const T& ) {}
/**
* @brief Get a string representation of pugi::xml_node_type
* @param t the pugi::xml_node_type of a node
* @return a std::string containing the descriptive name of the node type
*/
inline std::string XLValueTypeString( XLValueType t )
{
using namespace std::literals::string_literals;
switch( t ) {
case XLValueType::Empty : return "Empty"s;
case XLValueType::Boolean : return "Boolean"s;
case XLValueType::Integer : return "Integer"s;
case XLValueType::Float : return "Float"s;
case XLValueType::Error : return "Error"s;
case XLValueType::String : return "String"s;
}
throw XLInternalError(__func__ + "Invalid XLValueType."s);
}
/**
* @brief Get a string representation of pugi::xml_node_type
* @param t the pugi::xml_node_type of a node
* @return a std::string containing the descriptive name of the node type
*/
inline std::string xml_node_type_string( pugi::xml_node_type t )
{
using namespace std::literals::string_literals;
switch( t ) {
case pugi::node_null : return "node_null"s;
case pugi::node_document : return "node_document"s;
case pugi::node_element : return "node_element"s;
case pugi::node_pcdata : return "node_pcdata"s;
case pugi::node_cdata : return "node_cdata"s;
case pugi::node_comment : return "node_comment"s;
case pugi::node_pi : return "node_pi"s;
case pugi::node_declaration : return "node_declaration"s;
case pugi::node_doctype : return "node_doctype"s;
}
throw XLInternalError("Invalid XML node type.");
}
/**
* @brief Get a string representation of OpenXLSX::XLContentType
* @param t an OpenXLSX::XLContentType value
* @return a std::string containing the descriptive name of the content type
*/
inline std::string XLContentTypeString( OpenXLSX::XLContentType const & t )
{
using namespace OpenXLSX;
switch (t) {
case XLContentType::Workbook: return "Workbook";
case XLContentType::WorkbookMacroEnabled: return "WorkbookMacroEnabled";
case XLContentType::Worksheet: return "Worksheet";
case XLContentType::Chartsheet: return "Chartsheet";
case XLContentType::ExternalLink: return "ExternalLink";
case XLContentType::Theme: return "Theme";
case XLContentType::Styles: return "Styles";
case XLContentType::SharedStrings: return "SharedStrings";
case XLContentType::Drawing: return "Drawing";
case XLContentType::Chart: return "Chart";
case XLContentType::ChartStyle: return "ChartStyle";
case XLContentType::ChartColorStyle: return "ChartColorStyle";
case XLContentType::ControlProperties: return "ControlProperties";
case XLContentType::CalculationChain: return "CalculationChain";
case XLContentType::VBAProject: return "VBAProject";
case XLContentType::CoreProperties: return "CoreProperties";
case XLContentType::ExtendedProperties: return "ExtendedProperties";
case XLContentType::CustomProperties: return "CustomProperties";
case XLContentType::Comments: return "Comments";
case XLContentType::Table: return "Table";
case XLContentType::VMLDrawing: return "VMLDrawing";
case XLContentType::Unknown: return "Unknown";
}
return "invalid";
}
/**
* @brief Get a string representation of OpenXLSX::XLRelationshipType
* @param t an OpenXLSX::XLRelationshipType value
* @return std::string containing the descriptive name of the relationship type
*/
inline std::string XLRelationshipTypeString( OpenXLSX::XLRelationshipType const & t )
{
using namespace OpenXLSX;
switch (t) {
case XLRelationshipType::CoreProperties: return "CoreProperties";
case XLRelationshipType::ExtendedProperties: return "ExtendedProperties";
case XLRelationshipType::CustomProperties: return "CustomProperties";
case XLRelationshipType::Workbook: return "Workbook";
case XLRelationshipType::Worksheet: return "Worksheet";
case XLRelationshipType::Chartsheet: return "Chartsheet";
case XLRelationshipType::Dialogsheet: return "Dialogsheet";
case XLRelationshipType::Macrosheet: return "Macrosheet";
case XLRelationshipType::CalculationChain: return "CalculationChain";
case XLRelationshipType::ExternalLink: return "ExternalLink";
case XLRelationshipType::ExternalLinkPath: return "ExternalLinkPath";
case XLRelationshipType::Theme: return "Theme";
case XLRelationshipType::Styles: return "Styles";
case XLRelationshipType::Chart: return "Chart";
case XLRelationshipType::ChartStyle: return "ChartStyle";
case XLRelationshipType::ChartColorStyle: return "ChartColorStyle";
case XLRelationshipType::Image: return "Image";
case XLRelationshipType::Drawing: return "Drawing";
case XLRelationshipType::VMLDrawing: return "VMLDrawing";
case XLRelationshipType::SharedStrings: return "SharedStrings";
case XLRelationshipType::PrinterSettings: return "PrinterSettings";
case XLRelationshipType::VBAProject: return "VBAProject";
case XLRelationshipType::ControlProperties: return "ControlProperties";
case XLRelationshipType::Comments: return "Comments";
case XLRelationshipType::Unknown: return "Unknown";
}
return "invalid";
}
/**
* @details
*/
inline XMLNode getRowNode(XMLNode sheetDataNode, uint32_t rowNumber)
{
if (rowNumber < 1 || rowNumber > OpenXLSX::MAX_ROWS) { // 2024-05-28: added range check
using namespace std::literals::string_literals;
throw XLCellAddressError("rowNumber "s + std::to_string( rowNumber ) + " is outside valid range [1;"s + std::to_string(OpenXLSX::MAX_ROWS) + "]"s);
}
// ===== Get the last child of sheetDataNode that is of type node_element.
XMLNode result = sheetDataNode.last_child_of_type(pugi::node_element);
// ===== If there are now rows in the worksheet, or the requested row is beyond the current max row, append a new row to the end.
if (result.empty() || (rowNumber > result.attribute("r").as_ullong())) {
result = sheetDataNode.append_child("row");
result.append_attribute("r") = rowNumber;
// result.append_attribute("x14ac:dyDescent") = "0.2";
// result.append_attribute("spans") = "1:1";
}
// ===== If the requested node is closest to the end, start from the end and search backwards.
else if (result.attribute("r").as_ullong() - rowNumber < rowNumber) {
while (not result.empty() && (result.attribute("r").as_ullong() > rowNumber)) result = result.previous_sibling_of_type(pugi::node_element);
// ===== If the backwards search failed to locate the requested row
if (result.empty() || (result.attribute("r").as_ullong() != rowNumber)) {
if (result.empty())
result = sheetDataNode.prepend_child("row"); // insert a new row node at datasheet begin. When saving, this will keep whitespace formatting towards next row node
else
result = sheetDataNode.insert_child_after("row", result);
result.append_attribute("r") = rowNumber;
// result.append_attribute("x14ac:dyDescent") = "0.2";
// result.append_attribute("spans") = "1:1";
}
}
// ===== Otherwise, start from the beginning
else {
// ===== At this point, it is guaranteed that there is at least one node_element in the row that is not empty.
result = sheetDataNode.first_child_of_type(pugi::node_element);
// ===== It has been verified above that the requested rowNumber is <= the row number of the last node_element, therefore this loop will halt.
while (result.attribute("r").as_ullong() < rowNumber) result = result.next_sibling_of_type(pugi::node_element);
// ===== If the forwards search failed to locate the requested row
if (result.attribute("r").as_ullong() > rowNumber) {
result = sheetDataNode.insert_child_before("row", result);
result.append_attribute("r") = rowNumber;
// result.append_attribute("x14ac:dyDescent") = "0.2";
// result.append_attribute("spans") = "1:1";
}
}
return result;
}
/**
* @brief get the style attribute s for the indicated column, if any is set
* @param rowNode the row node from which to obtain the parent that should hold the <cols> node
* @param colNo the column for which to obtain the style
* @return the XLStyleIndex stored for the column, or XLDefaultCellFormat if none
*/
inline XLStyleIndex getColumnStyle(XMLNode rowNode, uint16_t colNo)
{
XMLNode cols = rowNode.parent().parent().child("cols");
if (not cols.empty()) {
XMLNode col = cols.first_child_of_type(pugi::node_element);
while (not col.empty()) {
if (col.attribute("min").as_int(MAX_COLS+1) <= colNo && col.attribute("max").as_int(0) >= colNo) // found
return col.attribute("style").as_uint(XLDefaultCellFormat);
col = col.next_sibling_of_type(pugi::node_element);
}
}
return XLDefaultCellFormat; // if no col style was found
}
/**
* @brief set the cell reference, and a default cell style attribute if and only if row or column style is != XLDefaultCellFormat
* @note row style takes precedence over column style
* @param cellNode the cell XML node
* @param cellRef the cell reference (attribute r) to set
* @param rowNode the row node for the cell
* @param colNo the column number for this cell (to try and fetch a column style)
* @param colStyles an optional std::vector<XLStyleIndex> that contains all pre-evaluated column styles,
* and can be used to avoid performance impact from lookup
*/
inline void setDefaultCellAttributes(XMLNode cellNode, const std::string & cellRef, XMLNode rowNode, uint16_t colNo,
/**/ std::vector<XLStyleIndex> const & colStyles = {})
{
cellNode.append_attribute("r").set_value(cellRef.c_str());
XMLAttribute rowStyle = rowNode.attribute("s");
XLStyleIndex cellStyle; // scope declaration
if (rowStyle.empty()) { // if no explicit row style is set
if (colStyles.size() > 0) // if colStyles were provided
cellStyle = (colNo > colStyles.size() ? XLDefaultCellFormat : colStyles[ colNo - 1 ]); // determine cellStyle from colStyles
else // else: no colStyles provided
cellStyle = getColumnStyle(rowNode, colNo); // so perform a lookup
}
else // else: an explicit row style is set
cellStyle = rowStyle.as_uint(XLDefaultCellFormat); // use the row style
if (cellStyle != XLDefaultCellFormat) // if cellStyle was determined as not the default style (no point in setting that)
cellNode.append_attribute("s").set_value(cellStyle);
}
/**
* @brief Retrieve the xml node representing the cell at the given row and column. If the node doesn't
* exist, it will be created.
* @param rowNode The row node under which to find the cell.
* @param columnNumber The column at which to find the cell.
* @param rowNumber (optional) row number of the row node, if already known, defaults to 0
* @param colStyles an optional std::vector<XLStyleIndex> that contains all pre-evaluated column styles,
* and can be used to avoid performance impact from lookup
* @return The xml node representing the requested cell.
*/
inline XMLNode getCellNode(XMLNode rowNode, uint16_t columnNumber, uint32_t rowNumber = 0,
/**/ std::vector<XLStyleIndex> const & colStyles = {})
{
if (columnNumber < 1 || columnNumber > OpenXLSX::MAX_COLS) { // 2024-08-05: added range check
using namespace std::literals::string_literals;
throw XLException("XLWorksheet::column: columnNumber "s + std::to_string(columnNumber) + " is outside allowed range [1;"s + std::to_string(MAX_COLS) + "]"s);
}
if (rowNode.empty()) return XMLNode{}; // 2024-05-28: return an empty node in case of empty rowNode
XMLNode cellNode = rowNode.last_child_of_type(pugi::node_element);
if (!rowNumber) rowNumber = rowNode.attribute("r").as_uint(); // if not provided, determine from rowNode
auto cellRef = XLCellReference(rowNumber, columnNumber);
// ===== If there are no cells in the current row, or the requested cell is beyond the last cell in the row...
if (cellNode.empty() || (XLCellReference(cellNode.attribute("r").value()).column() < columnNumber)) {
// ===== append a new node to the end.
cellNode = rowNode.append_child("c");
setDefaultCellAttributes(cellNode, cellRef.address(), rowNode, columnNumber, colStyles);
}
// ===== If the requested node is closest to the end, start from the end and search backwards...
else if (XLCellReference(cellNode.attribute("r").value()).column() - columnNumber < columnNumber) {
while (not cellNode.empty() && (XLCellReference(cellNode.attribute("r").value()).column() > columnNumber))
cellNode = cellNode.previous_sibling_of_type(pugi::node_element);
// ===== If the backwards search failed to locate the requested cell
if (cellNode.empty() || (XLCellReference(cellNode.attribute("r").value()).column() < columnNumber)) {
if (cellNode.empty()) // If between row begin and higher column number, only non-element nodes exist
cellNode = rowNode.prepend_child("c"); // insert a new cell node at row begin. When saving, this will keep whitespace formatting towards next cell node
else
cellNode = rowNode.insert_child_after("c", cellNode);
setDefaultCellAttributes(cellNode, cellRef.address(), rowNode, columnNumber, colStyles);
}
}
// ===== Otherwise, start from the beginning
else {
// ===== At this point, it is guaranteed that there is at least one node_element in the row that is not empty.
cellNode = rowNode.first_child_of_type(pugi::node_element);
// ===== It has been verified above that the requested columnNumber is <= the column number of the last node_element, therefore this loop will halt:
while (XLCellReference(cellNode.attribute("r").value()).column() < columnNumber)
cellNode = cellNode.next_sibling_of_type(pugi::node_element);
// ===== If the forwards search failed to locate the requested cell
if (XLCellReference(cellNode.attribute("r").value()).column() > columnNumber) {
cellNode = rowNode.insert_child_before("c", cellNode);
setDefaultCellAttributes(cellNode, cellRef.address(), rowNode, columnNumber, colStyles);
}
}
return cellNode;
}
/**
* @brief find the index of nodeName in nodeOrder
* @param nodeName search this
* @param nodeOrder in this
* @return index of nodeName in nodeOrder
* @return -1 if nodeName is not an element of nodeOrder
* @note this function uses a vector of std::string_view because std::string is not constexpr-capable, and in
* a future c++20 build, a std::span could be used to have the node order in any OpenXLSX class be a constexpr
*/
constexpr const int SORT_INDEX_NOT_FOUND = -1;
inline int findStringInVector( std::string const & nodeName, std::vector< std::string_view > const & nodeOrder )
{
for (int i = 0; static_cast< size_t >( i ) < nodeOrder.size(); ++i)
if (nodeName == nodeOrder[ i ]) return i;
return SORT_INDEX_NOT_FOUND;
}
/**
* @brief copy all leading pc_data nodes from fromNode to toNode
* @param parent parent node that can perform sibling insertions
* @param fromNode node whose preceeding whitespaces shall be duplicated
* @param toNode node before which the duplicated whitespaces shall be inserted
* @return N/A
*/
inline void copyLeadingWhitespaces( XMLNode & parent, XMLNode fromNode, XMLNode toNode )
{
fromNode = fromNode.previous_sibling(); // move to preceeding whitespace node, if any
// loop from back to front, inserting in the same order before toNode
while (fromNode.type() == pugi::node_pcdata) { // loop ends on pugi::node_element or node_null
toNode = parent.insert_child_before(pugi::node_pcdata, toNode); // prepend as toNode a new pcdata node
toNode.set_value(fromNode.value()); // with the value of fromNode
fromNode = fromNode.previous_sibling();
}
}
/**
* @brief ensure that node with nodeName exists in parent and return it
* @param parent parent node that can perform sibling insertions
* @param nodename name of the node to be (created &) returned
* @param nodeOrder optional vector of a predefined element node sequence required by MS Office
* @return the requested XMLNode or an empty node if the insert operation failed
* @note 2024-12-19: appendAndGetNode will attempt to perform an ordered insert per nodeOrder if provided
* Once sufficiently tested, this functionality might be generalized (e.g. in XLXmlParser OpenXLSX_xml_node)
*/
inline XMLNode appendAndGetNode(XMLNode & parent, std::string const & nodeName, std::vector< std::string_view > const & nodeOrder = {} )
{
if (parent.empty()) return XMLNode{};
XMLNode nextNode = parent.first_child_of_type(pugi::node_element);
if (nextNode.empty()) return parent.prepend_child(nodeName.c_str()); // nothing to sort, whitespaces "belong" to parent closing tag
XMLNode node{}; // empty until successfully created;
int nodeSortIndex = (nodeOrder.size() > 1 ? findStringInVector(nodeName, nodeOrder) : SORT_INDEX_NOT_FOUND);
if (nodeSortIndex != SORT_INDEX_NOT_FOUND) { // can't sort anything if nodeOrder contains less than 2 entries or does not contain nodeName
// ===== Find first node to follow nodeName per nodeOrder
while (not nextNode.empty() && findStringInVector(nextNode.name(), nodeOrder) < nodeSortIndex)
nextNode = nextNode.next_sibling_of_type(pugi::node_element);
// ===== Evaluate search result
if (not nextNode.empty()) { // found nodeName or a node before which nodeName should be inserted
if( nextNode.name() == nodeName ) // if nodeName was found
node = nextNode; // use existing node
else { // else: a node was found before which nodeName must be inserted
node = parent.insert_child_before(nodeName.c_str(), nextNode); // insert before nextNode without whitespaces
copyLeadingWhitespaces(parent, node, nextNode);
}
}
// else: no node was found before which nodeName should be inserted: proceed as usual
}
else // no possibility to perform ordered insert - attempt to locate existing node:
node = parent.child(nodeName.c_str());
if( node.empty() ) { // neither nodeName, nor a node following per nodeOrder was found
// ===== There is no reference to perform an ordered insert for nodeName
nextNode = parent.last_child_of_type(pugi::node_element); // at least one element node must exist, tested at begin of function
node = parent.insert_child_after(nodeName.c_str(), nextNode); // append as the last element node, but before final whitespaces
copyLeadingWhitespaces( parent, nextNode, node ); // duplicate the prefix whitespaces of nextNode to node
}
return node;
}
inline XMLAttribute appendAndGetAttribute(XMLNode & node, std::string const & attrName, std::string const & attrDefaultVal)
{
if (node.empty()) return XMLAttribute{};
XMLAttribute attr = node.attribute(attrName.c_str());
if (attr.empty()) {
attr = node.append_attribute(attrName.c_str());
attr.set_value(attrDefaultVal.c_str());
}
return attr;
}
inline XMLAttribute appendAndSetAttribute(XMLNode & node, std::string const & attrName, std::string const & attrVal)
{
if (node.empty()) return XMLAttribute{};
XMLAttribute attr = node.attribute(attrName.c_str());
if (attr.empty())
attr = node.append_attribute(attrName.c_str());
attr.set_value(attrVal.c_str()); // silently fails on empty attribute, which is intended here
return attr;
}
/**
* @brief ensure that node with nodeName exists in parent, has an attribute with attrName and return that attribute
* @param parent parent node that can perform sibling insertions
* @param nodename name of the node under which attribute attrName shall exist
* @param attrName name of the attribute to get for node nodeName
* @param attrDefaultVal value to assign to the attribute if it has to be created
* @param nodeOrder optional vector of a predefined element node sequence required by MS Office, passed through to appendAndGetNode
* @returns the requested XMLAttribute or an empty node if the operation failed
*/
inline XMLAttribute appendAndGetNodeAttribute(XMLNode & parent, std::string const & nodeName, std::string const & attrName, std::string const & attrDefaultVal,
/**/ std::vector< std::string_view > const & nodeOrder = {})
{
if (parent.empty()) return XMLAttribute{};
XMLNode node = appendAndGetNode(parent, nodeName, nodeOrder);
return appendAndGetAttribute(node, attrName, attrDefaultVal);
}
/**
* @brief ensure that node with nodeName exists in parent, has an attribute with attrName, set attribute value and return that attribute
* @param parent parent node that can perform sibling insertions
* @param nodename name of the node under which attribute attrName shall exist
* @param attrName name of the attribute to set for node nodeName
* @param attrVal value to assign to the attribute
* @param removeAttributes if true, all other attributes of the node with nodeName will be deleted
* @param nodeOrder optional vector of a predefined element node sequence required by MS Office, passed through to appendAndGetNode
* @returns the XMLAttribute that was modified or an empty node if the operation failed
*/
inline XMLAttribute appendAndSetNodeAttribute(XMLNode & parent, std::string const & nodeName, std::string const & attrName, std::string const & attrVal,
/**/ bool removeAttributes = XLKeepAttributes, std::vector< std::string_view > const & nodeOrder = {})
{
if (parent.empty()) return XMLAttribute{};
XMLNode node = appendAndGetNode(parent, nodeName, nodeOrder);
if (removeAttributes) node.remove_attributes();
return appendAndSetAttribute(node, attrName, attrVal);
}
} // namespace OpenXLSX
#endif // OPENXLSX_XLUTILITIES_HPP