Skip to content

Commit

Permalink
re #480 : improve deserialization of empty strings
Browse files Browse the repository at this point in the history
  - Deserializing an empty quoted string *will not* cause an error. Previously, this was causing an error.
  - Deserializing an empty string *will* cause an error.
  - Ensure keys are deserialized using all the rules applying to vals: add readkey() analogs to read().
  - Added `KEYNIL` and `VALNIL` to `NodeType_e`, used by the parser to mark the key or val as empty. This changed the values of the `NodeType_e` enumeration.
  - Added `NodeType::key_is_null()` and `NodeType::val_is_null()`.
  • Loading branch information
biojppm committed Jan 19, 2025
1 parent 75e1034 commit e7fe887
Show file tree
Hide file tree
Showing 31 changed files with 1,080 additions and 436 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/infra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
runs-on: ubuntu-24.04
steps:
- name: log github event
run: echo "${{toJSON(github.event)}}"
run: echo "${{toJSON(github.event)}}" || echo >/dev/null
check_workflows:
if: always()
continue-on-error: false
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/infra.ys
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-24.04
steps:
- name: log github event
run: echo "${{toJSON(github.event)}}"
run: echo "${{toJSON(github.event)}}" || echo >/dev/null

check_workflows:
:: setup-job('infra' 'check_workflows')
Expand Down
14 changes: 13 additions & 1 deletion changelog/current.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
## Fixes

- Fix [#480](https://github.com/biojppm/rapidyaml/issues/480) ([PR#489](https://github.com/biojppm/rapidyaml/pull/489)):
- Deserializing an empty quoted string *will not* cause an error.
- Deserializing an empty string *will* cause an error.
- Ensure keys are deserialized using all the rules applying to vals.
- Added `KEYNIL` and `VALNIL` to `NodeType_e`, used by the parser to mark the key or val as empty. This changed the values of the `NodeType_e` enumeration.
- Added `NodeType::key_is_null()` and `NodeType::val_is_null()`.
- [PR#488](https://github.com/biojppm/rapidyaml/pull/488):
- add workarounds for problems with codegen of gcc 11,12,13
- improve CI coverage of gcc and clang optimization levels
- [BREAKING] Fix [#477](https://github.com/biojppm/rapidyaml/issues/477): changed `read<std::map>()` to overwrite existing entries. The provided implementations had an inconsistency between `std::map` (which wasn't overwriting) and `std::vector` (which *was* overwriting).
- [BREAKING] Fix [#477](https://github.com/biojppm/rapidyaml/issues/477) ([PR#479](https://github.com/biojppm/rapidyaml/pull/479)): changed `read<std::map>()` to overwrite existing entries. The provided implementations had an inconsistency between `std::map` (which wasn't overwriting) and `std::vector` (which *was* overwriting).


## Thanks

- @Delian0
- @perlpunk
2 changes: 1 addition & 1 deletion ext/c4core
21 changes: 11 additions & 10 deletions src/c4/yml/emit.def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,19 @@ void Emitter<Writer>::_emit_yaml(id_type id)
template<class Writer>
void Emitter<Writer>::_write_doc(id_type id)
{
RYML_ASSERT(m_tree->is_doc(id));
RYML_ASSERT(!m_tree->has_key(id));
const NodeType ty = m_tree->type(id);
RYML_ASSERT(ty.is_doc());
RYML_ASSERT(!ty.has_key());
if(!m_tree->is_root(id))
{
RYML_ASSERT(m_tree->is_stream(m_tree->parent(id)));
this->Writer::_do_write("---");
}
//
if(!m_tree->has_val(id)) // this is more frequent
if(!ty.has_val()) // this is more frequent
{
const bool tag = m_tree->has_val_tag(id);
const bool anchor = m_tree->has_val_anchor(id);
const bool tag = ty.has_val_tag();
const bool anchor = ty.has_val_anchor();
if(!tag && !anchor)
{
;
Expand Down Expand Up @@ -199,20 +200,20 @@ void Emitter<Writer>::_write_doc(id_type id)
}
else // docval
{
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_val(id));
_RYML_CB_ASSERT(m_tree->callbacks(), ty.has_val());
// some plain scalars such as '...' and '---' must not
// appear at 0-indentation
const csubstr val = m_tree->val(id);
const bool preceded_by_3_dashes = !m_tree->is_root(id);
const type_bits style_marks = m_tree->type(id) & (KEY_STYLE|VAL_STYLE);
const bool is_plain = m_tree->type(id).is_val_plain();
const type_bits style_marks = ty & VAL_STYLE;
const bool is_plain = ty.is_val_plain();
const bool is_ambiguous = (is_plain || !style_marks)
&& ((val.begins_with("...") || val.begins_with("---"))
||
(val.find('\n') != npos));
if(preceded_by_3_dashes)
{
if(val.len == 0 && !m_tree->has_val_anchor(id) && !m_tree->has_val_tag(id))
if(is_plain && val.len == 0 && !ty.has_val_anchor() && !ty.has_val_tag())
{
this->Writer::_do_write('\n');
return;
Expand Down Expand Up @@ -607,7 +608,7 @@ void Emitter<Writer>::_write(NodeScalar const& C4_RESTRICT sc, NodeType flags, i
}

// ensure the style flags only have one of KEY or VAL
_RYML_CB_ASSERT(m_tree->callbacks(), ((flags & SCALAR_STYLE) == 0) || (((flags&KEY_STYLE) == 0) != ((flags&VAL_STYLE) == 0)));
_RYML_CB_ASSERT(m_tree->callbacks(), ((flags & SCALAR_STYLE) == 0) || (((flags & KEY_STYLE) == 0) != ((flags & VAL_STYLE) == 0)));
type_bits style_marks = flags & SCALAR_STYLE;
if(!style_marks)
style_marks = scalar_style_choose(sc.scalar);
Expand Down
4 changes: 2 additions & 2 deletions src/c4/yml/emit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,6 @@ inline OStream& operator<< (OStream& s, as_yaml const& y)
* @param id the node where to start emitting.
* @param opts emit options.
* @param buf the output buffer.
* @param opts emit options.
* @param error_on_excess Raise an error if the space in the buffer is insufficient.
* @return a substr trimmed to the result in the output buffer. If the buffer is
* insufficient (when error_on_excess is false), the string pointer of the
Expand All @@ -493,7 +492,6 @@ inline substr emit_yaml(Tree const& t, id_type id, substr buf, bool error_on_exc
* @param id the node where to start emitting.
* @param opts emit options.
* @param buf the output buffer.
* @param opts emit options.
* @param error_on_excess Raise an error if the space in the buffer is insufficient.
* @return a substr trimmed to the result in the output buffer. If the buffer is
* insufficient (when error_on_excess is false), the string pointer of the
Expand All @@ -515,6 +513,7 @@ inline substr emit_json(Tree const& t, id_type id, substr buf, bool error_on_exc

/** (1) emit YAML to the given buffer. Return a substr trimmed to the emitted YAML.
* @param t the tree; will be emitted from the root node.
* @param opts emit options.
* @param buf the output buffer.
* @param error_on_excess Raise an error if the space in the buffer is insufficient.
* @return a substr trimmed to the result in the output buffer. If the buffer is
Expand All @@ -533,6 +532,7 @@ inline substr emit_yaml(Tree const& t, substr buf, bool error_on_excess=true)
}
/** (1) emit JSON to the given buffer. Return a substr trimmed to the emitted JSON.
* @param t the tree; will be emitted from the root node.
* @param opts emit options.
* @param buf the output buffer.
* @param error_on_excess Raise an error if the space in the buffer is insufficient.
* @return a substr trimmed to the result in the output buffer. If the buffer is
Expand Down
13 changes: 13 additions & 0 deletions src/c4/yml/event_handler_tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,19 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
/** @{ */


C4_ALWAYS_INLINE void set_key_scalar_plain_empty() noexcept
{
_c4dbgpf("node[{}]: set key scalar plain as empty", m_curr->node_id);
m_curr->tr_data->m_key.scalar = {};
_enable_(KEY|KEY_PLAIN|KEYNIL);
}
C4_ALWAYS_INLINE void set_val_scalar_plain_empty() noexcept
{
_c4dbgpf("node[{}]: set val scalar plain as empty", m_curr->node_id);
m_curr->tr_data->m_val.scalar = {};
_enable_(VAL|VAL_PLAIN|VALNIL);
}

C4_ALWAYS_INLINE void set_key_scalar_plain(csubstr scalar) noexcept
{
_c4dbgpf("node[{}]: set key scalar plain: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast<void const*>(scalar.str));
Expand Down
188 changes: 53 additions & 135 deletions src/c4/yml/node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,13 @@ template<class K> C4_ALWAYS_INLINE Key<K> key(K & k) { return Key<K>{k}; }
C4_ALWAYS_INLINE Key<fmt::const_base64_wrapper> key(fmt::const_base64_wrapper w) { return {w}; }
C4_ALWAYS_INLINE Key<fmt::base64_wrapper> key(fmt::base64_wrapper w) { return {w}; }

template<class T> void write(NodeRef *n, T const& v);

template<class T>
typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type
read(NodeRef const& n, T *v);
template<class T> void write(NodeRef *n, T const& v);

template<class T>
typename std::enable_if< std::is_floating_point<T>::value, bool>::type
read(NodeRef const& n, T *v);
template<class T> inline bool read(ConstNodeRef const& C4_RESTRICT n, T *v);
template<class T> inline bool read(NodeRef const& C4_RESTRICT n, T *v);
template<class T> inline bool readkey(ConstNodeRef const& C4_RESTRICT n, T *v);
template<class T> inline bool readkey(NodeRef const& C4_RESTRICT n, T *v);

/** @} */

Expand Down Expand Up @@ -328,8 +326,7 @@ struct RoNodeMethods

template<class U=Impl>
C4_ALWAYS_INLINE auto doc(id_type i) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { RYML_ASSERT(tree_); return {tree__, tree__->doc(i)}; } /**< Forward to @ref Tree::doc(). Node must be readable. */
/** succeeds even when the node may have invalid or seed id */
C4_ALWAYS_INLINE ConstImpl doc(id_type i) const RYML_NOEXCEPT { RYML_ASSERT(tree_); return {tree_, tree_->doc(i)}; } /**< Forward to @ref Tree::doc(). Node must be readable. */
C4_ALWAYS_INLINE ConstImpl doc(id_type i) const RYML_NOEXCEPT { RYML_ASSERT(tree_); return {tree_, tree_->doc(i)}; } /**< Forward to @ref Tree::doc(). Node must be readable. succeeds even when the node may have invalid or seed id */

template<class U=Impl>
C4_ALWAYS_INLINE auto parent() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->parent(id__)}; } /**< Forward to @ref Tree::parent(). Node must be readable. */
Expand Down Expand Up @@ -633,42 +630,11 @@ struct RoNodeMethods
ConstImpl const& operator>> (Key<T> v) const
{
_C4RR();
if(key().empty() || ! from_chars(key(), &v.k))
if( ! readkey((ConstImpl const&)*this, &v.k))
_RYML_CB_ERR(tree_->m_callbacks, "could not deserialize key");
return *((ConstImpl const*)this);
}

/** deserialize the node's key as base64. lightweight wrapper over @ref deserialize_key() */
ConstImpl const& operator>> (Key<fmt::base64_wrapper> w) const
{
deserialize_key(w.wrapper);
return *((ConstImpl const*)this);
}

/** deserialize the node's val as base64. lightweight wrapper over @ref deserialize_val() */
ConstImpl const& operator>> (fmt::base64_wrapper w) const
{
deserialize_val(w);
return *((ConstImpl const*)this);
}

/** decode the base64-encoded key and assign the
* decoded blob to the given buffer/
* @return the size of base64-decoded blob */
size_t deserialize_key(fmt::base64_wrapper v) const
{
_C4RR();
return from_chars(key(), &v);
}
/** decode the base64-encoded key and assign the
* decoded blob to the given buffer/
* @return the size of base64-decoded blob */
size_t deserialize_val(fmt::base64_wrapper v) const
{
_C4RR();
return from_chars(val(), &v);
};

/** look for a child by name, if it exists assign to var. return
* true if the child existed. */
template<class T>
Expand Down Expand Up @@ -702,6 +668,42 @@ struct RoNodeMethods
}
}

/** @name deserialization_base64 */
/** @{ */

/** deserialize the node's key as base64. lightweight wrapper over @ref deserialize_key() */
ConstImpl const& operator>> (Key<fmt::base64_wrapper> w) const
{
deserialize_key(w.wrapper);
return *((ConstImpl const*)this);
}

/** deserialize the node's val as base64. lightweight wrapper over @ref deserialize_val() */
ConstImpl const& operator>> (fmt::base64_wrapper w) const
{
deserialize_val(w);
return *((ConstImpl const*)this);
}

/** decode the base64-encoded key and assign the
* decoded blob to the given buffer/
* @return the size of base64-decoded blob */
size_t deserialize_key(fmt::base64_wrapper v) const
{
_C4RR();
return from_chars(key(), &v);
}
/** decode the base64-encoded key and assign the
* decoded blob to the given buffer/
* @return the size of base64-decoded blob */
size_t deserialize_val(fmt::base64_wrapper v) const
{
_C4RR();
return from_chars(val(), &v);
};

/** @} */

/** @} */

public:
Expand Down Expand Up @@ -1603,117 +1605,33 @@ inline ConstNodeRef& ConstNodeRef::operator= (NodeRef && that) noexcept // NOLIN
*/

template<class T>
inline void write(NodeRef *n, T const& v)
C4_ALWAYS_INLINE void write(NodeRef *n, T const& v)
{
n->set_val_serialized(v);
}

namespace detail {
// SFINAE overloads for skipping leading + which cannot be read by the charconv functions
template<class T>
C4_ALWAYS_INLINE auto read_skip_plus(csubstr val, T *v)
-> typename std::enable_if<std::is_arithmetic<T>::value, bool>::type
C4_ALWAYS_INLINE bool read(ConstNodeRef const& C4_RESTRICT n, T *v)
{
if(val.begins_with('+'))
val = val.sub(1);
return from_chars(val, v);
return read(n.m_tree, n.m_id, v);
}
template<class T>
C4_ALWAYS_INLINE auto read_skip_plus(csubstr val, T *v)
-> typename std::enable_if< ! std::is_arithmetic<T>::value, bool>::type
{
return from_chars(val, v);
}
} // namespace detail

/** convert the val of a scalar node to a particular type, by
* forwarding its val to @ref from_chars<T>(). The full string is
* used.
* @return false if the conversion failed */
template<class T>
inline auto read(NodeRef const& n, T *v)
-> typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type
{
csubstr val = n.val();
if(val.empty())
return false;
return detail::read_skip_plus(val, v);
}
/** convert the val of a scalar node to a particular type, by
* forwarding its val to @ref from_chars<T>(). The full string is
* used.
* @return false if the conversion failed */
template<class T>
inline auto read(ConstNodeRef const& n, T *v)
-> typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type
C4_ALWAYS_INLINE bool read(NodeRef const& C4_RESTRICT n, T *v)
{
csubstr val = n.val();
if(val.empty())
return false;
return detail::read_skip_plus(val, v);
return read(n.tree(), n.id(), v);
}

/** convert the val of a scalar node to a floating point type, by
* forwarding its val to @ref from_chars_float<T>().
*
* @return false if the conversion failed
*
* @warning Unlike non-floating types, only the leading part of the
* string that may constitute a number is processed. This happens
* because the float parsing is delegated to fast_float, which is
* implemented that way. Consequently, for example, all of `"34"`,
* `"34 "` `"34hg"` `"34 gh"` will be read as 34. If you are not sure
* about the contents of the data, you can use
* csubstr::first_real_span() to check before calling `>>`, for
* example like this:
*
* ```cpp
* csubstr val = node.val();
* if(val.first_real_span() == val)
* node >> v;
* else
* ERROR("not a real")
* ```
*/
template<class T>
typename std::enable_if<std::is_floating_point<T>::value, bool>::type
inline read(NodeRef const& n, T *v)
C4_ALWAYS_INLINE bool readkey(ConstNodeRef const& C4_RESTRICT n, T *v)
{
csubstr val = n.val();
if(val.empty())
return false;
return from_chars_float(val, v);
return readkey(n.m_tree, n.m_id, v);
}
/** convert the val of a scalar node to a floating point type, by
* forwarding its val to @ref from_chars_float<T>().
*
* @return false if the conversion failed
*
* @warning Unlike non-floating types, only the leading part of the
* string that may constitute a number is processed. This happens
* because the float parsing is delegated to fast_float, which is
* implemented that way. Consequently, for example, all of `"34"`,
* `"34 "` `"34hg"` `"34 gh"` will be read as 34. If you are not sure
* about the contents of the data, you can use
* csubstr::first_real_span() to check before calling `>>`, for
* example like this:
*
* ```cpp
* csubstr val = node.val();
* if(val.first_real_span() == val)
* node >> v;
* else
* ERROR("not a real")
* ```
*/

template<class T>
typename std::enable_if<std::is_floating_point<T>::value, bool>::type
inline read(ConstNodeRef const& n, T *v)
C4_ALWAYS_INLINE bool readkey(NodeRef const& C4_RESTRICT n, T *v)
{
csubstr val = n.val();
if(val.empty())
return false;
return from_chars_float(val, v);
return readkey(n.tree(), n.id(), v);
}

/** @} */
Expand Down
Loading

0 comments on commit e7fe887

Please sign in to comment.