Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support unicode idents (matching rust) #444

Closed
wants to merge 15 commits into from
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ indexmap = { version = "1.9", features = ["serde-1"], optional = true }
# serde supports i128/u128 from 1.0.60 onwards
serde = "1.0.60"
serde_derive = "1.0"
unicode-ident = "1.0.8"

[dev-dependencies]
serde = { version = "1.0", features = ["derive"] }
Expand Down
6 changes: 3 additions & 3 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
return self.handle_any_struct(visitor);
}

match self.bytes.peek_or_eof()? {
match self.bytes.peek_byte_or_eof()? {
b'(' => self.handle_any_struct(visitor),
b'[' => self.deserialize_seq(visitor),
b'{' => self.deserialize_map(visitor),
Expand Down Expand Up @@ -651,7 +651,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
where
V: Visitor<'de>,
{
let identifier = str::from_utf8(self.bytes.identifier()?).map_err(Error::from)?;
let identifier = self.bytes.identifier()?;

self.last_identifier = Some(identifier);

Expand Down Expand Up @@ -686,7 +686,7 @@ impl<'a, 'de> CommaSeparated<'a, 'de> {

match (
self.had_comma,
self.de.bytes.peek_or_eof()? != self.terminator,
self.de.bytes.peek_byte_or_eof()? != self.terminator,
) {
// Trailing comma, maybe has a next element
(true, has_element) => Ok(has_element),
Expand Down
36 changes: 31 additions & 5 deletions src/error.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
use std::{error::Error as StdError, fmt, io, str::Utf8Error, string::FromUtf8Error};
use std::{
error::Error as StdError,
fmt, io,
str::{self, Utf8Error},
string::FromUtf8Error,
};

use serde::{de, ser};
use unicode_ident::is_xid_continue;

use crate::parse::{is_ident_first_char, is_ident_other_char, is_ident_raw_char, BASE64_ENGINE};
use crate::parse::{is_ident_first_char, is_ident_raw_char, BASE64_ENGINE};

/// This type represents all possible errors that can occur when
/// serializing or deserializing RON data.
Expand Down Expand Up @@ -92,6 +98,7 @@ pub enum Error {
SuggestRawIdentifier(String),
ExpectedRawValue,
ExceededRecursionLimit,
NoCharBoundary,
}

impl fmt::Display for SpannedError {
Expand Down Expand Up @@ -252,6 +259,7 @@ impl fmt::Display for Error {
),
Error::ExpectedRawValue => f.write_str("Expected a `ron::value::RawValue`"),
Error::ExceededRecursionLimit => f.write_str("Exceeded recursion limit, try increasing the limit and using `serde_stacker` to protect against a stack overflow"),
Error::NoCharBoundary => f.write_str("Tried to index source inside of char boundry"),
}
}
}
Expand Down Expand Up @@ -399,6 +407,24 @@ impl From<SpannedError> for Error {
}
}

impl SpannedError {
    /// Wraps a UTF-8 decoding failure in a `SpannedError` that points at the
    /// place in `source` where decoding stopped.
    ///
    /// The position is 1-based: `line` is one more than the number of `'\n'`
    /// characters in the valid prefix of `source`, and `col` is one more than
    /// the number of `char`s following the last `'\n'` in that prefix.
    ///
    /// # Panics
    ///
    /// Panics if `source[..error.valid_up_to()]` is out of bounds or is not
    /// valid UTF-8.
    pub(crate) fn from_utf8_error(error: Utf8Error, source: &[u8]) -> Self {
        // Only the prefix reported by `valid_up_to()` is decoded and inspected.
        let valid_len = error.valid_up_to();
        let valid_prefix = str::from_utf8(&source[..valid_len]).expect("source is valid up to error");

        let newline_count = valid_prefix.matches('\n').count();

        // The first item yielded by `rsplit` is the text after the final newline
        // (or the whole prefix when it contains no newline).
        let last_line = valid_prefix
            .rsplit('\n')
            .next()
            .expect("rsplit always yields at least one value");

        let position = Position {
            line: newline_count + 1,
            col: last_line.chars().count() + 1,
        };

        Self {
            code: error.into(),
            position,
        }
    }
}

struct OneOf {
alts: &'static [&'static str],
none: &'static str,
Expand Down Expand Up @@ -432,13 +458,13 @@ struct Identifier<'a>(&'a str);

impl<'a> fmt::Display for Identifier<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.0.is_empty() || !self.0.as_bytes().iter().copied().all(is_ident_raw_char) {
if self.0.is_empty() || !self.0.chars().all(is_ident_raw_char) {
return write!(f, "{:?}_[invalid identifier]", self.0);
}

let mut bytes = self.0.as_bytes().iter().copied();
let mut chars = self.0.chars();

if !bytes.next().map_or(false, is_ident_first_char) || !bytes.all(is_ident_other_char) {
if !chars.next().map_or(false, is_ident_first_char) || !chars.all(is_xid_continue) {
write!(f, "`r#{}`", self.0)
} else {
write!(f, "`{}`", self.0)
Expand Down
8 changes: 4 additions & 4 deletions src/extensions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ bitflags::bitflags! {

impl Extensions {
/// Creates an extension flag from an ident.
pub fn from_ident(ident: &[u8]) -> Option<Extensions> {
pub fn from_ident(ident: &str) -> Option<Extensions> {
match ident {
b"unwrap_newtypes" => Some(Extensions::UNWRAP_NEWTYPES),
b"implicit_some" => Some(Extensions::IMPLICIT_SOME),
b"unwrap_variant_newtypes" => Some(Extensions::UNWRAP_VARIANT_NEWTYPES),
"unwrap_newtypes" => Some(Extensions::UNWRAP_NEWTYPES),
"implicit_some" => Some(Extensions::IMPLICIT_SOME),
"unwrap_variant_newtypes" => Some(Extensions::UNWRAP_VARIANT_NEWTYPES),
_ => None,
}
}
Expand Down
Loading