Skip to content

Commit

Permalink
Require spaces between prolog attrs
Browse files Browse the repository at this point in the history
  • Loading branch information
kornelski committed May 22, 2023
1 parent 535914e commit 1eefdc9
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 3 deletions.
20 changes: 20 additions & 0 deletions src/reader/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,10 +249,12 @@ pub enum DeclarationSubstate {
InsideVersionValue,
AfterVersionValue,

BeforeEncoding,
InsideEncoding,
AfterEncoding,

InsideEncodingValue,
AfterEncodingValue,

BeforeStandaloneDecl,
InsideStandaloneDecl,
Expand Down Expand Up @@ -730,6 +732,24 @@ mod tests {
expect_event!(r, p, Ok(XmlEvent::EndDocument));
}

/// Checks that malformed XML declarations are rejected.
///
/// Each case feeds a single declaration to the parser and asserts, via
/// `expect_event!`, that the resulting event matches `Err(_)`.
#[test]
fn malformed_declaration_attrs() {
// Stray token (`x`) between the `version` attribute name and its `=`.
let (mut r, mut p) = test_data!(r#"<?xml version x="1.0"?>"#);
expect_event!(r, p, Err(_));

// The `version` attribute appears twice.
let (mut r, mut p) = test_data!(r#"<?xml version="1.0" version="1.0"?>"#);
expect_event!(r, p, Err(_));

// No whitespace between the `version` value and `encoding`.
let (mut r, mut p) = test_data!(r#"<?xml version="1.0"encoding="utf-8"?>"#);
expect_event!(r, p, Err(_));

// No whitespace between the `version` value and `standalone`.
let (mut r, mut p) = test_data!(r#"<?xml version="1.0"standalone="yes"?>"#);
expect_event!(r, p, Err(_));

// No whitespace between the `encoding` value and `standalone`.
let (mut r, mut p) = test_data!(r#"<?xml version="1.0" encoding="utf-8"standalone="yes"?>"#);
expect_event!(r, p, Err(_));
}

#[test]
fn opening_tag_in_attribute_value() {
use crate::reader::error::{SyntaxError, Error, ErrorKind};
Expand Down
14 changes: 13 additions & 1 deletion src/reader/parser/inside_declaration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ impl PullParser {
}),

DeclarationSubstate::AfterVersionValue => match t {
Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeEncoding)),
Token::ProcessingInstructionEnd => self.emit_start_document(),
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},

DeclarationSubstate::BeforeEncoding => match t {
Token::Character('e') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)),
Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
Token::ProcessingInstructionEnd => self.emit_start_document(),
Expand All @@ -114,9 +120,15 @@ impl PullParser {

DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| {
this.data.encoding = Some(value);
this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl))
this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterEncodingValue))
}),

DeclarationSubstate::AfterEncodingValue => match t {
Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl)),
Token::ProcessingInstructionEnd => self.emit_start_document(),
_ => Some(self.error(SyntaxError::UnexpectedToken(t))),
},

DeclarationSubstate::BeforeStandaloneDecl => match t {
Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
Token::ProcessingInstructionEnd => self.emit_start_document(),
Expand Down
1 change: 0 additions & 1 deletion tests/oasis.fail.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ o-p12fail6 p12fail6.xml built-in entity refs excluded
o-p12fail7 p12fail7.xml The public ID has a tab character, which is disallowed
o-p30fail1 p30fail1.xml An XML declaration is not the same as a TextDecl
o-p31fail1 p31fail1.xml external subset excludes doctypedecl
o-p32fail3 p32fail3.xml initial S is required
o-p40fail1 p40fail1.xml S is required between attributes
o-p44fail4 p44fail4.xml Whitespace required between attributes.
o-p45fail2 p45fail2.xml S before contentspec is required.
Expand Down
1 change: 0 additions & 1 deletion tests/xmltest.fail.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ not-wf-sa-086 086.xml Public IDs may not contain "[".
not-wf-sa-087 087.xml Public IDs may not contain "[".
not-wf-sa-089 089.xml Parameter entities "are" always parsed; NDATA annotations are not permitted.
not-wf-sa-091 091.xml Parameter entities "are" always parsed; NDATA annotations are not permitted.
not-wf-sa-096 096.xml Space is required before the standalone declaration.
not-wf-sa-104 104.xml Internal general parsed entities are only well formed if they match the "content" production.
not-wf-sa-115 115.xml The replacement text of this entity is an illegal character reference, which must be rejected when it is parsed in the context of an attribute value.
not-wf-sa-116 116.xml Internal general parsed entities are only well formed if they match the "content" production. This is a partial character reference, not a full one.
Expand Down

0 comments on commit 1eefdc9

Please sign in to comment.