Skip to content

Commit

Permalink
Makes Turtle parser pass the full W3C testsuite
Browse files Browse the repository at this point in the history
  • Loading branch information
Tpt committed Jul 28, 2019
1 parent 4aaca7c commit 0d2762c
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 83 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ Rio is a library aiming at providing conformant and fast parsers for RDF related

It currently provides [N-Triples](https://www.w3.org/TR/n-triples/) and [Turtle](https://www.w3.org/TR/turtle/) parsers.

It is not done to be used directly, but to be embedded inside of RDF libraries written in Rust, or exposed to other programming languages.
It is designed primarily to be embedded inside of RDF libraries written in Rust, or exposed to other programming languages.

It provides multiple crates:
* `rio_api` provides common traits and data structures to be used in Rio parsers (`Triple`, `TripleParser`...).
* `rio_turtle` provides a conformant streaming [N-Triples](https://www.w3.org/TR/n-triples/) parser and a work in progress [Turtle](https://www.w3.org/TR/turtle/) parser.
* `rio_turtle` provides conformant streaming parsers for [Turtle](https://www.w3.org/TR/turtle/) and [N-Triples](https://www.w3.org/TR/n-triples/) formats.

There is also the `rio_testsuite` crate that is used for testing Rio parsers against the [W3C RDF tests](http://w3c.github.io/rdf-tests/) to ensure their conformance.
It provides both an executable for building implementation reports and integration tests to quickly ensure that the parsers stay conformant.
It is not designed to be used outside of Rio.


## License
Expand Down
21 changes: 4 additions & 17 deletions testsuite/tests/w3c_testsuite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@ fn get_test_path() -> PathBuf {
return base_path;
}

fn run_testsuite(
manifest_uri: String,
number_of_expected_errors: usize,
) -> Result<(), Box<dyn Error>> {
fn run_testsuite(manifest_uri: String) -> Result<(), Box<dyn Error>> {
let test_path = get_test_path();
let manifest = TestManifest::new(manifest_uri, |url| parse_w3c_rdf_test_file(url, &test_path));

Expand All @@ -26,26 +23,16 @@ fn run_testsuite(
}
}

assert!(
errors.len() <= number_of_expected_errors,
"\n{}\n",
errors.join("\n")
);
assert!(errors.is_empty(), "\n{}\n", errors.join("\n"));
Ok(())
}

#[test]
fn ntriples_w3c_testsuite() -> Result<(), Box<dyn Error>> {
run_testsuite(
"http://w3c.github.io/rdf-tests/ntriples/manifest.ttl".to_owned(),
0,
)
run_testsuite("http://w3c.github.io/rdf-tests/ntriples/manifest.ttl".to_owned())
}

#[test]
fn turtle_w3c_testsuite() -> Result<(), Box<dyn Error>> {
run_testsuite(
"http://w3c.github.io/rdf-tests/turtle/manifest.ttl".to_owned(),
2,
)
run_testsuite("http://w3c.github.io/rdf-tests/turtle/manifest.ttl".to_owned())
}
76 changes: 57 additions & 19 deletions turtle/src/iri.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,12 @@ pub fn resolve_relative_iri(
if base_positions.authority_end > base_positions.scheme_end
&& base_positions.path_end == base_positions.authority_end
{
target_buffer.push(b'/');
append_and_remove_dot_segments_with_extra_slash(
&reference_iri
[reference_positions.authority_end..reference_positions.path_end],
target_buffer,
path_start_in_target,
);
} else {
let last_base_slash = base_iri
[base_positions.authority_end..base_positions.path_end]
Expand All @@ -122,13 +127,23 @@ pub fn resolve_relative_iri(
target_buffer,
path_start_in_target,
);
let to_add = &reference_iri
[reference_positions.authority_end..reference_positions.path_end];
if target_buffer.ends_with(b"/") {
target_buffer.pop();
append_and_remove_dot_segments_with_extra_slash(
to_add,
target_buffer,
path_start_in_target,
);
} else {
append_and_remove_dot_segments(
to_add,
target_buffer,
path_start_in_target,
);
}
}
append_and_remove_dot_segments(
&reference_iri
[reference_positions.authority_end..reference_positions.path_end],
target_buffer,
path_start_in_target,
);
}
// T.query = R.query;
target_buffer.extend_from_slice(
Expand All @@ -148,28 +163,25 @@ pub fn resolve_relative_iri(
fn append_and_remove_dot_segments(
mut input: &[u8],
output: &mut Vec<u8>,
path_start_in_output: usize, //protects the authority before this position
path_start_in_output: usize,
) {
while !input.is_empty() {
if input.starts_with(b"../") {
pop_last_segment(output, path_start_in_output);
input = &input[3..];
} else if input.starts_with(b"./") || input.starts_with(b"/./") {
input = &input[2..];
} else if input == b"/." {
input = b"/";
} else if input == b"." {
input = b"";
} else if input.starts_with(b"/../") {
pop_last_segment(output, path_start_in_output);
input = &input[3..];
} else if input == b"/.." || input == b".." {
} else if input == b"/.." {
pop_last_segment(output, path_start_in_output);
input = b"/";
} else if input == b"." || input == b".." {
input = b"";
} else {
if input[0] != b'/' || output.last() != Some(&b'/') {
output.push(input[0]);
}
output.push(input[0]);
input = &input[1..];
while !input.is_empty() && input[0] != b'/' {
output.push(input[0]);
Expand All @@ -180,14 +192,40 @@ fn append_and_remove_dot_segments(
}

fn pop_last_segment(buffer: &mut Vec<u8>, path_start_in_buffer: usize) {
let init_len = buffer.len();
for i in (path_start_in_buffer..init_len).rev() {
if buffer[i] == b'/' && i != init_len - 1 {
for i in (path_start_in_buffer..buffer.len()).rev() {
if buffer[i] == b'/' {
buffer.pop();
return;
}
buffer.pop();
}
buffer.push(b'/') // Ensures there is always a /
}

/// Appends `input` to `output` as if `input` were preceded by a `/`,
/// removing dot segments along the way.
///
/// Only the first segment of `input` is handled here; whatever follows is
/// handed over to `append_and_remove_dot_segments`. `path_start_in_output`
/// is forwarded unchanged to `pop_last_segment` and
/// `append_and_remove_dot_segments`.
fn append_and_remove_dot_segments_with_extra_slash(
    mut input: &[u8],
    output: &mut Vec<u8>,
    path_start_in_output: usize,
) {
    // Nothing to append: only the extra slash is written.
    if input.is_empty() {
        output.push(b'/');
        return;
    }

    if input == b"." || input == b".." {
        // A lone ".." additionally drops the last segment already written.
        if input.len() == 2 {
            pop_last_segment(output, path_start_in_output);
        }
        input = b"/";
    } else if input.starts_with(b"./") {
        // Drop the "." and keep the "/" that follows it.
        input = &input[1..];
    } else if input.starts_with(b"../") {
        // Drop the ".." and keep the "/" that follows it.
        pop_last_segment(output, path_start_in_output);
        input = &input[2..];
    } else {
        // Regular first segment: write the extra slash, then copy the
        // bytes up to (but excluding) the next '/'.
        output.push(b'/');
        let first_segment_len = input
            .iter()
            .position(|&byte| byte == b'/')
            .unwrap_or(input.len());
        output.extend_from_slice(&input[..first_segment_len]);
        input = &input[first_segment_len..];
    }

    append_and_remove_dot_segments(input, output, path_start_in_output)
}

fn parse_iri(value: &[u8], start: usize) -> Result<IriElementsPositions, usize> {
Expand Down
Loading

0 comments on commit 0d2762c

Please sign in to comment.