From f033882e455f577cc422623a1e0a2f3cfcda5c1c Mon Sep 17 00:00:00 2001 From: Pieter-Jan Briers Date: Fri, 17 Sep 2021 18:27:27 +0200 Subject: [PATCH 1/3] Handle files with BOM in lexer. Not sure if this is the best way to go about it, but it seems to work. --- src/dreammaker/lexer.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/dreammaker/lexer.rs b/src/dreammaker/lexer.rs index 1bdd3ff1..2ba037ae 100644 --- a/src/dreammaker/lexer.rs +++ b/src/dreammaker/lexer.rs @@ -482,6 +482,10 @@ enum Directive { Stringy, } +fn has_bom(slice: &[u8]) -> bool { + slice.len() > 3 && slice[0] == 0xEF && slice[1] == 0xBB && slice[2] == 0xBF +} + fn buffer_read<R: Read>(file: FileId, mut read: R) -> Result<Vec<u8>, DMError> { let mut buffer = Vec::new(); @@ -630,9 +634,14 @@ impl<'ctx> HasLocation for Lexer<'ctx> { impl<'ctx> Lexer<'ctx> { /// Create a new lexer from a byte stream. pub fn new<I: Into<Cow<'ctx, [u8]>>>(context: &'ctx Context, file_number: FileId, input: I) -> Self { + let mut cow = input.into(); + if has_bom(&cow) { + cow = Cow::from(cow[3..].to_owned()); + } + Lexer { context, - input: LocationTracker::new(file_number, input.into()), + input: LocationTracker::new(file_number, cow), next: None, final_newline: false, at_line_head: true, From 0de25259bb4b1aac3f416b0c72dbd1282e9b6954 Mon Sep 17 00:00:00 2001 From: Pieter-Jan Briers Date: Sat, 18 Sep 2021 22:39:32 +0200 Subject: [PATCH 2/3] Don't reallocate borrowed Cow, use starts_with() --- src/dreammaker/lexer.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/dreammaker/lexer.rs b/src/dreammaker/lexer.rs index 2ba037ae..49ec0ebe 100644 --- a/src/dreammaker/lexer.rs +++ b/src/dreammaker/lexer.rs @@ -483,7 +483,7 @@ enum Directive { } fn has_bom(slice: &[u8]) -> bool { - slice.len() > 3 && slice[0] == 0xEF && slice[1] == 0xBB && slice[2] == 0xBF + slice.starts_with(b"\xEF\xBB\xBF") } fn buffer_read<R: Read>(file: FileId, mut read: R) -> Result<Vec<u8>, DMError> { @@ -636,7 +636,10 @@ impl<'ctx> Lexer<'ctx> { pub 
fn new<I: Into<Cow<'ctx, [u8]>>>(context: &'ctx Context, file_number: FileId, input: I) -> Self { let mut cow = input.into(); if has_bom(&cow) { - cow = Cow::from(cow[3..].to_owned()); + cow = match cow { + Cow::Borrowed(b) => Cow::from(&b[3..]), + Cow::Owned(o) => Cow::from(o[3..].to_owned()) + }; } Lexer { From 8e26cf7ffd2547a50cee78556ad8eed806cf29f0 Mon Sep 17 00:00:00 2001 From: Pieter-Jan Briers Date: Sat, 18 Sep 2021 23:12:55 +0200 Subject: [PATCH 3/3] Use vec drain for BOM skipping. Still a memmove but better than a full realloc. --- src/dreammaker/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dreammaker/lexer.rs b/src/dreammaker/lexer.rs index 49ec0ebe..d24c5dad 100644 --- a/src/dreammaker/lexer.rs +++ b/src/dreammaker/lexer.rs @@ -638,7 +638,7 @@ impl<'ctx> Lexer<'ctx> { if has_bom(&cow) { cow = match cow { Cow::Borrowed(b) => Cow::from(&b[3..]), - Cow::Owned(o) => Cow::from(o[3..].to_owned()) + Cow::Owned(mut o) => { o.drain(..3); Cow::Owned(o) } }; }