diff --git a/core/parser/src/source/utf8.rs b/core/parser/src/source/utf8.rs
index f5e96f44cb4..41c8404b1c5 100644
--- a/core/parser/src/source/utf8.rs
+++ b/core/parser/src/source/utf8.rs
@@ -28,27 +28,26 @@ impl ReadChar for UTF8Input {
     fn next_char(&mut self) -> io::Result<Option<u32>> {
         // Decode UTF-8
         let x = match self.next_byte()? {
-            Some(b) if b < 128 => return Ok(Some(u32::from(b))),
-            Some(b) => b,
-            None => return Ok(None),
+            Some(b) if b >= 128 => b, // UTF-8 codepoint
+            b => return Ok(b.map(u32::from)), // ASCII or None
         };

         // Multibyte case follows
         // Decode from a byte combination out of: [[[x y] z] w]
         // NOTE: Performance is sensitive to the exact formulation here
         let init = utf8_first_byte(x, 2);
-        let y = unwrap_or_0(self.next_byte()?);
+        let y = self.next_byte()?.unwrap_or(0);
         let mut ch = utf8_acc_cont_byte(init, y);
         if x >= 0xE0 {
             // [[x y z] w] case
             // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
-            let z = unwrap_or_0(self.next_byte()?);
+            let z = self.next_byte()?.unwrap_or(0);
             let y_z = utf8_acc_cont_byte(u32::from(y & CONT_MASK), z);
             ch = init << 12 | y_z;
             if x >= 0xF0 {
                 // [x y z w] case
                 // use only the lower 3 bits of `init`
-                let w = unwrap_or_0(self.next_byte()?);
+                let w = self.next_byte()?.unwrap_or(0);
                 ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
             }
         };
@@ -71,7 +70,3 @@ fn utf8_first_byte(byte: u8, width: u32) -> u32 {
 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
     (ch << 6) | u32::from(byte & CONT_MASK)
 }
-
-fn unwrap_or_0(opt: Option<u8>) -> u8 {
-    opt.unwrap_or(0)
-}