diff --git a/src/lib.rs b/src/lib.rs index 8862c3aa..8a92c93d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -197,24 +197,29 @@ // Make `cargo test` execute the README doctests. #[cfg(doctest)] -#[doc = include_str!("../README.md")] +#[doc = include_str ! ("../README.md")] mod readme_doctest {} use std::borrow::Cow; mod indentation; + pub use crate::indentation::{dedent, indent}; mod word_separators; + pub use word_separators::WordSeparator; pub mod word_splitters; + pub use word_splitters::WordSplitter; pub mod wrap_algorithms; + pub use wrap_algorithms::WrapAlgorithm; pub mod line_ending; + pub use line_ending::LineEnding; pub mod core; @@ -623,19 +628,18 @@ where /// * This is an /// example of /// a list item. -/// ", LineEnding::LF); +/// "); /// /// assert_eq!(text, "This is an example of a list item.\n"); /// assert_eq!(options.initial_indent, "* "); /// assert_eq!(options.subsequent_indent, " "); /// ``` -pub fn unfill(text: &str, line_ending: LineEnding) -> (String, Options<'_>) { - let line_ending_pat = line_ending.as_str(); - let trimmed = text.trim_end_matches(line_ending_pat); +pub fn unfill(text: &str) -> (String, Options<'_>) { + let trimmed = text.trim_end_matches(&['\r', '\n']); let prefix_chars: &[_] = &[' ', '-', '+', '*', '>', '#', '/']; - let mut options = Options::new(0).line_ending(line_ending); - for (idx, line) in trimmed.split(line_ending_pat).enumerate() { + let mut options = Options::new(0); + for (idx, line) in trimmed.split('\n').enumerate() { options.width = std::cmp::max(options.width, core::display_width(line)); let without_prefix = line.trim_start_matches(prefix_chars); let prefix = &line[..line.len() - without_prefix.len()]; @@ -658,16 +662,24 @@ pub fn unfill(text: &str, line_ending: LineEnding) -> (String, Options<'_>) { } let mut unfilled = String::with_capacity(text.len()); - for (idx, line) in trimmed.split(line_ending_pat).enumerate() { - if idx == 0 { + let mut detected_line_ending = None; + + for (line, ending) in 
line_ending::NonEmptyLines(text) { + if unfilled.is_empty() { unfilled.push_str(&line[options.initial_indent.len()..]); } else { unfilled.push(' '); unfilled.push_str(&line[options.subsequent_indent.len()..]); } + match (detected_line_ending, ending) { + (None, Some(_)) => detected_line_ending = ending, + (Some(LineEnding::CRLF), Some(LineEnding::LF)) => detected_line_ending = ending, + _ => (), + } } - unfilled.push_str(&text[trimmed.len()..]); + + options.line_ending = detected_line_ending.unwrap_or(LineEnding::LF); (unfilled, options) } @@ -731,7 +743,7 @@ where { let mut new_options = new_width_or_options.into(); let trimmed = filled_text.trim_end_matches(new_options.line_ending.as_str()); - let (text, options) = unfill(trimmed, new_options.line_ending); + let (text, options) = unfill(trimmed); new_options.initial_indent = options.initial_indent; new_options.subsequent_indent = options.subsequent_indent; let mut refilled = fill(&text, new_options); @@ -1232,7 +1244,7 @@ mod tests { assert_eq!( wrap( "To be, or not to be, that is the question.", - Options::new(10).wrap_algorithm(WrapAlgorithm::FirstFit) + Options::new(10).wrap_algorithm(WrapAlgorithm::FirstFit), ), vec!["To be, or", "not to be,", "that is", "the", "question."] ); @@ -1308,7 +1320,7 @@ mod tests { assert_eq!( wrap( "Hello, World!", - Options::new(15).word_separator(WordSeparator::AsciiSpace) + Options::new(15).word_separator(WordSeparator::AsciiSpace), ), vec!["Hello,", "World!"] ); @@ -1319,7 +1331,7 @@ mod tests { assert_eq!( wrap( "Hello, World!", - Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties) + Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties), ), vec!["Hello, W", "orld!"] ); @@ -1592,14 +1604,14 @@ mod tests { assert_eq!( fill( "1 3 5 7\n1 3 5 7", - Options::new(7).wrap_algorithm(WrapAlgorithm::FirstFit) + Options::new(7).wrap_algorithm(WrapAlgorithm::FirstFit), ), "1 3 5 7\n1 3 5 7" ); assert_eq!( fill( "1 3 5 7\n1 3 5 7", - 
Options::new(5).wrap_algorithm(WrapAlgorithm::FirstFit) + Options::new(5).wrap_algorithm(WrapAlgorithm::FirstFit), ), "1 3 5\n7\n1 3 5\n7" ); @@ -1721,21 +1733,65 @@ mod tests { #[test] fn unfill_simple() { - let (text, options) = unfill("foo\nbar", LineEnding::LF); + let (text, options) = unfill("foo\nbar"); assert_eq!(text, "foo bar"); assert_eq!(options.width, 3); + assert_eq!(options.line_ending, LineEnding::LF); + } + + #[test] + fn unfill_no_new_line() { + let (text, options) = unfill("foo bar"); + assert_eq!(text, "foo bar"); + assert_eq!(options.width, 7); + assert_eq!(options.line_ending, LineEnding::LF); + } + + #[test] + fn unfill_simple_crlf() { + let (text, options) = unfill("foo\r\nbar"); + assert_eq!(text, "foo bar"); + assert_eq!(options.width, 3); + assert_eq!(options.line_ending, LineEnding::CRLF); + } + + /// If mixed new line sequence is encountered, we want to fallback to `\n` + /// 1. it is the default + /// 2. it still matches both `\n` and `\r\n` unlike `\r\n` which will not match `\n` + #[test] + fn unfill_mixed_new_lines() { + let (text, options) = unfill("foo\r\nbar\nbaz"); + assert_eq!(text, "foo bar baz"); + assert_eq!(options.width, 3); + assert_eq!(options.line_ending, LineEnding::LF); } #[test] fn unfill_trailing_newlines() { - let (text, options) = unfill("foo\nbar\n\n\n", LineEnding::LF); + let (text, options) = unfill("foo\nbar\n\n\n"); assert_eq!(text, "foo bar\n\n\n"); assert_eq!(options.width, 3); } + #[test] + fn unfill_mixed_trailing_newlines() { + let (text, options) = unfill("foo\r\nbar\n\r\n\n"); + assert_eq!(text, "foo bar\n\r\n\n"); + assert_eq!(options.width, 3); + assert_eq!(options.line_ending, LineEnding::LF); + } + + #[test] + fn unfill_trailing_crlf() { + let (text, options) = unfill("foo bar\r\n"); + assert_eq!(text, "foo bar\r\n"); + assert_eq!(options.width, 7); + assert_eq!(options.line_ending, LineEnding::CRLF); + } + #[test] fn unfill_initial_indent() { - let (text, options) = unfill(" foo\nbar\nbaz", 
LineEnding::LF); + let (text, options) = unfill(" foo\nbar\nbaz"); assert_eq!(text, "foo bar baz"); assert_eq!(options.width, 5); assert_eq!(options.initial_indent, " "); @@ -1743,7 +1799,7 @@ mod tests { #[test] fn unfill_differing_indents() { - let (text, options) = unfill(" foo\n bar\n baz", LineEnding::LF); + let (text, options) = unfill(" foo\n bar\n baz"); assert_eq!(text, "foo bar baz"); assert_eq!(options.width, 7); assert_eq!(options.initial_indent, " "); @@ -1752,7 +1808,7 @@ mod tests { #[test] fn unfill_list_item() { - let (text, options) = unfill("* foo\n bar\n baz", LineEnding::LF); + let (text, options) = unfill("* foo\n bar\n baz"); assert_eq!(text, "foo bar baz"); assert_eq!(options.width, 5); assert_eq!(options.initial_indent, "* "); @@ -1761,7 +1817,7 @@ mod tests { #[test] fn unfill_multiple_char_prefix() { - let (text, options) = unfill(" // foo bar\n // baz\n // quux", LineEnding::LF); + let (text, options) = unfill(" // foo bar\n // baz\n // quux"); assert_eq!(text, "foo bar baz quux"); assert_eq!(options.width, 14); assert_eq!(options.initial_indent, " // "); @@ -1770,7 +1826,7 @@ mod tests { #[test] fn unfill_block_quote() { - let (text, options) = unfill("> foo\n> bar\n> baz", LineEnding::LF); + let (text, options) = unfill("> foo\n> bar\n> baz"); assert_eq!(text, "foo bar baz"); assert_eq!(options.width, 5); assert_eq!(options.initial_indent, "> "); @@ -1779,16 +1835,16 @@ mod tests { #[test] fn unfill_whitespace() { - assert_eq!(unfill("foo bar", LineEnding::LF).0, "foo bar"); + assert_eq!(unfill("foo bar").0, "foo bar"); } - #[test] - fn unfill_crlf() { - let (text, options) = unfill("foo\r\nbar", LineEnding::CRLF); - assert_eq!(text, "foo bar"); - assert_eq!(options.width, 3); - assert_eq!(options.line_ending, LineEnding::CRLF); - } + // #[test] + // fn unfill_crlf() { + // let (text, options) = unfill("foo\r\nbar"); + // assert_eq!(text, "foo bar"); + // assert_eq!(options.width, 3); + // assert_eq!(options.line_ending, 
/// The character sequence that terminates a line of text.
///
/// Only `"\r\n"` and `"\n"` are represented; a lone carriage return is not
/// treated as a line ending by this module.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum LineEnding {
    /// Carriage return followed by line feed (`"\r\n"`).
    CRLF,
    /// A single line feed (`"\n"`).
    LF,
}

impl LineEnding {
    /// Returns the literal string this line ending stands for.
    #[inline]
    pub const fn as_str(&self) -> &'static str {
        match self {
            Self::CRLF => "\r\n",
            Self::LF => "\n",
        }
    }
}

impl FromStr for LineEnding {
    type Err = ();

    /// Parses exactly `"\r\n"` or `"\n"`; every other input yields `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "\r\n" => Ok(LineEnding::CRLF),
            "\n" => Ok(LineEnding::LF),
            _ => Err(()),
        }
    }
}

/// Iterator over the non-empty lines of a string slice.
///
/// Each item is a `(line, ending)` pair. `line` is the text of the line
/// without its terminator, and `ending` is the terminator that followed it,
/// or `None` for a final line that has no terminator. Lines that are empty
/// (a bare `"\n"` or `"\r\n"`) are skipped and never yielded.
#[derive(Debug, Clone, Copy)]
pub struct NonEmptyLines<'a>(pub &'a str);

impl<'a> Iterator for NonEmptyLines<'a> {
    type Item = (&'a str, Option<LineEnding>);

    fn next(&mut self) -> Option<Self::Item> {
        while let Some(lf) = self.0.find('\n') {
            let head = &self.0[..lf];
            // Always advance past the `\n`, whether or not the line is kept.
            self.0 = &self.0[lf + 1..];

            // A `\r` directly before the `\n` belongs to the terminator.
            let (line, ending) = match head.strip_suffix('\r') {
                Some(line) => (line, LineEnding::CRLF),
                None => (head, LineEnding::LF),
            };
            if !line.is_empty() {
                return Some((line, Some(ending)));
            }
        }

        // No `\n` left: whatever remains is the final, unterminated line.
        if self.0.is_empty() {
            None
        } else {
            // Leave `""` behind so that later calls keep returning `None`.
            Some((std::mem::take(&mut self.0), None))
        }
    }
}