Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add configurable LineEnding #454

Merged
merged 11 commits into from
Jul 9, 2022
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ name = "indent"
harness = false
path = "benches/indent.rs"

[[bench]]
name = "unfill"
harness = false
path = "benches/unfill.rs"

[features]
default = ["unicode-linebreak", "unicode-width", "smawk"]

Expand Down
23 changes: 23 additions & 0 deletions benches/unfill.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
use criterion::{criterion_group, criterion_main, Criterion};

/// Benchmarks `textwrap::unfill` on a generated multi-line text.
///
/// The input consists of 50 lines of lipsum words (5, 10 or 15 words
/// per line, with the line number passed as the seed), followed by four
/// additional trailing newlines.
pub fn benchmark(c: &mut Criterion) {
mgeisler marked this conversation as resolved.
Show resolved Hide resolved
// Number of words to generate for each of the 50 input lines.
let words_per_line = [
5, 10, 15, 5, 5, 10, 5, 5, 5, 10, // 10 lines
10, 10, 5, 5, 5, 5, 15, 10, 5, 5, // 20 lines
10, 5, 5, 5, 15, 10, 10, 5, 5, 5, // 30 lines
15, 5, 5, 10, 5, 5, 5, 15, 5, 10, // 40 lines
5, 15, 5, 5, 15, 5, 10, 10, 5, 5, // 50 lines
];
let mut text = String::new();
for (line_no, word_count) in words_per_line.iter().enumerate() {
// The line number doubles as the seed handed to lipsum.
text.push_str(&lipsum::lipsum_words_from_seed(*word_count, line_no as u64));
text.push('\n');
}
// Extra trailing newlines after the last generated line.
text.push_str("\n\n\n\n");
assert_eq!(text.len(), 2650); // The size for reference.

c.bench_function("unfill", |b| b.iter(|| textwrap::unfill(&text)));
}

// Register `benchmark` with criterion under the group name `benches`,
// which is then handed to `criterion_main!`.
criterion_group!(benches, benchmark);
criterion_main!(benches);
138 changes: 123 additions & 15 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,13 +214,18 @@ pub use word_splitters::WordSplitter;
pub mod wrap_algorithms;
pub use wrap_algorithms::WrapAlgorithm;

mod line_ending;
pub use line_ending::LineEnding;

pub mod core;

/// Holds configuration options for wrapping and filling text.
#[derive(Debug, Clone)]
pub struct Options<'a> {
/// The width in columns at which the text will be wrapped.
pub width: usize,
/// Line ending used for breaking lines.
pub line_ending: LineEnding,
/// Indentation used for the first line of output. See the
/// [`Options::initial_indent`] method.
pub initial_indent: &'a str,
Expand Down Expand Up @@ -248,6 +253,7 @@ impl<'a> From<&'a Options<'a>> for Options<'a> {
fn from(options: &'a Options<'a>) -> Self {
Self {
width: options.width,
line_ending: options.line_ending,
initial_indent: options.initial_indent,
subsequent_indent: options.subsequent_indent,
break_words: options.break_words,
Expand All @@ -268,12 +274,13 @@ impl<'a> Options<'a> {
/// Creates a new [`Options`] with the specified width. Equivalent to
///
/// ```
/// # use textwrap::{Options, WordSplitter, WordSeparator, WrapAlgorithm};
/// # use textwrap::{LineEnding, Options, WordSplitter, WordSeparator, WrapAlgorithm};
/// # let width = 80;
/// # let actual = Options::new(width);
/// # let expected =
/// Options {
/// width: width,
/// line_ending: LineEnding::LF,
/// initial_indent: "",
/// subsequent_indent: "",
/// break_words: true,
Expand All @@ -289,6 +296,7 @@ impl<'a> Options<'a> {
/// }
/// # ;
/// # assert_eq!(actual.width, expected.width);
/// # assert_eq!(actual.line_ending, expected.line_ending);
/// # assert_eq!(actual.initial_indent, expected.initial_indent);
/// # assert_eq!(actual.subsequent_indent, expected.subsequent_indent);
/// # assert_eq!(actual.break_words, expected.break_words);
Expand All @@ -301,6 +309,7 @@ impl<'a> Options<'a> {
pub const fn new(width: usize) -> Self {
Options {
width,
line_ending: LineEnding::LF,
initial_indent: "",
subsequent_indent: "",
break_words: true,
Expand Down Expand Up @@ -332,6 +341,28 @@ impl<'a> Options<'a> {
Self::new(termwidth())
}

/// Set [`self.line_ending`], the line ending used when breaking the
/// lines of the input text. Any of the supported [`LineEnding`]
/// values can be given.
///
/// The options are consumed and returned with the new line ending,
/// so the call can be chained with the other builder methods.
///
/// # Examples
///
/// ```
/// use textwrap::{refill, LineEnding, Options};
///
/// let options = Options::new(15).line_ending(LineEnding::CRLF);
/// assert_eq!(refill("This is a little example.", options),
///            "This is a\r\nlittle example.");
/// ```
///
/// [`self.line_ending`]: #structfield.line_ending
pub fn line_ending(mut self, line_ending: LineEnding) -> Self {
    // Only this one field changes; every other option is kept as-is.
    self.line_ending = line_ending;
    self
}

/// Change [`self.initial_indent`]. The initial indentation is
/// used on the very first line of output.
///
Expand Down Expand Up @@ -428,6 +459,7 @@ impl<'a> Options<'a> {
pub fn word_separator(self, word_separator: WordSeparator) -> Options<'a> {
Options {
width: self.width,
line_ending: self.line_ending,
initial_indent: self.initial_indent,
subsequent_indent: self.subsequent_indent,
break_words: self.break_words,
Expand All @@ -446,6 +478,7 @@ impl<'a> Options<'a> {
pub fn wrap_algorithm(self, wrap_algorithm: WrapAlgorithm) -> Options<'a> {
Options {
width: self.width,
line_ending: self.line_ending,
initial_indent: self.initial_indent,
subsequent_indent: self.subsequent_indent,
break_words: self.break_words,
Expand Down Expand Up @@ -473,6 +506,7 @@ impl<'a> Options<'a> {
pub fn word_splitter(self, word_splitter: WordSplitter) -> Options<'a> {
Options {
width: self.width,
line_ending: self.line_ending,
initial_indent: self.initial_indent,
subsequent_indent: self.subsequent_indent,
break_words: self.break_words,
Expand Down Expand Up @@ -546,13 +580,16 @@ pub fn fill<'a, Opt>(text: &str, width_or_options: Opt) -> String
where
Opt: Into<Options<'a>>,
{
let options = width_or_options.into();
let line_ending_str = options.line_ending.as_str();

// This will avoid reallocation in simple cases (no
// indentation, no hyphenation).
let mut result = String::with_capacity(text.len());

for (i, line) in wrap(text, width_or_options).iter().enumerate() {
for (i, line) in wrap(text, options).iter().enumerate() {
if i > 0 {
result.push('\n');
result.push_str(line_ending_str);
}
result.push_str(line);
}
Expand All @@ -578,11 +615,17 @@ where
/// textwrap: a small library for wrapping text.
/// ```
///
/// In addition, it will recognize a common prefix among the lines.
/// In addition, it will recognize a common prefix and a common line
/// ending among the lines.
///
/// The prefix of the first line is returned in
/// [`Options::initial_indent`] and the prefix (if any) of the
/// other lines is returned in [`Options::subsequent_indent`].
///
/// The line ending is returned in [`Options::line_ending`]. If the line
/// ending cannot be confidently detected (mixed or no line endings in
/// the input), [`LineEnding::LF`] will be returned.
///
/// In addition to `' '`, the prefixes can consist of characters used
/// for unordered lists (`'-'`, `'+'`, and `'*'`) and block quotes
/// (`'>'`) in Markdown as well as characters often used for inline
Expand All @@ -594,7 +637,7 @@ where
/// # Examples
///
/// ```
/// use textwrap::unfill;
/// use textwrap::{LineEnding, unfill};
koiuo marked this conversation as resolved.
Show resolved Hide resolved
///
/// let (text, options) = unfill("\
/// * This is an
Expand All @@ -605,13 +648,15 @@ where
/// assert_eq!(text, "This is an example of a list item.\n");
/// assert_eq!(options.initial_indent, "* ");
/// assert_eq!(options.subsequent_indent, " ");
/// assert_eq!(options.line_ending, LineEnding::LF);
/// ```
pub fn unfill(text: &str) -> (String, Options<'_>) {
let trimmed = text.trim_end_matches('\n');
let line_ending_pat: &[_] = &['\r', '\n'];
let trimmed = text.trim_end_matches(line_ending_pat);
let prefix_chars: &[_] = &[' ', '-', '+', '*', '>', '#', '/'];

let mut options = Options::new(0);
mgeisler marked this conversation as resolved.
Show resolved Hide resolved
for (idx, line) in trimmed.split('\n').enumerate() {
for (idx, line) in trimmed.lines().enumerate() {
options.width = std::cmp::max(options.width, core::display_width(line));
let without_prefix = line.trim_start_matches(prefix_chars);
let prefix = &line[..line.len() - without_prefix.len()];
Expand All @@ -634,16 +679,24 @@ pub fn unfill(text: &str) -> (String, Options<'_>) {
}

let mut unfilled = String::with_capacity(text.len());
for (idx, line) in trimmed.split('\n').enumerate() {
if idx == 0 {
let mut detected_line_ending = None;

for (line, ending) in line_ending::NonEmptyLines(text) {
if unfilled.is_empty() {
unfilled.push_str(&line[options.initial_indent.len()..]);
} else {
unfilled.push(' ');
unfilled.push_str(&line[options.subsequent_indent.len()..]);
}
match (detected_line_ending, ending) {
(None, Some(_)) => detected_line_ending = ending,
(Some(LineEnding::CRLF), Some(LineEnding::LF)) => detected_line_ending = ending,
koiuo marked this conversation as resolved.
Show resolved Hide resolved
_ => (),
}
}

unfilled.push_str(&text[trimmed.len()..]);

options.line_ending = detected_line_ending.unwrap_or(LineEnding::LF);
(unfilled, options)
}

Expand Down Expand Up @@ -705,9 +758,9 @@ pub fn refill<'a, Opt>(filled_text: &str, new_width_or_options: Opt) -> String
where
Opt: Into<Options<'a>>,
{
let trimmed = filled_text.trim_end_matches('\n');
let mut new_options = new_width_or_options.into();
let trimmed = filled_text.trim_end_matches(new_options.line_ending.as_str());
let (text, options) = unfill(trimmed);
let mut new_options: Options = new_width_or_options.into();
new_options.initial_indent = options.initial_indent;
new_options.subsequent_indent = options.subsequent_indent;
let mut refilled = fill(&text, new_options);
Expand Down Expand Up @@ -891,6 +944,8 @@ where
{
let options: Options = width_or_options.into();

let line_ending_str = options.line_ending.as_str();

let initial_width = options
.width
.saturating_sub(core::display_width(options.initial_indent));
Expand All @@ -899,7 +954,7 @@ where
.saturating_sub(core::display_width(options.subsequent_indent));

let mut lines = Vec::new();
for line in text.split('\n') {
for line in text.split(line_ending_str) {
let words = options.word_separator.find_words(line);
let split_words = word_splitters::split_words(words, &options.word_splitter);
let broken_words = if options.break_words {
Expand Down Expand Up @@ -1095,10 +1150,11 @@ where
/// [`fill`] with these options:
///
/// ```
/// # use textwrap::{core, Options, WordSplitter, WordSeparator, WrapAlgorithm};
/// # use textwrap::{core, LineEnding, Options, WordSplitter, WordSeparator, WrapAlgorithm};
/// # let width = 80;
/// Options {
/// width: width,
/// line_ending: LineEnding::LF,
/// initial_indent: "",
/// subsequent_indent: "",
/// break_words: false,
Expand Down Expand Up @@ -1292,7 +1348,7 @@ mod tests {
assert_eq!(
wrap(
"Hello, World!",
Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties)
Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties),
),
vec!["Hello, W", "orld!"]
);
Expand Down Expand Up @@ -1697,6 +1753,34 @@ mod tests {
let (text, options) = unfill("foo\nbar");
assert_eq!(text, "foo bar");
assert_eq!(options.width, 3);
assert_eq!(options.line_ending, LineEnding::LF);
}

/// Text without any line break comes back unchanged, and the detected
/// line ending defaults to `LineEnding::LF`.
#[test]
fn unfill_no_new_line() {
    let (unfilled, detected) = unfill("foo bar");
    assert_eq!(unfilled, "foo bar");
    assert_eq!(detected.width, 7);
    assert_eq!(detected.line_ending, LineEnding::LF);
}

/// Lines separated by `\r\n` are joined with a single space and the
/// detected line ending is `LineEnding::CRLF`.
#[test]
fn unfill_simple_crlf() {
mgeisler marked this conversation as resolved.
Show resolved Hide resolved
let (text, options) = unfill("foo\r\nbar");
assert_eq!(text, "foo bar");
assert_eq!(options.width, 3);
assert_eq!(options.line_ending, LineEnding::CRLF);
}

/// When the input mixes `\r\n` and `\n`, we want to fall back to `\n`:
/// 1. it is the default, and
/// 2. it still matches both `\n` and `\r\n`, whereas `\r\n` would not
///    match a lone `\n`.
#[test]
fn unfill_mixed_new_lines() {
    let (unfilled, detected) = unfill("foo\r\nbar\nbaz");
    assert_eq!(unfilled, "foo bar baz");
    assert_eq!(detected.width, 3);
    assert_eq!(detected.line_ending, LineEnding::LF);
}

#[test]
Expand All @@ -1706,6 +1790,22 @@ mod tests {
assert_eq!(options.width, 3);
}

/// Trailing line breaks are kept verbatim after the joined text, and
/// mixed line endings in the input yield `LineEnding::LF`.
#[test]
fn unfill_mixed_trailing_newlines() {
    let (unfilled, detected) = unfill("foo\r\nbar\n\r\n\n");
    assert_eq!(unfilled, "foo bar\n\r\n\n");
    assert_eq!(detected.width, 3);
    assert_eq!(detected.line_ending, LineEnding::LF);
}

/// A single line terminated by `\r\n` keeps its trailing line break and
/// is detected as `LineEnding::CRLF`.
#[test]
fn unfill_trailing_crlf() {
    let (unfilled, detected) = unfill("foo bar\r\n");
    assert_eq!(unfilled, "foo bar\r\n");
    assert_eq!(detected.width, 7);
    assert_eq!(detected.line_ending, LineEnding::CRLF);
}

#[test]
fn unfill_initial_indent() {
let (text, options) = unfill(" foo\nbar\nbaz");
Expand Down Expand Up @@ -1755,6 +1855,14 @@ mod tests {
assert_eq!(unfill("foo bar").0, "foo bar");
}

#[test]
fn refill_convert_crlf() {
assert_eq!(
refill("foo\nbar\n", Options::new(5).line_ending(LineEnding::CRLF)),
"foo\r\nbar\n",
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think it's possible to make this return "foo\r\nbar\r\n" in an easy way? I'm thinking that the problem might be in

    let line_ending_pat: &[_] = &['\r', '\n'];
    let trimmed = text.trim_end_matches(line_ending_pat);

which makes us eat the line ending (only to append it later without consideration for the detected line ending).

If it's not easy, then don't worry: what we have now is still great. We can fix this corner case later.

);
}

#[test]
fn wrap_columns_empty_text() {
assert_eq!(wrap_columns("", 1, 10, "| ", "", " |"), vec!["| |"]);
Expand Down
Loading