diff --git a/benches/linear.rs b/benches/linear.rs index 9478fe2b..2d8ca28b 100644 --- a/benches/linear.rs +++ b/benches/linear.rs @@ -31,7 +31,7 @@ pub fn benchmark(c: &mut Criterion) { #[cfg(feature = "unicode-linebreak")] { let options = textwrap::Options::new(LINE_LENGTH) - .wrap_algorithm(textwrap::wrap_algorithms::OptimalFit) + .wrap_algorithm(textwrap::wrap_algorithms::OptimalFit::new()) .word_separator(textwrap::word_separators::UnicodeBreakProperties); group.bench_with_input( BenchmarkId::new("fill_optimal_fit_unicode", length), @@ -43,7 +43,7 @@ pub fn benchmark(c: &mut Criterion) { } let options = textwrap::Options::new(LINE_LENGTH) - .wrap_algorithm(textwrap::wrap_algorithms::OptimalFit) + .wrap_algorithm(textwrap::wrap_algorithms::OptimalFit::new()) .word_separator(textwrap::word_separators::AsciiSpace); group.bench_with_input( BenchmarkId::new("fill_optimal_fit_ascii", length), diff --git a/examples/interactive.rs b/examples/interactive.rs index 523a44b9..92ed9455 100644 --- a/examples/interactive.rs +++ b/examples/interactive.rs @@ -109,12 +109,20 @@ mod unix_only { #[cfg(feature = "smawk")] { + // The OptimalFit struct formats itself with a ton of + // parameters. This removes the parameters, leaving only + // the struct name behind. 
+ let wrap_algorithm_label = format!("{:?}", options.wrap_algorithm) + .split(' ') + .next() + .unwrap() + .to_string(); write!( stdout, - "{}- algorithm: {}{:?}{} (toggle with Ctrl-o)", + "{}- algorithm: {}{}{} (toggle with Ctrl-o)", cursor::Goto(left_col, left_row), style::Bold, - options.wrap_algorithm, + wrap_algorithm_label, style::Reset, )?; left_row += 1; @@ -233,9 +241,9 @@ mod unix_only { pub fn main() -> Result<(), io::Error> { let mut wrap_algorithms: Vec> = - vec![Box::new(wrap_algorithms::FirstFit)]; + vec![Box::new(wrap_algorithms::FirstFit::new())]; #[cfg(feature = "smawk")] - wrap_algorithms.push(Box::new(wrap_algorithms::OptimalFit)); + wrap_algorithms.push(Box::new(wrap_algorithms::OptimalFit::new())); let mut word_splitters: Vec> = vec![ Box::new(word_splitters::HyphenSplitter), diff --git a/examples/wasm/Cargo.lock b/examples/wasm/Cargo.lock index 6b604393..cfbc918e 100644 --- a/examples/wasm/Cargo.lock +++ b/examples/wasm/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
+version = 3 + [[package]] name = "aho-corasick" version = "0.7.18" @@ -127,7 +129,7 @@ dependencies = [ [[package]] name = "textwrap" -version = "0.13.4" +version = "0.14.0" dependencies = [ "smawk", "unicode-linebreak", diff --git a/examples/wasm/src/lib.rs b/examples/wasm/src/lib.rs index 67869abb..f3a24d31 100644 --- a/examples/wasm/src/lib.rs +++ b/examples/wasm/src/lib.rs @@ -5,7 +5,7 @@ use wasm_bindgen::JsCast; use textwrap::core; use textwrap::word_separators::{AsciiSpace, UnicodeBreakProperties, WordSeparator}; use textwrap::word_splitters::{split_words, HyphenSplitter, NoHyphenation, WordSplitter}; -use textwrap::wrap_algorithms::{wrap_first_fit, wrap_optimal_fit}; +use textwrap::wrap_algorithms::{wrap_first_fit, wrap_optimal_fit, OptimalFit}; #[wasm_bindgen] extern "C" { @@ -248,6 +248,48 @@ pub enum WasmWrapAlgorithm { OptimalFit = "OptimalFit", } +#[wasm_bindgen] +#[derive(Copy, Clone, Debug, Default)] +pub struct WasmOptimalFit { + pub nline_penalty: i32, + pub overflow_penalty: i32, + pub short_last_line_fraction: usize, + pub short_last_line_penalty: i32, + pub hyphen_penalty: i32, +} + +#[wasm_bindgen] +impl WasmOptimalFit { + #[wasm_bindgen(constructor)] + pub fn new( + nline_penalty: i32, + overflow_penalty: i32, + short_last_line_fraction: usize, + short_last_line_penalty: i32, + hyphen_penalty: i32, + ) -> WasmOptimalFit { + WasmOptimalFit { + nline_penalty, + overflow_penalty, + short_last_line_fraction, + short_last_line_penalty, + hyphen_penalty, + } + } +} + +impl Into for WasmOptimalFit { + fn into(self) -> OptimalFit { + OptimalFit { + nline_penalty: self.nline_penalty, + overflow_penalty: self.overflow_penalty, + short_last_line_fraction: self.short_last_line_fraction, + short_last_line_penalty: self.short_last_line_penalty, + hyphen_penalty: self.hyphen_penalty, + } + } +} + #[wasm_bindgen] #[derive(Copy, Clone, Debug)] pub struct WasmOptions { @@ -256,6 +298,7 @@ pub struct WasmOptions { pub word_separator: WasmWordSeparator, pub 
word_splitter: WasmWordSplitter, pub wrap_algorithm: WasmWrapAlgorithm, + pub optimal_fit: WasmOptimalFit, } #[wasm_bindgen] @@ -267,6 +310,7 @@ impl WasmOptions { word_separator: WasmWordSeparator, word_splitter: WasmWordSplitter, wrap_algorithm: WasmWrapAlgorithm, + optimal_fit: WasmOptimalFit, ) -> WasmOptions { WasmOptions { width, @@ -274,6 +318,7 @@ impl WasmOptions { word_separator, word_splitter, wrap_algorithm, + optimal_fit, } } } @@ -325,7 +370,10 @@ pub fn draw_wrapped_text( let line_lengths = [options.width * PRECISION]; let wrapped_words = match options.wrap_algorithm { WasmWrapAlgorithm::FirstFit => wrap_first_fit(&canvas_words, &line_lengths), - WasmWrapAlgorithm::OptimalFit => wrap_optimal_fit(&canvas_words, &line_lengths), + WasmWrapAlgorithm::OptimalFit => { + let penalties = options.optimal_fit.into(); + wrap_optimal_fit(&canvas_words, &line_lengths, &penalties) + } _ => Err("WasmOptions has an invalid wrap_algorithm field")?, }; diff --git a/examples/wasm/www/index.html b/examples/wasm/www/index.html index c71f77c3..50fe3bf8 100644 --- a/examples/wasm/www/index.html +++ b/examples/wasm/www/index.html @@ -80,15 +80,43 @@

Textwrap WebAssembly Demo

-
diff --git a/examples/wasm/www/index.js b/examples/wasm/www/index.js index 0aeb713d..6f74dfc5 100644 --- a/examples/wasm/www/index.js +++ b/examples/wasm/www/index.js @@ -1,4 +1,4 @@ -import { draw_wrapped_text, WasmOptions } from "textwrap-wasm-demo"; +import { draw_wrapped_text, WasmOptions, WasmOptimalFit } from "textwrap-wasm-demo"; fetch("build-info.json").then(response => response.json()).then(buildInfo => { if (buildInfo.date && buildInfo.commit) { @@ -25,24 +25,45 @@ function redraw(event) { let breakWords = document.getElementById("break-words").checked; let wordSeparator = document.getElementById("word-separator").value; let wordSplitter = document.getElementById("word-splitter").value; - // TODO: The optimal-fit algorithm does not work well for - // proportional fonts, so we always use FirstFit. See - // https://github.com/mgeisler/textwrap/issues/326. - let wrapAlgorithm = "FirstFit"; // document.getElementById("wrap-algorithm").value; - let options = new WasmOptions(lineWidth, breakWords, wordSeparator, wordSplitter, wrapAlgorithm); - draw_wrapped_text(ctx, options, text); + let wrapAlgorithm = document.getElementById("wrap-algorithm").value; + let optimalFit = new WasmOptimalFit(document.getElementById("nline-penalty").valueAsNumber, + document.getElementById("overflow-penalty").valueAsNumber, + document.getElementById("short-line-fraction").valueAsNumber, + document.getElementById("short-last-line-penalty").valueAsNumber, + document.getElementById("hyphen-penalty").valueAsNumber); + let options = new WasmOptions(lineWidth, breakWords, wordSeparator, wordSplitter, wrapAlgorithm, optimalFit); + draw_wrapped_text(ctx, options, text, optimalFit); } -document.getElementById("line-width").addEventListener("input", (event) => { - let lineWidthText = document.getElementById("line-width-text"); - lineWidthText.value = event.target.valueAsNumber; +document.getElementById("wrap-algorithm").addEventListener("input", (event) => { + let disableOptimalFitParams = 
(event.target.value == "FirstFit"); + let rangeInputIds = ["nline-penalty", + "overflow-penalty", + "short-line-fraction", + "short-last-line-penalty", + "hyphen-penalty"]; + rangeInputIds.forEach((rangeInputId) => { + let rangeInput = document.getElementById(rangeInputId); + let textInput = document.getElementById(`${rangeInputId}-text`); + rangeInput.disabled = disableOptimalFitParams; + textInput.disabled = disableOptimalFitParams; + }); }); -document.getElementById("line-width-text").addEventListener("input", (event) => { - let lineWidth = document.getElementById("line-width"); - lineWidth.value = event.target.valueAsNumber; -}); +document.querySelectorAll("input[type=range]").forEach((rangeInput) => { + let textInput = document.getElementById(`${rangeInput.id}-text`); + textInput.min = rangeInput.min; + textInput.max = rangeInput.max; + textInput.value = rangeInput.value; + + rangeInput.addEventListener("input", (event) => { + textInput.value = rangeInput.valueAsNumber; + }); + textInput.addEventListener("input", (event) => { + rangeInput.value = textInput.valueAsNumber; + }); +}); document.querySelectorAll("textarea, select, input").forEach((elem) => { elem.addEventListener("input", redraw); diff --git a/src/lib.rs b/src/lib.rs index 3417d31b..02cc0e40 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -317,9 +317,9 @@ impl<'a> /// #[cfg(not(feature = "unicode-linebreak"))] /// word_separator: textwrap::word_separators::AsciiSpace, /// #[cfg(feature = "smawk")] - /// wrap_algorithm: textwrap::wrap_algorithms::OptimalFit, + /// wrap_algorithm: textwrap::wrap_algorithms::OptimalFit::new(), /// #[cfg(not(feature = "smawk"))] - /// wrap_algorithm: textwrap::wrap_algorithms::FirstFit, + /// wrap_algorithm: textwrap::wrap_algorithms::FirstFit::new(), /// word_splitter: textwrap::word_splitters::HyphenSplitter, /// } /// # ; @@ -429,9 +429,9 @@ impl<'a, WordSplit> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!() /// #[cfg(not(feature = "unicode-linebreak"))] 
/// word_separator: textwrap::word_separators::AsciiSpace, /// #[cfg(feature = "smawk")] - /// wrap_algorithm: textwrap::wrap_algorithms::OptimalFit, + /// wrap_algorithm: textwrap::wrap_algorithms::OptimalFit::new(), /// #[cfg(not(feature = "smawk"))] - /// wrap_algorithm: textwrap::wrap_algorithms::FirstFit, + /// wrap_algorithm: textwrap::wrap_algorithms::FirstFit::new(), /// word_splitter: word_splitter, /// } /// # ; @@ -492,7 +492,7 @@ impl<'a, WordSplit> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!() subsequent_indent: "", break_words: true, word_separator: DefaultWordSeparator!(), - wrap_algorithm: DefaultWrapAlgorithm!(), + wrap_algorithm: ::new(), word_splitter: word_splitter, } } @@ -993,7 +993,7 @@ where /// # use textwrap::wrap_algorithms::OptimalFit; /// # /// # let lines = wrap("To be, or not to be: that is the question", -/// # Options::new(10).wrap_algorithm(OptimalFit)); +/// # Options::new(10).wrap_algorithm(OptimalFit::new())); /// # assert_eq!(lines.join("\n") + "\n", "\ /// To be, /// or not to diff --git a/src/wrap_algorithms.rs b/src/wrap_algorithms.rs index 368ef2a4..c216e708 100644 --- a/src/wrap_algorithms.rs +++ b/src/wrap_algorithms.rs @@ -74,9 +74,22 @@ impl WrapAlgorithm for Box { /// This algorithm uses no look-ahead when finding line breaks. /// Implemented by [`wrap_first_fit`], please see that function for /// details and examples. -#[derive(Clone, Copy, Debug, Default)] +#[derive(Clone, Copy, Debug)] pub struct FirstFit; +impl FirstFit { + /// Create a new empty struct. 
+ pub const fn new() -> Self { + FirstFit + } +} + +impl Default for FirstFit { + fn default() -> Self { + Self::new() + } +} + impl WrapAlgorithm for FirstFit { #[inline] fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> { @@ -107,7 +120,7 @@ impl WrapAlgorithm for FirstFit { /// /// ``` /// use textwrap::core::Word; -/// use textwrap::wrap_algorithms; +/// use textwrap::wrap_algorithms::wrap_first_fit; /// use textwrap::word_separators::{AsciiSpace, WordSeparator}; /// /// // Helper to convert wrapped lines to a Vec. @@ -119,7 +132,7 @@ impl WrapAlgorithm for FirstFit { /// /// let text = "These few words will unfortunately not wrap nicely."; /// let words = AsciiSpace.find_words(text).collect::>(); -/// assert_eq!(lines_to_strings(wrap_algorithms::wrap_first_fit(&words, &[15])), +/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15])), /// vec!["These few words", /// "will", // <-- short line /// "unfortunately", @@ -128,7 +141,9 @@ impl WrapAlgorithm for FirstFit { /// /// // We can avoid the short line if we look ahead: /// #[cfg(feature = "smawk")] -/// assert_eq!(lines_to_strings(wrap_algorithms::wrap_optimal_fit(&words, &[15])), +/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OptimalFit}; +/// #[cfg(feature = "smawk")] +/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15], &OptimalFit::new())), /// vec!["These few", /// "words will", /// "unfortunately", diff --git a/src/wrap_algorithms/optimal_fit.rs b/src/wrap_algorithms/optimal_fit.rs index 95ecf1f7..7e3c99e2 100644 --- a/src/wrap_algorithms/optimal_fit.rs +++ b/src/wrap_algorithms/optimal_fit.rs @@ -6,18 +6,131 @@ use crate::wrap_algorithms::WrapAlgorithm; /// Wrap words using an advanced algorithm with look-ahead. /// /// This wrapping algorithm considers the entire paragraph to find -/// optimal line breaks. Implemented by [`wrap_optimal_fit`], please -/// see that function for details and examples. +/// optimal line breaks. 
When wrapping text, "penalties" are assigned +/// to line breaks based on the gaps left at the end of lines. The +/// penalties are given by this struct, with [`OptimalFit::default`] +/// assigning penalties that work well for monospace text. +/// +/// If you are wrapping proportional text, you are advised to assign +/// your own penalties according to your font size. See the individual +/// penalties below for details. +/// +/// The underlying wrapping algorithm is implemented by +/// [`wrap_optimal_fit`], please see that function for examples. /// /// **Note:** Only available when the `smawk` Cargo feature is /// enabled. -#[derive(Clone, Copy, Debug, Default)] -pub struct OptimalFit; +#[derive(Clone, Copy, Debug)] +pub struct OptimalFit { + /// Per-line penalty. This is added for every line, which makes it + /// expensive to output more lines than the minimum required. + pub nline_penalty: i32, + + /// Per-character cost for lines that overflow the target line width. + /// + /// With a default value of 50², every single character costs as + /// much as leaving a gap of 50 characters behind. This is because + /// we assign a cost of `gap * gap` to a short line. When + /// wrapping monospace text, we can overflow the line by 1 + /// character in extreme cases: + /// + /// ``` + /// use textwrap::wrap_algorithms::{wrap_optimal_fit, OptimalFit}; + /// use textwrap::core::Word; + /// + /// let short = "foo "; + /// let long = "x".repeat(50); + /// let fragments = vec![Word::from(short), Word::from(&long)]; + /// let penalties = OptimalFit::new(); + /// + /// // Perfect fit, both words are on a single line with no overflow. + /// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len()], &penalties); + /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); + /// + /// // The words no longer fit, yet we get a single line back.
While + /// // the cost of overflow (`1 * 2500`) is the same as the cost of the + /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty` + /// // which makes it cheaper to overflow than to use two lines. + /// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 1], &penalties); + /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); + /// + /// // The cost of overflow would be 2 * 2500, whereas the cost of + /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 = + /// // 3401`. We therefore get two lines. + /// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 2], &penalties); + /// assert_eq!(wrapped, vec![&[Word::from(short)], + /// &[Word::from(&long)]]); + /// ``` + /// + /// This only happens if the overflowing word is 50 characters + /// long _and_ if the word overflows the line by exactly one + /// character. If it overflows by more than one character, the + /// overflow penalty will quickly outgrow the cost of the gap, as + /// seen above. + pub overflow_penalty: i32, + + /// When should the last line be considered "short"? + /// + /// If the last line of the text is shorter than `1 / + /// short_last_line_fraction` of the line width, it will be + /// considered "short". Short lines will have + /// `short_last_line_penalty` added as an extra penalty. + /// + /// The effect of this is to avoid a final line consisting of just + /// a single word. For example, with a `short_last_line_penalty` + /// of 25 (the default), a gap of up to 5 columns will be seen as + /// more desirable than having a final short line. So you get + /// + /// ```text + /// This is a demo of the short last + /// line penalty. + /// ``` + /// + /// instead of + /// + /// ```text + /// This is a demo of the short last line + /// penalty. + /// ``` + pub short_last_line_fraction: usize, + + /// Penalty for a short last line. + /// + /// Set this to zero if you do not want to penalize short last lines. 
+ pub short_last_line_penalty: i32, + + /// Penalty for lines ending with a hyphen. + pub hyphen_penalty: i32, +} + +impl OptimalFit { + /// Default penalties for monospace text. + /// + /// The penalties here work well for monospace text. This is + /// because they expect the gaps at the end of lines to be roughly + /// in the range `0..100`. If the gaps are larger, the + /// `overflow_penalty` and `hyphen_penalty` become insignificant. + pub const fn new() -> Self { + OptimalFit { + nline_penalty: 1000, + overflow_penalty: 50 * 50, + short_last_line_fraction: 4, + short_last_line_penalty: 25, + hyphen_penalty: 25, + } + } +} + +impl Default for OptimalFit { + fn default() -> Self { + Self::new() + } +} impl WrapAlgorithm for OptimalFit { #[inline] fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> { - wrap_optimal_fit(words, line_widths) + wrap_optimal_fit(words, line_widths, &self) } } @@ -47,59 +160,6 @@ impl LineNumbers { } } -/// Per-line penalty. This is added for every line, which makes it -/// expensive to output more lines than the minimum required. -const NLINE_PENALTY: i32 = 1000; - -/// Per-character cost for lines that overflow the target line width. -/// -/// With a value of 50², every single character costs as much as -/// leaving a gap of 50 characters behind. This is becuase we assign -/// as cost of `gap * gap` to a short line. This means that we can -/// overflow the line by 1 character in extreme cases: -/// -/// ``` -/// use textwrap::wrap_algorithms::wrap_optimal_fit; -/// use textwrap::core::Word; -/// -/// let short = "foo "; -/// let long = "x".repeat(50); -/// let fragments = vec![Word::from(short), Word::from(&long)]; -/// -/// // Perfect fit, both words are on a single line with no overflow. 
-/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len()]); -/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); -/// -/// // The words no longer fit, yet we get a single line back. While -/// // the cost of overflow (`1 * 2500`) is the same as the cost of the -/// // gap (`50 * 50 = 2500`), the tie is broken by `NLINE_PENALTY` -/// // which makes it cheaper to overflow than to use two lines. -/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 1]); -/// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); -/// -/// // The cost of overflow would be 2 * 2500, whereas the cost of the -/// // gap is only `49 * 49 + NLINE_PENALTY = 2401 + 1000 = 3401`. We -/// // therefore get two lines. -/// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 2]); -/// assert_eq!(wrapped, vec![&[Word::from(short)], -/// &[Word::from(&long)]]); -/// ``` -/// -/// This only happens if the overflowing word is 50 characters long -/// _and_ if it happens to overflow the line by exactly one character. -/// If it overflows by more than one character, the overflow penalty -/// will quickly outgrow the cost of the gap, as seen above. -const OVERFLOW_PENALTY: i32 = 50 * 50; - -/// The last line is short if it is less than 1/4 of the target width. -const SHORT_LINE_FRACTION: usize = 4; - -/// Penalize a short last line. -const SHORT_LAST_LINE_PENALTY: i32 = 25; - -/// Penalty for lines ending with a hyphen. -const HYPHEN_PENALTY: i32 = 25; - /// Wrap abstract fragments into lines with an optimal-fit algorithm. /// /// The `line_widths` slice gives the target line width for each line @@ -178,6 +238,7 @@ const HYPHEN_PENALTY: i32 = 25; pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( fragments: &'a [T], line_widths: &'b [usize], + penalties: &'b OptimalFit, ) -> Vec<&'a [T]> { // The final line width is used for all remaining lines. 
let default_line_width = line_widths.last().copied().unwrap_or(0); @@ -211,30 +272,30 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( // breaking before fragments[i]. // // First, every extra line cost NLINE_PENALTY. - let mut cost = minima[i].1 + NLINE_PENALTY; + let mut cost = minima[i].1 + penalties.nline_penalty; // Next, we add a penalty depending on the line length. if line_width > target_width { // Lines that overflow get a hefty penalty. let overflow = (line_width - target_width) as i32; - cost += overflow * OVERFLOW_PENALTY; + cost += overflow * penalties.overflow_penalty; } else if j < fragments.len() { // Other lines (except for the last line) get a milder // penalty which depend on the size of the gap. let gap = (target_width - line_width) as i32; cost += gap * gap; - } else if i + 1 == j && line_width < target_width / SHORT_LINE_FRACTION { + } else if i + 1 == j && line_width < target_width / penalties.short_last_line_fraction { // The last line can have any size gap, but we do add a // penalty if the line is very short (typically because it // contains just a single word). - cost += SHORT_LAST_LINE_PENALTY; + cost += penalties.short_last_line_penalty; } // Finally, we discourage hyphens. if fragments[j - 1].penalty_width() > 0 { // TODO: this should use a penalty value from the fragment // instead. - cost += HYPHEN_PENALTY; + cost += penalties.hyphen_penalty; } cost