From 79b9cc7fb94fdc80d13dc3b1b6ee7e29f1b86ced Mon Sep 17 00:00:00 2001 From: "Pavel N. Krivitsky" Date: Fri, 4 Aug 2017 10:06:39 +1000 Subject: [PATCH 1/8] Added an option to tidy_source() to enforce a strict maximum line length. --- R/tidy.R | 40 ++++++++++++++++++++++++++++++++++++---- man/tidy_source.Rd | 29 ++++++++++++++++++----------- 2 files changed, 54 insertions(+), 15 deletions(-) diff --git a/R/tidy.R b/R/tidy.R index 8ba1079..dbe15d0 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -23,6 +23,11 @@ #' @param width.cutoff passed to \code{\link{deparse}}: integer in [20, 500] #' determining the cutoff at which line-breaking is tried (default to be #' \code{getOption("width")}) +#' @param width.strict if \code{TRUE}, rather than being passed +#' directly to \code{\link{deparse}}, \code{width.cutoff} is treated +#' as a hard upper bound on the row width, with the argument to +#' \code{\link{deparse}} chosen adaptively to achieve this upper +#' bound, if possible #' @param ... other arguments passed to \code{\link{cat}}, e.g. \code{file} #' (this can be useful for batch-processing R scripts, e.g. #' \code{tidy_source(source = 'input.R', file = 'output.R')}) @@ -45,7 +50,8 @@ tidy_source = function( brace.newline = getOption('formatR.brace.newline', FALSE), indent = getOption('formatR.indent', 4), output = TRUE, text = NULL, - width.cutoff = getOption('width'), ... + width.cutoff = getOption('width'), + width.strict = FALSE, ... 
) { if (is.null(text)) { if (source == 'clipboard' && Sys.info()['sysname'] == 'Darwin') { @@ -66,7 +72,7 @@ tidy_source = function( } on.exit(.env$line_break <- NULL, add = TRUE) if (comment) text = mask_comments(text, width.cutoff, blank) - text.mask = tidy_block(text, width.cutoff, arrow && length(grep('=', text))) + text.mask = tidy_block(text, width.cutoff, arrow && length(grep('=', text)), width.strict) text.tidy = if (comment) unmask_source(text.mask) else text.mask text.tidy = reindent_lines(text.tidy, indent) if (brace.newline) text.tidy = move_leftbrace(text.tidy) @@ -84,12 +90,38 @@ mat.comment = sprintf('invisible\\("\\%s([^"]*)\\%s"\\)', begin.comment, end.com inline.comment = ' %InLiNe_IdEnTiFiEr%[ ]*"([ ]*#[^"]*)"' blank.comment = sprintf('invisible("%s%s")', begin.comment, end.comment) +# wrapper around deparse() that enforces a strict maximum line width +strict_deparse = function(..., max.width, width.cutoff=getOption('width')){ + wcmin = 19L # If deparse() can't manage it with width.cutoff <= 20, issue a warning. + wcmax = 500L + # A binary search to find the greatest width.cutoff such that the width of the longest line <= max.width. + repeat{ + guess = ceiling((wcmin+wcmax)/2) + if(guess<20){ + # If it's induced by a comment, don't complain. + if(!length(grep(pat.comment,deparse(..., width.cutoff=500L)))) + warning("Unable to find a suitable adaptive cut-off. 
Falling back to width.cutoff.") + return(trimws(deparse(..., width.cutoff=width.cutoff), "right")) + } + o = trimws(deparse(..., width.cutoff=guess), "right") + + if(wcmax==wcmin) break + + l = max(nchar(o)) + if(l>max.width) wcmax = guess-1 else wcmin = guess + } + o +} + # wrapper around parse() and deparse() -tidy_block = function(text, width = getOption('width'), arrow = FALSE) { +tidy_block = function(text, width = getOption('width'), arrow = FALSE, width.strict = FALSE) { exprs = parse_only(text) if (length(exprs) == 0) return(character(0)) exprs = if (arrow) replace_assignment(exprs) else as.list(exprs) - sapply(exprs, function(e) paste(base::deparse(e, width), collapse = '\n')) + if(width.strict) + sapply(exprs, function(e) paste(strict_deparse(e, max.width=width, width.cutoff=width), collapse = '\n')) + else + sapply(exprs, function(e) paste(base::deparse(e, width), collapse = '\n')) } # Restore the real source code from the masked text diff --git a/man/tidy_source.Rd b/man/tidy_source.Rd index c5a61e2..c274c52 100644 --- a/man/tidy_source.Rd +++ b/man/tidy_source.Rd @@ -4,11 +4,12 @@ \alias{tidy_source} \title{Reformat R code while preserving blank lines and comments} \usage{ -tidy_source(source = "clipboard", comment = getOption("formatR.comment", - TRUE), blank = getOption("formatR.blank", TRUE), arrow = getOption("formatR.arrow", - FALSE), brace.newline = getOption("formatR.brace.newline", FALSE), - indent = getOption("formatR.indent", 4), output = TRUE, text = NULL, - width.cutoff = getOption("width"), ...) +tidy_source(source = "clipboard", comment = getOption("formatR.comment", + TRUE), blank = getOption("formatR.blank", TRUE), + arrow = getOption("formatR.arrow", FALSE), + brace.newline = getOption("formatR.brace.newline", FALSE), + indent = getOption("formatR.indent", 4), output = TRUE, text = NULL, + width.cutoff = getOption("width"), width.strict = FALSE, ...) 
} \arguments{ \item{source}{a character string: location of the source code (default to be @@ -38,6 +39,12 @@ ignored} determining the cutoff at which line-breaking is tried (default to be \code{getOption("width")})} +\item{width.strict}{if \code{TRUE}, rather than being passed +directly to \code{\link{deparse}}, \code{width.cutoff} is treated +as a hard upper bound on the row width, with the argument to +\code{\link{deparse}} chosen adaptively to achieve this upper +bound, if possible} + \item{...}{other arguments passed to \code{\link{cat}}, e.g. \code{file} (this can be useful for batch-processing R scripts, e.g. \code{tidy_source(source = 'input.R', file = 'output.R')})} @@ -61,14 +68,14 @@ Be sure to read the reference to know other limitations. library(formatR) ## a messy R script -messy = system.file("format", "messy.R", package = "formatR") +messy = system.file('format', 'messy.R', package = 'formatR') tidy_source(messy) ## use the 'text' argument src = readLines(messy) ## source code -cat(src, sep = "\\n") +cat(src, sep = '\\n') ## the formatted version tidy_source(text = src) @@ -106,13 +113,13 @@ tidy_source(x) ## if you've copied R code into the clipboard if (interactive()) { - tidy_source("clipboard") - ## write into clipboard again - tidy_source("clipboard", file = "clipboard") +tidy_source("clipboard") +## write into clipboard again +tidy_source("clipboard", file = "clipboard") } ## the if-else structure -tidy_source(text = c("{if(TRUE)1 else 2; if(FALSE){1+1", "## comments", "} else 2}")) +tidy_source(text=c('{if(TRUE)1 else 2; if(FALSE){1+1',"## comments",'} else 2}')) } \references{ \url{https://yihui.name/formatR} (an introduction to this package, From 58106bb01c5fbafba64f56026d6b669f37de4600 Mon Sep 17 00:00:00 2001 From: "Pavel N. Krivitsky" Date: Fri, 4 Aug 2017 13:07:44 +1000 Subject: [PATCH 2/8] In strict_deparse(), renamed max.width= to width.max= for consistency with other functions. 
--- R/tidy.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/tidy.R b/R/tidy.R index dbe15d0..27ff70b 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -91,10 +91,10 @@ inline.comment = ' %InLiNe_IdEnTiFiEr%[ ]*"([ ]*#[^"]*)"' blank.comment = sprintf('invisible("%s%s")', begin.comment, end.comment) # wrapper around deparse() that enforces a strict maximum line width -strict_deparse = function(..., max.width, width.cutoff=getOption('width')){ +strict_deparse = function(..., width.max, width.cutoff=getOption('width')){ wcmin = 19L # If deparse() can't manage it with width.cutoff <= 20, issue a warning. wcmax = 500L - # A binary search to find the greatest width.cutoff such that the width of the longest line <= max.width. + # A binary search to find the greatest width.cutoff such that the width of the longest line <= width.max. repeat{ guess = ceiling((wcmin+wcmax)/2) if(guess<20){ @@ -108,7 +108,7 @@ strict_deparse = function(..., max.width, width.cutoff=getOption('width')){ if(wcmax==wcmin) break l = max(nchar(o)) - if(l>max.width) wcmax = guess-1 else wcmin = guess + if(l>width.max) wcmax = guess-1 else wcmin = guess } o } @@ -119,7 +119,7 @@ tidy_block = function(text, width = getOption('width'), arrow = FALSE, width.str if (length(exprs) == 0) return(character(0)) exprs = if (arrow) replace_assignment(exprs) else as.list(exprs) if(width.strict) - sapply(exprs, function(e) paste(strict_deparse(e, max.width=width, width.cutoff=width), collapse = '\n')) + sapply(exprs, function(e) paste(strict_deparse(e, width.max=width, width.cutoff=width), collapse = '\n')) else sapply(exprs, function(e) paste(base::deparse(e, width), collapse = '\n')) } From b3ab028b234619aa14da5eadddf35ae921ba2111 Mon Sep 17 00:00:00 2001 From: "Pavel N. Krivitsky" Date: Fri, 4 Aug 2017 14:15:27 +1000 Subject: [PATCH 3/8] Replaced "%InLiNe_IdEnTiFiEr%" with "%\u1d166%" (Unicode MUSICAL SYMBOL COMBINING SPRECHGESANG STEM character) to save space. 
--- DESCRIPTION | 3 ++- R/tidy.R | 4 ++-- R/utils.R | 4 ++-- vignettes/formatR.Rmd | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2a81bbd..e276617 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: formatR Type: Package Title: Format R Code Automatically Version: 1.5 -Date: 2017-04-26 +Date: 2017-08-04 Authors@R: c( person("Yihui", "Xie", email = "xie@yihui.name", role = c("aut", "cre")), person("Eugene", "Ha", role = "ctb"), @@ -28,3 +28,4 @@ URL: https://yihui.name/formatR BugReports: https://github.com/yihui/formatR/issues VignetteBuilder: knitr RoxygenNote: 6.0.1 +Encoding: UTF-8 diff --git a/R/tidy.R b/R/tidy.R index 27ff70b..e9db4a5 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -87,7 +87,7 @@ begin.comment = '.BeGiN_TiDy_IdEnTiFiEr_HaHaHa' end.comment = '.HaHaHa_EnD_TiDy_IdEnTiFiEr' pat.comment = sprintf('invisible\\("\\%s|\\%s"\\)', begin.comment, end.comment) mat.comment = sprintf('invisible\\("\\%s([^"]*)\\%s"\\)', begin.comment, end.comment) -inline.comment = ' %InLiNe_IdEnTiFiEr%[ ]*"([ ]*#[^"]*)"' +inline.comment = ' %\u1d166%[ ]*"([ ]*#[^"]*)"' blank.comment = sprintf('invisible("%s%s")', begin.comment, end.comment) # wrapper around deparse() that enforces a strict maximum line width @@ -131,7 +131,7 @@ unmask_source = function(text.mask) { if (!is.null(m)) text.mask = gsub(m, '\n', text.mask) ## if the comments were separated into the next line, then remove '\n' after ## the identifier first to move the comments back to the same line - text.mask = gsub('%InLiNe_IdEnTiFiEr%[ ]*\n', '%InLiNe_IdEnTiFiEr%', text.mask) + text.mask = gsub('%\u1d166%[ ]*\n', '%\u1d166%', text.mask) ## move 'else ...' 
back to the last line text.mask = gsub('\n\\s*else(\\s+|$)', ' else\\1', text.mask) if (any(grepl('\\\\\\\\', text.mask)) && diff --git a/R/utils.R b/R/utils.R index 54d38a3..960cc1d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -60,7 +60,7 @@ mask_comments = function(x, width, keep.blank.line) { # mask block and inline comments d.text[c1 & !c3] = reflow_comments(d.text[c1 & !c3], width) d.text[c3] = sprintf('invisible("%s%s%s")', begin.comment, d.text[c3], end.comment) - d.text[c2] = sprintf('%%InLiNe_IdEnTiFiEr%% "%s"', d.text[c2]) + d.text[c2] = sprintf('%%\u1d166%% "%s"', d.text[c2]) # add blank lines if (keep.blank.line) for (i in seq_along(d.text)) { @@ -94,7 +94,7 @@ mask_inline = function(x) { p = paste('{\ninvisible("', begin.comment, '\\1', end.comment, '")', sep = '') x[idx] = gsub('\\{\\s*(#.*)$', p, x[idx]) } - gsub('(#[^"]*)$', ' %InLiNe_IdEnTiFiEr% "\\1"', x) + gsub('(#[^"]*)$', ' %\u1d166% "\\1"', x) } # reflow comments (excluding roxygen comments) diff --git a/vignettes/formatR.Rmd b/vignettes/formatR.Rmd index 1555432..60d5f9b 100644 --- a/vignettes/formatR.Rmd +++ b/vignettes/formatR.Rmd @@ -250,10 +250,10 @@ will become Inline comments are first disguised as a weird operation with its preceding R code, which is essentially meaningless but syntactically correct! For example, ```r -1+1 %InLiNe_IdEnTiFiEr% "# comments" +1+1 %𝅦% "# comments" ``` -then `base::parse()` will deal with this expression; again, the disguised comments will not be removed. In the end, inline comments will be freed as well (remove the operator `%InLiNe_IdEnTiFiEr%` and surrounding double quotes). +then `base::parse()` will deal with this expression; again, the disguised comments will not be removed. In the end, inline comments will be freed as well (remove the operator `%𝅦%` and surrounding double quotes). All these special treatments to comments are due to the fact that `base::parse()` and `base::deparse()` can tidy the R code at the price of dropping all the comments. 
From 779f3e0b95f30ca97a78920de9e1feacf5c5227e Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Tue, 16 Mar 2021 15:15:22 -0500 Subject: [PATCH 4/8] try the zero-width space instead of the musical symbol, since the latter may not work on Windows --- R/tidy.R | 4 ++-- R/utils.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/tidy.R b/R/tidy.R index 5c5b28e..652693d 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -100,7 +100,7 @@ begin.comment = '.BeGiN_TiDy_IdEnTiFiEr_HaHaHa' end.comment = '.HaHaHa_EnD_TiDy_IdEnTiFiEr' pat.comment = sprintf('invisible\\("\\%s|\\%s"\\)', begin.comment, end.comment) mat.comment = sprintf('invisible\\("\\%s([^"]*)\\%s"\\)', begin.comment, end.comment) -inline.comment = ' %\u1d166%[ ]*"([ ]*#[^"]*)"' +inline.comment = ' %\U200B%[ ]*"([ ]*#[^"]*)"' blank.comment = sprintf('invisible("%s%s")', begin.comment, end.comment) blank.comment2 = sprintf('(\n)\\s+invisible\\("%s%s"\\)(\n|$)', begin.comment, end.comment) @@ -145,7 +145,7 @@ unmask_source = function(text.mask, spaces) { if (!is.null(m)) text.mask = gsub(m, '\n', text.mask) ## if the comments were separated into the next line, then remove '\n' after ## the identifier first to move the comments back to the same line - text.mask = gsub('%\u1d166%[ ]*\n', '%\u1d166%', text.mask) + text.mask = gsub('(%\U200B%)[ ]*\n', '\\1', text.mask) ## move 'else ...' 
back to the last line text.mask = gsub('\n\\s*else(\\s+|$)', ' else\\1', text.mask) if (any(grepl('\\\\\\\\', text.mask)) && diff --git a/R/utils.R b/R/utils.R index 945abe5..8a39b65 100644 --- a/R/utils.R +++ b/R/utils.R @@ -62,7 +62,7 @@ mask_comments = function(x, width, keep.blank.line, wrap = TRUE, spaces) { # mask block and inline comments d.text[c1 & !c3] = reflow_comments(d.text[c1 & !c3], width) d.text[c3] = sprintf('invisible("%s%s%s")', begin.comment, d.text[c3], end.comment) - d.text[c2] = sprintf('%%\u1d166%% "%s"', d.text[c2]) + d.text[c2] = sprintf('%%\U200B%% "%s"', d.text[c2]) # add blank lines if (keep.blank.line) for (i in seq_along(d.text)) { @@ -100,7 +100,7 @@ mask_inline = function(x) { p = paste('{\ninvisible("', begin.comment, '\\1', end.comment, '")', sep = '') x[idx] = gsub('\\{\\s*(#.*)$', p, x[idx]) } - gsub('(#[^"]*)$', ' %\u1d166% "\\1"', x) + gsub('(#[^"]*)$', ' %\U200B% "\\1"', x) } # reflow comments (excluding roxygen comments) From 3859e32552bc466f398699c65be6a2d8e28da4d9 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Tue, 16 Mar 2021 15:25:35 -0500 Subject: [PATCH 5/8] the zero-width space doesn't work on Windows, either; try \b --- R/tidy.R | 4 ++-- R/utils.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/tidy.R b/R/tidy.R index 652693d..86e3c97 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -100,7 +100,7 @@ begin.comment = '.BeGiN_TiDy_IdEnTiFiEr_HaHaHa' end.comment = '.HaHaHa_EnD_TiDy_IdEnTiFiEr' pat.comment = sprintf('invisible\\("\\%s|\\%s"\\)', begin.comment, end.comment) mat.comment = sprintf('invisible\\("\\%s([^"]*)\\%s"\\)', begin.comment, end.comment) -inline.comment = ' %\U200B%[ ]*"([ ]*#[^"]*)"' +inline.comment = ' %\b%[ ]*"([ ]*#[^"]*)"' blank.comment = sprintf('invisible("%s%s")', begin.comment, end.comment) blank.comment2 = sprintf('(\n)\\s+invisible\\("%s%s"\\)(\n|$)', begin.comment, end.comment) @@ -145,7 +145,7 @@ unmask_source = function(text.mask, spaces) { if (!is.null(m)) text.mask = 
gsub(m, '\n', text.mask) ## if the comments were separated into the next line, then remove '\n' after ## the identifier first to move the comments back to the same line - text.mask = gsub('(%\U200B%)[ ]*\n', '\\1', text.mask) + text.mask = gsub('(%\b%)[ ]*\n', '\\1', text.mask) ## move 'else ...' back to the last line text.mask = gsub('\n\\s*else(\\s+|$)', ' else\\1', text.mask) if (any(grepl('\\\\\\\\', text.mask)) && diff --git a/R/utils.R b/R/utils.R index 8a39b65..a4d8edf 100644 --- a/R/utils.R +++ b/R/utils.R @@ -62,7 +62,7 @@ mask_comments = function(x, width, keep.blank.line, wrap = TRUE, spaces) { # mask block and inline comments d.text[c1 & !c3] = reflow_comments(d.text[c1 & !c3], width) d.text[c3] = sprintf('invisible("%s%s%s")', begin.comment, d.text[c3], end.comment) - d.text[c2] = sprintf('%%\U200B%% "%s"', d.text[c2]) + d.text[c2] = sprintf('%%\b%% "%s"', d.text[c2]) # add blank lines if (keep.blank.line) for (i in seq_along(d.text)) { @@ -100,7 +100,7 @@ mask_inline = function(x) { p = paste('{\ninvisible("', begin.comment, '\\1', end.comment, '")', sep = '') x[idx] = gsub('\\{\\s*(#.*)$', p, x[idx]) } - gsub('(#[^"]*)$', ' %\U200B% "\\1"', x) + gsub('(#[^"]*)$', ' %\b% "\\1"', x) } # reflow comments (excluding roxygen comments) From c5b4a407acd2ed374c8e9e8198bb45d707b1f064 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Wed, 17 Mar 2021 16:15:30 -0500 Subject: [PATCH 6/8] treat I(width) as the maximum width, and use brute-force if the binary search for the optimal width fails --- NEWS | 7 ++++ R/tidy.R | 98 +++++++++++++++++++++++++++---------------- man/tidy_source.Rd | 29 +++++++------ vignettes/formatR.Rmd | 4 +- 4 files changed, 89 insertions(+), 49 deletions(-) diff --git a/NEWS b/NEWS index 2d4ae52..048f74e 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,13 @@ NEW FEATURES o Lines will be wrapped after operators `%>%`, `%T%`, `%$%`, and `%<>%` now (thanks, @g4challenge #54, @jzelner #62, @edlee123 #68). 
+ o The argument `width.cutoff` of `tidy_source()` used to be the lower bound of + line widths. Now if you pass a number wrapped in I(), it will be treated as + the upper bound, e.g., `tidy_source(width.cutoff = I(60))`. However, please + note that the upper bound cannot always be respected, e.g., when the code + contains an extremely long string, there is no way to break it into shorter + lines automatically (thanks, @krivit @pablo14, #71). + BUG FIXES o When the text in the clipboard on macOS does not have a final EOL, diff --git a/R/tidy.R b/R/tidy.R index 86e3c97..9d73d40 100644 --- a/R/tidy.R +++ b/R/tidy.R @@ -5,6 +5,15 @@ #' \code{\link{deparse}}. It can also replace \code{=} with \code{<-} where #' \code{=} means assignments, and reindent code by a specified number of spaces #' (default is 4). +#' +#' If the value of the argument \code{width.cutoff} is wrapped in +#' \code{\link{I}()} (e.g., \code{I(60)}), it will be treated as the \emph{upper +#' bound} on the line width, but this upper bound may not be satisfied. In this +#' case, the function will perform a binary search for a width value that can +#' make \code{deparse()} return code with line width smaller than or equal to +#' the \code{width.cutoff} value. If the search fails to find such a value, it +#' will emit a warning, which can be suppressed by the global option +#' \code{options(formatR.width.warning = FALSE)}. 
#' @param source a character string: location of the source code (default to be #' the clipboard; this means we can copy the code to clipboard and use #' \code{tidy_source()} without specifying the argument \code{source}) @@ -16,20 +25,17 @@ #' @param indent number of spaces to indent the code (default 4) #' @param wrap whether to wrap comments to the linewidth determined by #' \code{width.cutoff} (note that roxygen comments will never be wrapped) +#' @param width.cutoff Passed to \code{\link{deparse}()}: an integer in +#' \code{[20, 500]} determining the cutoff at which line-breaking is tried +#' (default to be \code{getOption("width")}). In other words, this is the +#' \emph{lower bound} of the line width. See \sQuote{Details} if an upper +#' bound is desired instead. #' @param output output to the console or a file using \code{\link{cat}}? #' @param text an alternative way to specify the input: if it is \code{NULL}, #' the function will read the source code from the \code{source} argument; #' alternatively, if \code{text} is a character vector containing the source #' code, it will be used as the input and the \code{source} argument will be #' ignored -#' @param width.cutoff passed to \code{\link{deparse}}: integer in [20, 500] -#' determining the cutoff at which line-breaking is tried (default to be -#' \code{getOption("width")}) -#' @param width.strict if \code{TRUE}, rather than being passed -#' directly to \code{\link{deparse}}, \code{width.cutoff} is treated -#' as a hard upper bound on the row width, with the argument to -#' \code{\link{deparse}} chosen adaptively to achieve this upper -#' bound, if possible #' @param ... other arguments passed to \code{\link{cat}}, e.g. \code{file} #' (this can be useful for batch-processing R scripts, e.g. 
#' \code{tidy_source(source = 'input.R', file = 'output.R')}) @@ -52,9 +58,8 @@ tidy_source = function( brace.newline = getOption('formatR.brace.newline', FALSE), indent = getOption('formatR.indent', 4), wrap = getOption('formatR.wrap', TRUE), - output = TRUE, text = NULL, - width.cutoff = getOption('width'), - width.strict = FALSE, ... + width.cutoff = getOption('formatR.width', getOption('width')), + output = TRUE, text = NULL, ... ) { if (is.null(text)) { if (source == 'clipboard' && Sys.info()['sysname'] == 'Darwin') { @@ -78,11 +83,13 @@ tidy_source = function( n2 = attr(regexpr('\n*$', one), 'match.length') } on.exit(.env$line_break <- NULL, add = TRUE) + if (width.cutoff > 500) width.cutoff[1] = 500 + if (width.cutoff < 20) width.cutoff[1] = 20 # insert enough spaces into infix operators such as %>% so the lines can be # broken after the operators spaces = paste(rep(' ', max(10, width.cutoff)), collapse = '') if (comment) text = mask_comments(text, width.cutoff, blank, wrap, spaces) - text.mask = tidy_block(text, width.cutoff, arrow && length(grep('=', text)), width.strict) + text.mask = tidy_block(text, width.cutoff, arrow && length(grep('=', text))) text.tidy = if (comment) unmask_source(text.mask, spaces) else text.mask text.tidy = reindent_lines(text.tidy, indent) if (brace.newline) text.tidy = move_leftbrace(text.tidy) @@ -104,38 +111,59 @@ inline.comment = ' %\b%[ ]*"([ ]*#[^"]*)"' blank.comment = sprintf('invisible("%s%s")', begin.comment, end.comment) blank.comment2 = sprintf('(\n)\\s+invisible\\("%s%s"\\)(\n|$)', begin.comment, end.comment) -# wrapper around deparse() that enforces a strict maximum line width -strict_deparse = function(..., width.max, width.cutoff=getOption('width')){ - wcmin = 19L # If deparse() can't manage it with width.cutoff <= 20, issue a warning. - wcmax = 500L - # A binary search to find the greatest width.cutoff such that the width of the longest line <= width.max. 
- repeat{ - guess = ceiling((wcmin+wcmax)/2) - if(guess<20){ - # If it's induced by a comment, don't complain. - if(!length(grep(pat.comment,deparse(..., width.cutoff=500L)))) - warning("Unable to find a suitable adaptive cut-off. Falling back to width.cutoff.") - return(trimws(deparse(..., width.cutoff=width.cutoff), "right")) - } - o = trimws(deparse(..., width.cutoff=guess), "right") +# first, perform a (semi-)binary search to find the greatest cutoff width such +# that the width of the longest line <= `width`; if the search fails, use +# brute-force to try all possible widths +deparse2 = function(expr, width, warn = getOption('formatR.width.warning', TRUE)) { + wmin = 20 # if deparse() can't manage it with width.cutoff <= 20, issue a warning + wmax = min(500, width + 10) # +10 because a larger width may result in smaller actual width - if(wcmax==wcmin) break + r = seq(wmin, wmax) + k = setNames(rep(NA, length(r)), as.character(r)) # results of width checks + d = p = list() # deparsed results and lines exceeding desired width - l = max(nchar(o)) - if(l>width.max) wcmax = guess-1 else wcmin = guess + check_width = function(w) { + i = as.character(w) + if (!is.na(x <- k[i])) return(x) + x = deparse(expr, w) + x = gsub('\\s+$', '', x) + d[[i]] <<- x + x2 = grep(pat.comment, x, invert = TRUE, value = TRUE) # don't check comments + p[[i]] <<- x2[nchar(x2, type = 'width') > width] + k[i] <<- length(p[[i]]) == 0 } - o + + # if the desired width happens to just work, return the result + if (check_width(w <- width)) return(d[[as.character(w)]]) + + repeat { + if (!any(is.na(k))) break # has tried all possibilities + if (wmin >= wmax) break + w = ceiling((wmin + wmax)/2) + if (check_width(w)) wmin = w else wmax = wmax - 2 + } + + # try all the rest of widths if no suitable width has been found + if (!any(k, na.rm = TRUE)) for (i in r[is.na(k)]) check_width(i) + r = r[which(k)] + if ((n <- length(r)) > 0) return(d[[as.character(r[n])]]) + + i = as.character(width) + if 
(warn) warning( + 'Unable to find a suitable cut-off to make the line widths smaller than ', + width, ' for the line(s) of code:\n', paste0(' ', p[[i]], collapse = '\n'), + call. = FALSE + ) + d[[i]] } # wrapper around parse() and deparse() -tidy_block = function(text, width = getOption('width'), arrow = FALSE, width.strict = FALSE) { +tidy_block = function(text, width = getOption('width'), arrow = FALSE) { exprs = parse_only(text) if (length(exprs) == 0) return(character(0)) exprs = if (arrow) replace_assignment(exprs) else as.list(exprs) - if(width.strict) - sapply(exprs, function(e) paste(strict_deparse(e, width.max=width, width.cutoff=width), collapse = '\n')) - else - sapply(exprs, function(e) paste(base::deparse(e, width), collapse = '\n')) + deparse = if (inherits(width, 'AsIs')) deparse2 else base::deparse + sapply(exprs, function(e) paste(deparse(e, width), collapse = '\n')) } # Restore the real source code from the masked text diff --git a/man/tidy_source.Rd b/man/tidy_source.Rd index 02c79a2..e1a0e64 100644 --- a/man/tidy_source.Rd +++ b/man/tidy_source.Rd @@ -12,10 +12,9 @@ tidy_source( brace.newline = getOption("formatR.brace.newline", FALSE), indent = getOption("formatR.indent", 4), wrap = getOption("formatR.wrap", TRUE), + width.cutoff = getOption("formatR.width", getOption("width")), output = TRUE, text = NULL, - width.cutoff = getOption("width"), - width.strict = FALSE, ... ) } @@ -38,6 +37,12 @@ the clipboard; this means we can copy the code to clipboard and use \item{wrap}{whether to wrap comments to the linewidth determined by \code{width.cutoff} (note that roxygen comments will never be wrapped)} +\item{width.cutoff}{Passed to \code{\link{deparse}()}: an integer in +\code{[20, 500]} determining the cutoff at which line-breaking is tried +(default to be \code{getOption("width")}). In other words, this is the +\emph{lower bound} of the line width. 
See \sQuote{Details} if an upper +bound is desired instead.} + \item{output}{output to the console or a file using \code{\link{cat}}?} \item{text}{an alternative way to specify the input: if it is \code{NULL}, @@ -46,16 +51,6 @@ alternatively, if \code{text} is a character vector containing the source code, it will be used as the input and the \code{source} argument will be ignored} -\item{width.cutoff}{passed to \code{\link{deparse}}: integer in [20, 500] -determining the cutoff at which line-breaking is tried (default to be -\code{getOption("width")})} - -\item{width.strict}{if \code{TRUE}, rather than being passed -directly to \code{\link{deparse}}, \code{width.cutoff} is treated -as a hard upper bound on the row width, with the argument to -\code{\link{deparse}} chosen adaptively to achieve this upper -bound, if possible} - \item{...}{other arguments passed to \code{\link{cat}}, e.g. \code{file} (this can be useful for batch-processing R scripts, e.g. \code{tidy_source(source = 'input.R', file = 'output.R')})} @@ -72,6 +67,16 @@ lines and comments, which is different with \code{\link{parse}} and \code{=} means assignments, and reindent code by a specified number of spaces (default is 4). } +\details{ +If the value of the argument \code{width.cutoff} is wrapped in +\code{\link{I}()} (e.g., \code{I(60)}), it will be treated as the \emph{upper +bound} on the line width, but this upper bound may not be satisfied. In this +case, the function will perform a binary search for a width value that can +make \code{deparse()} return code with line width smaller than or equal to +the \code{width.cutoff} value. If the search fails to find such a value, it +will emit a warning, which can be suppressed by the global option +\code{options(formatR.width.warning = FALSE)}. +} \note{ Be sure to read the reference to know other limitations. 
} diff --git a/vignettes/formatR.Rmd b/vignettes/formatR.Rmd index 54ce262..37c0ff1 100644 --- a/vignettes/formatR.Rmd +++ b/vignettes/formatR.Rmd @@ -273,10 +273,10 @@ will become Inline comments are first disguised as a weird operation with its preceding R code, which is essentially meaningless but syntactically correct! For example, ```r -1+1 %𝅦% "# comments" +1+1 %\b% "# comments" ``` -then `base::parse()` will deal with this expression; again, the disguised comments will not be removed. In the end, inline comments will be freed as well (remove the operator `%𝅦%` and surrounding double quotes). +then `base::parse()` will deal with this expression; again, the disguised comments will not be removed. In the end, inline comments will be freed as well (remove the operator `%\b%` and surrounding double quotes). All these special treatments to comments are due to the fact that `base::parse()` and `base::deparse()` can tidy the R code at the price of dropping all the comments. From 2cbd377936c2608dfcc059acedb4184907e2064d Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Wed, 17 Mar 2021 16:34:25 -0500 Subject: [PATCH 7/8] mention the global option formatR.width in NEWS and vignette --- NEWS | 4 ++++ vignettes/formatR.Rmd | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 048f74e..7ae698f 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,10 @@ NEW FEATURES contains an extremely long string, there is no way to break it into shorter lines automatically (thanks, @krivit @pablo14, #71). + o The value of the argument `width.cutoff` can be specified in the global + option `formatR.width` now. By default, the value is still taken from the + global option `width` like before. 
+ BUG FIXES o When the text in the clipboard on macOS does not have a final EOL, diff --git a/vignettes/formatR.Rmd b/vignettes/formatR.Rmd index 37c0ff1..49d9139 100644 --- a/vignettes/formatR.Rmd +++ b/vignettes/formatR.Rmd @@ -290,6 +290,8 @@ There are global options which can override some arguments in `tidy_source()`: | `blank` | `options('formatR.blank')` | `TRUE` | | `arrow` | `options('formatR.arrow')` | `FALSE` | | `indent` | `options('formatR.indent')` | `4` | +| `wrap` | `options('formatR.wrap')` | `TRUE` | +| `width.cutoff` | `options('formatR.width')` | `options('width')` | | `brace.newline` | `options('formatR.brace.newline')` | `FALSE` | -Also note that single lines of long comments will be wrapped into shorter ones automatically, but roxygen comments will not be wrapped (i.e., comments that begin with `#'`). +Also note that single lines of long comments will be wrapped into shorter ones automatically when `wrap = TRUE`, but roxygen comments will not be wrapped (i.e., comments that begin with `#'`). From a06d43112fab921246e8de7f50a8615a330b1ae1 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Wed, 17 Mar 2021 16:46:04 -0500 Subject: [PATCH 8/8] add @krivit to the list of ctb --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f639e8d..39e94f5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,9 +4,11 @@ Title: Format R Code Automatically Version: 1.8.2 Authors@R: c( person("Yihui", "Xie", role = c("aut", "cre"), email = "xie@yihui.name", comment = c(ORCID = "0000-0003-0645-5666")), + person("Ed", "Lee", role = "ctb"), person("Eugene", "Ha", role = "ctb"), person("Kohske", "Takahashi", role = "ctb"), - person("Ed", "Lee", role = "ctb") + person("Pavel", "Krivitsky", role = "ctb"), + person() ) Description: Provides a function tidy_source() to format R source code. Spaces and indent will be added to the code automatically, and comments will be