From 0979bf66d5433f108a902698af5df3698597d471 Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Sat, 31 Dec 2022 23:32:39 -0500 Subject: [PATCH 1/2] documentation --- man/deprecated-v1.Rd | 8 ++++---- man/get_default_skimmers.Rd | 10 +++++----- man/get_skimmers.Rd | 30 ++++++++++++++-------------- man/knit_print.Rd | 10 +++++----- man/partition.Rd | 6 +++--- man/print.Rd | 8 ++++---- man/skim-attr.Rd | 16 +++++++-------- man/skim-obj.Rd | 28 +++++++++++++------------- man/stats.Rd | 40 ++++++++++++++++++------------------- 9 files changed, 78 insertions(+), 78 deletions(-) diff --git a/man/deprecated-v1.Rd b/man/deprecated-v1.Rd index 7395e823..158b77fe 100644 --- a/man/deprecated-v1.Rd +++ b/man/deprecated-v1.Rd @@ -31,10 +31,10 @@ data frame. Others have been replaced by functions that do a single thing. } \section{Functions}{ \itemize{ -\item \code{skim_to_wide}: \code{\link[=skim]{skim()}} always produces a wide data frame. +\item \code{skim_to_wide()}: \code{\link[=skim]{skim()}} always produces a wide data frame. -\item \code{skim_to_list}: \code{\link[=partition]{partition()}} creates a list. +\item \code{skim_to_list()}: \code{\link[=partition]{partition()}} creates a list. -\item \code{skim_format}: \code{\link[=print]{print()}} and \code{\link[=skim_with]{skim_with()}} set options. -}} +\item \code{skim_format()}: \code{\link[=print]{print()}} and \code{\link[=skim_with]{skim_with()}} set options. +}} diff --git a/man/get_default_skimmers.Rd b/man/get_default_skimmers.Rd index 45a31e00..aee18371 100644 --- a/man/get_default_skimmers.Rd +++ b/man/get_default_skimmers.Rd @@ -43,15 +43,15 @@ a list of functions and a \code{skim_type}. } \section{Functions}{ \itemize{ -\item \code{get_one_default_skimmer}: Get the functions associated with one +\item \code{get_one_default_skimmer()}: Get the functions associated with one \code{skim_type}. 
-\item \code{get_default_skimmer_names}: Get the names of the functions used in one +\item \code{get_default_skimmer_names()}: Get the names of the functions used in one or more \code{skim_type}'s. -\item \code{get_one_default_skimmer_names}: Get the names of the functions used in one +\item \code{get_one_default_skimmer_names()}: Get the names of the functions used in one \code{skim_type}. -\item \code{get_sfl}: Get the \code{sfl} for a \code{skim_type}. -}} +\item \code{get_sfl()}: Get the \code{sfl} for a \code{skim_type}. +}} diff --git a/man/get_skimmers.Rd b/man/get_skimmers.Rd index 9e6d7a75..dd8a6b29 100644 --- a/man/get_skimmers.Rd +++ b/man/get_skimmers.Rd @@ -84,51 +84,51 @@ for creating new default \code{sfl}'s. } \section{Methods (by class)}{ \itemize{ -\item \code{default}: The default method for skimming data. Only used when +\item \code{get_skimmers(default)}: The default method for skimming data. Only used when a column's data type doesn't match currently installed types. Call \link{get_default_skimmer_names} to see these defaults. -\item \code{numeric}: Summary functions for numeric columns, covering both +\item \code{get_skimmers(numeric)}: Summary functions for numeric columns, covering both \code{\link[=double]{double()}} and \code{\link[=integer]{integer()}} classes: \code{\link[=mean]{mean()}}, \code{\link[=sd]{sd()}}, \code{\link[=quantile]{quantile()}} and \code{\link[=inline_hist]{inline_hist()}}. -\item \code{factor}: Summary functions for factor columns: +\item \code{get_skimmers(factor)}: Summary functions for factor columns: \code{\link[=is.ordered]{is.ordered()}}, \code{\link[=n_unique]{n_unique()}} and \code{\link[=top_counts]{top_counts()}}. -\item \code{character}: Summary functions for character columns. Also, the +\item \code{get_skimmers(character)}: Summary functions for character columns. 
Also, the default for unknown columns: \code{\link[=min_char]{min_char()}}, \code{\link[=max_char]{max_char()}}, \code{\link[=n_empty]{n_empty()}}, \code{\link[=n_unique]{n_unique()}} and \code{\link[=n_whitespace]{n_whitespace()}}. -\item \code{logical}: Summary functions for logical/ boolean columns: +\item \code{get_skimmers(logical)}: Summary functions for logical/ boolean columns: \code{\link[=mean]{mean()}}, which produces rates for each value, and \code{\link[=top_counts]{top_counts()}}. -\item \code{complex}: Summary functions for complex columns: \code{\link[=mean]{mean()}}. +\item \code{get_skimmers(complex)}: Summary functions for complex columns: \code{\link[=mean]{mean()}}. -\item \code{Date}: Summary functions for \code{Date} columns: \code{\link[=min]{min()}}, +\item \code{get_skimmers(Date)}: Summary functions for \code{Date} columns: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}. -\item \code{POSIXct}: Summary functions for \code{POSIXct} columns: \code{\link[=min]{min()}}, +\item \code{get_skimmers(POSIXct)}: Summary functions for \code{POSIXct} columns: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}. -\item \code{difftime}: Summary functions for \code{difftime} columns: \code{\link[=min]{min()}}, +\item \code{get_skimmers(difftime)}: Summary functions for \code{difftime} columns: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}. -\item \code{Timespan}: Summary functions for \code{Timespan} columns: \code{\link[=min]{min()}}, +\item \code{get_skimmers(Timespan)}: Summary functions for \code{Timespan} columns: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}. 
-\item \code{ts}: Summary functions for \code{ts} columns: \code{\link[=min]{min()}}, +\item \code{get_skimmers(ts)}: Summary functions for \code{ts} columns: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}. -\item \code{list}: Summary functions for \code{list} columns: \code{\link[=n_unique]{n_unique()}}, +\item \code{get_skimmers(list)}: Summary functions for \code{list} columns: \code{\link[=n_unique]{n_unique()}}, \code{\link[=list_min_length]{list_min_length()}} and \code{\link[=list_max_length]{list_max_length()}}. -\item \code{AsIs}: Summary functions for \code{AsIs} columns: \code{\link[=n_unique]{n_unique()}}, +\item \code{get_skimmers(AsIs)}: Summary functions for \code{AsIs} columns: \code{\link[=n_unique]{n_unique()}}, \code{\link[=list_min_length]{list_min_length()}} and \code{\link[=list_max_length]{list_max_length()}}. -\item \code{haven_labelled}: Summary functions for \code{haven_labelled} columns. +\item \code{get_skimmers(haven_labelled)}: Summary functions for \code{haven_labelled} columns. Finds the appropriate skimmers for the underlying data in the vector. -}} +}} \examples{ # Defining default skimming functions for a new class, `my_class`. # Note that the class argument is required for dynamic reassignment. diff --git a/man/knit_print.Rd b/man/knit_print.Rd index 04cfb3c0..72fa0095 100644 --- a/man/knit_print.Rd +++ b/man/knit_print.Rd @@ -47,15 +47,15 @@ Windows users most commonly. Call \code{vignette("Using_fonts")} for more detail } \section{Methods (by class)}{ \itemize{ -\item \code{skim_df}: Default \code{knitr} print for \code{skim_df} objects. +\item \code{knit_print(skim_df)}: Default \code{knitr} print for \code{skim_df} objects. -\item \code{skim_list}: Default \code{knitr} print for a \code{skim_list}. +\item \code{knit_print(skim_list)}: Default \code{knitr} print for a \code{skim_list}. 
-\item \code{one_skim_df}: Default \code{knitr} print within a partitioned \code{skim_df}. +\item \code{knit_print(one_skim_df)}: Default \code{knitr} print within a partitioned \code{skim_df}. -\item \code{summary_skim_df}: Default \code{knitr} print for \code{skim_df} summaries. -}} +\item \code{knit_print(summary_skim_df)}: Default \code{knitr} print for \code{skim_df} summaries. +}} \seealso{ \code{\link[knitr:kable]{knitr::kable()}} } diff --git a/man/partition.Rd b/man/partition.Rd index 7e1a5fba..66cbc32a 100644 --- a/man/partition.Rd +++ b/man/partition.Rd @@ -36,13 +36,13 @@ in the list is a data type from the original \code{skim_df}. The inverse of } \section{Functions}{ \itemize{ -\item \code{bind}: The inverse of a \code{partition()}. Rebuild the original +\item \code{bind()}: The inverse of a \code{partition()}. Rebuild the original \code{skim_df}. -\item \code{yank}: Extract a subtable from a \code{skim_df} with a particular +\item \code{yank()}: Extract a subtable from a \code{skim_df} with a particular type. -}} +}} \examples{ # Create a wide skimmed data frame (a skim_df) skimmed <- skim(iris) diff --git a/man/print.Rd b/man/print.Rd index d5ea755c..f89681db 100644 --- a/man/print.Rd +++ b/man/print.Rd @@ -45,13 +45,13 @@ methods for \code{knitr}/ \code{rmarkdown} documents is also provided. } \section{Methods (by class)}{ \itemize{ -\item \code{skim_df}: Print a skimmed data frame (\code{skim_df} from \code{\link[=skim]{skim()}}). +\item \code{print(skim_df)}: Print a skimmed data frame (\code{skim_df} from \code{\link[=skim]{skim()}}). -\item \code{skim_list}: Print a \code{skim_list}, a list of \code{skim_df} objects. +\item \code{print(skim_list)}: Print a \code{skim_list}, a list of \code{skim_df} objects. -\item \code{summary_skim_df}: Print method for a \code{summary_skim_df} object. -}} +\item \code{print(summary_skim_df)}: Print method for a \code{summary_skim_df} object. 
+}} \section{Printing options}{ diff --git a/man/skim-attr.Rd b/man/skim-attr.Rd index a3367ee8..7106d993 100644 --- a/man/skim-attr.Rd +++ b/man/skim-attr.Rd @@ -39,23 +39,23 @@ over calling base R's attribute functions. } \section{Functions}{ \itemize{ -\item \code{data_rows}: Get the number of rows in the skimmed data frame. +\item \code{data_rows()}: Get the number of rows in the skimmed data frame. -\item \code{data_cols}: Get the number of columns in the skimmed data frame. +\item \code{data_cols()}: Get the number of columns in the skimmed data frame. -\item \code{df_name}: Get the name of the skimmed data frame. This is only +\item \code{df_name()}: Get the name of the skimmed data frame. This is only available in contexts where the name can be looked up. This is often not the case within a pipeline. -\item \code{dt_key}: Get the key of the skimmed data.table. This is only +\item \code{dt_key()}: Get the key of the skimmed data.table. This is only available in contexts where \code{data} is of class \code{data.table}. -\item \code{group_names}: Get the names of the groups in the original data frame. +\item \code{group_names()}: Get the names of the groups in the original data frame. Only available if the data was grouped. Otherwise, \code{NULL}. -\item \code{base_skimmers}: Get the names of the base skimming functions used. +\item \code{base_skimmers()}: Get the names of the base skimming functions used. -\item \code{skimmers_used}: Get the names of the skimming functions used, separated +\item \code{skimmers_used()}: Get the names of the skimming functions used, separated by data type. -}} +}} diff --git a/man/skim-obj.Rd b/man/skim-obj.Rd index 960af910..9c192de9 100644 --- a/man/skim-obj.Rd +++ b/man/skim-obj.Rd @@ -71,34 +71,34 @@ more useful when throwing errors. } \section{Functions}{ \itemize{ -\item \code{has_type_column}: Does the object have the \code{skim_type} column? 
+\item \code{has_type_column()}: Does the object have the \code{skim_type} column? -\item \code{has_variable_column}: Does the object have the \code{skim_variable} column? +\item \code{has_variable_column()}: Does the object have the \code{skim_variable} column? -\item \code{has_skimr_attributes}: Does the object have the appropriate \code{skimr} attributes? +\item \code{has_skimr_attributes()}: Does the object have the appropriate \code{skimr} attributes? -\item \code{has_skim_type_attribute}: Does the object have a \code{skim_type} attribute? This makes +\item \code{has_skim_type_attribute()}: Does the object have a \code{skim_type} attribute? This makes it a \code{one_skim_df}. -\item \code{has_skimmers}: Does the object have skimmers? +\item \code{has_skimmers()}: Does the object have skimmers? -\item \code{is_data_frame}: Is the object a data frame? +\item \code{is_data_frame()}: Is the object a data frame? -\item \code{is_skim_df}: Is the object a \code{skim_df}? +\item \code{is_skim_df()}: Is the object a \code{skim_df}? -\item \code{is_one_skim_df}: Is the object a \code{one_skim_df}? This is similar to a +\item \code{is_one_skim_df()}: Is the object a \code{one_skim_df}? This is similar to a \code{skim_df}, but does not have the \code{type} column. That is stored as an attribute instead. -\item \code{is_skim_list}: Is the object a \code{skim_list}? +\item \code{is_skim_list()}: Is the object a \code{skim_list}? -\item \code{could_be_skim_df}: Is this a data frame with \code{skim_variable} and +\item \code{could_be_skim_df()}: Is this a data frame with \code{skim_variable} and \code{skim_type} columns? -\item \code{assert_is_skim_df}: Stop if the object is not a \code{skim_df}. +\item \code{assert_is_skim_df()}: Stop if the object is not a \code{skim_df}. -\item \code{assert_is_skim_list}: Stop if the object is not a \code{skim_list}. +\item \code{assert_is_skim_list()}: Stop if the object is not a \code{skim_list}. 
-\item \code{assert_is_one_skim_df}: Stop if the object is not a \code{one_skim_df}. -}} +\item \code{assert_is_one_skim_df()}: Stop if the object is not a \code{one_skim_df}. +}} diff --git a/man/stats.Rd b/man/stats.Rd index 378aec5f..56369dd1 100644 --- a/man/stats.Rd +++ b/man/stats.Rd @@ -83,59 +83,59 @@ a given data type. } \section{Functions}{ \itemize{ -\item \code{n_missing}: Calculate the sum of \code{NA} and \code{NULL} (i.e. missing) values. +\item \code{n_missing()}: Calculate the sum of \code{NA} and \code{NULL} (i.e. missing) values. -\item \code{n_complete}: Calculate the sum of not \code{NA} and \code{NULL} (i.e. missing) +\item \code{n_complete()}: Calculate the sum of not \code{NA} and \code{NULL} (i.e. missing) values. -\item \code{complete_rate}: Calculate complete values; complete values are not missing. +\item \code{complete_rate()}: Calculate complete values; complete values are not missing. -\item \code{n_whitespace}: Calculate the number of rows containing only whitespace +\item \code{n_whitespace()}: Calculate the number of rows containing only whitespace values using s+ regex. -\item \code{sorted_count}: Create a contingency table and arrange its levels in +\item \code{sorted_count()}: Create a contingency table and arrange its levels in descending order. In case of ties, the ordering of results is alphabetical and depends upon the locale. \code{NA} is treated as a ordinary value for sorting. -\item \code{top_counts}: Compute and collapse a contingency table into a single +\item \code{top_counts()}: Compute and collapse a contingency table into a single character scalar. Wraps \code{\link[=sorted_count]{sorted_count()}}. -\item \code{inline_hist}: Generate inline histogram for numeric variables. The +\item \code{inline_hist()}: Generate inline histogram for numeric variables. The character length of the histogram is controlled by the formatting options for character vectors. 
-\item \code{n_empty}: Calculate the number of blank values in a character vector. +\item \code{n_empty()}: Calculate the number of blank values in a character vector. A "blank" is equal to "". -\item \code{min_char}: Calculate the minimum number of characters within a +\item \code{min_char()}: Calculate the minimum number of characters within a character vector. -\item \code{max_char}: Calculate the maximum number of characters within a +\item \code{max_char()}: Calculate the maximum number of characters within a character vector. -\item \code{n_unique}: Calculate the number of unique elements but remove \code{NA}. +\item \code{n_unique()}: Calculate the number of unique elements but remove \code{NA}. -\item \code{ts_start}: Get the start for a time series without the frequency. +\item \code{ts_start()}: Get the start for a time series without the frequency. -\item \code{ts_end}: Get the finish for a time series without the frequency. +\item \code{ts_end()}: Get the finish for a time series without the frequency. -\item \code{inline_linegraph}: Generate inline line graph for time series variables. The +\item \code{inline_linegraph()}: Generate inline line graph for time series variables. The character length of the line graph is controlled by the formatting options for character vectors. Based on the function in the pillar package. -\item \code{list_lengths_min}: Get the length of the shortest list in a vector of lists. +\item \code{list_lengths_min()}: Get the length of the shortest list in a vector of lists. -\item \code{list_lengths_median}: Get the median length of the lists. +\item \code{list_lengths_median()}: Get the median length of the lists. -\item \code{list_lengths_max}: Get the maximum length of the lists. +\item \code{list_lengths_max()}: Get the maximum length of the lists. -\item \code{list_min_length}: Get the length of the shortest list in a vector of lists. 
+\item \code{list_min_length()}: Get the length of the shortest list in a vector of lists. -\item \code{list_max_length}: Get the length of the longest list in a vector of lists. -}} +\item \code{list_max_length()}: Get the length of the longest list in a vector of lists. +}} \seealso{ \code{\link[=get_skimmers]{get_skimmers()}} for customizing the functions called by \code{\link[=skim]{skim()}}. } From 0710d6a0eafe6326e4af6c914cd52e2f3a2820eb Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Sun, 1 Jan 2023 16:09:52 -0500 Subject: [PATCH 2/2] Update references to Windows handling of UTF-8 characters. --- NEWS.md | 4 ++++ R/utils.R | 4 ++-- README.Rmd | 6 ++++-- README.md | 11 ++++++----- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index b9cdb93e..2ab2e470 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# skimr 2.2.0 + +* Update handling of spark graphs for Windows when UTF-8 is supported. + # skimr 2.1.5 * Updated to work with newer version of purrr diff --git a/R/utils.R b/R/utils.R index e2fa74d4..f7733ba7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -25,12 +25,12 @@ is_windows <- function() { #' Fix unicode histograms on Windows #' #' This functions changes your session's locale to address issues with printing -#' histograms on Windows. +#' histograms on Windows on versions of R below 4.2.1. #' #' There are known issues with printing the spark-histogram characters when #' printing a data frame, appearing like this: "<U+2582><U+2585><U+2587>". #' This longstanding problem originates in the low-level code for printing -#' dataframes. +#' dataframes. This was addressed in R version 4.2.1. #' #' @seealso [skim_without_charts()] #' @export diff --git a/README.Rmd b/README.Rmd index e6ae50e1..06240715 100644 --- a/README.Rmd +++ b/README.Rmd @@ -214,7 +214,8 @@ line charts in various contexts, some of which are described below. 
### Support for spark histograms -There are known issues with printing the spark-histogram characters when +With versions of R before 4.2.1, there are known issues with +printing the spark-histogram characters when printing a data frame. For example, `"▂▅▇"` is printed as `"<U+2582><U+2585><U+2587>"`. This longstanding problem [originates in the low-level @@ -222,7 +223,8 @@ code](https://stat.ethz.ch/pipermail/r-devel/2015-May/071250.html) for printing dataframes. While some cases have been addressed, there are, for example, reports of this issue in Emacs ESS. While this is a deep issue, there is [ongoing -work to address it in base R](https://blog.r-project.org/2020/05/02/utf-8-support-on-windows/). +work to address it in base R](https://blog.r-project.org/2020/05/02/utf-8-support-on-windows/). +We recommend upgrading to at least R 4.2.1 to address this issue. This means that while `skimr` can render the histograms to the console and in RMarkdown documents, it cannot in other circumstances. This includes: diff --git a/README.md b/README.md index 5c84ca50..36e8a276 100644 --- a/README.md +++ b/README.md @@ -435,15 +435,16 @@ and line charts in various contexts, some of which are described below. ### Support for spark histograms -There are known issues with printing the spark-histogram characters when -printing a data frame. For example, `"▂▅▇"` is printed as -`"<U+2582><U+2585><U+2587>"`. This longstanding problem [originates in -the low-level +With versions of R before 4.2.1, there are known issues with printing +the spark-histogram characters when printing a data frame. For example, +`"▂▅▇"` is printed as `"<U+2582><U+2585><U+2587>"`. This longstanding +problem [originates in the low-level code](https://stat.ethz.ch/pipermail/r-devel/2015-May/071250.html) for printing dataframes. While some cases have been addressed, there are, for example, reports of this issue in Emacs ESS. 
While this is a deep issue, there is [ongoing work to address it in base -R](https://blog.r-project.org/2020/05/02/utf-8-support-on-windows/). 
+R](https://blog.r-project.org/2020/05/02/utf-8-support-on-windows/). We +recommend upgrading to at least R 4.2.1 to address this issue. This means that while `skimr` can render the histograms to the console and in RMarkdown documents, it cannot in other circumstances. This