-
Notifications
You must be signed in to change notification settings - Fork 23
/
Copy pathFSAUtils.R
920 lines (878 loc) · 31.6 KB
/
FSAUtils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
#' @title Capitalizes the first letter of first or all words in a string.
#'
#' @description Capitalizes the first letter of first or all words in a string.
#'
#' @param x A single string.
#' @param which A single string that indicates whether all (the default) or only the first words should be capitalized.
#'
#' @return A single string with the first letter of the first or all words capitalized.
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @keywords manip
#'
#' @examples
#' ## Capitalize first letter of all words (the default)
#' capFirst("Derek Ogle")
#' capFirst("derek ogle")
#' capFirst("derek")
#'
#' ## Capitalize first letter of only the first words
#' capFirst("Derek Ogle",which="first")
#' capFirst("derek ogle",which="first")
#' capFirst("derek",which="first")
#' ## apply to all elements in a vector
#' vec <- c("Derek Ogle","derek ogle","Derek ogle","derek Ogle","DEREK OGLE")
#' capFirst(vec)
#' capFirst(vec,which="first")
#'
#' ## check class types
#' class(vec)
#' vec1 <- capFirst(vec)
#' class(vec1)
#' fvec <- factor(vec)
#' fvec1 <- capFirst(fvec)
#' class(fvec1)
#'
#' @export
capFirst <- function(x,which=c("all","first")) {
## Get the class of the object
cls <- class(x)
## Perform a check
if (!inherits(cls,c("character","factor"))) STOP("'capFirst' only works with 'character' or 'factor' objects.")
## Capitalize the one word or the words in the vector
if (length(x)==1) x <- iCapFirst(x,which)
else x <- apply(matrix(x),MARGIN=1,FUN=iCapFirst,which=which)
## Change the class to what the original was
if (cls=="factor") x <- as.factor(x)
## Return the object
x
}
## Internal Function
iCapFirst<- function(x,which=c("all","first")) {
# See whether all or just the first word should have the first letter capitalized
which <- match.arg(which)
# Only process if not NA
if (!is.na(x)) {
# convert entire string to lower case ...
x <- tolower(x)
# then split on space if more than one word
s <- strsplit(x, " ")[[1]]
if (which=="first") {
# convert first letters of first word to upper-case
s1 <- toupper(substring(s, 1,1)[1])
# attach capitalized first letter to rest of lower-cased original string
x <- paste(s1,substring(x,2),sep="",collapse=" ")
} else {
# convert first letters of all words to upper-case
s1 <- toupper(substring(s, 1,1))
# attach capitalized first letter to rest of lower-cased separated strings
x <- paste(s1,substring(s,2),sep="",collapse=" ")
}
}
x
}
#' @title Converts an R color to RGB (red/green/blue) including a transparency (alpha channel).
#'
#' @description Converts an R color to RGB (red/green/blue) including a transparency (alpha channel). Similar to \code{\link[grDevices]{col2rgb}} except that a transparency (alpha channel) can be included.
#'
#' @param col A vector of any of the three kinds of R color specifications (i.e., either a color name (as listed by \code{\link[grDevices]{colors}}()), a hexadecimal string of the form "#rrggbb" or "#rrggbbaa" (see \code{\link[grDevices]{rgb}}), or a positive integer i meaning \code{\link[grDevices]{palette}}()[i].
#' @param transp A numeric vector that indicates the transparency level for the color. The transparency values must be greater than 0. Transparency values greater than 1 are interpreted as the number of points plotted on top of each other before the transparency is lost and is, thus, transformed to the inverse of the transparency value provided.
#'
#' @return A vector of hexadecimal strings of the form "#rrggbbaa" as would be returned by \code{\link[grDevices]{rgb}}.
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @seealso See \code{\link[grDevices]{col2rgb}} for similar functionality.
#'
#' @keywords manip
#'
#' @examples
#' col2rgbt("black")
#' col2rgbt("black",1/4)
#' clrs <- c("black","blue","red","green")
#' col2rgbt(clrs)
#' col2rgbt(clrs,1/4)
#' trans <- (1:4)/5
#' col2rgbt(clrs,trans)
#'
#' @export
col2rgbt <- function(col,transp=1) {
if (length(transp)==1) transp <- rep(transp,length(col))
if (length(col)!=length(transp))
STOP("Length of 'transp' must be 1 or same as length of 'col'.")
mapply(iMakeColor,col,transp,USE.NAMES=FALSE)
}
#' @title Converts "numeric" factor levels to numeric values.
#'
#' @description Converts \dQuote{numeric} factor levels to numeric values.
#'
#' @param object A vector with \dQuote{numeric} factor levels to be converted to numeric values.
#'
#' @return A numeric vector.
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @keywords manip
#'
#' @examples
#' junk <- factor(c(1,7,2,4,3,10))
#' str(junk)
#' junk2 <- fact2num(junk)
#' str(junk2)
#'
#' ## ONLY RUN IN INTERACTIVE MODE
#' if (interactive()) {
#'
#' bad <- factor(c("A","B","C"))
#' # This will result in an error -- levels are not 'numeric'
#' bad2 <- fact2num(bad)
#'
#' } ## END IF INTERACTIVE MODE
#'
#' @export
fact2num <- function(object) {
## Don't continue if object is not a factor or character
## i.e., does not fit the purpose of this function
if (!inherits(object,c("factor","character")))
STOP("'object' is not a factor or character and",
"does not fit the purpose of this function.")
## Convert factor to character and then numeric
suppressWarnings(res <- as.numeric(as.character(object)))
## If all na's then stop because values were not numeric-like, else return
if (all(is.na(res)))
STOP("Conversion aborted because all levels in 'object' are not 'numbers'.")
else as.vector(res)
}
#' @title Opens web pages associated with the fishR website.
#'
#' @description Opens web pages associated with the \href{https://fishr-core-team.github.io/fishR/}{fishR website} in a browser. The user can open the main page or choose a specific page to open.
#'
#' @param where A string that indicates a particular page on the fishR website to open.
#' @param open A logical that indicates whether the webpage should be opened in the default browser. Defaults to \code{TRUE}; \code{FALSE} is used for unit testing.
#'
#' @return None, but a webpage will be opened in the default browser.
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @keywords misc
#'
#' @examples
#' \dontrun{
#' ## Opens an external webpage ... only run interactively
#' fishR() # home page
#' fishR("posts") # blog posts (some examples) page
#' fishR("books") # examples page
#' fishR("IFAR") # Introduction to Fisheries Analysis with R page
#' fishR("AIFFD") # Analysis & Interpretation of Freshw. Fisher. Data page
#' fishR("packages") # list of r-related fishereis packages
#' fishR("data") # list of fisheries data sets
#' }
#'
#' @export
fishR <- function(where=c("home","posts","books","IFAR","AIFFD",
"packages","data"),
open=TRUE) {
where <- match.arg(where)
tmp <- "https://fishr-core-team.github.io/fishR/"
switch(where,
home= { tmp <- paste0(tmp,"") },
posts= { tmp <- paste0(tmp,"blog/") },
books= { tmp <- paste0(tmp,"pages/books.html") },
IFAR= { tmp <- paste0(tmp,"pages/books.html#introductory-fisheries-analyses-with-r") },
AIFFD= { tmp <- paste0(tmp,"pages/books.html#analysis-and-interpretation-of-freshwater-fisheries-data-i") },
packages= { tmp <- paste0(tmp,"pages/packages.html") },
data= { tmp <- paste0(tmp,"pages/data_fishR_alpha.html")}
)
if (open) utils::browseURL(tmp)
invisible(tmp)
}
#' @title Shows rows from the head and tail of a data frame or matrix.
#'
#' @description Shows rows from the head and tail of a data frame or matrix.
#'
#' @param x A data frame or matrix.
#' @param n A single numeric that indicates the number of rows to display from each of the head and tail of structure.
#' @param which A numeric or string vector that contains the column numbers or names to display. Defaults to showing all columns.
#' @param addrownums If there are no row names for the MATRIX, then create them from the row numbers.
#' @param \dots Arguments to be passed to or from other methods.
#'
#' @return A matrix or data.frame with 2*n rows.
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @note If \code{n} is larger than the number of rows in \code{x} then all of \code{x} is displayed.
#'
#' @seealso \code{peek}
#'
#' @keywords manip
#'
#' @examples
#' headtail(iris)
#' headtail(iris,10)
#' headtail(iris,which=c("Sepal.Length","Sepal.Width","Species"))
#' headtail(iris,which=grep("Sepal",names(iris)))
#' headtail(iris,n=200)
#'
#' ## Make a matrix for demonstration purposes only
#' miris <- as.matrix(iris[,1:4])
#' headtail(miris)
#' headtail(miris,10)
#' headtail(miris,addrownums=FALSE)
#' headtail(miris,10,which=2:4)
#'
#' ## Make a tbl_df type from tibble ... note how headtail()
#' ## is not limited by the tbl_df restriction on number of
#' ## rows to show (but head() is).
#' if (require(tibble)) {
#' iris2 <- as_tibble(iris)
#' class(iris2)
#' headtail(iris2,n=15)
#' head(iris2,n=15)
#' }
#' @export
headtail <- function(x,n=3L,which=NULL,addrownums=TRUE,...) {
## Some checks
if (!(is.matrix(x) | is.data.frame(x)))
STOP("'x' must be a matrix or data.frame.")
if (length(n)!=1L)
STOP("'n' must be a single number.")
## Remove tbl_df class if it exists
if ("tbl_df" %in% class(x)) x <- as.data.frame(x)
## Process data.frame
N <- nrow(x)
if (n>=N) tmp <- x
else {
h <- utils::head(x,n,...)
if (addrownums) {
if (is.null(rownames(x))) rownames(h) <- paste0("[",seq_len(n),",]")
} else rownames(h) <- NULL
t <- utils::tail(x,n,addrownums,...)
tmp <- rbind(h,t)
}
if (!is.null(which)) tmp <- tmp[,which]
tmp
}
#' @title Ratio of lagged observations.
#'
#' @description Computes the ratio of lagged observations in a vector.
#'
#' @details This function behaves similarly to \code{diff()} except that it returns a vector or matrix of ratios rather than differences.
#'
#' @param x A numeric vector or matrix.
#' @param lag An integer representing the lag \sQuote{distance}.
#' @param direction A string that indicates the direction of calculation. A \code{"backward"} indicates that \sQuote{latter} values are divided by \sQuote{former} values. A \code{"forward"} indicates that \sQuote{former} values are divided by \sQuote{latter} values. See examples.
#' @param recursion An integer that indicates the level of recursion for the calculations. A \code{1} will simply compute the ratios. A \code{2}, for example, will compute the ratios, save the result, and then compute the ratios of the results using the same \code{lag}. See examples.
#' @param differences Same as \code{recursion}. Used for symmetry with \code{\link[base]{diff}}.
#' @param \dots Additional arguments to \code{diff()}.
#'
#' @return A vector or matrix of lagged ratios.
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @seealso \code{diff}
#'
#' @keywords manip
#'
#' @examples
#' ## Backward lagged ratios
#' # no recursion
#' lagratio(1:10,1)
#' lagratio(1:10,2)
#' # with recursion
#' lagratio(1:10,1,2)
#' lagratio(1:10,2,2)
#'
#' ## Forward lagged ratios
#' # no recursion
#' lagratio(10:1,1,direction="forward")
#' lagratio(10:1,2,direction="forward")
#' # with recursion
#' lagratio(10:1,1,2,direction="forward")
#' lagratio(10:1,2,2,direction="forward")
#'
#' @export
lagratio <- function(x,lag=1L,recursion=1L,differences=recursion,
direction=c("backward","forward"),...) {
## Some checks
direction <- match.arg(direction)
if(any(x==0))
STOP("Will not work with zeros in 'x'.")
if(inherits(x,c("POSIXt","POSIXct")))
STOP("Function does not work for 'POSIXt' objects.")
if (!recursion>0) STOP("'recursion' value must be >0.")
## Flip vector if ratio direction is forward
if (direction=="forward") x <- rev(x)
## Compute lagged ratio
res <- exp(diff(log(x),lag=lag,differences=differences,...))
## Flip the resulting vector if direction is forward
if (direction=="forward") res <- rev(res)
## Return the result
res
}
#' @title Constructs the correction-factor used when back-transforming log-transformed values.
#'
#' @description Constructs the correction-factor used when back-transforming log-transformed values according to Sprugel (1983). Sprugel's main formula -- exp((syx^2)/2) -- is used when syx is estimated for natural log transformed data. A correction for any base is obtained by multiplying the syx term by log_e(base) to give exp(((log_e(base)*syx)^2)/2). This more general formula is implemented here (if, of course, the base is exp(1) then the general formula reduces to the original specific formula).
#'
#' @param obj An object from \code{lm}.
#' @param base A single numeric that indicates the base of the logarithm used.
#'
#' @return A numeric value that is the correction factor according to Sprugel (1983).
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @references Sprugel, D.G. 1983. Correcting for bias in log-transformed allometric equations. Ecology 64:209-210.
#'
#' @keywords manip
#'
#' @examples
#' # toy data
#' df <- data.frame(y=rlnorm(10),x=rlnorm(10))
#' df$logey <- log(df$y)
#' df$log10y <- log10(df$y)
#' df$logex <- log(df$x)
#' df$log10x <- log10(df$x)
#'
#' # model and predictions on loge scale
#' lme <- lm(logey~logex,data=df)
#' ( ploge <- predict(lme,data.frame(logex=log(10))) )
#' ( pe <- exp(ploge) )
#' ( cfe <- logbtcf(lme) )
#' ( cpe <- cfe*pe )
#'
#' # model and predictions on log10 scale
#' lm10 <- lm(log10y~log10x,data=df)
#' plog10 <- predict(lm10,data.frame(log10x=log10(10)))
#' p10 <- 10^(plog10)
#' ( cf10 <- logbtcf(lm10,10) )
#' ( cp10 <- cf10*p10 )
#'
#' # cfe and cf10, cpe and cp10 should be equal
#' all.equal(cfe,cf10)
#' all.equal(cpe,cp10)
#'
#' @rdname logbtcf
#' @export
logbtcf <- function(obj,base=exp(1)) {
if (!all(class(obj)=="lm")) STOP("'obj' must be from lm().")
exp(((log(base)*summary(obj)$sigma)^2)/2)
}
#' @name is.odd
#'
#' @title Determine if a number is odd or even.
#'
#' @description Determine if a number is odd or even.
#'
#' @param x A numeric vector.
#'
#' @return A logical vector of the same length as x.
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @keywords manip
#'
#' @aliases is.odd is.even
#'
#' @examples
#' ## Individual values
#' is.odd(1)
#' is.odd(2)
#' is.even(3)
#' is.even(4)
#'
#' ## Vector of values
#' d <- 1:8
#' data.frame(d,odd=is.odd(d),even=is.even(d))
NULL
#' @rdname is.odd
#' @export
is.odd <- function (x) iOddEven(x,1)
#' @rdname is.odd
#' @export
is.even <- function(x) iOddEven(x,0)
## Internal function
iOddEven <- function(x,checkval) {
if (!is.vector(x)) STOP("'x' must be a vector.")
if (!is.numeric(x)) STOP("'x' must be numeric.")
x%%2 == checkval
}
#' @title Computes the percentage of values in a vector less than or greater than (and equal to) some value.
#'
#' @description Computes the percentage of values in a vector less than or greater than (and equal to) a user-supplied value.
#'
#' @details This function is most useful when used with an apply-type of function.
#'
#' @param x A numeric vector.
#' @param val A single numeric value.
#' @param dir A string that indicates whether the percentage is for values in \code{x} that are \dQuote{greater than and equal} \code{"geq"}, \dQuote{greater than} \code{"gt"}, \dQuote{less than and equal} \code{"leq"}, \dQuote{less than} \code{"lt"} the value in \code{val}.
#' @param na.rm A logical that indicates whether \code{NA} values should be removed (DEFAULT) from \code{x} or not.
#' @param digits A single numeric that indicates the number of decimals the percentage should be rounded to.
#'
#' @return A single numeric that is the percentage of values in \code{x} that meet the criterion in \code{dir} relative to \code{val}.
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @keywords misc
#'
#' @examples
#' ## vector of values
#' ( tmp <- c(1:8,NA,NA) )
#'
#' ## percentages excluding NA values
#' perc(tmp,5)
#' perc(tmp,5,"gt")
#' perc(tmp,5,"leq")
#' perc(tmp,5,"lt")
#'
#' ## percentages including NA values
#' perc(tmp,5,na.rm=FALSE)
#' perc(tmp,5,"gt",na.rm=FALSE)
#' perc(tmp,5,"leq",na.rm=FALSE)
#' perc(tmp,5,"lt",na.rm=FALSE)
#'
#' @export
perc <- function(x,val,dir=c("geq","gt","leq","lt"),na.rm=TRUE,
digits=getOption("digits")) {
## Some checks
dir <- match.arg(dir)
if (!inherits(x,c("numeric","integer")))
STOP("'perc' only works for numeric vectors.")
if (length(val)>1)
WARN("Only the first value of 'val' was used.")
## Find sample size (don't or do include NA values)
n <- ifelse(na.rm,length(x[!is.na(x)]),length(x))
## Compute percentage in dir(ection) of val(ue), but return
## a NaN if the x has no valid values
if (n==0) return(NaN)
else { # find values that fit criterion
switch(dir,
geq= {tmp <- x[x>=val]},
gt = {tmp <- x[x>val]},
leq= {tmp <- x[x<=val]},
lt = {tmp <- x[x<val]}
) # end switch
## must remove NA values (even if asked not to because they
## will appear to be less than val ... i.e., NAs were included
## in n above if asked for but they should not be included in
## the vector of values that fit the criterion) to find
## number that match the criterion
tmp <- length(tmp[!is.na(tmp)])
}
round(tmp/n*100,digits)
}
#' @title Peek into (show a subset of) a data frame or matrix.
#'
#' @description Shows the first, last, and approximately evenly spaced rows from a data frame or matrix.
#'
#' @param x A data frame or matrix.
#' @param n A single numeric that indicates the number of rows to display.
#' @param which A numeric or string vector that contains the column numbers or names to display. Defaults to showing all columns.
#' @param addrownums If there are no row names for the MATRIX, then create them from the row numbers.
#'
#' @return A matrix or data.frame with n rows.
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @author A. Powell Wheeler, \email{powell.wheeler@@gmail.com}
#'
#' @seealso \code{headtail}
#'
#' @note If \code{n} is larger than the number of rows in \code{x} then all of \code{x} is displayed.
#'
#' @keywords manip
#'
#' @examples
#' peek(iris)
#' peek(iris,n=6)
#' peek(iris,n=6,which=c("Sepal.Length","Sepal.Width","Species"))
#' peek(iris,n=6,which=grep("Sepal",names(iris)))
#' peek(iris,n=200)
#'
#' ## Make a matrix for demonstration purposes only
#' miris <- as.matrix(iris[,1:4])
#' peek(miris)
#' peek(miris,n=6)
#' peek(miris,n=6,addrownums=FALSE)
#' peek(miris,n=6,which=2:4)
#'
#' ## Make a tbl_df type from dplyr ... note how peek() is not limited by
#' ## the tbl_df restriction on number of rows to show (but head() is).
#' if (require(dplyr)) {
#' iris2 <- tbl_df(iris)
#' class(iris2)
#' peek(iris2,n=6)
#' head(iris2,n=15)
#' }
#' @export
peek <- function(x,n=20L,which=NULL,addrownums=TRUE) {
## Some checks
if (!(is.matrix(x) | is.data.frame(x)))
STOP("'x' must be a matrix or data.frame.")
if (length(n)!=1L) STOP("'n' must be a single number.")
if (n<1L) STOP("'n' must be greater than 0.")
## Remove tbl_df class if it exists
if ("tbl_df" %in% class(x)) x <- as.data.frame(x)
## Get number of rows in x
N <- nrow(x)
## If asked for is greater than size then just return x
if (n>=N) tmp <- x
else {
rows <- c(1,round((1:(n-2))*(N/(n-1)),0),N)
tmp <- x[rows,]
if (addrownums) {
if (is.null(rownames(tmp))) rownames(tmp) <- rows
} else rownames(tmp) <- NULL
}
if (!is.null(which)) tmp <- tmp[,which]
tmp
}
#' @name rcumsum
#'
#' @title Computes the prior to or reverse cumulative sum of a vector.
#'
#' @description Computes the prior-to (i.e., the cumulative sum prior to but not including the current value) or the reverse (i.e., the number that large or larger) cumulative sum of a vector. Also works for 1-dimensional tables, matrices, and data.frames, though it is best used with vectors.
#'
#' @note An \code{NA} in the vector causes all returned values at and after the first \code{NA} for \code{pcumsum} and at and before the last \code{NA} for \code{rcumsum} to be \code{NA}. See the examples.
#'
#' @param x a numeric object.
#'
#' @return A numeric vector that contains the prior-to or reverse cumulative sums.
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @seealso \code{\link{cumsum}}.
#'
#' @keywords misc
#'
#' @examples
#' ## Simple example
#' cbind(vals=1:10,
#' cum=cumsum(1:10),
#' pcum=pcumsum(1:10),
#' rcum=rcumsum(1:10))
#'
#' ## Example with NA
#' vals <- c(1,2,NA,3)
#' cbind(vals,
#' cum=cumsum(vals),
#' pcum=pcumsum(vals),
#' rcum=rcumsum(vals))
#'
#' ## Example with NA
#' vals <- c(1,2,NA,3,NA,4)
#' cbind(vals,
#' cum=cumsum(vals),
#' pcum=pcumsum(vals),
#' rcum=rcumsum(vals))
#'
#' ## Example with a matrix
#' mat <- matrix(c(1,2,3,4,5),nrow=1)
#' cumsum(mat)
#' pcumsum(mat)
#' rcumsum(mat)
#'
#' ## Example with a table (must be 1-d)
#' df <- sample(1:10,100,replace=TRUE)
#' tbl <- table(df)
#' cumsum(tbl)
#' pcumsum(tbl)
#' rcumsum(tbl)
#'
#' ## Example with a data.frame (must be 1-d)
#' df <- sample(1:10,100,replace=TRUE)
#' tbl <- as.data.frame(table(df))[,-1]
#' cumsum(tbl)
#' pcumsum(tbl)
#' rcumsum(tbl)
NULL
#' @rdname rcumsum
#' @export
rcumsum <- function(x) {
iChkCumSum(x)
rev(cumsum(rev(x)))
}
#' @rdname rcumsum
#' @export
pcumsum <- function(x) {
iChkCumSum(x)
cumsum(x)-x
}
## Internal function for Xcumsum()
iChkCumSum <- function(x) {
tmp <- class(x)
if ("matrix" %in% tmp | "data.frame" %in% tmp) {
if (all(dim(x)!=1)) STOP("'x' is not 1-dimensional.")
}
if ("table" %in% tmp | "xtabs" %in% tmp) {
if (length(dim(x))>1) STOP("'x' is not 1-dimensional.")
}
if (!is.numeric(x)) STOP("'x' must be numeric.")
}
#' @title Extract the coefficient of determination from a linear model object.
#'
#' @description Extracts the coefficient of determination (i.e., \dQuote{r-squared}) from a linear model (i.e., \code{lm}) object.
#'
#' @details This is a convenience function to extract the \code{r.squared} part from \code{summary(lm)}.
#'
#' @param object An object saved from \code{lm}.
#' @param digits A single number that is the number of digits to round the returned result to.
#' @param percent A logical that indicates if the result should be returned as a percentage (\code{=TRUE}) or as a proportion (\code{=FALSE}; default).
#' @param \dots Additional arguments for methods.
#'
#' @return A numeric, as either a proportion or percentage, that is the coefficient of determination for a linear model.
#'
#' @keywords misc
#'
#' @aliases rSquared rSquared.default rSquared.lm
#'
#' @examples
#' lm1 <- lm(mirex~weight, data=Mirex)
#' rSquared(lm1)
#' rSquared(lm1,digits=3)
#' rSquared(lm1,digits=1,percent=TRUE)
#'
#' ## rSquared only works with lm objects
#' \dontrun{
#' nls1 <- nls(mirex~a*weight^b,data=Mirex,start=list(a=1,b=1))
#' rSquared(nls1)
#' }
#'
#' @rdname rSquared
#' @export
rSquared <- function(object, ...) {
UseMethod("rSquared")
}
#' @rdname rSquared
#' @export
rSquared.default <- function(object, ...) {
STOP("'rSquared' only works with 'lm', not '",class(object),"', objects")
}
#' @rdname rSquared
#' @export
rSquared.lm <- function(object,digits=getOption("digits"),
percent=FALSE,...) {
r2 <- summary(object)$r.squared
ifelse(percent,round(r2*100,digits),round(r2,digits))
}
#' @title Find non-repeated consecutive rows in a data.frame.
#'
#' @description Finds the rows in a data.frame that are not repeats of the row immediately above or below it.
#'
#' @param df A data.frame.
#' @param cols2use A string or numeric vector that indicates columns in \code{df} to use. Negative numeric values will not use those columns. Cannot use both \code{cols2use} and \code{col2ignore}.
#' @param cols2ignore A string or numeric vector that indicates columns in \code{df} to ignore. Cannot use both \code{cols2use} and \code{col2ignore}.
#' @param keep A string that indicates whether the \code{first} (DEFAULT) or \code{last} row of consecutive repeated rows should be kept.
#'
#' @return A single logical that indicates which rows of \code{df} to keep such that no consecutive rows (for the columns used) will be repeated.
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @keywords manip
#'
#' @examples
#' test1 <- data.frame(ID=1:10,
#' KEEP=c("First","Last","Both","Both","Both",
#' "Both","First","Neither","Last","Both"),
#' V1=c("a","a","a","B","b","B","A","A","A","a"),
#' V2=c("a","a","A","B","B","b","A","A","A","a"))
#' keepFirst <- repeatedRows2Keep(test1,cols2ignore=1:2)
#' keepLast <- repeatedRows2Keep(test1,cols2use=3:4,keep="last")
#' data.frame(test1,keepFirst,keepLast)
#'
#' droplevels(subset(test1,keepFirst)) # should be all "First" or "Both" (7 items)
#' droplevels(subset(test1,keepLast)) # should be all "Last" or "Both" (7 items)
#'
#' @export
repeatedRows2Keep <- function(df,cols2use=NULL,cols2ignore=NULL,
keep=c("first","last")) {
## Internal Function
### from www.cookbook-r.com/Manipulating_data/Comparing_vectors_or_factors_with_NA/
iCompareNA <- function(v1,v2) {
same <- (v1 == v2) | (is.na(v1) & is.na(v2))
same[is.na(same)] <- FALSE
same
}
## Main Function
keep <- match.arg(keep)
# make sure df is a data.frame (could be sent as a matrix)
df <- as.data.frame(df)
if (nrow(df)==1) res <- FALSE
else {
# change data.frame based on cols2use or cols2ignore
df <- iHndlCols2UseIgnore(df,cols2use,cols2ignore)
## get data.frames offset by 1 indice for comparisons
df1 <- df[1:(nrow(df)-1),]
df2 <- df[2:nrow(df),]
## compare data.frames
if (keep=="first") { # returns first of the repeats
# find rows where all are TRUE (consecutive rows repeat)
# first row cannot be a repeat so put FALSE in its place
res <- iCompareNA(df1,df2)
if (is.matrix(res)) res <- apply(res,MARGIN=1,FUN=all)
res <- c(FALSE,res)
} else { # returns last of the repeats
# reverse the order of the data.frames
df1a <- df1[nrow(df1):1,]
df2a <- df2[nrow(df2):1,]
# find rows where all are TRUE (consecutive row repeats), but reverse the
# order to return; last row can't be a repeat so put FALSE in its place
res <- iCompareNA(df2a,df1a)
if (is.matrix(res)) res <- apply(res,MARGIN=1,FUN=all)
res <- c(rev(res),FALSE)
}
# remove names attribute
names(res) <- NULL
}
# reverse the TRUE/FALSEs so that TRUE means rows to keep (not rows repeated)
!res
}
#' @title Computes standard error of the mean.
#'
#' @description Computes the standard error of the mean (i.e., standard deviation divided by the square root of the sample size).
#'
#' @details
#' The standard error of the value in vector \code{x} is simply the standard deviation of \code{x} divided by the square root of the number of valid items in \code{x}
#'
#' @param x A numeric vector.
#' @param na.rm A logical that indicates whether missing values should be removed before computing the standard error.
#'
#' @return A single numeric that is the standard error of the mean of \code{x}.
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @seealso See \code{se} in \pkg{sciplot} for similar functionality.
#'
#' @keywords manip
#'
#' @examples
#' # example vector
#' x <- 1:20
#' se(x)
#' sd(x)/sqrt(length(x)) ## matches
#'
#' # all return NA if missing values are not removed
#' x2 <- c(x,NA)
#' sd(x2)/sqrt(length(x2))
#'
#' # Better if missing values are removed
#' se(x2) ## Default behavior
#' sd(x2,na.rm=TRUE)/sqrt(length(x2[complete.cases(x2)])) ## Matches
#' se(x2,na.rm=FALSE) ## Result from not removing NAs
#'
#' @export
se <- function (x,na.rm=TRUE) {
if (!is.vector(x)) STOP("'x' must be a vector.")
if (!is.numeric(x)) STOP("'x' must be numeric.")
if (na.rm) x <- x[stats::complete.cases(x)]
sqrt(stats::var(x)/length(x))
}
#' @title Finds the number of valid (non-NA) values in a vector.
#'
#' @description Finds the number of valid (non-NA) values in a vector.
#'
#' @param object A vector.
#'
#' @return A single numeric value that is the number of non-\code{NA} values in a vector.
#'
#' @seealso See \code{\link[plotrix]{valid.n}} in \pkg{plotrix} and \code{nobs} in \pkg{gdata} for similar functionality. See \code{\link{is.na}} for finding the missing values.
#'
#' @author Derek H. Ogle, \email{[email protected]}
#'
#' @section IFAR Chapter: 2-Basic Data Manipulations.
#'
#' @keywords manip
#'
#' @examples
#' junk1 <- c(1,7,2,4,3,10,NA)
#' junk2 <- c("Derek","Hugh","Ogle","Santa","Claus","Nick",NA,NA)
#' junk3 <- factor(junk2)
#' junk4 <- c(TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,NA,NA)
#' junk5 <- data.frame(junk1)
#' junk6 <- data.frame(junk3)
#'
#' validn(junk1)
#' validn(junk2)
#' validn(junk3)
#' validn(junk4)
#' validn(junk5)
#' validn(junk6)
#'
#' @export
validn <- function(object) {
## Handle data.frame
if (is.data.frame(object)) {
if (ncol(object)==1) object <- object[,1]
else STOP("'object' cannot be a data.frame with more than one column.")
}
## Handle matrix
if (is.matrix(object)) {
if (ncol(object)==1) object <- object[,1]
else STOP("'object' cannot be a matrix with more than one column.")
}
sum(!is.na(object))
}
#' @title Calculates the geometric mean or geometric standard deviation.
#'
#' @description Calculates the geometric mean or standard deviation of a vector of numeric values.
#'
#' @details The geometric mean is computed by log transforming the raw data in \code{x}, computing the arithmetic mean of the transformed data, and back-transforming this mean to the geometric mean by exponentiating.
#'
#' The geometric standard deviation is computed by log transforming the raw data in \code{x}, computing the arithmetic standard deviation of the transformed data, and back-transforming this standard deviation to the geometric standard deviation by exponentiating.
#'
#' @param x Vector of numeric values.
#' @param na.rm Logical indicating whether to remove missing values or not.
#' @param zneg.rm Logical indicating whether to ignore or remove zero or negative values found in \code{x}.
#'
#' @return A numeric value that is the geometric mean or geometric standard deviation of the numeric values in \code{x}.
#'
#' @note This function is largely an implementation of the code suggested by Russell Senior on R-help in November, 1999.
#'
#' @seealso See \code{\link[psych]{geometric.mean}} in \pkg{psych} and \code{\link[DescTools]{Gmean}} for geometric mean calculators. See \code{Gsd} (documented with \code{\link[DescTools]{Gmean}}) from \pkg{DescTools} for geometric standard deviation calculators.
#'
#' @keywords misc
#'
#' @aliases geomean geosd
#'
#' @examples
#' ## generate random lognormal data
#' d <- rlnorm(500,meanlog=0,sdlog=1)
#' # d has a mean on log scale of 0; thus, gm should be exp(0)~=1
#' # d has a sd on log scale of 1; thus, gsd should be exp(1)~=2.7
#' geomean(d)
#' geosd(d)
#'
#' \dontrun{
#' ## Demonstrate handling of zeros and negative values
#' x <- seq(-1,5)
#' # this will given an error
#' geomean(x)
#' # this will only give a warning, but might not be what you want
#' geomean(x,zneg.rm=TRUE)
#' }
#'
#' @rdname geomean
#' @export
geomean <- function(x,na.rm=FALSE,zneg.rm=FALSE) {
x <- iChk4Geos(x,na.rm,zneg.rm)
exp(mean(log(x),na.rm=na.rm))
}
#' @rdname geomean
#' @export
geosd <- function(x,na.rm=FALSE,zneg.rm=FALSE) {
x <- iChk4Geos(x,na.rm,zneg.rm)
exp(stats::sd(log(x),na.rm=na.rm))
}
iChk4Geos <- function(x,na.rm,zneg.rm) {
if (!is.vector(x))
STOP("'x' must be a vector.")
if (!is.numeric(x))
STOP("'x' must be a numeric vector.")
if (any(x<=0,na.rm=na.rm) & !zneg.rm)
STOP("'x' must contain all positive values.")
if (any(x<=0,na.rm=na.rm) & zneg.rm) {
WARN("Some non-positive values were ignored/removed.")
# remove non-positive values
x <- x[x>0]
}
x
}