-
Notifications
You must be signed in to change notification settings - Fork 10
/
gridVectors.R
108 lines (100 loc) · 5.02 KB
/
gridVectors.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#' Summarize vectors into a grid
#'
#' Summarize the velocity vectors into a grid, usually for easy plotting.
#'
#' @param x A numeric matrix of low-dimensional coordinates, e.g., after t-SNE.
#' Alternatively, a \linkS4class{SingleCellExperiment} containing such coordinates in its \code{\link{reducedDims}}.
#' @param embedded A low-dimensional projection of the velocity vectors into the embedding of \code{x}.
#' This should be of the same dimensions as \code{x} and is typically produced by \code{\link{embedVelocity}}.
#' @param resolution Integer scalar specifying the resolution of the grid,
#' in terms of the number of grid intervals along each axis.
#' @param scale Logical scalar indicating whether the averaged vectors should be scaled by the grid resolution.
#' @param as.data.frame Logical scalar indicating whether the output should be a data.frame.
#' If \code{FALSE}, a list of two matrices is returned.
#' @param return.intermediates Logical scalar indicating whether intermediate objects should also be returned.
#' This enforces \code{as.data.frame=FALSE} and throws a warning if it is \code{TRUE}.
#' @param ... For the generic, further arguments to pass to specific methods.
#'
#' For the SingleCellExperiment method, further arguments to pass to the ANY method.
#' @param use.dimred String or integer scalar specifying the reduced dimensions to retrieve from \code{x}.
#'
#' @details
#' This partitions the bounding box of \code{x} into a grid with \code{resolution} units in each dimension.
#' The locations and vectors of all cells in each block are averaged to obtain a representative of that block.
#' This is most obviously useful for visualization to avoid overplotting of velocity vectors.
#'
#' If \code{scale=TRUE}, per-block vectors are scaled so that the median vector length is comparable to the spacing between blocks.
#' This improves visualization when the scales of \code{x} and \code{embedded} are not immediately comparable.
#'
#' @return
#' If \code{as.data.frame=FALSE}, a list is returned containing \code{start} and \code{end},
#' two numeric matrices with one row per non-empty block in the grid and one column per column in \code{x}.
#' \code{start} contains the mean location of all cells inside that block,
#' and \code{end} contains the endpoint after adding the (scaled) average of the block's cell's velocity vectors.
#'
#' If \code{as.data.frame=TRUE}, a data.frame is returned with numeric columns of the same contents as the list above.
#' Column names are prefixed by \code{start.*} and \code{end.*}.
#'
#' If \code{return.intermediates=TRUE}, a list is returned (irrespective of the value of \code{as.data.frame})
#' that in addition to \code{start} and \code{end} also contains intermediate objects \code{limits}
#' (the ranges in x and y), \code{delta} (the grid intervals in x and y), \code{categories} (a
#' DataFrame with integer row and column indices for each cell that specify the grid field that it is
#' contained in), \code{grp} (numerical index of grid fields for each cell) and \code{vec} (velocity
#' vectors for non-empty grid fields).
#'
#' @author Aaron Lun
#' @examples
#' tsne.results <- matrix(rnorm(10000), ncol=2)
#' tsne.vectors <- matrix(rnorm(10000), ncol=2)
#'
#' out <- gridVectors(tsne.results, tsne.vectors)
#'
#' # Demonstration for plotting.
#' plot(tsne.results[,1], tsne.results[,2], col='grey')
#' arrows(out$start.1, out$start.2, out$end.1, out$end.2, length=0.05)
#'
#' @seealso
#' \code{\link{embedVelocity}}, to generate \code{embedded}.
#' @name gridVectors
NULL
#' @export
#' @importFrom S4Vectors selfmatch DataFrame
#' @importFrom stats median
.grid_vectors <- function(x, embedded, resolution=40, scale=TRUE, as.data.frame=TRUE, return.intermediates=FALSE) {
limits <- apply(x, 2, range)
intercept <- limits[1,]
delta <- (limits[2,] - intercept)/resolution
categories <- t((t(x) - intercept)/delta)
storage.mode(categories) <- "integer"
categories <- DataFrame(categories)
grp <- selfmatch(categories)
tab <- as.integer(table(grp))
pos <- rowsum(x, grp)/tab
vec <- rowsum(embedded, grp)/tab
if (scale) {
d <- sqrt(rowSums(vec^2))
target <- sqrt(sum(delta^2))
vec <- vec * target/median(d)/2 # divide by 2 to avoid running over into another block.
}
if (return.intermediates) {
if (as.data.frame) {
warning("ignoring 'as.data.frame=TRUE' for 'return.intermediates=TRUE'")
}
return(list(start=pos, end=pos + vec, limits=limits, delta=delta,
categories=categories, grp=grp, vec=vec))
} else {
FUN <- if (as.data.frame) data.frame else list
return(FUN(start=pos, end=pos + vec))
}
}
#' @export
#' @rdname gridVectors
setGeneric("gridVectors", function(x, embedded, ...) standardGeneric("gridVectors"))
#' @export
#' @rdname gridVectors
setMethod("gridVectors", "ANY", .grid_vectors)
#' @export
#' @rdname gridVectors
setMethod("gridVectors", "SingleCellExperiment", function(x, embedded, ..., use.dimred=1) {
.grid_vectors(reducedDim(x, use.dimred), embedded, ...)
})