-
Notifications
You must be signed in to change notification settings - Fork 0
/
gene-plots.Rmd
149 lines (124 loc) · 3.54 KB
/
gene-plots.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# Gene plots
Objective: for a region of the genome, find peaks near the TSS of genes
and then plot their signal strength per gene, stratifying by the tissue
origin of the peak.
We start by loading the pre-downloaded peaks ranges:
```{r}
load("data/peaks.rda")
```
Likewise, we want to use hg19 genes again to match the hg19 peaks:
```{r message=FALSE}
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(org.Hs.eg.db)
g <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene)
```
Adding gene symbols:
```{r message=FALSE}
library(plyranges)
g <- g %>%
mutate(gene_name = mapIds(org.Hs.eg.db,
gene_id, "SYMBOL", "ENTREZID"))
```
Find a region of the genome near a kidney-specific gene:
```{r}
g %>%
filter(gene_name == "UMOD")
region <- data.frame(
seqnames="chr16",
start=20e6,
end=21e6) %>%
as_granges()
```
Combine the bladder and kidney peaks, and select certain columns:
```{r}
pks <- bind_ranges(bladder=bladder_pks,
kidney=kidney_pks,
.id="tissue") %>%
select(signal=signalValue, tissue)
```
Finally, we perform the overlap join, locating peaks within 100kb of
the TSS of the gene.
```{r}
g_with_pks <- g %>%
anchor_5p() %>%
mutate(width=1) %>%
filter_by_overlaps(region) %>%
join_overlap_inner(pks, maxgap=1e5)
g_with_pks$tissue %>% table()
```
We can construct a faceted set of boxplots, first we make a tibble of
data for our plot.
```{r message=FALSE}
library(dplyr)
library(tibble)
dat <- g_with_pks %>%
select(gene_name, signal, tissue, .drop_ranges=TRUE) %>%
as_tibble()
```
Then pass the data to ggplot (we could have just passed the data
directly, but we plan to re-use the data).
```{r peaks-near-genes, message=FALSE}
library(ggplot2)
dat %>%
ggplot(aes(tissue, signal)) +
geom_boxplot() +
facet_wrap(~gene_name)
```
Now let's try to plot these in context, using *plotgardener*
[@plotgardener].
First we filter down to the peaks near *UMOD*.
```{r}
umod <- g %>%
filter(gene_name == "UMOD") %>%
anchor_5p() %>%
mutate(width=1)
pks_to_plot <- pks %>%
filter_by_overlaps(umod, maxgap=1e5) %>%
anchor_center() %>%
mutate(width=1e4) # to make the ranges more visible
```
We then define a color scheme for the tissue variable, and make a
ggplot object which will be added to our genome plots.
```{r}
cols <- function(n) palette.colors(n+2)[-c(1,3)]
col_vec <- cols(2)
names(col_vec) <- unique(dat$tissue)
p <- dat %>%
filter(gene_name == "UMOD") %>%
ggplot(aes(tissue, signal, col=tissue)) +
# here we set a seed for jitter
geom_point(size=.5, position=position_jitter(width=.1, seed=5)) +
scale_color_manual(values = col_vec)
```
We then create some parameters that will be shared across a number of
the plots in *plotgardener*.
```{r message=FALSE}
library(plotgardener)
par <- pgParams(
chrom = "chr16",
chromstart = round((start(umod) - 1e5)/1e5)*1e5,
chromend = round((start(umod) + 4e5)/1e5)*1e5,
assembly = "hg19", just = c("left", "bottom")
)
```
Finally we put all the pieces together on a page (for laying out the
plot, first use `showGuides=TRUE`).
```{r garden, fig.width=5, fig.height=3, message=FALSE}
pageCreate(width = 5, height = 3, showGuides = FALSE)
plotGenes(
params = par, x = 0.5, y = 2.5, width = 4, height = .75
)
plotRanges(
pks_to_plot,
fill = colorby("tissue", palette=cols),
params = par, x = 0.5, y = 1.75, width = 4, height = 1.75
)
plotGenomeLabel(
params = par, x = 0.5, y = 2.5, length = 4,
just = c("left", "top")
)
plotGG(
p + ggtitle("UMOD"),
params = par, x = 2.25, y = 1.75, width = 2.5, height = 1.75
)
```