-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path1_Find_power_factor_and_plot_dendrogram.Rout
212 lines (195 loc) · 10.6 KB
/
1_Find_power_factor_and_plot_dendrogram.Rout
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
R version 3.0.2 (2013-09-25) -- "Frisbee Sailing"
Copyright (C) 2013 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
Natural language support but running in an English locale
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
[Previously saved workspace restored]
>
> ###########################################################################################################################################################################################
> ## This code was run using R version 3.0.2 (2013-09-25) -- "Frisbee Sailing" on an Ubuntu 14.04.1 LTS. It is recommended at least 4G RAM is available for running this code.
> ## This code was used for transcriptional network analyses in Aylward et al., PNAS, 2015. The R packages WGCNA (Langfelder and Horvath, BMC Bioinformatics, 2008) and igraph
> ## (Csardi and Nepusz, Interjournal, 2006) are required.
> ## It is assumed the user has a count table (timepoints or treatments as columns, genes/transcripts as rows) to use as input. As an example, the data used in Aylward et al., PNAS, 2015
> ## provided. To use custom count data the user will need to modify certain parameters to fit the data- for example the soft-thresholding power used in weighted networks will vary for
> ## dataset. For more information this see Zhang and Horvath, Stat. Appl. Genet. Mol. Biol., 2005.
> ###########################################################################################################################################################################################
>
> # Load R Packages
> library(WGCNA)
Loading required package: dynamicTreeCut
Loading required package: flashClust
Attaching package: ‘flashClust’
The following object is masked from ‘package:stats’:
hclust
==========================================================================
*
* Package WGCNA 1.41.1 loaded.
*
* Important note: It appears that your system supports multi-threading,
* but it is not enabled within WGCNA in R.
* To allow multi-threading within WGCNA with all available cores, use
*
* allowWGCNAThreads()
*
* within R. Use disableWGCNAThreads() to disable threading if necessary.
* Alternatively, set the following environment variable on your system:
*
* ALLOW_WGCNA_THREADS=<number_of_processors>
*
* for example
*
* ALLOW_WGCNA_THREADS=4
*
* To set the environment variable in linux bash shell, type
*
* export ALLOW_WGCNA_THREADS=4
*
* before running R. Other operating systems or shells will
* have a similar command to achieve the same aim.
*
==========================================================================
Attaching package: ‘WGCNA’
The following object is masked from ‘package:stats’:
cor
> library(igraph)
>
> #### Enable multithreading for WGCNA
> enableWGCNAThreads()
Allowing parallel execution with up to 3 working processes.
>
> ############ Load raw count data and normalize by total counts in a given timepoint [column]
> numsamples <- 35
> canon.counts <- read.table(file="Data/canon.combined.counts.10_2014.txt", sep="\t", header=TRUE, row.names=1)
> numgenes <- dim(canon.counts)[1]
> Data.Counts <- canon.counts[1:numgenes,1:numsamples]
> Total.Counts <- as.numeric(apply(canon.counts[2:numgenes,1:numsamples], 2, sum))
> Norm=scale(Data.Counts, scale=Total.Counts, center=FALSE)
> Norm_Final <- t(Norm)
> #time <- as.numeric(canon.counts[1,])
>
> #Pick Soft Threshold
> powers =c(c(1:10),seq(from = 12, to=20,by=2))
> sft=pickSoftThreshold(Norm_Final, powerVector=powers, verbose=5)
pickSoftThreshold: will use block size 8844.
pickSoftThreshold: calculating connectivity for given powers...
..working on genes 1 through 8844 of 8844
Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
1 1 0.09980 2.400 0.938 1650.000 1.65e+03 2460.0
2 2 0.00679 -0.349 0.931 487.000 4.77e+02 968.0
3 3 0.18800 -0.802 0.966 189.000 1.75e+02 472.0
4 4 0.70600 -1.680 0.986 89.400 7.53e+01 324.0
5 5 0.86200 -1.980 0.994 48.500 3.63e+01 244.0
6 6 0.90300 -2.010 0.996 29.100 1.91e+01 194.0
7 7 0.90200 -2.030 0.989 18.800 1.06e+01 158.0
8 8 0.91800 -1.970 0.995 12.800 6.26e+00 131.0
9 9 0.92300 -1.930 0.995 9.090 3.80e+00 110.0
10 10 0.92800 -1.890 0.997 6.670 2.40e+00 93.6
11 12 0.94000 -1.830 0.997 3.860 1.03e+00 69.8
12 14 0.93300 -1.810 0.987 2.400 4.81e-01 53.6
13 16 0.92400 -1.800 0.971 1.580 2.38e-01 42.1
14 18 0.94400 -1.740 0.991 1.080 1.24e-01 33.6
15 20 0.94100 -1.740 0.994 0.763 6.74e-02 27.3
>
> #Document Scale-free topology fits and power-connectivity relationship, output results as a figure
> jpeg(file="Data/CANON_SFT_Fit_DESeq_Final.jpg", quality=100, res=400, height=5, width=9, units="in")
> par(mfrow =c(1,2))
> cex1 = 0.9
> plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit, signedR^2",type="n",main =paste("Scale independence"))
> text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],labels=powers,cex=cex1,col="red");
> plot(sft$fitIndices[,1], sft$fitIndices[,5],xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",main =paste("Mean connectivity"))
> text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1, col="red")
> dev.off()
null device
1
>
> ##Power of 5 chosen for the CANON 2012 ESP drifter dataset. The general rule of thumb is to choose the smallest value that gives a scale free index > 0.8.
> pow <- 5
> net = blockwiseModules(Norm_Final,power= pow,TOMType ="signed", minModuleSize = 30, reassignThreshold = 0, mergeCutHeight = 0.25, numericLabels = TRUE, pamRespectsDendro = FALSE, verbose = 3)
Calculating module eigengenes block-wise from all genes
Flagging genes and samples with too many missing values...
..step 1
....pre-clustering genes to determine blocks..
Projective K-means:
..k-means clustering..
..merging smaller clusters...
Block sizes:
gBlocks
1 2
4765 4079
..Working on block 1 .
TOM calculation: adjacency..
..will use 3 parallel threads.
Fraction of slow calculations: 0.000000
..connectivity..
..matrix multiplication..
..normalization..
..done.
....clustering..
....detecting modules..
....calculating module eigengenes..
....checking modules for statistical meaningfulness..
..removing 11 genes from module 1 because their KME is too low.
..removing 73 genes from module 2 because their KME is too low.
..removing 3 genes from module 4 because their KME is too low.
..removing 12 genes from module 5 because their KME is too low.
..removing 5 genes from module 6 because their KME is too low.
..removing 6 genes from module 7 because their KME is too low.
..removing 1 genes from module 8 because their KME is too low.
..removing 1 genes from module 10 because their KME is too low.
..removing 5 genes from module 12 because their KME is too low.
..removing 1 genes from module 13 because their KME is too low.
..removing 7 genes from module 14 because their KME is too low.
..removing 2 genes from module 16 because their KME is too low.
..removing 1 genes from module 17 because their KME is too low.
..removing 5 genes from module 18 because their KME is too low.
..removing 1 genes from module 19 because their KME is too low.
..Working on block 2 .
TOM calculation: adjacency..
..will use 3 parallel threads.
Fraction of slow calculations: 0.000000
..connectivity..
..matrix multiplication..
..normalization..
..done.
....clustering..
....detecting modules..
....calculating module eigengenes..
....checking modules for statistical meaningfulness..
..removing 6 genes from module 1 because their KME is too low.
..removing 8 genes from module 2 because their KME is too low.
..removing 6 genes from module 3 because their KME is too low.
..removing 4 genes from module 4 because their KME is too low.
..removing 3 genes from module 5 because their KME is too low.
..removing 2 genes from module 7 because their KME is too low.
..removing 2 genes from module 8 because their KME is too low.
..removing 2 genes from module 12 because their KME is too low.
..removing 1 genes from module 15 because their KME is too low.
..merging modules that are too close..
mergeCloseModules: Merging modules whose distance is less than 0.25
Calculating new MEs...
>
> #Get unique colors to associate with each module identified. Custom colors were chosen here, but for datasets with variable numbers of modules the user may prefer to use the "labels2colors" function with an unspecified "colorSeq" parameter, as random colors will then be chosen.
> mergedColors = labels2colors(net$colors, colorSeq=c("steelblue1", "purple", "orange", "springgreen4", "red", "magenta", "steelblue4", "brown", "cyan", "turquoise3", "violetred4", "tomato1", "violet", "tan4", "turquoise4", "wheat3", "slategray2", "yellow", "tan1", "sienna2", "darkturquoise", "saddlebrown", "orangered", "firebrick1", "orchid4", "royalblue3", "tomato4", "olivedrab1", "olivedrab4", "springgreen3", "skyblue", "chocolate3", "darkcyan", "aquamarine", "coral", "darkgoldenrod", "blueviolet"))
>
> #Combine module information (ortholog cluster ID, module number, module color) and output table
> #mod <- cbind(colnames(Norm_Final), net$colors, mergedColors); colnames(mod) <- c("Cluster", "Module", "Module_Color")
> #write.table(mod, file="Data/canon.modules.10_2014", quote=F, sep="\t", row.names=F)
>
> #Plot dendrogram
> #jpeg(file="Data/combined.jpg", quality=75, res=150, height=15, width=30, units="in")
> #plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],dendroLabels = FALSE, hang = 0.05,addGuide = TRUE, guideHang = 0.05)
> #dev.off()
>
> #calculate and output Topological Overlap Matrix (TOM, for later analyses)
> #TOM = TOMsimilarityFromExpr(Norm_Final, power= pow)
> colnames(TOM) = colnames(Norm_Final)
Error in colnames(TOM) = colnames(Norm_Final) : object 'TOM' not found
Execution halted