diff --git a/WGCNA_ANALYSIS.html b/WGCNA_ANALYSIS.html deleted file mode 100644 index 12dae0e..0000000 --- a/WGCNA_ANALYSIS.html +++ /dev/null @@ -1,4280 +0,0 @@ - - - - -
- - - - - - - - - - -/ /
-suppressMessages(library(ggplot2))
-suppressMessages(library(ape))
-suppressMessages(library(limma))
-suppressMessages(library(edgeR))
-suppressMessages(library(statmod))
-suppressMessages(library(stats))
-suppressMessages(library(dplyr))
-suppressMessages(library(stringr))
-suppressMessages(library(tibble))
-suppressMessages(library(ggforce))
-suppressMessages(library(WGCNA))
-suppressMessages(library(km2gcn))
-suppressMessages(library(CoExpNets))
-suppressMessages(library(devtools))
-suppressMessages(library(UpSetR))
-suppressMessages(library(RCy3))
-suppressMessages(library(readr))
-suppressMessages(library(scales))
-suppressMessages(library(data.table))
-suppressMessages(library(forcats))
-suppressMessages(library(kableExtra))
-setwd("/Users/jessicamitchell/Library/CloudStorage/OneDrive-HarvardUniversity/OD_documents/Cfixpaper")
-
-
-
-
-rsem_gene_data <- read.table("epersephone_rsem_gene_count_proteinCODINGonly_2022.08.14.matrix")
-
-
-s2c<-read.csv("rsem_gene_info.csv",header = TRUE, stringsAsFactors=FALSE)
-s2c$sample
-## [1] "sample103" "sample106" "sample112" "sample115" "sample119"
-## [6] "sample121" "sample127" "sample1294" "sample1300" "sample1310"
-## [11] "sample1324" "sample1330" "sample1346" "sample1347" "sample1353"
-## [16] "sample1369" "sample136" "sample16" "sample1750" "sample1871"
-## [21] "sample1876" "sample28" "sample36" "sample54" "sample56"
-## [26] "sample58" "sample69" "sample72" "sample78" "sample94"
-rnaseqMatrix=round(rsem_gene_data)
-
-filter=rowSums(cpm(rnaseqMatrix)>=1)>=3
-summary(filter)
-## Mode FALSE TRUE
-## logical 28 3140
-cpm_filtered_matrix=rnaseqMatrix[filter,]
-#cpm_filtered_matrix
-DGE<-DGEList(cpm_filtered_matrix)
-DGE<-calcNormFactors(DGE,method =c("TMM"))
-#DGE
-
-
-treatvals<-s2c$condition
-
-design_condition=model.matrix(~condition, data=s2c)
-
-RNAseq_voom <- voomWithQualityWeights(DGE, design=design_condition,normalize.method="none")$E
-
-
-###this filters out the genes that have less variation in expression
-WGCNA_matrix = t(RNAseq_voom[order(apply(RNAseq_voom,1,mad), decreasing = T)[1:2500],])
-WGCNA_matrix <- as.data.frame(WGCNA_matrix)
-
-row.names(WGCNA_matrix) = s2c$sample
-
-names(s2c)
-## [1] "sample" "condition" "reps" "size" "size_cat"
-## [6] "troph_color" "X" "X.1" "X.2" "X.3"
-## [11] "X.4"
-###remove the blank columns
-s2c <- s2c[,1:6]
-sample_rownames <- rownames(WGCNA_matrix)
-
-s2c2 <- s2c[, -1]
-rownames(s2c2) <- sample_rownames
-
-#removing rep and size categories
-s2c3 <- s2c2[,c(1,4:5)]
-names(s2c3)
-## [1] "condition" "size_cat" "troph_color"
-####making this binary data for downstream analyses
-
-s2c4 <- s2c3 %>%
- mutate(Sulfide = case_when(
- startsWith(condition, "HS") ~ "1",
- startsWith(condition, "LS") ~ "0",
- startsWith(condition, "SW") ~ "0",
- startsWith(condition, "H2")~ "0"
- ))
-
-s2c5 <- s2c4 %>%
- mutate(Oxygen = case_when(
- endsWith(condition, "HO") ~ "1",
- endsWith(condition, "LO") ~ "0"
- ))
-
-s2c6 <- s2c5 %>%
- mutate(Nitrogen = case_when(
- grepl("noN",condition) ~ "0",
- grepl("_N",condition) ~ "1")
- )
-
-
-s2c7 <- s2c6 %>%
- mutate(Hydrogen = case_when(
- startsWith(condition, "HS") ~ "0",
- startsWith(condition, "LS") ~ "0",
- startsWith(condition, "SW") ~ "0",
- startsWith(condition, "H2")~ "1"
- ))
-
-s2c20 <- s2c7 %>%
- mutate(Low_to_No_sulfide = case_when(
- startsWith(condition, "HS") ~ "0",
- startsWith(condition, "LS") ~ "1",
- startsWith(condition, "SW") ~ "1",
- startsWith(condition, "H2")~ "0"
- ))
-
-
-s2c8 <- s2c20[,4:8]
-
-s2c8$Sulfide <- as.numeric(unlist(s2c8$Sulfide))
-s2c8$Oxygen <- as.numeric(unlist(s2c8$Oxygen))
-s2c8$Nitrogen <- as.numeric(unlist(s2c8$Nitrogen))
-s2c8$Hydrogen <- as.numeric(unlist(s2c8$Hydrogen))
-s2c8$Low_to_No_sulfide <- as.numeric(unlist(s2c8$Low_to_No_sulfide))
-str(s2c8)
-## 'data.frame': 30 obs. of 5 variables:
-## $ Sulfide : num 0 0 1 1 1 1 1 1 1 1 ...
-## $ Oxygen : num 1 1 0 0 0 1 1 1 1 1 ...
-## $ Nitrogen : num 1 1 1 1 1 1 1 0 0 0 ...
-## $ Hydrogen : num 1 1 0 0 0 0 0 0 0 0 ...
-## $ Low_to_No_sulfide: num 0 0 0 0 0 0 0 0 0 0 ...
-s2c9 <- s2c3 %>%
- mutate(H2_N_HO = case_when(
- condition== "H2_N_HO" ~ "1",
- TRUE ~ "0"))
-s210 <- s2c9 %>%
- mutate(HS_N_LO = case_when(
- condition== "HS_N_LO" ~ "1",
- TRUE ~ "0"))
-
-s211 <- s210 %>%
- mutate(HS_N_HO = case_when(
- condition== "HS_N_HO" ~ "1",
- TRUE ~ "0"))
-s212 <- s211 %>%
- mutate(HS_noN_HO = case_when(
- condition== "HS_noN_HO" ~ "1",
- TRUE ~ "0"))
-s213 <- s212 %>%
- mutate(LS_noN_HO = case_when(
- condition== "LS_noN_HO" ~ "1",
- TRUE ~ "0"))
-s214 <- s213 %>%
- mutate(SW_noN_HO = case_when(
- condition== "SW_noN_HO" ~ "1",
- TRUE ~ "0"))
-s215 <- s214 %>%
- mutate(LS_N_HO = case_when(
- condition== "LS_N_HO" ~ "1",
- TRUE ~ "0"))
- s216 <- s215 %>%
- mutate(H2_noN_HO = case_when(
- condition== "H2_noN_HO" ~ "1",
- TRUE ~ "0"))
- s217 <- s216 %>%
- mutate(H2_N_LO = case_when(
- condition== "H2_N_LO" ~ "1",
- TRUE ~ "0"))
-s218 <- s217 %>%
- mutate(H2_noN_LO = case_when(
- condition== "H2_noN_LO" ~ "1",
- TRUE ~ "0"))
-
-names(s218)
-## [1] "condition" "size_cat" "troph_color" "H2_N_HO" "HS_N_LO"
-## [6] "HS_N_HO" "HS_noN_HO" "LS_noN_HO" "SW_noN_HO" "LS_N_HO"
-## [11] "H2_noN_HO" "H2_N_LO" "H2_noN_LO"
-s218$H2_N_HO <- as.numeric(unlist(s218$H2_N_HO))
-s218$HS_N_LO <- as.numeric(unlist(s218$HS_N_LO))
-s218$HS_N_HO <- as.numeric(unlist(s218$HS_N_HO))
-s218$HS_noN_HO <- as.numeric(unlist(s218$HS_noN_HO))
-s218$LS_noN_HO <- as.numeric(unlist(s218$LS_noN_HO))
-s218$SW_noN_HO <- as.numeric(unlist(s218$SW_noN_HO))
-s218$LS_N_HO <- as.numeric(unlist(s218$LS_N_HO))
-s218$H2_noN_HO <- as.numeric(unlist(s218$H2_noN_HO))
-s218$H2_N_LO <- as.numeric(unlist(s218$H2_N_LO))
-s218$H2_noN_LO <- as.numeric(unlist(s218$H2_noN_LO))
-
-
-
-s218 <- s218[,4:13]
-biconditions <- cbind(s218,s2c8)
-
-save(WGCNA_matrix, biconditions, file = "count_matrix_condition_info_dataInput.RData")
-/
-/
-powers = c(c(1:10), seq(from = 12, to=20, by=2))
-
-# Call the network topology analysis function
-sft2 = pickSoftThreshold(WGCNA_matrix, powerVector = powers, networkType= "signed hybrid", verbose = 2)
-## pickSoftThreshold: will use block size 2500.
-## pickSoftThreshold: calculating connectivity for given powers...
-## ..working on genes 1 through 2500 of 2500
-## Warning: executing %dopar% sequentially: no parallel backend registered
-## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
-## 1 1 0.0489 -0.388 0.858 331.000 308.0000 639.00
-## 2 2 0.4660 -1.170 0.913 124.000 112.0000 343.00
-## 3 3 0.6690 -1.540 0.944 57.600 48.3000 211.00
-## 4 4 0.7060 -1.770 0.933 30.400 23.5000 140.00
-## 5 5 0.7310 -1.910 0.942 17.600 12.5000 96.90
-## 6 6 0.7640 -1.900 0.956 10.900 7.0200 69.60
-## 7 7 0.7900 -1.900 0.968 7.060 4.0600 51.30
-## 8 8 0.8070 -1.890 0.970 4.800 2.4400 38.60
-## 9 9 0.8150 -1.850 0.974 3.370 1.5300 29.50
-## 10 10 0.8220 -1.830 0.972 2.440 0.9920 22.90
-## 11 12 0.8720 -1.640 0.985 1.380 0.4430 14.30
-## 12 14 0.8920 -1.490 0.975 0.839 0.2160 9.26
-## 13 16 0.9150 -1.430 0.968 0.544 0.1110 6.68
-## 14 18 0.9660 -1.380 0.991 0.371 0.0579 5.22
-## 15 20 0.9190 -1.490 0.949 0.264 0.0317 4.66
-# Plot the results:
-
-par(mar = c(1,2,1,2));
-cex1 = 0.8;
-# Scale-free topology fit index as a function of the soft-thresholding power
-plot(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2],
- xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
- main = paste("Scale independence"));
-text(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2],
- labels=powers,cex=cex1,col="red");
-# this line corresponds to using an R^2 cut-off of h
-abline(h=0.8,col="red")
-
-# Mean connectivity as a function of the soft-thresholding power
-par(mar = c(1,2,1,2))
-plot(sft2$fitIndices[,1], sft2$fitIndices[,5],
- xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
- main = paste("Mean connectivity"))
-
-text(sft2$fitIndices[,1], sft2$fitIndices[,5], labels=powers, cex=cex1,col="red")
-
-sft2$fitIndices
-## Power SFT.R.sq slope truncated.R.sq mean.k. median.k.
-## 1 1 0.04890048 -0.3884992 0.8581139 331.4011205 308.13070231
-## 2 2 0.46592537 -1.1680241 0.9132379 124.4904470 112.01173850
-## 3 3 0.66927892 -1.5390034 0.9440341 57.6399451 48.32562217
-## 4 4 0.70560384 -1.7734087 0.9325776 30.4063264 23.49079218
-## 5 5 0.73115622 -1.9082518 0.9420155 17.5623528 12.50091882
-## 6 6 0.76403365 -1.9033807 0.9561525 10.8503969 7.02207374
-## 7 7 0.79011799 -1.8998930 0.9678497 7.0639452 4.05989014
-## 8 8 0.80724037 -1.8863799 0.9695552 4.7967210 2.44030738
-## 9 9 0.81539375 -1.8468855 0.9740395 3.3725516 1.52789332
-## 10 10 0.82151935 -1.8250238 0.9724534 2.4419234 0.99168615
-## 11 12 0.87162088 -1.6441349 0.9852418 1.3763842 0.44250659
-## 12 14 0.89171300 -1.4932610 0.9749399 0.8387154 0.21564804
-## 13 16 0.91539066 -1.4287488 0.9678064 0.5438888 0.11080318
-## 14 18 0.96562166 -1.3772418 0.9913790 0.3711772 0.05787538
-## 15 20 0.91949827 -1.4881952 0.9494624 0.2643675 0.03166435
-## max.k.
-## 1 638.846689
-## 2 342.917019
-## 3 210.869059
-## 4 139.536080
-## 5 96.870153
-## 6 69.586561
-## 7 51.295838
-## 8 38.594403
-## 9 29.529360
-## 10 22.915384
-## 11 14.292054
-## 12 9.260525
-## 13 6.684230
-## 14 5.219041
-## 15 4.660383
-####choosing softpower to be 8
-softPower = 8
-ADJ1=abs(cor(WGCNA_matrix,use="p"))^8
-k=as.vector(apply(ADJ1,2,sum, na.rm=T))
-sizeGrWindow(10,5)
-par(mfrow=c(1,2))
-hist(k)
-scaleFreePlot(k, main="Check scale free topology\n")
-
-## scaleFreeRsquared slope
-## 1 0.85 -1.93
-/
-/
-set.seed(123)
-softPower = 8;
-adjacency = adjacency(WGCNA_matrix, power = softPower, type="signed hybrid")
-
-dissTOM = TOMdist(adjacency, TOMType = "signed")
-## ..connectivity..
-## ..matrix multiplication (system BLAS)..
-## ..normalization..
-## ..done.
-# Call the hierarchical clustering function
-geneTree = hclust(as.dist(dissTOM), method = "average");
-# Plot the resulting clustering tree (dendrogram)
-
-par(mar = c(3,3,3,3))
-
-
-plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
- labels = FALSE, hang = 0.04);
-
-minModuleSize = 30;
-# Module identification using dynamic tree cut:
-dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
- deepSplit = 2, pamRespectsDendro = FALSE,
- minClusterSize = minModuleSize)
-## ..cutHeight not given, setting it to 0.998 ===> 99% of the (truncated) height range in dendro.
-## ..done.
-table(dynamicMods)
-## dynamicMods
-## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
-## 242 233 214 160 155 143 131 124 124 120 109 107 106 100 99 83 79 72 53 46
-dynamicColors = labels2colors(dynamicMods)
-table(dynamicColors)
-## dynamicColors
-## black blue brown cyan green greenyellow
-## 124 214 160 99 143 107
-## grey grey60 lightcyan lightgreen lightyellow magenta
-## 242 72 79 53 46 120
-## midnightblue pink purple red salmon tan
-## 83 124 109 131 100 106
-## turquoise yellow
-## 233 155
-####changing names of colors
-
-colorOrder = c("grey", standardColors(50))
-list(colorOrder)
-## [[1]]
-## [1] "grey" "turquoise" "blue" "brown"
-## [5] "yellow" "green" "red" "black"
-## [9] "pink" "magenta" "purple" "greenyellow"
-## [13] "tan" "salmon" "cyan" "midnightblue"
-## [17] "lightcyan" "grey60" "lightgreen" "lightyellow"
-## [21] "royalblue" "darkred" "darkgreen" "darkturquoise"
-## [25] "darkgrey" "orange" "darkorange" "white"
-## [29] "skyblue" "saddlebrown" "steelblue" "paleturquoise"
-## [33] "violet" "darkolivegreen" "darkmagenta" "sienna3"
-## [37] "yellowgreen" "skyblue3" "plum1" "orangered4"
-## [41] "mediumpurple3" "lightsteelblue1" "lightcyan1" "ivory"
-## [45] "floralwhite" "darkorange2" "brown4" "bisque4"
-## [49] "darkslateblue" "plum2" "thistle2"
-moduleLabels = match(dynamicColors, colorOrder)-1
-head(moduleLabels)
-## [1] 10 13 15 17 17 17
-table(moduleLabels)
-## moduleLabels
-## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
-## 242 233 214 160 155 143 131 124 124 120 109 107 106 100 99 83 79 72 53 46
-table(dynamicColors)
-## dynamicColors
-## black blue brown cyan green greenyellow
-## 124 214 160 99 143 107
-## grey grey60 lightcyan lightgreen lightyellow magenta
-## 242 72 79 53 46 120
-## midnightblue pink purple red salmon tan
-## 83 124 109 131 100 106
-## turquoise yellow
-## 233 155
-customColorOrder = c("grey", "brown", "deeppink3", "darkcyan","plum","darkgreen","red","black","pink","skyblue","plum4","greenyellow","tan","darkorchid4","darkgoldenrod1","midnightblue","lightcyan","grey60","lightgreen","yellow")
-
-
-
-
-
-
-moduleColors.custom = customColorOrder[moduleLabels + 1]
-table(dynamicColors, moduleColors.custom)##sanity check
-## moduleColors.custom
-## dynamicColors black brown darkcyan darkgoldenrod1 darkgreen darkorchid4
-## black 124 0 0 0 0 0
-## blue 0 0 0 0 0 0
-## brown 0 0 160 0 0 0
-## cyan 0 0 0 99 0 0
-## green 0 0 0 0 143 0
-## greenyellow 0 0 0 0 0 0
-## grey 0 0 0 0 0 0
-## grey60 0 0 0 0 0 0
-## lightcyan 0 0 0 0 0 0
-## lightgreen 0 0 0 0 0 0
-## lightyellow 0 0 0 0 0 0
-## magenta 0 0 0 0 0 0
-## midnightblue 0 0 0 0 0 0
-## pink 0 0 0 0 0 0
-## purple 0 0 0 0 0 0
-## red 0 0 0 0 0 0
-## salmon 0 0 0 0 0 100
-## tan 0 0 0 0 0 0
-## turquoise 0 233 0 0 0 0
-## yellow 0 0 0 0 0 0
-## moduleColors.custom
-## dynamicColors deeppink3 greenyellow grey grey60 lightcyan lightgreen
-## black 0 0 0 0 0 0
-## blue 214 0 0 0 0 0
-## brown 0 0 0 0 0 0
-## cyan 0 0 0 0 0 0
-## green 0 0 0 0 0 0
-## greenyellow 0 107 0 0 0 0
-## grey 0 0 242 0 0 0
-## grey60 0 0 0 72 0 0
-## lightcyan 0 0 0 0 79 0
-## lightgreen 0 0 0 0 0 53
-## lightyellow 0 0 0 0 0 0
-## magenta 0 0 0 0 0 0
-## midnightblue 0 0 0 0 0 0
-## pink 0 0 0 0 0 0
-## purple 0 0 0 0 0 0
-## red 0 0 0 0 0 0
-## salmon 0 0 0 0 0 0
-## tan 0 0 0 0 0 0
-## turquoise 0 0 0 0 0 0
-## yellow 0 0 0 0 0 0
-## moduleColors.custom
-## dynamicColors midnightblue pink plum plum4 red skyblue tan yellow
-## black 0 0 0 0 0 0 0 0
-## blue 0 0 0 0 0 0 0 0
-## brown 0 0 0 0 0 0 0 0
-## cyan 0 0 0 0 0 0 0 0
-## green 0 0 0 0 0 0 0 0
-## greenyellow 0 0 0 0 0 0 0 0
-## grey 0 0 0 0 0 0 0 0
-## grey60 0 0 0 0 0 0 0 0
-## lightcyan 0 0 0 0 0 0 0 0
-## lightgreen 0 0 0 0 0 0 0 0
-## lightyellow 0 0 0 0 0 0 0 46
-## magenta 0 0 0 0 0 120 0 0
-## midnightblue 83 0 0 0 0 0 0 0
-## pink 0 124 0 0 0 0 0 0
-## purple 0 0 0 109 0 0 0 0
-## red 0 0 0 0 131 0 0 0
-## salmon 0 0 0 0 0 0 0 0
-## tan 0 0 0 0 0 0 106 0
-## turquoise 0 0 0 0 0 0 0 0
-## yellow 0 0 155 0 0 0 0 0
-str(dynamicColors)
-## chr [1:2500] "purple" "salmon" "midnightblue" "grey60" "grey60" "grey60" ...
-str(moduleColors.custom)
-## chr [1:2500] "plum4" "darkorchid4" "midnightblue" "grey60" "grey60" ...
-plotDendroAndColors(geneTree, moduleColors.custom, "Dynamic Tree Cut",
- dendroLabels = FALSE, hang = 0.03,
- addGuide = TRUE, guideHang = 0.05,
- main = "Gene dendrogram and module colors")
-
-# Calculate eigengenes
-MEList1 = moduleEigengenes(WGCNA_matrix, colors = moduleColors.custom)
-MEs1 = MEList1$eigengenes
- #Calculate dissimilarity of module eigengenes
-MEDiss1 = 1-cor(MEs1);
-# Cluster module eigengenes
-METree1 = hclust(as.dist(MEDiss1), method = "average")
-# Plot the result
-
-
-par(mar = c(3,3,3,3))
-plot(METree1, main = "Clustering of module eigengenes",
- xlab = "", sub = "")
-MEDissThres = 0.3
-# Plot the cut line into the dendrogram
-abline(h=MEDissThres, col = "red")
-
-# Call an automatic merging function
-merge = mergeCloseModules(WGCNA_matrix, moduleColors.custom, cutHeight = MEDissThres, verbose = 3)
-## mergeCloseModules: Merging modules whose distance is less than 0.3
-## multiSetMEs: Calculating module MEs.
-## Working on set 1 ...
-## moduleEigengenes: Calculating 20 module eigengenes in given set.
-## multiSetMEs: Calculating module MEs.
-## Working on set 1 ...
-## moduleEigengenes: Calculating 16 module eigengenes in given set.
-## multiSetMEs: Calculating module MEs.
-## Working on set 1 ...
-## moduleEigengenes: Calculating 15 module eigengenes in given set.
-## Calculating new MEs...
-## multiSetMEs: Calculating module MEs.
-## Working on set 1 ...
-## moduleEigengenes: Calculating 15 module eigengenes in given set.
-#Eigengenes of the new merged modules with new colors
-mergedMEs = merge$newMEs;
-mergedColors = merge$colors
-table(moduleColors.custom)
-## moduleColors.custom
-## black brown darkcyan darkgoldenrod1 darkgreen
-## 124 233 160 99 143
-## darkorchid4 deeppink3 greenyellow grey grey60
-## 100 214 107 242 72
-## lightcyan lightgreen midnightblue pink plum
-## 79 53 83 124 155
-## plum4 red skyblue tan yellow
-## 109 131 120 106 46
-table(mergedColors)
-## mergedColors
-## black brown darkcyan darkgoldenrod1 darkgreen
-## 124 233 239 99 143
-## darkorchid4 deeppink3 greenyellow grey grey60
-## 100 214 160 242 203
-## midnightblue pink plum4 skyblue yellow
-## 83 385 109 120 46
-plotDendroAndColors(geneTree, cbind(moduleColors.custom, mergedColors),
- c("Dynamic Tree Cut", "Merged dynamic"),
- dendroLabels = FALSE, hang = 0.03,
- addGuide = TRUE, guideHang = 0.05)
-
-nGenes = ncol(WGCNA_matrix)
-nSamples = nrow(WGCNA_matrix)
-modNames = substring(names(mergedMEs), 3)
-
-MEs0 = moduleEigengenes(WGCNA_matrix, mergedColors)$eigengenes
-MEs = orderMEs(MEs0)
-moduleTraitCor = cor(MEs, biconditions, use = "p")
-moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
-
-save(MEs, mergedColors, geneTree,moduleTraitCor,moduleTraitPvalue,
-file = "wgcna_manual_network20220910.RData")
-lnames = load(file = "wgcna_manual_network20220910.RData")
-lnames = load(file= "count_matrix_condition_info_dataInput.RData")
-
-
-
-
-
-
-# Convert traits to a color representation: white means low, red means high, grey means missing entry
-
-
-par(mar = c(1,1,2,2))
-par(mfrow=c(1,1))
-sampleTree = hclust(dist(WGCNA_matrix), method = "average")
-
-names(biconditions)
-## [1] "H2_N_HO" "HS_N_LO" "HS_N_HO"
-## [4] "HS_noN_HO" "LS_noN_HO" "SW_noN_HO"
-## [7] "LS_N_HO" "H2_noN_HO" "H2_N_LO"
-## [10] "H2_noN_LO" "Sulfide" "Oxygen"
-## [13] "Nitrogen" "Hydrogen" "Low_to_No_sulfide"
-biconditions <- biconditions[,1:14]
-
-desired_order <- c("HS_N_HO" , "HS_noN_HO", "HS_N_LO","LS_N_HO","LS_noN_HO","SW_noN_HO","H2_N_HO","H2_noN_HO","H2_N_LO","H2_noN_LO","Sulfide","Oxygen","Nitrogen","Hydrogen")
-
-biconditions <- biconditions[,match(desired_order,colnames(biconditions))]
-
-
-
-traitColors = numbers2colors(biconditions, signed = TRUE);
-# Plot the sample dendrogram and the colors underneath.
-plotDendroAndColors(sampleTree, traitColors,
- groupLabels = names(biconditions),
- main = "Sample dendrogram and trait heatmap")
-
-nGenes = ncol(WGCNA_matrix)
-nSamples = nrow(WGCNA_matrix)
-
-
-
-
-moduleColors = mergedColors
-MEs0 = moduleEigengenes(WGCNA_matrix, moduleColors)$eigengenes
-MEs = orderMEs(MEs0)
-
-##reordering MEs to match figure
-mycolororder <- c("MEmidnightblue","MEdarkorchid4","MEyellow","MEgreenyellow", "MEblack", "MEdarkgreen","MEdeeppink3","MEpink","MEgrey60","MEbrown","MEplum4","MEskyblue","MEdarkcyan","MEdarkgoldenrod1","MEgrey")
-MEs <- MEs[,match(mycolororder,colnames(MEs))]
-
-biconditions <- biconditions[,match(desired_order,colnames(biconditions))]
-
-
-moduleTraitCor = cor(MEs, biconditions, use = "p")
-
-moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
-
-summary(moduleTraitPvalue)
-## HS_N_HO HS_noN_HO HS_N_LO LS_N_HO
-## Min. :0.01196 Min. :0.00000 Min. :0.003134 Min. :0.2678
-## 1st Qu.:0.03387 1st Qu.:0.03521 1st Qu.:0.137401 1st Qu.:0.4198
-## Median :0.14691 Median :0.27026 Median :0.344828 Median :0.5388
-## Mean :0.26185 Mean :0.32608 Mean :0.396634 Mean :0.5700
-## 3rd Qu.:0.32153 3rd Qu.:0.57388 3rd Qu.:0.671187 3rd Qu.:0.6861
-## Max. :0.91450 Max. :0.93636 Max. :0.970864 Max. :0.9718
-## LS_noN_HO SW_noN_HO H2_N_HO H2_noN_HO
-## Min. :0.0000558 Min. :0.0008013 Min. :0.01812 Min. :0.03995
-## 1st Qu.:0.1967968 1st Qu.:0.2808627 1st Qu.:0.29340 1st Qu.:0.13709
-## Median :0.4090254 Median :0.3534417 Median :0.51191 Median :0.21959
-## Mean :0.4217359 Mean :0.4078278 Mean :0.49212 Mean :0.33175
-## 3rd Qu.:0.6354574 3rd Qu.:0.6356339 3rd Qu.:0.69145 3rd Qu.:0.50608
-## Max. :0.8682022 Max. :0.7872686 Max. :0.98003 Max. :0.78614
-## H2_N_LO H2_noN_LO Sulfide Oxygen
-## Min. :0.001603 Min. :0.05381 Min. :0.001234 Min. :0.0000002
-## 1st Qu.:0.060706 1st Qu.:0.10483 1st Qu.:0.067933 1st Qu.:0.0007034
-## Median :0.145126 Median :0.18447 Median :0.174750 Median :0.0103513
-## Mean :0.239831 Mean :0.27926 Mean :0.366110 Mean :0.0822130
-## 3rd Qu.:0.435491 3rd Qu.:0.36867 3rd Qu.:0.667126 3rd Qu.:0.0620483
-## Max. :0.716300 Max. :0.77636 Max. :0.970160 Max. :0.5030011
-## Nitrogen Hydrogen
-## Min. :0.002802 Min. :0.000174
-## 1st Qu.:0.143535 1st Qu.:0.034898
-## Median :0.314161 Median :0.068381
-## Mean :0.334997 Mean :0.203924
-## 3rd Qu.:0.555353 3rd Qu.:0.219406
-## Max. :0.841863 Max. :0.900770
-textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
- signif(moduleTraitPvalue, 1), ")", sep = "")
-
-
-
-
-
-
-
-
-dim(textMatrix) = dim(moduleTraitCor)
-par(mar = c(3,4, 1,1))
-
-names(biconditions)
-## [1] "HS_N_HO" "HS_noN_HO" "HS_N_LO" "LS_N_HO" "LS_noN_HO" "SW_noN_HO"
-## [7] "H2_N_HO" "H2_noN_HO" "H2_N_LO" "H2_noN_LO" "Sulfide" "Oxygen"
-## [13] "Nitrogen" "Hydrogen"
-head(moduleTraitCor)
-## HS_N_HO HS_noN_HO HS_N_LO LS_N_HO LS_noN_HO
-## MEmidnightblue 0.02046914 0.77549274 -0.1012675 -0.006751255 0.24346567
-## MEdarkorchid4 0.39363790 0.20790779 -0.3311569 0.208972393 0.24138458
-## MEyellow 0.04160808 0.06616182 -0.5017755 0.093907529 0.06104898
-## MEgreenyellow 0.22355044 0.08593010 -0.1231448 0.134486463 0.16362566
-## MEblack 0.16895507 0.30463652 -0.5213307 0.137603215 0.15939483
-## MEdarkgreen 0.29051730 0.14258634 -0.4439989 0.165647455 0.14956928
-## SW_noN_HO H2_N_HO H2_noN_HO H2_N_LO H2_noN_LO
-## MEmidnightblue 0.10228132 -0.326359663 -0.07370678 -0.3379315 -0.2956921
-## MEdarkorchid4 0.06986956 -0.214684994 0.29526963 -0.5156230 -0.3555769
-## MEyellow 0.18334438 0.064921651 0.15299999 0.1231695 -0.2853864
-## MEgreenyellow 0.07837730 -0.428591372 0.23740423 -0.2124148 -0.1592232
-## MEblack 0.17555561 -0.124569788 0.24385690 -0.3445732 -0.1995284
-## MEdarkgreen -0.05142174 -0.004771732 0.23089975 -0.2973662 -0.1816616
-## Sulfide Oxygen Nitrogen Hydrogen
-## MEmidnightblue 0.454784198 0.4810992 -0.4511045 -0.63300331
-## MEdarkorchid4 0.177011014 0.7871273 -0.2753128 -0.48415103
-## MEyellow -0.257937228 0.4346851 -0.1069013 0.03411204
-## MEgreenyellow 0.121985353 0.3239114 -0.2436685 -0.34465859
-## MEblack -0.031252607 0.6974892 -0.4103493 -0.26014469
-## MEdarkgreen -0.007132597 0.6042628 -0.1739832 -0.15486885
-# Display the correlation values within a heatmap plot
-labeledHeatmap(Matrix = moduleTraitCor,
- xLabels = names(biconditions),
- yLabels = names(MEs),
- ySymbols = names(MEs),
- colorLabels = FALSE,
- colors = blueWhiteRed(50),
- textMatrix = textMatrix,
- setStdMargins = FALSE,
- cex.text = 0.5,
- cex.lab.x =0.4,
- cex.lab.y=0.4,
- zlim = c(-1,1),
- main = paste("Module-trait relationships"))
-
-nGenes = ncol(WGCNA_matrix)
-nSamples = nrow(WGCNA_matrix)
-
-names(biconditions)
-## [1] "HS_N_HO" "HS_noN_HO" "HS_N_LO" "LS_N_HO" "LS_noN_HO" "SW_noN_HO"
-## [7] "H2_N_HO" "H2_noN_HO" "H2_N_LO" "H2_noN_LO" "Sulfide" "Oxygen"
-## [13] "Nitrogen" "Hydrogen"
-Sulfide = as.data.frame(biconditions$Sulfide);
-names(Sulfide)[1] <- "Sulfide"
-Oxygen <- as.data.frame(biconditions$Oxygen)
-names(Oxygen)[1] <- "Oxygen"
-Nitrogen <- as.data.frame(biconditions$Nitrogen)
-names(Nitrogen)[1] <- "Nitrogen"
-Hydrogen <- as.data.frame(biconditions$Hydrogen)
-names(Hydrogen)[1] <- "Hydrogen"
-H2_N_HO <- as.data.frame(biconditions$H2_N_HO)
-names(H2_N_HO)[1] <- "H2_N_HO"
-HS_N_LO <- as.data.frame(biconditions$HS_N_LO)
-names(HS_N_LO)[1] <- "HS_N_LO"
-HS_N_HO <- as.data.frame(biconditions$HS_N_HO)
-names(HS_N_HO)[1] <- "HS_N_HO"
-HS_noN_HO <- as.data.frame(biconditions$HS_noN_HO)
-names(HS_noN_HO)[1] <- "HS_noN_HO"
-LS_noN_HO <- as.data.frame(biconditions$LS_noN_HO)
-names(LS_noN_HO)[1] <- "LS_noN_HO"
-SW_noN_HO <- as.data.frame(biconditions$SW_noN_HO)
-names(SW_noN_HO)[1] <- "SW_noN_HO"
-LS_N_HO <- as.data.frame(biconditions$LS_N_HO)
-names(LS_N_HO)[1] <- "LS_N_HO"
-H2_noN_HO <- as.data.frame(biconditions$H2_noN_HO)
-names(H2_noN_HO)[1] <- "H2_noN_HO"
-H2_N_LO <- as.data.frame(biconditions$H2_N_LO)
-names(H2_N_LO)[1] <- "H2_N_LO"
-H2_noN_LO <- as.data.frame(biconditions$H2_noN_LO)
-names(H2_noN_LO)[1] <- "H2_noN_LO"
-
-modNames = substring(names(MEs), 3)
-
-
-geneModuleMembership = as.data.frame(cor(WGCNA_matrix, MEs, use = "p"));
-MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
-names(geneModuleMembership) = paste("MM", modNames, sep="");
-names(MMPvalue) = paste("p.MM", modNames, sep="");
-
-
-geneSignificance=as.data.frame(cor(WGCNA_matrix, biconditions, use = "p"))
-GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneSignificance), nSamples))
-names(geneSignificance) = paste("GS.", names(biconditions), sep="");
-names(GSPvalue) = paste("p.Sul.", names(biconditions), sep="");
-
-str(geneModuleMembership)
-## 'data.frame': 2500 obs. of 15 variables:
-## $ MMmidnightblue : num -0.298 0.648 0.768 0.625 0.599 ...
-## $ MMdarkorchid4 : num -0.474 0.848 0.291 0.445 0.409 ...
-## $ MMyellow : num -0.0623 0.163 -0.2172 0.211 -0.0863 ...
-## $ MMgreenyellow : num -0.1228 0.3899 0.0313 0.0184 -0.2098 ...
-## $ MMblack : num -0.1697 0.5395 0.0647 0.4343 0.2309 ...
-## $ MMdarkgreen : num -0.0548 0.4469 -0.0929 0.2286 -0.0154 ...
-## $ MMdeeppink3 : num -0.0172 0.4138 0.0872 0.1792 0.0883 ...
-## $ MMpink : num -0.016 0.5972 0.1198 0.1043 -0.0477 ...
-## $ MMgrey60 : num -0.656 0.183 0.255 0.474 0.511 ...
-## $ MMbrown : num -0.138 -0.295 0.237 0.274 0.475 ...
-## $ MMplum4 : num 0.648 -0.599 -0.107 -0.237 -0.344 ...
-## $ MMskyblue : num 0.247 -0.286 -0.58 -0.626 -0.677 ...
-## $ MMdarkcyan : num 0.256 -0.84 -0.471 -0.44 -0.391 ...
-## $ MMdarkgoldenrod1: num -0.173 -0.442 -0.579 -0.381 -0.328 ...
-## $ MMgrey : num -0.3438 0.3343 -0.1019 0.0722 0.278 ...
-str(geneSignificance)
-## 'data.frame': 2500 obs. of 14 variables:
-## $ GS.HS_N_HO : num -0.119 0.376 -0.219 -0.477 -0.338 ...
-## $ GS.HS_noN_HO: num 0.134 0.419 0.675 0.431 0.299 ...
-## $ GS.HS_N_LO : num 0.1614 -0.3814 0.0652 -0.2549 -0.0556 ...
-## $ GS.LS_N_HO : num 0.0746 0.1922 0.1029 0.0454 -0.0436 ...
-## $ GS.LS_noN_HO: num -0.567 0.24 0.114 0.362 0.321 ...
-## $ GS.SW_noN_HO: num -0.1715 -0.1254 0.0558 0.1978 0.2951 ...
-## $ GS.H2_N_HO : num 0.1176 -0.0198 -0.3035 0.0256 -0.0918 ...
-## $ GS.H2_noN_HO: num -0.1839 0.0707 -0.1085 0.2563 0.2329 ...
-## $ GS.H2_N_LO : num 0.196 -0.365 -0.251 -0.358 -0.285 ...
-## $ GS.H2_noN_LO: num 0.357 -0.406 -0.131 -0.229 -0.334 ...
-## $ GS.Sulfide : num 0.1156 0.2704 0.3413 -0.1966 -0.0622 ...
-## $ GS.Oxygen : num -0.468 0.754 0.208 0.551 0.442 ...
-## $ GS.Nitrogen : num 0.259 -0.119 -0.364 -0.611 -0.489 ...
-## $ GS.Hydrogen : num 0.298 -0.441 -0.486 -0.187 -0.293 ...
-geneSulfideSignificance=as.data.frame(cor(WGCNA_matrix, Sulfide, use = "p"))
-SulPvalue = as.data.frame(corPvalueStudent(as.matrix(geneSulfideSignificance), nSamples))
-names(geneSulfideSignificance) = paste("Sul.", names(Sulfide), sep="");
-names(SulPvalue) = paste("p.Sul.", names(Sulfide), sep="");
-
-
-geneOxygenSignificance=as.data.frame(cor(WGCNA_matrix, Oxygen, use = "p"))
-OxyPvalue = as.data.frame(corPvalueStudent(as.matrix(geneOxygenSignificance), nSamples))
-names(geneOxygenSignificance) = paste("Oxy.", names(Oxygen), sep="");
-names(OxyPvalue) = paste("p.Oxy.", names(Oxygen), sep="");
-
-
-geneNitrogenSignificance=as.data.frame(cor(WGCNA_matrix, Nitrogen, use = "p"))
-NitPvalue = as.data.frame(corPvalueStudent(as.matrix(geneNitrogenSignificance), nSamples))
-names(geneNitrogenSignificance) = paste("Nit.", names(Nitrogen), sep="");
-names(NitPvalue) = paste("p.Nit.", names(Nitrogen), sep="");
-
-
-
-geneHydrogenSignificance=as.data.frame(cor(WGCNA_matrix, Hydrogen, use = "p"))
-HydPvalue = as.data.frame(corPvalueStudent(as.matrix(geneHydrogenSignificance), nSamples))
-names(geneHydrogenSignificance) = paste("Hyd.", names(Hydrogen), sep="");
-names(HydPvalue) = paste("p.Hyd.", names(Hydrogen), sep="");
-
-
-
-
-
-####making dataframe to add to cytoscape layer#########################################
-sulfsig <- geneSulfideSignificance
-sulfsig$locusID <- rownames(sulfsig)
-rownames(sulfsig) <- NULL
-sulfsig <- sulfsig[,c(2,1)]
-
-oxysig <- geneOxygenSignificance
-oxysig$locusID <- rownames(oxysig)
-rownames(oxysig) <- NULL
-oxysig <- oxysig[,c(2,1)]
-
-nitrosig <- geneNitrogenSignificance
-nitrosig$locusID <- rownames(nitrosig)
-rownames(nitrosig) <- NULL
-nitrosig <- nitrosig[,c(2,1)]
-
-
-hydrosig <- geneHydrogenSignificance
-hydrosig$locusID <- rownames(hydrosig)
-rownames(hydrosig) <- NULL
-hydrosig <- hydrosig[,c(2,1)]
-
-
-sig.genes <- cbind(sulfsig,oxysig$Oxy.Oxygen,nitrosig$Nit.Nitrogen,hydrosig$Hyd.Hydrogen)
-names(sig.genes)[2] <- "Sulfide_GS"
-names(sig.genes)[3] <- "Oxygen_GS"
-names(sig.genes)[4] <- "Nitrogen_GS"
-names(sig.genes)[5] <- "Hydrogen_GS"
-names(sig.genes)
-## [1] "locusID" "Sulfide_GS" "Oxygen_GS" "Nitrogen_GS" "Hydrogen_GS"
-#write.csv(sig.genes,"sig.genes.csv")
-
-
-
-#########################making datframe to oxygen and sulfide dataframe########################################################
-
-sig.genes3 <- cbind(sulfsig,oxysig$Oxy.Oxygen)
-colnames(sig.genes3)
-## [1] "locusID" "Sul.Sulfide" "oxysig$Oxy.Oxygen"
-colnames(sig.genes3) <- c("locusID","corSulfide","corOxygen")
-
-sulP <- SulPvalue
-sulP <- tibble(locusID = rownames(sulP), sulP)
-oxyP <- OxyPvalue
-oxyP <- tibble(locusID = rownames(oxyP), oxyP)
-
-colnames(sulP)
-## [1] "locusID" "p.Sul.Sulfide"
-colnames(sulP) <- c("locusID","pval_Sulfide")
-
-colnames(oxyP)
-## [1] "locusID" "p.Oxy.Oxygen"
-colnames(oxyP) <- c("locusID","pval_Oxygen")
-
-
-
-
-GMM <- geneModuleMembership
-
-GMMP <- MMPvalue
-
-head(sig.genes3)
-## locusID corSulfide corOxygen
-## 1 gene-L0Y14_RS04070 0.11559790 -0.4677785
-## 2 gene-L0Y14_RS09775 0.27041835 0.7541553
-## 3 gene-L0Y14_RS01990 0.34126255 0.2075954
-## 4 gene-L0Y14_RS09950 -0.19662226 0.5510156
-## 5 gene-L0Y14_RS09955 -0.06219137 0.4415475
-## 6 gene-L0Y14_RS12365 -0.23047838 0.2361504
-head(GMM)
-## MMmidnightblue MMdarkorchid4 MMyellow MMgreenyellow
-## gene-L0Y14_RS04070 -0.2978002 -0.4742295 -0.06228793 -0.12283434
-## gene-L0Y14_RS09775 0.6475847 0.8479211 0.16301606 0.38986765
-## gene-L0Y14_RS01990 0.7683016 0.2909617 -0.21720940 0.03127418
-## gene-L0Y14_RS09950 0.6250107 0.4452151 0.21104415 0.01837192
-## gene-L0Y14_RS09955 0.5993774 0.4088890 -0.08634607 -0.20976909
-## gene-L0Y14_RS12365 0.3632049 0.1311813 -0.06645301 -0.38765511
-## MMblack MMdarkgreen MMdeeppink3 MMpink MMgrey60
-## gene-L0Y14_RS04070 -0.16971124 -0.05481641 -0.01719957 -0.01598375 -0.6563363
-## gene-L0Y14_RS09775 0.53953441 0.44692677 0.41381637 0.59720934 0.1830737
-## gene-L0Y14_RS01990 0.06471391 -0.09289025 0.08717597 0.11976742 0.2550905
-## gene-L0Y14_RS09950 0.43427234 0.22863891 0.17923724 0.10431595 0.4740972
-## gene-L0Y14_RS09955 0.23088121 -0.01544034 0.08834925 -0.04765631 0.5107174
-## gene-L0Y14_RS12365 -0.00202547 -0.12064102 -0.25455458 -0.31551755 0.7070535
-## MMbrown MMplum4 MMskyblue MMdarkcyan
-## gene-L0Y14_RS04070 -0.1384809 0.6483345 0.2465064 0.25589117
-## gene-L0Y14_RS09775 -0.2948900 -0.5986951 -0.2859013 -0.84047590
-## gene-L0Y14_RS01990 0.2365179 -0.1068451 -0.5795192 -0.47056380
-## gene-L0Y14_RS09950 0.2737269 -0.2366527 -0.6256319 -0.43994105
-## gene-L0Y14_RS09955 0.4747395 -0.3438988 -0.6771603 -0.39111707
-## gene-L0Y14_RS12365 0.4675125 -0.2198442 -0.5555037 -0.09885019
-## MMdarkgoldenrod1 MMgrey
-## gene-L0Y14_RS04070 -0.1725039 -0.34377603
-## gene-L0Y14_RS09775 -0.4423334 0.33425164
-## gene-L0Y14_RS01990 -0.5786492 -0.10188510
-## gene-L0Y14_RS09950 -0.3808909 0.07218213
-## gene-L0Y14_RS09955 -0.3278772 0.27804497
-## gene-L0Y14_RS12365 -0.2081241 0.06962918
-head(GMMP)
-## p.MMmidnightblue p.MMdarkorchid4 p.MMyellow p.MMgreenyellow
-## gene-L0Y14_RS04070 1.099727e-01 8.107296e-03 0.7436784 0.51785242
-## gene-L0Y14_RS09775 1.095424e-04 3.335534e-09 0.3893952 0.03319238
-## gene-L0Y14_RS01990 7.146697e-07 1.187803e-01 0.2489174 0.86968629
-## gene-L0Y14_RS09950 2.220815e-04 1.368570e-02 0.2629330 0.92323522
-## gene-L0Y14_RS09955 4.649887e-04 2.486376e-02 0.6500519 0.26589457
-## gene-L0Y14_RS12365 4.852036e-02 4.895817e-01 0.7271658 0.03429251
-## p.MMblack p.MMdarkgreen p.MMdeeppink3 p.MMpink
-## gene-L0Y14_RS04070 0.369944508 0.77357610 0.92812078 0.9331901555
-## gene-L0Y14_RS09775 0.002091438 0.01328534 0.02301156 0.0004935721
-## gene-L0Y14_RS01990 0.734046428 0.62539413 0.64690412 0.5284364628
-## gene-L0Y14_RS09950 0.016490067 0.22426245 0.34327557 0.5832907609
-## gene-L0Y14_RS09955 0.219626475 0.93545678 0.64246404 0.8025293779
-## gene-L0Y14_RS12365 0.991524596 0.52541108 0.17462403 0.0894278762
-## p.MMgrey60 p.MMbrown p.MMplum4 p.MMskyblue
-## gene-L0Y14_RS04070 8.200365e-05 0.465519128 0.0001068964 1.891247e-01
-## gene-L0Y14_RS09775 3.328721e-01 0.113659304 0.0004738236 1.256266e-01
-## gene-L0Y14_RS01990 1.736871e-01 0.208261116 0.5741429330 7.907558e-04
-## gene-L0Y14_RS09950 8.127494e-03 0.143281237 0.2079944552 2.179639e-04
-## gene-L0Y14_RS09955 3.928636e-03 0.008029824 0.0627743027 3.961873e-05
-## gene-L0Y14_RS12365 1.253228e-05 0.009188101 0.2430811504 1.438928e-03
-## p.MMdarkcyan p.MMdarkgoldenrod1 p.MMgrey
-## gene-L0Y14_RS04070 1.722940e-01 0.3620032361 0.06287441
-## gene-L0Y14_RS09775 6.204607e-09 0.0143824230 0.07102686
-## gene-L0Y14_RS01990 8.682956e-03 0.0008087425 0.59214321
-## gene-L0Y14_RS09950 1.498308e-02 0.0378395675 0.70464811
-## gene-L0Y14_RS09955 3.258386e-02 0.0769249135 0.13682488
-## gene-L0Y14_RS12365 6.032765e-01 0.2697473140 0.71465331
-dim(sig.genes3)
-## [1] 2500 3
-####before I can merge these I have to change the rownames to columns
-
-
-
-# Assuming your datasets are named df1, df2, df3
-# Convert row names to column for df2 and df3
-GMM <- tibble(locusID = rownames(GMM), GMM)
-GMMP <- tibble(locusID = rownames(GMMP), GMMP)
-
-
-
-colnames(GMM) <- c("locusID", "GM_MEpurple","GM_MEmidnightblue", "GM_MElightyellow","GM_MEgreenyellow","GM_MEblack","GM_MEgreen","GM_MEcherry","GM_MEpink","GM_MEbrown","GM_MEgrey60", "GM_MElightpurple","GM_MEskyblue","GM_MEteal","GM_MEgold","GM_MEgrey")
-
-
-
-
-colnames(GMMP) <- c("locusID", "GMPval_MEpurple","GMPval_MEmidnightblue", "GMPval_MElightyellow","GMPval_MEgreenyellow","GMPval_MEblack","GMPval_MEgreen","GMPval_MEcherry","GMPval_MEpink","GMPval_MEbrown","GMPval_MEgrey60", "GMPval_MElightpurple","GMPval_MEskyblue","GMPval_MEteal","GMPval_MEgold","GMPval_MEgrey")
-
-
-merged_df <- merge(sig.genes3, sulP, by = "locusID", all.x = TRUE)
-merged_df2 <- merge(merged_df, oxyP, by = "locusID", all.x = TRUE)
-
-merged_df3 <- merge(merged_df2, GMM, by = "locusID", all.x = TRUE)
-merged_df4 <- merge(merged_df3, GMMP, by = "locusID", all.x = TRUE)
-
-#write.csv(merged_df4,"GM_GS_sulfide_oxygen.csv")
-
-##################################################################################################################
-names(modNames)
-## NULL
-moduleColors = mergedColors
-table(moduleColors)
-## moduleColors
-## black brown darkcyan darkgoldenrod1 darkgreen
-## 124 233 239 99 143
-## darkorchid4 deeppink3 greenyellow grey grey60
-## 100 214 160 242 203
-## midnightblue pink plum4 skyblue yellow
-## 83 385 109 120 46
-module = "midnightblue"
-column = match(module, modNames);
-moduleGenes = moduleColors==module;
-
-module2 = "brown"
-column2 = match(module2, modNames);
-moduleGenes2 = moduleColors==module2;
-
-module3 = "darkgoldenrod1"
-column3 = match(module3, modNames);
-moduleGenes3 = moduleColors==module3;
-
-
-module4 = "pink"
-column4 = match(module4, modNames);
-moduleGenes4 = moduleColors==module4;
-
-module5 = "black"
-column5 = match(module5, modNames);
-moduleGenes5 = moduleColors==module5;
-
-module6 = "deeppink3"
-column6 = match(module6, modNames);
-moduleGenes6 = moduleColors==module6;
-
-
-module7 = "darkcyan"
-column7 = match(module7, modNames);
-moduleGenes7 = moduleColors==module7;
-
-module8 = "darkgreen"
-column8 = match(module8, modNames);
-moduleGenes8 = moduleColors==module8;
-
-
-module9 = "greenyellow"
-column9 = match(module9, modNames);
-moduleGenes9 = moduleColors==module9;
-
-
-module10 = "grey60"
-column10 = match(module10, modNames);
-moduleGenes10 = moduleColors==module10;
-
-module11 = "yellow"
-column11 = match(module11, modNames);
-moduleGenes11 = moduleColors==module11;
-
-module12 = "skyblue"
-column12 = match(module12, modNames);
-moduleGenes12 = moduleColors==module12;
-
-
-module13 = "plum4"
-column13 = match(module13, modNames);
-moduleGenes13 = moduleColors==module13;
-
-
-module14 = "darkorchid4"
-column14 = match(module14, modNames);
-moduleGenes14 = moduleColors==module14;
-
-module15 = "grey"
-column15 = match(module15, modNames);
-moduleGenes15 = moduleColors==module15;
-
-
-#sizeGrWindow(7, 7); # uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))
-
- #midnight blue and brown
-verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
- abs(geneSulfideSignificance[moduleGenes, 1]),
- xlab = paste("Module Membership in", module, "module"),
- ylab = "Sulfide",
- main = paste("Module membership vs. gene significance\n"),
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
- abs(geneOxygenSignificance[moduleGenes, 1]),
- xlab = paste("Module Membership in", module, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
- abs(geneHydrogenSignificance[moduleGenes, 1]),
- xlab = paste("Module Membership in", module, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
- abs(geneNitrogenSignificance[moduleGenes, 1]),
- xlab = paste("Module Membership in", module, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
- abs(geneSulfideSignificance[moduleGenes2, 1]),
- xlab = paste("Module Membership in", module2, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
- abs(geneOxygenSignificance[moduleGenes2, 1]),
- xlab = paste("Module Membership in", module2, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
- abs(geneHydrogenSignificance[moduleGenes2, 1]),
- xlab = paste("Module Membership in", module2, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
- abs(geneNitrogenSignificance[moduleGenes2, 1]),
- xlab = paste("Module Membership in", module2, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
- abline=TRUE)
-
-#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))
-
-####gold and pink
-verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
- abs(geneSulfideSignificance[moduleGenes3, 1]),
- xlab = paste("Module Membership in", module3, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
- abs(geneOxygenSignificance[moduleGenes3, 1]),
- xlab = paste("Module Membership in", module3, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
- abs(geneHydrogenSignificance[moduleGenes3, 1]),
- xlab = paste("Module Membership in", module3, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
- abs(geneNitrogenSignificance[moduleGenes3, 1]),
- xlab = paste("Module Membership in", module3, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
- abline=TRUE)
-#pink
-verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
- abs(geneSulfideSignificance[moduleGenes4, 1]),
- xlab = paste("Module Membership in", module4, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
- abs(geneOxygenSignificance[moduleGenes4, 1]),
- xlab = paste("Module Membership in", module4, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
- abs(geneHydrogenSignificance[moduleGenes4, 1]),
- xlab = paste("Module Membership in", module4, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
- abs(geneNitrogenSignificance[moduleGenes4, 1]),
- xlab = paste("Module Membership in", module4, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
- abline=TRUE)
-
-#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))
-
-
-####black and blue
-verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
- abs(geneSulfideSignificance[moduleGenes5, 1]),
- xlab = paste("Module Membership in", module5, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
- abs(geneOxygenSignificance[moduleGenes5, 1]),
- xlab = paste("Module Membership in", module5, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
- abs(geneHydrogenSignificance[moduleGenes5, 1]),
- xlab = paste("Module Membership in", module5, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
- abs(geneNitrogenSignificance[moduleGenes5, 1]),
- xlab = paste("Module Membership in", module5, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
- abline=TRUE)
-#blue
-verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
- abs(geneSulfideSignificance[moduleGenes6, 1]),
- xlab = paste("Module Membership in", module6, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
- abs(geneOxygenSignificance[moduleGenes6, 1]),
- xlab = paste("Module Membership in", module6, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
- abs(geneHydrogenSignificance[moduleGenes6, 1]),
- xlab = paste("Module Membership in", module6, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
- abs(geneNitrogenSignificance[moduleGenes6, 1]),
- xlab = paste("Module Membership in", module6, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
- abline=TRUE)
-
-#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))
-
-# darkcyan and green (7 and 8)
-#darkcyan
-verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
- abs(geneSulfideSignificance[moduleGenes7, 1]),
- xlab = paste("Module Membership in", module7, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
- abs(geneOxygenSignificance[moduleGenes7, 1]),
- xlab = paste("Module Membership in", module7, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
- abs(geneHydrogenSignificance[moduleGenes7, 1]),
- xlab = paste("Module Membership in", module7, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
- abs(geneNitrogenSignificance[moduleGenes7, 1]),
- xlab = paste("Module Membership in", module7, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
- abline=TRUE)
-#green
-verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
- abs(geneSulfideSignificance[moduleGenes8, 1]),
- xlab = paste("Module Membership in", module8, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
- abs(geneOxygenSignificance[moduleGenes8, 1]),
- xlab = paste("Module Membership in", module8, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
- abs(geneHydrogenSignificance[moduleGenes8, 1]),
- xlab = paste("Module Membership in", module8, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
- abs(geneNitrogenSignificance[moduleGenes8, 1]),
- xlab = paste("Module Membership in", module8, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
- abline=TRUE)
-
-#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))
-
-
-# greenyellow and grey60
-#greenyellow
-verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
- abs(geneSulfideSignificance[moduleGenes9, 1]),
- xlab = paste("Module Membership in", module9, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
- abs(geneOxygenSignificance[moduleGenes9, 1]),
- xlab = paste("Module Membership in", module9, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
- abs(geneHydrogenSignificance[moduleGenes9, 1]),
- xlab = paste("Module Membership in", module9, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
- abs(geneNitrogenSignificance[moduleGenes9, 1]),
- xlab = paste("Module Membership in", module9, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
- abline=TRUE)
-#grey60
-verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
- abs(geneSulfideSignificance[moduleGenes10, 1]),
- xlab = paste("Module Membership in", module10, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
- abs(geneOxygenSignificance[moduleGenes10, 1]),
- xlab = paste("Module Membership in", module10, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
- abs(geneHydrogenSignificance[moduleGenes10, 1]),
- xlab = paste("Module Membership in", module10, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
- abs(geneNitrogenSignificance[moduleGenes10, 1]),
- xlab = paste("Module Membership in", module10, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
- abline=TRUE)
-
-#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))
-
-#lightyellow and magenta (11,12)
-#lightyellow
-verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
- abs(geneSulfideSignificance[moduleGenes11, 1]),
- xlab = paste("Module Membership in", module11, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
- abs(geneOxygenSignificance[moduleGenes11, 1]),
- xlab = paste("Module Membership in", module11, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
- abs(geneHydrogenSignificance[moduleGenes11, 1]),
- xlab = paste("Module Membership in", module11, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
- abs(geneNitrogenSignificance[moduleGenes11, 1]),
- xlab = paste("Module Membership in", module11, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
- abline=TRUE)
-#magenta
-verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
- abs(geneSulfideSignificance[moduleGenes12, 1]),
- xlab = paste("Module Membership in", module12, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
- abs(geneOxygenSignificance[moduleGenes12, 1]),
- xlab = paste("Module Membership in", module12, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
- abs(geneHydrogenSignificance[moduleGenes12, 1]),
- xlab = paste("Module Membership in", module12, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
- abs(geneNitrogenSignificance[moduleGenes12, 1]),
- xlab = paste("Module Membership in", module12, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
- abline=TRUE)
-
-str(geneSulfideSignificance)
-## 'data.frame': 2500 obs. of 1 variable:
-## $ Sul.Sulfide: num 0.1156 0.2704 0.3413 -0.1966 -0.0622 ...
-what <- geneSulfideSignificance
-what$locusID <- rownames(what)
-what <- what[,c(2,1)]
-# Writing the dataframe to a CSV file so I can use this in cytoscape for a layer
-##write.csv(what, file = "geneSulfideSignificance.csv", row.names = FALSE)
-#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))
-
-#purple and salmon (13,14)
-#purple
-verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
- abs(geneSulfideSignificance[moduleGenes13, 1]),
- xlab = paste("Module Membership in", module13, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
- abs(geneOxygenSignificance[moduleGenes13, 1]),
- xlab = paste("Module Membership in", module13, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
- abs(geneHydrogenSignificance[moduleGenes13, 1]),
- xlab = paste("Module Membership in", module13, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
- abs(geneNitrogenSignificance[moduleGenes13, 1]),
- xlab = paste("Module Membership in", module13, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
- abline=TRUE)
-#salmon
-verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
- abs(geneSulfideSignificance[moduleGenes14, 1]),
- xlab = paste("Module Membership in", module14, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
- abs(geneOxygenSignificance[moduleGenes14, 1]),
- xlab = paste("Module Membership in", module14, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
- abs(geneHydrogenSignificance[moduleGenes14, 1]),
- xlab = paste("Module Membership in", module14, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
- abs(geneNitrogenSignificance[moduleGenes14, 1]),
- xlab = paste("Module Membership in", module14, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
- abline=TRUE)
-
-#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))
-
-
-#grey
-verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
- abs(geneSulfideSignificance[moduleGenes15, 1]),
- xlab = paste("Module Membership in", module15, "module"),
- ylab = "Sulfide",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
- abs(geneOxygenSignificance[moduleGenes15, 1]),
- xlab = paste("Module Membership in", module15, "module"),
- ylab = "Oxygen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
- abs(geneHydrogenSignificance[moduleGenes15, 1]),
- xlab = paste("Module Membership in", module15, "module"),
- ylab = "Hydrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
- abs(geneNitrogenSignificance[moduleGenes15, 1]),
- xlab = paste("Module Membership in", module15, "module"),
- ylab = "Nitrogen",
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
- abline=TRUE)
-
-######now I am just making the plots that will go next to the gene significance barplots that I made in the previous section, I will do the two most significant modules for each treatment condition
-
-### for sulfide that is teal and gold
-
-
-#sizeGrWindow(3,5);
-par(mfrow = c(1,2))
-par(mar=c(4,3,4,3))
-
-verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
- abs(geneSulfideSignificance[moduleGenes7, 1]),
- xlab = paste("Module Membership in teal module"),
- ylab = "Sulfide",
- pch = 16,
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
- abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
- abs(geneSulfideSignificance[moduleGenes3, 1]),
- xlab = paste("Module Membership in gold module"),
- ylab = "Sulfide",
- pch = 16,
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
- abline=TRUE)
-
-#sizeGrWindow(3,5);
-par(mfrow = c(1,2))
-par(mar=c(4,3,4,3))
-
-#### for oxygen in black and purple modules
-
-
-verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
- abs(geneOxygenSignificance[moduleGenes14, 1]),
- xlab = paste("Module Membership in purple"),
- ylab = "Oxygen",
- pch = 16,
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
- abline = TRUE, xaxt = "n") # Suppress x-axis
-
-
-
-verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
- abs(geneOxygenSignificance[moduleGenes5, 1]),
- xlab = paste("Module Membership in black"),
- ylab = "oxygen",
- pch = 16,
- cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
- abline=TRUE)
-
-####genes of significance in teal module (darkcyan) under Sulfide
-#geneSulfideSignificance
-
-FilterGenes= abs(geneSulfideSignificance)> .2 & abs(geneModuleMembership$MMdarkcyan)>.8 & SulPvalue <= 0.05
-table(FilterGenes)
-## FilterGenes
-## FALSE TRUE
-## 2450 50
-head(FilterGenes)
-## Sul.Sulfide
-## gene-L0Y14_RS04070 FALSE
-## gene-L0Y14_RS09775 FALSE
-## gene-L0Y14_RS01990 FALSE
-## gene-L0Y14_RS09950 FALSE
-## gene-L0Y14_RS09955 FALSE
-## gene-L0Y14_RS12365 FALSE
-Sulfideteal_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
-Sulfideteal_filtered <- gsub("e.L", "e-L", Sulfideteal_filtered)
-
-gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
-gene_names <- gene_names[,2:3]
-names(gene_names)[1] <- "locusID"
-
-Sulfideteal_filtered <- as.data.frame(Sulfideteal_filtered)
-names(Sulfideteal_filtered)[1] <- "locusID"
-str(Sulfideteal_filtered)
-## 'data.frame': 50 obs. of 1 variable:
-## $ locusID: chr "gene-L0Y14_RS07425" "gene-L0Y14_RS07440" "gene-L0Y14_RS07430" "gene-L0Y14_RS07435" ...
-Sulfideteal_filtered <- left_join(Sulfideteal_filtered,gene_names)
-## Joining with `by = join_by(locusID)`
-dim(Sulfideteal_filtered)
-## [1] 50 2
-names(Sulfideteal_filtered)
-## [1] "locusID" "gene"
-list <- 1:50
-condition <- rep("Sulfide",length(list))
-moduleFil <- rep("teal",length(list))
-a <- cbind(Sulfideteal_filtered,condition,moduleFil)
-
-sul.teal <- a
-
-
-####genes of significance in gold module under Sulfide
-#geneSulfideSignificance
-FilterGenes= abs(geneSulfideSignificance)> .2 & abs(geneModuleMembership$MMdarkgoldenrod1)>.8 & SulPvalue <= 0.05
-table(FilterGenes)
-## FilterGenes
-## FALSE TRUE
-## 2478 22
-Sulfidegold_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
-Sulfidegold_filtered <- gsub("e.L", "e-L", Sulfidegold_filtered)
-
-gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
-gene_names <- gene_names[,2:3]
-names(gene_names)[1] <- "locusID"
-
-Sulfidegold_filtered <- as.data.frame(Sulfidegold_filtered)
-names(Sulfidegold_filtered)[1] <- "locusID"
-str(Sulfidegold_filtered)
-## 'data.frame': 22 obs. of 1 variable:
-## $ locusID: chr "gene-L0Y14_RS01985" "gene-L0Y14_RS07695" "gene-L0Y14_RS00080" "gene-L0Y14_RS12030" ...
-Sulfidegold_filtered <- left_join(Sulfidegold_filtered,gene_names)
-## Joining with `by = join_by(locusID)`
-dim(Sulfidegold_filtered)
-## [1] 22 2
-names(Sulfidegold_filtered)
-## [1] "locusID" "gene"
-list <- 1:22
-condition <- rep("Sulfide",length(list))
-moduleFil <- rep("darkgoldenrod1",length(list))
-d <- cbind(Sulfidegold_filtered,condition,moduleFil)
-sulfide_hub_gold <- d
-####genes of significance in black module under Oxygen
-#geneOxygenSignificance
-FilterGenes= abs(geneOxygenSignificance)> .2 & abs(geneModuleMembership$MMblack)>.8 & OxyPvalue <= 0.05
-table(FilterGenes)
-## FilterGenes
-## FALSE TRUE
-## 2467 33
-head(FilterGenes)
-## Oxy.Oxygen
-## gene-L0Y14_RS04070 FALSE
-## gene-L0Y14_RS09775 FALSE
-## gene-L0Y14_RS01990 FALSE
-## gene-L0Y14_RS09950 FALSE
-## gene-L0Y14_RS09955 FALSE
-## gene-L0Y14_RS12365 FALSE
-Oxygenblack_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
-Oxygenblack_filtered <- gsub("e.L", "e-L", Oxygenblack_filtered)
-
-gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
-gene_names <- gene_names[,2:3]
-names(gene_names)[1] <- "locusID"
-
-Oxygenblack_filtered <- as.data.frame(Oxygenblack_filtered)
-names(Oxygenblack_filtered)[1] <- "locusID"
-str(Oxygenblack_filtered)
-## 'data.frame': 33 obs. of 1 variable:
-## $ locusID: chr "gene-L0Y14_RS14085" "gene-L0Y14_RS12670" "gene-L0Y14_RS08400" "gene-L0Y14_RS05880" ...
-Oxygenblack_filtered <- left_join(Oxygenblack_filtered,gene_names)
-## Joining with `by = join_by(locusID)`
-dim(Oxygenblack_filtered)
-## [1] 33 2
-names(Oxygenblack_filtered)
-## [1] "locusID" "gene"
-list <- 1:33
-condition <- rep("Oxygen",length(list))
-moduleFil <- rep("black",length(list))
-oxy_b <- cbind(Oxygenblack_filtered,condition,moduleFil)
-
-
-#####
-
-
-
-names(geneModuleMembership)
-## [1] "MMmidnightblue" "MMdarkorchid4" "MMyellow" "MMgreenyellow"
-## [5] "MMblack" "MMdarkgreen" "MMdeeppink3" "MMpink"
-## [9] "MMgrey60" "MMbrown" "MMplum4" "MMskyblue"
-## [13] "MMdarkcyan" "MMdarkgoldenrod1" "MMgrey"
-names(geneModuleMembership)
-## [1] "MMmidnightblue" "MMdarkorchid4" "MMyellow" "MMgreenyellow"
-## [5] "MMblack" "MMdarkgreen" "MMdeeppink3" "MMpink"
-## [9] "MMgrey60" "MMbrown" "MMplum4" "MMskyblue"
-## [13] "MMdarkcyan" "MMdarkgoldenrod1" "MMgrey"
-####genes of significance in purple (darkorchid) module under Oxygen
-#geneOxygenSignificance
-names(geneModuleMembership)
-## [1] "MMmidnightblue" "MMdarkorchid4" "MMyellow" "MMgreenyellow"
-## [5] "MMblack" "MMdarkgreen" "MMdeeppink3" "MMpink"
-## [9] "MMgrey60" "MMbrown" "MMplum4" "MMskyblue"
-## [13] "MMdarkcyan" "MMdarkgoldenrod1" "MMgrey"
-FilterGenes= abs(geneOxygenSignificance)> .2 & abs(geneModuleMembership$MMdarkorchid4)>.8 & OxyPvalue <= 0.05
-table(FilterGenes)
-## FilterGenes
-## FALSE TRUE
-## 2476 24
-Oxygenpurple_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
-Oxygenpurple_filtered <- gsub("e.L", "e-L", Oxygenpurple_filtered)
-
-gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
-gene_names <- gene_names[,2:3]
-names(gene_names)[1] <- "locusID"
-
-Oxygenpurple_filtered <- as.data.frame(Oxygenpurple_filtered)
-names(Oxygenpurple_filtered)[1] <- "locusID"
-str(Oxygenpurple_filtered)
-## 'data.frame': 24 obs. of 1 variable:
-## $ locusID: chr "gene-L0Y14_RS09775" "gene-L0Y14_RS16350" "gene-L0Y14_RS12700" "gene-L0Y14_RS07425" ...
-Oxygenpurple_filtered <- left_join(Oxygenpurple_filtered,gene_names)
-## Joining with `by = join_by(locusID)`
-dim(Oxygenpurple_filtered)
-## [1] 24 2
-names(Oxygenpurple_filtered)
-## [1] "locusID" "gene"
-list <- 1:24
-condition <- rep("Oxygen",length(list))
-moduleFil <- rep("darkorchid4",length(list))
-oxy_purple <- cbind(Oxygenpurple_filtered,condition,moduleFil)
-
-#####
-
-
-
-names(geneModuleMembership)
-## [1] "MMmidnightblue" "MMdarkorchid4" "MMyellow" "MMgreenyellow"
-## [5] "MMblack" "MMdarkgreen" "MMdeeppink3" "MMpink"
-## [9] "MMgrey60" "MMbrown" "MMplum4" "MMskyblue"
-## [13] "MMdarkcyan" "MMdarkgoldenrod1" "MMgrey"
-####genes of significance in purple module under Oxygen
-FilterGenes= abs(geneOxygenSignificance)> .2 & abs(geneModuleMembership$MMdarkorchid4)>.8
-table(FilterGenes)
-## FilterGenes
-## FALSE TRUE
-## 2476 24
-Oxygenpurple_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
-Oxygenpurple_filtered <- gsub("e.L", "e-L", Oxygenpurple_filtered)
-
-gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
-gene_names <- gene_names[,2:3]
-names(gene_names)[1] <- "locusID"
-
-Oxygenpurple_filtered <- as.data.frame(Oxygenpurple_filtered)
-names(Oxygenpurple_filtered)[1] <- "locusID"
-str(Oxygenpurple_filtered)
-## 'data.frame': 24 obs. of 1 variable:
-## $ locusID: chr "gene-L0Y14_RS09775" "gene-L0Y14_RS16350" "gene-L0Y14_RS12700" "gene-L0Y14_RS07425" ...
-Oxygenpurple_filtered <- left_join(Oxygenpurple_filtered,gene_names)
-## Joining with `by = join_by(locusID)`
-dim(Oxygenpurple_filtered)
-## [1] 24 2
-names(Oxygenpurple_filtered)
-## [1] "locusID" "gene"
-list <- 1:24
-condition <- rep("Oxygen",length(list))
-moduleFil <- rep("purple",length(list))
-oxy_purple <- cbind(Oxygenpurple_filtered,condition,moduleFil)
-lnames = load(file = "wgcna_manual_network20220910.RData")
-lnames =load(file="count_matrix_condition_info_dataInput.RData")
-
-TOM = TOMsimilarityFromExpr(WGCNA_matrix, power = 8,
- networkType = "signed hybrid",
- TOMType = "signed")
-## TOM calculation: adjacency..
-## ..will not use multithreading.
-## Fraction of slow calculations: 0.000000
-## ..connectivity..
-## ..matrix multiplication (system BLAS)..
-## ..normalization..
-## ..done.
-moduleColors = mergedColors
-
-###below we will just use all genes, but module=c("list of color modules you could filter by","eg.black","red")
-#module= moduleColors
-
-#module2=c("brown","cyan","midnightblue","blue","magenta","turquoise","black","salmon","pink","green","turqoise")
-
-
-inModule = is.finite(match(moduleColors, moduleColors))
-
-#inModule2 = is.finite(match(moduleColors, module2))
-#table(inModule2)
-
-genes = colnames(WGCNA_matrix)
-modGenes = genes[inModule]
-#modGenes2=genes[inModule2]
-
-modTOM = TOM[inModule, inModule]
-#modTOM2 = TOM[inModule2,inModule2]
-dimnames(modTOM) = list(modGenes, modGenes)
-#dimnames(modTOM2) = list(modGenes2, modGenes2)
-
-
-genes_of_note <- read.csv("enzymes_of_note.csv")
-names(genes_of_note)
-## [1] "broad_function" "function." "gene"
-## [4] "locus_ID_andre_new" "X"
-genes_of_note <- genes_of_note[,1:4]
-names(genes_of_note)[4] <- "modGenes"
-cool <- genes_of_note[,c(4,3,2,1)]
-
-write.csv(cool,"cool_genes.csv")
-
-
-modGenes3 <- as.data.frame(modGenes)
-names(modGenes3)[1] <- "modGenes"
-
-
-
-#modGenes4 <- as.data.frame(modGenes2)
-#names(modGenes4)[1] <- "modGenes"
-#str(modGenes4)
-
-
-
-gene_annos <- right_join(modGenes3,cool)
-## Joining with `by = join_by(modGenes)`
-#gene_annos2 <- right_join(modGenes4,cool)
-
-
-names(gene_annos)
-## [1] "modGenes" "gene" "function." "broad_function"
-cool_gene_list <- gene_annos[,1]
-cool_gene_list <- as.data.frame(cool_gene_list)
-names(cool_gene_list)[1] <- "modGenes"
-
-
-#names(gene_annos2)
-#cool_gene_list2 <- gene_annos2[,1]
-#cool_gene_list2 <- as.data.frame(cool_gene_list2)
-#names(cool_gene_list2)[1] <- "modGenes"
-
-modcool <- inner_join(modGenes3,cool_gene_list)
-## Joining with `by = join_by(modGenes)`
-#modcool2 <- inner_join(modGenes4,cool_gene_list2)
-
-
-uncool <- modGenes3[ ! modGenes3$modGenes %in% modcool$modGenes,]
-#uncool2 <- modGenes4[ ! modGenes4$modGenes %in% modcool2$modGenes,]
-uncool <- as.data.frame(uncool)
-#uncool2 <- as.data.frame(uncool2)
-names(uncool)[1] <- "modGenes"
-#names(uncool2)[1] <- "modGenes"
-
-from_gff <- read.delim("GCF_023733635.1_ASM2373363v1_feature_table.txt")
-from_gff$locus_tag = paste0('gene-', from_gff$locus_tag)
-names(from_gff)[17] <- "modGenes"
-names(from_gff)
-## [1] "X..feature" "class"
-## [3] "assembly" "assembly_unit"
-## [5] "seq_type" "chromosome"
-## [7] "genomic_accession" "start"
-## [9] "end" "strand"
-## [11] "product_accession" "non.redundant_refseq"
-## [13] "related_accession" "name"
-## [15] "symbol" "GeneID"
-## [17] "modGenes" "feature_interval_length"
-## [19] "product_length" "attributes"
-names(from_gff)[1] <- "feature"
-gff2 <- subset(from_gff,feature=="CDS")
-names(gff2)
-## [1] "feature" "class"
-## [3] "assembly" "assembly_unit"
-## [5] "seq_type" "chromosome"
-## [7] "genomic_accession" "start"
-## [9] "end" "strand"
-## [11] "product_accession" "non.redundant_refseq"
-## [13] "related_accession" "name"
-## [15] "symbol" "GeneID"
-## [17] "modGenes" "feature_interval_length"
-## [19] "product_length" "attributes"
-gff3 <- gff2[,c(17,14)]
-gff4 <- gff2[,c(17,14,11)]
-
-
-
-
-
-
-coolg <- left_join(modcool,cool)
-## Joining with `by = join_by(modGenes)`
-coolg <- left_join(coolg,gff4)
-## Joining with `by = join_by(modGenes)`
-coolg <- coolg[,c(1,2,6)]
-
-#coolg2 <- left_join(modcool2,cool)
-#coolg2 <- left_join(coolg2,gff4)
-#coolg2 <- coolg2[,c(1,2,6)]
-
-str(gff4)
-## 'data.frame': 3213 obs. of 3 variables:
-## $ modGenes : chr "gene-L0Y14_RS00005" "gene-L0Y14_RS00010" "gene-L0Y14_RS00015" "gene-L0Y14_RS00020" ...
-## $ name : chr "chromosomal replication initiator protein DnaA" "DNA polymerase III subunit beta" "IS3 family transposase" "hypothetical protein" ...
-## $ product_accession: chr "WP_005958812.1" "WP_006474992.1" "" "WP_138921860.1" ...
-#str(uncool2)
-#names(uncool)[1] <- "modGenes"
-uncoolg <- left_join(uncool,gff3)
-## Joining with `by = join_by(modGenes)`
-#uncoolg2 <- left_join(uncool2,gff4)
-names(uncoolg)[2] <- "gene"
-coolg <- coolg[,1:2]
-all_genes6 <- rbind(coolg,uncoolg)
-
-
-#names(uncool2)[1] <- "modGenes"
-#uncoolg2 <- left_join(uncool2,gff3)
-#uncoolg3 <- left_join(uncool2,gff4)
-#names(uncoolg3)[2] <- "gene"
-#names(coolg2)
-all_genes <- rbind(coolg,uncoolg)
-
-write.csv(all_genes6,"gene_names_for_wgcna_matrix_files_sig_module.csv")
-
-
-vec1 <- all_genes6[,1]
-
-vec2 <- modGenes
-head(vec1)
-## [1] "gene-L0Y14_RS04070" "gene-L0Y14_RS01990" "gene-L0Y14_RS12365"
-## [4] "gene-L0Y14_RS06850" "gene-L0Y14_RS02365" "gene-L0Y14_RS07425"
-reorder_idx <- match(vec2,vec1)
-
-all_genes <- vec1[reorder_idx]
-all_genes3 <- as.data.frame(all_genes)
-names(all_genes3)[1] <- "modGenes"
-
-genes2 <- left_join(all_genes3,all_genes6)
-## Joining with `by = join_by(modGenes)`
-genes777 <- genes2[,2]
-genes888 <- genes2[,2]
-
-
-
-dimnames(modTOM) = list(all_genes, all_genes)
-#str(modGenes2)
-#moduleColors[inModule]
-#head(modTOM2)
-#str(modTOM2)
-head(all_genes)
-## [1] "gene-L0Y14_RS04070" "gene-L0Y14_RS09775" "gene-L0Y14_RS01990"
-## [4] "gene-L0Y14_RS09950" "gene-L0Y14_RS09955" "gene-L0Y14_RS12365"
-#str(all_genes2)
-
-set.seed(123)
-###all_genes2 needs to be a list not dataframe!
-
-
-
-
-cyt_nofilter=cyt = exportNetworkToCytoscape(modTOM,
- edgeFile = "CytoscapeEdgeFile20230201.txt",
- nodeFile = "CytoscapeNodeFile20230201.txt",
- weighted = TRUE,
- threshold = 0.0,
- nodeNames = all_genes,
- altNodeNames = genes777,
- nodeAttr = moduleColors[inModule])
-
-
-
-
-
-
-cyt_all = exportNetworkToCytoscape(modTOM,
- edgeFile = "CytoscapeEdgeFile20231201_0.05.txt",
- nodeFile = "CytoscapeNodeFile20231201_0.05.txt",
- weighted = TRUE,
- threshold = 0.05,
- nodeNames = all_genes,
- altNodeNames = genes888,
- nodeAttr = moduleColors[inModule])
-
-
-
-file_path <- "CytoscapeNodeFile20231201_0.05.txt"
-
-lines <- readLines(file_path)
-cleaned_data <- list()
-column_names <- NULL
-# Loop through the lines and process data as needed
-for (line in lines) {
- # Split the line using tabs as delimiters
- elements <- unlist(strsplit(line, "\t"))
-
- if (is.null(column_names)) {
- # If column names are not set, use the first line as column names
- column_names <- elements
- } else {
- # Process elements as needed (e.g., remove extra data, format, etc.)
- # Append cleaned data to the list
- cleaned_data <- append(cleaned_data, list(elements))
- }
-}
-
-final_data <- as.data.frame(do.call(rbind, cleaned_data), stringsAsFactors = FALSE)
-colnames(final_data) <- column_names
-
-table(final_data[,3])
-##
-## black brown darkcyan darkgoldenrod1 darkgreen
-## 98 196 214 79 130
-## darkorchid4 deeppink3 greenyellow grey grey60
-## 71 199 136 32 155
-## midnightblue pink plum4 skyblue yellow
-## 59 337 92 106 41
-# Replace all instances of color names to match those used in paper in the entire data frame
-final_data <- lapply(final_data, function(x) ifelse(x == "darkorchid4", "purple", x))
-final_data <- as.data.frame(final_data)
-
-final_data <- lapply(final_data, function(x) ifelse(x == "plum4", "light_purple", x))
-final_data <- as.data.frame(final_data)
-
-final_data <- lapply(final_data, function(x) ifelse(x == "darkcyan", "teal", x))
-final_data <- as.data.frame(final_data)
-
-final_data <- lapply(final_data, function(x) ifelse(x == "deeppink3", "cherry", x))
-final_data <- as.data.frame(final_data)
-
-final_data <- lapply(final_data, function(x) ifelse(x == "yellow", "light_yellow", x))
-final_data <- as.data.frame(final_data)
-
-
-final_data <- lapply(final_data, function(x) ifelse(x == "darkgoldenrod1", "gold", x))
-final_data <- as.data.frame(final_data)
-
-final_data <- lapply(final_data, function(x) ifelse(x == "darkgreen","green", x))
-final_data <- as.data.frame(final_data)
-
-
-
-#write.table(final_data, file = "output_file.txt", sep = "\t", quote = FALSE, row.names = FALSE)
-###mean_significance_of_module_for_conditionSulfide############################################################
-
-
-
-par(mfrow = c(2,1))
-
-colorh1 <- mergedColors
-
-GS1=as.numeric(cor(biconditions$Sulfide,WGCNA_matrix, use="p"))
-head(GS1)
-## [1] 0.11559790 0.27041835 0.34126255 -0.19662226 -0.06219137 -0.23047838
-GeneSignificance=abs(GS1)
-head(GeneSignificance)
-## [1] 0.11559790 0.27041835 0.34126255 0.19662226 0.06219137 0.23047838
-str(colorh1)
-## chr [1:2500] "plum4" "darkorchid4" "midnightblue" "grey60" "grey60" ...
-ModuleSignificance=tapply(GeneSignificance, colorh1, mean, na.rm=T)
-str(ModuleSignificance)
-## num [1:15(1d)] 0.138 0.193 0.309 0.368 0.133 ...
-## - attr(*, "dimnames")=List of 1
-## ..$ : chr [1:15] "black" "brown" "darkcyan" "darkgoldenrod1" ...
-geneSulfideSignificance=as.data.frame(cor(WGCNA_matrix, Sulfide, use = "p"))
-SulPvalue = as.data.frame(corPvalueStudent(as.matrix(geneSulfideSignificance), nSamples))
-names(geneSulfideSignificance) = paste("Sul.", names(Sulfide), sep="");
-names(SulPvalue) = paste("p.Sul.", names(Sulfide), sep="");
-
-x_limits <- range(
- abs(geneModuleMembership[moduleGenes7, column7]),
- abs(geneModuleMembership[moduleGenes3, column3])
-)
-
-head(geneModuleMembership)
-## MMmidnightblue MMdarkorchid4 MMyellow MMgreenyellow
-## gene-L0Y14_RS04070 -0.2978002 -0.4742295 -0.06228793 -0.12283434
-## gene-L0Y14_RS09775 0.6475847 0.8479211 0.16301606 0.38986765
-## gene-L0Y14_RS01990 0.7683016 0.2909617 -0.21720940 0.03127418
-## gene-L0Y14_RS09950 0.6250107 0.4452151 0.21104415 0.01837192
-## gene-L0Y14_RS09955 0.5993774 0.4088890 -0.08634607 -0.20976909
-## gene-L0Y14_RS12365 0.3632049 0.1311813 -0.06645301 -0.38765511
-## MMblack MMdarkgreen MMdeeppink3 MMpink MMgrey60
-## gene-L0Y14_RS04070 -0.16971124 -0.05481641 -0.01719957 -0.01598375 -0.6563363
-## gene-L0Y14_RS09775 0.53953441 0.44692677 0.41381637 0.59720934 0.1830737
-## gene-L0Y14_RS01990 0.06471391 -0.09289025 0.08717597 0.11976742 0.2550905
-## gene-L0Y14_RS09950 0.43427234 0.22863891 0.17923724 0.10431595 0.4740972
-## gene-L0Y14_RS09955 0.23088121 -0.01544034 0.08834925 -0.04765631 0.5107174
-## gene-L0Y14_RS12365 -0.00202547 -0.12064102 -0.25455458 -0.31551755 0.7070535
-## MMbrown MMplum4 MMskyblue MMdarkcyan
-## gene-L0Y14_RS04070 -0.1384809 0.6483345 0.2465064 0.25589117
-## gene-L0Y14_RS09775 -0.2948900 -0.5986951 -0.2859013 -0.84047590
-## gene-L0Y14_RS01990 0.2365179 -0.1068451 -0.5795192 -0.47056380
-## gene-L0Y14_RS09950 0.2737269 -0.2366527 -0.6256319 -0.43994105
-## gene-L0Y14_RS09955 0.4747395 -0.3438988 -0.6771603 -0.39111707
-## gene-L0Y14_RS12365 0.4675125 -0.2198442 -0.5555037 -0.09885019
-## MMdarkgoldenrod1 MMgrey
-## gene-L0Y14_RS04070 -0.1725039 -0.34377603
-## gene-L0Y14_RS09775 -0.4423334 0.33425164
-## gene-L0Y14_RS01990 -0.5786492 -0.10188510
-## gene-L0Y14_RS09950 -0.3808909 0.07218213
-## gene-L0Y14_RS09955 -0.3278772 0.27804497
-## gene-L0Y14_RS12365 -0.2081241 0.06962918
-# First Plot (plotModuleSignificance)
-plotModuleSignificance(GeneSignificance, colorh1,
- legend.text = TRUE,
- main = "Average gene significance in modules with sulfide variable",
- cex.axis = 1)
-
-
-help(plotModuleSignificance)
-
-table(colorh1)
-## colorh1
-## black brown darkcyan darkgoldenrod1 darkgreen
-## 124 233 239 99 143
-## darkorchid4 deeppink3 greenyellow grey grey60
-## 100 214 160 242 203
-## midnightblue pink plum4 skyblue yellow
-## 83 385 109 120 46
-par(mar = c(2, 3, 2, 3))
-par(mfrow = c(2,1))
-
-###mean_significance_of_module_for_condition_Oxygen#########################################################
-sizeGrWindow(8,7)
-par(mar = c(2, 3, 2, 3))
-par(mfrow = c(2,1))
-
-
-
-GS1=as.numeric(cor(biconditions$Oxygen,WGCNA_matrix, use="p"))
-GeneSignificance=abs(GS1)
-ModuleSignificance=tapply(GeneSignificance, colorh1, mean, na.rm=T)
-
-#sizeGrWindow(8,7)
-#par(mfrow = c(1,3))
-plotModuleSignificance(GeneSignificance,colorh1,
- legend.text=TRUE,
- main = "Gene significance in oxygen treatments across modules",
- cex.axis=1)
-help(plotModuleSignificance)
-
-# Plot for the purple module with suppressed x-axis
-
-#y_limits <- range(
- # abs(geneOxygenSignificance[moduleGenes14, 1]),
- #abs(geneOxygenSignificance[moduleGenes5, 1])
-#)
-
-x_limits <- range(abs(geneModuleMembership[moduleGenes14, column14]),
- abs(geneModuleMembership[moduleGenes5, column5]))
-
-nitro <- read.csv("Merged Network_2 default node.csv")
-#names(nitro)
-
-nirS <- subset(nitro,nirS=="1")
-
-#nitro$shared.name
-
-#(nitro$neighbors666)
-
-rtca6 <-nitro[,c(133,69)]
-
-
-nitro1 <- nitro[c(1:55,57:894),c(133,69,106,3,28,38,39,65,66,108,109,120,121,125)]
-#nitro1$shared.name
-#names(nitro1)
-
-names(nitro1)[3] <- "cbb"
-nitro1$cbb <-factor(ifelse(nitro1$cbb=="cbb",1,0))
-table(nitro1$cbb)
-##
-## 0 1
-## 373 520
-names(nitro1)[2] <- "rtca"
-nitro1$rtca <-factor(ifelse(nitro1$rtca=="rtca",1,0))
-table(nitro1$rtca)
-##
-## 0 1
-## 745 148
-indx <- sapply(nitro1, is.factor)
-nitro1[indx] <- lapply(nitro1[indx], function(x) as.numeric(as.character(x)))
-
-str(nitro1)
-## 'data.frame': 893 obs. of 14 variables:
-## $ shared.name : chr "gene-L0Y14_RS16060" "gene-L0Y14_RS01710" "gene-L0Y14_RS01760" "gene-L0Y14_RS01755" ...
-## $ rtca : num 0 0 0 0 0 0 0 0 0 0 ...
-## $ cbb : num 1 1 1 1 1 1 1 1 1 0 ...
-## $ amtB : int 1 1 1 1 1 1 1 1 1 1 ...
-## $ gogat_glnA : int 1 1 1 1 1 1 1 1 1 1 ...
-## $ hoa1 : int 1 NA NA 1 NA 1 NA NA 1 NA ...
-## $ hoa2 : int NA NA NA NA NA 1 1 NA NA NA ...
-## $ nap : int NA NA NA NA NA NA NA NA NA NA ...
-## $ narGHI : int NA NA NA NA NA NA NA NA 1 NA ...
-## $ nirBD_nasA_nark1: int 1 NA NA 1 1 NA NA NA 1 NA ...
-## $ nirS : int NA NA NA NA NA NA 1 NA NA NA ...
-## $ norCB : int NA NA NA NA NA NA NA NA NA NA ...
-## $ nosZ_cluster : int NA NA NA NA NA NA NA NA NA NA ...
-## $ otr : int NA NA NA NA NA 1 1 NA 1 NA ...
-nitro1[is.na(nitro1)] = 0
-indx <- sapply(nitro1, is.factor)
-nitro1[indx] <- lapply(nitro1[indx], function(x) as.numeric(as.character(x)))
-
-upset(nitro1,sets = c("rtca","cbb" ,"amtB","gogat_glnA","hoa1","hoa2","otr","nirBD_nasA_nark1","nap","narGHI","nirS","norCB","nosZ_cluster"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE, order)
-
-letslook <- read.csv("aa_cbb_rtca_merged_with_fixed_rtca_cbb default node.csv")
-#names(letslook)
-letslook <- letslook[1:1052,c(131,60:109)]
-rtca1 <- subset(letslook, neighbors1000 =="rtca")
-#rtca1$shared.name
-#names(letslook)
-table(letslook$neighbors1000)
-##
-## rtca
-## 904 148
-names(letslook)[5] <- "rtca"
-letslook$rtca <-factor(ifelse(letslook$rtca=="rtca",1,0))
-table(letslook$rtca)
-##
-## 0 1
-## 904 148
-#names(letslook)
-table(letslook$neighbhors184)
-##
-## lysine_synthesis
-## 836 216
-which(colnames(letslook)=="neighbhors184")
-## [1] 3
-names(letslook)[which(colnames(letslook)=="neighbhors184")] <- "lysine_synthesis"
-letslook$lysine_synthesis <-factor(ifelse(letslook$lysine_synthesis=="lysine_synthesis",1,0))
-table(letslook$lysine_synthesis)
-##
-## 0 1
-## 836 216
-#names(letslook)
-table(letslook$neighbors666)
-##
-## cbb
-## 532 520
-which(colnames(letslook)=="neighbors666")
-## [1] 51
-names(letslook)[which(colnames(letslook)=="neighbors666")] <- "cbb"
-letslook$cbb <-factor(ifelse(letslook$cbb=="cbb",1,0))
-table(letslook$cbb)
-##
-## 0 1
-## 532 520
-#names(letslook)
-table(letslook$neighbors185)
-##
-## arginine_synthesis
-## 1010 42
-names(letslook)[which(colnames(letslook)=="neighbors185")] <- "arginine_synthesis"
-letslook$arginine_synthesis <-factor(ifelse(letslook$arginine_synthesis=="arginine_synthesis",1,0))
-table(letslook$arginine_synthesis)
-##
-## 0 1
-## 1010 42
-#names(letslook)
-table(letslook$neighbors183)
-##
-## ornithine_synthesis
-## 648 404
-names(letslook)[which(colnames(letslook)=="neighbors183")] <- "ornithine_synthesis"
-letslook$ornithine_synthesis <-factor(ifelse(letslook$ornithine_synthesis=="ornithine_synthesis",1,0))
-table(letslook$ornithine_synthesis)
-##
-## 0 1
-## 648 404
-#names(letslook)
-table(letslook$neighbors182)
-##
-## citruline_synthesis
-## 941 111
-names(letslook)[which(colnames(letslook)=="neighbors182")] <- "citruline_synthesis"
-letslook$citruline_synthesis <-factor(ifelse(letslook$citruline_synthesis=="citruline_synthesis",1,0))
-table(letslook$citruline_synthesis)
-##
-## 0 1
-## 941 111
-#names(letslook)
-table(letslook$neighbors181)
-##
-## asparagine_synth
-## 756 296
-names(letslook)[which(colnames(letslook)=="neighbors181")] <- "asparagine_synth"
-letslook$asparagine_synth <-factor(ifelse(letslook$asparagine_synth=="asparagine_synth",1,0))
-table(letslook$asparagine_synth)
-##
-## 0 1
-## 756 296
-#names(letslook)
-table(letslook$neighbors186)
-##
-## shikimate
-## 674 378
-names(letslook)[which(colnames(letslook)=="neighbors186")] <- "shikimate"
-letslook$shikimate <-factor(ifelse(letslook$shikimate=="shikimate",1,0))
-table(letslook$shikimate)
-##
-## 0 1
-## 674 378
-#names(letslook)
-table(letslook$neighbors187)
-##
-## tryptophan_synthesis
-## 961 91
-names(letslook)[which(colnames(letslook)=="neighbors187")] <- "tryptophan_synthesis"
-letslook$tryptophan_synthesis <-factor(ifelse(letslook$tryptophan_synthesis=="tryptophan_synthesis",1,0))
-table(letslook$tryptophan_synthesis)
-##
-## 0 1
-## 961 91
-#names(letslook)
-table(letslook$neighbors189)
-##
-## phenylalanine_tyrosine_synth
-## 1029 23
-names(letslook)[which(colnames(letslook)=="neighbors189")] <- "phenylalanine_tyrosine_synth"
-letslook$phenylalanine_tyrosine_synth <-factor(ifelse(letslook$phenylalanine_tyrosine_synth=="phenylalanine_tyrosine_synth",1,0))
-table(letslook$phenylalanine_tyrosine_synth)
-##
-## 0 1
-## 1029 23
-#names(letslook)
-table(letslook$neighbors200)
-##
-## histidine_synthesis
-## 836 216
-names(letslook)[which(colnames(letslook)=="neighbors200")] <- "histidine_synthesis"
-letslook$histidine_synthesis <-factor(ifelse(letslook$histidine_synthesis=="histidine_synthesis",1,0))
-table(letslook$histidine_synthesis)
-##
-## 0 1
-## 836 216
-#names(letslook)
-table(letslook$neighbors202)
-##
-## valine_synthesis
-## 1003 49
-names(letslook)[which(colnames(letslook)=="neighbors202")] <- "valine_synthesis"
-letslook$valine_synthesis <-factor(ifelse(letslook$valine_synthesis=="valine_synthesis",1,0))
-table(letslook$valine_synthesis)
-##
-## 0 1
-## 1003 49
-#names(letslook)
-table(letslook$neighbors190)
-##
-## luecine_isoluecine_synthesis
-## 1002 50
-names(letslook)[which(colnames(letslook)=="neighbors190")] <- "luecine_isoluecine_synthesis"
-letslook$luecine_isoluecine_synthesis <-factor(ifelse(letslook$luecine_isoluecine_synthesis=="luecine_isoluecine_synthesis",1,0))
-table(letslook$luecine_isoluecine_synthesis)
-##
-## 0 1
-## 1002 50
-#names(letslook)
-
-indx <- sapply(letslook, is.factor)
-letslook[indx] <- lapply(letslook[indx], function(x) as.numeric(as.character(x)))
-
-
-upset(letslook,sets = c("rtca","cbb" ,"lysine_synthesis","asparagine_synth","citruline_synthesis","ornithine_synthesis","arginine_synthesis","shikimate","tryptophan_synthesis","phenylalanine_tyrosine_synth","luecine_isoluecine_synthesis","histidine_synthesis","valine_synthesis"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE)
-
-sulfy <- read.csv("cfix_sulfide_merged_binary default node.csv")
-#names(sulfy)
-
-sulfy1 <- sulfy[1:890,c(131:135,127,129,5,22,27,68,107)]
-
-which(colnames(sulfy1)=="sbp")
-## [1] 7
-#names(sulfy1)
-names(sulfy1)[12] <- "cbb"
-sulfy1$cbb <-factor(ifelse(sulfy1$cbb=="cbb",1,0))
-table(sulfy1$cbb)
-##
-## 0 1
-## 370 520
-names(sulfy1)[11] <- "rtca"
-sulfy1$rtca <-factor(ifelse(sulfy1$rtca=="rtca",1,0))
-table(sulfy1$rtca)
-##
-## 0 1
-## 741 149
-indx <- sapply(sulfy1, is.factor)
-sulfy1[indx] <- lapply(sulfy1[indx], function(x) as.numeric(as.character(x)))
-
-#str(sulfy1)
-sulfy1[is.na(sulfy1)] = 0
-indx <- sapply(sulfy1, is.factor)
-sulfy1[indx] <- lapply(sulfy1[indx], function(x) as.numeric(as.character(x)))
-
-#names(sulfy1)
-upset(sulfy1,sets = c("rtca","cbb" ,"sqr","fccA","sbp","sox","soe","sreA","dsr_tusA","apr_sat","qmoABhdrCB"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE)
-
-electron <- read.csv("rtca_cbb_hyd_etc_atpases_merged_binary default node.csv")
-#names(electron)
-
-electro1 <- electron[,c(94,68,70,12,25,38,39,63,84,86,106,107)]
-
-#names(electro1)
-names(electro1)[3] <- "cbb"
-electro1$cbb <-factor(ifelse(electro1$cbb=="cbb",1,0))
-table(electro1$cbb)
-##
-## 0 1
-## 485 520
-names(electro1)[2] <- "rtca"
-electro1$rtca <-factor(ifelse(electro1$rtca=="rtca",1,0))
-table(electro1$rtca)
-##
-## 0 1
-## 856 149
-indx <- sapply(electro1, is.factor)
-electro1[indx] <- lapply(electro1[indx], function(x) as.numeric(as.character(x)))
-
-#str(electro1)
-electro1[is.na(electro1)] = 0
-indx <- sapply(electro1, is.factor)
-electro1[indx] <- lapply(electro1[indx], function(x) as.numeric(as.character(x)))
-
-#names(electro1)
-upset(electro1,sets = c("rtca","cbb" ,"nuo","pet","cco","mbx","F1atpase","V1atpase","V2atpase","hyd1e","hyd3b"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE)
-
-de<-read.csv('entotal_202200815_use_this_corrected_locus_tags.csv',header=TRUE,stringsAsFactors=FALSE)
-
-sig<-de[de$adj.P.Val<=0.05,]
-sig0<-sig[sig$logFC<=-1,]
-sig1<-sig[sig$logFC>=1,]
-sig<-rbind(sig0,sig1)
-
-
-a<-subset(sig,comparison=='LS_vs_HS_noN_HO')
-b<-subset(sig,comparison=='LS_vs_HS_N_HO')
-c<-subset(sig,comparison=='H2_vs_HS_N_HO')
-d<-subset(sig,comparison=='H2_vs_HS_noN_HO')
-e<-subset(sig,comparison=='SW_vs_HS_noN_HO')
-f<-subset(sig,comparison=='H2_vs_LS_N_HO')
-g<-subset(sig,comparison=='H2_vs_LS_noN_HO')
-h<-subset(sig,comparison=='SW_vs_LS_noN_HO')
-
-#i<-subset(sig,comparison=='LO_vs_HO_H2_N')no de!!!
-j <- subset(sig,comparison=='LO_vs_HO_H2_noN')
-k <- subset(sig,comparison=='LO_vs_HO_HS_N')
-
-l <- subset(sig,comparison=='noN_vs_N_H2_HO')
-m <- subset(sig,comparison=='noN_vs_N_HS_HO')
-n <- subset(sig,comparison=='noN_vs_N_LS_HO')
-
-sulfide<-rbind(a,b,c,d,e,f,g,h)
-
-
-no_hydrogen_sulfide<-rbind(a,b,e,h)
-
-sulfide_nolowSvsH2 <- rbind(a,b,c,d,e)
-
-oxygen <- rbind(j,k)
-
-nitrogen <- rbind(l,m,n)
-
-my.colors.sulfide<-
-scale_color_manual(values=c(
-'LS_vs_HS_noN_HO'='#A982AA',
-'LS_vs_HS_N_HO'='#8E4162',
-'H2_vs_HS_N_HO'='#404E7C',
-'H2_vs_HS_noN_HO'='#6C91BF',
-'SW_vs_HS_noN_HO'='#EEA58C',
-'H2_vs_LS_N_HO'='#7EA172',
-'H2_vs_LS_noN_HO'='#C7CB85',
-'SW_vs_LS_noN_HO'='#000000'))
-
-
-my.colors.oxygen <-
-scale_color_manual(values=c(
- 'LO_vs_HO_HS_N'="#022b3a",
- 'LO_vs_HO_H2_noN'="#2a9d8f"))
-
-my.colors.nitrogen <-
- scale_color_manual(values=c(
- 'noN_vs_N_H2_HO'="#264653",
- 'noN_vs_N_HS_HO'="#2a9d8f",
- 'noN_vs_N_LS_HO'="#e9c46a"))
-
-
-my.colors.sulfide_nolowSvsH2 <-
- scale_color_manual(values=c(
- 'LS_vs_HS_noN_HO'='#A982AA',
-'LS_vs_HS_N_HO'='#8E4162',
-'H2_vs_HS_N_HO'='#404E7C',
-'H2_vs_HS_noN_HO'='#6C91BF',
-'SW_vs_HS_noN_HO'='#EEA58C'))
-
-
-
-my.theme <- theme(axis.title= element_text(face = "plain",size = 20),
- axis.text = element_text(face = "plain",size = 15, colour = "black"),
- plot.caption = element_text(hjust = 0),
- panel.background=element_rect(fill="white"),
- panel.border=element_rect(colour="black",fill=NA,linewidth = 1),
- axis.text.x = element_text(angle = 90),
- axis.line.x.top = element_blank(),
- )
-
-#####setup for individual comparisons
-
-sig$Threshold<-cut(sig$logFC,breaks=c(-Inf,1,Inf),labels=c("<=1",">1"))
-
-LS_vs_HS_noN_HO <-subset(sig,comparison=='LS_vs_HS_noN_HO')
-LS_vs_HS_N_HO <-subset(sig,comparison=='LS_vs_HS_N_HO')
-H2_vs_HS_N_HO <-subset(sig,comparison=='H2_vs_HS_N_HO')
-H2_vs_HS_noN_HO <-subset(sig,comparison=='H2_vs_HS_noN_HO')
-SW_vs_HS_noN_HO <-subset(sig,comparison=='SW_vs_HS_noN_HO')
-LO_vs_HO_H2_noN<- subset(sig,comparison=='LO_vs_HO_H2_noN')
-LO_vs_HO_HS_N <- subset(sig,comparison=='LO_vs_HO_HS_N')
-
-H2_vs_LS_N_HO<-subset(sig,comparison=='H2_vs_LS_N_HO')
-H2_vs_LS_noN_HO<-subset(sig,comparison=='H2_vs_LS_noN_HO')
-SW_vs_LS_noN_HO<-subset(sig,comparison=='SW_vs_LS_noN_HO')
-
-noN_vs_N_H2_HO <- subset(sig,comparison=='noN_vs_N_H2_HO')
-noN_vs_N_HS_HO <- subset(sig,comparison=='noN_vs_N_HS_HO')
-noN_vs_N_LS_HO <- subset(sig,comparison=='noN_vs_N_LS_HO')
-
-my.colors.ind.comparisons <-
- scale_color_manual(values=c(
- '>1'='#F2444D',
-'<=1'='#3BD4CC'))
-
-###### sulfide in gold
-
-
-names(sulfide_hub_gold)
-## [1] "locusID" "gene" "condition" "moduleFil"
-names(sulfide_hub_gold)[2] <- "Predicted_function"
-names(sulfide_nolowSvsH2)
-## [1] "X.1" "locus_ID_andre_new"
-## [3] "geneid" "locus_ID_andre_old"
-## [5] "start.x" "stop.x"
-## [7] "gene_letters_andre" "locus_tag_andre"
-## [9] "accession_andre" "start"
-## [11] "stop" "notes"
-## [13] "Ontology" "Inference"
-## [15] "locus_tag" "product"
-## [17] "protein_id" "gene_letters.notes"
-## [19] "NCBI_ID" "annotations_from_andre"
-## [21] "target_id" "Accession"
-## [23] "Predicted_function" "functional_role"
-## [25] "sub_role" "sub_role2"
-## [27] "Protein" "Accession...Protein"
-## [29] "seed_ortholog_evalue" "seed_ortholog_score"
-## [31] "best_tax_level" "Preferred_name"
-## [33] "GOs" "X"
-## [35] "KEGG_ko" "KEGG_Pathway"
-## [37] "KEGG_Module" "KEGG_Reaction"
-## [39] "KEGG_rclass" "BRITE"
-## [41] "KEGG_TC" "CAZy"
-## [43] "BiGG_Reaction" "tax_scope"
-## [45] "eggNOG_OGs" "bestOG"
-## [47] "COG_Functional_Category" "eggNOG_free_text_description"
-## [49] "logFC" "AveExpr"
-## [51] "t" "P.value"
-## [53] "adj.P.Val" "B"
-## [55] "comparison"
-names(sulfide_nolowSvsH2)[2] <- "locusID"
-
-names(sulfide_nolowSvsH2)
-## [1] "X.1" "locusID"
-## [3] "geneid" "locus_ID_andre_old"
-## [5] "start.x" "stop.x"
-## [7] "gene_letters_andre" "locus_tag_andre"
-## [9] "accession_andre" "start"
-## [11] "stop" "notes"
-## [13] "Ontology" "Inference"
-## [15] "locus_tag" "product"
-## [17] "protein_id" "gene_letters.notes"
-## [19] "NCBI_ID" "annotations_from_andre"
-## [21] "target_id" "Accession"
-## [23] "Predicted_function" "functional_role"
-## [25] "sub_role" "sub_role2"
-## [27] "Protein" "Accession...Protein"
-## [29] "seed_ortholog_evalue" "seed_ortholog_score"
-## [31] "best_tax_level" "Preferred_name"
-## [33] "GOs" "X"
-## [35] "KEGG_ko" "KEGG_Pathway"
-## [37] "KEGG_Module" "KEGG_Reaction"
-## [39] "KEGG_rclass" "BRITE"
-## [41] "KEGG_TC" "CAZy"
-## [43] "BiGG_Reaction" "tax_scope"
-## [45] "eggNOG_OGs" "bestOG"
-## [47] "COG_Functional_Category" "eggNOG_free_text_description"
-## [49] "logFC" "AveExpr"
-## [51] "t" "P.value"
-## [53] "adj.P.Val" "B"
-## [55] "comparison"
-sulfide_comp_DE <- sulfide_nolowSvsH2[,c(2,3,5,6,7,9:14,16:55)]
-
-shg_vector <- sulfide_hub_gold[,1]
-shg_vector
-## [1] "gene-L0Y14_RS01985" "gene-L0Y14_RS07695" "gene-L0Y14_RS00080"
-## [4] "gene-L0Y14_RS12030" "gene-L0Y14_RS12025" "gene-L0Y14_RS12035"
-## [7] "gene-L0Y14_RS11990" "gene-L0Y14_RS11995" "gene-L0Y14_RS12020"
-## [10] "gene-L0Y14_RS12015" "gene-L0Y14_RS12045" "gene-L0Y14_RS12000"
-## [13] "gene-L0Y14_RS11980" "gene-L0Y14_RS11975" "gene-L0Y14_RS11965"
-## [16] "gene-L0Y14_RS12040" "gene-L0Y14_RS11985" "gene-L0Y14_RS12010"
-## [19] "gene-L0Y14_RS11255" "gene-L0Y14_RS10315" "gene-L0Y14_RS10410"
-## [22] "gene-L0Y14_RS12005"
-shg_DE <- subset(sulfide_comp_DE, locusID %in% shg_vector)
-
-shg_DE_names_added <- shg_DE %>%
- left_join(sulfide_hub_gold, by = "locusID", suffix = c("", "_sulfide_hub_gold"))
-
-names(shg_DE_names_added)
-## [1] "locusID" "geneid"
-## [3] "start.x" "stop.x"
-## [5] "gene_letters_andre" "accession_andre"
-## [7] "start" "stop"
-## [9] "notes" "Ontology"
-## [11] "Inference" "product"
-## [13] "protein_id" "gene_letters.notes"
-## [15] "NCBI_ID" "annotations_from_andre"
-## [17] "target_id" "Accession"
-## [19] "Predicted_function" "functional_role"
-## [21] "sub_role" "sub_role2"
-## [23] "Protein" "Accession...Protein"
-## [25] "seed_ortholog_evalue" "seed_ortholog_score"
-## [27] "best_tax_level" "Preferred_name"
-## [29] "GOs" "X"
-## [31] "KEGG_ko" "KEGG_Pathway"
-## [33] "KEGG_Module" "KEGG_Reaction"
-## [35] "KEGG_rclass" "BRITE"
-## [37] "KEGG_TC" "CAZy"
-## [39] "BiGG_Reaction" "tax_scope"
-## [41] "eggNOG_OGs" "bestOG"
-## [43] "COG_Functional_Category" "eggNOG_free_text_description"
-## [45] "logFC" "AveExpr"
-## [47] "t" "P.value"
-## [49] "adj.P.Val" "B"
-## [51] "comparison" "Predicted_function_sulfide_hub_gold"
-## [53] "condition" "moduleFil"
-tesstes <- shg_DE_names_added[,c(19,28,52)]
-shg_DE2 <- shg_DE_names_added[,c(1:18,20:54)]
-names(shg_DE2)
-## [1] "locusID" "geneid"
-## [3] "start.x" "stop.x"
-## [5] "gene_letters_andre" "accession_andre"
-## [7] "start" "stop"
-## [9] "notes" "Ontology"
-## [11] "Inference" "product"
-## [13] "protein_id" "gene_letters.notes"
-## [15] "NCBI_ID" "annotations_from_andre"
-## [17] "target_id" "Accession"
-## [19] "functional_role" "sub_role"
-## [21] "sub_role2" "Protein"
-## [23] "Accession...Protein" "seed_ortholog_evalue"
-## [25] "seed_ortholog_score" "best_tax_level"
-## [27] "Preferred_name" "GOs"
-## [29] "X" "KEGG_ko"
-## [31] "KEGG_Pathway" "KEGG_Module"
-## [33] "KEGG_Reaction" "KEGG_rclass"
-## [35] "BRITE" "KEGG_TC"
-## [37] "CAZy" "BiGG_Reaction"
-## [39] "tax_scope" "eggNOG_OGs"
-## [41] "bestOG" "COG_Functional_Category"
-## [43] "eggNOG_free_text_description" "logFC"
-## [45] "AveExpr" "t"
-## [47] "P.value" "adj.P.Val"
-## [49] "B" "comparison"
-## [51] "Predicted_function_sulfide_hub_gold" "condition"
-## [53] "moduleFil"
-names(shg_DE2)[51] <- "Predicted_function"
-
-#write_csv(shg_DE2,"shg_DE2.csv")
-
-###now I have to reorder this nonsense
-list(unique(shg_DE2$locusID))
-## [[1]]
-## [1] "gene-L0Y14_RS10410" "gene-L0Y14_RS00080" "gene-L0Y14_RS01985"
-## [4] "gene-L0Y14_RS07695" "gene-L0Y14_RS11995" "gene-L0Y14_RS11990"
-## [7] "gene-L0Y14_RS11985" "gene-L0Y14_RS11965" "gene-L0Y14_RS10315"
-## [10] "gene-L0Y14_RS11255" "gene-L0Y14_RS11980" "gene-L0Y14_RS12045"
-## [13] "gene-L0Y14_RS12000" "gene-L0Y14_RS11975" "gene-L0Y14_RS12005"
-## [16] "gene-L0Y14_RS12015" "gene-L0Y14_RS12020" "gene-L0Y14_RS12010"
-## [19] "gene-L0Y14_RS12040" "gene-L0Y14_RS12030" "gene-L0Y14_RS12025"
-## [22] "gene-L0Y14_RS12035"
-list(unique(shg_DE2$Predicted_function))
-## [[1]]
-## [1] "leuC" "nitrogen regulation protein NR(II)"
-## [3] "nirB" "frdB_1"
-## [5] "flxD" "hdrA"
-## [7] "korC" "hdrB"
-## [9] "U32 family peptidase" "DNA repair protein RecN"
-## [11] "korB_1" "mdh_1"
-## [13] "flxC" "korA_1"
-## [15] "flxB" "aclB_full"
-## [17] "aclA" "flxA"
-## [19] "pntB" "acnA"
-## [21] "idh" "pntA"
-order <- c("hdrB","korA_1","korB_1","korC","hdrA","flxD","flxC","flxB","flxA","aclB_full","aclA","idh","acnA","pntA","pntB","mdh_1","frdB_1","nitrogen regulation protein NR(II)","nirB","leuC","U32 family peptidase", "DNA repair protein RecN")
-
-
-
-shg_DE.plot <-ggplot(shg_DE2,
-aes(x=factor(Predicted_function,levels = order), y=logFC,color=comparison))
-
-shg_DE.plot +
- ggtitle("Hub genes in gold module")+
- geom_jitter(size=5,shape=18, width = 0.32, height = 0.32)+
- geom_vline(xintercept = c(1.5,2.5,3.5, 4.5, 5.5,6.5,7.5,8.5,9.5,10.5,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5), color = "grey") +
- my.colors.sulfide_nolowSvsH2 +
- my.theme
-
-####now doing this for sulfide_teal, the otehr module that is significant for sulfide
-names(sul.teal)
-## [1] "locusID" "gene" "condition" "moduleFil"
-names(sul.teal)[2] <- "Predicted_function"
-
-sht_vector <- sul.teal[,1]
-#sht_vector
-sht_DE <- subset(sulfide_comp_DE, locusID %in% sht_vector)
-#names(sht_DE)
-
-sht_DE_names_added <- sht_DE %>%
- left_join(sul.teal, by = "locusID", suffix = c("", "_sulfide_hub_teal"))
-
-#names(sht_DE_names_added)
-sht_DE2 <- sht_DE_names_added[,c(1:18,20:54)]
-#names(sht_DE2)
-names(sht_DE2)[51] <- "Predicted_function"
-
-
-names_shtDE <- unique(sht_DE$locusID)
-
-setdiff(names_shtDE,sht_vector)
-## character(0)
-### shortening some of the names
-
-#write.csv(sht_DE2,"sht_DE2.csv")
-getwd()
-## [1] "/Users/jessicamitchell/Library/CloudStorage/OneDrive-HarvardUniversity/OD_documents/Cfixpaper"
-###pulling up dataframe with gene names and functions added
-sht_DE3 <- read.csv("sht_DE2.csv")
-
-
-
-sht_DE3.plot <-ggplot(sht_DE3,
-aes(x=factor(Preferred_name), y=logFC,color=comparison))
-
-sht_DE3.plot +
- ggtitle("Hub genes in teal module")+
- geom_point(size=5,shape=18) +
- my.colors.sulfide_nolowSvsH2 +
- my.theme
-
-###Going to have to add my own short names to make above figure run right...
-###### oxygen in purple
-
-names(oxy_purple)
-## [1] "locusID" "gene" "condition" "moduleFil"
-names(oxy_purple)[2] <- "Predicted_function"
-
-names(oxygen)[2] <- "locusID"
-
-ohp_vector <- oxy_purple[,1]
-#ohp_vector
-ohp_DE <- subset(oxygen, locusID %in% ohp_vector)
-#names(ohp_DE)
-
-###just the locusID and Preferred_name
-
-ohp_DE1 <- ohp_DE[,c(1,6,28)]
-ohp_DE2 <- unique(ohp_DE1)
-ohp3 <- merge(ohp_DE2, oxy_purple)
-
-names(ohp3)[3] <- "short.names"
-
-#names(ohp_DE)
-#write_csv(ohp3, "ohp4.csv")
-#added short.names and notes
-ohp4 <- read.csv("ohp4.csv")
-
-ohp_DE3 <- merge(ohp_DE,ohp4, by = "locusID")
-
-ohp_DE.plot <-ggplot(ohp_DE3,
-aes(x=factor(short.names), y=logFC,color=comparison))
-
-
-
-
-
-ohp_DE.plot +
- ggtitle("Hub genes in purple module")+
- geom_point(size=5,shape=18) +
- my.colors.oxygen +
- my.theme
-
-###### oxygen in black
-
-#names(oxy_b)
-
-names(oxy_b)[2] <- "Predicted_function"
-
-
-
-ohb_vector <- oxy_b[,1]
-ohb_vector
-## [1] "gene-L0Y14_RS14085" "gene-L0Y14_RS12670" "gene-L0Y14_RS08400"
-## [4] "gene-L0Y14_RS05880" "gene-L0Y14_RS13180" "gene-L0Y14_RS15950"
-## [7] "gene-L0Y14_RS14595" "gene-L0Y14_RS13130" "gene-L0Y14_RS14970"
-## [10] "gene-L0Y14_RS03820" "gene-L0Y14_RS07270" "gene-L0Y14_RS04045"
-## [13] "gene-L0Y14_RS03375" "gene-L0Y14_RS14700" "gene-L0Y14_RS11330"
-## [16] "gene-L0Y14_RS15890" "gene-L0Y14_RS06760" "gene-L0Y14_RS00450"
-## [19] "gene-L0Y14_RS08420" "gene-L0Y14_RS10875" "gene-L0Y14_RS00105"
-## [22] "gene-L0Y14_RS01330" "gene-L0Y14_RS16130" "gene-L0Y14_RS02180"
-## [25] "gene-L0Y14_RS08830" "gene-L0Y14_RS06620" "gene-L0Y14_RS01145"
-## [28] "gene-L0Y14_RS11140" "gene-L0Y14_RS14660" "gene-L0Y14_RS09840"
-## [31] "gene-L0Y14_RS03075" "gene-L0Y14_RS06585" "gene-L0Y14_RS08700"
-ohb_DE <- subset(oxygen, locusID %in% ohb_vector)
-#names(ohb_DE)
-
-###just the locusID,accession and Preferred_name
-
-ohb <- ohb_DE[,c(2,9,20,32)]
-
-ohb <- unique(ohb)
-names(ohb)[4] <- "short.names"
-
-
-#write_csv(ohb, "ohb2.csv")
-#added short.names and notes
-ohb2 <- read.csv("ohb2.csv")
-
-ohb2_DE3 <- merge(ohb_DE,ohb2, by = "locusID")
-
-ohb2_DE3.plot <-ggplot(ohb2_DE3,
-aes(x=factor(short.names), y=logFC,color=comparison))
-
-
-
-
-
-ohb2_DE3.plot +
- ggtitle("Hub genes in black module")+
- geom_point(size=5,shape=18) +
- my.colors.oxygen +
- my.theme
-
-######putting all of this together, so that I have a workable file I can use in adobe illustrator
-
-all_together <- read.csv("wgcna_trait_module_hub_genes.csv")
-names(all_together)
-## [1] "locusID" "short.names" "moduleFil" "func_category"
-## [5] "X"
-all_together <- all_together[,1:4]
-
-
-
-shg_DE_av <- shg_DE2[,c(1,44,50,53)]
-
-head(shg_DE_av)
-## locusID logFC comparison moduleFil
-## 1 gene-L0Y14_RS10410 -6.607549 LS_vs_HS_noN_HO darkgoldenrod1
-## 2 gene-L0Y14_RS00080 -3.742340 LS_vs_HS_noN_HO darkgoldenrod1
-## 3 gene-L0Y14_RS01985 -5.245126 LS_vs_HS_noN_HO darkgoldenrod1
-## 4 gene-L0Y14_RS07695 4.361391 LS_vs_HS_noN_HO darkgoldenrod1
-## 5 gene-L0Y14_RS11995 2.598792 LS_vs_HS_noN_HO darkgoldenrod1
-## 6 gene-L0Y14_RS11990 2.739793 LS_vs_HS_noN_HO darkgoldenrod1
-# Calculate the average logFC for each locusID in gold sulfide
-
-avg_logFC_df <- shg_DE_av %>%
- group_by(locusID, moduleFil) %>%
- summarise(
- avg_logFC = mean(logFC, na.rm = TRUE),
- sd_logFC = sd(logFC, na.rm = TRUE)
- ) %>%
- ungroup()
-## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-avg_logFC_df <- shg_DE_av %>%
- group_by(locusID, moduleFil) %>%
- summarise(
- n = n(), # Add the count of non-NA values
- avg_logFC = mean(logFC, na.rm = TRUE),
- sd_logFC = sd(logFC, na.rm = TRUE)
- ) %>%
- ungroup()
-## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-# Print the result
-head(avg_logFC_df)
-## # A tibble: 6 × 5
-## locusID moduleFil n avg_logFC sd_logFC
-## <chr> <chr> <int> <dbl> <dbl>
-## 1 gene-L0Y14_RS00080 darkgoldenrod1 3 -4.58 0.732
-## 2 gene-L0Y14_RS01985 darkgoldenrod1 3 -5.89 0.580
-## 3 gene-L0Y14_RS07695 darkgoldenrod1 4 5.48 1.11
-## 4 gene-L0Y14_RS10315 darkgoldenrod1 3 2.38 0.487
-## 5 gene-L0Y14_RS10410 darkgoldenrod1 3 -5.97 0.737
-## 6 gene-L0Y14_RS11255 darkgoldenrod1 4 2.70 0.991
-avg_logFC_df <- avg_logFC_df %>%
- mutate(treatment_logFC_ave = "logFC_sulfide_limiting")
-
-# Print the result
-head(avg_logFC_df)
-## # A tibble: 6 × 6
-## locusID moduleFil n avg_logFC sd_logFC treatment_logFC_ave
-## <chr> <chr> <int> <dbl> <dbl> <chr>
-## 1 gene-L0Y14_RS00080 darkgoldenrod1 3 -4.58 0.732 logFC_sulfide_limi…
-## 2 gene-L0Y14_RS01985 darkgoldenrod1 3 -5.89 0.580 logFC_sulfide_limi…
-## 3 gene-L0Y14_RS07695 darkgoldenrod1 4 5.48 1.11 logFC_sulfide_limi…
-## 4 gene-L0Y14_RS10315 darkgoldenrod1 3 2.38 0.487 logFC_sulfide_limi…
-## 5 gene-L0Y14_RS10410 darkgoldenrod1 3 -5.97 0.737 logFC_sulfide_limi…
-## 6 gene-L0Y14_RS11255 darkgoldenrod1 4 2.70 0.991 logFC_sulfide_limi…
-shg_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
-
-#write.csv(shg_DE_av2,"shg_DE_av2.csv")
-dim(shg_DE_av2)
-## [1] 22 9
-n_val_sug <- shg_DE_av2[,c(1,3)]
-
-####added "broad_category" column
-shg_DE_av3 <- read.csv("shg_DE_av2.csv")
-dim(shg_DE_av3)
-## [1] 22 10
-####now doing this for the teal sulfide module
-#names(sht_DE3)
-v <- unique(sht_DE3$locusID)
-sht_DE3_av <- sht_DE3[,c(2,45,51,54)]
-
-head(sht_DE3_av)
-## locusID logFC comparison moduleFil
-## 1 gene-L0Y14_RS00400 -5.671978 LS_vs_HS_noN_HO teal
-## 2 gene-L0Y14_RS01755 -1.093415 LS_vs_HS_noN_HO teal
-## 3 gene-L0Y14_RS04610 -1.158590 LS_vs_HS_noN_HO teal
-## 4 gene-L0Y14_RS05075 -1.192507 LS_vs_HS_noN_HO teal
-## 5 gene-L0Y14_RS05080 -2.083575 LS_vs_HS_noN_HO teal
-## 6 gene-L0Y14_RS07060 -1.072405 LS_vs_HS_noN_HO teal
-avg_logFC_df <- sht_DE3_av %>%
- group_by(locusID, moduleFil) %>%
- summarise(
- avg_logFC = mean(logFC, na.rm = TRUE),
- sd_logFC = sd(logFC, na.rm = TRUE)
- ) %>%
- ungroup()
-## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-avg_logFC_df <- sht_DE3_av %>%
- group_by(locusID, moduleFil) %>%
- summarise(
- n = n(), # Add the count of non-NA values
- avg_logFC = mean(logFC, na.rm = TRUE),
- sd_logFC = sd(logFC, na.rm = TRUE)
- ) %>%
- ungroup()
-## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-# Print the result
-head(avg_logFC_df)
-## # A tibble: 6 × 5
-## locusID moduleFil n avg_logFC sd_logFC
-## <chr> <chr> <int> <dbl> <dbl>
-## 1 gene-L0Y14_RS00400 teal 4 -6.35 0.877
-## 2 gene-L0Y14_RS00515 teal 3 1.86 0.534
-## 3 gene-L0Y14_RS00525 teal 4 2.52 0.570
-## 4 gene-L0Y14_RS00625 teal 3 1.86 0.534
-## 5 gene-L0Y14_RS00655 teal 4 2.62 0.768
-## 6 gene-L0Y14_RS01660 teal 3 -2.42 0.0345
-avg_logFC_df <- avg_logFC_df %>%
- mutate(treatment_logFC_ave = "logFC_sulfide_limiting")
-
-# Print the result
-head(avg_logFC_df)
-## # A tibble: 6 × 6
-## locusID moduleFil n avg_logFC sd_logFC treatment_logFC_ave
-## <chr> <chr> <int> <dbl> <dbl> <chr>
-## 1 gene-L0Y14_RS00400 teal 4 -6.35 0.877 logFC_sulfide_limiting
-## 2 gene-L0Y14_RS00515 teal 3 1.86 0.534 logFC_sulfide_limiting
-## 3 gene-L0Y14_RS00525 teal 4 2.52 0.570 logFC_sulfide_limiting
-## 4 gene-L0Y14_RS00625 teal 3 1.86 0.534 logFC_sulfide_limiting
-## 5 gene-L0Y14_RS00655 teal 4 2.62 0.768 logFC_sulfide_limiting
-## 6 gene-L0Y14_RS01660 teal 3 -2.42 0.0345 logFC_sulfide_limiting
-sht_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
-
-dim(sht_DE_av2)
-## [1] 50 9
-names(sht_DE_av2)
-## [1] "locusID" "moduleFil.x" "n"
-## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
-## [7] "short.names" "moduleFil.y" "func_category"
-sht_n <- sht_DE_av2[,c(1,3)]
-
-#write.csv(sht_DE_av2,"sht_DE_av2.csv")
-#write.csv(sht_DE_av2,"sht_DE_av5.csv")
-###added broad categories so I can use
-sht_DE_av3_old <- read.csv("sht_DE_av3.csv")
-sht_DE_av5_new <- read.csv("sht_DE_av5.csv")
-
-
-names(sht_DE_av5_new)
-## [1] "X" "locusID" "moduleFil.x"
-## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
-## [7] "short.names" "moduleFil.y" "func_category"
-## [10] "broad_category"
-names(sht_DE_av3_old)
-## [1] "X" "locusID" "moduleFil.x"
-## [4] "avg_logFC" "treatment_logFC_ave" "short.names"
-## [7] "moduleFil.y" "func_category" "broad_category"
-vec1 <- (sht_DE_av3_old)[2]
-vec2 <- (sht_DE_av5_new)[2]
-differences <- anti_join(vec1,vec2)
-## Joining with `by = join_by(locusID)`
-whatdiff <- merge(differences,sht_DE_av3_old, by = "locusID", all.x = TRUE)
-whatdiff$short.names
-## [1] "xerC" "Na_H_Exchanger" "pth"
-## [4] "dsrE_3" "AAA_LonB" "dxs"
-## [7] "hypo" "gsiA" "recQ"
-## [10] "glyco_like_mftF " "tmk" "frdA_1"
-## [13] "ppdk" "cvpA" "hflC"
-## [16] "YdcH" "malQ" "HpcH"
-## [19] "recD" "gspB" "trxA"
-## [22] "glgP" "urvD" "yidC"
-## [25] "yidD"
-sht_DE_av3_old_filtered <- sht_DE_av3_old %>%
- filter(locusID %in% unlist(vec2))
-
-names(sht_DE_av5_new)
-## [1] "X" "locusID" "moduleFil.x"
-## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
-## [7] "short.names" "moduleFil.y" "func_category"
-## [10] "broad_category"
-sht_DE_new_justSD <- sht_DE_av5_new[,c(2,5)]
-sht_DE_new <- merge(sht_DE_av3_old_filtered,sht_DE_new_justSD, by= "locusID",all.x = TRUE)
-
-
-
-
-head(sht_DE_new)
-## locusID X moduleFil.x avg_logFC treatment_logFC_ave short.names
-## 1 gene-L0Y14_RS00400 2 teal -6.353622 logFC_sulfide_limiting torF
-## 2 gene-L0Y14_RS00515 3 teal 1.860711 logFC_sulfide_limiting norB
-## 3 gene-L0Y14_RS00525 4 teal 2.519077 logFC_sulfide_limiting nirS
-## 4 gene-L0Y14_RS00625 5 teal 1.860711 logFC_sulfide_limiting norB1
-## 5 gene-L0Y14_RS00655 6 teal 2.618491 logFC_sulfide_limiting norD2
-## 6 gene-L0Y14_RS01660 9 teal -2.423910 logFC_sulfide_limiting psrA
-## moduleFil.y func_category broad_category
-## 1 sulfide_teal_nitro_midnightblue porin transport
-## 2 sulfide_teal denitrification nitrogen
-## 3 sulfide_teal denitrification nitrogen
-## 4 sulfide_teal denitrification nitrogen
-## 5 sulfide_teal denitrification nitrogen
-## 6 sulfide_teal nucleotide biosynthesis biosynthesis
-## sd_logFC
-## 1 0.87692331
-## 2 0.53357947
-## 3 0.57017737
-## 4 0.53357947
-## 5 0.76846294
-## 6 0.03451484
-#write.csv(sht_DE_new,"sht_DE_new.csv")
-
-# Filter out 'unknown'
-sht_DE_new_filtered <- sht_DE_new %>%
- filter(broad_category != "unknown")
-
-names(sht_DE_new_filtered)
-## [1] "locusID" "X" "moduleFil.x"
-## [4] "avg_logFC" "treatment_logFC_ave" "short.names"
-## [7] "moduleFil.y" "func_category" "broad_category"
-## [10] "sd_logFC"
-names(shg_DE_av3)
-## [1] "X" "locusID" "moduleFil.x"
-## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
-## [7] "short.names" "moduleFil.y" "func_category"
-## [10] "broad_category"
-sht_DE <- sht_DE_new_filtered[,c(1,3:10)]
-shg_DE <- shg_DE_av3[,c(2:10)]
-
-dim(shg_DE)
-## [1] 22 9
-dim(sht_DE)
-## [1] 42 9
-gold_teal <- rbind(sht_DE,shg_DE)
-
-
-
-
-
-gold_teal_filtered <- gold_teal %>%
- filter(broad_category != "unknown")
-
-
-######putting them all together
-
-
-
-
- #reordering the gene names so they appear on th xaxis in the right order
-sht_DE_new_filtered$short.names
-## [1] "torF" "norB" "nirS" "norB1" "norD2"
-## [6] "psrA" "fusA" "trmD" "ibpA" "pyrG"
-## [11] "ftsH" "rimP" "nusA" "fusA2" "napG"
-## [16] "moaA" "fusA_2" "holA" "hupE" "HyiA_small"
-## [21] "isp1" "isp2" "HyiB_large" "frdB_1" "frdC_1"
-## [26] "htpX" "cmoB" "korB_2" "korA_2" "rhlE"
-## [31] "dnaJ" "pfor_3" "ispB" "porin" "clpB"
-## [36] "rho" "ptsP_E1" "feoB" "mcrA" "korA_3"
-## [41] "korB_3" "pfor_like"
-desired_order <- c("moaA","psrA","ispB" ,"pyrG","frdB_1","frdC_1","korA_2","korB_2","korA_3","korB_3", "pfor_3","pfor_like","fusA","fusA_2","fusA2","trmD","ibpA","nusA","mcrA" ,"recQ","holA","cmoB","dnaJ","ftsH","rimP","rhlE","rho","htpX","clpB","hupE","HyiB_large","isp1","isp2","HyiA_small","napG","nirS","norB" ,"norB1","norD2","feoB","porin","ptsP_E1", "torF")
-
-
-
-
-sht_DE_new_filtered$short.names <- factor(sht_DE_new_filtered$short.names, levels = desired_order)
-
-
-
-
-
-
-
-
-
-
-
-
-
-sht_DE_new_filtered$clipped_avg_logFC <- pmin(pmax(sht_DE_new_filtered$avg_logFC, -4), 4)
-
-
-
-
-ggplot(sht_DE_new_filtered, aes(x = short.names, y = avg_logFC, color = clipped_avg_logFC)) +
- geom_point(size = 3) +
- geom_errorbar(aes(ymin = avg_logFC - sd_logFC, ymax = avg_logFC + sd_logFC), width = 0.25) +
- scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+ # specify breaks within this range
-
- labs(title = "Average logFC under sulfide limiting conditions",
- x = "Genes",
- y = "Average LogFC",
- color = "Avg LogFC Value") +
- theme_minimal() +
- theme(axis.text.x = element_text(angle = 45, hjust = 1))+
- facet_grid(~broad_category,scales = "free",space="free")
-
-######putting them all together
-names(sht_DE_new_filtered)
-## [1] "locusID" "X" "moduleFil.x"
-## [4] "avg_logFC" "treatment_logFC_ave" "short.names"
-## [7] "moduleFil.y" "func_category" "broad_category"
-## [10] "sd_logFC" "clipped_avg_logFC"
-names(shg_DE_av3)
-## [1] "X" "locusID" "moduleFil.x"
-## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
-## [7] "short.names" "moduleFil.y" "func_category"
-## [10] "broad_category"
-sht_DE <- sht_DE_new_filtered[,c(1,3:10)]
-shg_DE <- shg_DE_av3[,c(2:10)]
-
-dim(shg_DE)
-## [1] 22 9
-dim(sht_DE)
-## [1] 42 9
-gold_teal <- rbind(sht_DE,shg_DE)
-
-####breaking up this figure by positive and negative fold change
-head(gold_teal)
-## locusID moduleFil.x avg_logFC treatment_logFC_ave short.names
-## 1 gene-L0Y14_RS00400 teal -6.353622 logFC_sulfide_limiting torF
-## 2 gene-L0Y14_RS00515 teal 1.860711 logFC_sulfide_limiting norB
-## 3 gene-L0Y14_RS00525 teal 2.519077 logFC_sulfide_limiting nirS
-## 4 gene-L0Y14_RS00625 teal 1.860711 logFC_sulfide_limiting norB1
-## 5 gene-L0Y14_RS00655 teal 2.618491 logFC_sulfide_limiting norD2
-## 6 gene-L0Y14_RS01660 teal -2.423910 logFC_sulfide_limiting psrA
-## moduleFil.y func_category broad_category
-## 1 sulfide_teal_nitro_midnightblue porin transport
-## 2 sulfide_teal denitrification nitrogen
-## 3 sulfide_teal denitrification nitrogen
-## 4 sulfide_teal denitrification nitrogen
-## 5 sulfide_teal denitrification nitrogen
-## 6 sulfide_teal nucleotide biosynthesis biosynthesis
-## sd_logFC
-## 1 0.87692331
-## 2 0.53357947
-## 3 0.57017737
-## 4 0.53357947
-## 5 0.76846294
-## 6 0.03451484
- ##Filter for avg_logFC > 0
-sulfide_up <- gold_teal[gold_teal$avg_logFC > 0, ]
-
-# Filter for avg_logFC < 0
-sulfide_down <- gold_teal[gold_teal$avg_logFC < 0, ]
-
-
-# Filter out 'unknown'
-sulfide_up_filtered <- sulfide_up %>%
- filter(broad_category != "unknown")
-
-sulfide_down_filtered <- sulfide_down %>%
- filter(broad_category != "unknown")
-
-dim(sulfide_up_filtered)
-## [1] 46 9
-###removing the second frdB, since it was just sig for both the gold and the teal, it has the same fold change and everything...
-sulfide_up_filtered <- sulfide_up_filtered[-28,]
-###makes a clipping for the color (so its not all washed out)
-
-sulfide_up_filtered$clipped_avg_logFC <- pmin(pmax(sulfide_up_filtered$avg_logFC, -4), 4)
-
-sulfide_down_filtered$clipped_avg_logFC <- pmin(pmax(sulfide_down_filtered$avg_logFC, -4), 4)
-
-#write.csv(sulfide_up_filtered,"sulfide_up_filtered2.csv")
-##reordered in excel, faster
-SG_short.name_order <- read.csv("gold_teal_up_short.name.order.csv")
-
-dim(SG_short.name_order)
-## [1] 45 11
-dim(sulfide_up_filtered)
-## [1] 45 10
-desired_order <- SG_short.name_order[,6]
-list(desired_order)
-## [[1]]
-## [1] "norB" "nirS" "norB1" "norD2" "napG"
-## [6] "hupE" "HyiA_small" "isp1" "isp2" "HyiB_large"
-## [11] "frdB_1" "frdC_1" "korB_2" "korA_2" "pfor_3"
-## [16] "hdrB" "korA_1" "korB_1" "korC" "hdrA"
-## [21] "flxD" "flxC" "flxB" "flxA" "aclB_full"
-## [26] "aclA" "idh" "acnA" "pntA" "pntB"
-## [31] "mdh_1" "ibpA" "ftsH" "fusA2" "holA"
-## [36] "htpX" "cmoB" "dnaJ" "porin" "clpB"
-## [41] "peptidase" "recN" "ptsP_E1" "feoB" "moaA"
-sulfide_up_filtered$short.names <- factor(sulfide_up_filtered$short.names, levels = desired_order)
-levels(sulfide_up_filtered$short.names)
-## [1] "norB" "nirS" "norB1" "norD2" "napG"
-## [6] "hupE" "HyiA_small" "isp1" "isp2" "HyiB_large"
-## [11] "frdB_1" "frdC_1" "korB_2" "korA_2" "pfor_3"
-## [16] "hdrB" "korA_1" "korB_1" "korC" "hdrA"
-## [21] "flxD" "flxC" "flxB" "flxA" "aclB_full"
-## [26] "aclA" "idh" "acnA" "pntA" "pntB"
-## [31] "mdh_1" "ibpA" "ftsH" "fusA2" "holA"
-## [36] "htpX" "cmoB" "dnaJ" "porin" "clpB"
-## [41] "peptidase" "recN" "ptsP_E1" "feoB" "moaA"
-sulfide_up_filtered$short.names <- as.factor(sulfide_up_filtered$short.names)
-str(sulfide_up_filtered)
-## 'data.frame': 45 obs. of 10 variables:
-## $ locusID : chr "gene-L0Y14_RS00515" "gene-L0Y14_RS00525" "gene-L0Y14_RS00625" "gene-L0Y14_RS00655" ...
-## $ moduleFil.x : chr "teal" "teal" "teal" "teal" ...
-## $ avg_logFC : num 1.86 2.52 1.86 2.62 4.09 ...
-## $ treatment_logFC_ave: chr "logFC_sulfide_limiting" "logFC_sulfide_limiting" "logFC_sulfide_limiting" "logFC_sulfide_limiting" ...
-## $ short.names : Factor w/ 45 levels "norB","nirS",..: 1 2 3 4 32 33 34 5 45 35 ...
-## $ moduleFil.y : chr "sulfide_teal" "sulfide_teal" "sulfide_teal" "sulfide_teal" ...
-## $ func_category : chr "denitrification" "denitrification" "denitrification" "denitrification" ...
-## $ broad_category : chr "nitrogen" "nitrogen" "nitrogen" "nitrogen" ...
-## $ sd_logFC : num 0.534 0.57 0.534 0.768 1.564 ...
-## $ clipped_avg_logFC : num 1.86 2.52 1.86 2.62 4 ...
-sulfide_up_filtered$short.names
-## [1] norB nirS norB1 norD2 ibpA ftsH
-## [7] fusA2 napG moaA holA hupE HyiA_small
-## [13] isp1 isp2 HyiB_large frdB_1 frdC_1 htpX
-## [19] cmoB korB_2 korA_2 dnaJ pfor_3 porin
-## [25] clpB ptsP_E1 feoB peptidase recN hdrB
-## [31] korA_1 korB_1 korC hdrA flxD flxC
-## [37] flxB flxA aclB_full aclA idh acnA
-## [43] pntA pntB mdh_1
-## 45 Levels: norB nirS norB1 norD2 napG hupE HyiA_small isp1 isp2 ... moaA
-##order of the genes in sulfide_up
-sulfide_up_filtered$short.names <- factor(sulfide_up_filtered$short.names, levels = desired_order)
-
-
-# Extend the desired order with genes from sulfide_down that might not be in desired_order
-extended_order <- unique(c(desired_order, as.character(sulfide_down_filtered$short.names)))
-list(extended_order)
-## [[1]]
-## [1] "norB" "nirS" "norB1" "norD2" "napG"
-## [6] "hupE" "HyiA_small" "isp1" "isp2" "HyiB_large"
-## [11] "frdB_1" "frdC_1" "korB_2" "korA_2" "pfor_3"
-## [16] "hdrB" "korA_1" "korB_1" "korC" "hdrA"
-## [21] "flxD" "flxC" "flxB" "flxA" "aclB_full"
-## [26] "aclA" "idh" "acnA" "pntA" "pntB"
-## [31] "mdh_1" "ibpA" "ftsH" "fusA2" "holA"
-## [36] "htpX" "cmoB" "dnaJ" "porin" "clpB"
-## [41] "peptidase" "recN" "ptsP_E1" "feoB" "moaA"
-## [46] "torF" "psrA" "fusA" "trmD" "pyrG"
-## [51] "rimP" "nusA" "fusA_2" "rhlE" "ispB"
-## [56] "rho" "mcrA" "korA_3" "korB_3" "pfor_like"
-## [61] "glnL" "nirB" "leuC"
-# Set factor levels for both datasets based on the extended order
-sulfide_up_filtered$short.names <- factor(sulfide_up_filtered$short.names, levels = extended_order)
-sulfide_down_filtered$short.names <- factor(sulfide_down_filtered$short.names, levels = extended_order)
-
-
-
-
-####sulfide_up plot#################################################################
-sulfide_up_plot <- ggplot(sulfide_up_filtered, aes(x = short.names, y = avg_logFC, color = clipped_avg_logFC)) +
- geom_point(size = 3) +
- geom_errorbar(aes(ymin = avg_logFC - sd_logFC, ymax = avg_logFC + sd_logFC), width = 0.25) +
- scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
-
- labs(title = "Average logFC under sulfide limiting conditions",
- x = "Genes",
- y = "Average LogFC",
- color = "Avg LogFC Value") +
- theme_minimal() +
-
- theme(axis.text.x = element_text(angle = 45, hjust = 1))+
- facet_grid(~broad_category,scales = "free",space="free")
-
-
-
-
-
-
-
-####sulfide_down plot#################################################################
-sulfide_down_plot <- ggplot(sulfide_down_filtered, aes(x = short.names, y = avg_logFC, color = clipped_avg_logFC)) +
- geom_point(size = 3) +
- geom_errorbar(aes(ymin = avg_logFC - sd_logFC, ymax = avg_logFC + sd_logFC), width = 0.25) +
- scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
-
- labs(title = "Average logFC under sulfide limiting conditions",
- x = "Genes",
- y = "Average LogFC",
- color = "Avg LogFC Value") +
- theme_minimal() +
-
- theme(axis.text.x = element_text(angle = 45, hjust = 1))+
- facet_grid(~broad_category,scales = "free",space="free")
-
-
-
-names(sulfide_up_filtered)
-## [1] "locusID" "moduleFil.x" "avg_logFC"
-## [4] "treatment_logFC_ave" "short.names" "moduleFil.y"
-## [7] "func_category" "broad_category" "sd_logFC"
-## [10] "clipped_avg_logFC"
-names(sulfide_down_filtered)
-## [1] "locusID" "moduleFil.x" "avg_logFC"
-## [4] "treatment_logFC_ave" "short.names" "moduleFil.y"
-## [7] "func_category" "broad_category" "sd_logFC"
-## [10] "clipped_avg_logFC"
-# Combine the datasets with an identifier for each
-sulfide_up_filtered$set <- "Up"
-sulfide_down_filtered$set <- "Down"
-
-combined_dataS <- rbind(sulfide_up_filtered, sulfide_down_filtered)
-# Reverse the levels so that "Up" comes before "Down"
-combined_dataS$set <- factor(combined_dataS$set, levels = c("Up", "Down"))
-
-combined_dataS <- combined_dataS %>%
- mutate(adj_avg_logFC = case_when(
- set == "Up" & avg_logFC < 0 ~ 0,
- set == "Down" & avg_logFC > 0 ~ 0,
- TRUE ~ avg_logFC
- ))
-
-# Plotting
-p <- ggplot(combined_dataS, aes(x = adj_avg_logFC, y = short.names, color = clipped_avg_logFC)) +
- geom_point(size = 3) +
- geom_errorbar(aes(xmin = adj_avg_logFC - sd_logFC, xmax = adj_avg_logFC + sd_logFC), width = 0.25) +
- scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0) +
- labs(title = "Average logFC under sulfide limiting conditions",
- x = "Adjusted Average LogFC",
- y = "Genes",
- color = "Avg LogFC Value") +
- theme_minimal() +
- theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
- facet_grid(cols = vars(set), rows= vars(broad_category), scales = "free_y", space = "free_y")
-
-print(p)
-
-################################OXYGEN PURPLE ANDBLACK##################################################
-
-#####now doing this for purple oxy
-
-#names(ohp_DE3)
-table(ohp_DE3$short.names)
-##
-## bamD cphA cvpA dprE1 FBP fusA2 GH16 hupE hybO hypE hypo
-## 2 2 1 2 1 2 1 2 2 2 8
-## isp2 mrcA nifJ ppdK rocR SgpA_1 tsaC yidC yidD
-## 2 2 2 2 2 2 2 2 1
-ohp_DE3_av <- ohp_DE3[,c(1,49,55,59,57)]
-
-head(ohp_DE3_av)
-## locusID logFC comparison moduleFil short.names
-## 1 gene-L0Y14_RS05880 -4.039158 LO_vs_HO_HS_N purple hypo
-## 2 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN purple hypo
-## 3 gene-L0Y14_RS06635 -5.296420 LO_vs_HO_HS_N purple dprE1
-## 4 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN purple dprE1
-## 5 gene-L0Y14_RS06820 2.418749 LO_vs_HO_H2_noN purple fusA2
-## 6 gene-L0Y14_RS06820 2.176861 LO_vs_HO_HS_N purple fusA2
-####dealing with the fact I have more than one genes that are called "hypo"
-
-
-# Assign a unique number for each distinct locusID with "hypo" genes
-locus_mapping <- ohp_DE3_av %>%
- filter(short.names == "hypo") %>%
- distinct(locusID) %>%
- mutate(hypo_num = row_number())
-
-# Join this with the original dataframe and replace "hypo" genes
-new_df <- ohp_DE3_av %>%
- left_join(locus_mapping, by = "locusID") %>%
- mutate(short.names = ifelse(!is.na(hypo_num) & short.names == "hypo",
- paste0("hypo", hypo_num),
- short.names)) %>%
- select(-hypo_num) # Remove the auxiliary column
-
-# Print the result
-head(new_df)
-## locusID logFC comparison moduleFil short.names
-## 1 gene-L0Y14_RS05880 -4.039158 LO_vs_HO_HS_N purple hypo1
-## 2 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN purple hypo1
-## 3 gene-L0Y14_RS06635 -5.296420 LO_vs_HO_HS_N purple dprE1
-## 4 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN purple dprE1
-## 5 gene-L0Y14_RS06820 2.418749 LO_vs_HO_H2_noN purple fusA2
-## 6 gene-L0Y14_RS06820 2.176861 LO_vs_HO_HS_N purple fusA2
-pox <- new_df
-
-
-
-
-avg_logFC_df <- new_df %>%
- group_by(locusID, moduleFil) %>%
- summarise(avg_logFC = mean(logFC, na.rm = TRUE),
- sd_logFC = sd(logFC, na.rm = TRUE)) %>%
- ungroup()
-## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-###adding the n value too
-avg_logFC_df <- new_df %>%
- group_by(locusID, moduleFil) %>%
- summarise(
- n = n(), # Add the count of non-NA values
- avg_logFC = mean(logFC, na.rm = TRUE),
- sd_logFC = sd(logFC, na.rm = TRUE)
- ) %>%
- ungroup()
-## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-# Print the result
-head(avg_logFC_df)
-## # A tibble: 6 × 5
-## locusID moduleFil n avg_logFC sd_logFC
-## <chr> <chr> <int> <dbl> <dbl>
-## 1 gene-L0Y14_RS05880 purple 2 -3.85 0.270
-## 2 gene-L0Y14_RS06635 purple 2 -4.79 0.718
-## 3 gene-L0Y14_RS06820 purple 2 2.30 0.171
-## 4 gene-L0Y14_RS07045 purple 2 2.42 0.107
-## 5 gene-L0Y14_RS07050 purple 2 4.01 1.56
-## 6 gene-L0Y14_RS07420 purple 2 3.16 0.175
-avg_logFC_df <- avg_logFC_df %>%
- mutate(treatment_logFC_ave = "logFC_oxygen_limiting")
-
-# Print the result
-head(avg_logFC_df)
-## # A tibble: 6 × 6
-## locusID moduleFil n avg_logFC sd_logFC treatment_logFC_ave
-## <chr> <chr> <int> <dbl> <dbl> <chr>
-## 1 gene-L0Y14_RS05880 purple 2 -3.85 0.270 logFC_oxygen_limiting
-## 2 gene-L0Y14_RS06635 purple 2 -4.79 0.718 logFC_oxygen_limiting
-## 3 gene-L0Y14_RS06820 purple 2 2.30 0.171 logFC_oxygen_limiting
-## 4 gene-L0Y14_RS07045 purple 2 2.42 0.107 logFC_oxygen_limiting
-## 5 gene-L0Y14_RS07050 purple 2 4.01 1.56 logFC_oxygen_limiting
-## 6 gene-L0Y14_RS07420 purple 2 3.16 0.175 logFC_oxygen_limiting
-ohp_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
-
-names(ohp_DE_av2)
-## [1] "locusID" "moduleFil.x" "n"
-## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
-## [7] "short.names" "moduleFil.y" "func_category"
-poxn <- ohp_DE_av2[,c(1,3)]
-
-
-######now doing this with oxy black#################
-
-
-
-#names(ohb2_DE3)
-ohb2_DE3_av <- ohb2_DE3[,c(1,49,55,59,57)]
-
-head(ohb2_DE3_av)
-## locusID logFC comparison moduleFil3 short.names
-## 1 gene-L0Y14_RS00450 -2.998400 LO_vs_HO_HS_N black_oxy Hr
-## 2 gene-L0Y14_RS03075 -4.498757 LO_vs_HO_HS_N black_oxy hypo
-## 3 gene-L0Y14_RS03375 -3.606201 LO_vs_HO_HS_N black_oxy fabA
-## 4 gene-L0Y14_RS03820 -4.795972 LO_vs_HO_HS_N black_oxy HIT
-## 5 gene-L0Y14_RS04045 -3.506534 LO_vs_HO_HS_N black_oxy cbbQ
-## 6 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN black_oxy hypo
-names(ohb2_DE3_av)[4] <- "moduleFil"
-
-
-
-
-
-# again, I have the hypo issue
-# Assign a unique number for each distinct locusID with "hypo" genes
-locus_mapping <- ohb2_DE3_av %>%
- filter(short.names == "hypo") %>%
- distinct(locusID) %>%
- mutate(hypo_num = row_number())
-
-# Join this with the original dataframe and replace "hypo" genes
-new_df <- ohb2_DE3_av %>%
- left_join(locus_mapping, by = "locusID") %>%
- mutate(short.names = ifelse(!is.na(hypo_num) & short.names == "hypo",
- paste0("hypo", hypo_num),
- short.names)) %>%
- select(-hypo_num) # Remove the auxiliary column
-
-box <- new_df
-
-
-
-avg_logFC_df <- new_df %>%
- group_by(locusID, moduleFil) %>%
- summarise(avg_logFC = mean(logFC, na.rm = TRUE),
- sd_logFC = sd(logFC, na.rm = TRUE)) %>%
- ungroup()
-## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-avg_logFC_df <- new_df %>%
- group_by(locusID, moduleFil) %>%
- summarise(
- n = n(), # Add the count of non-NA values
- avg_logFC = mean(logFC, na.rm = TRUE),
- sd_logFC = sd(logFC, na.rm = TRUE)
- ) %>%
- ungroup()
-## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-avg_logFC_df <- avg_logFC_df %>%
- mutate(treatment_logFC_ave = "logFC_oxygen_limiting")
-
-# Print the result
-head(avg_logFC_df)
-## # A tibble: 6 × 6
-## locusID moduleFil n avg_logFC sd_logFC treatment_logFC_ave
-## <chr> <chr> <int> <dbl> <dbl> <chr>
-## 1 gene-L0Y14_RS00450 black_oxy 1 -3.00 NA logFC_oxygen_limiting
-## 2 gene-L0Y14_RS03075 black_oxy 1 -4.50 NA logFC_oxygen_limiting
-## 3 gene-L0Y14_RS03375 black_oxy 1 -3.61 NA logFC_oxygen_limiting
-## 4 gene-L0Y14_RS03820 black_oxy 1 -4.80 NA logFC_oxygen_limiting
-## 5 gene-L0Y14_RS04045 black_oxy 1 -3.51 NA logFC_oxygen_limiting
-## 6 gene-L0Y14_RS05880 black_oxy 2 -3.85 0.270 logFC_oxygen_limiting
-ohb_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
-
-boxn <- ohb_DE_av2[,c(1,3)]
-
-
-oxy_purple_black <- rbind(ohb_DE_av2,ohp_DE_av2)
-
-
-names(oxy_purple_black)
-## [1] "locusID" "moduleFil.x" "n"
-## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
-## [7] "short.names" "moduleFil.y" "func_category"
-#write.csv(oxy_purple_black,"oxy_purple_black2.csv")
-###added broad_category, removed duplicate hypo gene
-oxy_purple_black2 <- read.csv("oxy_purple_black2.csv")
-
-
-
-
- ##Filter for avg_logFC > 0
-oxy_up <- oxy_purple_black2[oxy_purple_black2$avg_logFC > 0, ]
-
-# Filter for avg_logFC < 0
-oxy_down <- oxy_purple_black2[oxy_purple_black2$avg_logFC < 0, ]
-
-
-# Filter out 'unknown'
-oxy_up_filtered <- oxy_up %>%
- filter(func_category != "unknown")
-
-oxy_down_filtered <- oxy_down %>%
- filter(func_category != "unknown")
-
-dim(oxy_up_filtered)
-## [1] 11 10
-###makes a clipping for the color (so its not all washed out)
-
-oxy_up_filtered$clipped_avg_logFC <- pmin(pmax(oxy_up_filtered$avg_logFC, -4), 4)
-
-oxy_down_filtered$clipped_avg_logFC <- pmin(pmax(oxy_down_filtered$avg_logFC, -4), 4)
-# Combine the datasets with an identifier for each
-oxy_up_filtered$set <- "Up"
-oxy_down_filtered$set <- "Down"
-
-combined_dataO <- rbind(oxy_up_filtered, oxy_down_filtered)
-# Reverse the levels so that "Up" comes before "Down"
-combined_dataO$set <- factor(combined_dataO$set, levels = c("Up", "Down"))
-
-combined_dataO <- combined_dataO %>%
- mutate(adj_avg_logFC = case_when(
- set == "Up" & avg_logFC < 0 ~ 0,
- set == "Down" & avg_logFC > 0 ~ 0,
- TRUE ~ avg_logFC
- ))
-
-
-
-# Plotting
-p <- ggplot(combined_dataO, aes(x = adj_avg_logFC, y = short.names, color = clipped_avg_logFC)) +
- geom_point(size = 3) +
- geom_errorbar(aes(xmin = adj_avg_logFC - sd_logFC, xmax = adj_avg_logFC + sd_logFC), width = 0.25) +
- scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0) +
- labs(title = "Average logFC under oxygen limiting conditions",
- x = "Adjusted Average LogFC",
- y = "Genes",
- color = "Avg LogFC Value") +
- theme_minimal() +
- theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
- facet_grid(cols = vars(set), rows= vars(func_category), scales = "free_y", space = "free_y")
-
-print(p)
-
-####trying to combine both plots so I have same persepectives for figure
-
-names(combined_dataO)
-## [1] "X" "locusID" "moduleFil.x"
-## [4] "n" "avg_logFC" "sd_logFC"
-## [7] "treatment_logFC_ave" "short.names" "moduleFil.y"
-## [10] "func_category" "clipped_avg_logFC" "set"
-## [13] "adj_avg_logFC"
-combined_dataO <- combined_dataO[,c("locusID","moduleFil.x","moduleFil.y","avg_logFC","sd_logFC","treatment_logFC_ave","short.names","func_category","clipped_avg_logFC","adj_avg_logFC", "set")]
-
-combined_dataS <- combined_dataS[,c("locusID","moduleFil.x","moduleFil.y","avg_logFC","sd_logFC","treatment_logFC_ave","short.names","func_category","clipped_avg_logFC","adj_avg_logFC", "set")]
-
-names(combined_dataS)
-## [1] "locusID" "moduleFil.x" "moduleFil.y"
-## [4] "avg_logFC" "sd_logFC" "treatment_logFC_ave"
-## [7] "short.names" "func_category" "clipped_avg_logFC"
-## [10] "adj_avg_logFC" "set"
-combinedOS <- rbind(combined_dataO,combined_dataS)
-
-
-# Plotting
-OS <- ggplot(combinedOS, aes(x = adj_avg_logFC, y = short.names, color = clipped_avg_logFC)) +
- geom_point(size = 3) +
- geom_errorbar(aes(xmin = adj_avg_logFC - sd_logFC, xmax = adj_avg_logFC + sd_logFC), width = 0.25) +
- scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0) +
- labs(title = "Average logFC under limiting conditions",
- x = "Adjusted Average LogFC",
- y = "Genes",
- color = "Avg LogFC Value") +
- theme_minimal() +
- theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
- facet_grid(cols = vars(treatment_logFC_ave), rows= vars(func_category), scales = "free_y", space = "free_y")
-
-print(OS)
-
-###remaking figure 6 for paper such that all of the data points (n= -condition tereatments)
-###here are the genes in the gold module correlated to sulfide
-all_together <- read.csv("wgcna_trait_module_hub_genes.csv")
-#names(all_together)
-all_together <- all_together[,1:4]
-
-
-
-shg_DE_av <- shg_DE2[,c(1,44,50,53)]
-
-head(shg_DE_av)
-## locusID logFC comparison moduleFil
-## 1 gene-L0Y14_RS10410 -6.607549 LS_vs_HS_noN_HO darkgoldenrod1
-## 2 gene-L0Y14_RS00080 -3.742340 LS_vs_HS_noN_HO darkgoldenrod1
-## 3 gene-L0Y14_RS01985 -5.245126 LS_vs_HS_noN_HO darkgoldenrod1
-## 4 gene-L0Y14_RS07695 4.361391 LS_vs_HS_noN_HO darkgoldenrod1
-## 5 gene-L0Y14_RS11995 2.598792 LS_vs_HS_noN_HO darkgoldenrod1
-## 6 gene-L0Y14_RS11990 2.739793 LS_vs_HS_noN_HO darkgoldenrod1
-#names(shg_DE_av)
-
-###here are the genes in the teal module correlated to sulfide
-
-names(sht_DE3)
-## [1] "X.1" "locusID"
-## [3] "geneid" "start.x"
-## [5] "stop.x" "gene_letters_andre"
-## [7] "accession_andre" "start"
-## [9] "stop" "notes"
-## [11] "Ontology" "Inference"
-## [13] "product" "protein_id"
-## [15] "gene_letters.notes" "NCBI_ID"
-## [17] "annotations_from_andre" "target_id"
-## [19] "Accession" "functional_role"
-## [21] "sub_role" "sub_role2"
-## [23] "Protein" "Accession...Protein"
-## [25] "seed_ortholog_evalue" "seed_ortholog_score"
-## [27] "best_tax_level" "Preferred_name"
-## [29] "GOs" "X"
-## [31] "KEGG_ko" "KEGG_Pathway"
-## [33] "KEGG_Module" "KEGG_Reaction"
-## [35] "KEGG_rclass" "BRITE"
-## [37] "KEGG_TC" "CAZy"
-## [39] "BiGG_Reaction" "tax_scope"
-## [41] "eggNOG_OGs" "bestOG"
-## [43] "COG_Functional_Category" "eggNOG_free_text_description"
-## [45] "logFC" "AveExpr"
-## [47] "t" "P.value"
-## [49] "adj.P.Val" "B"
-## [51] "comparison" "Predicted_function"
-## [53] "condition" "moduleFil"
-v <- unique(sht_DE3$locusID)
-sht_DE3_av <- sht_DE3[,c(2,45,51,54)]
-head(sht_DE3_av)
-## locusID logFC comparison moduleFil
-## 1 gene-L0Y14_RS00400 -5.671978 LS_vs_HS_noN_HO teal
-## 2 gene-L0Y14_RS01755 -1.093415 LS_vs_HS_noN_HO teal
-## 3 gene-L0Y14_RS04610 -1.158590 LS_vs_HS_noN_HO teal
-## 4 gene-L0Y14_RS05075 -1.192507 LS_vs_HS_noN_HO teal
-## 5 gene-L0Y14_RS05080 -2.083575 LS_vs_HS_noN_HO teal
-## 6 gene-L0Y14_RS07060 -1.072405 LS_vs_HS_noN_HO teal
-#names(sht_DE3_av)
-
-sulfide_gt <- rbind(sht_DE3_av,shg_DE_av)
-
-
-#names(sht_DE3_av)
-
-
-#names(all_together)
-
-shortnames <- all_together[,1:2]
-
-sulfideGT <- merge(sulfide_gt,shortnames)
-list(unique(sulfideGT$short.names))
-## [[1]]
-## [1] "glnL" "torF" "norB" "nirS"
-## [5] "norB1" "norD2" "psrA" "fusA"
-## [9] "nirB" "trmD" "YqgE/AlgH" "ibpA"
-## [13] "pyrG" "ftsH" "rimP" "nusA"
-## [17] "fusA2" "napG" "moaA" "DUF4340"
-## [21] "fusA_2" "holA" "hupE" "HyiA_small"
-## [25] "isp1" "isp2" "HyiB_large" "hypo"
-## [29] "frdB_1" "frdC_1" "htpX" "cmoB"
-## [33] "peptidase" "leuC" "ferritin_like_AB" "korB_2"
-## [37] "korA_2" "rhlE" "DUF2058" "dnaJ"
-## [41] "recN" "hdrB" "korA_1" "korB_1"
-## [45] "korC" "hdrA" "flxD" "flxC"
-## [49] "flxB" "flxA" "aclB_full" "aclA"
-## [53] "idh" "acnA" "pntA" "pntB"
-## [57] "mdh_1" "pfor_3" "ispB" "porin"
-## [61] "clpB" "DUF4126" "rho" "ptsP_E1"
-## [65] "feoB" "mcrA" "korA_3" "korB_3"
-## [69] "pfor_like"
-desired_order2 <- c("pntB","pntA","mdh_1","korC","korB_1","korA_1","idh","hdrB", "hdrA", "flxD","flxC","flxB", "flxA","acnA","aclB_full","aclA","korB_2","korA_2","korB_3","korA_3","pfor_like","pfor_3","frdC_1" ,"frdB_1","HyiB_large","isp1","isp2","HyiA_small", "hupE","hypo","norD2","norB1" ,"norB", "nirS","napG","nirB","glnL","fusA2","mcrA","fusA_2","fusA","trmD","rimP","rho","rhlE","recN","peptidase","nusA","ibpA","htpX","holA","ftsH", "dnaJ","cmoB","clpB","pyrG","psrA","moaA","leuC","ispB","torF","ptsP_E1","porin","feoB")
-
-filteredGT <- subset(sulfideGT, short.names %in% desired_order2)
-filteredGT$short.names <- factor(filteredGT$short.names, levels = desired_order2)
-
-
-filteredGT$clipped_logFC <- pmin(pmax(filteredGT$logFC, -4), 4)
-
-try <- ggplot(data=filteredGT, aes(x= logFC, y = fct_rev(short.names))) +
- geom_boxplot()+
- geom_point(aes(color = clipped_logFC), position = position_jitter(width = 0.1,height = 0.1)) +
- scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
- labs(title = "Boxplot with Colored Dots", x = "Category", y = "Value") +
- theme_minimal()
-
-try
-
-table(sulfideGT$comparison,sulfideGT$short.names)
-##
-## aclA aclB_full acnA clpB cmoB dnaJ DUF2058 DUF4126 DUF4340
-## H2_vs_HS_N_HO 0 0 0 1 1 1 1 1 1
-## H2_vs_HS_noN_HO 1 1 1 1 1 1 0 0 0
-## LS_vs_HS_N_HO 0 0 0 1 0 0 1 1 0
-## LS_vs_HS_noN_HO 1 1 1 1 0 1 0 0 0
-## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1
-##
-## feoB ferritin_like_AB flxA flxB flxC flxD frdB_1 frdC_1 ftsH
-## H2_vs_HS_N_HO 1 1 0 0 0 0 2 1 1
-## H2_vs_HS_noN_HO 0 1 1 1 1 1 2 1 1
-## LS_vs_HS_N_HO 1 0 0 0 0 0 0 0 1
-## LS_vs_HS_noN_HO 0 0 1 1 1 1 2 1 0
-## SW_vs_HS_noN_HO 1 1 1 1 1 1 2 1 1
-##
-## fusA fusA_2 fusA2 glnL hdrA hdrB holA htpX hupE HyiA_small
-## H2_vs_HS_N_HO 1 1 1 0 0 0 1 1 1 1
-## H2_vs_HS_noN_HO 1 1 1 1 1 1 1 1 0 1
-## LS_vs_HS_N_HO 0 0 0 0 0 0 0 1 0 1
-## LS_vs_HS_noN_HO 1 1 1 1 1 1 0 0 0 0
-## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 0 1
-##
-## HyiB_large hypo ibpA idh isp1 isp2 ispB korA_1 korA_2 korA_3
-## H2_vs_HS_N_HO 1 2 1 0 1 1 1 0 1 1
-## H2_vs_HS_noN_HO 1 2 1 1 1 1 1 1 1 1
-## LS_vs_HS_N_HO 1 1 1 0 1 1 1 0 0 1
-## LS_vs_HS_noN_HO 0 0 0 1 0 0 0 1 0 1
-## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1 1
-##
-## korB_1 korB_2 korB_3 korC leuC mcrA mdh_1 moaA napG nirB nirS
-## H2_vs_HS_N_HO 0 1 1 0 0 1 0 1 1 0 1
-## H2_vs_HS_noN_HO 1 1 1 1 1 0 1 0 1 1 1
-## LS_vs_HS_N_HO 0 0 0 0 0 0 0 0 0 0 1
-## LS_vs_HS_noN_HO 1 0 1 1 1 0 1 0 0 1 0
-## SW_vs_HS_noN_HO 1 1 1 1 1 0 1 1 1 1 1
-##
-## norB norB1 norD2 nusA peptidase pfor_3 pfor_like pntA pntB
-## H2_vs_HS_N_HO 1 1 1 1 0 1 1 0 0
-## H2_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1
-## LS_vs_HS_N_HO 0 0 0 1 0 0 1 0 0
-## LS_vs_HS_noN_HO 0 0 1 1 1 0 0 1 1
-## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1
-##
-## porin psrA ptsP_E1 pyrG recN rhlE rho rimP torF trmD
-## H2_vs_HS_N_HO 1 1 1 1 1 1 1 1 1 1
-## H2_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1 1
-## LS_vs_HS_N_HO 1 0 0 0 0 1 0 0 0 0
-## LS_vs_HS_noN_HO 1 0 1 1 1 0 0 1 1 0
-## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1 1
-##
-## YqgE/AlgH
-## H2_vs_HS_N_HO 1
-## H2_vs_HS_noN_HO 1
-## LS_vs_HS_N_HO 0
-## LS_vs_HS_noN_HO 0
-## SW_vs_HS_noN_HO 1
-###### now doing this for oxygen with purple and black modules, which I named pox and box
-
-pb_ox <- rbind(pox,box)
-
-###changing "hypo2 to another name so it wont get filtered out
-pb_ox <- pb_ox %>%
- mutate(short.names = ifelse(short.names == "hypo2", "unknown_hyd1e", short.names))
-
-
-
-####oops this caught the hypo2 from the other module as well, so filtering that out
-
-pb_ox <- pb_ox %>%
- filter(!grepl("gene-L0Y14_RS03075", locusID))
-
-####getting rid of the hypotheticals
-
-
-###in case I want this list later
-hypos_fil <- pb_ox %>%
- filter(grepl("^hypo", short.names))
-
-
-###ok, again with the filter
-pb_ox_fil <- pb_ox %>%
- filter(!grepl("^hypo", short.names))
-
-###checking if genes are overlapping (meaning found in both modules....they aren't)
-#table(pb_ox_fil$moduleFil,pb_ox_fil$short.names)
-
-#print(unique(pb_ox_fil$short.names))
-
-desired_order3 <- c("nifJ","ppdK","malQ","GH16","acnB","fabA","unknown_hyd1e","hypE","isp2","hybO","hupE","cphA","SgpA_1","fusA2","mrcA","yidD","yidC","yebA","tsaC","rocR","rpmB","prsK","LT_domain","lolA_like","IscA","HIT","himD","gmhA","ftsE","dut","cbbQ","bamD","Prxs","Hr","fliK1","FBP")
-
-#list(desired_order3)
-
-####ok, some of these are unknowns that I left off of the fgure, so I am going to filter them out
-
-pb_ox_fil2 <- subset(pb_ox_fil, short.names %in% desired_order3)
-pb_ox_fil2$short.names <- factor(pb_ox_fil2$short.names, levels = desired_order3)
-
-pb_ox_fil2$clipped_logFC <- pmin(pmax(pb_ox_fil2$logFC, -4), 4)
-
-
-try2 <- ggplot(data=pb_ox_fil2, aes(x= logFC, y = fct_rev(short.names))) +
- geom_boxplot()+
- geom_point(aes(color = clipped_logFC), position = position_jitter(width = 0.1,height = 0.1)) +
- scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
- labs(title = "Boxplot with Colored Dots", x = "Category", y = "Value") +
- scale_x_reverse()+
- theme_minimal()
-
-try2
-
-table(pb_ox_fil2$comparison)
-##
-## LO_vs_HO_H2_noN LO_vs_HO_HS_N
-## 19 30
-####ok, I have a problem, I can't get the dimensions to match for adobe illustrator, so adding dummy variables to save me time in illustrator
-
-# Define the number of dummy entries you need
-num_dummy <- 33
-
-# Creating the dummy data frame with unique dummy names
-dummy_data2 <- data.frame(
- locusID= rep("fakeid",num_dummy),
- logFC = rep(0, num_dummy), # Sets all logFC values to 0
- comparison=rep("fakecomparison",num_dummy),
- moduleFil = rep("fakemoduleFil",num_dummy),
- short.names = paste("Dummy", 1:num_dummy, sep=""),
- clipped_logFC = rep(4, num_dummy)# Creates "Dummy1", "Dummy2", ..., "Dummy33"
-)
-
-
-pb_ox_filDUMM <- rbind(pb_ox_fil2,dummy_data2)
-
-
-try3 <- ggplot(data=pb_ox_filDUMM, aes(x= logFC, y = fct_rev(short.names))) +
- geom_boxplot()+
- geom_point(aes(color = clipped_logFC), position = position_jitter(width = 0.1,height = 0.1)) +
- scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
- labs(title = "Boxplot with Colored Dots", x = "Category", y = "Value") +
- scale_x_reverse()+
- theme_minimal()
-
-try3
-
-names(modNames)
## NULL
moduleColors = mergedColors
@@ -2741,7 +2744,7 @@ DE expression analysis of significant genes, classified as hubs, in
significant modules
de<-read.csv('entotal_202200815_use_this_corrected_locus_tags.csv',header=TRUE,stringsAsFactors=FALSE)
-sig<-de[de$adj.P.Val<=0.1,]
+sig<-de[de$adj.P.Val<=0.05,]
sig0<-sig[sig$logFC<=-1,]
sig1<-sig[sig$logFC>=1,]
sig<-rbind(sig0,sig1)
@@ -3025,7 +3028,7 @@ DE expression analysis of significant genes, classified as hubs, in
geom_vline(xintercept = c(1.5,2.5,3.5, 4.5, 5.5,6.5,7.5,8.5,9.5,10.5,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5), color = "grey") +
my.colors.sulfide_nolowSvsH2 +
my.theme
-
+
####now doing this for sulfide_teal, the otehr module that is significant for sulfide
names(sul.teal)
## [1] "locusID" "gene" "condition" "moduleFil"
@@ -3109,7 +3112,7 @@ DE expression analysis of significant genes, classified as hubs, in
geom_point(size=5,shape=18) +
my.colors.oxygen +
my.theme
-
+
###### oxygen in black
#names(oxy_b)
@@ -3160,7 +3163,7 @@ DE expression analysis of significant genes, classified as hubs, in
geom_point(size=5,shape=18) +
my.colors.oxygen +
my.theme
-
+
######putting all of this together, so that I have a workable file I can use in adobe illustrator
@@ -3210,9 +3213,9 @@ average logFCs
## locusID moduleFil n avg_logFC sd_logFC
## <chr> <chr> <int> <dbl> <dbl>
## 1 gene-L0Y14_RS00080 darkgoldenrod1 3 -4.58 0.732
-## 2 gene-L0Y14_RS01985 darkgoldenrod1 4 -5.22 1.43
+## 2 gene-L0Y14_RS01985 darkgoldenrod1 3 -5.89 0.580
## 3 gene-L0Y14_RS07695 darkgoldenrod1 4 5.48 1.11
-## 4 gene-L0Y14_RS10315 darkgoldenrod1 4 2.05 0.778
+## 4 gene-L0Y14_RS10315 darkgoldenrod1 3 2.38 0.487
## 5 gene-L0Y14_RS10410 darkgoldenrod1 3 -5.97 0.737
## 6 gene-L0Y14_RS11255 darkgoldenrod1 4 2.70 0.991
avg_logFC_df <- avg_logFC_df %>%
@@ -3224,9 +3227,9 @@ average logFCs
## locusID moduleFil n avg_logFC sd_logFC treatment_logFC_ave
## <chr> <chr> <int> <dbl> <dbl> <chr>
## 1 gene-L0Y14_RS00080 darkgoldenrod1 3 -4.58 0.732 logFC_sulfide_limi…
-## 2 gene-L0Y14_RS01985 darkgoldenrod1 4 -5.22 1.43 logFC_sulfide_limi…
+## 2 gene-L0Y14_RS01985 darkgoldenrod1 3 -5.89 0.580 logFC_sulfide_limi…
## 3 gene-L0Y14_RS07695 darkgoldenrod1 4 5.48 1.11 logFC_sulfide_limi…
-## 4 gene-L0Y14_RS10315 darkgoldenrod1 4 2.05 0.778 logFC_sulfide_limi…
+## 4 gene-L0Y14_RS10315 darkgoldenrod1 3 2.38 0.487 logFC_sulfide_limi…
## 5 gene-L0Y14_RS10410 darkgoldenrod1 3 -5.97 0.737 logFC_sulfide_limi…
## 6 gene-L0Y14_RS11255 darkgoldenrod1 4 2.70 0.991 logFC_sulfide_limi…
shg_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
@@ -3695,19 +3698,19 @@ average logFCs
table(ohp_DE3$short.names)
##
## bamD cphA cvpA dprE1 FBP fusA2 GH16 hupE hybO hypE hypo
-## 2 2 2 2 1 2 2 2 2 2 8
-## isp2 mrcA nifJ ppdK rocR SgpA_1 tsaC yfbK yidC yidD
-## 2 2 2 2 2 2 2 1 2 1
+## 2 2 1 2 1 2 1 2 2 2 8
+## isp2 mrcA nifJ ppdK rocR SgpA_1 tsaC yidC yidD
+## 2 2 2 2 2 2 2 2 1
ohp_DE3_av <- ohp_DE3[,c(1,49,55,59,57)]
head(ohp_DE3_av)
## locusID logFC comparison moduleFil short.names
## 1 gene-L0Y14_RS05880 -4.039158 LO_vs_HO_HS_N purple hypo
## 2 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN purple hypo
-## 3 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN purple dprE1
-## 4 gene-L0Y14_RS06635 -5.296420 LO_vs_HO_HS_N purple dprE1
-## 5 gene-L0Y14_RS06820 2.176861 LO_vs_HO_HS_N purple fusA2
-## 6 gene-L0Y14_RS06820 2.418749 LO_vs_HO_H2_noN purple fusA2
+## 3 gene-L0Y14_RS06635 -5.296420 LO_vs_HO_HS_N purple dprE1
+## 4 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN purple dprE1
+## 5 gene-L0Y14_RS06820 2.418749 LO_vs_HO_H2_noN purple fusA2
+## 6 gene-L0Y14_RS06820 2.176861 LO_vs_HO_HS_N purple fusA2
####dealing with the fact I have more than one genes that are called "hypo"
@@ -3730,10 +3733,10 @@ average logFCs
## locusID logFC comparison moduleFil short.names
## 1 gene-L0Y14_RS05880 -4.039158 LO_vs_HO_HS_N purple hypo1
## 2 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN purple hypo1
-## 3 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN purple dprE1
-## 4 gene-L0Y14_RS06635 -5.296420 LO_vs_HO_HS_N purple dprE1
-## 5 gene-L0Y14_RS06820 2.176861 LO_vs_HO_HS_N purple fusA2
-## 6 gene-L0Y14_RS06820 2.418749 LO_vs_HO_H2_noN purple fusA2
+## 3 gene-L0Y14_RS06635 -5.296420 LO_vs_HO_HS_N purple dprE1
+## 4 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN purple dprE1
+## 5 gene-L0Y14_RS06820 2.418749 LO_vs_HO_H2_noN purple fusA2
+## 6 gene-L0Y14_RS06820 2.176861 LO_vs_HO_HS_N purple fusA2
pox <- new_df
@@ -3801,11 +3804,11 @@ average logFCs
head(ohb2_DE3_av)
## locusID logFC comparison moduleFil3 short.names
## 1 gene-L0Y14_RS00450 -2.998400 LO_vs_HO_HS_N black_oxy Hr
-## 2 gene-L0Y14_RS00450 -2.057712 LO_vs_HO_H2_noN black_oxy Hr
-## 3 gene-L0Y14_RS01145 -2.629476 LO_vs_HO_HS_N black_oxy hypo
-## 4 gene-L0Y14_RS02180 -3.074314 LO_vs_HO_HS_N black_oxy ftsE
-## 5 gene-L0Y14_RS03075 -4.498757 LO_vs_HO_HS_N black_oxy hypo
-## 6 gene-L0Y14_RS03375 -3.606201 LO_vs_HO_HS_N black_oxy fabA
+## 2 gene-L0Y14_RS03075 -4.498757 LO_vs_HO_HS_N black_oxy hypo
+## 3 gene-L0Y14_RS03375 -3.606201 LO_vs_HO_HS_N black_oxy fabA
+## 4 gene-L0Y14_RS03820 -4.795972 LO_vs_HO_HS_N black_oxy HIT
+## 5 gene-L0Y14_RS04045 -3.506534 LO_vs_HO_HS_N black_oxy cbbQ
+## 6 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN black_oxy hypo
names(ohb2_DE3_av)[4] <- "moduleFil"
@@ -3856,12 +3859,12 @@ average logFCs
## # A tibble: 6 × 6
## locusID moduleFil n avg_logFC sd_logFC treatment_logFC_ave
## <chr> <chr> <int> <dbl> <dbl> <chr>
-## 1 gene-L0Y14_RS00450 black_oxy 2 -2.53 0.665 logFC_oxygen_limiting
-## 2 gene-L0Y14_RS01145 black_oxy 1 -2.63 NA logFC_oxygen_limiting
-## 3 gene-L0Y14_RS02180 black_oxy 1 -3.07 NA logFC_oxygen_limiting
-## 4 gene-L0Y14_RS03075 black_oxy 1 -4.50 NA logFC_oxygen_limiting
-## 5 gene-L0Y14_RS03375 black_oxy 1 -3.61 NA logFC_oxygen_limiting
-## 6 gene-L0Y14_RS03820 black_oxy 1 -4.80 NA logFC_oxygen_limiting
+## 1 gene-L0Y14_RS00450 black_oxy 1 -3.00 NA logFC_oxygen_limiting
+## 2 gene-L0Y14_RS03075 black_oxy 1 -4.50 NA logFC_oxygen_limiting
+## 3 gene-L0Y14_RS03375 black_oxy 1 -3.61 NA logFC_oxygen_limiting
+## 4 gene-L0Y14_RS03820 black_oxy 1 -4.80 NA logFC_oxygen_limiting
+## 5 gene-L0Y14_RS04045 black_oxy 1 -3.51 NA logFC_oxygen_limiting
+## 6 gene-L0Y14_RS05880 black_oxy 2 -3.85 0.270 logFC_oxygen_limiting
ohb_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
boxn <- ohb_DE_av2[,c(1,3)]
@@ -4079,7 +4082,7 @@ average logFCs
theme_minimal()
try
-
+
table(sulfideGT$comparison,sulfideGT$short.names)
##
## aclA aclB_full acnA clpB cmoB dnaJ DUF2058 DUF4126 DUF4340
@@ -4090,7 +4093,7 @@ average logFCs
## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1
##
## feoB ferritin_like_AB flxA flxB flxC flxD frdB_1 frdC_1 ftsH
-## H2_vs_HS_N_HO 1 1 0 0 1 0 2 1 1
+## H2_vs_HS_N_HO 1 1 0 0 0 0 2 1 1
## H2_vs_HS_noN_HO 0 1 1 1 1 1 2 1 1
## LS_vs_HS_N_HO 1 0 0 0 0 0 0 0 1
## LS_vs_HS_noN_HO 0 0 1 1 1 1 2 1 0
@@ -4111,14 +4114,14 @@ average logFCs
## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1 1
##
## korB_1 korB_2 korB_3 korC leuC mcrA mdh_1 moaA napG nirB nirS
-## H2_vs_HS_N_HO 0 1 1 0 0 1 0 1 1 1 1
+## H2_vs_HS_N_HO 0 1 1 0 0 1 0 1 1 0 1
## H2_vs_HS_noN_HO 1 1 1 1 1 0 1 0 1 1 1
## LS_vs_HS_N_HO 0 0 0 0 0 0 0 0 0 0 1
## LS_vs_HS_noN_HO 1 0 1 1 1 0 1 0 0 1 0
## SW_vs_HS_noN_HO 1 1 1 1 1 0 1 1 1 1 1
##
## norB norB1 norD2 nusA peptidase pfor_3 pfor_like pntA pntB
-## H2_vs_HS_N_HO 1 1 1 1 1 1 1 0 0
+## H2_vs_HS_N_HO 1 1 1 1 0 1 1 0 0
## H2_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1
## LS_vs_HS_N_HO 0 0 0 1 0 0 1 0 0
## LS_vs_HS_noN_HO 0 0 1 1 1 0 0 1 1
@@ -4190,11 +4193,11 @@ average logFCs
theme_minimal()
try2
-
+
table(pb_ox_fil2$comparison)
##
## LO_vs_HO_H2_noN LO_vs_HO_HS_N
-## 20 35
+## 19 30
####ok, I have a problem, I can't get the dimensions to match for adobe illustrator, so adding dummy variables to save me time in illustrator
# Define the number of dummy entries you need
@@ -4223,8 +4226,7 @@ average logFCs
theme_minimal()
try3
-
-#save.image("allRfrom20240306.RData")
+