From e045f2b62c6331068441a5513893bee42548808c Mon Sep 17 00:00:00 2001 From: afreedman Date: Fri, 29 Mar 2024 08:36:13 -0400 Subject: [PATCH] replaced WGCNA html to reflect correct pval --- WGCNA_ANALYSIS.html | 4280 ---------------------------- analysis/WGCNA/WGCNA_ANALYSIS.html | 78 +- 2 files changed, 40 insertions(+), 4318 deletions(-) delete mode 100644 WGCNA_ANALYSIS.html diff --git a/WGCNA_ANALYSIS.html b/WGCNA_ANALYSIS.html deleted file mode 100644 index 12dae0e..0000000 --- a/WGCNA_ANALYSIS.html +++ /dev/null @@ -1,4280 +0,0 @@ - - - - - - - - - - - - - - - -WGCNA_ANALYSIS - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - -
-

librarys

-

/ /

-
suppressMessages(library(ggplot2))
-suppressMessages(library(ape))
-suppressMessages(library(limma))
-suppressMessages(library(edgeR))
-suppressMessages(library(statmod))
-suppressMessages(library(stats))
-suppressMessages(library(dplyr))
-suppressMessages(library(stringr))
-suppressMessages(library(tibble))
-suppressMessages(library(ggforce))
-suppressMessages(library(WGCNA))
-suppressMessages(library(km2gcn))
-suppressMessages(library(CoExpNets))
-suppressMessages(library(devtools))
-suppressMessages(library(UpSetR))
-suppressMessages(library(RCy3))
-suppressMessages(library(readr))
-suppressMessages(library(scales))
-suppressMessages(library(data.table))
-suppressMessages(library(forcats))
-suppressMessages(library(kableExtra))
-
-
-

Setting up dataframes

-
setwd("/Users/jessicamitchell/Library/CloudStorage/OneDrive-HarvardUniversity/OD_documents/Cfixpaper")
-
-
-
-
-rsem_gene_data <- read.table("epersephone_rsem_gene_count_proteinCODINGonly_2022.08.14.matrix")
-
-
-s2c<-read.csv("rsem_gene_info.csv",header = TRUE, stringsAsFactors=FALSE)
-s2c$sample
-
##  [1] "sample103"  "sample106"  "sample112"  "sample115"  "sample119" 
-##  [6] "sample121"  "sample127"  "sample1294" "sample1300" "sample1310"
-## [11] "sample1324" "sample1330" "sample1346" "sample1347" "sample1353"
-## [16] "sample1369" "sample136"  "sample16"   "sample1750" "sample1871"
-## [21] "sample1876" "sample28"   "sample36"   "sample54"   "sample56"  
-## [26] "sample58"   "sample69"   "sample72"   "sample78"   "sample94"
-
rnaseqMatrix=round(rsem_gene_data)
-
-filter=rowSums(cpm(rnaseqMatrix)>=1)>=3
-summary(filter)
-
##    Mode   FALSE    TRUE 
-## logical      28    3140
-
cpm_filtered_matrix=rnaseqMatrix[filter,]
-#cpm_filtered_matrix
-DGE<-DGEList(cpm_filtered_matrix)
-DGE<-calcNormFactors(DGE,method =c("TMM"))
-#DGE
-
-
-treatvals<-s2c$condition
-
-design_condition=model.matrix(~condition, data=s2c)
-
-RNAseq_voom <- voomWithQualityWeights(DGE, design=design_condition,normalize.method="none")$E
-
-
-###this filters out the genes that have less variation in expression
-WGCNA_matrix = t(RNAseq_voom[order(apply(RNAseq_voom,1,mad), decreasing = T)[1:2500],])
-WGCNA_matrix <- as.data.frame(WGCNA_matrix)
-
-row.names(WGCNA_matrix) = s2c$sample
-
-names(s2c)
-
##  [1] "sample"      "condition"   "reps"        "size"        "size_cat"   
-##  [6] "troph_color" "X"           "X.1"         "X.2"         "X.3"        
-## [11] "X.4"
-
###remove the blank columns
-s2c <- s2c[,1:6]
-sample_rownames <- rownames(WGCNA_matrix)
-
-s2c2 <- s2c[, -1]
-rownames(s2c2) <- sample_rownames
-
-#removing rep and size categories
-s2c3 <- s2c2[,c(1,4:5)]
-names(s2c3)
-
## [1] "condition"   "size_cat"    "troph_color"
-
####making this binary data for downstream analyses
-
-s2c4 <- s2c3 %>%
-  mutate(Sulfide = case_when(
-    startsWith(condition, "HS") ~ "1",
-    startsWith(condition, "LS") ~ "0",
-    startsWith(condition, "SW") ~ "0",
-    startsWith(condition, "H2")~ "0"
-    ))
-
-s2c5 <- s2c4 %>%
-  mutate(Oxygen = case_when(
-    endsWith(condition, "HO") ~ "1",
-    endsWith(condition, "LO") ~ "0"
-    ))
-
-s2c6 <- s2c5 %>%
-  mutate(Nitrogen = case_when(
-    grepl("noN",condition) ~ "0",
-    grepl("_N",condition) ~ "1")
-  ) 
-
-
-s2c7 <- s2c6 %>%
-  mutate(Hydrogen = case_when(
-    startsWith(condition, "HS") ~ "0",
-    startsWith(condition, "LS") ~ "0",
-    startsWith(condition, "SW") ~ "0",
-    startsWith(condition, "H2")~ "1"
-    ))
-
-s2c20 <- s2c7 %>%
-  mutate(Low_to_No_sulfide = case_when(
-    startsWith(condition, "HS") ~ "0",
-    startsWith(condition, "LS") ~ "1",
-    startsWith(condition, "SW") ~ "1",
-    startsWith(condition, "H2")~ "0"
-    ))
-
-
-s2c8 <- s2c20[,4:8]
-
-s2c8$Sulfide <- as.numeric(unlist(s2c8$Sulfide))
-s2c8$Oxygen <- as.numeric(unlist(s2c8$Oxygen))
-s2c8$Nitrogen <- as.numeric(unlist(s2c8$Nitrogen))
-s2c8$Hydrogen <- as.numeric(unlist(s2c8$Hydrogen))
-s2c8$Low_to_No_sulfide <- as.numeric(unlist(s2c8$Low_to_No_sulfide))
-str(s2c8)
-
## 'data.frame':    30 obs. of  5 variables:
-##  $ Sulfide          : num  0 0 1 1 1 1 1 1 1 1 ...
-##  $ Oxygen           : num  1 1 0 0 0 1 1 1 1 1 ...
-##  $ Nitrogen         : num  1 1 1 1 1 1 1 0 0 0 ...
-##  $ Hydrogen         : num  1 1 0 0 0 0 0 0 0 0 ...
-##  $ Low_to_No_sulfide: num  0 0 0 0 0 0 0 0 0 0 ...
-
s2c9 <- s2c3 %>%
-  mutate(H2_N_HO = case_when(
-    condition== "H2_N_HO" ~ "1",
-    TRUE ~ "0"))
-s210 <-   s2c9  %>%
-  mutate(HS_N_LO = case_when(
-    condition== "HS_N_LO" ~ "1",
-    TRUE ~ "0"))
-
-s211 <-   s210  %>%
-  mutate(HS_N_HO = case_when(
-    condition== "HS_N_HO" ~ "1",
-    TRUE ~ "0")) 
-s212 <-   s211  %>%
-  mutate(HS_noN_HO = case_when(
-    condition== "HS_noN_HO" ~ "1",
-    TRUE ~ "0")) 
-s213 <-   s212  %>%
-  mutate(LS_noN_HO = case_when(
-    condition== "LS_noN_HO" ~ "1",
-    TRUE ~ "0")) 
-s214 <-   s213  %>%
-  mutate(SW_noN_HO = case_when(
-    condition== "SW_noN_HO" ~ "1",
-    TRUE ~ "0")) 
-s215 <-   s214  %>%
-  mutate(LS_N_HO = case_when(
-    condition== "LS_N_HO" ~ "1",
-    TRUE ~ "0"))                 
- s216 <-   s215  %>%
-  mutate(H2_noN_HO = case_when(
-    condition== "H2_noN_HO" ~ "1",
-    TRUE ~ "0"))                
- s217 <-   s216  %>%
-  mutate(H2_N_LO = case_when(
-    condition== "H2_N_LO" ~ "1",
-    TRUE ~ "0"))               
-s218 <-   s217  %>%
-  mutate(H2_noN_LO = case_when(
-    condition== "H2_noN_LO" ~ "1",
-    TRUE ~ "0"))               
-               
-names(s218) 
-
##  [1] "condition"   "size_cat"    "troph_color" "H2_N_HO"     "HS_N_LO"    
-##  [6] "HS_N_HO"     "HS_noN_HO"   "LS_noN_HO"   "SW_noN_HO"   "LS_N_HO"    
-## [11] "H2_noN_HO"   "H2_N_LO"     "H2_noN_LO"
-
s218$H2_N_HO <- as.numeric(unlist(s218$H2_N_HO))
-s218$HS_N_LO <- as.numeric(unlist(s218$HS_N_LO))
-s218$HS_N_HO <- as.numeric(unlist(s218$HS_N_HO))
-s218$HS_noN_HO <- as.numeric(unlist(s218$HS_noN_HO))
-s218$LS_noN_HO <- as.numeric(unlist(s218$LS_noN_HO))
-s218$SW_noN_HO <- as.numeric(unlist(s218$SW_noN_HO))
-s218$LS_N_HO <- as.numeric(unlist(s218$LS_N_HO))
-s218$H2_noN_HO <- as.numeric(unlist(s218$H2_noN_HO))
-s218$H2_N_LO <- as.numeric(unlist(s218$H2_N_LO))
-s218$H2_noN_LO <- as.numeric(unlist(s218$H2_noN_LO))
-
-
-
-s218 <- s218[,4:13]                
-biconditions <- cbind(s218,s2c8)                 
-
-save(WGCNA_matrix, biconditions, file = "count_matrix_condition_info_dataInput.RData")
-

/

-

/

-
-
-

softpower choice

-
powers = c(c(1:10), seq(from = 12, to=20, by=2))
-
-# Call the network topology analysis function
-sft2 = pickSoftThreshold(WGCNA_matrix, powerVector = powers, networkType= "signed hybrid", verbose = 2)
-
## pickSoftThreshold: will use block size 2500.
-##  pickSoftThreshold: calculating connectivity for given powers...
-##    ..working on genes 1 through 2500 of 2500
-
## Warning: executing %dopar% sequentially: no parallel backend registered
-
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
-## 1      1   0.0489 -0.388          0.858 331.000  308.0000 639.00
-## 2      2   0.4660 -1.170          0.913 124.000  112.0000 343.00
-## 3      3   0.6690 -1.540          0.944  57.600   48.3000 211.00
-## 4      4   0.7060 -1.770          0.933  30.400   23.5000 140.00
-## 5      5   0.7310 -1.910          0.942  17.600   12.5000  96.90
-## 6      6   0.7640 -1.900          0.956  10.900    7.0200  69.60
-## 7      7   0.7900 -1.900          0.968   7.060    4.0600  51.30
-## 8      8   0.8070 -1.890          0.970   4.800    2.4400  38.60
-## 9      9   0.8150 -1.850          0.974   3.370    1.5300  29.50
-## 10    10   0.8220 -1.830          0.972   2.440    0.9920  22.90
-## 11    12   0.8720 -1.640          0.985   1.380    0.4430  14.30
-## 12    14   0.8920 -1.490          0.975   0.839    0.2160   9.26
-## 13    16   0.9150 -1.430          0.968   0.544    0.1110   6.68
-## 14    18   0.9660 -1.380          0.991   0.371    0.0579   5.22
-## 15    20   0.9190 -1.490          0.949   0.264    0.0317   4.66
-
# Plot the results:
-
-par(mar = c(1,2,1,2));
-cex1 = 0.8;
-# Scale-free topology fit index as a function of the soft-thresholding power
-plot(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2],
-     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
-     main = paste("Scale independence"));
-text(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2],
-     labels=powers,cex=cex1,col="red");
-# this line corresponds to using an R^2 cut-off of h
-abline(h=0.8,col="red")
-

-
# Mean connectivity as a function of the soft-thresholding power
-par(mar = c(1,2,1,2))
-plot(sft2$fitIndices[,1], sft2$fitIndices[,5],
-     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
-     main = paste("Mean connectivity"))
-
-text(sft2$fitIndices[,1], sft2$fitIndices[,5], labels=powers, cex=cex1,col="red")
-

-
sft2$fitIndices
-
##    Power   SFT.R.sq      slope truncated.R.sq     mean.k.    median.k.
-## 1      1 0.04890048 -0.3884992      0.8581139 331.4011205 308.13070231
-## 2      2 0.46592537 -1.1680241      0.9132379 124.4904470 112.01173850
-## 3      3 0.66927892 -1.5390034      0.9440341  57.6399451  48.32562217
-## 4      4 0.70560384 -1.7734087      0.9325776  30.4063264  23.49079218
-## 5      5 0.73115622 -1.9082518      0.9420155  17.5623528  12.50091882
-## 6      6 0.76403365 -1.9033807      0.9561525  10.8503969   7.02207374
-## 7      7 0.79011799 -1.8998930      0.9678497   7.0639452   4.05989014
-## 8      8 0.80724037 -1.8863799      0.9695552   4.7967210   2.44030738
-## 9      9 0.81539375 -1.8468855      0.9740395   3.3725516   1.52789332
-## 10    10 0.82151935 -1.8250238      0.9724534   2.4419234   0.99168615
-## 11    12 0.87162088 -1.6441349      0.9852418   1.3763842   0.44250659
-## 12    14 0.89171300 -1.4932610      0.9749399   0.8387154   0.21564804
-## 13    16 0.91539066 -1.4287488      0.9678064   0.5438888   0.11080318
-## 14    18 0.96562166 -1.3772418      0.9913790   0.3711772   0.05787538
-## 15    20 0.91949827 -1.4881952      0.9494624   0.2643675   0.03166435
-##        max.k.
-## 1  638.846689
-## 2  342.917019
-## 3  210.869059
-## 4  139.536080
-## 5   96.870153
-## 6   69.586561
-## 7   51.295838
-## 8   38.594403
-## 9   29.529360
-## 10  22.915384
-## 11  14.292054
-## 12   9.260525
-## 13   6.684230
-## 14   5.219041
-## 15   4.660383
-
####choosing softpower to be 8
-softPower = 8
-
-
-

checking on scale free topology

-
ADJ1=abs(cor(WGCNA_matrix,use="p"))^8
-k=as.vector(apply(ADJ1,2,sum, na.rm=T))
-sizeGrWindow(10,5)
-par(mfrow=c(1,2))
-hist(k)
-scaleFreePlot(k, main="Check scale free topology\n")
-

-
##   scaleFreeRsquared slope
-## 1              0.85 -1.93
-

/

-

/

-
-
-

build network (manual version)

-
set.seed(123)
-softPower = 8;
-adjacency = adjacency(WGCNA_matrix, power = softPower, type="signed hybrid")
-
-dissTOM = TOMdist(adjacency, TOMType = "signed")
-
## ..connectivity..
-## ..matrix multiplication (system BLAS)..
-## ..normalization..
-## ..done.
-
# Call the hierarchical clustering function
-geneTree = hclust(as.dist(dissTOM), method = "average");
-# Plot the resulting clustering tree (dendrogram)
-
-par(mar = c(3,3,3,3))
-
-
-plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
-     labels = FALSE, hang = 0.04);
-

-
minModuleSize = 30;
-# Module identification using dynamic tree cut:
-dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
-                deepSplit = 2, pamRespectsDendro = FALSE,
-                minClusterSize = minModuleSize)
-
##  ..cutHeight not given, setting it to 0.998  ===>  99% of the (truncated) height range in dendro.
-##  ..done.
-
table(dynamicMods)
-
## dynamicMods
-##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
-## 242 233 214 160 155 143 131 124 124 120 109 107 106 100  99  83  79  72  53  46
-
dynamicColors = labels2colors(dynamicMods)
-table(dynamicColors)
-
## dynamicColors
-##        black         blue        brown         cyan        green  greenyellow 
-##          124          214          160           99          143          107 
-##         grey       grey60    lightcyan   lightgreen  lightyellow      magenta 
-##          242           72           79           53           46          120 
-## midnightblue         pink       purple          red       salmon          tan 
-##           83          124          109          131          100          106 
-##    turquoise       yellow 
-##          233          155
-
####changing names of colors
-
-colorOrder = c("grey", standardColors(50))
-list(colorOrder)
-
## [[1]]
-##  [1] "grey"            "turquoise"       "blue"            "brown"          
-##  [5] "yellow"          "green"           "red"             "black"          
-##  [9] "pink"            "magenta"         "purple"          "greenyellow"    
-## [13] "tan"             "salmon"          "cyan"            "midnightblue"   
-## [17] "lightcyan"       "grey60"          "lightgreen"      "lightyellow"    
-## [21] "royalblue"       "darkred"         "darkgreen"       "darkturquoise"  
-## [25] "darkgrey"        "orange"          "darkorange"      "white"          
-## [29] "skyblue"         "saddlebrown"     "steelblue"       "paleturquoise"  
-## [33] "violet"          "darkolivegreen"  "darkmagenta"     "sienna3"        
-## [37] "yellowgreen"     "skyblue3"        "plum1"           "orangered4"     
-## [41] "mediumpurple3"   "lightsteelblue1" "lightcyan1"      "ivory"          
-## [45] "floralwhite"     "darkorange2"     "brown4"          "bisque4"        
-## [49] "darkslateblue"   "plum2"           "thistle2"
-
moduleLabels = match(dynamicColors, colorOrder)-1
-head(moduleLabels)
-
## [1] 10 13 15 17 17 17
-
table(moduleLabels)
-
## moduleLabels
-##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
-## 242 233 214 160 155 143 131 124 124 120 109 107 106 100  99  83  79  72  53  46
-
table(dynamicColors) 
-
## dynamicColors
-##        black         blue        brown         cyan        green  greenyellow 
-##          124          214          160           99          143          107 
-##         grey       grey60    lightcyan   lightgreen  lightyellow      magenta 
-##          242           72           79           53           46          120 
-## midnightblue         pink       purple          red       salmon          tan 
-##           83          124          109          131          100          106 
-##    turquoise       yellow 
-##          233          155
-
customColorOrder = c("grey", "brown", "deeppink3", "darkcyan","plum","darkgreen","red","black","pink","skyblue","plum4","greenyellow","tan","darkorchid4","darkgoldenrod1","midnightblue","lightcyan","grey60","lightgreen","yellow")
-
-
-
-
-
-
-moduleColors.custom = customColorOrder[moduleLabels + 1]
-table(dynamicColors, moduleColors.custom)##sanity check
-
##               moduleColors.custom
-## dynamicColors  black brown darkcyan darkgoldenrod1 darkgreen darkorchid4
-##   black          124     0        0              0         0           0
-##   blue             0     0        0              0         0           0
-##   brown            0     0      160              0         0           0
-##   cyan             0     0        0             99         0           0
-##   green            0     0        0              0       143           0
-##   greenyellow      0     0        0              0         0           0
-##   grey             0     0        0              0         0           0
-##   grey60           0     0        0              0         0           0
-##   lightcyan        0     0        0              0         0           0
-##   lightgreen       0     0        0              0         0           0
-##   lightyellow      0     0        0              0         0           0
-##   magenta          0     0        0              0         0           0
-##   midnightblue     0     0        0              0         0           0
-##   pink             0     0        0              0         0           0
-##   purple           0     0        0              0         0           0
-##   red              0     0        0              0         0           0
-##   salmon           0     0        0              0         0         100
-##   tan              0     0        0              0         0           0
-##   turquoise        0   233        0              0         0           0
-##   yellow           0     0        0              0         0           0
-##               moduleColors.custom
-## dynamicColors  deeppink3 greenyellow grey grey60 lightcyan lightgreen
-##   black                0           0    0      0         0          0
-##   blue               214           0    0      0         0          0
-##   brown                0           0    0      0         0          0
-##   cyan                 0           0    0      0         0          0
-##   green                0           0    0      0         0          0
-##   greenyellow          0         107    0      0         0          0
-##   grey                 0           0  242      0         0          0
-##   grey60               0           0    0     72         0          0
-##   lightcyan            0           0    0      0        79          0
-##   lightgreen           0           0    0      0         0         53
-##   lightyellow          0           0    0      0         0          0
-##   magenta              0           0    0      0         0          0
-##   midnightblue         0           0    0      0         0          0
-##   pink                 0           0    0      0         0          0
-##   purple               0           0    0      0         0          0
-##   red                  0           0    0      0         0          0
-##   salmon               0           0    0      0         0          0
-##   tan                  0           0    0      0         0          0
-##   turquoise            0           0    0      0         0          0
-##   yellow               0           0    0      0         0          0
-##               moduleColors.custom
-## dynamicColors  midnightblue pink plum plum4 red skyblue tan yellow
-##   black                   0    0    0     0   0       0   0      0
-##   blue                    0    0    0     0   0       0   0      0
-##   brown                   0    0    0     0   0       0   0      0
-##   cyan                    0    0    0     0   0       0   0      0
-##   green                   0    0    0     0   0       0   0      0
-##   greenyellow             0    0    0     0   0       0   0      0
-##   grey                    0    0    0     0   0       0   0      0
-##   grey60                  0    0    0     0   0       0   0      0
-##   lightcyan               0    0    0     0   0       0   0      0
-##   lightgreen              0    0    0     0   0       0   0      0
-##   lightyellow             0    0    0     0   0       0   0     46
-##   magenta                 0    0    0     0   0     120   0      0
-##   midnightblue           83    0    0     0   0       0   0      0
-##   pink                    0  124    0     0   0       0   0      0
-##   purple                  0    0    0   109   0       0   0      0
-##   red                     0    0    0     0 131       0   0      0
-##   salmon                  0    0    0     0   0       0   0      0
-##   tan                     0    0    0     0   0       0 106      0
-##   turquoise               0    0    0     0   0       0   0      0
-##   yellow                  0    0  155     0   0       0   0      0
-
str(dynamicColors)
-
##  chr [1:2500] "purple" "salmon" "midnightblue" "grey60" "grey60" "grey60" ...
-
str(moduleColors.custom)
-
##  chr [1:2500] "plum4" "darkorchid4" "midnightblue" "grey60" "grey60" ...
-
plotDendroAndColors(geneTree, moduleColors.custom, "Dynamic Tree Cut",
-                    dendroLabels = FALSE, hang = 0.03,
-                    addGuide = TRUE, guideHang = 0.05,
-                    main = "Gene dendrogram and module colors")
-

-
# Calculate eigengenes
-MEList1 = moduleEigengenes(WGCNA_matrix, colors = moduleColors.custom)
-MEs1 = MEList1$eigengenes
- #Calculate dissimilarity of module eigengenes
-MEDiss1 = 1-cor(MEs1);
-# Cluster module eigengenes
-METree1 = hclust(as.dist(MEDiss1), method = "average")
-
# Plot the result
-
-
-par(mar = c(3,3,3,3))
-plot(METree1, main = "Clustering of module eigengenes",
-     xlab = "", sub = "")
-MEDissThres = 0.3
-# Plot the cut line into the dendrogram
-abline(h=MEDissThres, col = "red")
-

-
# Call an automatic merging function
-merge = mergeCloseModules(WGCNA_matrix, moduleColors.custom, cutHeight = MEDissThres, verbose = 3)
-
##  mergeCloseModules: Merging modules whose distance is less than 0.3
-##    multiSetMEs: Calculating module MEs.
-##      Working on set 1 ...
-##      moduleEigengenes: Calculating 20 module eigengenes in given set.
-##    multiSetMEs: Calculating module MEs.
-##      Working on set 1 ...
-##      moduleEigengenes: Calculating 16 module eigengenes in given set.
-##    multiSetMEs: Calculating module MEs.
-##      Working on set 1 ...
-##      moduleEigengenes: Calculating 15 module eigengenes in given set.
-##    Calculating new MEs...
-##    multiSetMEs: Calculating module MEs.
-##      Working on set 1 ...
-##      moduleEigengenes: Calculating 15 module eigengenes in given set.
-
#Eigengenes of the new merged modules with new colors
-mergedMEs = merge$newMEs;
-mergedColors = merge$colors
-table(moduleColors.custom)
-
## moduleColors.custom
-##          black          brown       darkcyan darkgoldenrod1      darkgreen 
-##            124            233            160             99            143 
-##    darkorchid4      deeppink3    greenyellow           grey         grey60 
-##            100            214            107            242             72 
-##      lightcyan     lightgreen   midnightblue           pink           plum 
-##             79             53             83            124            155 
-##          plum4            red        skyblue            tan         yellow 
-##            109            131            120            106             46
-
table(mergedColors)
-
## mergedColors
-##          black          brown       darkcyan darkgoldenrod1      darkgreen 
-##            124            233            239             99            143 
-##    darkorchid4      deeppink3    greenyellow           grey         grey60 
-##            100            214            160            242            203 
-##   midnightblue           pink          plum4        skyblue         yellow 
-##             83            385            109            120             46
-
plotDendroAndColors(geneTree, cbind(moduleColors.custom, mergedColors),
-                    c("Dynamic Tree Cut", "Merged dynamic"),
-                    dendroLabels = FALSE, hang = 0.03,
-                    addGuide = TRUE, guideHang = 0.05)
-

-
nGenes = ncol(WGCNA_matrix)
-nSamples = nrow(WGCNA_matrix)
-modNames = substring(names(mergedMEs), 3)
-
-MEs0 = moduleEigengenes(WGCNA_matrix, mergedColors)$eigengenes
-MEs = orderMEs(MEs0)
-moduleTraitCor = cor(MEs, biconditions, use = "p")
-moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
-
-save(MEs, mergedColors, geneTree,moduleTraitCor,moduleTraitPvalue,
-file = "wgcna_manual_network20220910.RData")
-
lnames = load(file = "wgcna_manual_network20220910.RData")
-lnames = load(file= "count_matrix_condition_info_dataInput.RData")
-
-
-
-
-
-
-# Convert traits to a color representation: white means low, red means high, grey means missing entry
-
-
-par(mar = c(1,1,2,2))
-par(mfrow=c(1,1))
-sampleTree = hclust(dist(WGCNA_matrix), method = "average")
-
-names(biconditions)
-
##  [1] "H2_N_HO"           "HS_N_LO"           "HS_N_HO"          
-##  [4] "HS_noN_HO"         "LS_noN_HO"         "SW_noN_HO"        
-##  [7] "LS_N_HO"           "H2_noN_HO"         "H2_N_LO"          
-## [10] "H2_noN_LO"         "Sulfide"           "Oxygen"           
-## [13] "Nitrogen"          "Hydrogen"          "Low_to_No_sulfide"
-
biconditions <- biconditions[,1:14]
-
-desired_order <- c("HS_N_HO" , "HS_noN_HO", "HS_N_LO","LS_N_HO","LS_noN_HO","SW_noN_HO","H2_N_HO","H2_noN_HO","H2_N_LO","H2_noN_LO","Sulfide","Oxygen","Nitrogen","Hydrogen") 
-
-biconditions <- biconditions[,match(desired_order,colnames(biconditions))]
-
-
-
-traitColors = numbers2colors(biconditions, signed = TRUE);
-# Plot the sample dendrogram and the colors underneath.
-plotDendroAndColors(sampleTree, traitColors,
-                    groupLabels = names(biconditions),
-                    main = "Sample dendrogram and trait heatmap")
-

-
nGenes = ncol(WGCNA_matrix)
-nSamples = nrow(WGCNA_matrix)
-
-
-
-
-moduleColors = mergedColors
-MEs0 = moduleEigengenes(WGCNA_matrix, moduleColors)$eigengenes
-MEs = orderMEs(MEs0)
-
-##reordering MEs to match figure 
-mycolororder <- c("MEmidnightblue","MEdarkorchid4","MEyellow","MEgreenyellow", "MEblack", "MEdarkgreen","MEdeeppink3","MEpink","MEgrey60","MEbrown","MEplum4","MEskyblue","MEdarkcyan","MEdarkgoldenrod1","MEgrey")
-MEs <- MEs[,match(mycolororder,colnames(MEs))]
-
-biconditions <- biconditions[,match(desired_order,colnames(biconditions))]
-
-
-moduleTraitCor = cor(MEs, biconditions, use = "p")
-
-moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
-
-summary(moduleTraitPvalue)
-
##     HS_N_HO          HS_noN_HO          HS_N_LO            LS_N_HO      
-##  Min.   :0.01196   Min.   :0.00000   Min.   :0.003134   Min.   :0.2678  
-##  1st Qu.:0.03387   1st Qu.:0.03521   1st Qu.:0.137401   1st Qu.:0.4198  
-##  Median :0.14691   Median :0.27026   Median :0.344828   Median :0.5388  
-##  Mean   :0.26185   Mean   :0.32608   Mean   :0.396634   Mean   :0.5700  
-##  3rd Qu.:0.32153   3rd Qu.:0.57388   3rd Qu.:0.671187   3rd Qu.:0.6861  
-##  Max.   :0.91450   Max.   :0.93636   Max.   :0.970864   Max.   :0.9718  
-##    LS_noN_HO           SW_noN_HO            H2_N_HO          H2_noN_HO      
-##  Min.   :0.0000558   Min.   :0.0008013   Min.   :0.01812   Min.   :0.03995  
-##  1st Qu.:0.1967968   1st Qu.:0.2808627   1st Qu.:0.29340   1st Qu.:0.13709  
-##  Median :0.4090254   Median :0.3534417   Median :0.51191   Median :0.21959  
-##  Mean   :0.4217359   Mean   :0.4078278   Mean   :0.49212   Mean   :0.33175  
-##  3rd Qu.:0.6354574   3rd Qu.:0.6356339   3rd Qu.:0.69145   3rd Qu.:0.50608  
-##  Max.   :0.8682022   Max.   :0.7872686   Max.   :0.98003   Max.   :0.78614  
-##     H2_N_LO           H2_noN_LO          Sulfide             Oxygen         
-##  Min.   :0.001603   Min.   :0.05381   Min.   :0.001234   Min.   :0.0000002  
-##  1st Qu.:0.060706   1st Qu.:0.10483   1st Qu.:0.067933   1st Qu.:0.0007034  
-##  Median :0.145126   Median :0.18447   Median :0.174750   Median :0.0103513  
-##  Mean   :0.239831   Mean   :0.27926   Mean   :0.366110   Mean   :0.0822130  
-##  3rd Qu.:0.435491   3rd Qu.:0.36867   3rd Qu.:0.667126   3rd Qu.:0.0620483  
-##  Max.   :0.716300   Max.   :0.77636   Max.   :0.970160   Max.   :0.5030011  
-##     Nitrogen           Hydrogen       
-##  Min.   :0.002802   Min.   :0.000174  
-##  1st Qu.:0.143535   1st Qu.:0.034898  
-##  Median :0.314161   Median :0.068381  
-##  Mean   :0.334997   Mean   :0.203924  
-##  3rd Qu.:0.555353   3rd Qu.:0.219406  
-##  Max.   :0.841863   Max.   :0.900770
-
textMatrix =  paste(signif(moduleTraitCor, 2), "\n(",
-                           signif(moduleTraitPvalue, 1), ")", sep = "")
-
-
-
-
-
-
-
-
-dim(textMatrix) = dim(moduleTraitCor)
-
par(mar = c(3,4, 1,1))
-
-names(biconditions)
-
##  [1] "HS_N_HO"   "HS_noN_HO" "HS_N_LO"   "LS_N_HO"   "LS_noN_HO" "SW_noN_HO"
-##  [7] "H2_N_HO"   "H2_noN_HO" "H2_N_LO"   "H2_noN_LO" "Sulfide"   "Oxygen"   
-## [13] "Nitrogen"  "Hydrogen"
-
head(moduleTraitCor)
-
##                   HS_N_HO  HS_noN_HO    HS_N_LO      LS_N_HO  LS_noN_HO
-## MEmidnightblue 0.02046914 0.77549274 -0.1012675 -0.006751255 0.24346567
-## MEdarkorchid4  0.39363790 0.20790779 -0.3311569  0.208972393 0.24138458
-## MEyellow       0.04160808 0.06616182 -0.5017755  0.093907529 0.06104898
-## MEgreenyellow  0.22355044 0.08593010 -0.1231448  0.134486463 0.16362566
-## MEblack        0.16895507 0.30463652 -0.5213307  0.137603215 0.15939483
-## MEdarkgreen    0.29051730 0.14258634 -0.4439989  0.165647455 0.14956928
-##                  SW_noN_HO      H2_N_HO   H2_noN_HO    H2_N_LO  H2_noN_LO
-## MEmidnightblue  0.10228132 -0.326359663 -0.07370678 -0.3379315 -0.2956921
-## MEdarkorchid4   0.06986956 -0.214684994  0.29526963 -0.5156230 -0.3555769
-## MEyellow        0.18334438  0.064921651  0.15299999  0.1231695 -0.2853864
-## MEgreenyellow   0.07837730 -0.428591372  0.23740423 -0.2124148 -0.1592232
-## MEblack         0.17555561 -0.124569788  0.24385690 -0.3445732 -0.1995284
-## MEdarkgreen    -0.05142174 -0.004771732  0.23089975 -0.2973662 -0.1816616
-##                     Sulfide    Oxygen   Nitrogen    Hydrogen
-## MEmidnightblue  0.454784198 0.4810992 -0.4511045 -0.63300331
-## MEdarkorchid4   0.177011014 0.7871273 -0.2753128 -0.48415103
-## MEyellow       -0.257937228 0.4346851 -0.1069013  0.03411204
-## MEgreenyellow   0.121985353 0.3239114 -0.2436685 -0.34465859
-## MEblack        -0.031252607 0.6974892 -0.4103493 -0.26014469
-## MEdarkgreen    -0.007132597 0.6042628 -0.1739832 -0.15486885
-
# Display the correlation values within a heatmap plot
-labeledHeatmap(Matrix = moduleTraitCor,
-               xLabels = names(biconditions),
-               yLabels = names(MEs),
-               ySymbols = names(MEs),
-               colorLabels = FALSE,
-               colors = blueWhiteRed(50),
-               textMatrix = textMatrix,
-               setStdMargins = FALSE,
-               cex.text = 0.5,
-               cex.lab.x =0.4,
-               cex.lab.y=0.4,
-               zlim = c(-1,1),
-               main = paste("Module-trait relationships"))
-

-
-
-

MM and GS calc

-
nGenes = ncol(WGCNA_matrix)
-nSamples = nrow(WGCNA_matrix)
-
-names(biconditions)
-
##  [1] "HS_N_HO"   "HS_noN_HO" "HS_N_LO"   "LS_N_HO"   "LS_noN_HO" "SW_noN_HO"
-##  [7] "H2_N_HO"   "H2_noN_HO" "H2_N_LO"   "H2_noN_LO" "Sulfide"   "Oxygen"   
-## [13] "Nitrogen"  "Hydrogen"
-
Sulfide = as.data.frame(biconditions$Sulfide);
-names(Sulfide)[1] <- "Sulfide"
-Oxygen <- as.data.frame(biconditions$Oxygen)
-names(Oxygen)[1] <- "Oxygen"
-Nitrogen <- as.data.frame(biconditions$Nitrogen)
-names(Nitrogen)[1] <- "Nitrogen"
-Hydrogen <- as.data.frame(biconditions$Hydrogen)
-names(Hydrogen)[1] <- "Hydrogen"
-H2_N_HO <- as.data.frame(biconditions$H2_N_HO)
-names(H2_N_HO)[1] <- "H2_N_HO"
-HS_N_LO <- as.data.frame(biconditions$HS_N_LO)
-names(HS_N_LO)[1] <- "HS_N_LO"
-HS_N_HO <- as.data.frame(biconditions$HS_N_HO)
-names(HS_N_HO)[1] <- "HS_N_HO"
-HS_noN_HO <- as.data.frame(biconditions$HS_noN_HO)
-names(HS_noN_HO)[1] <- "HS_noN_HO"
-LS_noN_HO <- as.data.frame(biconditions$LS_noN_HO)
-names(LS_noN_HO)[1] <- "LS_noN_HO"
-SW_noN_HO <- as.data.frame(biconditions$SW_noN_HO)
-names(SW_noN_HO)[1] <- "SW_noN_HO"
-LS_N_HO <- as.data.frame(biconditions$LS_N_HO)
-names(LS_N_HO)[1] <- "LS_N_HO"
-H2_noN_HO <- as.data.frame(biconditions$H2_noN_HO)
-names(H2_noN_HO)[1] <- "H2_noN_HO"
-H2_N_LO <- as.data.frame(biconditions$H2_N_LO)
-names(H2_N_LO)[1] <- "H2_N_LO"
-H2_noN_LO <- as.data.frame(biconditions$H2_noN_LO)
-names(H2_noN_LO)[1] <- "H2_noN_LO"
-
-modNames = substring(names(MEs), 3)
-
-
-geneModuleMembership = as.data.frame(cor(WGCNA_matrix, MEs, use = "p"));
-MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
-names(geneModuleMembership) = paste("MM", modNames, sep="");
-names(MMPvalue) = paste("p.MM", modNames, sep="");
-
-
-geneSignificance=as.data.frame(cor(WGCNA_matrix, biconditions, use = "p"))
-GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneSignificance), nSamples))
-names(geneSignificance) = paste("GS.", names(biconditions), sep="");
-names(GSPvalue) = paste("p.Sul.", names(biconditions), sep="");
-
-str(geneModuleMembership)
-
## 'data.frame':    2500 obs. of  15 variables:
-##  $ MMmidnightblue  : num  -0.298 0.648 0.768 0.625 0.599 ...
-##  $ MMdarkorchid4   : num  -0.474 0.848 0.291 0.445 0.409 ...
-##  $ MMyellow        : num  -0.0623 0.163 -0.2172 0.211 -0.0863 ...
-##  $ MMgreenyellow   : num  -0.1228 0.3899 0.0313 0.0184 -0.2098 ...
-##  $ MMblack         : num  -0.1697 0.5395 0.0647 0.4343 0.2309 ...
-##  $ MMdarkgreen     : num  -0.0548 0.4469 -0.0929 0.2286 -0.0154 ...
-##  $ MMdeeppink3     : num  -0.0172 0.4138 0.0872 0.1792 0.0883 ...
-##  $ MMpink          : num  -0.016 0.5972 0.1198 0.1043 -0.0477 ...
-##  $ MMgrey60        : num  -0.656 0.183 0.255 0.474 0.511 ...
-##  $ MMbrown         : num  -0.138 -0.295 0.237 0.274 0.475 ...
-##  $ MMplum4         : num  0.648 -0.599 -0.107 -0.237 -0.344 ...
-##  $ MMskyblue       : num  0.247 -0.286 -0.58 -0.626 -0.677 ...
-##  $ MMdarkcyan      : num  0.256 -0.84 -0.471 -0.44 -0.391 ...
-##  $ MMdarkgoldenrod1: num  -0.173 -0.442 -0.579 -0.381 -0.328 ...
-##  $ MMgrey          : num  -0.3438 0.3343 -0.1019 0.0722 0.278 ...
-
str(geneSignificance)
-
## 'data.frame':    2500 obs. of  14 variables:
-##  $ GS.HS_N_HO  : num  -0.119 0.376 -0.219 -0.477 -0.338 ...
-##  $ GS.HS_noN_HO: num  0.134 0.419 0.675 0.431 0.299 ...
-##  $ GS.HS_N_LO  : num  0.1614 -0.3814 0.0652 -0.2549 -0.0556 ...
-##  $ GS.LS_N_HO  : num  0.0746 0.1922 0.1029 0.0454 -0.0436 ...
-##  $ GS.LS_noN_HO: num  -0.567 0.24 0.114 0.362 0.321 ...
-##  $ GS.SW_noN_HO: num  -0.1715 -0.1254 0.0558 0.1978 0.2951 ...
-##  $ GS.H2_N_HO  : num  0.1176 -0.0198 -0.3035 0.0256 -0.0918 ...
-##  $ GS.H2_noN_HO: num  -0.1839 0.0707 -0.1085 0.2563 0.2329 ...
-##  $ GS.H2_N_LO  : num  0.196 -0.365 -0.251 -0.358 -0.285 ...
-##  $ GS.H2_noN_LO: num  0.357 -0.406 -0.131 -0.229 -0.334 ...
-##  $ GS.Sulfide  : num  0.1156 0.2704 0.3413 -0.1966 -0.0622 ...
-##  $ GS.Oxygen   : num  -0.468 0.754 0.208 0.551 0.442 ...
-##  $ GS.Nitrogen : num  0.259 -0.119 -0.364 -0.611 -0.489 ...
-##  $ GS.Hydrogen : num  0.298 -0.441 -0.486 -0.187 -0.293 ...
-
geneSulfideSignificance=as.data.frame(cor(WGCNA_matrix, Sulfide, use = "p"))
-SulPvalue = as.data.frame(corPvalueStudent(as.matrix(geneSulfideSignificance), nSamples))
-names(geneSulfideSignificance) = paste("Sul.", names(Sulfide), sep="");
-names(SulPvalue) = paste("p.Sul.", names(Sulfide), sep="");
-
-
-geneOxygenSignificance=as.data.frame(cor(WGCNA_matrix, Oxygen, use = "p"))
-OxyPvalue = as.data.frame(corPvalueStudent(as.matrix(geneOxygenSignificance), nSamples))
-names(geneOxygenSignificance) = paste("Oxy.", names(Oxygen), sep="");
-names(OxyPvalue) = paste("p.Oxy.", names(Oxygen), sep="");
-
-
-geneNitrogenSignificance=as.data.frame(cor(WGCNA_matrix, Nitrogen, use = "p"))
-NitPvalue = as.data.frame(corPvalueStudent(as.matrix(geneNitrogenSignificance), nSamples))
-names(geneNitrogenSignificance) = paste("Nit.", names(Nitrogen), sep="");
-names(NitPvalue) = paste("p.Nit.", names(Nitrogen), sep="");
-
-
-
-geneHydrogenSignificance=as.data.frame(cor(WGCNA_matrix, Hydrogen, use = "p"))
-HydPvalue = as.data.frame(corPvalueStudent(as.matrix(geneHydrogenSignificance), nSamples))
-names(geneHydrogenSignificance) = paste("Hyd.", names(Hydrogen), sep="");
-names(HydPvalue) = paste("p.Hyd.", names(Hydrogen), sep="");
-
-
-
-
-
-####making dataframe to add to cytoscape layer#########################################
-sulfsig <- geneSulfideSignificance
-sulfsig$locusID <- rownames(sulfsig)
-rownames(sulfsig) <- NULL
-sulfsig <- sulfsig[,c(2,1)]
-
-oxysig <- geneOxygenSignificance
-oxysig$locusID <- rownames(oxysig)
-rownames(oxysig) <- NULL
-oxysig <- oxysig[,c(2,1)]
-
-nitrosig <- geneNitrogenSignificance
-nitrosig$locusID <- rownames(nitrosig)
-rownames(nitrosig) <- NULL
-nitrosig <- nitrosig[,c(2,1)]
-
-
-hydrosig <- geneHydrogenSignificance
-hydrosig$locusID <- rownames(hydrosig)
-rownames(hydrosig) <- NULL
-hydrosig <- hydrosig[,c(2,1)]
-
-
-sig.genes <- cbind(sulfsig,oxysig$Oxy.Oxygen,nitrosig$Nit.Nitrogen,hydrosig$Hyd.Hydrogen)
-names(sig.genes)[2] <- "Sulfide_GS"
-names(sig.genes)[3] <- "Oxygen_GS"
-names(sig.genes)[4] <- "Nitrogen_GS"
-names(sig.genes)[5] <- "Hydrogen_GS"
-names(sig.genes)
-
## [1] "locusID"     "Sulfide_GS"  "Oxygen_GS"   "Nitrogen_GS" "Hydrogen_GS"
-
#write.csv(sig.genes,"sig.genes.csv")
-
-
-
-#########################making datframe to oxygen and sulfide dataframe########################################################
-
-sig.genes3 <- cbind(sulfsig,oxysig$Oxy.Oxygen)
-colnames(sig.genes3)
-
## [1] "locusID"           "Sul.Sulfide"       "oxysig$Oxy.Oxygen"
-
colnames(sig.genes3) <- c("locusID","corSulfide","corOxygen")
-
-sulP <- SulPvalue
-sulP <- tibble(locusID = rownames(sulP), sulP)
-oxyP <- OxyPvalue
-oxyP <- tibble(locusID = rownames(oxyP), oxyP)
-
-colnames(sulP)
-
## [1] "locusID"       "p.Sul.Sulfide"
-
colnames(sulP) <- c("locusID","pval_Sulfide")
-
-colnames(oxyP)
-
## [1] "locusID"      "p.Oxy.Oxygen"
-
colnames(oxyP) <- c("locusID","pval_Oxygen")
-
-
-
-
-GMM <- geneModuleMembership
-
-GMMP <- MMPvalue
-
-head(sig.genes3)
-
##              locusID  corSulfide  corOxygen
-## 1 gene-L0Y14_RS04070  0.11559790 -0.4677785
-## 2 gene-L0Y14_RS09775  0.27041835  0.7541553
-## 3 gene-L0Y14_RS01990  0.34126255  0.2075954
-## 4 gene-L0Y14_RS09950 -0.19662226  0.5510156
-## 5 gene-L0Y14_RS09955 -0.06219137  0.4415475
-## 6 gene-L0Y14_RS12365 -0.23047838  0.2361504
-
head(GMM)
-
##                    MMmidnightblue MMdarkorchid4    MMyellow MMgreenyellow
-## gene-L0Y14_RS04070     -0.2978002    -0.4742295 -0.06228793   -0.12283434
-## gene-L0Y14_RS09775      0.6475847     0.8479211  0.16301606    0.38986765
-## gene-L0Y14_RS01990      0.7683016     0.2909617 -0.21720940    0.03127418
-## gene-L0Y14_RS09950      0.6250107     0.4452151  0.21104415    0.01837192
-## gene-L0Y14_RS09955      0.5993774     0.4088890 -0.08634607   -0.20976909
-## gene-L0Y14_RS12365      0.3632049     0.1311813 -0.06645301   -0.38765511
-##                        MMblack MMdarkgreen MMdeeppink3      MMpink   MMgrey60
-## gene-L0Y14_RS04070 -0.16971124 -0.05481641 -0.01719957 -0.01598375 -0.6563363
-## gene-L0Y14_RS09775  0.53953441  0.44692677  0.41381637  0.59720934  0.1830737
-## gene-L0Y14_RS01990  0.06471391 -0.09289025  0.08717597  0.11976742  0.2550905
-## gene-L0Y14_RS09950  0.43427234  0.22863891  0.17923724  0.10431595  0.4740972
-## gene-L0Y14_RS09955  0.23088121 -0.01544034  0.08834925 -0.04765631  0.5107174
-## gene-L0Y14_RS12365 -0.00202547 -0.12064102 -0.25455458 -0.31551755  0.7070535
-##                       MMbrown    MMplum4  MMskyblue  MMdarkcyan
-## gene-L0Y14_RS04070 -0.1384809  0.6483345  0.2465064  0.25589117
-## gene-L0Y14_RS09775 -0.2948900 -0.5986951 -0.2859013 -0.84047590
-## gene-L0Y14_RS01990  0.2365179 -0.1068451 -0.5795192 -0.47056380
-## gene-L0Y14_RS09950  0.2737269 -0.2366527 -0.6256319 -0.43994105
-## gene-L0Y14_RS09955  0.4747395 -0.3438988 -0.6771603 -0.39111707
-## gene-L0Y14_RS12365  0.4675125 -0.2198442 -0.5555037 -0.09885019
-##                    MMdarkgoldenrod1      MMgrey
-## gene-L0Y14_RS04070       -0.1725039 -0.34377603
-## gene-L0Y14_RS09775       -0.4423334  0.33425164
-## gene-L0Y14_RS01990       -0.5786492 -0.10188510
-## gene-L0Y14_RS09950       -0.3808909  0.07218213
-## gene-L0Y14_RS09955       -0.3278772  0.27804497
-## gene-L0Y14_RS12365       -0.2081241  0.06962918
-
head(GMMP)
-
##                    p.MMmidnightblue p.MMdarkorchid4 p.MMyellow p.MMgreenyellow
-## gene-L0Y14_RS04070     1.099727e-01    8.107296e-03  0.7436784      0.51785242
-## gene-L0Y14_RS09775     1.095424e-04    3.335534e-09  0.3893952      0.03319238
-## gene-L0Y14_RS01990     7.146697e-07    1.187803e-01  0.2489174      0.86968629
-## gene-L0Y14_RS09950     2.220815e-04    1.368570e-02  0.2629330      0.92323522
-## gene-L0Y14_RS09955     4.649887e-04    2.486376e-02  0.6500519      0.26589457
-## gene-L0Y14_RS12365     4.852036e-02    4.895817e-01  0.7271658      0.03429251
-##                      p.MMblack p.MMdarkgreen p.MMdeeppink3     p.MMpink
-## gene-L0Y14_RS04070 0.369944508    0.77357610    0.92812078 0.9331901555
-## gene-L0Y14_RS09775 0.002091438    0.01328534    0.02301156 0.0004935721
-## gene-L0Y14_RS01990 0.734046428    0.62539413    0.64690412 0.5284364628
-## gene-L0Y14_RS09950 0.016490067    0.22426245    0.34327557 0.5832907609
-## gene-L0Y14_RS09955 0.219626475    0.93545678    0.64246404 0.8025293779
-## gene-L0Y14_RS12365 0.991524596    0.52541108    0.17462403 0.0894278762
-##                      p.MMgrey60   p.MMbrown    p.MMplum4  p.MMskyblue
-## gene-L0Y14_RS04070 8.200365e-05 0.465519128 0.0001068964 1.891247e-01
-## gene-L0Y14_RS09775 3.328721e-01 0.113659304 0.0004738236 1.256266e-01
-## gene-L0Y14_RS01990 1.736871e-01 0.208261116 0.5741429330 7.907558e-04
-## gene-L0Y14_RS09950 8.127494e-03 0.143281237 0.2079944552 2.179639e-04
-## gene-L0Y14_RS09955 3.928636e-03 0.008029824 0.0627743027 3.961873e-05
-## gene-L0Y14_RS12365 1.253228e-05 0.009188101 0.2430811504 1.438928e-03
-##                    p.MMdarkcyan p.MMdarkgoldenrod1   p.MMgrey
-## gene-L0Y14_RS04070 1.722940e-01       0.3620032361 0.06287441
-## gene-L0Y14_RS09775 6.204607e-09       0.0143824230 0.07102686
-## gene-L0Y14_RS01990 8.682956e-03       0.0008087425 0.59214321
-## gene-L0Y14_RS09950 1.498308e-02       0.0378395675 0.70464811
-## gene-L0Y14_RS09955 3.258386e-02       0.0769249135 0.13682488
-## gene-L0Y14_RS12365 6.032765e-01       0.2697473140 0.71465331
-
dim(sig.genes3)
-
## [1] 2500    3
-
####before I can merge these I have to change the rownames to columns
-
-
-
-# Assuming your datasets are named df1, df2, df3
-# Convert row names to column for df2 and df3
-GMM <- tibble(locusID = rownames(GMM), GMM)
-GMMP <- tibble(locusID = rownames(GMMP), GMMP)
-
-
-
-colnames(GMM) <- c("locusID", "GM_MEpurple","GM_MEmidnightblue", "GM_MElightyellow","GM_MEgreenyellow","GM_MEblack","GM_MEgreen","GM_MEcherry","GM_MEpink","GM_MEbrown","GM_MEgrey60", "GM_MElightpurple","GM_MEskyblue","GM_MEteal","GM_MEgold","GM_MEgrey")
-
-
-
-
-colnames(GMMP) <- c("locusID", "GMPval_MEpurple","GMPval_MEmidnightblue", "GMPval_MElightyellow","GMPval_MEgreenyellow","GMPval_MEblack","GMPval_MEgreen","GMPval_MEcherry","GMPval_MEpink","GMPval_MEbrown","GMPval_MEgrey60", "GMPval_MElightpurple","GMPval_MEskyblue","GMPval_MEteal","GMPval_MEgold","GMPval_MEgrey")
-
-
-merged_df <- merge(sig.genes3, sulP, by = "locusID", all.x = TRUE)
-merged_df2 <- merge(merged_df, oxyP, by = "locusID", all.x = TRUE)
-
-merged_df3 <- merge(merged_df2, GMM, by = "locusID", all.x = TRUE)
-merged_df4 <- merge(merged_df3, GMMP, by = "locusID", all.x = TRUE)
-
-#write.csv(merged_df4,"GM_GS_sulfide_oxygen.csv")
-
-##################################################################################################################
-
-
-

scatter plots

-
names(modNames)
-
## NULL
-
moduleColors = mergedColors
-table(moduleColors)
-
## moduleColors
-##          black          brown       darkcyan darkgoldenrod1      darkgreen 
-##            124            233            239             99            143 
-##    darkorchid4      deeppink3    greenyellow           grey         grey60 
-##            100            214            160            242            203 
-##   midnightblue           pink          plum4        skyblue         yellow 
-##             83            385            109            120             46
-
module = "midnightblue"
-column = match(module, modNames);
-moduleGenes = moduleColors==module;
-
-module2 = "brown"
-column2 = match(module2, modNames);
-moduleGenes2 = moduleColors==module2;
-
-module3 = "darkgoldenrod1"
-column3 = match(module3, modNames);
-moduleGenes3 = moduleColors==module3;
-
-
-module4 = "pink"
-column4 = match(module4, modNames);
-moduleGenes4 = moduleColors==module4;
-
-module5 = "black"
-column5 = match(module5, modNames);
-moduleGenes5 = moduleColors==module5;
-
-module6 = "deeppink3"
-column6 = match(module6, modNames);
-moduleGenes6 = moduleColors==module6;
-
-
-module7 = "darkcyan"
-column7 = match(module7, modNames);
-moduleGenes7 = moduleColors==module7;
-
-module8 = "darkgreen"
-column8 = match(module8, modNames);
-moduleGenes8 = moduleColors==module8;
-
-
-module9 = "greenyellow"
-column9 = match(module9, modNames);
-moduleGenes9 = moduleColors==module9;
-
-
-module10 = "grey60"
-column10 = match(module10, modNames);
-moduleGenes10 = moduleColors==module10;
-
-module11 = "yellow"
-column11 = match(module11, modNames);
-moduleGenes11 = moduleColors==module11;
-
-module12 = "skyblue"
-column12 = match(module12, modNames);
-moduleGenes12 = moduleColors==module12;
-
-
-module13 = "plum4"
-column13 = match(module13, modNames);
-moduleGenes13 = moduleColors==module13;
-
-
-module14 = "darkorchid4"
-column14 = match(module14, modNames);
-moduleGenes14 = moduleColors==module14;
-
-module15 = "grey"
-column15 = match(module15, modNames);
-moduleGenes15 = moduleColors==module15;
-
-
-#sizeGrWindow(7, 7);  # uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))
-
- #midnight blue and brown       
-verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
-                   abs(geneSulfideSignificance[moduleGenes, 1]),
-                   xlab = paste("Module Membership in", module, "module"),
-                   ylab = "Sulfide",
-                    main = paste("Module membership vs. gene significance\n"),
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
-                   abs(geneOxygenSignificance[moduleGenes, 1]),
-                   xlab = paste("Module Membership in", module, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
-                   abs(geneHydrogenSignificance[moduleGenes, 1]),
-                   xlab = paste("Module Membership in", module, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
-                   abs(geneNitrogenSignificance[moduleGenes, 1]),
-                   xlab = paste("Module Membership in", module, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
-                   abs(geneSulfideSignificance[moduleGenes2, 1]),
-                   xlab = paste("Module Membership in", module2, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
-                   abs(geneOxygenSignificance[moduleGenes2, 1]),
-                   xlab = paste("Module Membership in", module2, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
-                   abs(geneHydrogenSignificance[moduleGenes2, 1]),
-                   xlab = paste("Module Membership in", module2, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
-                   abs(geneNitrogenSignificance[moduleGenes2, 1]),
-                   xlab = paste("Module Membership in", module2, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
-                    abline=TRUE)
-

-
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))              
-
-####gold and pink
-verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
-                   abs(geneSulfideSignificance[moduleGenes3, 1]),
-                   xlab = paste("Module Membership in", module3, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
-                   abs(geneOxygenSignificance[moduleGenes3, 1]),
-                   xlab = paste("Module Membership in", module3, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
-                   abs(geneHydrogenSignificance[moduleGenes3, 1]),
-                   xlab = paste("Module Membership in", module3, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
-                   abs(geneNitrogenSignificance[moduleGenes3, 1]),
-                   xlab = paste("Module Membership in", module3, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
-                  abline=TRUE)
-#pink
-verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
-                   abs(geneSulfideSignificance[moduleGenes4, 1]),
-                   xlab = paste("Module Membership in", module4, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
-                   abs(geneOxygenSignificance[moduleGenes4, 1]),
-                   xlab = paste("Module Membership in", module4, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
-                   abs(geneHydrogenSignificance[moduleGenes4, 1]),
-                   xlab = paste("Module Membership in", module4, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
-                   abs(geneNitrogenSignificance[moduleGenes4, 1]),
-                   xlab = paste("Module Membership in", module4, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
-                   abline=TRUE)
-

-
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))   
-
-
-####black and blue
-verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
-                   abs(geneSulfideSignificance[moduleGenes5, 1]),
-                   xlab = paste("Module Membership in", module5, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
-                   abs(geneOxygenSignificance[moduleGenes5, 1]),
-                   xlab = paste("Module Membership in", module5, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
-                   abs(geneHydrogenSignificance[moduleGenes5, 1]),
-                   xlab = paste("Module Membership in", module5, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
-                   abs(geneNitrogenSignificance[moduleGenes5, 1]),
-                   xlab = paste("Module Membership in", module5, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
-                  abline=TRUE)
-#blue
-verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
-                   abs(geneSulfideSignificance[moduleGenes6, 1]),
-                   xlab = paste("Module Membership in", module6, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
-                   abs(geneOxygenSignificance[moduleGenes6, 1]),
-                   xlab = paste("Module Membership in", module6, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
-                   abs(geneHydrogenSignificance[moduleGenes6, 1]),
-                   xlab = paste("Module Membership in", module6, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
-                   abs(geneNitrogenSignificance[moduleGenes6, 1]),
-                   xlab = paste("Module Membership in", module6, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
-                   abline=TRUE)
-

-
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))   
-
-# darkcyan and green (7 and 8)
-#darkcyan
-verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
-                   abs(geneSulfideSignificance[moduleGenes7, 1]),
-                   xlab = paste("Module Membership in", module7, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
-                   abs(geneOxygenSignificance[moduleGenes7, 1]),
-                   xlab = paste("Module Membership in", module7, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
-                   abs(geneHydrogenSignificance[moduleGenes7, 1]),
-                   xlab = paste("Module Membership in", module7, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
-                   abs(geneNitrogenSignificance[moduleGenes7, 1]),
-                   xlab = paste("Module Membership in", module7, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
-                  abline=TRUE)
-#green
-verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
-                   abs(geneSulfideSignificance[moduleGenes8, 1]),
-                   xlab = paste("Module Membership in", module8, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
-                   abs(geneOxygenSignificance[moduleGenes8, 1]),
-                   xlab = paste("Module Membership in", module8, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
-                   abs(geneHydrogenSignificance[moduleGenes8, 1]),
-                   xlab = paste("Module Membership in", module8, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
-                   abs(geneNitrogenSignificance[moduleGenes8, 1]),
-                   xlab = paste("Module Membership in", module8, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
-                   abline=TRUE)
-

-
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))   
-
-
-# greenyellow and grey60
-#greenyellow
-verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
-                   abs(geneSulfideSignificance[moduleGenes9, 1]),
-                   xlab = paste("Module Membership in", module9, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
-                   abs(geneOxygenSignificance[moduleGenes9, 1]),
-                   xlab = paste("Module Membership in", module9, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
-                   abs(geneHydrogenSignificance[moduleGenes9, 1]),
-                   xlab = paste("Module Membership in", module9, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
-                   abs(geneNitrogenSignificance[moduleGenes9, 1]),
-                   xlab = paste("Module Membership in", module9, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
-                  abline=TRUE)
-#grey60
-verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
-                   abs(geneSulfideSignificance[moduleGenes10, 1]),
-                   xlab = paste("Module Membership in", module10, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
-                   abs(geneOxygenSignificance[moduleGenes10, 1]),
-                   xlab = paste("Module Membership in", module10, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
-                   abs(geneHydrogenSignificance[moduleGenes10, 1]),
-                   xlab = paste("Module Membership in", module10, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
-                   abs(geneNitrogenSignificance[moduleGenes10, 1]),
-                   xlab = paste("Module Membership in", module10, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
-                   abline=TRUE)
-

-
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))   
-
-#lightyellow and magenta (11,12)
-#lightyellow
-verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
-                   abs(geneSulfideSignificance[moduleGenes11, 1]),
-                   xlab = paste("Module Membership in", module11, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
-                   abs(geneOxygenSignificance[moduleGenes11, 1]),
-                   xlab = paste("Module Membership in", module11, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
-                   abs(geneHydrogenSignificance[moduleGenes11, 1]),
-                   xlab = paste("Module Membership in", module11, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
-                   abs(geneNitrogenSignificance[moduleGenes11, 1]),
-                   xlab = paste("Module Membership in", module11, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
-                  abline=TRUE)
-#magenta
-verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
-                   abs(geneSulfideSignificance[moduleGenes12, 1]),
-                   xlab = paste("Module Membership in", module12, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
-                   abs(geneOxygenSignificance[moduleGenes12, 1]),
-                   xlab = paste("Module Membership in", module12, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
-                   abs(geneHydrogenSignificance[moduleGenes12, 1]),
-                   xlab = paste("Module Membership in", module12, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
-                   abs(geneNitrogenSignificance[moduleGenes12, 1]),
-                   xlab = paste("Module Membership in", module12, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
-                   abline=TRUE)
-

-
str(geneSulfideSignificance)
-
## 'data.frame':    2500 obs. of  1 variable:
-##  $ Sul.Sulfide: num  0.1156 0.2704 0.3413 -0.1966 -0.0622 ...
-
what <- geneSulfideSignificance
-what$locusID <- rownames(what)
-what <- what[,c(2,1)]
-# Writing the dataframe to a CSV file so I can use this in cytoscape for a layer
-##write.csv(what, file = "geneSulfideSignificance.csv", row.names = FALSE)
-
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))   
-
-#purple and salmon (13,14)
-#purple
-verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
-                   abs(geneSulfideSignificance[moduleGenes13, 1]),
-                   xlab = paste("Module Membership in", module13, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
-                   abs(geneOxygenSignificance[moduleGenes13, 1]),
-                   xlab = paste("Module Membership in", module13, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
-                   abs(geneHydrogenSignificance[moduleGenes13, 1]),
-                   xlab = paste("Module Membership in", module13, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
-                   abs(geneNitrogenSignificance[moduleGenes13, 1]),
-                   xlab = paste("Module Membership in", module13, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
-                  abline=TRUE)
-#salmon
-verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
-                   abs(geneSulfideSignificance[moduleGenes14, 1]),
-                   xlab = paste("Module Membership in", module14, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
-                   abs(geneOxygenSignificance[moduleGenes14, 1]),
-                   xlab = paste("Module Membership in", module14, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
-                   abs(geneHydrogenSignificance[moduleGenes14, 1]),
-                   xlab = paste("Module Membership in", module14, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
-                   abs(geneNitrogenSignificance[moduleGenes14, 1]),
-                   xlab = paste("Module Membership in", module14, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
-                   abline=TRUE)
-

-
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
-par(mfrow = c(2,4))
-par(mar=c(5,4,2,2))   
-
-
-#grey
-verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
-                   abs(geneSulfideSignificance[moduleGenes15, 1]),
-                   xlab = paste("Module Membership in", module15, "module"),
-                   ylab = "Sulfide",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
-                   abs(geneOxygenSignificance[moduleGenes15, 1]),
-                   xlab = paste("Module Membership in", module15, "module"),
-                   ylab = "Oxygen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
-                   abs(geneHydrogenSignificance[moduleGenes15, 1]),
-                   xlab = paste("Module Membership in", module15, "module"),
-                   ylab = "Hydrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
-                   abs(geneNitrogenSignificance[moduleGenes15, 1]),
-                   xlab = paste("Module Membership in", module15, "module"),
-                   ylab = "Nitrogen",
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
-                   abline=TRUE)
-

-
######now I am just making the plots that will go next to the gene significance barplots that I made in the previous section, I will do the two most significant modules for each treatment condition
-
-### for sulfide that is teal and gold
-
-
-#sizeGrWindow(3,5);
-par(mfrow = c(1,2))
-par(mar=c(4,3,4,3)) 
-
-verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
-                   abs(geneSulfideSignificance[moduleGenes7, 1]),
-                   xlab = paste("Module Membership in teal module"),
-                   ylab = "Sulfide",
-                   pch = 16,
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
-                   abline=TRUE)
-verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
-                   abs(geneSulfideSignificance[moduleGenes3, 1]),
-                   xlab = paste("Module Membership in gold module"),
-                   ylab = "Sulfide",
-                   pch = 16,
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
-                   abline=TRUE)
-

-
#sizeGrWindow(3,5);
-par(mfrow = c(1,2))
-par(mar=c(4,3,4,3)) 
-
-#### for oxygen in black and purple modules
-
-
-verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
-                   abs(geneOxygenSignificance[moduleGenes14, 1]),
-                   xlab = paste("Module Membership in purple"), 
-                   ylab = "Oxygen",
-                   pch = 16,
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
-                   abline = TRUE, xaxt = "n") # Suppress x-axis
-
-
-
-verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
-                   abs(geneOxygenSignificance[moduleGenes5, 1]),
-                   xlab = paste("Module Membership in black"),
-                   ylab = "oxygen",
-                   pch = 16,
-                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
-                   abline=TRUE)
-

-
-
-

genes of significance for sulfide limiting response (a version of -“hub” genes)

-
####genes of significance in teal module (darkcyan) under Sulfide 
-#geneSulfideSignificance
-
-FilterGenes= abs(geneSulfideSignificance)> .2 & abs(geneModuleMembership$MMdarkcyan)>.8 & SulPvalue <= 0.05
-table(FilterGenes)
-
## FilterGenes
-## FALSE  TRUE 
-##  2450    50
-
head(FilterGenes)
-
##                    Sul.Sulfide
-## gene-L0Y14_RS04070       FALSE
-## gene-L0Y14_RS09775       FALSE
-## gene-L0Y14_RS01990       FALSE
-## gene-L0Y14_RS09950       FALSE
-## gene-L0Y14_RS09955       FALSE
-## gene-L0Y14_RS12365       FALSE
-
Sulfideteal_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
-Sulfideteal_filtered <- gsub("e.L", "e-L", Sulfideteal_filtered)
-
-gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
-gene_names <- gene_names[,2:3]
-names(gene_names)[1] <- "locusID"
-
-Sulfideteal_filtered <- as.data.frame(Sulfideteal_filtered)
-names(Sulfideteal_filtered)[1] <- "locusID"
-str(Sulfideteal_filtered)
-
## 'data.frame':    50 obs. of  1 variable:
-##  $ locusID: chr  "gene-L0Y14_RS07425" "gene-L0Y14_RS07440" "gene-L0Y14_RS07430" "gene-L0Y14_RS07435" ...
-
Sulfideteal_filtered <- left_join(Sulfideteal_filtered,gene_names)
-
## Joining with `by = join_by(locusID)`
-
dim(Sulfideteal_filtered)
-
## [1] 50  2
-
names(Sulfideteal_filtered)
-
## [1] "locusID" "gene"
-
list <- 1:50
-condition <- rep("Sulfide",length(list))
-moduleFil <- rep("teal",length(list))
-a <- cbind(Sulfideteal_filtered,condition,moduleFil)
-
-sul.teal <- a
-
-
-####genes of significance in gold module under Sulfide
-#geneSulfideSignificance
-FilterGenes= abs(geneSulfideSignificance)> .2 & abs(geneModuleMembership$MMdarkgoldenrod1)>.8 & SulPvalue <= 0.05
-table(FilterGenes)
-
## FilterGenes
-## FALSE  TRUE 
-##  2478    22
-
Sulfidegold_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
-Sulfidegold_filtered <- gsub("e.L", "e-L", Sulfidegold_filtered)
-
-gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
-gene_names <- gene_names[,2:3]
-names(gene_names)[1] <- "locusID"
-
-Sulfidegold_filtered <- as.data.frame(Sulfidegold_filtered)
-names(Sulfidegold_filtered)[1] <- "locusID"
-str(Sulfidegold_filtered)
-
## 'data.frame':    22 obs. of  1 variable:
-##  $ locusID: chr  "gene-L0Y14_RS01985" "gene-L0Y14_RS07695" "gene-L0Y14_RS00080" "gene-L0Y14_RS12030" ...
-
Sulfidegold_filtered <- left_join(Sulfidegold_filtered,gene_names)
-
## Joining with `by = join_by(locusID)`
-
dim(Sulfidegold_filtered)
-
## [1] 22  2
-
names(Sulfidegold_filtered)
-
## [1] "locusID" "gene"
-
list <- 1:22
-condition <- rep("Sulfide",length(list))
-moduleFil <- rep("darkgoldenrod1",length(list))
-d <- cbind(Sulfidegold_filtered,condition,moduleFil)
-sulfide_hub_gold <- d
-
-
-

genes of significance for sulfide limiting response (a version of -“hub” genes)

-
####genes of significance in black module under Oxygen 
-#geneOxygenSignificance
-FilterGenes= abs(geneOxygenSignificance)> .2 & abs(geneModuleMembership$MMblack)>.8 & OxyPvalue <= 0.05
-table(FilterGenes)
-
## FilterGenes
-## FALSE  TRUE 
-##  2467    33
-
head(FilterGenes)
-
##                    Oxy.Oxygen
-## gene-L0Y14_RS04070      FALSE
-## gene-L0Y14_RS09775      FALSE
-## gene-L0Y14_RS01990      FALSE
-## gene-L0Y14_RS09950      FALSE
-## gene-L0Y14_RS09955      FALSE
-## gene-L0Y14_RS12365      FALSE
-
Oxygenblack_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
-Oxygenblack_filtered <- gsub("e.L", "e-L", Oxygenblack_filtered)
-
-gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
-gene_names <- gene_names[,2:3]
-names(gene_names)[1] <- "locusID"
-
-Oxygenblack_filtered <- as.data.frame(Oxygenblack_filtered)
-names(Oxygenblack_filtered)[1] <- "locusID"
-str(Oxygenblack_filtered)
-
## 'data.frame':    33 obs. of  1 variable:
-##  $ locusID: chr  "gene-L0Y14_RS14085" "gene-L0Y14_RS12670" "gene-L0Y14_RS08400" "gene-L0Y14_RS05880" ...
-
Oxygenblack_filtered <- left_join(Oxygenblack_filtered,gene_names)
-
## Joining with `by = join_by(locusID)`
-
dim(Oxygenblack_filtered)
-
## [1] 33  2
-
names(Oxygenblack_filtered)
-
## [1] "locusID" "gene"
-
list <- 1:33
-condition <- rep("Oxygen",length(list))
-moduleFil <- rep("black",length(list))
-oxy_b <- cbind(Oxygenblack_filtered,condition,moduleFil)
-
-
-#####
-
-
-
-names(geneModuleMembership)
-
##  [1] "MMmidnightblue"   "MMdarkorchid4"    "MMyellow"         "MMgreenyellow"   
-##  [5] "MMblack"          "MMdarkgreen"      "MMdeeppink3"      "MMpink"          
-##  [9] "MMgrey60"         "MMbrown"          "MMplum4"          "MMskyblue"       
-## [13] "MMdarkcyan"       "MMdarkgoldenrod1" "MMgrey"
-
names(geneModuleMembership)
-
##  [1] "MMmidnightblue"   "MMdarkorchid4"    "MMyellow"         "MMgreenyellow"   
-##  [5] "MMblack"          "MMdarkgreen"      "MMdeeppink3"      "MMpink"          
-##  [9] "MMgrey60"         "MMbrown"          "MMplum4"          "MMskyblue"       
-## [13] "MMdarkcyan"       "MMdarkgoldenrod1" "MMgrey"
-
####genes of significance in purple (darkorchid) module under Oxygen
-#geneOxygenSignificance
-names(geneModuleMembership)
-
##  [1] "MMmidnightblue"   "MMdarkorchid4"    "MMyellow"         "MMgreenyellow"   
-##  [5] "MMblack"          "MMdarkgreen"      "MMdeeppink3"      "MMpink"          
-##  [9] "MMgrey60"         "MMbrown"          "MMplum4"          "MMskyblue"       
-## [13] "MMdarkcyan"       "MMdarkgoldenrod1" "MMgrey"
-
FilterGenes= abs(geneOxygenSignificance)> .2 & abs(geneModuleMembership$MMdarkorchid4)>.8 & OxyPvalue <= 0.05
-table(FilterGenes)
-
## FilterGenes
-## FALSE  TRUE 
-##  2476    24
-
Oxygenpurple_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
-Oxygenpurple_filtered <- gsub("e.L", "e-L", Oxygenpurple_filtered)
-
-gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
-gene_names <- gene_names[,2:3]
-names(gene_names)[1] <- "locusID"
-
-Oxygenpurple_filtered <- as.data.frame(Oxygenpurple_filtered)
-names(Oxygenpurple_filtered)[1] <- "locusID"
-str(Oxygenpurple_filtered)
-
## 'data.frame':    24 obs. of  1 variable:
-##  $ locusID: chr  "gene-L0Y14_RS09775" "gene-L0Y14_RS16350" "gene-L0Y14_RS12700" "gene-L0Y14_RS07425" ...
-
Oxygenpurple_filtered <- left_join(Oxygenpurple_filtered,gene_names)
-
## Joining with `by = join_by(locusID)`
-
dim(Oxygenpurple_filtered)
-
## [1] 24  2
-
names(Oxygenpurple_filtered)
-
## [1] "locusID" "gene"
-
list <- 1:24
-condition <- rep("Oxygen",length(list))
-moduleFil <- rep("darkorchid4",length(list))
-oxy_purple <- cbind(Oxygenpurple_filtered,condition,moduleFil)
-
-#####
-
-
-
-names(geneModuleMembership)
-
##  [1] "MMmidnightblue"   "MMdarkorchid4"    "MMyellow"         "MMgreenyellow"   
-##  [5] "MMblack"          "MMdarkgreen"      "MMdeeppink3"      "MMpink"          
-##  [9] "MMgrey60"         "MMbrown"          "MMplum4"          "MMskyblue"       
-## [13] "MMdarkcyan"       "MMdarkgoldenrod1" "MMgrey"
-
####genes of significance in purple module under Oxygen
-FilterGenes= abs(geneOxygenSignificance)> .2 & abs(geneModuleMembership$MMdarkorchid4)>.8
-table(FilterGenes)
-
## FilterGenes
-## FALSE  TRUE 
-##  2476    24
-
Oxygenpurple_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
-Oxygenpurple_filtered <- gsub("e.L", "e-L", Oxygenpurple_filtered)
-
-gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
-gene_names <- gene_names[,2:3]
-names(gene_names)[1] <- "locusID"
-
-Oxygenpurple_filtered <- as.data.frame(Oxygenpurple_filtered)
-names(Oxygenpurple_filtered)[1] <- "locusID"
-str(Oxygenpurple_filtered)
-
## 'data.frame':    24 obs. of  1 variable:
-##  $ locusID: chr  "gene-L0Y14_RS09775" "gene-L0Y14_RS16350" "gene-L0Y14_RS12700" "gene-L0Y14_RS07425" ...
-
Oxygenpurple_filtered <- left_join(Oxygenpurple_filtered,gene_names)
-
## Joining with `by = join_by(locusID)`
-
dim(Oxygenpurple_filtered)
-
## [1] 24  2
-
names(Oxygenpurple_filtered)
-
## [1] "locusID" "gene"
-
list <- 1:24
-condition <- rep("Oxygen",length(list))
-moduleFil <- rep("purple",length(list))
-oxy_purple <- cbind(Oxygenpurple_filtered,condition,moduleFil)
-
-
-

build file for cytoscape

-
lnames = load(file = "wgcna_manual_network20220910.RData")
-lnames =load(file="count_matrix_condition_info_dataInput.RData")
-
-TOM = TOMsimilarityFromExpr(WGCNA_matrix, power = 8,
-                            networkType = "signed hybrid",
-                            TOMType = "signed")
-
## TOM calculation: adjacency..
-## ..will not use multithreading.
-##  Fraction of slow calculations: 0.000000
-## ..connectivity..
-## ..matrix multiplication (system BLAS)..
-## ..normalization..
-## ..done.
-
moduleColors = mergedColors
-
-###below we will just use all genes, but module=c("list of color modules you could filter by","eg.black","red")
-#module= moduleColors
-
-#module2=c("brown","cyan","midnightblue","blue","magenta","turquoise","black","salmon","pink","green","turqoise")
-          
-
-inModule = is.finite(match(moduleColors, moduleColors))
-
-#inModule2 = is.finite(match(moduleColors, module2))
-#table(inModule2)
-
-genes = colnames(WGCNA_matrix)
-modGenes = genes[inModule]
-#modGenes2=genes[inModule2]
-
-modTOM = TOM[inModule, inModule]
-#modTOM2 = TOM[inModule2,inModule2]
-dimnames(modTOM) = list(modGenes, modGenes)
-#dimnames(modTOM2) = list(modGenes2, modGenes2)
-
-
-genes_of_note <- read.csv("enzymes_of_note.csv")
-names(genes_of_note)
-
## [1] "broad_function"     "function."          "gene"              
-## [4] "locus_ID_andre_new" "X"
-
genes_of_note <- genes_of_note[,1:4]
-names(genes_of_note)[4] <- "modGenes"
-cool <- genes_of_note[,c(4,3,2,1)]
-
-write.csv(cool,"cool_genes.csv")
-
-
-modGenes3 <- as.data.frame(modGenes)
-names(modGenes3)[1] <- "modGenes"
-
-
-
-#modGenes4 <- as.data.frame(modGenes2)
-#names(modGenes4)[1] <- "modGenes"
-#str(modGenes4)
-
-
-
-gene_annos <- right_join(modGenes3,cool)
-
## Joining with `by = join_by(modGenes)`
-
#gene_annos2 <- right_join(modGenes4,cool)
-
-
-names(gene_annos)
-
## [1] "modGenes"       "gene"           "function."      "broad_function"
-
cool_gene_list <- gene_annos[,1]
-cool_gene_list <- as.data.frame(cool_gene_list)
-names(cool_gene_list)[1] <- "modGenes"
-
-
-#names(gene_annos2)
-#cool_gene_list2 <- gene_annos2[,1]
-#cool_gene_list2 <- as.data.frame(cool_gene_list2)
-#names(cool_gene_list2)[1] <- "modGenes"
-
-modcool <- inner_join(modGenes3,cool_gene_list)
-
## Joining with `by = join_by(modGenes)`
-
#modcool2 <- inner_join(modGenes4,cool_gene_list2)
-
-
-uncool <- modGenes3[ ! modGenes3$modGenes %in% modcool$modGenes,]
-#uncool2 <- modGenes4[ ! modGenes4$modGenes %in% modcool2$modGenes,]
-uncool <- as.data.frame(uncool)
-#uncool2 <- as.data.frame(uncool2)
-names(uncool)[1] <- "modGenes"
-#names(uncool2)[1] <- "modGenes"
-
-from_gff <- read.delim("GCF_023733635.1_ASM2373363v1_feature_table.txt")
-from_gff$locus_tag = paste0('gene-', from_gff$locus_tag)
-names(from_gff)[17] <- "modGenes"
-names(from_gff)
-
##  [1] "X..feature"              "class"                  
-##  [3] "assembly"                "assembly_unit"          
-##  [5] "seq_type"                "chromosome"             
-##  [7] "genomic_accession"       "start"                  
-##  [9] "end"                     "strand"                 
-## [11] "product_accession"       "non.redundant_refseq"   
-## [13] "related_accession"       "name"                   
-## [15] "symbol"                  "GeneID"                 
-## [17] "modGenes"                "feature_interval_length"
-## [19] "product_length"          "attributes"
-
names(from_gff)[1] <- "feature"
-gff2 <- subset(from_gff,feature=="CDS")
-names(gff2)
-
##  [1] "feature"                 "class"                  
-##  [3] "assembly"                "assembly_unit"          
-##  [5] "seq_type"                "chromosome"             
-##  [7] "genomic_accession"       "start"                  
-##  [9] "end"                     "strand"                 
-## [11] "product_accession"       "non.redundant_refseq"   
-## [13] "related_accession"       "name"                   
-## [15] "symbol"                  "GeneID"                 
-## [17] "modGenes"                "feature_interval_length"
-## [19] "product_length"          "attributes"
-
gff3 <- gff2[,c(17,14)]
-gff4 <- gff2[,c(17,14,11)]
-
-
-
-
-
-
-coolg <- left_join(modcool,cool)
-
## Joining with `by = join_by(modGenes)`
-
coolg <- left_join(coolg,gff4)
-
## Joining with `by = join_by(modGenes)`
-
coolg <- coolg[,c(1,2,6)]
-
-#coolg2 <- left_join(modcool2,cool)
-#coolg2 <- left_join(coolg2,gff4)
-#coolg2 <- coolg2[,c(1,2,6)]
-
-str(gff4)
-
## 'data.frame':    3213 obs. of  3 variables:
-##  $ modGenes         : chr  "gene-L0Y14_RS00005" "gene-L0Y14_RS00010" "gene-L0Y14_RS00015" "gene-L0Y14_RS00020" ...
-##  $ name             : chr  "chromosomal replication initiator protein DnaA" "DNA polymerase III subunit beta" "IS3 family transposase" "hypothetical protein" ...
-##  $ product_accession: chr  "WP_005958812.1" "WP_006474992.1" "" "WP_138921860.1" ...
-
#str(uncool2)
-#names(uncool)[1] <- "modGenes"
-uncoolg <- left_join(uncool,gff3)
-
## Joining with `by = join_by(modGenes)`
-
#uncoolg2 <- left_join(uncool2,gff4)
-names(uncoolg)[2] <- "gene"
-coolg <- coolg[,1:2]
-all_genes6 <- rbind(coolg,uncoolg)
-
-
-#names(uncool2)[1] <- "modGenes"
-#uncoolg2 <- left_join(uncool2,gff3)
-#uncoolg3 <- left_join(uncool2,gff4)
-#names(uncoolg3)[2] <- "gene"
-#names(coolg2)
-all_genes <- rbind(coolg,uncoolg)
-
-write.csv(all_genes6,"gene_names_for_wgcna_matrix_files_sig_module.csv")
-
-
-vec1 <- all_genes6[,1]
-
-vec2 <- modGenes
-head(vec1)
-
## [1] "gene-L0Y14_RS04070" "gene-L0Y14_RS01990" "gene-L0Y14_RS12365"
-## [4] "gene-L0Y14_RS06850" "gene-L0Y14_RS02365" "gene-L0Y14_RS07425"
-
reorder_idx <- match(vec2,vec1)
-
-all_genes <- vec1[reorder_idx]
-all_genes3 <- as.data.frame(all_genes)
-names(all_genes3)[1] <- "modGenes"
-
-genes2 <- left_join(all_genes3,all_genes6)
-
## Joining with `by = join_by(modGenes)`
-
genes777 <- genes2[,2]
-genes888 <- genes2[,2]
-
-
-
-dimnames(modTOM) = list(all_genes, all_genes)
-#str(modGenes2)
-#moduleColors[inModule]
-#head(modTOM2)
-#str(modTOM2)
-head(all_genes)
-
## [1] "gene-L0Y14_RS04070" "gene-L0Y14_RS09775" "gene-L0Y14_RS01990"
-## [4] "gene-L0Y14_RS09950" "gene-L0Y14_RS09955" "gene-L0Y14_RS12365"
-
#str(all_genes2)
-
-set.seed(123)
-###all_genes2 needs to be a list not dataframe!
-
-
-
-
-cyt_nofilter=cyt = exportNetworkToCytoscape(modTOM,
-                               edgeFile = "CytoscapeEdgeFile20230201.txt",
-                               nodeFile = "CytoscapeNodeFile20230201.txt",
-                               weighted = TRUE,
-                               threshold = 0.0,
-                               nodeNames = all_genes,
-                               altNodeNames = genes777,
-                               nodeAttr = moduleColors[inModule])
-
-
-
-
-
-
-cyt_all = exportNetworkToCytoscape(modTOM,
-                               edgeFile = "CytoscapeEdgeFile20231201_0.05.txt",
-                               nodeFile = "CytoscapeNodeFile20231201_0.05.txt",
-                               weighted = TRUE,
-                               threshold = 0.05,
-                               nodeNames = all_genes,
-                               altNodeNames = genes888,
-                               nodeAttr = moduleColors[inModule])
-
-
-
-file_path <- "CytoscapeNodeFile20231201_0.05.txt"  
-
-lines <- readLines(file_path)
-cleaned_data <- list()
-column_names <- NULL
-# Loop through the lines and process data as needed
-for (line in lines) {
-  # Split the line using tabs as delimiters
-  elements <- unlist(strsplit(line, "\t"))
-  
-  if (is.null(column_names)) {
-    # If column names are not set, use the first line as column names
-    column_names <- elements
-  } else {
-    # Process elements as needed (e.g., remove extra data, format, etc.)
-    # Append cleaned data to the list
-    cleaned_data <- append(cleaned_data, list(elements))
-  }
-}
-
-final_data <- as.data.frame(do.call(rbind, cleaned_data), stringsAsFactors = FALSE)
-colnames(final_data) <- column_names
-
-table(final_data[,3])
-
## 
-##          black          brown       darkcyan darkgoldenrod1      darkgreen 
-##             98            196            214             79            130 
-##    darkorchid4      deeppink3    greenyellow           grey         grey60 
-##             71            199            136             32            155 
-##   midnightblue           pink          plum4        skyblue         yellow 
-##             59            337             92            106             41
-
# Replace all instances of color names to match those used in paper in the entire data frame
-final_data <- lapply(final_data, function(x) ifelse(x == "darkorchid4", "purple", x))
-final_data <- as.data.frame(final_data)
-
-final_data <- lapply(final_data, function(x) ifelse(x == "plum4", "light_purple", x))
-final_data <- as.data.frame(final_data)
-
-final_data <- lapply(final_data, function(x) ifelse(x == "darkcyan", "teal", x))
-final_data <- as.data.frame(final_data)
-
-final_data <- lapply(final_data, function(x) ifelse(x == "deeppink3", "cherry", x))
-final_data <- as.data.frame(final_data)
-
-final_data <- lapply(final_data, function(x) ifelse(x == "yellow", "light_yellow", x))
-final_data <- as.data.frame(final_data)
-
-
-final_data <- lapply(final_data, function(x) ifelse(x == "darkgoldenrod1", "gold", x))
-final_data <- as.data.frame(final_data)
-
-final_data <- lapply(final_data, function(x) ifelse(x == "darkgreen","green", x))
-final_data <- as.data.frame(final_data)
-
-
-
-#write.table(final_data, file = "output_file.txt", sep = "\t", quote = FALSE, row.names = FALSE)
-
-
-

GSvsMS figures

-
###mean_significance_of_module_for_conditionSulfide############################################################
-
-
-
-par(mfrow = c(2,1))
-
-colorh1 <- mergedColors
-
-GS1=as.numeric(cor(biconditions$Sulfide,WGCNA_matrix, use="p"))
-head(GS1)
-
## [1]  0.11559790  0.27041835  0.34126255 -0.19662226 -0.06219137 -0.23047838
-
GeneSignificance=abs(GS1)
-head(GeneSignificance)
-
## [1] 0.11559790 0.27041835 0.34126255 0.19662226 0.06219137 0.23047838
-
str(colorh1)
-
##  chr [1:2500] "plum4" "darkorchid4" "midnightblue" "grey60" "grey60" ...
-
ModuleSignificance=tapply(GeneSignificance, colorh1, mean, na.rm=T)
-str(ModuleSignificance)
-
##  num [1:15(1d)] 0.138 0.193 0.309 0.368 0.133 ...
-##  - attr(*, "dimnames")=List of 1
-##   ..$ : chr [1:15] "black" "brown" "darkcyan" "darkgoldenrod1" ...
-
geneSulfideSignificance=as.data.frame(cor(WGCNA_matrix, Sulfide, use = "p"))
-SulPvalue = as.data.frame(corPvalueStudent(as.matrix(geneSulfideSignificance), nSamples))
-names(geneSulfideSignificance) = paste("Sul.", names(Sulfide), sep="");
-names(SulPvalue) = paste("p.Sul.", names(Sulfide), sep="");
-
-x_limits <- range(
-  abs(geneModuleMembership[moduleGenes7, column7]),
-  abs(geneModuleMembership[moduleGenes3, column3])
-)
-
-head(geneModuleMembership)
-
##                    MMmidnightblue MMdarkorchid4    MMyellow MMgreenyellow
-## gene-L0Y14_RS04070     -0.2978002    -0.4742295 -0.06228793   -0.12283434
-## gene-L0Y14_RS09775      0.6475847     0.8479211  0.16301606    0.38986765
-## gene-L0Y14_RS01990      0.7683016     0.2909617 -0.21720940    0.03127418
-## gene-L0Y14_RS09950      0.6250107     0.4452151  0.21104415    0.01837192
-## gene-L0Y14_RS09955      0.5993774     0.4088890 -0.08634607   -0.20976909
-## gene-L0Y14_RS12365      0.3632049     0.1311813 -0.06645301   -0.38765511
-##                        MMblack MMdarkgreen MMdeeppink3      MMpink   MMgrey60
-## gene-L0Y14_RS04070 -0.16971124 -0.05481641 -0.01719957 -0.01598375 -0.6563363
-## gene-L0Y14_RS09775  0.53953441  0.44692677  0.41381637  0.59720934  0.1830737
-## gene-L0Y14_RS01990  0.06471391 -0.09289025  0.08717597  0.11976742  0.2550905
-## gene-L0Y14_RS09950  0.43427234  0.22863891  0.17923724  0.10431595  0.4740972
-## gene-L0Y14_RS09955  0.23088121 -0.01544034  0.08834925 -0.04765631  0.5107174
-## gene-L0Y14_RS12365 -0.00202547 -0.12064102 -0.25455458 -0.31551755  0.7070535
-##                       MMbrown    MMplum4  MMskyblue  MMdarkcyan
-## gene-L0Y14_RS04070 -0.1384809  0.6483345  0.2465064  0.25589117
-## gene-L0Y14_RS09775 -0.2948900 -0.5986951 -0.2859013 -0.84047590
-## gene-L0Y14_RS01990  0.2365179 -0.1068451 -0.5795192 -0.47056380
-## gene-L0Y14_RS09950  0.2737269 -0.2366527 -0.6256319 -0.43994105
-## gene-L0Y14_RS09955  0.4747395 -0.3438988 -0.6771603 -0.39111707
-## gene-L0Y14_RS12365  0.4675125 -0.2198442 -0.5555037 -0.09885019
-##                    MMdarkgoldenrod1      MMgrey
-## gene-L0Y14_RS04070       -0.1725039 -0.34377603
-## gene-L0Y14_RS09775       -0.4423334  0.33425164
-## gene-L0Y14_RS01990       -0.5786492 -0.10188510
-## gene-L0Y14_RS09950       -0.3808909  0.07218213
-## gene-L0Y14_RS09955       -0.3278772  0.27804497
-## gene-L0Y14_RS12365       -0.2081241  0.06962918
-
# First Plot (plotModuleSignificance)
-plotModuleSignificance(GeneSignificance, colorh1,
-                       legend.text = TRUE,
-                       main = "Average gene significance in modules with sulfide variable",
-                       cex.axis = 1)
-
-
-help(plotModuleSignificance)
-
-table(colorh1)
-
## colorh1
-##          black          brown       darkcyan darkgoldenrod1      darkgreen 
-##            124            233            239             99            143 
-##    darkorchid4      deeppink3    greenyellow           grey         grey60 
-##            100            214            160            242            203 
-##   midnightblue           pink          plum4        skyblue         yellow 
-##             83            385            109            120             46
-
par(mar = c(2, 3, 2, 3))
-par(mfrow = c(2,1))
-

-
###mean_significance_of_module_for_condition_Oxygen#########################################################
-sizeGrWindow(8,7)
-par(mar = c(2, 3, 2, 3))
-par(mfrow = c(2,1))
-
-
-
-GS1=as.numeric(cor(biconditions$Oxygen,WGCNA_matrix, use="p"))
-GeneSignificance=abs(GS1)
-ModuleSignificance=tapply(GeneSignificance, colorh1, mean, na.rm=T)
-
-#sizeGrWindow(8,7)
-#par(mfrow = c(1,3))
-plotModuleSignificance(GeneSignificance,colorh1,
-                       legend.text=TRUE,
-                       main = "Gene significance in oxygen treatments across modules",
-                       cex.axis=1)
-help(plotModuleSignificance)
-
-# Plot for the purple module with suppressed x-axis
-
-#y_limits <- range(
- # abs(geneOxygenSignificance[moduleGenes14, 1]),
-  #abs(geneOxygenSignificance[moduleGenes5, 1])
-#)
-
-x_limits <- range(abs(geneModuleMembership[moduleGenes14, column14]),
-                  abs(geneModuleMembership[moduleGenes5, column5]))
-

-
-
-

upset r plot nitrogen metabolism

-
nitro <- read.csv("Merged Network_2 default node.csv")
-#names(nitro)
-
-nirS <- subset(nitro,nirS=="1")
-
-#nitro$shared.name
-
-#(nitro$neighbors666)
-
-rtca6 <-nitro[,c(133,69)] 
-  
-  
-nitro1 <- nitro[c(1:55,57:894),c(133,69,106,3,28,38,39,65,66,108,109,120,121,125)]
-#nitro1$shared.name
-#names(nitro1)
-
-names(nitro1)[3] <- "cbb"
-nitro1$cbb <-factor(ifelse(nitro1$cbb=="cbb",1,0))
-table(nitro1$cbb)
-
## 
-##   0   1 
-## 373 520
-
names(nitro1)[2] <- "rtca"
-nitro1$rtca <-factor(ifelse(nitro1$rtca=="rtca",1,0))
-table(nitro1$rtca)
-
## 
-##   0   1 
-## 745 148
-
indx <- sapply(nitro1, is.factor)
-nitro1[indx] <- lapply(nitro1[indx], function(x) as.numeric(as.character(x)))
-
-str(nitro1)
-
## 'data.frame':    893 obs. of  14 variables:
-##  $ shared.name     : chr  "gene-L0Y14_RS16060" "gene-L0Y14_RS01710" "gene-L0Y14_RS01760" "gene-L0Y14_RS01755" ...
-##  $ rtca            : num  0 0 0 0 0 0 0 0 0 0 ...
-##  $ cbb             : num  1 1 1 1 1 1 1 1 1 0 ...
-##  $ amtB            : int  1 1 1 1 1 1 1 1 1 1 ...
-##  $ gogat_glnA      : int  1 1 1 1 1 1 1 1 1 1 ...
-##  $ hoa1            : int  1 NA NA 1 NA 1 NA NA 1 NA ...
-##  $ hoa2            : int  NA NA NA NA NA 1 1 NA NA NA ...
-##  $ nap             : int  NA NA NA NA NA NA NA NA NA NA ...
-##  $ narGHI          : int  NA NA NA NA NA NA NA NA 1 NA ...
-##  $ nirBD_nasA_nark1: int  1 NA NA 1 1 NA NA NA 1 NA ...
-##  $ nirS            : int  NA NA NA NA NA NA 1 NA NA NA ...
-##  $ norCB           : int  NA NA NA NA NA NA NA NA NA NA ...
-##  $ nosZ_cluster    : int  NA NA NA NA NA NA NA NA NA NA ...
-##  $ otr             : int  NA NA NA NA NA 1 1 NA 1 NA ...
-
nitro1[is.na(nitro1)] = 0
-indx <- sapply(nitro1, is.factor)
-nitro1[indx] <- lapply(nitro1[indx], function(x) as.numeric(as.character(x)))
-
-upset(nitro1,sets = c("rtca","cbb" ,"amtB","gogat_glnA","hoa1","hoa2","otr","nirBD_nasA_nark1","nap","narGHI","nirS","norCB","nosZ_cluster"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE, order)
-

-
-
-

upset r plot amino acid metabolism metabolism

-
letslook <- read.csv("aa_cbb_rtca_merged_with_fixed_rtca_cbb default  node.csv")
-#names(letslook)
-letslook <- letslook[1:1052,c(131,60:109)]
-rtca1 <- subset(letslook, neighbors1000 =="rtca")
-#rtca1$shared.name
-#names(letslook)
-table(letslook$neighbors1000)
-
## 
-##      rtca 
-##  904  148
-
names(letslook)[5] <- "rtca"
-letslook$rtca <-factor(ifelse(letslook$rtca=="rtca",1,0))
-table(letslook$rtca)
-
## 
-##   0   1 
-## 904 148
-
#names(letslook)
-table(letslook$neighbhors184)
-
## 
-##                  lysine_synthesis 
-##              836              216
-
which(colnames(letslook)=="neighbhors184")
-
## [1] 3
-
names(letslook)[which(colnames(letslook)=="neighbhors184")] <- "lysine_synthesis"
-letslook$lysine_synthesis <-factor(ifelse(letslook$lysine_synthesis=="lysine_synthesis",1,0))
-table(letslook$lysine_synthesis)
-
## 
-##   0   1 
-## 836 216
-
#names(letslook)
-table(letslook$neighbors666)
-
## 
-##     cbb 
-## 532 520
-
which(colnames(letslook)=="neighbors666")
-
## [1] 51
-
names(letslook)[which(colnames(letslook)=="neighbors666")] <- "cbb"
-letslook$cbb <-factor(ifelse(letslook$cbb=="cbb",1,0))
-table(letslook$cbb)
-
## 
-##   0   1 
-## 532 520
-
#names(letslook)
-table(letslook$neighbors185)
-
## 
-##                    arginine_synthesis 
-##               1010                 42
-
names(letslook)[which(colnames(letslook)=="neighbors185")] <- "arginine_synthesis"
-letslook$arginine_synthesis <-factor(ifelse(letslook$arginine_synthesis=="arginine_synthesis",1,0))
-table(letslook$arginine_synthesis)
-
## 
-##    0    1 
-## 1010   42
-
#names(letslook)
-table(letslook$neighbors183)
-
## 
-##                     ornithine_synthesis 
-##                 648                 404
-
names(letslook)[which(colnames(letslook)=="neighbors183")] <- "ornithine_synthesis"
-letslook$ornithine_synthesis <-factor(ifelse(letslook$ornithine_synthesis=="ornithine_synthesis",1,0))
-table(letslook$ornithine_synthesis)
-
## 
-##   0   1 
-## 648 404
-
#names(letslook)
-table(letslook$neighbors182)
-
## 
-##                     citruline_synthesis 
-##                 941                 111
-
names(letslook)[which(colnames(letslook)=="neighbors182")] <- "citruline_synthesis"
-letslook$citruline_synthesis <-factor(ifelse(letslook$citruline_synthesis=="citruline_synthesis",1,0))
-table(letslook$citruline_synthesis)
-
## 
-##   0   1 
-## 941 111
-
#names(letslook)
-table(letslook$neighbors181)
-
## 
-##                  asparagine_synth 
-##              756              296
-
names(letslook)[which(colnames(letslook)=="neighbors181")] <- "asparagine_synth"
-letslook$asparagine_synth <-factor(ifelse(letslook$asparagine_synth=="asparagine_synth",1,0))
-table(letslook$asparagine_synth)
-
## 
-##   0   1 
-## 756 296
-
#names(letslook)
-table(letslook$neighbors186)
-
## 
-##           shikimate 
-##       674       378
-
names(letslook)[which(colnames(letslook)=="neighbors186")] <- "shikimate"
-letslook$shikimate <-factor(ifelse(letslook$shikimate=="shikimate",1,0))
-table(letslook$shikimate)
-
## 
-##   0   1 
-## 674 378
-
#names(letslook)
-table(letslook$neighbors187)
-
## 
-##                      tryptophan_synthesis 
-##                  961                   91
-
names(letslook)[which(colnames(letslook)=="neighbors187")] <- "tryptophan_synthesis"
-letslook$tryptophan_synthesis <-factor(ifelse(letslook$tryptophan_synthesis=="tryptophan_synthesis",1,0))
-table(letslook$tryptophan_synthesis)
-
## 
-##   0   1 
-## 961  91
-
#names(letslook)
-table(letslook$neighbors189)
-
## 
-##                              phenylalanine_tyrosine_synth 
-##                         1029                           23
-
names(letslook)[which(colnames(letslook)=="neighbors189")] <- "phenylalanine_tyrosine_synth"
-letslook$phenylalanine_tyrosine_synth <-factor(ifelse(letslook$phenylalanine_tyrosine_synth=="phenylalanine_tyrosine_synth",1,0))
-table(letslook$phenylalanine_tyrosine_synth)
-
## 
-##    0    1 
-## 1029   23
-
#names(letslook)
-table(letslook$neighbors200)
-
## 
-##                     histidine_synthesis 
-##                 836                 216
-
names(letslook)[which(colnames(letslook)=="neighbors200")] <- "histidine_synthesis"
-letslook$histidine_synthesis <-factor(ifelse(letslook$histidine_synthesis=="histidine_synthesis",1,0))
-table(letslook$histidine_synthesis)
-
## 
-##   0   1 
-## 836 216
-
#names(letslook)
-table(letslook$neighbors202)
-
## 
-##                  valine_synthesis 
-##             1003               49
-
names(letslook)[which(colnames(letslook)=="neighbors202")] <- "valine_synthesis"
-letslook$valine_synthesis <-factor(ifelse(letslook$valine_synthesis=="valine_synthesis",1,0))
-table(letslook$valine_synthesis)
-
## 
-##    0    1 
-## 1003   49
-
#names(letslook)
-table(letslook$neighbors190)
-
## 
-##                              luecine_isoluecine_synthesis 
-##                         1002                           50
-
names(letslook)[which(colnames(letslook)=="neighbors190")] <- "luecine_isoluecine_synthesis"
-letslook$luecine_isoluecine_synthesis <-factor(ifelse(letslook$luecine_isoluecine_synthesis=="luecine_isoluecine_synthesis",1,0))
-table(letslook$luecine_isoluecine_synthesis)
-
## 
-##    0    1 
-## 1002   50
-
#names(letslook)
-
-indx <- sapply(letslook, is.factor)
-letslook[indx] <- lapply(letslook[indx], function(x) as.numeric(as.character(x)))
-
-
-upset(letslook,sets = c("rtca","cbb" ,"lysine_synthesis","asparagine_synth","citruline_synthesis","ornithine_synthesis","arginine_synthesis","shikimate","tryptophan_synthesis","phenylalanine_tyrosine_synth","luecine_isoluecine_synthesis","histidine_synthesis","valine_synthesis"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE)
-

-
-
-

upset r plot sulfide metabolism

-
sulfy <- read.csv("cfix_sulfide_merged_binary default  node.csv")
-#names(sulfy)
-
-sulfy1 <- sulfy[1:890,c(131:135,127,129,5,22,27,68,107)]
-
-which(colnames(sulfy1)=="sbp")
-
## [1] 7
-
#names(sulfy1)
-names(sulfy1)[12] <- "cbb"
-sulfy1$cbb <-factor(ifelse(sulfy1$cbb=="cbb",1,0))
-table(sulfy1$cbb)
-
## 
-##   0   1 
-## 370 520
-
names(sulfy1)[11] <- "rtca"
-sulfy1$rtca <-factor(ifelse(sulfy1$rtca=="rtca",1,0))
-table(sulfy1$rtca)
-
## 
-##   0   1 
-## 741 149
-
indx <- sapply(sulfy1, is.factor)
-sulfy1[indx] <- lapply(sulfy1[indx], function(x) as.numeric(as.character(x)))
-
-#str(sulfy1)
-sulfy1[is.na(sulfy1)] = 0
-indx <- sapply(sulfy1, is.factor)
-sulfy1[indx] <- lapply(sulfy1[indx], function(x) as.numeric(as.character(x)))
-
-#names(sulfy1)
-upset(sulfy1,sets = c("rtca","cbb" ,"sqr","fccA","sbp","sox","soe","sreA","dsr_tusA","apr_sat","qmoABhdrCB"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE)
-

-
-
-

upset r hydrogenases, atpases, etc

-
electron <- read.csv("rtca_cbb_hyd_etc_atpases_merged_binary default  node.csv")
-#names(electron)
-
-electro1 <- electron[,c(94,68,70,12,25,38,39,63,84,86,106,107)]
-
-#names(electro1)
-names(electro1)[3] <- "cbb"
-electro1$cbb <-factor(ifelse(electro1$cbb=="cbb",1,0))
-table(electro1$cbb)
-
## 
-##   0   1 
-## 485 520
-
names(electro1)[2] <- "rtca"
-electro1$rtca <-factor(ifelse(electro1$rtca=="rtca",1,0))
-table(electro1$rtca)
-
## 
-##   0   1 
-## 856 149
-
indx <- sapply(electro1, is.factor)
-electro1[indx] <- lapply(electro1[indx], function(x) as.numeric(as.character(x)))
-
-#str(electro1)
-electro1[is.na(electro1)] = 0
-indx <- sapply(electro1, is.factor)
-electro1[indx] <- lapply(electro1[indx], function(x) as.numeric(as.character(x)))
-
-#names(electro1)
-upset(electro1,sets = c("rtca","cbb" ,"nuo","pet","cco","mbx","F1atpase","V1atpase","V2atpase","hyd1e","hyd3b"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE)
-

-
-

DE expression analysis of significant genes, classified as hubs, in -significant modules

-
de<-read.csv('entotal_202200815_use_this_corrected_locus_tags.csv',header=TRUE,stringsAsFactors=FALSE)
-
-sig<-de[de$adj.P.Val<=0.05,]
-sig0<-sig[sig$logFC<=-1,]
-sig1<-sig[sig$logFC>=1,]
-sig<-rbind(sig0,sig1)
-
-
-a<-subset(sig,comparison=='LS_vs_HS_noN_HO')
-b<-subset(sig,comparison=='LS_vs_HS_N_HO')
-c<-subset(sig,comparison=='H2_vs_HS_N_HO')
-d<-subset(sig,comparison=='H2_vs_HS_noN_HO')
-e<-subset(sig,comparison=='SW_vs_HS_noN_HO')
-f<-subset(sig,comparison=='H2_vs_LS_N_HO')
-g<-subset(sig,comparison=='H2_vs_LS_noN_HO')
-h<-subset(sig,comparison=='SW_vs_LS_noN_HO')
-
-#i<-subset(sig,comparison=='LO_vs_HO_H2_N')no de!!!
-j <- subset(sig,comparison=='LO_vs_HO_H2_noN')
-k <- subset(sig,comparison=='LO_vs_HO_HS_N')
-
-l <- subset(sig,comparison=='noN_vs_N_H2_HO')
-m <- subset(sig,comparison=='noN_vs_N_HS_HO')
-n <- subset(sig,comparison=='noN_vs_N_LS_HO')
-
-sulfide<-rbind(a,b,c,d,e,f,g,h)
-
-
-no_hydrogen_sulfide<-rbind(a,b,e,h)
-
-sulfide_nolowSvsH2 <- rbind(a,b,c,d,e)
-
-oxygen <- rbind(j,k)
-
-nitrogen <- rbind(l,m,n)
-
-my.colors.sulfide<-
-scale_color_manual(values=c(
-'LS_vs_HS_noN_HO'='#A982AA',
-'LS_vs_HS_N_HO'='#8E4162',
-'H2_vs_HS_N_HO'='#404E7C',
-'H2_vs_HS_noN_HO'='#6C91BF',
-'SW_vs_HS_noN_HO'='#EEA58C',
-'H2_vs_LS_N_HO'='#7EA172',
-'H2_vs_LS_noN_HO'='#C7CB85',
-'SW_vs_LS_noN_HO'='#000000'))
-
-
-my.colors.oxygen <-
-scale_color_manual(values=c(
-  'LO_vs_HO_HS_N'="#022b3a",
-  'LO_vs_HO_H2_noN'="#2a9d8f"))
-
-my.colors.nitrogen <- 
-  scale_color_manual(values=c(
-    'noN_vs_N_H2_HO'="#264653",
-    'noN_vs_N_HS_HO'="#2a9d8f",
-    'noN_vs_N_LS_HO'="#e9c46a"))
-    
-    
-my.colors.sulfide_nolowSvsH2 <- 
-  scale_color_manual(values=c(
-  'LS_vs_HS_noN_HO'='#A982AA',
-'LS_vs_HS_N_HO'='#8E4162',
-'H2_vs_HS_N_HO'='#404E7C',
-'H2_vs_HS_noN_HO'='#6C91BF',
-'SW_vs_HS_noN_HO'='#EEA58C'))
-
-
-
-my.theme <- theme(axis.title= element_text(face = "plain",size = 20),
-                  axis.text = element_text(face = "plain",size = 15, colour = "black"),
-                  plot.caption = element_text(hjust = 0),
-                  panel.background=element_rect(fill="white"),
-                  panel.border=element_rect(colour="black",fill=NA,linewidth = 1),
-                  axis.text.x = element_text(angle = 90),
-                  axis.line.x.top = element_blank(),
-                  )
-
-#####setup for individual comparisons
-
-sig$Threshold<-cut(sig$logFC,breaks=c(-Inf,1,Inf),labels=c("<=1",">1"))
-
-LS_vs_HS_noN_HO <-subset(sig,comparison=='LS_vs_HS_noN_HO')
-LS_vs_HS_N_HO <-subset(sig,comparison=='LS_vs_HS_N_HO')
-H2_vs_HS_N_HO <-subset(sig,comparison=='H2_vs_HS_N_HO')
-H2_vs_HS_noN_HO <-subset(sig,comparison=='H2_vs_HS_noN_HO')
-SW_vs_HS_noN_HO <-subset(sig,comparison=='SW_vs_HS_noN_HO')
-LO_vs_HO_H2_noN<- subset(sig,comparison=='LO_vs_HO_H2_noN')
-LO_vs_HO_HS_N <- subset(sig,comparison=='LO_vs_HO_HS_N')
-
-H2_vs_LS_N_HO<-subset(sig,comparison=='H2_vs_LS_N_HO')
-H2_vs_LS_noN_HO<-subset(sig,comparison=='H2_vs_LS_noN_HO')
-SW_vs_LS_noN_HO<-subset(sig,comparison=='SW_vs_LS_noN_HO')
-
-noN_vs_N_H2_HO <- subset(sig,comparison=='noN_vs_N_H2_HO')
-noN_vs_N_HS_HO <- subset(sig,comparison=='noN_vs_N_HS_HO')
-noN_vs_N_LS_HO <- subset(sig,comparison=='noN_vs_N_LS_HO')
-
-my.colors.ind.comparisons <- 
-  scale_color_manual(values=c(
-  '>1'='#F2444D',
-'<=1'='#3BD4CC'))
-
-###### sulfide in gold 
-
-
-names(sulfide_hub_gold)
-
## [1] "locusID"   "gene"      "condition" "moduleFil"
-
names(sulfide_hub_gold)[2] <- "Predicted_function"
-names(sulfide_nolowSvsH2)
-
##  [1] "X.1"                          "locus_ID_andre_new"          
-##  [3] "geneid"                       "locus_ID_andre_old"          
-##  [5] "start.x"                      "stop.x"                      
-##  [7] "gene_letters_andre"           "locus_tag_andre"             
-##  [9] "accession_andre"              "start"                       
-## [11] "stop"                         "notes"                       
-## [13] "Ontology"                     "Inference"                   
-## [15] "locus_tag"                    "product"                     
-## [17] "protein_id"                   "gene_letters.notes"          
-## [19] "NCBI_ID"                      "annotations_from_andre"      
-## [21] "target_id"                    "Accession"                   
-## [23] "Predicted_function"           "functional_role"             
-## [25] "sub_role"                     "sub_role2"                   
-## [27] "Protein"                      "Accession...Protein"         
-## [29] "seed_ortholog_evalue"         "seed_ortholog_score"         
-## [31] "best_tax_level"               "Preferred_name"              
-## [33] "GOs"                          "X"                           
-## [35] "KEGG_ko"                      "KEGG_Pathway"                
-## [37] "KEGG_Module"                  "KEGG_Reaction"               
-## [39] "KEGG_rclass"                  "BRITE"                       
-## [41] "KEGG_TC"                      "CAZy"                        
-## [43] "BiGG_Reaction"                "tax_scope"                   
-## [45] "eggNOG_OGs"                   "bestOG"                      
-## [47] "COG_Functional_Category"      "eggNOG_free_text_description"
-## [49] "logFC"                        "AveExpr"                     
-## [51] "t"                            "P.value"                     
-## [53] "adj.P.Val"                    "B"                           
-## [55] "comparison"
-
names(sulfide_nolowSvsH2)[2] <- "locusID"
-
-names(sulfide_nolowSvsH2)
-
##  [1] "X.1"                          "locusID"                     
-##  [3] "geneid"                       "locus_ID_andre_old"          
-##  [5] "start.x"                      "stop.x"                      
-##  [7] "gene_letters_andre"           "locus_tag_andre"             
-##  [9] "accession_andre"              "start"                       
-## [11] "stop"                         "notes"                       
-## [13] "Ontology"                     "Inference"                   
-## [15] "locus_tag"                    "product"                     
-## [17] "protein_id"                   "gene_letters.notes"          
-## [19] "NCBI_ID"                      "annotations_from_andre"      
-## [21] "target_id"                    "Accession"                   
-## [23] "Predicted_function"           "functional_role"             
-## [25] "sub_role"                     "sub_role2"                   
-## [27] "Protein"                      "Accession...Protein"         
-## [29] "seed_ortholog_evalue"         "seed_ortholog_score"         
-## [31] "best_tax_level"               "Preferred_name"              
-## [33] "GOs"                          "X"                           
-## [35] "KEGG_ko"                      "KEGG_Pathway"                
-## [37] "KEGG_Module"                  "KEGG_Reaction"               
-## [39] "KEGG_rclass"                  "BRITE"                       
-## [41] "KEGG_TC"                      "CAZy"                        
-## [43] "BiGG_Reaction"                "tax_scope"                   
-## [45] "eggNOG_OGs"                   "bestOG"                      
-## [47] "COG_Functional_Category"      "eggNOG_free_text_description"
-## [49] "logFC"                        "AveExpr"                     
-## [51] "t"                            "P.value"                     
-## [53] "adj.P.Val"                    "B"                           
-## [55] "comparison"
-
sulfide_comp_DE <- sulfide_nolowSvsH2[,c(2,3,5,6,7,9:14,16:55)]
-
-shg_vector <- sulfide_hub_gold[,1]
-shg_vector
-
##  [1] "gene-L0Y14_RS01985" "gene-L0Y14_RS07695" "gene-L0Y14_RS00080"
-##  [4] "gene-L0Y14_RS12030" "gene-L0Y14_RS12025" "gene-L0Y14_RS12035"
-##  [7] "gene-L0Y14_RS11990" "gene-L0Y14_RS11995" "gene-L0Y14_RS12020"
-## [10] "gene-L0Y14_RS12015" "gene-L0Y14_RS12045" "gene-L0Y14_RS12000"
-## [13] "gene-L0Y14_RS11980" "gene-L0Y14_RS11975" "gene-L0Y14_RS11965"
-## [16] "gene-L0Y14_RS12040" "gene-L0Y14_RS11985" "gene-L0Y14_RS12010"
-## [19] "gene-L0Y14_RS11255" "gene-L0Y14_RS10315" "gene-L0Y14_RS10410"
-## [22] "gene-L0Y14_RS12005"
-
shg_DE <- subset(sulfide_comp_DE, locusID %in% shg_vector)
-
-shg_DE_names_added <- shg_DE %>% 
-  left_join(sulfide_hub_gold, by = "locusID", suffix = c("", "_sulfide_hub_gold"))
-
-names(shg_DE_names_added)
-
##  [1] "locusID"                             "geneid"                             
-##  [3] "start.x"                             "stop.x"                             
-##  [5] "gene_letters_andre"                  "accession_andre"                    
-##  [7] "start"                               "stop"                               
-##  [9] "notes"                               "Ontology"                           
-## [11] "Inference"                           "product"                            
-## [13] "protein_id"                          "gene_letters.notes"                 
-## [15] "NCBI_ID"                             "annotations_from_andre"             
-## [17] "target_id"                           "Accession"                          
-## [19] "Predicted_function"                  "functional_role"                    
-## [21] "sub_role"                            "sub_role2"                          
-## [23] "Protein"                             "Accession...Protein"                
-## [25] "seed_ortholog_evalue"                "seed_ortholog_score"                
-## [27] "best_tax_level"                      "Preferred_name"                     
-## [29] "GOs"                                 "X"                                  
-## [31] "KEGG_ko"                             "KEGG_Pathway"                       
-## [33] "KEGG_Module"                         "KEGG_Reaction"                      
-## [35] "KEGG_rclass"                         "BRITE"                              
-## [37] "KEGG_TC"                             "CAZy"                               
-## [39] "BiGG_Reaction"                       "tax_scope"                          
-## [41] "eggNOG_OGs"                          "bestOG"                             
-## [43] "COG_Functional_Category"             "eggNOG_free_text_description"       
-## [45] "logFC"                               "AveExpr"                            
-## [47] "t"                                   "P.value"                            
-## [49] "adj.P.Val"                           "B"                                  
-## [51] "comparison"                          "Predicted_function_sulfide_hub_gold"
-## [53] "condition"                           "moduleFil"
-
tesstes <- shg_DE_names_added[,c(19,28,52)]
-shg_DE2 <- shg_DE_names_added[,c(1:18,20:54)]
-names(shg_DE2)
-
##  [1] "locusID"                             "geneid"                             
-##  [3] "start.x"                             "stop.x"                             
-##  [5] "gene_letters_andre"                  "accession_andre"                    
-##  [7] "start"                               "stop"                               
-##  [9] "notes"                               "Ontology"                           
-## [11] "Inference"                           "product"                            
-## [13] "protein_id"                          "gene_letters.notes"                 
-## [15] "NCBI_ID"                             "annotations_from_andre"             
-## [17] "target_id"                           "Accession"                          
-## [19] "functional_role"                     "sub_role"                           
-## [21] "sub_role2"                           "Protein"                            
-## [23] "Accession...Protein"                 "seed_ortholog_evalue"               
-## [25] "seed_ortholog_score"                 "best_tax_level"                     
-## [27] "Preferred_name"                      "GOs"                                
-## [29] "X"                                   "KEGG_ko"                            
-## [31] "KEGG_Pathway"                        "KEGG_Module"                        
-## [33] "KEGG_Reaction"                       "KEGG_rclass"                        
-## [35] "BRITE"                               "KEGG_TC"                            
-## [37] "CAZy"                                "BiGG_Reaction"                      
-## [39] "tax_scope"                           "eggNOG_OGs"                         
-## [41] "bestOG"                              "COG_Functional_Category"            
-## [43] "eggNOG_free_text_description"        "logFC"                              
-## [45] "AveExpr"                             "t"                                  
-## [47] "P.value"                             "adj.P.Val"                          
-## [49] "B"                                   "comparison"                         
-## [51] "Predicted_function_sulfide_hub_gold" "condition"                          
-## [53] "moduleFil"
-
names(shg_DE2)[51] <- "Predicted_function"
-
-#write_csv(shg_DE2,"shg_DE2.csv")
-
-###now I have to reorder this nonsense
-list(unique(shg_DE2$locusID))
-
## [[1]]
-##  [1] "gene-L0Y14_RS10410" "gene-L0Y14_RS00080" "gene-L0Y14_RS01985"
-##  [4] "gene-L0Y14_RS07695" "gene-L0Y14_RS11995" "gene-L0Y14_RS11990"
-##  [7] "gene-L0Y14_RS11985" "gene-L0Y14_RS11965" "gene-L0Y14_RS10315"
-## [10] "gene-L0Y14_RS11255" "gene-L0Y14_RS11980" "gene-L0Y14_RS12045"
-## [13] "gene-L0Y14_RS12000" "gene-L0Y14_RS11975" "gene-L0Y14_RS12005"
-## [16] "gene-L0Y14_RS12015" "gene-L0Y14_RS12020" "gene-L0Y14_RS12010"
-## [19] "gene-L0Y14_RS12040" "gene-L0Y14_RS12030" "gene-L0Y14_RS12025"
-## [22] "gene-L0Y14_RS12035"
-
list(unique(shg_DE2$Predicted_function))
-
## [[1]]
-##  [1] "leuC"                               "nitrogen regulation protein NR(II)"
-##  [3] "nirB"                               "frdB_1"                            
-##  [5] "flxD"                               "hdrA"                              
-##  [7] "korC"                               "hdrB"                              
-##  [9] "U32 family peptidase"               "DNA repair protein RecN"           
-## [11] "korB_1"                             "mdh_1"                             
-## [13] "flxC"                               "korA_1"                            
-## [15] "flxB"                               "aclB_full"                         
-## [17] "aclA"                               "flxA"                              
-## [19] "pntB"                               "acnA"                              
-## [21] "idh"                                "pntA"
-
order <- c("hdrB","korA_1","korB_1","korC","hdrA","flxD","flxC","flxB","flxA","aclB_full","aclA","idh","acnA","pntA","pntB","mdh_1","frdB_1","nitrogen regulation protein NR(II)","nirB","leuC","U32 family peptidase", "DNA repair protein RecN")
-
-
-
-shg_DE.plot <-ggplot(shg_DE2,
-aes(x=factor(Predicted_function,levels = order), y=logFC,color=comparison))
-
-shg_DE.plot +
-  ggtitle("Hub genes in gold module")+
-  geom_jitter(size=5,shape=18, width = 0.32, height = 0.32)+
-  geom_vline(xintercept = c(1.5,2.5,3.5, 4.5, 5.5,6.5,7.5,8.5,9.5,10.5,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5), color = "grey") +
-  my.colors.sulfide_nolowSvsH2 +
-  my.theme 
-

-
####now doing this for sulfide_teal, the otehr module that is significant for sulfide
-names(sul.teal)
-
## [1] "locusID"   "gene"      "condition" "moduleFil"
-
names(sul.teal)[2] <- "Predicted_function"
-
-sht_vector <- sul.teal[,1]
-#sht_vector
-sht_DE <- subset(sulfide_comp_DE, locusID %in% sht_vector)
-#names(sht_DE)
-
-sht_DE_names_added <- sht_DE %>% 
-  left_join(sul.teal, by = "locusID", suffix = c("", "_sulfide_hub_teal"))
-
-#names(sht_DE_names_added)
-sht_DE2 <- sht_DE_names_added[,c(1:18,20:54)]
-#names(sht_DE2)
-names(sht_DE2)[51] <- "Predicted_function"
-
-
-names_shtDE <- unique(sht_DE$locusID)
-
-setdiff(names_shtDE,sht_vector)
-
## character(0)
-
### shortening some of the names
-
-#write.csv(sht_DE2,"sht_DE2.csv")
-getwd()
-
## [1] "/Users/jessicamitchell/Library/CloudStorage/OneDrive-HarvardUniversity/OD_documents/Cfixpaper"
-
###pulling up dataframe with gene names and functions added
-sht_DE3 <- read.csv("sht_DE2.csv")
-
-
-
-sht_DE3.plot <-ggplot(sht_DE3,
-aes(x=factor(Preferred_name), y=logFC,color=comparison))
-
-sht_DE3.plot +
-  ggtitle("Hub genes in teal module")+
-  geom_point(size=5,shape=18) +
-  my.colors.sulfide_nolowSvsH2 +
-  my.theme 
-

-
###Going to have to add my own short names to make above figure run right...
-###### oxygen in purple 
-
-names(oxy_purple)
-
## [1] "locusID"   "gene"      "condition" "moduleFil"
-
names(oxy_purple)[2] <- "Predicted_function"
-
-names(oxygen)[2] <- "locusID"
-
-ohp_vector <- oxy_purple[,1]
-#ohp_vector
-ohp_DE <- subset(oxygen, locusID %in% ohp_vector)
-#names(ohp_DE)
-
-###just the locusID and Preferred_name 
-
-ohp_DE1 <- ohp_DE[,c(1,6,28)]
-ohp_DE2 <- unique(ohp_DE1)
-ohp3 <- merge(ohp_DE2, oxy_purple)
-
-names(ohp3)[3] <- "short.names"
-
-#names(ohp_DE)
-#write_csv(ohp3, "ohp4.csv")
-#added short.names and notes
-ohp4 <- read.csv("ohp4.csv")
-
-ohp_DE3 <- merge(ohp_DE,ohp4,  by = "locusID")
-
-ohp_DE.plot <-ggplot(ohp_DE3,
-aes(x=factor(short.names), y=logFC,color=comparison))
-
-
-
-
-
-ohp_DE.plot +
-  ggtitle("Hub genes in purple module")+
-  geom_point(size=5,shape=18) +
-  my.colors.oxygen +
-  my.theme 
-

-
###### oxygen in black
-
-#names(oxy_b)
-
-names(oxy_b)[2] <- "Predicted_function"
-
-
-
-ohb_vector <- oxy_b[,1]
-ohb_vector
-
##  [1] "gene-L0Y14_RS14085" "gene-L0Y14_RS12670" "gene-L0Y14_RS08400"
-##  [4] "gene-L0Y14_RS05880" "gene-L0Y14_RS13180" "gene-L0Y14_RS15950"
-##  [7] "gene-L0Y14_RS14595" "gene-L0Y14_RS13130" "gene-L0Y14_RS14970"
-## [10] "gene-L0Y14_RS03820" "gene-L0Y14_RS07270" "gene-L0Y14_RS04045"
-## [13] "gene-L0Y14_RS03375" "gene-L0Y14_RS14700" "gene-L0Y14_RS11330"
-## [16] "gene-L0Y14_RS15890" "gene-L0Y14_RS06760" "gene-L0Y14_RS00450"
-## [19] "gene-L0Y14_RS08420" "gene-L0Y14_RS10875" "gene-L0Y14_RS00105"
-## [22] "gene-L0Y14_RS01330" "gene-L0Y14_RS16130" "gene-L0Y14_RS02180"
-## [25] "gene-L0Y14_RS08830" "gene-L0Y14_RS06620" "gene-L0Y14_RS01145"
-## [28] "gene-L0Y14_RS11140" "gene-L0Y14_RS14660" "gene-L0Y14_RS09840"
-## [31] "gene-L0Y14_RS03075" "gene-L0Y14_RS06585" "gene-L0Y14_RS08700"
-
ohb_DE <- subset(oxygen, locusID %in% ohb_vector)
-#names(ohb_DE)
-
-###just the locusID,accession and Preferred_name 
-
-ohb <- ohb_DE[,c(2,9,20,32)]
-
-ohb <- unique(ohb)
-names(ohb)[4] <- "short.names"
-
-
-#write_csv(ohb, "ohb2.csv")
-#added short.names and notes
-ohb2 <- read.csv("ohb2.csv")
-
-ohb2_DE3 <- merge(ohb_DE,ohb2,  by = "locusID")
-
-ohb2_DE3.plot <-ggplot(ohb2_DE3,
-aes(x=factor(short.names), y=logFC,color=comparison))
-
-
-
-
-
-ohb2_DE3.plot +
-  ggtitle("Hub genes in black module")+
-  geom_point(size=5,shape=18) +
-  my.colors.oxygen +
-  my.theme 
-

-
-

average logFCs

-
######putting all of this together, so that I have a workable file I can use in adobe illustrator
-
-all_together <- read.csv("wgcna_trait_module_hub_genes.csv")
-names(all_together)
-
## [1] "locusID"       "short.names"   "moduleFil"     "func_category"
-## [5] "X"
-
all_together <- all_together[,1:4]
-
-
-
-shg_DE_av <- shg_DE2[,c(1,44,50,53)]
-
-head(shg_DE_av)
-
##              locusID     logFC      comparison      moduleFil
-## 1 gene-L0Y14_RS10410 -6.607549 LS_vs_HS_noN_HO darkgoldenrod1
-## 2 gene-L0Y14_RS00080 -3.742340 LS_vs_HS_noN_HO darkgoldenrod1
-## 3 gene-L0Y14_RS01985 -5.245126 LS_vs_HS_noN_HO darkgoldenrod1
-## 4 gene-L0Y14_RS07695  4.361391 LS_vs_HS_noN_HO darkgoldenrod1
-## 5 gene-L0Y14_RS11995  2.598792 LS_vs_HS_noN_HO darkgoldenrod1
-## 6 gene-L0Y14_RS11990  2.739793 LS_vs_HS_noN_HO darkgoldenrod1
-
# Calculate the average logFC for each locusID in gold sulfide 
-
-avg_logFC_df <- shg_DE_av %>%
-  group_by(locusID, moduleFil) %>%
-  summarise(
-    avg_logFC = mean(logFC, na.rm = TRUE),
-    sd_logFC = sd(logFC, na.rm = TRUE)
-  ) %>%
-  ungroup()
-
## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-
avg_logFC_df <- shg_DE_av %>%
-  group_by(locusID, moduleFil) %>%
-  summarise(
-    n = n(), # Add the count of non-NA values
-    avg_logFC = mean(logFC, na.rm = TRUE),
-    sd_logFC = sd(logFC, na.rm = TRUE)
-  ) %>%
-  ungroup()
-
## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-
# Print the result
-head(avg_logFC_df)
-
## # A tibble: 6 × 5
-##   locusID            moduleFil          n avg_logFC sd_logFC
-##   <chr>              <chr>          <int>     <dbl>    <dbl>
-## 1 gene-L0Y14_RS00080 darkgoldenrod1     3     -4.58    0.732
-## 2 gene-L0Y14_RS01985 darkgoldenrod1     3     -5.89    0.580
-## 3 gene-L0Y14_RS07695 darkgoldenrod1     4      5.48    1.11 
-## 4 gene-L0Y14_RS10315 darkgoldenrod1     3      2.38    0.487
-## 5 gene-L0Y14_RS10410 darkgoldenrod1     3     -5.97    0.737
-## 6 gene-L0Y14_RS11255 darkgoldenrod1     4      2.70    0.991
-
avg_logFC_df <- avg_logFC_df %>%
-  mutate(treatment_logFC_ave = "logFC_sulfide_limiting")
-
-# Print the result
-head(avg_logFC_df)
-
## # A tibble: 6 × 6
-##   locusID            moduleFil          n avg_logFC sd_logFC treatment_logFC_ave
-##   <chr>              <chr>          <int>     <dbl>    <dbl> <chr>              
-## 1 gene-L0Y14_RS00080 darkgoldenrod1     3     -4.58    0.732 logFC_sulfide_limi…
-## 2 gene-L0Y14_RS01985 darkgoldenrod1     3     -5.89    0.580 logFC_sulfide_limi…
-## 3 gene-L0Y14_RS07695 darkgoldenrod1     4      5.48    1.11  logFC_sulfide_limi…
-## 4 gene-L0Y14_RS10315 darkgoldenrod1     3      2.38    0.487 logFC_sulfide_limi…
-## 5 gene-L0Y14_RS10410 darkgoldenrod1     3     -5.97    0.737 logFC_sulfide_limi…
-## 6 gene-L0Y14_RS11255 darkgoldenrod1     4      2.70    0.991 logFC_sulfide_limi…
-
shg_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
-
-#write.csv(shg_DE_av2,"shg_DE_av2.csv")
-dim(shg_DE_av2)
-
## [1] 22  9
-
n_val_sug <- shg_DE_av2[,c(1,3)]
-
-####added "broad_category" column
-shg_DE_av3 <- read.csv("shg_DE_av2.csv")
-dim(shg_DE_av3)
-
## [1] 22 10
-
####now doing this for the teal sulfide module 
-#names(sht_DE3)
-v <- unique(sht_DE3$locusID)
-sht_DE3_av <- sht_DE3[,c(2,45,51,54)]
-
-head(sht_DE3_av)
-
##              locusID     logFC      comparison moduleFil
-## 1 gene-L0Y14_RS00400 -5.671978 LS_vs_HS_noN_HO      teal
-## 2 gene-L0Y14_RS01755 -1.093415 LS_vs_HS_noN_HO      teal
-## 3 gene-L0Y14_RS04610 -1.158590 LS_vs_HS_noN_HO      teal
-## 4 gene-L0Y14_RS05075 -1.192507 LS_vs_HS_noN_HO      teal
-## 5 gene-L0Y14_RS05080 -2.083575 LS_vs_HS_noN_HO      teal
-## 6 gene-L0Y14_RS07060 -1.072405 LS_vs_HS_noN_HO      teal
-
avg_logFC_df <- sht_DE3_av %>%
-  group_by(locusID, moduleFil) %>%
-  summarise(
-    avg_logFC = mean(logFC, na.rm = TRUE),
-    sd_logFC = sd(logFC, na.rm = TRUE)
-  ) %>%
-  ungroup()
-
## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-
avg_logFC_df <- sht_DE3_av %>%
-  group_by(locusID, moduleFil) %>%
-  summarise(
-    n = n(), # Add the count of non-NA values
-    avg_logFC = mean(logFC, na.rm = TRUE),
-    sd_logFC = sd(logFC, na.rm = TRUE)
-  ) %>%
-  ungroup()
-
## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-
# Print the result
-head(avg_logFC_df)
-
## # A tibble: 6 × 5
-##   locusID            moduleFil     n avg_logFC sd_logFC
-##   <chr>              <chr>     <int>     <dbl>    <dbl>
-## 1 gene-L0Y14_RS00400 teal          4     -6.35   0.877 
-## 2 gene-L0Y14_RS00515 teal          3      1.86   0.534 
-## 3 gene-L0Y14_RS00525 teal          4      2.52   0.570 
-## 4 gene-L0Y14_RS00625 teal          3      1.86   0.534 
-## 5 gene-L0Y14_RS00655 teal          4      2.62   0.768 
-## 6 gene-L0Y14_RS01660 teal          3     -2.42   0.0345
-
avg_logFC_df <- avg_logFC_df %>%
-  mutate(treatment_logFC_ave = "logFC_sulfide_limiting")
-
-# Print the result
-head(avg_logFC_df)
-
## # A tibble: 6 × 6
-##   locusID            moduleFil     n avg_logFC sd_logFC treatment_logFC_ave   
-##   <chr>              <chr>     <int>     <dbl>    <dbl> <chr>                 
-## 1 gene-L0Y14_RS00400 teal          4     -6.35   0.877  logFC_sulfide_limiting
-## 2 gene-L0Y14_RS00515 teal          3      1.86   0.534  logFC_sulfide_limiting
-## 3 gene-L0Y14_RS00525 teal          4      2.52   0.570  logFC_sulfide_limiting
-## 4 gene-L0Y14_RS00625 teal          3      1.86   0.534  logFC_sulfide_limiting
-## 5 gene-L0Y14_RS00655 teal          4      2.62   0.768  logFC_sulfide_limiting
-## 6 gene-L0Y14_RS01660 teal          3     -2.42   0.0345 logFC_sulfide_limiting
-
sht_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
-
-dim(sht_DE_av2)
-
## [1] 50  9
-
names(sht_DE_av2)
-
## [1] "locusID"             "moduleFil.x"         "n"                  
-## [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
-## [7] "short.names"         "moduleFil.y"         "func_category"
-
sht_n <- sht_DE_av2[,c(1,3)]
-
-#write.csv(sht_DE_av2,"sht_DE_av2.csv")
-#write.csv(sht_DE_av2,"sht_DE_av5.csv")
-###added broad categories so I can use 
-sht_DE_av3_old <- read.csv("sht_DE_av3.csv")
-sht_DE_av5_new <- read.csv("sht_DE_av5.csv")
-
-
-names(sht_DE_av5_new)
-
##  [1] "X"                   "locusID"             "moduleFil.x"        
-##  [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
-##  [7] "short.names"         "moduleFil.y"         "func_category"      
-## [10] "broad_category"
-
names(sht_DE_av3_old)
-
## [1] "X"                   "locusID"             "moduleFil.x"        
-## [4] "avg_logFC"           "treatment_logFC_ave" "short.names"        
-## [7] "moduleFil.y"         "func_category"       "broad_category"
-
vec1 <- (sht_DE_av3_old)[2]
-vec2 <- (sht_DE_av5_new)[2]
-differences <- anti_join(vec1,vec2)
-
## Joining with `by = join_by(locusID)`
-
whatdiff <- merge(differences,sht_DE_av3_old, by = "locusID", all.x = TRUE)
-whatdiff$short.names
-
##  [1] "xerC"              "Na_H_Exchanger"    "pth"              
-##  [4] "dsrE_3"            "AAA_LonB"          "dxs"              
-##  [7] "hypo"              "gsiA"              "recQ"             
-## [10] "glyco_like_mftF " "tmk"               "frdA_1"           
-## [13] "ppdk"              "cvpA"              "hflC"             
-## [16] "YdcH"              "malQ"              "HpcH"             
-## [19] "recD"              "gspB"              "trxA"             
-## [22] "glgP"              "urvD"              "yidC"             
-## [25] "yidD"
-
sht_DE_av3_old_filtered <-  sht_DE_av3_old %>%
-  filter(locusID %in% unlist(vec2))
-
-names(sht_DE_av5_new)
-
##  [1] "X"                   "locusID"             "moduleFil.x"        
-##  [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
-##  [7] "short.names"         "moduleFil.y"         "func_category"      
-## [10] "broad_category"
-
sht_DE_new_justSD <- sht_DE_av5_new[,c(2,5)]
-sht_DE_new <- merge(sht_DE_av3_old_filtered,sht_DE_new_justSD, by= "locusID",all.x = TRUE)
-
-
-
-
-head(sht_DE_new)
-
##              locusID X moduleFil.x avg_logFC    treatment_logFC_ave short.names
-## 1 gene-L0Y14_RS00400 2        teal -6.353622 logFC_sulfide_limiting        torF
-## 2 gene-L0Y14_RS00515 3        teal  1.860711 logFC_sulfide_limiting        norB
-## 3 gene-L0Y14_RS00525 4        teal  2.519077 logFC_sulfide_limiting        nirS
-## 4 gene-L0Y14_RS00625 5        teal  1.860711 logFC_sulfide_limiting       norB1
-## 5 gene-L0Y14_RS00655 6        teal  2.618491 logFC_sulfide_limiting       norD2
-## 6 gene-L0Y14_RS01660 9        teal -2.423910 logFC_sulfide_limiting        psrA
-##                       moduleFil.y           func_category broad_category
-## 1 sulfide_teal_nitro_midnightblue                   porin      transport
-## 2                    sulfide_teal         denitrification       nitrogen
-## 3                    sulfide_teal         denitrification       nitrogen
-## 4                    sulfide_teal         denitrification       nitrogen
-## 5                    sulfide_teal         denitrification       nitrogen
-## 6                    sulfide_teal nucleotide biosynthesis   biosynthesis
-##     sd_logFC
-## 1 0.87692331
-## 2 0.53357947
-## 3 0.57017737
-## 4 0.53357947
-## 5 0.76846294
-## 6 0.03451484
-
#write.csv(sht_DE_new,"sht_DE_new.csv")
-
-# Filter out 'unknown'
-sht_DE_new_filtered <- sht_DE_new %>%
-  filter(broad_category != "unknown")
-
-names(sht_DE_new_filtered)
-
##  [1] "locusID"             "X"                   "moduleFil.x"        
-##  [4] "avg_logFC"           "treatment_logFC_ave" "short.names"        
-##  [7] "moduleFil.y"         "func_category"       "broad_category"     
-## [10] "sd_logFC"
-
names(shg_DE_av3)
-
##  [1] "X"                   "locusID"             "moduleFil.x"        
-##  [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
-##  [7] "short.names"         "moduleFil.y"         "func_category"      
-## [10] "broad_category"
-
sht_DE <- sht_DE_new_filtered[,c(1,3:10)]
-shg_DE <- shg_DE_av3[,c(2:10)]
-
-dim(shg_DE)
-
## [1] 22  9
-
dim(sht_DE)
-
## [1] 42  9
-
gold_teal <- rbind(sht_DE,shg_DE)
-
-
-
-
-
-gold_teal_filtered <- gold_teal %>%
-  filter(broad_category != "unknown")
-
-
-######putting them all together
-
-
-
-
- #reordering the gene names so they appear on th xaxis in the right order
-sht_DE_new_filtered$short.names
-
##  [1] "torF"       "norB"       "nirS"       "norB1"      "norD2"     
-##  [6] "psrA"       "fusA"       "trmD"       "ibpA"       "pyrG"      
-## [11] "ftsH"       "rimP"       "nusA"       "fusA2"      "napG"      
-## [16] "moaA"       "fusA_2"     "holA"       "hupE"       "HyiA_small"
-## [21] "isp1"       "isp2"       "HyiB_large" "frdB_1"     "frdC_1"    
-## [26] "htpX"       "cmoB"       "korB_2"     "korA_2"     "rhlE"      
-## [31] "dnaJ"       "pfor_3"     "ispB"       "porin"      "clpB"      
-## [36] "rho"        "ptsP_E1"    "feoB"       "mcrA"       "korA_3"    
-## [41] "korB_3"     "pfor_like"
-
desired_order <- c("moaA","psrA","ispB" ,"pyrG","frdB_1","frdC_1","korA_2","korB_2","korA_3","korB_3", "pfor_3","pfor_like","fusA","fusA_2","fusA2","trmD","ibpA","nusA","mcrA" ,"recQ","holA","cmoB","dnaJ","ftsH","rimP","rhlE","rho","htpX","clpB","hupE","HyiB_large","isp1","isp2","HyiA_small","napG","nirS","norB" ,"norB1","norD2","feoB","porin","ptsP_E1", "torF")
-
-
-
-
-sht_DE_new_filtered$short.names <- factor(sht_DE_new_filtered$short.names, levels = desired_order)
-
-                                                    
-                                                   
-                                                   
-                                                            
-                                                           
-                                                   
-                                                              
-                                                              
-                                                              
-                                                        
-                     
- 
-sht_DE_new_filtered$clipped_avg_logFC <- pmin(pmax(sht_DE_new_filtered$avg_logFC, -4), 4)
-
-
-
-
-ggplot(sht_DE_new_filtered, aes(x = short.names, y = avg_logFC, color = clipped_avg_logFC)) +
-  geom_point(size = 3) +
-  geom_errorbar(aes(ymin = avg_logFC - sd_logFC, ymax = avg_logFC + sd_logFC), width = 0.25) +
-  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+ # specify breaks within this range
-
-  labs(title = "Average logFC under sulfide limiting conditions",
-       x = "Genes",
-       y = "Average LogFC",
-       color = "Avg LogFC Value") +
-  theme_minimal() +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
-  facet_grid(~broad_category,scales = "free",space="free")
-

-
######putting them all together
-names(sht_DE_new_filtered)
-
##  [1] "locusID"             "X"                   "moduleFil.x"        
-##  [4] "avg_logFC"           "treatment_logFC_ave" "short.names"        
-##  [7] "moduleFil.y"         "func_category"       "broad_category"     
-## [10] "sd_logFC"            "clipped_avg_logFC"
-
names(shg_DE_av3)
-
##  [1] "X"                   "locusID"             "moduleFil.x"        
-##  [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
-##  [7] "short.names"         "moduleFil.y"         "func_category"      
-## [10] "broad_category"
-
sht_DE <- sht_DE_new_filtered[,c(1,3:10)]
-shg_DE <- shg_DE_av3[,c(2:10)]
-
-dim(shg_DE)
-
## [1] 22  9
-
dim(sht_DE)
-
## [1] 42  9
-
gold_teal <- rbind(sht_DE,shg_DE)
-
-####breaking up this figure by positive and negative fold change
-head(gold_teal)
-
##              locusID moduleFil.x avg_logFC    treatment_logFC_ave short.names
-## 1 gene-L0Y14_RS00400        teal -6.353622 logFC_sulfide_limiting        torF
-## 2 gene-L0Y14_RS00515        teal  1.860711 logFC_sulfide_limiting        norB
-## 3 gene-L0Y14_RS00525        teal  2.519077 logFC_sulfide_limiting        nirS
-## 4 gene-L0Y14_RS00625        teal  1.860711 logFC_sulfide_limiting       norB1
-## 5 gene-L0Y14_RS00655        teal  2.618491 logFC_sulfide_limiting       norD2
-## 6 gene-L0Y14_RS01660        teal -2.423910 logFC_sulfide_limiting        psrA
-##                       moduleFil.y           func_category broad_category
-## 1 sulfide_teal_nitro_midnightblue                   porin      transport
-## 2                    sulfide_teal         denitrification       nitrogen
-## 3                    sulfide_teal         denitrification       nitrogen
-## 4                    sulfide_teal         denitrification       nitrogen
-## 5                    sulfide_teal         denitrification       nitrogen
-## 6                    sulfide_teal nucleotide biosynthesis   biosynthesis
-##     sd_logFC
-## 1 0.87692331
-## 2 0.53357947
-## 3 0.57017737
-## 4 0.53357947
-## 5 0.76846294
-## 6 0.03451484
-
 ##Filter for avg_logFC > 0
-sulfide_up <- gold_teal[gold_teal$avg_logFC > 0, ]
-
-# Filter for avg_logFC < 0
-sulfide_down <- gold_teal[gold_teal$avg_logFC < 0, ]
-
-
-# Filter out 'unknown'
-sulfide_up_filtered <- sulfide_up %>%
-  filter(broad_category != "unknown")
-
-sulfide_down_filtered <- sulfide_down %>%
-  filter(broad_category != "unknown")
-
-dim(sulfide_up_filtered)
-
## [1] 46  9
-
###removing the second frdB, since it was just sig for both the gold and the teal, it has the same fold change and everything...
-sulfide_up_filtered <- sulfide_up_filtered[-28,]
-###makes a clipping for the color (so its not all washed out)
-
-sulfide_up_filtered$clipped_avg_logFC <- pmin(pmax(sulfide_up_filtered$avg_logFC, -4), 4)
-
-sulfide_down_filtered$clipped_avg_logFC <- pmin(pmax(sulfide_down_filtered$avg_logFC, -4), 4)
-
-#write.csv(sulfide_up_filtered,"sulfide_up_filtered2.csv")
-##reordered in excel, faster
-SG_short.name_order <- read.csv("gold_teal_up_short.name.order.csv")
-
-dim(SG_short.name_order)
-
## [1] 45 11
-
dim(sulfide_up_filtered)
-
## [1] 45 10
-
desired_order <- SG_short.name_order[,6]
-list(desired_order)
-
## [[1]]
-##  [1] "norB"       "nirS"       "norB1"      "norD2"      "napG"      
-##  [6] "hupE"       "HyiA_small" "isp1"       "isp2"       "HyiB_large"
-## [11] "frdB_1"     "frdC_1"     "korB_2"     "korA_2"     "pfor_3"    
-## [16] "hdrB"       "korA_1"     "korB_1"     "korC"       "hdrA"      
-## [21] "flxD"       "flxC"       "flxB"       "flxA"       "aclB_full" 
-## [26] "aclA"       "idh"        "acnA"       "pntA"       "pntB"      
-## [31] "mdh_1"      "ibpA"       "ftsH"       "fusA2"      "holA"      
-## [36] "htpX"       "cmoB"       "dnaJ"       "porin"      "clpB"      
-## [41] "peptidase"  "recN"       "ptsP_E1"    "feoB"       "moaA"
-
sulfide_up_filtered$short.names <- factor(sulfide_up_filtered$short.names, levels = desired_order)
-levels(sulfide_up_filtered$short.names)
-
##  [1] "norB"       "nirS"       "norB1"      "norD2"      "napG"      
-##  [6] "hupE"       "HyiA_small" "isp1"       "isp2"       "HyiB_large"
-## [11] "frdB_1"     "frdC_1"     "korB_2"     "korA_2"     "pfor_3"    
-## [16] "hdrB"       "korA_1"     "korB_1"     "korC"       "hdrA"      
-## [21] "flxD"       "flxC"       "flxB"       "flxA"       "aclB_full" 
-## [26] "aclA"       "idh"        "acnA"       "pntA"       "pntB"      
-## [31] "mdh_1"      "ibpA"       "ftsH"       "fusA2"      "holA"      
-## [36] "htpX"       "cmoB"       "dnaJ"       "porin"      "clpB"      
-## [41] "peptidase"  "recN"       "ptsP_E1"    "feoB"       "moaA"
-
sulfide_up_filtered$short.names <- as.factor(sulfide_up_filtered$short.names)
-str(sulfide_up_filtered)
-
## 'data.frame':    45 obs. of  10 variables:
-##  $ locusID            : chr  "gene-L0Y14_RS00515" "gene-L0Y14_RS00525" "gene-L0Y14_RS00625" "gene-L0Y14_RS00655" ...
-##  $ moduleFil.x        : chr  "teal" "teal" "teal" "teal" ...
-##  $ avg_logFC          : num  1.86 2.52 1.86 2.62 4.09 ...
-##  $ treatment_logFC_ave: chr  "logFC_sulfide_limiting" "logFC_sulfide_limiting" "logFC_sulfide_limiting" "logFC_sulfide_limiting" ...
-##  $ short.names        : Factor w/ 45 levels "norB","nirS",..: 1 2 3 4 32 33 34 5 45 35 ...
-##  $ moduleFil.y        : chr  "sulfide_teal" "sulfide_teal" "sulfide_teal" "sulfide_teal" ...
-##  $ func_category      : chr  "denitrification" "denitrification" "denitrification" "denitrification" ...
-##  $ broad_category     : chr  "nitrogen" "nitrogen" "nitrogen" "nitrogen" ...
-##  $ sd_logFC           : num  0.534 0.57 0.534 0.768 1.564 ...
-##  $ clipped_avg_logFC  : num  1.86 2.52 1.86 2.62 4 ...
-
sulfide_up_filtered$short.names
-
##  [1] norB       nirS       norB1      norD2      ibpA       ftsH      
-##  [7] fusA2      napG       moaA       holA       hupE       HyiA_small
-## [13] isp1       isp2       HyiB_large frdB_1     frdC_1     htpX      
-## [19] cmoB       korB_2     korA_2     dnaJ       pfor_3     porin     
-## [25] clpB       ptsP_E1    feoB       peptidase  recN       hdrB      
-## [31] korA_1     korB_1     korC       hdrA       flxD       flxC      
-## [37] flxB       flxA       aclB_full  aclA       idh        acnA      
-## [43] pntA       pntB       mdh_1     
-## 45 Levels: norB nirS norB1 norD2 napG hupE HyiA_small isp1 isp2 ... moaA
-
##order of the genes in sulfide_up
-sulfide_up_filtered$short.names <- factor(sulfide_up_filtered$short.names, levels = desired_order)
-
-
-# Extend the desired order with genes from sulfide_down that might not be in desired_order
-extended_order <- unique(c(desired_order, as.character(sulfide_down_filtered$short.names)))
-list(extended_order)
-
## [[1]]
-##  [1] "norB"       "nirS"       "norB1"      "norD2"      "napG"      
-##  [6] "hupE"       "HyiA_small" "isp1"       "isp2"       "HyiB_large"
-## [11] "frdB_1"     "frdC_1"     "korB_2"     "korA_2"     "pfor_3"    
-## [16] "hdrB"       "korA_1"     "korB_1"     "korC"       "hdrA"      
-## [21] "flxD"       "flxC"       "flxB"       "flxA"       "aclB_full" 
-## [26] "aclA"       "idh"        "acnA"       "pntA"       "pntB"      
-## [31] "mdh_1"      "ibpA"       "ftsH"       "fusA2"      "holA"      
-## [36] "htpX"       "cmoB"       "dnaJ"       "porin"      "clpB"      
-## [41] "peptidase"  "recN"       "ptsP_E1"    "feoB"       "moaA"      
-## [46] "torF"       "psrA"       "fusA"       "trmD"       "pyrG"      
-## [51] "rimP"       "nusA"       "fusA_2"     "rhlE"       "ispB"      
-## [56] "rho"        "mcrA"       "korA_3"     "korB_3"     "pfor_like" 
-## [61] "glnL"       "nirB"       "leuC"
-
# Set factor levels for both datasets based on the extended order
-sulfide_up_filtered$short.names <- factor(sulfide_up_filtered$short.names, levels = extended_order)
-sulfide_down_filtered$short.names <- factor(sulfide_down_filtered$short.names, levels = extended_order)
-
-
-
-
-####sulfide_up plot#################################################################
-sulfide_up_plot <- ggplot(sulfide_up_filtered, aes(x = short.names, y = avg_logFC, color = clipped_avg_logFC)) +
-  geom_point(size = 3) +
-  geom_errorbar(aes(ymin = avg_logFC - sd_logFC, ymax = avg_logFC + sd_logFC), width = 0.25) +
-  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+ 
-
-  labs(title = "Average logFC under sulfide limiting conditions",
-       x = "Genes",
-       y = "Average LogFC",
-       color = "Avg LogFC Value") +
-  theme_minimal() +
- 
-  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
-  facet_grid(~broad_category,scales = "free",space="free")
-
-
-
-
-
-
-
-####sulfide_down plot#################################################################
-sulfide_down_plot <- ggplot(sulfide_down_filtered, aes(x = short.names, y = avg_logFC, color = clipped_avg_logFC)) +
-  geom_point(size = 3) +
-  geom_errorbar(aes(ymin = avg_logFC - sd_logFC, ymax = avg_logFC + sd_logFC), width = 0.25) +
-  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+ 
-
-  labs(title = "Average logFC under sulfide limiting conditions",
-       x = "Genes",
-       y = "Average LogFC",
-       color = "Avg LogFC Value") +
-  theme_minimal() +
-  
-  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
-  facet_grid(~broad_category,scales = "free",space="free")
-
-
-
-names(sulfide_up_filtered)
-
##  [1] "locusID"             "moduleFil.x"         "avg_logFC"          
-##  [4] "treatment_logFC_ave" "short.names"         "moduleFil.y"        
-##  [7] "func_category"       "broad_category"      "sd_logFC"           
-## [10] "clipped_avg_logFC"
-
names(sulfide_down_filtered)
-
##  [1] "locusID"             "moduleFil.x"         "avg_logFC"          
-##  [4] "treatment_logFC_ave" "short.names"         "moduleFil.y"        
-##  [7] "func_category"       "broad_category"      "sd_logFC"           
-## [10] "clipped_avg_logFC"
-
# Combine the datasets with an identifier for each
-sulfide_up_filtered$set <- "Up"
-sulfide_down_filtered$set <- "Down"
-
-combined_dataS <- rbind(sulfide_up_filtered, sulfide_down_filtered)
-# Reverse the levels so that "Up" comes before "Down"
-combined_dataS$set <- factor(combined_dataS$set, levels = c("Up", "Down"))
-
-combined_dataS <- combined_dataS %>%
-  mutate(adj_avg_logFC = case_when(
-    set == "Up" & avg_logFC < 0 ~ 0,
-    set == "Down" & avg_logFC > 0 ~ 0,
-    TRUE ~ avg_logFC
-  ))
-
-# Plotting
-p <- ggplot(combined_dataS, aes(x = adj_avg_logFC, y = short.names, color = clipped_avg_logFC)) +
-  geom_point(size = 3) +
-  geom_errorbar(aes(xmin = adj_avg_logFC - sd_logFC, xmax = adj_avg_logFC + sd_logFC), width = 0.25) +
-  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0) +
-  labs(title = "Average logFC under sulfide limiting conditions",
-       x = "Adjusted Average LogFC",
-       y = "Genes",
-       color = "Avg LogFC Value") +
-  theme_minimal() +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
-  facet_grid(cols = vars(set), rows= vars(broad_category), scales = "free_y", space = "free_y")
-
-print(p)
-

-
################################OXYGEN PURPLE ANDBLACK##################################################
-
-#####now doing this for purple oxy
-
-#names(ohp_DE3)
-table(ohp_DE3$short.names)
-
## 
-##   bamD   cphA   cvpA  dprE1    FBP  fusA2   GH16   hupE   hybO   hypE   hypo 
-##      2      2      1      2      1      2      1      2      2      2      8 
-##   isp2   mrcA   nifJ   ppdK   rocR SgpA_1   tsaC   yidC   yidD 
-##      2      2      2      2      2      2      2      2      1
-
ohp_DE3_av <- ohp_DE3[,c(1,49,55,59,57)]
-
-head(ohp_DE3_av)
-
##              locusID     logFC      comparison moduleFil short.names
-## 1 gene-L0Y14_RS05880 -4.039158   LO_vs_HO_HS_N    purple        hypo
-## 2 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN    purple        hypo
-## 3 gene-L0Y14_RS06635 -5.296420   LO_vs_HO_HS_N    purple       dprE1
-## 4 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN    purple       dprE1
-## 5 gene-L0Y14_RS06820  2.418749 LO_vs_HO_H2_noN    purple       fusA2
-## 6 gene-L0Y14_RS06820  2.176861   LO_vs_HO_HS_N    purple       fusA2
-
####dealing with the fact I have more than one genes that are called "hypo"
-
-
-# Assign a unique number for each distinct locusID with "hypo" genes
-locus_mapping <- ohp_DE3_av %>%
-  filter(short.names == "hypo") %>%
-  distinct(locusID) %>%
-  mutate(hypo_num = row_number())
-
-# Join this with the original dataframe and replace "hypo" genes
-new_df <- ohp_DE3_av %>%
-  left_join(locus_mapping, by = "locusID") %>%
-  mutate(short.names = ifelse(!is.na(hypo_num) & short.names == "hypo",
-                       paste0("hypo", hypo_num),
-                       short.names)) %>%
-  select(-hypo_num)  # Remove the auxiliary column
-
-# Print the result
-head(new_df)
-
##              locusID     logFC      comparison moduleFil short.names
-## 1 gene-L0Y14_RS05880 -4.039158   LO_vs_HO_HS_N    purple       hypo1
-## 2 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN    purple       hypo1
-## 3 gene-L0Y14_RS06635 -5.296420   LO_vs_HO_HS_N    purple       dprE1
-## 4 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN    purple       dprE1
-## 5 gene-L0Y14_RS06820  2.418749 LO_vs_HO_H2_noN    purple       fusA2
-## 6 gene-L0Y14_RS06820  2.176861   LO_vs_HO_HS_N    purple       fusA2
-
pox <- new_df
-
-
-
-
-avg_logFC_df <- new_df %>%
-  group_by(locusID, moduleFil) %>%
-  summarise(avg_logFC = mean(logFC, na.rm = TRUE),
-            sd_logFC = sd(logFC, na.rm = TRUE)) %>%
-  ungroup()
-
## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-
###adding the n value too
-avg_logFC_df <- new_df %>%
-  group_by(locusID, moduleFil) %>%
-  summarise(
-    n = n(), # Add the count of non-NA values
-    avg_logFC = mean(logFC, na.rm = TRUE),
-    sd_logFC = sd(logFC, na.rm = TRUE)
-  ) %>%
-  ungroup()
-
## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-
# Print the result
-head(avg_logFC_df)
-
## # A tibble: 6 × 5
-##   locusID            moduleFil     n avg_logFC sd_logFC
-##   <chr>              <chr>     <int>     <dbl>    <dbl>
-## 1 gene-L0Y14_RS05880 purple        2     -3.85    0.270
-## 2 gene-L0Y14_RS06635 purple        2     -4.79    0.718
-## 3 gene-L0Y14_RS06820 purple        2      2.30    0.171
-## 4 gene-L0Y14_RS07045 purple        2      2.42    0.107
-## 5 gene-L0Y14_RS07050 purple        2      4.01    1.56 
-## 6 gene-L0Y14_RS07420 purple        2      3.16    0.175
-
avg_logFC_df <- avg_logFC_df %>%
-  mutate(treatment_logFC_ave = "logFC_oxygen_limiting")
-
-# Print the result
-head(avg_logFC_df)
-
## # A tibble: 6 × 6
-##   locusID            moduleFil     n avg_logFC sd_logFC treatment_logFC_ave  
-##   <chr>              <chr>     <int>     <dbl>    <dbl> <chr>                
-## 1 gene-L0Y14_RS05880 purple        2     -3.85    0.270 logFC_oxygen_limiting
-## 2 gene-L0Y14_RS06635 purple        2     -4.79    0.718 logFC_oxygen_limiting
-## 3 gene-L0Y14_RS06820 purple        2      2.30    0.171 logFC_oxygen_limiting
-## 4 gene-L0Y14_RS07045 purple        2      2.42    0.107 logFC_oxygen_limiting
-## 5 gene-L0Y14_RS07050 purple        2      4.01    1.56  logFC_oxygen_limiting
-## 6 gene-L0Y14_RS07420 purple        2      3.16    0.175 logFC_oxygen_limiting
-
ohp_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
-
-names(ohp_DE_av2)
-
## [1] "locusID"             "moduleFil.x"         "n"                  
-## [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
-## [7] "short.names"         "moduleFil.y"         "func_category"
-
poxn <- ohp_DE_av2[,c(1,3)]
-
-
-######now doing this with oxy black#################
-
-
-
-#names(ohb2_DE3)
-ohb2_DE3_av <- ohb2_DE3[,c(1,49,55,59,57)]
-
-head(ohb2_DE3_av)
-
##              locusID     logFC      comparison moduleFil3 short.names
-## 1 gene-L0Y14_RS00450 -2.998400   LO_vs_HO_HS_N  black_oxy          Hr
-## 2 gene-L0Y14_RS03075 -4.498757   LO_vs_HO_HS_N  black_oxy        hypo
-## 3 gene-L0Y14_RS03375 -3.606201   LO_vs_HO_HS_N  black_oxy        fabA
-## 4 gene-L0Y14_RS03820 -4.795972   LO_vs_HO_HS_N  black_oxy         HIT
-## 5 gene-L0Y14_RS04045 -3.506534   LO_vs_HO_HS_N  black_oxy        cbbQ
-## 6 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN  black_oxy        hypo
-
names(ohb2_DE3_av)[4] <- "moduleFil"
-
-
-
-
-
-# again, I have the hypo issue
-# Assign a unique number for each distinct locusID with "hypo" genes
-locus_mapping <- ohb2_DE3_av %>%
-  filter(short.names == "hypo") %>%
-  distinct(locusID) %>%
-  mutate(hypo_num = row_number())
-
-# Join this with the original dataframe and replace "hypo" genes
-new_df <- ohb2_DE3_av %>%
-  left_join(locus_mapping, by = "locusID") %>%
-  mutate(short.names = ifelse(!is.na(hypo_num) & short.names == "hypo",
-                       paste0("hypo", hypo_num),
-                       short.names)) %>%
-  select(-hypo_num)  # Remove the auxiliary column
-
-box <- new_df
-
-
-
-avg_logFC_df <- new_df %>%
-  group_by(locusID, moduleFil) %>%
-  summarise(avg_logFC = mean(logFC, na.rm = TRUE),
-            sd_logFC = sd(logFC, na.rm = TRUE)) %>%
-  ungroup()
-
## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-
avg_logFC_df <- new_df %>%
-  group_by(locusID, moduleFil) %>%
-  summarise(
-    n = n(), # Add the count of non-NA values
-    avg_logFC = mean(logFC, na.rm = TRUE),
-    sd_logFC = sd(logFC, na.rm = TRUE)
-  ) %>%
-  ungroup()
-
## `summarise()` has grouped output by 'locusID'. You can override using the
-## `.groups` argument.
-
avg_logFC_df <- avg_logFC_df %>%
-  mutate(treatment_logFC_ave = "logFC_oxygen_limiting")
-
-# Print the result
-head(avg_logFC_df)
-
## # A tibble: 6 × 6
-##   locusID            moduleFil     n avg_logFC sd_logFC treatment_logFC_ave  
-##   <chr>              <chr>     <int>     <dbl>    <dbl> <chr>                
-## 1 gene-L0Y14_RS00450 black_oxy     1     -3.00   NA     logFC_oxygen_limiting
-## 2 gene-L0Y14_RS03075 black_oxy     1     -4.50   NA     logFC_oxygen_limiting
-## 3 gene-L0Y14_RS03375 black_oxy     1     -3.61   NA     logFC_oxygen_limiting
-## 4 gene-L0Y14_RS03820 black_oxy     1     -4.80   NA     logFC_oxygen_limiting
-## 5 gene-L0Y14_RS04045 black_oxy     1     -3.51   NA     logFC_oxygen_limiting
-## 6 gene-L0Y14_RS05880 black_oxy     2     -3.85    0.270 logFC_oxygen_limiting
-
ohb_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
-
-boxn <- ohb_DE_av2[,c(1,3)]
-
-
-oxy_purple_black <- rbind(ohb_DE_av2,ohp_DE_av2)
-
-
-names(oxy_purple_black)
-
## [1] "locusID"             "moduleFil.x"         "n"                  
-## [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
-## [7] "short.names"         "moduleFil.y"         "func_category"
-
#write.csv(oxy_purple_black,"oxy_purple_black2.csv")
-###added broad_category, removed duplicate hypo gene
-oxy_purple_black2 <- read.csv("oxy_purple_black2.csv")
-
-
-
-
- ##Filter for avg_logFC > 0
-oxy_up <- oxy_purple_black2[oxy_purple_black2$avg_logFC > 0, ]
-
-# Filter for avg_logFC < 0
-oxy_down <- oxy_purple_black2[oxy_purple_black2$avg_logFC < 0, ]
-
-
-# Filter out 'unknown'
-oxy_up_filtered <- oxy_up %>%
-  filter(func_category != "unknown")
-
-oxy_down_filtered <- oxy_down %>%
-  filter(func_category != "unknown")
-
-dim(oxy_up_filtered)
-
## [1] 11 10
-
###makes a clipping for the color (so its not all washed out)
-
-oxy_up_filtered$clipped_avg_logFC <- pmin(pmax(oxy_up_filtered$avg_logFC, -4), 4)
-
-oxy_down_filtered$clipped_avg_logFC <- pmin(pmax(oxy_down_filtered$avg_logFC, -4), 4)
-# Combine the datasets with an identifier for each
-oxy_up_filtered$set <- "Up"
-oxy_down_filtered$set <- "Down"
-
-combined_dataO <- rbind(oxy_up_filtered, oxy_down_filtered)
-# Reverse the levels so that "Up" comes before "Down"
-combined_dataO$set <- factor(combined_dataO$set, levels = c("Up", "Down"))
-
-combined_dataO <- combined_dataO %>%
-  mutate(adj_avg_logFC = case_when(
-    set == "Up" & avg_logFC < 0 ~ 0,
-    set == "Down" & avg_logFC > 0 ~ 0,
-    TRUE ~ avg_logFC
-  ))
-
-  
-
-# Plotting
-p <- ggplot(combined_dataO, aes(x = adj_avg_logFC, y = short.names, color = clipped_avg_logFC)) +
-  geom_point(size = 3) +
-  geom_errorbar(aes(xmin = adj_avg_logFC - sd_logFC, xmax = adj_avg_logFC + sd_logFC), width = 0.25) +
-  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0) +
-  labs(title = "Average logFC under oxygen limiting conditions",
-       x = "Adjusted Average LogFC",
-       y = "Genes",
-       color = "Avg LogFC Value") +
-  theme_minimal() +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
-  facet_grid(cols = vars(set), rows= vars(func_category), scales = "free_y", space = "free_y")
-
-print(p)
-

-
####trying to combine both plots so I have same persepectives for figure
-
-names(combined_dataO)
-
##  [1] "X"                   "locusID"             "moduleFil.x"        
-##  [4] "n"                   "avg_logFC"           "sd_logFC"           
-##  [7] "treatment_logFC_ave" "short.names"         "moduleFil.y"        
-## [10] "func_category"       "clipped_avg_logFC"   "set"                
-## [13] "adj_avg_logFC"
-
combined_dataO <- combined_dataO[,c("locusID","moduleFil.x","moduleFil.y","avg_logFC","sd_logFC","treatment_logFC_ave","short.names","func_category","clipped_avg_logFC","adj_avg_logFC", "set")]
-
-combined_dataS <- combined_dataS[,c("locusID","moduleFil.x","moduleFil.y","avg_logFC","sd_logFC","treatment_logFC_ave","short.names","func_category","clipped_avg_logFC","adj_avg_logFC", "set")]
-
-names(combined_dataS)
-
##  [1] "locusID"             "moduleFil.x"         "moduleFil.y"        
-##  [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
-##  [7] "short.names"         "func_category"       "clipped_avg_logFC"  
-## [10] "adj_avg_logFC"       "set"
-
combinedOS <- rbind(combined_dataO,combined_dataS)
-
-
-# Plotting
-OS <- ggplot(combinedOS, aes(x = adj_avg_logFC, y = short.names, color = clipped_avg_logFC)) +
-  geom_point(size = 3) +
-  geom_errorbar(aes(xmin = adj_avg_logFC - sd_logFC, xmax = adj_avg_logFC + sd_logFC), width = 0.25) +
-  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0) +
-  labs(title = "Average logFC under limiting conditions",
-       x = "Adjusted Average LogFC",
-       y = "Genes",
-       color = "Avg LogFC Value") +
-  theme_minimal() +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
-  facet_grid(cols = vars(treatment_logFC_ave), rows= vars(func_category), scales = "free_y", space = "free_y")
-
-print(OS)
-

-

###remaking figure 6 for paper such that all of the data points (n= -condition tereatments)

-
###here are the genes in the gold module correlated to sulfide
-all_together <- read.csv("wgcna_trait_module_hub_genes.csv")
-#names(all_together)
-all_together <- all_together[,1:4]
-
-
-
-shg_DE_av <- shg_DE2[,c(1,44,50,53)]
-
-head(shg_DE_av)
-
##              locusID     logFC      comparison      moduleFil
-## 1 gene-L0Y14_RS10410 -6.607549 LS_vs_HS_noN_HO darkgoldenrod1
-## 2 gene-L0Y14_RS00080 -3.742340 LS_vs_HS_noN_HO darkgoldenrod1
-## 3 gene-L0Y14_RS01985 -5.245126 LS_vs_HS_noN_HO darkgoldenrod1
-## 4 gene-L0Y14_RS07695  4.361391 LS_vs_HS_noN_HO darkgoldenrod1
-## 5 gene-L0Y14_RS11995  2.598792 LS_vs_HS_noN_HO darkgoldenrod1
-## 6 gene-L0Y14_RS11990  2.739793 LS_vs_HS_noN_HO darkgoldenrod1
-
#names(shg_DE_av)
-
-###here are the genes in the teal module correlated to sulfide
-
-names(sht_DE3)
-
##  [1] "X.1"                          "locusID"                     
-##  [3] "geneid"                       "start.x"                     
-##  [5] "stop.x"                       "gene_letters_andre"          
-##  [7] "accession_andre"              "start"                       
-##  [9] "stop"                         "notes"                       
-## [11] "Ontology"                     "Inference"                   
-## [13] "product"                      "protein_id"                  
-## [15] "gene_letters.notes"           "NCBI_ID"                     
-## [17] "annotations_from_andre"       "target_id"                   
-## [19] "Accession"                    "functional_role"             
-## [21] "sub_role"                     "sub_role2"                   
-## [23] "Protein"                      "Accession...Protein"         
-## [25] "seed_ortholog_evalue"         "seed_ortholog_score"         
-## [27] "best_tax_level"               "Preferred_name"              
-## [29] "GOs"                          "X"                           
-## [31] "KEGG_ko"                      "KEGG_Pathway"                
-## [33] "KEGG_Module"                  "KEGG_Reaction"               
-## [35] "KEGG_rclass"                  "BRITE"                       
-## [37] "KEGG_TC"                      "CAZy"                        
-## [39] "BiGG_Reaction"                "tax_scope"                   
-## [41] "eggNOG_OGs"                   "bestOG"                      
-## [43] "COG_Functional_Category"      "eggNOG_free_text_description"
-## [45] "logFC"                        "AveExpr"                     
-## [47] "t"                            "P.value"                     
-## [49] "adj.P.Val"                    "B"                           
-## [51] "comparison"                   "Predicted_function"          
-## [53] "condition"                    "moduleFil"
-
v <- unique(sht_DE3$locusID)
-sht_DE3_av <- sht_DE3[,c(2,45,51,54)]
-head(sht_DE3_av)
-
##              locusID     logFC      comparison moduleFil
-## 1 gene-L0Y14_RS00400 -5.671978 LS_vs_HS_noN_HO      teal
-## 2 gene-L0Y14_RS01755 -1.093415 LS_vs_HS_noN_HO      teal
-## 3 gene-L0Y14_RS04610 -1.158590 LS_vs_HS_noN_HO      teal
-## 4 gene-L0Y14_RS05075 -1.192507 LS_vs_HS_noN_HO      teal
-## 5 gene-L0Y14_RS05080 -2.083575 LS_vs_HS_noN_HO      teal
-## 6 gene-L0Y14_RS07060 -1.072405 LS_vs_HS_noN_HO      teal
-
#names(sht_DE3_av)
-
-sulfide_gt <- rbind(sht_DE3_av,shg_DE_av)
-
-
-#names(sht_DE3_av)
-
-
-#names(all_together)
-
-shortnames <- all_together[,1:2]
-
-sulfideGT <- merge(sulfide_gt,shortnames)
-list(unique(sulfideGT$short.names))
-
## [[1]]
-##  [1] "glnL"             "torF"             "norB"             "nirS"            
-##  [5] "norB1"            "norD2"            "psrA"             "fusA"            
-##  [9] "nirB"             "trmD"             "YqgE/AlgH"        "ibpA"            
-## [13] "pyrG"             "ftsH"             "rimP"             "nusA"            
-## [17] "fusA2"            "napG"             "moaA"             "DUF4340"         
-## [21] "fusA_2"           "holA"             "hupE"             "HyiA_small"      
-## [25] "isp1"             "isp2"             "HyiB_large"       "hypo"            
-## [29] "frdB_1"           "frdC_1"           "htpX"             "cmoB"            
-## [33] "peptidase"        "leuC"             "ferritin_like_AB" "korB_2"          
-## [37] "korA_2"           "rhlE"             "DUF2058"          "dnaJ"            
-## [41] "recN"             "hdrB"             "korA_1"           "korB_1"          
-## [45] "korC"             "hdrA"             "flxD"             "flxC"            
-## [49] "flxB"             "flxA"             "aclB_full"        "aclA"            
-## [53] "idh"              "acnA"             "pntA"             "pntB"            
-## [57] "mdh_1"            "pfor_3"           "ispB"             "porin"           
-## [61] "clpB"             "DUF4126"          "rho"              "ptsP_E1"         
-## [65] "feoB"             "mcrA"             "korA_3"           "korB_3"          
-## [69] "pfor_like"
-
desired_order2 <- c("pntB","pntA","mdh_1","korC","korB_1","korA_1","idh","hdrB", "hdrA", "flxD","flxC","flxB", "flxA","acnA","aclB_full","aclA","korB_2","korA_2","korB_3","korA_3","pfor_like","pfor_3","frdC_1" ,"frdB_1","HyiB_large","isp1","isp2","HyiA_small", "hupE","hypo","norD2","norB1" ,"norB", "nirS","napG","nirB","glnL","fusA2","mcrA","fusA_2","fusA","trmD","rimP","rho","rhlE","recN","peptidase","nusA","ibpA","htpX","holA","ftsH", "dnaJ","cmoB","clpB","pyrG","psrA","moaA","leuC","ispB","torF","ptsP_E1","porin","feoB")
-
-filteredGT <- subset(sulfideGT, short.names %in% desired_order2)
-filteredGT$short.names <- factor(filteredGT$short.names, levels = desired_order2)
-
-
-filteredGT$clipped_logFC <- pmin(pmax(filteredGT$logFC, -4), 4)
-
-try <- ggplot(data=filteredGT, aes(x= logFC, y = fct_rev(short.names))) +
-  geom_boxplot()+
-  geom_point(aes(color = clipped_logFC), position = position_jitter(width = 0.1,height = 0.1)) +
-  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
-  labs(title = "Boxplot with Colored Dots", x = "Category", y = "Value") +
-  theme_minimal()
-  
-try
-

-
table(sulfideGT$comparison,sulfideGT$short.names)
-
##                  
-##                   aclA aclB_full acnA clpB cmoB dnaJ DUF2058 DUF4126 DUF4340
-##   H2_vs_HS_N_HO      0         0    0    1    1    1       1       1       1
-##   H2_vs_HS_noN_HO    1         1    1    1    1    1       0       0       0
-##   LS_vs_HS_N_HO      0         0    0    1    0    0       1       1       0
-##   LS_vs_HS_noN_HO    1         1    1    1    0    1       0       0       0
-##   SW_vs_HS_noN_HO    1         1    1    1    1    1       1       1       1
-##                  
-##                   feoB ferritin_like_AB flxA flxB flxC flxD frdB_1 frdC_1 ftsH
-##   H2_vs_HS_N_HO      1                1    0    0    0    0      2      1    1
-##   H2_vs_HS_noN_HO    0                1    1    1    1    1      2      1    1
-##   LS_vs_HS_N_HO      1                0    0    0    0    0      0      0    1
-##   LS_vs_HS_noN_HO    0                0    1    1    1    1      2      1    0
-##   SW_vs_HS_noN_HO    1                1    1    1    1    1      2      1    1
-##                  
-##                   fusA fusA_2 fusA2 glnL hdrA hdrB holA htpX hupE HyiA_small
-##   H2_vs_HS_N_HO      1      1     1    0    0    0    1    1    1          1
-##   H2_vs_HS_noN_HO    1      1     1    1    1    1    1    1    0          1
-##   LS_vs_HS_N_HO      0      0     0    0    0    0    0    1    0          1
-##   LS_vs_HS_noN_HO    1      1     1    1    1    1    0    0    0          0
-##   SW_vs_HS_noN_HO    1      1     1    1    1    1    1    1    0          1
-##                  
-##                   HyiB_large hypo ibpA idh isp1 isp2 ispB korA_1 korA_2 korA_3
-##   H2_vs_HS_N_HO            1    2    1   0    1    1    1      0      1      1
-##   H2_vs_HS_noN_HO          1    2    1   1    1    1    1      1      1      1
-##   LS_vs_HS_N_HO            1    1    1   0    1    1    1      0      0      1
-##   LS_vs_HS_noN_HO          0    0    0   1    0    0    0      1      0      1
-##   SW_vs_HS_noN_HO          1    1    1   1    1    1    1      1      1      1
-##                  
-##                   korB_1 korB_2 korB_3 korC leuC mcrA mdh_1 moaA napG nirB nirS
-##   H2_vs_HS_N_HO        0      1      1    0    0    1     0    1    1    0    1
-##   H2_vs_HS_noN_HO      1      1      1    1    1    0     1    0    1    1    1
-##   LS_vs_HS_N_HO        0      0      0    0    0    0     0    0    0    0    1
-##   LS_vs_HS_noN_HO      1      0      1    1    1    0     1    0    0    1    0
-##   SW_vs_HS_noN_HO      1      1      1    1    1    0     1    1    1    1    1
-##                  
-##                   norB norB1 norD2 nusA peptidase pfor_3 pfor_like pntA pntB
-##   H2_vs_HS_N_HO      1     1     1    1         0      1         1    0    0
-##   H2_vs_HS_noN_HO    1     1     1    1         1      1         1    1    1
-##   LS_vs_HS_N_HO      0     0     0    1         0      0         1    0    0
-##   LS_vs_HS_noN_HO    0     0     1    1         1      0         0    1    1
-##   SW_vs_HS_noN_HO    1     1     1    1         1      1         1    1    1
-##                  
-##                   porin psrA ptsP_E1 pyrG recN rhlE rho rimP torF trmD
-##   H2_vs_HS_N_HO       1    1       1    1    1    1   1    1    1    1
-##   H2_vs_HS_noN_HO     1    1       1    1    1    1   1    1    1    1
-##   LS_vs_HS_N_HO       1    0       0    0    0    1   0    0    0    0
-##   LS_vs_HS_noN_HO     1    0       1    1    1    0   0    1    1    0
-##   SW_vs_HS_noN_HO     1    1       1    1    1    1   1    1    1    1
-##                  
-##                   YqgE/AlgH
-##   H2_vs_HS_N_HO           1
-##   H2_vs_HS_noN_HO         1
-##   LS_vs_HS_N_HO           0
-##   LS_vs_HS_noN_HO         0
-##   SW_vs_HS_noN_HO         1
-
###### now doing this for oxygen with purple and black modules, which I named pox and box
-
-pb_ox <- rbind(pox,box)
-
-###changing "hypo2 to another name so it wont get filtered out
-pb_ox <- pb_ox %>%
-  mutate(short.names = ifelse(short.names == "hypo2", "unknown_hyd1e", short.names))
-
-
-
-####oops this caught the hypo2 from the other module as well, so filtering that out
-
-pb_ox <- pb_ox %>%
-  filter(!grepl("gene-L0Y14_RS03075", locusID))
-
-####getting rid of the hypotheticals
-
-
-###in case I want this list later
-hypos_fil <- pb_ox %>%
-  filter(grepl("^hypo", short.names))
-
-
-###ok, again with the filter
-pb_ox_fil <- pb_ox %>%
-  filter(!grepl("^hypo", short.names))
-
-###checking if genes are overlapping (meaning found in both modules....they aren't)
-#table(pb_ox_fil$moduleFil,pb_ox_fil$short.names)
-
-#print(unique(pb_ox_fil$short.names))
-
-desired_order3 <- c("nifJ","ppdK","malQ","GH16","acnB","fabA","unknown_hyd1e","hypE","isp2","hybO","hupE","cphA","SgpA_1","fusA2","mrcA","yidD","yidC","yebA","tsaC","rocR","rpmB","prsK","LT_domain","lolA_like","IscA","HIT","himD","gmhA","ftsE","dut","cbbQ","bamD","Prxs","Hr","fliK1","FBP")
-
-#list(desired_order3)
-
-####ok, some of these are unknowns that I left off of the fgure, so I am going to filter them out
-
-pb_ox_fil2 <- subset(pb_ox_fil, short.names %in% desired_order3)
-pb_ox_fil2$short.names <- factor(pb_ox_fil2$short.names, levels = desired_order3)
-
-pb_ox_fil2$clipped_logFC <- pmin(pmax(pb_ox_fil2$logFC, -4), 4)
-
-
-try2 <- ggplot(data=pb_ox_fil2, aes(x= logFC, y = fct_rev(short.names))) +
-  geom_boxplot()+
-  geom_point(aes(color = clipped_logFC), position = position_jitter(width = 0.1,height = 0.1)) +
-  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
-  labs(title = "Boxplot with Colored Dots", x = "Category", y = "Value") +
-  scale_x_reverse()+
-  theme_minimal()
-  
-try2
-

-
table(pb_ox_fil2$comparison)
-
## 
-## LO_vs_HO_H2_noN   LO_vs_HO_HS_N 
-##              19              30
-
####ok, I have a problem, I can't get the dimensions to match for adobe illustrator, so adding dummy variables to save me time in illustrator
-
-# Define the number of dummy entries you need
-num_dummy <- 33
-
-# Creating the dummy data frame with unique dummy names
-dummy_data2 <- data.frame(
-  locusID= rep("fakeid",num_dummy),
-  logFC = rep(0, num_dummy), # Sets all logFC values to 0
-  comparison=rep("fakecomparison",num_dummy),
-  moduleFil = rep("fakemoduleFil",num_dummy),
-  short.names = paste("Dummy", 1:num_dummy, sep=""),
-  clipped_logFC = rep(4, num_dummy)# Creates "Dummy1", "Dummy2", ..., "Dummy33"
-)
-
-
-pb_ox_filDUMM <- rbind(pb_ox_fil2,dummy_data2)
-
-
-try3 <- ggplot(data=pb_ox_filDUMM, aes(x= logFC, y = fct_rev(short.names))) +
-  geom_boxplot()+
-  geom_point(aes(color = clipped_logFC), position = position_jitter(width = 0.1,height = 0.1)) +
-  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
-  labs(title = "Boxplot with Colored Dots", x = "Category", y = "Value") +
-  scale_x_reverse()+
-  theme_minimal()
-  
-try3
-

-
-
-
- - - - -
- - - - - - - - - - - - - - - diff --git a/analysis/WGCNA/WGCNA_ANALYSIS.html b/analysis/WGCNA/WGCNA_ANALYSIS.html index ed00f16..12dae0e 100644 --- a/analysis/WGCNA/WGCNA_ANALYSIS.html +++ b/analysis/WGCNA/WGCNA_ANALYSIS.html @@ -1300,6 +1300,9 @@

MM and GS calc

#write.csv(merged_df4,"GM_GS_sulfide_oxygen.csv") ################################################################################################################## + +
+

scatter plots

names(modNames)
## NULL
moduleColors = mergedColors
@@ -2741,7 +2744,7 @@ 

DE expression analysis of significant genes, classified as hubs, in significant modules

de<-read.csv('entotal_202200815_use_this_corrected_locus_tags.csv',header=TRUE,stringsAsFactors=FALSE)
 
-sig<-de[de$adj.P.Val<=0.1,]
+sig<-de[de$adj.P.Val<=0.05,]
 sig0<-sig[sig$logFC<=-1,]
 sig1<-sig[sig$logFC>=1,]
 sig<-rbind(sig0,sig1)
@@ -3025,7 +3028,7 @@ 

DE expression analysis of significant genes, classified as hubs, in geom_vline(xintercept = c(1.5,2.5,3.5, 4.5, 5.5,6.5,7.5,8.5,9.5,10.5,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5), color = "grey") + my.colors.sulfide_nolowSvsH2 + my.theme

-

+

####now doing this for sulfide_teal, the otehr module that is significant for sulfide
 names(sul.teal)
## [1] "locusID"   "gene"      "condition" "moduleFil"
@@ -3109,7 +3112,7 @@

DE expression analysis of significant genes, classified as hubs, in geom_point(size=5,shape=18) + my.colors.oxygen + my.theme

-

+

###### oxygen in black
 
 #names(oxy_b)
@@ -3160,7 +3163,7 @@ 

DE expression analysis of significant genes, classified as hubs, in geom_point(size=5,shape=18) + my.colors.oxygen + my.theme

-

+

average logFCs

######putting all of this together, so that I have a workable file I can use in adobe illustrator
@@ -3210,9 +3213,9 @@ 

average logFCs

## locusID moduleFil n avg_logFC sd_logFC ## <chr> <chr> <int> <dbl> <dbl> ## 1 gene-L0Y14_RS00080 darkgoldenrod1 3 -4.58 0.732 -## 2 gene-L0Y14_RS01985 darkgoldenrod1 4 -5.22 1.43 +## 2 gene-L0Y14_RS01985 darkgoldenrod1 3 -5.89 0.580 ## 3 gene-L0Y14_RS07695 darkgoldenrod1 4 5.48 1.11 -## 4 gene-L0Y14_RS10315 darkgoldenrod1 4 2.05 0.778 +## 4 gene-L0Y14_RS10315 darkgoldenrod1 3 2.38 0.487 ## 5 gene-L0Y14_RS10410 darkgoldenrod1 3 -5.97 0.737 ## 6 gene-L0Y14_RS11255 darkgoldenrod1 4 2.70 0.991
avg_logFC_df <- avg_logFC_df %>%
@@ -3224,9 +3227,9 @@ 

average logFCs

## locusID moduleFil n avg_logFC sd_logFC treatment_logFC_ave ## <chr> <chr> <int> <dbl> <dbl> <chr> ## 1 gene-L0Y14_RS00080 darkgoldenrod1 3 -4.58 0.732 logFC_sulfide_limi… -## 2 gene-L0Y14_RS01985 darkgoldenrod1 4 -5.22 1.43 logFC_sulfide_limi… +## 2 gene-L0Y14_RS01985 darkgoldenrod1 3 -5.89 0.580 logFC_sulfide_limi… ## 3 gene-L0Y14_RS07695 darkgoldenrod1 4 5.48 1.11 logFC_sulfide_limi… -## 4 gene-L0Y14_RS10315 darkgoldenrod1 4 2.05 0.778 logFC_sulfide_limi… +## 4 gene-L0Y14_RS10315 darkgoldenrod1 3 2.38 0.487 logFC_sulfide_limi… ## 5 gene-L0Y14_RS10410 darkgoldenrod1 3 -5.97 0.737 logFC_sulfide_limi… ## 6 gene-L0Y14_RS11255 darkgoldenrod1 4 2.70 0.991 logFC_sulfide_limi…
shg_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
@@ -3695,19 +3698,19 @@ 

average logFCs

table(ohp_DE3$short.names)
## 
 ##   bamD   cphA   cvpA  dprE1    FBP  fusA2   GH16   hupE   hybO   hypE   hypo 
-##      2      2      2      2      1      2      2      2      2      2      8 
-##   isp2   mrcA   nifJ   ppdK   rocR SgpA_1   tsaC   yfbK   yidC   yidD 
-##      2      2      2      2      2      2      2      1      2      1
+## 2 2 1 2 1 2 1 2 2 2 8 +## isp2 mrcA nifJ ppdK rocR SgpA_1 tsaC yidC yidD +## 2 2 2 2 2 2 2 2 1
ohp_DE3_av <- ohp_DE3[,c(1,49,55,59,57)]
 
 head(ohp_DE3_av)
##              locusID     logFC      comparison moduleFil short.names
 ## 1 gene-L0Y14_RS05880 -4.039158   LO_vs_HO_HS_N    purple        hypo
 ## 2 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN    purple        hypo
-## 3 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN    purple       dprE1
-## 4 gene-L0Y14_RS06635 -5.296420   LO_vs_HO_HS_N    purple       dprE1
-## 5 gene-L0Y14_RS06820  2.176861   LO_vs_HO_HS_N    purple       fusA2
-## 6 gene-L0Y14_RS06820  2.418749 LO_vs_HO_H2_noN    purple       fusA2
+## 3 gene-L0Y14_RS06635 -5.296420 LO_vs_HO_HS_N purple dprE1 +## 4 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN purple dprE1 +## 5 gene-L0Y14_RS06820 2.418749 LO_vs_HO_H2_noN purple fusA2 +## 6 gene-L0Y14_RS06820 2.176861 LO_vs_HO_HS_N purple fusA2
####dealing with the fact I have more than one genes that are called "hypo"
 
 
@@ -3730,10 +3733,10 @@ 

average logFCs

##              locusID     logFC      comparison moduleFil short.names
 ## 1 gene-L0Y14_RS05880 -4.039158   LO_vs_HO_HS_N    purple       hypo1
 ## 2 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN    purple       hypo1
-## 3 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN    purple       dprE1
-## 4 gene-L0Y14_RS06635 -5.296420   LO_vs_HO_HS_N    purple       dprE1
-## 5 gene-L0Y14_RS06820  2.176861   LO_vs_HO_HS_N    purple       fusA2
-## 6 gene-L0Y14_RS06820  2.418749 LO_vs_HO_H2_noN    purple       fusA2
+## 3 gene-L0Y14_RS06635 -5.296420 LO_vs_HO_HS_N purple dprE1 +## 4 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN purple dprE1 +## 5 gene-L0Y14_RS06820 2.418749 LO_vs_HO_H2_noN purple fusA2 +## 6 gene-L0Y14_RS06820 2.176861 LO_vs_HO_HS_N purple fusA2
pox <- new_df
 
 
@@ -3801,11 +3804,11 @@ 

average logFCs

head(ohb2_DE3_av)
##              locusID     logFC      comparison moduleFil3 short.names
 ## 1 gene-L0Y14_RS00450 -2.998400   LO_vs_HO_HS_N  black_oxy          Hr
-## 2 gene-L0Y14_RS00450 -2.057712 LO_vs_HO_H2_noN  black_oxy          Hr
-## 3 gene-L0Y14_RS01145 -2.629476   LO_vs_HO_HS_N  black_oxy        hypo
-## 4 gene-L0Y14_RS02180 -3.074314   LO_vs_HO_HS_N  black_oxy        ftsE
-## 5 gene-L0Y14_RS03075 -4.498757   LO_vs_HO_HS_N  black_oxy        hypo
-## 6 gene-L0Y14_RS03375 -3.606201   LO_vs_HO_HS_N  black_oxy        fabA
+## 2 gene-L0Y14_RS03075 -4.498757 LO_vs_HO_HS_N black_oxy hypo +## 3 gene-L0Y14_RS03375 -3.606201 LO_vs_HO_HS_N black_oxy fabA +## 4 gene-L0Y14_RS03820 -4.795972 LO_vs_HO_HS_N black_oxy HIT +## 5 gene-L0Y14_RS04045 -3.506534 LO_vs_HO_HS_N black_oxy cbbQ +## 6 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN black_oxy hypo
names(ohb2_DE3_av)[4] <- "moduleFil"
 
 
@@ -3856,12 +3859,12 @@ 

average logFCs

## # A tibble: 6 × 6
 ##   locusID            moduleFil     n avg_logFC sd_logFC treatment_logFC_ave  
 ##   <chr>              <chr>     <int>     <dbl>    <dbl> <chr>                
-## 1 gene-L0Y14_RS00450 black_oxy     2     -2.53    0.665 logFC_oxygen_limiting
-## 2 gene-L0Y14_RS01145 black_oxy     1     -2.63   NA     logFC_oxygen_limiting
-## 3 gene-L0Y14_RS02180 black_oxy     1     -3.07   NA     logFC_oxygen_limiting
-## 4 gene-L0Y14_RS03075 black_oxy     1     -4.50   NA     logFC_oxygen_limiting
-## 5 gene-L0Y14_RS03375 black_oxy     1     -3.61   NA     logFC_oxygen_limiting
-## 6 gene-L0Y14_RS03820 black_oxy     1     -4.80   NA     logFC_oxygen_limiting
+## 1 gene-L0Y14_RS00450 black_oxy 1 -3.00 NA logFC_oxygen_limiting +## 2 gene-L0Y14_RS03075 black_oxy 1 -4.50 NA logFC_oxygen_limiting +## 3 gene-L0Y14_RS03375 black_oxy 1 -3.61 NA logFC_oxygen_limiting +## 4 gene-L0Y14_RS03820 black_oxy 1 -4.80 NA logFC_oxygen_limiting +## 5 gene-L0Y14_RS04045 black_oxy 1 -3.51 NA logFC_oxygen_limiting +## 6 gene-L0Y14_RS05880 black_oxy 2 -3.85 0.270 logFC_oxygen_limiting
ohb_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")
 
 boxn <- ohb_DE_av2[,c(1,3)]
@@ -4079,7 +4082,7 @@ 

average logFCs

theme_minimal() try
-

+

table(sulfideGT$comparison,sulfideGT$short.names)
##                  
 ##                   aclA aclB_full acnA clpB cmoB dnaJ DUF2058 DUF4126 DUF4340
@@ -4090,7 +4093,7 @@ 

average logFCs

## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1 ## ## feoB ferritin_like_AB flxA flxB flxC flxD frdB_1 frdC_1 ftsH -## H2_vs_HS_N_HO 1 1 0 0 1 0 2 1 1 +## H2_vs_HS_N_HO 1 1 0 0 0 0 2 1 1 ## H2_vs_HS_noN_HO 0 1 1 1 1 1 2 1 1 ## LS_vs_HS_N_HO 1 0 0 0 0 0 0 0 1 ## LS_vs_HS_noN_HO 0 0 1 1 1 1 2 1 0 @@ -4111,14 +4114,14 @@

average logFCs

## SW_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1 1 ## ## korB_1 korB_2 korB_3 korC leuC mcrA mdh_1 moaA napG nirB nirS -## H2_vs_HS_N_HO 0 1 1 0 0 1 0 1 1 1 1 +## H2_vs_HS_N_HO 0 1 1 0 0 1 0 1 1 0 1 ## H2_vs_HS_noN_HO 1 1 1 1 1 0 1 0 1 1 1 ## LS_vs_HS_N_HO 0 0 0 0 0 0 0 0 0 0 1 ## LS_vs_HS_noN_HO 1 0 1 1 1 0 1 0 0 1 0 ## SW_vs_HS_noN_HO 1 1 1 1 1 0 1 1 1 1 1 ## ## norB norB1 norD2 nusA peptidase pfor_3 pfor_like pntA pntB -## H2_vs_HS_N_HO 1 1 1 1 1 1 1 0 0 +## H2_vs_HS_N_HO 1 1 1 1 0 1 1 0 0 ## H2_vs_HS_noN_HO 1 1 1 1 1 1 1 1 1 ## LS_vs_HS_N_HO 0 0 0 1 0 0 1 0 0 ## LS_vs_HS_noN_HO 0 0 1 1 1 0 0 1 1 @@ -4190,11 +4193,11 @@

average logFCs

theme_minimal() try2
-

+

table(pb_ox_fil2$comparison)
## 
 ## LO_vs_HO_H2_noN   LO_vs_HO_HS_N 
-##              20              35
+## 19 30
####ok, I have a problem, I can't get the dimensions to match for adobe illustrator, so adding dummy variables to save me time in illustrator
 
 # Define the number of dummy entries you need
@@ -4223,8 +4226,7 @@ 

average logFCs

theme_minimal() try3
-

-
#save.image("allRfrom20240306.RData")
+