-
Notifications
You must be signed in to change notification settings - Fork 0
/
Training_Day1.R
78 lines (56 loc) · 2.86 KB
/
Training_Day1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
### Training Day 1 ###
## By Cole Nawrocki ##
## Topic: Seurat Workflow Part 1 -- Setting up the object and basic process ##
# Load packages
library(Seurat)
library(Matrix)
# Set the objects to be the older, simpler version.
options(Seurat.object.assay.version = "v3")
# Read in counts.
# Note: you may have to change the file path accordingly.
cts <- read.csv("/Users/cnawrocki/Library/CloudStorage/OneDrive-UniversityofVirginia/Grainger-Lab/Bioinformatics/Learning-Resources/Example-Data/Lung12_exprMat_file.csv")
cts
# Get the counts into the correct format.
rownames(cts) <- paste(cts$fov, cts$cell_ID, sep = "_")
cts <- cts[,-c(1,2)]
# Make the counts into a sparse matrix to save memory.
cts <- as.matrix(cts) |> t()
cts <- as(cts, "CsparseMatrix")
# Note: you can still use base R data interaction on the sparse matrix. See:
cts[1:6, 1:6]
rownames(cts)
# Read in the metadata.
meta <- read.csv("/Users/cnawrocki/Library/CloudStorage/OneDrive-UniversityofVirginia/Grainger-Lab/Bioinformatics/Learning-Resources/Example-Data/Lung12_metadata_file.csv")
# Get the metadata into the correct format.
rownames(meta) <- paste(meta$fov, meta$cell_ID, sep = "_")
# Make sure that the rownames of the metadata and the colnames of the counts match exactly.
idx <- rownames(meta)
cts <- cts[,idx]
# Create the Seurat object.
obj <- CreateSeuratObject(counts = cts, meta.data = meta, project = "training_obj")
# Skipping QC for now.
# Normalize the data and find the most variable features. Apparently 2000-3000 is sufficient.
obj <- NormalizeData(obj, normalization.method = "LogNormalize")
obj <- FindVariableFeatures(obj, method = "vst", nfeatures = 2000)
# Look for metadata variables that can predict the number of counts in a cell.
FeatureScatter(obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
# If you see a variable that is positively correlated with nCount_RNA, then it
# makes sense to regress it out. nFeature_RNA should be positively correlated
# with nCount_RNA though (this makes intuitive sense), and should not be
# regressed out, typically. We will regress it out here just for example's sake
# (so we can get used to working with the functions).
# Do some behind-the-scenes stuff to speed up the following steps.
options(future.globals.maxSize = 2e9)
plan(strategy = "multisession", workers=4)
# Scale the variable features. Regress out the variable(s) you found above.
obj <- ScaleData(obj, vars.to.regress = "nFeature_RNA")
# Run Principle Component Analysis (uses the scaled data) to do dimensional reduction.
obj <- RunPCA(obj, npcs = 50)
# Choose appropriate number of PCs. You want to choose a number past where the elbow is on this plot.
ElbowPlot(obj, ndims = 50)
# Run the UMAP with the chosen PCs.
obj <- RunUMAP(obj, dims = 1:20)
# Reverse those behind-the-scenes things.
plan(strategy = "sequential")
# You now have the ability to view the UMAP.
DimPlot(obj)