From c4f61bc705095820727ddc11e26458ff71c8e75f Mon Sep 17 00:00:00 2001 From: Amadeo <6462800+amadeovezz@users.noreply.github.com> Date: Wed, 16 Aug 2023 19:34:15 -0700 Subject: [PATCH 1/5] ignore warnings --- python-container/python-tutorial.ipynb | 109 +++++++++++++++++-------- 1 file changed, 73 insertions(+), 36 deletions(-) diff --git a/python-container/python-tutorial.ipynb b/python-container/python-tutorial.ipynb index 2e892b2..c586101 100644 --- a/python-container/python-tutorial.ipynb +++ b/python-container/python-tutorial.ipynb @@ -8,8 +8,11 @@ "source": [ "import numpy as np\n", "import pandas as pd\n", - "import scanpy as sc" - ] + "import scanpy as sc\n", + "import warnings \n", + "warnings.filterwarnings('ignore')" + ], + "id": "5c6c1ceedd895a4b" }, { "cell_type": "markdown", @@ -17,7 +20,8 @@ "source": [ "# This is a snippet from the scanpy tutorial \n", "go to https://scanpy-tutorials.readthedocs.io/en/latest/pbmc3k.html for full tutorial" - ] + ], + "id": "362bd1fab9d2d378" }, { "cell_type": "code", @@ -28,7 +32,8 @@ "adata = sc.read_10x_mtx('/home/tutorial/filtered_gene_bc_matrices/hg19/',\n", " var_names = 'gene_symbols',\n", " cache = True)" - ] + ], + "id": "c2c289451f4dc1d6" }, { "cell_type": "code", @@ -37,7 +42,8 @@ "outputs": [], "source": [ "adata.var_names_make_unique()" - ] + ], + "id": "4896dbeb0cd80db6" }, { "cell_type": "code", @@ -58,14 +64,16 @@ ], "source": [ "adata" - ] + ], + "id": "c05434bdae362ad5" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Preprocessing " - ] + ], + "id": "c5300883059bb7c6" }, { "cell_type": "code", @@ -87,14 +95,16 @@ ], "source": [ "sc.pl.highest_expr_genes(adata, n_top=20, )" - ] + ], + "id": "ee7cbcef69e1769c" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# basic filtering " - ] + ], + "id": "a3440c6def535ed8" }, { "cell_type": "code", @@ -113,14 +123,16 @@ "source": [ "sc.pp.filter_cells(adata, min_genes=200)\n", "sc.pp.filter_genes(adata, min_cells=3)" - ] + ], + "id":
"fa2438c494490c99" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Mitochondrial gene info" - ] + ], + "id": "89eccd900b66e4c0" }, { "cell_type": "code", @@ -130,7 +142,8 @@ "source": [ "adata.var['mt'] = adata.var_names.str.startswith('MT-') # annotate the group of mitochondrial genes as 'mt'\n", "sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)" - ] + ], + "id": "cb7ab975c0d1d881" }, { "cell_type": "code", @@ -164,7 +177,8 @@ ], "source": [ "sc.pl.violin(adata, ['n_genes_by_counts'])" - ] + ], + "id": "ab003857e6f90661" }, { "cell_type": "code", @@ -198,7 +212,8 @@ ], "source": [ "sc.pl.violin(adata, ['total_counts'])" - ] + ], + "id": "4e3878fe0e0022b8" }, { "cell_type": "code", @@ -232,14 +247,16 @@ ], "source": [ "sc.pl.violin(adata, ['pct_counts_mt'])" - ] + ], + "id": "a4c4f0134defc5af" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Remove cells that have too many mitochondrial genes expressed or too many total counts:" - ] + ], + "id": "10a3446931d36e99" }, { "cell_type": "code", @@ -282,7 +299,8 @@ "source": [ "sc.pl.scatter(adata, x='total_counts', y='pct_counts_mt')\n", "sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')" - ] + ], + "id": "1fd866cdffb1d51b" }, { "cell_type": "code", @@ -303,14 +321,16 @@ "source": [ "adata = adata[adata.obs.n_genes_by_counts < 2500, :]\n", "adata = adata[adata.obs.pct_counts_mt < 5, :]" - ] + ], + "id": "35764747925638fd" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Total-count normalize (library-size correct) the data matrix 𝐗 to 10,000 reads per cell, so that counts become comparable among cells." 
- ] + ], + "id": "ff147ea1d55f6715" }, { "cell_type": "code", @@ -328,14 +348,16 @@ ], "source": [ "sc.pp.normalize_total(adata, target_sum=1e4)" - ] + ], + "id": "783eab52740584d2" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Logarithmize the data:" - ] + ], + "id": "38567a51e40de51" }, { "cell_type": "code", @@ -344,14 +366,16 @@ "outputs": [], "source": [ "sc.pp.log1p(adata)\n" - ] + ], + "id": "b95b45c162ab0ea8" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Identify highly-variable genes." - ] + ], + "id": "c8c481b23616631a" }, { "cell_type": "code", @@ -360,7 +384,8 @@ "outputs": [], "source": [ "sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)" - ] + ], + "id": "33df7a6068495b98" }, { "cell_type": "code", @@ -382,7 +407,8 @@ ], "source": [ "sc.pl.highly_variable_genes(adata)" - ] + ], + "id": "30de4bdb6e50dfcd" }, { "cell_type": "code", @@ -400,7 +426,8 @@ ], "source": [ "adata = adata[:, adata.var.highly_variable]" - ] + ], + "id": "6bcbd4da5241e276" }, { "cell_type": "code", @@ -418,14 +445,16 @@ ], "source": [ "sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])" - ] + ], + "id": "e4cc1fc68c6111dd" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# PCA " - ] + ], + "id": "2572ff75f065049a" }, { "cell_type": "code", @@ -434,7 +463,8 @@ "outputs": [], "source": [ "sc.pp.scale(adata, max_value=10)" - ] + ], + "id": "24723dfede31c1dc" }, { "cell_type": "code", @@ -452,7 +482,8 @@ ], "source": [ "sc.tl.pca(adata, svd_solver='arpack')" - ] + ], + "id": "79cf216fdf1d7332" }, { "cell_type": "code", @@ -482,14 +513,16 @@ ], "source": [ "sc.pl.pca(adata, color='CST3')" - ] + ], + "id": "1f7e9dc799df2e2f" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Computing the neighborhood graph" - ] + ], + "id": "fa0c299b54fc6435" }, { "cell_type": "code", @@ -498,14 +531,16 @@ "outputs": [], "source": [ "sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)" - ] + ], + "id": 
"89e4e3a0de7f1b2a" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Embedding the neighborhood graph" - ] + ], + "id": "afb1c32550a3da81" }, { "cell_type": "code", @@ -514,7 +549,8 @@ "outputs": [], "source": [ "sc.tl.umap(adata)" - ] + ], + "id": "57df5c2af6923ea2" }, { "cell_type": "code", @@ -523,7 +559,8 @@ "outputs": [], "source": [ "sc.pl.umap(adata, color=['CST3', 'NKG7', 'PPBP'])" - ] + ], + "id": "acb62c0457d9c0ff" } ], "metadata": { From 9e15976ee58263706adc1ad84c0a60cccad3b6c9 Mon Sep 17 00:00:00 2001 From: Amadeo <6462800+amadeovezz@users.noreply.github.com> Date: Wed, 16 Aug 2023 20:02:53 -0700 Subject: [PATCH 2/5] get container to work with M1/M2 --- README.markdown | 10 ++++++++++ rstudio-container/start.sh | 19 ++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index 474453e..99ffc4c 100644 --- a/README.markdown +++ b/README.markdown @@ -33,6 +33,16 @@ ALL work must be done in this directory. It will be mounted inside the container After entering the path of your local work directory, follow instructions to copy and paste link with IP address to a web browser. +### If you are using an M1/M2 Mac + +Be sure to have: + +- The latest version of Docker + +And in Docker Settings: + +- General -> User Virtualization Framework -> ON +- Features in development -> User Rosetta for x86/amd64 emulation on Apple Silicon -> ON ## Running Scanpy Tutorial diff --git a/rstudio-container/start.sh b/rstudio-container/start.sh index caf0ab1..41fd7c9 100755 --- a/rstudio-container/start.sh +++ b/rstudio-container/start.sh @@ -3,4 +3,21 @@ echo Set your workding directory. This is the directory where all your data live Please enter full path. 
read dir -docker run -d -p 8787:8787 -e PASSWORD=@hackathon2021 -v $dir:/home drbueno/rstudio-single-cell \ No newline at end of file +architecture=$(uname -m)  # arm64 on Apple Silicon (M1/M2) Macs + +if [ "$architecture" = "arm64" ]; then + docker run -d --rm \ + -p 8787:8787 \ + -e PASSWORD=@hackathon2021 \ + -v "$dir":/home \ + --platform linux/x86_64 \ + drbueno/rstudio-single-cell + echo "please navigate to: http://localhost:8787/" +else + docker run -d --rm \ + -p 8787:8787 \ + -e PASSWORD=@hackathon2021 \ + -v "$dir":/home \ + drbueno/rstudio-single-cell + echo "please navigate to: http://localhost:8787/" +fi \ No newline at end of file From a593fe211e9acfb575b8f3929498a6bf124e5234 Mon Sep 17 00:00:00 2001 From: Amadeo <6462800+amadeovezz@users.noreply.github.com> Date: Thu, 17 Aug 2023 11:39:11 -0700 Subject: [PATCH 3/5] docker notes and example of working directory --- README.markdown | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.markdown b/README.markdown index 99ffc4c..985844c 100644 --- a/README.markdown +++ b/README.markdown @@ -13,7 +13,9 @@ Sample Data with tutorial also included. Users can start analyzing single cell sequencing data with Scanpy in python or Seurat in R. ## Requirements: -Docker +- Docker + +Note: The easiest way to install and use docker, is via docker desktop: https://www.docker.com/products/docker-desktop/ # Running Python Container ## Pull image from Docker hub @@ -30,6 +32,8 @@ Run (recommended to run under screen) You will be prompted to set your working directory. This is the directory where the data lives. ALL work must be done in this directory. It will be mounted inside the container in ```/home/data``` +An example of working directory: `/Users/hackathon-user/UCSF_HACKATHON_PRIMER/python-container/data` + After entering the path of your local work directory, follow instructions to copy and paste link with IP address to a web browser.
@@ -39,7 +43,7 @@ Be sure to have: - The latest version of Docker -And in Docker Settings: +And in Docker Settings (using docker desktop): - General -> User Virtualization Framework -> ON - Features in development -> User Rosetta for x86/amd64 emulation on Apple Silicon -> ON @@ -88,6 +92,8 @@ Run ```./start.sh``` You will be prompted to set your working directory. This is the directory where the data lives. ALL work must be done in this directory. It will be mounted inside the container in ```/home``` +An example of working directory: `/Users/hackathon-user/UCSF_HACKATHON_PRIMER/rstudio-container/data` + Go to a web browser and visit ```localhost:8787``` From 1fa55279ff33352033add0f52983eebce443bb99 Mon Sep 17 00:00:00 2001 From: Alaa Abdellatif Date: Fri, 18 Aug 2023 12:44:10 -0700 Subject: [PATCH 4/5] Update github repository for cloning, added more minor details --- README.markdown | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.markdown b/README.markdown index 985844c..5ffa81b 100644 --- a/README.markdown +++ b/README.markdown @@ -1,7 +1,7 @@ # Hackathon Single Cell Notebook Clone this repo to your computer. -```git clone https://github.com/raybueno/UCSF_HACKATHON_PRIMER.git``` +```git clone git@github.com:UCSF-DSCOLAB/hackathon_primer.git``` This repo contains the Dockerfile and sample data with tutorial used to create the hackathon containers for single-cell analysis for both python and R-studio. @@ -17,11 +17,13 @@ Users can start analyzing single cell sequencing data with Scanpy in python or S Note: The easiest way to install and use docker, is via docker desktop: https://www.docker.com/products/docker-desktop/ +Be sure to select the appropriate installation for your MacBook. + # Running Python Container ## Pull image from Docker hub After installing docker, pull the repository from docker hub.
-```docker pull drbueno/single-cell-nb:latest ``` +```docker pull drbueno/single-cell-nb:latest``` ## How to Run Using Mac or Ubuntu Change directory to ```python-container```. @@ -34,8 +36,7 @@ ALL work must be done in this directory. It will be mounted inside the container An example of working directory: `/Users/hackathon-user/UCSF_HACKATHON_PRIMER/python-container/data` -After entering the path of your local work directory, follow instructions to copy and paste link with -IP address to a web browser. +After entering the path of your local work directory, follow instructions to copy and paste link with IP address to a web browser. ### If you are using an M1/M2 Mac @@ -82,7 +83,7 @@ analyzing your data. ## Pull image from Docker hub After installing docker, pull the repository from docker hub. -```docker pull drbueno/rstudio-single-cell:latest ``` +```docker pull drbueno/rstudio-single-cell:latest``` ## How to Run Using Mac or Ubuntu Change directory to ```rstudio-container```. From 2281717b1c79ca9cee259d7a005a1b118ed81966 Mon Sep 17 00:00:00 2001 From: Alaa Abdellatif Date: Fri, 18 Aug 2023 12:44:36 -0700 Subject: [PATCH 5/5] Fix bug with last execution cell missing trailing decorator --- rstudio-container/single-cell-tutorial.Rmd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rstudio-container/single-cell-tutorial.Rmd b/rstudio-container/single-cell-tutorial.Rmd index 73c0a87..42bd6c8 100644 --- a/rstudio-container/single-cell-tutorial.Rmd +++ b/rstudio-container/single-cell-tutorial.Rmd @@ -84,4 +84,5 @@ head(Idents(pbmc), 5) pbmc <- RunUMAP(pbmc, dims = 1:10) -DimPlot(pbmc, reduction = "umap") \ No newline at end of file +DimPlot(pbmc, reduction = "umap") +``` \ No newline at end of file