rnabioco · agillen · Jan 30, 2024 · Sep 7, 2023 · Sep 13, 2023 · Sep 21, 2023
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,6 @@
+.Rproj*
+*.Rproj
+.RData
+.Rhistory
+.Rproj.user
+*.DS_Store
diff --git a/README.md b/README.md
@@ -52,6 +52,7 @@ Notes: `total_impact` is set to 5 for each sample, change this to control how ma
 | Platform | Library (BC+UMI+A) | Setting | Test data |
 | :--------|:------------| :------------| :---------|
 | 10x Chromium V3 | [16 + 12 + 30](https://teichlab.github.io/scg_lib_structs/methods_html/10xChromium3.html) | chromiumV3 | ✓ |
+| 10x V3 - Ultima Genomics | [adapter + 16 + 9 + 3 ignored + 8](https://www.nature.com/articles/s41587-022-01452-6/figures/1) | chromiumV3UG | |
 | 10x Chromium V2 | [16 + 10 + 30](https://teichlab.github.io/scg_lib_structs/methods_html/10xChromium3.html) | chromiumV2 | ✓ |
 | 10x Chromium Visium | [16 + 10 + 30](https://teichlab.github.io/scg_lib_structs/methods_html/10xChromium3.html) | visium | |
 | Drop-seq | [12 + 8 + 30](https://teichlab.github.io/scg_lib_structs/methods_html/Drop-seq.html) | dropseq | ✓ |

diff --git a/Snakefile b/Snakefile
@@ -6,6 +6,7 @@ configfile: "config.yaml"
 
 DATA = config["DATA"]
 RESULTS = config["RESULTS"]
+PAIRED_SAMPLES = config["PAIRED_SAMPLES"]
 R1_SAMPLES = config["R1_SAMPLES"]
 R2_SAMPLES = config["R2_SAMPLES"]
 STAR_INDEX = config["STAR_INDEX"]
@@ -14,12 +15,18 @@ FASTQS = config["FASTQS"]
 WHITELIST_V2 = config["WHITELIST_V2"]
 WHITELIST_V3 = config["WHITELIST_V3"]
 STAR = config["STAR"]
-READS = ["R1", "R2"]
+READS = ["paired", "R1", "R2"]
 
 import json
 with open('chemistry.json') as fp:
    chemistry = json.load(fp)
 
+# paired positional approach
+PAIRs = []
+if PAIRED_SAMPLES:
+  PAIRs.extend(expand("{results}/counts/{sample}_{read}_counts.tsv.gz", results = RESULTS, sample = PAIRED_SAMPLES, read = "paired"))
+  PAIRs.extend(expand("{results}/{sample}/{sample}_{read}_Aligned.sortedByCoord.out.bam", results = RESULTS, sample = PAIRED_SAMPLES, read = "paired"))
+  PAIRs.extend(expand("{results}/bed/{sample}_{read}.bed.gz", results = RESULTS, sample = PAIRED_SAMPLES, read = "paired"))  
 # read 1 positional approach
 R1s = []
 if R1_SAMPLES:
@@ -33,16 +40,14 @@ if R2_SAMPLES:
   R2s.extend(expand("{results}/{sample}/{sample}_{read}_Aligned.sortedByCoord.out.bam", results = RESULTS, sample = R2_SAMPLES, read = "R2"))
   R2s.extend(expand("{results}/bed/{sample}_{read}.bed.gz", results = RESULTS, sample = R2_SAMPLES, read = "R2"))
 # combine
-R12s = R1s + R2s # +expand("{results}/report/multiqc_report.html", results = RESULTS)
-R12s = R12s + expand("{data}/{sample}_R1.fastq.gz", data = DATA, sample = R1_SAMPLES) + expand("{data}/{sample}_R2.fastq.gz", data = DATA, sample = R1_SAMPLES)
-print(R12s)
+all_outputs = PAIRs + R1s + R2s #+ expand("{results}/report/multiqc_report.html", results = RESULTS)
+print(all_outputs)
 
 rule all:
   input:
-    R12s = R12s
+    all_outputs = all_outputs
 
 include: "rules/check_versions.snake"
-include: "rules/download.snake"
 include: "rules/cutadapt_star.snake"
 include: "rules/count.snake"
 include: "rules/qc.snake"
diff --git a/chemistry.json b/chemistry.json
@@ -3,36 +3,47 @@
         "length": "58",
         "bc_cut": "",
         "R1": "[WHITELIST_V3,\"--soloUMIlen 12 --clip5pNbases 58 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17\"]",
-        "R2": "[WHITELIST_V3,\"--soloUMIlen 12\"]"
+        "R2": "[WHITELIST_V3,\"--soloUMIlen 12\"]",
+        "paired": "[\"--alignEndsProtrude 58 ConcordantPair\"]"
+    },
+    "chromiumV3UG": {
+        "length": "58",
+        "bc_cut": "",
+        "R1": "[WHITELIST_V3,\"--soloUMIlen 9 --clip5pNbases 58 --soloCBstart 23 --soloCBlen 16 --soloUMIstart 39\"]"
     },
     "chromiumV2": {
         "length": "56",
         "bc_cut": "",
         "R1": "[WHITELIST_V2,\"--soloUMIlen 10 --clip5pNbases 56 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17\"]",
-        "R2": "[WHITELIST_V2,\"--soloUMIlen 10\"]"
+        "R2": "[WHITELIST_V2,\"--soloUMIlen 10\"]",
+        "paired": "[\"--alignEndsProtrude 56 ConcordantPair\"]"
     },
     "dropseq": {
         "length": "50",
         "bc_cut": "",
         "R1": "[\"None --soloUMIlen 8 --clip5pNbases 50 0 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13\"]",
-        "R2": "[\"None --soloUMIlen 8 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13\"]"
+        "R2": "[\"None --soloUMIlen 8 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13\"]",
+        "paired": "[\"--alignEndsProtrude 50 ConcordantPair\"]"
     },
     "microwellseq": {
         "length": "54",
         "bc_cut": "CGACTCACTACAGGG...TCGGTGACACGATCG",
         "R1": "[\"None --soloUMIlen 6 --clip5pNbases 54 0 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19\"]",
-        "R2": "[\"None --soloUMIlen 6 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19\"]"
+        "R2": "[\"None --soloUMIlen 6 --soloCBstart 1 --soloCBlen 18 --soloUMIstart 19\"]",
+        "paired": "[\"--alignEndsProtrude 54 ConcordantPair\"]"
     },
     "bd": {
         "length": "53",
         "bc_cut": "ACTGGCCTGCGA...GGTAGCGGTGACA",
         "R1": "[\"None --soloUMIlen 8 --clip5pNbases 53 0 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28\"]",
-        "R2": "[\"None --soloUMIlen 8 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28\"]"
+        "R2": "[\"None --soloUMIlen 8 --soloCBstart 1 --soloCBlen 27 --soloUMIstart 28\"]",
+        "paired": "[\"--alignEndsProtrude 53 ConcordantPair\"]"
     },
     "indrop": {
         "length": "32",
         "bc_cut": "",
         "R1": "[\"None --soloUMIlen 6 --clip5pNbases 32 0 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9\"]",
-        "R2": "[\"None --soloUMIlen 6 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9\"]"
+        "R2": "[\"None --soloUMIlen 6 --soloCBstart 1 --soloCBlen 8 --soloUMIstart 9\"]",
+        "paired": "[\"--alignEndsProtrude 32 ConcordantPair\"]"
     }
-}
+}
diff --git a/chemistry_to_json.py b/chemistry_to_json.py
@@ -3,6 +3,8 @@
 import pandas
 chromiumV3 = {"length" : "58", "bc_cut" : "", "R1" : '[WHITELIST_V3,"--soloUMIlen 12 --clip5pNbases 58 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17"]', "R2" : '[WHITELIST_V3,"--soloUMIlen 12"]'}
 
+chromiumV3UG = {"length": "58", "bc_cut": "", "R1": '[WHITELIST_V3,"--soloUMIlen 9 --clip5pNbases 58 --soloCBstart 23 --soloCBlen 16 --soloUMIstart 39"]', "R2": '[WHITELIST_V3,"--soloUMIlen 9"]'}
+
 chromiumV2 = {"length" : "56", "bc_cut" : "", "R1" : '[WHITELIST_V2,"--soloUMIlen 10 --clip5pNbases 56 0 --soloCBstart 1 --soloCBlen 16 --soloUMIstart 17"]', "R2" : '[WHITELIST_V2,"--soloUMIlen 10"]'}
 
 dropseq = {"length" : "50", "bc_cut" : "", "R1" : '["None --soloUMIlen 8 --clip5pNbases 50 0 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13"]', "R2" : '["None --soloUMIlen 8 --soloCBstart 1 --soloCBlen 12 --soloUMIstart 13"]'}

diff --git a/config.yaml b/config.yaml
@@ -36,12 +36,15 @@ POLYA_SITES:
 FASTQS:
   "sample_fastqs.tsv"
 
-R1_SAMPLES:
+PAIRED_SAMPLES:
 # Sample basenames
   - test
   - test2
   - test3
 
+R1_SAMPLES:
+# Sample basenames
+
 R2_SAMPLES:
   - test
   - test2

diff --git a/inst/scripts/filter_bam_correct.py b/inst/scripts/filter_bam_correct.py
@@ -11,7 +11,7 @@
 suitable for cellranger and starsolo output
 """
 
-def correct_bam_read1(bam, outbam, target_len, filter_cut):
+def correct_bam_read1(bam, outbam, target_len, filter_cut, single_end):
 
     samfile = pysam.AlignmentFile(bam, "rb")
     outfile = pysam.AlignmentFile(outbam, "wb", template = samfile)
@@ -28,14 +28,16 @@ def correct_bam_read1(bam, outbam, target_len, filter_cut):
         if read.is_unmapped:
             # not mapped, toss
             continue
-
-        if not read.is_proper_pair:
+
+        if not single_end:
+            if not read.is_proper_pair:
             # not properly paired, toss
-            continue
+                continue
 
-        if not read.is_read1:
+        if not single_end:
+            if not read.is_read1:
             # not read1, toss
-            continue
+                continue
 
         j += 1
 
@@ -115,16 +117,20 @@ def main():
                         help ="""maximum nts off of target_len to keep""",
                         default = 10,
                         required = False)
-
+    parser.add_argument('-s',
+                        '--single',
+                        action="store_true",
+                        required = False)
     args=parser.parse_args()
 
     in_bam = args.inbam
     out_bam = args.outbam
     target_len = int(args.targetlen)
     filter_cut = float(args.filtercut)
-
+    single_end = args.single
+    print(single_end)
     print("settings- target_len: ", target_len, "; filter_cut: ", filter_cut)
-    k,i,j = correct_bam_read1(in_bam, out_bam, target_len = target_len, filter_cut = filter_cut)
+    k,i,j = correct_bam_read1(in_bam, out_bam, target_len = target_len, filter_cut = filter_cut, single_end = single_end)
     print("fraction of reads kept: ", i/j)
     print("fraction of reads corrected: ", k/i)
 

diff --git a/rules/check_versions.snake b/rules/check_versions.snake
@@ -7,9 +7,10 @@ rule check:
      checked = "{data}/check_versions.txt"
    params:
      job_name = "check",
-     memory = "select[mem>5] rusage[mem=5]", # LSF format; change as needed
    log:
      "{data}/logs/check_versions.txt"
+   resources:
+     mem_mb = 5000
    threads:
      1
    shell: