From d34726fca19c0668498c3b07ae3bd370e812f3ba Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Wed, 13 Mar 2019 12:27:52 -0400
Subject: [PATCH 01/24] initial peppy imports working

---
 rules/common.smk | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/rules/common.smk b/rules/common.smk
index f6b9635..63feb61 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -1,13 +1,36 @@
 import pandas as pd
+from peppy import Project, SNAKEMAKE_CONFIG_KEY as pep_to_snake, SAMPLE_NAME_COLNAME as PEPPY_SAMPLE_COLUMN
 from snakemake.utils import validate
 
+SAMPLE_COLUMN = "sample"
+
 report: "../report/workflow.rst"
 
 ###### Config file and sample sheets #####
-configfile: "config.yaml"
+p = Project("prjcfg.yaml")
+#print("CONFIGFILE: {}".format(configfile))
+#configfile: "config.yaml"
+configfile: getattr(p, pep_to_snake)
+print("CONFIG: {}".format(config))
 validate(config, schema="../schemas/config.schema.yaml")
 
-samples = pd.read_table(config["samples"]).set_index("sample", drop=False)
+sample_sheet_file = config["samples"]
+dt = pd.read_table(sample_sheet_file)
+
+print("DT: {}".format(dt))
+
+#samples = dt.set_index("sample", drop=False)
+samples = p.sheet
+if SAMPLE_COLUMN in samples.columns and SAMPLE_COLUMN in samples.columns:
+    raise Exception("Two sample identifier columns in samples sheet: {}".format(sample_sheet_file))
+samples.rename({PEPPY_SAMPLE_COLUMN: SAMPLE_COLUMN}, axis=1, inplace=True)
+
+print("SAMPLES: {}".format(samples))
+
+print("CONFIG SAMPLES: {}".format(config["samples"]))
+
+#print("SAMPLES:\n{}".format("\n".join("{} ({})".format(str(s), type(s)) for s in samples)))
+
 validate(samples, schema="../schemas/samples.schema.yaml")
 
 units = pd.read_table(config["units"], dtype=str).set_index(["sample", "unit"], drop=False)

From 59eb4d485e0bb847a6e7a1b8fadab1fc5f39dec1 Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Wed, 13 Mar 2019 15:31:03 -0400
Subject: [PATCH 02/24] more peppy interop

---
 config.yaml      | 2 +-
 rules/common.smk | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/config.yaml b/config.yaml
index fc84797..46c2d58 100644
--- a/config.yaml
+++ b/config.yaml
@@ -4,7 +4,7 @@ units: units.tsv
 ref:
   name: GRCh38.86
   # Path to the reference genome, ideally as it is provided by the GATK bundle.
-  genome: data/ref/genome.chr21.fa
+  genome: $GENOMES/hg38/hg38.fa
   # Path to any database of known variants, ideally as it is provided by the GATK bundle.
   known-variants: data/ref/dbsnp.vcf.gz
 
diff --git a/rules/common.smk b/rules/common.smk
index 63feb61..ae28c51 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -1,5 +1,7 @@
+import os
 import pandas as pd
 from peppy import Project, SNAKEMAKE_CONFIG_KEY as pep_to_snake, SAMPLE_NAME_COLNAME as PEPPY_SAMPLE_COLUMN
+from peppy.utils import expandpath
 from snakemake.utils import validate
 
 SAMPLE_COLUMN = "sample"
@@ -15,6 +17,7 @@ print("CONFIG: {}".format(config))
 validate(config, schema="../schemas/config.schema.yaml")
 
 sample_sheet_file = config["samples"]
+print("SAMPLE SHEET FILE: {}".format(sample_sheet_file))
 dt = pd.read_table(sample_sheet_file)
 
 print("DT: {}".format(dt))
@@ -37,8 +40,12 @@ units = pd.read_table(config["units"], dtype=str).set_index(["sample", "unit"],
 units.index = units.index.set_levels([i.astype(str) for i in units.index.levels])  # enforce str in index
 validate(units, schema="../schemas/units.schema.yaml")
 
+print("GENOME: {}".format(config["ref"]["genome"]))
+
 # contigs in reference genome
-contigs = pd.read_table(config["ref"]["genome"] + ".fai",
+refgen = expandpath(config["ref"]["genome"])
+print("REFGEN: {}".format(refgen))
+contigs = pd.read_table(refgen + ".fai",
                         header=None, usecols=[0], squeeze=True, dtype=str)
 
 

From f92ae7c76882cf1f28df48f12230aebc90b8e09d Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Wed, 13 Mar 2019 16:11:20 -0400
Subject: [PATCH 03/24] set the index; use master config

---
 config.yaml      | 2 +-
 rules/common.smk | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/config.yaml b/config.yaml
index 46c2d58..fc84797 100644
--- a/config.yaml
+++ b/config.yaml
@@ -4,7 +4,7 @@ units: units.tsv
 ref:
   name: GRCh38.86
   # Path to the reference genome, ideally as it is provided by the GATK bundle.
-  genome: $GENOMES/hg38/hg38.fa
+  genome: data/ref/genome.chr21.fa
   # Path to any database of known variants, ideally as it is provided by the GATK bundle.
   known-variants: data/ref/dbsnp.vcf.gz
 
diff --git a/rules/common.smk b/rules/common.smk
index ae28c51..8716a52 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -22,11 +22,11 @@ dt = pd.read_table(sample_sheet_file)
 
 print("DT: {}".format(dt))
 
-#samples = dt.set_index("sample", drop=False)
 samples = p.sheet
 if SAMPLE_COLUMN in samples.columns and SAMPLE_COLUMN in samples.columns:
     raise Exception("Two sample identifier columns in samples sheet: {}".format(sample_sheet_file))
 samples.rename({PEPPY_SAMPLE_COLUMN: SAMPLE_COLUMN}, axis=1, inplace=True)
+samples = dt.set_index(SAMPLE_COLUMN, drop=False)
 
 print("SAMPLES: {}".format(samples))
 

From 484976b559dc06a374b54dfecd3713d0521a146d Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Wed, 13 Mar 2019 16:17:31 -0400
Subject: [PATCH 04/24] cleanup

---
 rules/common.smk | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/rules/common.smk b/rules/common.smk
index 8716a52..0b4e6ff 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -10,41 +10,27 @@ report: "../report/workflow.rst"
 
 ###### Config file and sample sheets #####
 p = Project("prjcfg.yaml")
-#print("CONFIGFILE: {}".format(configfile))
-#configfile: "config.yaml"
 configfile: getattr(p, pep_to_snake)
-print("CONFIG: {}".format(config))
 validate(config, schema="../schemas/config.schema.yaml")
 
 sample_sheet_file = config["samples"]
 print("SAMPLE SHEET FILE: {}".format(sample_sheet_file))
 dt = pd.read_table(sample_sheet_file)
 
-print("DT: {}".format(dt))
-
 samples = p.sheet
 if SAMPLE_COLUMN in samples.columns and SAMPLE_COLUMN in samples.columns:
     raise Exception("Two sample identifier columns in samples sheet: {}".format(sample_sheet_file))
 samples.rename({PEPPY_SAMPLE_COLUMN: SAMPLE_COLUMN}, axis=1, inplace=True)
 samples = dt.set_index(SAMPLE_COLUMN, drop=False)
 
-print("SAMPLES: {}".format(samples))
-
-print("CONFIG SAMPLES: {}".format(config["samples"]))
-
-#print("SAMPLES:\n{}".format("\n".join("{} ({})".format(str(s), type(s)) for s in samples)))
-
 validate(samples, schema="../schemas/samples.schema.yaml")
 
 units = pd.read_table(config["units"], dtype=str).set_index(["sample", "unit"], drop=False)
 units.index = units.index.set_levels([i.astype(str) for i in units.index.levels])  # enforce str in index
 validate(units, schema="../schemas/units.schema.yaml")
 
-print("GENOME: {}".format(config["ref"]["genome"]))
-
 # contigs in reference genome
 refgen = expandpath(config["ref"]["genome"])
-print("REFGEN: {}".format(refgen))
 contigs = pd.read_table(refgen + ".fai",
                         header=None, usecols=[0], squeeze=True, dtype=str)
 

From b3661dc6bf799c985f3968b88bdfa3c73be54cfb Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Wed, 13 Mar 2019 16:18:06 -0400
Subject: [PATCH 05/24] remove additional print

---
 rules/common.smk | 1 -
 1 file changed, 1 deletion(-)

diff --git a/rules/common.smk b/rules/common.smk
index 0b4e6ff..a023dc4 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -14,7 +14,6 @@ configfile: getattr(p, pep_to_snake)
 validate(config, schema="../schemas/config.schema.yaml")
 
 sample_sheet_file = config["samples"]
-print("SAMPLE SHEET FILE: {}".format(sample_sheet_file))
 dt = pd.read_table(sample_sheet_file)
 
 samples = p.sheet

From ba4debb5471166a7e06e1d6b54b0faa204f10a6f Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Wed, 13 Mar 2019 16:24:09 -0400
Subject: [PATCH 06/24] more cleanup

---
 rules/common.smk | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/rules/common.smk b/rules/common.smk
index a023dc4..bc0529a 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -4,8 +4,6 @@ from peppy import Project, SNAKEMAKE_CONFIG_KEY as pep_to_snake, SAMPLE_NAME_COL
 from peppy.utils import expandpath
 from snakemake.utils import validate
 
-SAMPLE_COLUMN = "sample"
-
 report: "../report/workflow.rst"
 
 ###### Config file and sample sheets #####
@@ -13,14 +11,10 @@ p = Project("prjcfg.yaml")
 configfile: getattr(p, pep_to_snake)
 validate(config, schema="../schemas/config.schema.yaml")
 
-sample_sheet_file = config["samples"]
-dt = pd.read_table(sample_sheet_file)
-
 samples = p.sheet
-if SAMPLE_COLUMN in samples.columns and SAMPLE_COLUMN in samples.columns:
-    raise Exception("Two sample identifier columns in samples sheet: {}".format(sample_sheet_file))
-samples.rename({PEPPY_SAMPLE_COLUMN: SAMPLE_COLUMN}, axis=1, inplace=True)
-samples = dt.set_index(SAMPLE_COLUMN, drop=False)
+if "sample" in samples.columns and PEPPY_SAMPLE_COLUMN in samples.columns:
+    raise Exception("Two sample identifier columns in samples sheet: {}".format(config["samples"]))
+samples = samples.rename({PEPPY_SAMPLE_COLUMN: "sample"}, axis=1).set_index("sample", drop=False)
 
 validate(samples, schema="../schemas/samples.schema.yaml")
 

From e1af9a167c7d9622ef5a02369dbeb0673501d5a7 Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Thu, 14 Mar 2019 12:33:44 -0400
Subject: [PATCH 07/24] peppy files

---
 prjcfg.yaml       | 6 ++++++
 samples_peppy.tsv | 4 ++++
 2 files changed, 10 insertions(+)
 create mode 100644 prjcfg.yaml
 create mode 100644 samples_peppy.tsv

diff --git a/prjcfg.yaml b/prjcfg.yaml
new file mode 100644
index 0000000..739863e
--- /dev/null
+++ b/prjcfg.yaml
@@ -0,0 +1,6 @@
+metadata:
+#  output_dir: $PROCESSED/microtest
+  sample_annotation: samples_peppy.tsv
+
+snake_config: "config.yaml"
+
diff --git a/samples_peppy.tsv b/samples_peppy.tsv
new file mode 100644
index 0000000..cc9db9b
--- /dev/null
+++ b/samples_peppy.tsv
@@ -0,0 +1,4 @@
+sample_name
+A
+B
+

From d5b6d468ba0310a56728f494518fab1c8334926e Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Thu, 14 Mar 2019 13:30:45 -0400
Subject: [PATCH 08/24] minimize changes, shorten names

---
 rules/common.smk | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/rules/common.smk b/rules/common.smk
index bc0529a..65f88a0 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -1,20 +1,16 @@
 import os
 import pandas as pd
-from peppy import Project, SNAKEMAKE_CONFIG_KEY as pep_to_snake, SAMPLE_NAME_COLNAME as PEPPY_SAMPLE_COLUMN
-from peppy.utils import expandpath
-from snakemake.utils import validate
-
-report: "../report/workflow.rst"
+from peppy import Project, SAMPLE_NAME_COLNAME as PEP_SAMPLE_COL
 
 ###### Config file and sample sheets #####
 p = Project("prjcfg.yaml")
-configfile: getattr(p, pep_to_snake)
+configfile: p.snake_config
 validate(config, schema="../schemas/config.schema.yaml")
 
 samples = p.sheet
-if "sample" in samples.columns and PEPPY_SAMPLE_COLUMN in samples.columns:
+if "sample" in samples.columns and PEP_SAMPLE_COL in samples.columns:
     raise Exception("Two sample identifier columns in samples sheet: {}".format(config["samples"]))
-samples = samples.rename({PEPPY_SAMPLE_COLUMN: "sample"}, axis=1).set_index("sample", drop=False)
+samples = samples.rename({PEP_SAMPLE_COL: "sample"}, axis=1).set_index("sample", drop=False)
 
 validate(samples, schema="../schemas/samples.schema.yaml")
 
@@ -23,8 +19,7 @@ units.index = units.index.set_levels([i.astype(str) for i in units.index.levels]
 validate(units, schema="../schemas/units.schema.yaml")
 
 # contigs in reference genome
-refgen = expandpath(config["ref"]["genome"])
-contigs = pd.read_table(refgen + ".fai",
+contigs = pd.read_table(config["ref"]["genome"] + ".fai",
                         header=None, usecols=[0], squeeze=True, dtype=str)
 
 

From 5964ecf32f1849dc70eb10424dd614626ca13411 Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Thu, 14 Mar 2019 13:40:20 -0400
Subject: [PATCH 09/24] remove unused import

---
 rules/common.smk | 1 -
 1 file changed, 1 deletion(-)

diff --git a/rules/common.smk b/rules/common.smk
index 65f88a0..89a4d55 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -1,4 +1,3 @@
-import os
 import pandas as pd
 from peppy import Project, SAMPLE_NAME_COLNAME as PEP_SAMPLE_COL
 

From 6b54e14c9fba7186b546cf6d5c294abe4ac54c60 Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Thu, 14 Mar 2019 13:44:13 -0400
Subject: [PATCH 10/24] restore validate import

---
 rules/common.smk | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rules/common.smk b/rules/common.smk
index 89a4d55..195494d 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -1,5 +1,6 @@
 import pandas as pd
 from peppy import Project, SAMPLE_NAME_COLNAME as PEP_SAMPLE_COL
+from snakemake.utils import validate
 
 ###### Config file and sample sheets #####
 p = Project("prjcfg.yaml")

From 37ded42fb821ba27cc41654ce203ab8037a8920e Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Thu, 14 Mar 2019 16:44:21 -0400
Subject: [PATCH 11/24] need to check files entry

---
 prjcfg.yaml      |  1 +
 rules/common.smk | 28 ++++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/prjcfg.yaml b/prjcfg.yaml
index 739863e..d980e49 100644
--- a/prjcfg.yaml
+++ b/prjcfg.yaml
@@ -1,6 +1,7 @@
 metadata:
 #  output_dir: $PROCESSED/microtest
   sample_annotation: samples_peppy.tsv
+  sample_subannotation: units_peppy.tsv
 
 snake_config: "config.yaml"
 
diff --git a/rules/common.smk b/rules/common.smk
index 195494d..733f066 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -2,6 +2,25 @@ import pandas as pd
 from peppy import Project, SAMPLE_NAME_COLNAME as PEP_SAMPLE_COL
 from snakemake.utils import validate
 
+
+def peppy_rename(df):
+    return df.rename({PEP_SAMPLE_COL: "sample"}, axis=1)
+
+
+def peppy_units(df):
+    if "unit" in df.columns:
+        return df
+    def count_names(names):
+        def go(rem, n, curr, acc):
+            if rem == []:
+                return acc + [n]
+            h, t = rem[0], rem[1:]
+            return go(t, n + 1, curr, acc) if h == curr else go(t, 1, h, acc + [n])
+        return go(names[1:], 1, names[0], []) if names else []
+    df.insert(1, "unit", [i for n in count_names(list(df[PEP_SAMPLE_COL])) for i in range(1, n + 1)])
+    return df
+
+
 ###### Config file and sample sheets #####
 p = Project("prjcfg.yaml")
 configfile: p.snake_config
@@ -10,12 +29,17 @@ validate(config, schema="../schemas/config.schema.yaml")
 samples = p.sheet
 if "sample" in samples.columns and PEP_SAMPLE_COL in samples.columns:
     raise Exception("Two sample identifier columns in samples sheet: {}".format(config["samples"]))
-samples = samples.rename({PEP_SAMPLE_COL: "sample"}, axis=1).set_index("sample", drop=False)
+samples = peppy_rename(samples).set_index("sample", drop=False)
 
 validate(samples, schema="../schemas/samples.schema.yaml")
 
-units = pd.read_table(config["units"], dtype=str).set_index(["sample", "unit"], drop=False)
+subann = peppy_rename(p.sample_subannotation).applymap(str)
+units = peppy_units(subann).set_index(["sample", "unit"], drop=False)
+#units = pd.read_table(config["units"], dtype=str).set_index(["sample", "unit"], drop=False)
 units.index = units.index.set_levels([i.astype(str) for i in units.index.levels])  # enforce str in index
+
+print("UNITS:\n{}".format(units))
+
 validate(units, schema="../schemas/units.schema.yaml")
 
 # contigs in reference genome

From ca76544e8edbb0436210133a27ceb0ffbb8a4c4d Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Thu, 14 Mar 2019 17:22:52 -0400
Subject: [PATCH 12/24] guards and cleanup

---
 rules/common.smk | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/rules/common.smk b/rules/common.smk
index 733f066..d31b956 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -4,6 +4,8 @@ from snakemake.utils import validate
 
 
 def peppy_rename(df):
+    if df is None:
+        return None
     return df.rename({PEP_SAMPLE_COL: "sample"}, axis=1)
 
 
@@ -33,13 +35,9 @@ samples = peppy_rename(samples).set_index("sample", drop=False)
 
 validate(samples, schema="../schemas/samples.schema.yaml")
 
-subann = peppy_rename(p.sample_subannotation).applymap(str)
-units = peppy_units(subann).set_index(["sample", "unit"], drop=False)
-#units = pd.read_table(config["units"], dtype=str).set_index(["sample", "unit"], drop=False)
+units = peppy_units(peppy_rename(p.sample_subannotation)).set_index(["sample", "unit"], drop=False)
 units.index = units.index.set_levels([i.astype(str) for i in units.index.levels])  # enforce str in index
 
-print("UNITS:\n{}".format(units))
-
 validate(units, schema="../schemas/units.schema.yaml")
 
 # contigs in reference genome

From e85a876a8cbd467697b73babd3212ba526c1ab25 Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Thu, 14 Mar 2019 18:04:28 -0400
Subject: [PATCH 13/24] remove commented-out output_dir entry from project config

---
 prjcfg.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/prjcfg.yaml b/prjcfg.yaml
index d980e49..cee7a6b 100644
--- a/prjcfg.yaml
+++ b/prjcfg.yaml
@@ -1,5 +1,4 @@
 metadata:
-#  output_dir: $PROCESSED/microtest
   sample_annotation: samples_peppy.tsv
   sample_subannotation: units_peppy.tsv
 

From 6711da24446ea3aaaa5f1c5c69c7fc7b2dce67fb Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Thu, 14 Mar 2019 18:04:51 -0400
Subject: [PATCH 14/24] condense and explain

---
 rules/common.smk | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/rules/common.smk b/rules/common.smk
index d31b956..35414ab 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -4,12 +4,16 @@ from snakemake.utils import validate
 
 
 def peppy_rename(df):
+    """ Rename peppy's column for sample name identification to snakemake's. """
     if df is None:
         return None
+    if "sample" in df.columns and PEP_SAMPLE_COL in df.columns:
+        raise Exception("Multiple sample identifier columns present: {}".format(", ".join(["sample", PEP_SAMPLE_COL])))
     return df.rename({PEP_SAMPLE_COL: "sample"}, axis=1)
 
 
 def peppy_units(df):
+    """ Add unit/subsample indices to peppy a data frame.  """
     if "unit" in df.columns:
         return df
     def count_names(names):
@@ -29,8 +33,6 @@ configfile: p.snake_config
 validate(config, schema="../schemas/config.schema.yaml")
 
 samples = p.sheet
-if "sample" in samples.columns and PEP_SAMPLE_COL in samples.columns:
-    raise Exception("Two sample identifier columns in samples sheet: {}".format(config["samples"]))
 samples = peppy_rename(samples).set_index("sample", drop=False)
 
 validate(samples, schema="../schemas/samples.schema.yaml")

From 6fde15f62635bdb41b6406aa173c8967747e3ee7 Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Thu, 14 Mar 2019 18:05:44 -0400
Subject: [PATCH 15/24] peppy-compatible subannotation / units sheet

---
 units_peppy.tsv | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 units_peppy.tsv

diff --git a/units_peppy.tsv b/units_peppy.tsv
new file mode 100644
index 0000000..98f46b9
--- /dev/null
+++ b/units_peppy.tsv
@@ -0,0 +1,4 @@
+sample_name	unit	platform	fq1	fq2
+A	1	ILLUMINA	data/reads/a.chr21.1.fq	data/reads/a.chr21.2.fq
+B	1	ILLUMINA	data/reads/b.chr21.1.fq	data/reads/b.chr21.2.fq
+B	2	ILLUMINA	data/reads/b.chr21.1.fq

From bd8f36b19750180687a36466bd6f3d7bbc1c7a91 Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Fri, 26 Apr 2019 17:39:36 -0400
Subject: [PATCH 16/24] see about adding units dynamically

---
 units_peppy.tsv | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/units_peppy.tsv b/units_peppy.tsv
index 98f46b9..57eaf1e 100644
--- a/units_peppy.tsv
+++ b/units_peppy.tsv
@@ -1,4 +1,4 @@
-sample_name	unit	platform	fq1	fq2
-A	1	ILLUMINA	data/reads/a.chr21.1.fq	data/reads/a.chr21.2.fq
-B	1	ILLUMINA	data/reads/b.chr21.1.fq	data/reads/b.chr21.2.fq
-B	2	ILLUMINA	data/reads/b.chr21.1.fq
+sample_name	platform	fq1	fq2
+A	ILLUMINA	data/reads/a.chr21.1.fq	data/reads/a.chr21.2.fq
+B	ILLUMINA	data/reads/b.chr21.1.fq	data/reads/b.chr21.2.fq
+B	ILLUMINA	data/reads/b.chr21.1.fq

From 6e491675f00011fe17353bb1ae6a908e44dfeaea Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Fri, 26 Apr 2019 17:39:51 -0400
Subject: [PATCH 17/24] using SnakeProject

---
 rules/common.smk | 33 ++++-----------------------------
 1 file changed, 4 insertions(+), 29 deletions(-)

diff --git a/rules/common.smk b/rules/common.smk
index 35414ab..0a68f2b 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -1,43 +1,18 @@
 import pandas as pd
-from peppy import Project, SAMPLE_NAME_COLNAME as PEP_SAMPLE_COL
+from peppy import SnakeProject
 from snakemake.utils import validate
 
 
-def peppy_rename(df):
-    """ Rename peppy's column for sample name identification to snakemake's. """
-    if df is None:
-        return None
-    if "sample" in df.columns and PEP_SAMPLE_COL in df.columns:
-        raise Exception("Multiple sample identifier columns present: {}".format(", ".join(["sample", PEP_SAMPLE_COL])))
-    return df.rename({PEP_SAMPLE_COL: "sample"}, axis=1)
-
-
-def peppy_units(df):
-    """ Add unit/subsample indices to peppy a data frame.  """
-    if "unit" in df.columns:
-        return df
-    def count_names(names):
-        def go(rem, n, curr, acc):
-            if rem == []:
-                return acc + [n]
-            h, t = rem[0], rem[1:]
-            return go(t, n + 1, curr, acc) if h == curr else go(t, 1, h, acc + [n])
-        return go(names[1:], 1, names[0], []) if names else []
-    df.insert(1, "unit", [i for n in count_names(list(df[PEP_SAMPLE_COL])) for i in range(1, n + 1)])
-    return df
-
-
 ###### Config file and sample sheets #####
-p = Project("prjcfg.yaml")
+p = SnakeProject("prjcfg.yaml")
 configfile: p.snake_config
 validate(config, schema="../schemas/config.schema.yaml")
 
-samples = p.sheet
-samples = peppy_rename(samples).set_index("sample", drop=False)
+samples = p.sample_table
 
 validate(samples, schema="../schemas/samples.schema.yaml")
 
-units = peppy_units(peppy_rename(p.sample_subannotation)).set_index(["sample", "unit"], drop=False)
+units = p.subsample_table
 units.index = units.index.set_levels([i.astype(str) for i in units.index.levels])  # enforce str in index
 
 validate(units, schema="../schemas/units.schema.yaml")

From 682bb5d99a7e188c7ccca50f2a1bf9bec3704ea1 Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Mon, 29 Apr 2019 12:56:12 -0400
Subject: [PATCH 18/24] use Snakemake naming

---
 samples_peppy.tsv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples_peppy.tsv b/samples_peppy.tsv
index cc9db9b..19b8191 100644
--- a/samples_peppy.tsv
+++ b/samples_peppy.tsv
@@ -1,4 +1,4 @@
-sample_name
+sample
 A
 B
 

From 0d2fc1f854afc45f6e60c584ad024ca5882f60fa Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Mon, 29 Apr 2019 12:58:12 -0400
Subject: [PATCH 19/24] use base annotations file due to identical content

---
 prjcfg.yaml       | 2 +-
 samples_peppy.tsv | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)
 delete mode 100644 samples_peppy.tsv

diff --git a/prjcfg.yaml b/prjcfg.yaml
index cee7a6b..4243e5b 100644
--- a/prjcfg.yaml
+++ b/prjcfg.yaml
@@ -1,5 +1,5 @@
 metadata:
-  sample_annotation: samples_peppy.tsv
+  sample_annotation: samples.tsv
   sample_subannotation: units_peppy.tsv
 
 snake_config: "config.yaml"
diff --git a/samples_peppy.tsv b/samples_peppy.tsv
deleted file mode 100644
index 19b8191..0000000
--- a/samples_peppy.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-sample
-A
-B
-

From e7301f3e7eb83fa475f4a30dcb800c4214fc115e Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Mon, 29 Apr 2019 13:06:26 -0400
Subject: [PATCH 20/24] add project config that uses the base files

---
 prjcfg_native.yaml | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 prjcfg_native.yaml

diff --git a/prjcfg_native.yaml b/prjcfg_native.yaml
new file mode 100644
index 0000000..f5d96fe
--- /dev/null
+++ b/prjcfg_native.yaml
@@ -0,0 +1,6 @@
+metadata:
+  sample_annotation: samples.tsv
+  sample_subannotation: units.tsv
+
+snake_config: "config.yaml"
+

From 1750ab61711325fdc7899da69a4d63b17fb4dbb0 Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Mon, 29 Apr 2019 16:31:01 -0400
Subject: [PATCH 21/24] use the native encoding

---
 rules/common.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rules/common.smk b/rules/common.smk
index 0a68f2b..415a81e 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -4,7 +4,7 @@ from snakemake.utils import validate
 
 
 ###### Config file and sample sheets #####
-p = SnakeProject("prjcfg.yaml")
+p = SnakeProject("prjcfg_native.yaml")
 configfile: p.snake_config
 validate(config, schema="../schemas/config.schema.yaml")
 

From 6192d29dbfcdcda4d766dd35b1857572fa2acdfe Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Mon, 29 Apr 2019 16:40:38 -0400
Subject: [PATCH 22/24] condense config files

---
 prjcfg.yaml        | 2 +-
 prjcfg_native.yaml | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)
 delete mode 100644 prjcfg_native.yaml

diff --git a/prjcfg.yaml b/prjcfg.yaml
index 4243e5b..f5d96fe 100644
--- a/prjcfg.yaml
+++ b/prjcfg.yaml
@@ -1,6 +1,6 @@
 metadata:
   sample_annotation: samples.tsv
-  sample_subannotation: units_peppy.tsv
+  sample_subannotation: units.tsv
 
 snake_config: "config.yaml"
 
diff --git a/prjcfg_native.yaml b/prjcfg_native.yaml
deleted file mode 100644
index f5d96fe..0000000
--- a/prjcfg_native.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-metadata:
-  sample_annotation: samples.tsv
-  sample_subannotation: units.tsv
-
-snake_config: "config.yaml"
-

From f728c3753fba886d59ce8d15911ffb70a6b52929 Mon Sep 17 00:00:00 2001
From: Vince <vince.reuter@gmail.com>
Date: Fri, 21 Jun 2019 10:27:19 -0400
Subject: [PATCH 23/24] update to reflect peppy updates;
 https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/pull/8#issuecomment-504316445

---
 prjcfg.yaml      | 5 ++---
 rules/common.smk | 1 -
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/prjcfg.yaml b/prjcfg.yaml
index f5d96fe..e5676aa 100644
--- a/prjcfg.yaml
+++ b/prjcfg.yaml
@@ -1,6 +1,5 @@
 metadata:
-  sample_annotation: samples.tsv
-  sample_subannotation: units.tsv
+  sample_table: samples.tsv
+  sample_subtable: units.tsv
 
 snake_config: "config.yaml"
-
diff --git a/rules/common.smk b/rules/common.smk
index 415a81e..73bf0ac 100644
--- a/rules/common.smk
+++ b/rules/common.smk
@@ -13,7 +13,6 @@ samples = p.sample_table
 validate(samples, schema="../schemas/samples.schema.yaml")
 
 units = p.subsample_table
-units.index = units.index.set_levels([i.astype(str) for i in units.index.levels])  # enforce str in index
 
 validate(units, schema="../schemas/units.schema.yaml")
 

From 0e37cfbf3c046420adeb910b8d5cf96b5eb52034 Mon Sep 17 00:00:00 2001
From: Vince <vince.reuter@gmail.com>
Date: Wed, 19 Feb 2020 15:10:25 -0500
Subject: [PATCH 24/24] fix name mistake;
 https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/pull/8/files#r380673348

---
 prjcfg.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prjcfg.yaml b/prjcfg.yaml
index e5676aa..d0aace4 100644
--- a/prjcfg.yaml
+++ b/prjcfg.yaml
@@ -1,5 +1,5 @@
 metadata:
   sample_table: samples.tsv
-  sample_subtable: units.tsv
+  subsample_table: units.tsv
 
 snake_config: "config.yaml"