diff --git a/.dockstore.yml b/.dockstore.yml
index dc786a3b9ee..ea57e407ce4 100644
--- a/.dockstore.yml
+++ b/.dockstore.yml
@@ -104,7 +104,7 @@ workflows:
         - ah_var_store
   - name: GvsValidateVatTable
     subclass: WDL
-    primaryDescriptorPath: /scripts/variantstore/wdl/GvsValidateVAT.wdl
+    primaryDescriptorPath: /scripts/variantstore/variant_annotations_table/GvsValidateVAT.wdl
     filters:
       branches:
         - ah_var_store
diff --git a/scripts/variantstore/wdl/GvsValidateVAT.wdl b/scripts/variantstore/variant_annotations_table/GvsValidateVAT.wdl
similarity index 100%
rename from scripts/variantstore/wdl/GvsValidateVAT.wdl
rename to scripts/variantstore/variant_annotations_table/GvsValidateVAT.wdl
diff --git a/scripts/variantstore/wdl/extract/Dockerfile b/scripts/variantstore/wdl/extract/Dockerfile
index 900e1eb8f82..bb4316bf27c 100644
--- a/scripts/variantstore/wdl/extract/Dockerfile
+++ b/scripts/variantstore/wdl/extract/Dockerfile
@@ -8,6 +8,7 @@ RUN apt-get update && apt-get -y upgrade && apt-get -y install bcftools
 # Add the application source code.
 ADD create_cohort_extract_data_table.py /app
 ADD create_variant_annotation_table.py /app
+ADD extract_subpop.py /app
 
 WORKDIR /app
 ENTRYPOINT ["/bin/bash"]
diff --git a/scripts/variantstore/wdl/extract/extract_subpop.py b/scripts/variantstore/wdl/extract/extract_subpop.py
new file mode 100644
index 00000000000..4d1ae8f7875
--- /dev/null
+++ b/scripts/variantstore/wdl/extract/extract_subpop.py
@@ -0,0 +1,22 @@
+import csv
+import argparse
+
+def extract_subpopulation(input_path, output_path):
+    with open(input_path, newline='') as tsvin, open(output_path, 'w', newline='') as csvout:
+        tsvin = csv.reader(tsvin, delimiter='\t')
+        csvout = csv.writer(csvout, delimiter='\t')
+
+        for row in tsvin:
+            csvout.writerow([row[0], row[4]])
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(allow_abbrev=False, description='Extract subpopulation per sample data out of a callset TSV')
+    parser.add_argument('--input_path',type=str, metavar='path', help='path to the original callset TSV', required=True)
+    parser.add_argument('--output_path',type=str, metavar='path', help='path for the output TSV', required=True)
+
+    args = parser.parse_args()
+
+    extract_subpopulation(args.input_path,
+                          args.output_path)
+