-
Notifications
You must be signed in to change notification settings - Fork 20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Benb/use metadata as source of family table load #936
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,12 +2,13 @@ | |
import luigi | ||
import luigi.util | ||
|
||
from v03_pipeline.lib.misc.io import import_pedigree | ||
from v03_pipeline.lib.misc.pedigree import parse_pedigree_ht_to_families | ||
from v03_pipeline.lib.paths import remapped_and_subsetted_callset_path | ||
from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams | ||
from v03_pipeline.lib.tasks.files import RawFileTask | ||
from v03_pipeline.lib.tasks.update_project_table import UpdateProjectTableTask | ||
from v03_pipeline.lib.tasks.write_family_table import WriteFamilyTableTask | ||
from v03_pipeline.lib.tasks.write_remapped_and_subsetted_callset import ( | ||
WriteRemappedAndSubsettedCallsetTask, | ||
) | ||
|
||
|
||
@luigi.util.inherits(BaseLoadingRunParams) | ||
|
@@ -26,27 +27,26 @@ def complete(self) -> bool: | |
for write_family_table_task in self.dynamic_write_family_table_tasks | ||
) | ||
|
||
def run(self): | ||
# https://luigi.readthedocs.io/en/stable/tasks.html#dynamic-dependencies | ||
# Fetch family guids from project table | ||
update_project_table_task: luigi.Target = yield self.clone( | ||
UpdateProjectTableTask, | ||
) | ||
project_ht = hl.read_table(update_project_table_task.path) | ||
family_guids_in_project_table = set(hl.eval(project_ht.globals.family_guids)) | ||
def requires(self) -> list[luigi.Task]: | ||
return [ | ||
self.clone( | ||
WriteRemappedAndSubsettedCallsetTask, | ||
), | ||
self.clone( | ||
UpdateProjectTableTask, | ||
), | ||
] | ||
|
||
# Fetch family guids from pedigree | ||
pedigree_ht_task: luigi.Target = yield RawFileTask(self.project_pedigree_path) | ||
pedigree_ht = import_pedigree(pedigree_ht_task.path) | ||
families_guids_in_pedigree = { | ||
f.family_guid for f in parse_pedigree_ht_to_families(pedigree_ht) | ||
} | ||
|
||
# Intersect them | ||
family_guids_to_load = ( | ||
family_guids_in_project_table & families_guids_in_pedigree | ||
def run(self): | ||
ht = hl.read_matrix_table( | ||
remapped_and_subsetted_callset_path( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just sanity checking that this works for AnVIL loading, where we don't have remaps (or, usually, subsets). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, the same file gets generated; we just run a "subset" that has all families in it. |
||
self.reference_genome, | ||
self.dataset_type, | ||
self.callset_path, | ||
self.project_guid, | ||
), | ||
) | ||
for family_guid in family_guids_to_load: | ||
for family_guid in set(hl.eval(ht.globals.family_samples).keys()): | ||
self.dynamic_write_family_table_tasks.add( | ||
self.clone(WriteFamilyTableTask, family_guid=family_guid), | ||
) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,4 @@ | ||
Project_GUID Family_GUID Family_ID Individual_ID Paternal_ID Maternal_ID Sex | ||
R0114_project4 123_1 123 NA19675_1 F | ||
R0114_project4 234_1 234 NA19678_1 M | ||
R0114_project4 234_1 234 NA19678_999 F |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The specific failure was a family that existed in the pedigree but failed a check, and was therefore excluded from the rest of the pipeline, while still being present in the project table from a different callset.