Skip to content

Commit

Permalink
issue #1053 - cohort genotype versions - make common cohort version m…
Browse files Browse the repository at this point in the history
…atch cohort.version
  • Loading branch information
davmlaw authored and TheMadBug committed May 22, 2024
1 parent 82381d2 commit f6f7fb5
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 9 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Generated by Django 4.2.10 on 2024-05-20 01:39

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add ``collection_type`` to CohortGenotypeCollection and key uniqueness on it.

    The unique constraint is cleared, the new single-character column is
    added (defaulting to 'U' / Uncommon), and the constraint is then
    re-created over (cohort, cohort_version, collection_type).
    """

    dependencies = [
        ("snpdb", "0131_download_liftover_chain_files"),
    ]

    operations = [
        # Step 1: remove the current unique_together so it can be redefined.
        migrations.AlterUniqueTogether(
            name="cohortgenotypecollection",
            unique_together=set(),
        ),
        # Step 2: new discriminator column; default is 'U' (Uncommon).
        migrations.AddField(
            model_name="cohortgenotypecollection",
            name="collection_type",
            field=models.CharField(
                choices=[("C", "Common"), ("U", "Uncommon")],
                default="U",
                max_length=1,
            ),
        ),
        # Step 3: uniqueness now also includes the collection type.
        migrations.AlterUniqueTogether(
            name="cohortgenotypecollection",
            unique_together={("cohort", "cohort_version", "collection_type")},
        ),
    ]
27 changes: 27 additions & 0 deletions snpdb/migrations/0133_one_off_set_cgc_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Generated by Django 4.2.10 on 2024-05-20 01:39

from django.db import migrations
from django.db.models import Subquery, OuterRef


def _one_off_set_cgc_common(apps, _schema_editor):
    """Mark legacy 'common' collections and align their version with the cohort.

    Selects every CohortGenotypeCollection that has a ``common_filter`` and a
    ``cohort_version`` of 0, then in a single UPDATE sets its
    ``collection_type`` to 'C' and copies ``version`` from the related Cohort.

    Args:
        apps: historical app registry supplied by the migration runner.
        _schema_editor: unused.
    """
    collection_model = apps.get_model("snpdb", "CohortGenotypeCollection")
    cohort_model = apps.get_model("snpdb", "Cohort")

    # Same value as the 'Common' choice of the collection_type field.
    common_type = "C"

    # Correlated subquery: the current version of the cohort owning each row.
    owning_cohort = cohort_model.objects.filter(id=OuterRef("cohort_id"))
    current_cohort_version = Subquery(owning_cohort.values("version")[:1])

    # To avoid the unique_together (cohort/version) the common ones were left as 0
    legacy_common_collections = collection_model.objects.filter(
        common_filter__isnull=False,
        cohort_version=0,
    )
    legacy_common_collections.update(
        collection_type=common_type,
        cohort_version=current_cohort_version,
    )


class Migration(migrations.Migration):
    """One-off data migration: flag 'common' cohort genotype collections.

    Runs ``_one_off_set_cgc_common`` going forward. Reversing is a no-op,
    so the migration can still be unapplied.
    """

    dependencies = [
        ('snpdb', '0132_alter_cohortgenotypecollection_unique_together_and_more'),
    ]

    operations = [
        # RunPython.noop is Django's documented "do nothing" callable; unlike an
        # anonymous lambda it can be serialized when migrations are squashed.
        migrations.RunPython(_one_off_set_cgc_common, reverse_code=migrations.RunPython.noop),
    ]
16 changes: 13 additions & 3 deletions snpdb/models/models_cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from library.preview_request import PreviewModelMixin, PreviewKeyValue
from library.utils import invert_dict
from patients.models_enums import Zygosity
from snpdb.models.models_enums import ImportStatus
from snpdb.models.models_enums import ImportStatus, CohortGenotypeCollectionType
from snpdb.models.models_genome import GenomeBuild
from snpdb.models.models_variant import Variant
from snpdb.models.models_vcf import VCF, Sample
Expand Down Expand Up @@ -376,13 +376,23 @@ class CohortGenotypeCollection(RelatedModelsPartitionModel):
celery_task = models.CharField(max_length=36, null=True)
task_version = models.ForeignKey(CohortGenotypeTaskVersion, null=True, on_delete=CASCADE)
marked_for_deletion = models.BooleanField(null=False, default=False)
# common_collection will be set on the 'interesting/rare' CGC
collection_type = models.CharField(max_length=1, choices=CohortGenotypeCollectionType.choices,
default=CohortGenotypeCollectionType.UNCOMMON)
# common_collection will be set on the 'uncommon' (interesting/rare) CGC
common_collection = models.OneToOneField('self', null=True, related_name="uncommon", on_delete=CASCADE)
# common filter will be set on the 'common' CGC
common_filter = models.ForeignKey(CohortGenotypeCommonFilterVersion, null=True, on_delete=PROTECT)

class Meta:
unique_together = ('cohort', 'cohort_version')
unique_together = ('cohort', 'cohort_version', 'collection_type')

def __str__(self) -> str:
    """Describe the collection by its cohort, plus version mismatch and filter.

    The cohort is always shown. A "(v.X != Y)" note is appended when the
    stored ``cohort_version`` differs from the cohort's current version, and
    the common filter is appended when one is set.
    """
    cohort = self.cohort
    description = f"CohortGenotypeCollection: {cohort}"
    if self.cohort_version != cohort.version:
        description += f" (v.{cohort.version} != {self.cohort_version})"
    if self.common_filter:
        description += " " + str(self.common_filter)
    return description

def percent_common(self) -> float:
common = self.common_collection.cohortgenotype_set.count()
Expand Down
5 changes: 5 additions & 0 deletions snpdb/models/models_enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,3 +227,8 @@ class DataState(models.TextChoices):
@staticmethod
def should_create_new_record(data_state):
    """Return True unless ``data_state`` is DELETED or SKIPPED."""
    states_without_record = (DataState.DELETED, DataState.SKIPPED)
    return data_state not in states_without_record


class CohortGenotypeCollectionType(models.TextChoices):
    """Single-character codes for the type of a cohort genotype collection."""

    COMMON = ("C", "Common")
    UNCOMMON = ("U", "Uncommon")
17 changes: 11 additions & 6 deletions snpdb/tasks/cohort_genotype_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,19 +75,23 @@ def create_cohort_genotype_collection(cohort):
name = f"{cohort.name} ({cohort.pk}:{cohort.version})"
num_samples = cohort.cohortsample_set.count()
common_collection = None
kwargs = {
"cohort": cohort,
"cohort_version": cohort.version,
"num_samples": num_samples,
}

if common_filter := get_common_filter(cohort.genome_build):
common_collection = CohortGenotypeCollection.objects.create(name=f"{name} common",
cohort=cohort,
cohort_version=0, # so it isn't retrieved
common_filter=common_filter,
num_samples=num_samples)
collection_type=CohortGenotypeCollectionType.COMMON,
**kwargs)
logging.info(f"Created common collection: {common_collection}")

collection = CohortGenotypeCollection.objects.create(name=name,
cohort=cohort,
cohort_version=cohort.version,
common_collection=common_collection,
num_samples=num_samples)
collection_type=CohortGenotypeCollectionType.UNCOMMON,
**kwargs)

logging.info(f"Created {collection}")
return collection
Expand All @@ -96,6 +100,7 @@ def create_cohort_genotype_collection(cohort):
def _get_sample_zygosity_count_sql(sample_value, zygosity):
    """Build a SQL CASE expression yielding 1 when a sample matches a zygosity.

    Args:
        sample_value: SQL expression for the sample's value; wrapped in parentheses.
        zygosity: value compared against; embedded as a single-quoted literal.

    Returns:
        The CASE expression as a string (1 on match, otherwise 0).
    """
    comparison = f"({sample_value}) = '{zygosity}'"
    return f"CASE WHEN ({comparison}) THEN 1 ELSE 0 END"


def _get_left_outer_join_on_variant(partition_table):
    """Build the LEFT OUTER JOIN clause linking ``snpdb_variant`` to a partition.

    Args:
        partition_table: table name; emitted inside double quotes.

    Returns:
        A clause joining on "snpdb_variant"."id" = "<partition_table>"."variant_id".
    """
    quoted_table = f'"{partition_table}"'
    join_condition = f'"snpdb_variant"."id" = {quoted_table}."variant_id"'
    return f"LEFT OUTER JOIN {quoted_table} ON ({join_condition})"

Expand Down

0 comments on commit f6f7fb5

Please sign in to comment.