# Patch summary (preamble only; the patch text below is unchanged).
#
# Purpose: give CohortGenotypeCollection an explicit `collection_type`
# ('C' = Common, 'U' = Uncommon) instead of marking "common" collections by
# storing them with cohort_version=0.
#
# Files touched:
#   * snpdb/migrations/0132_...: clears unique_together, adds the
#     `collection_type` CharField (max_length=1, default 'U'), then sets
#     unique_together to (cohort, cohort_version, collection_type).
#   * snpdb/migrations/0133_one_off_set_cgc_common.py: data migration. Rows
#     with a non-null common_filter and cohort_version=0 get
#     collection_type='C' and cohort_version copied from the owning Cohort's
#     `version` (via a Subquery on Cohort.id = cohort_id). The reverse
#     operation is a no-op.
#   * snpdb/models/models_cohort.py: adds the field, widens
#     Meta.unique_together, and adds CohortGenotypeCollection.__str__.
#   * snpdb/models/models_enums.py: adds the CohortGenotypeCollectionType
#     TextChoices enum (COMMON="C", UNCOMMON="U").
#   * snpdb/tasks/cohort_genotype_tasks.py: create_cohort_genotype_collection
#     builds one shared kwargs dict (cohort, cohort_version=cohort.version,
#     num_samples) and passes an explicit collection_type for both the common
#     and the uncommon collection; the second hunk only adds a blank line.
#
# NOTE(review): no hunk in this patch adds an import of
# CohortGenotypeCollectionType to snpdb/tasks/cohort_genotype_tasks.py (the
# visible hunks start at line 75) - confirm the name is already in scope
# there, otherwise the new create() calls would raise NameError.
# NOTE(review): because the 0133 reverse is a no-op, rolling back 0132 would
# restore unique_together (cohort, cohort_version) while common and uncommon
# rows presumably share the same cohort_version - confirm whether a rollback
# is expected to work.
# NOTE(review): the patch's line breaks appear to have been collapsed; the
# "@@ -a,b +c,d @@" counts refer to the original multi-line layout.
diff --git a/snpdb/migrations/0132_alter_cohortgenotypecollection_unique_together_and_more.py b/snpdb/migrations/0132_alter_cohortgenotypecollection_unique_together_and_more.py new file mode 100644 index 000000000..d0dc3e21d --- /dev/null +++ b/snpdb/migrations/0132_alter_cohortgenotypecollection_unique_together_and_more.py @@ -0,0 +1,26 @@ +# Generated by Django 4.2.10 on 2024-05-20 01:39 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('snpdb', '0131_download_liftover_chain_files'), + ] + + operations = [ + migrations.AlterUniqueTogether( + name='cohortgenotypecollection', + unique_together=set(), + ), + migrations.AddField( + model_name='cohortgenotypecollection', + name='collection_type', + field=models.CharField(choices=[('C', 'Common'), ('U', 'Uncommon')], default='U', max_length=1), + ), + migrations.AlterUniqueTogether( + name='cohortgenotypecollection', + unique_together={('cohort', 'cohort_version', 'collection_type')}, + ), + ] diff --git a/snpdb/migrations/0133_one_off_set_cgc_common.py b/snpdb/migrations/0133_one_off_set_cgc_common.py new file mode 100644 index 000000000..c89ac2e17 --- /dev/null +++ b/snpdb/migrations/0133_one_off_set_cgc_common.py @@ -0,0 +1,27 @@ +# Generated by Django 4.2.10 on 2024-05-20 01:39 + +from django.db import migrations +from django.db.models import Subquery, OuterRef + + +def _one_off_set_cgc_common(apps, _schema_editor): + CohortGenotypeCollection = apps.get_model("snpdb", "CohortGenotypeCollection") + Cohort = apps.get_model("snpdb", "Cohort") + + CGC_COMMON = "C" + + # To avoid the unique_together (cohort/version) the common ones were left as 0 + cgc_qs = CohortGenotypeCollection.objects.filter(common_filter__isnull=False, cohort_version=0) + cohort_version_subquery = Subquery(Cohort.objects.filter(id=OuterRef('cohort_id')).values('version')[:1]) + cgc_qs.update(collection_type=CGC_COMMON, cohort_version=cohort_version_subquery) + + +class 
Migration(migrations.Migration): + + dependencies = [ + ('snpdb', '0132_alter_cohortgenotypecollection_unique_together_and_more'), + ] + + operations = [ + migrations.RunPython(_one_off_set_cgc_common, reverse_code=lambda _, __: None), + ] diff --git a/snpdb/models/models_cohort.py b/snpdb/models/models_cohort.py index 6c3b80adc..1472c4506 100644 --- a/snpdb/models/models_cohort.py +++ b/snpdb/models/models_cohort.py @@ -26,7 +26,7 @@ from library.preview_request import PreviewModelMixin, PreviewKeyValue from library.utils import invert_dict from patients.models_enums import Zygosity -from snpdb.models.models_enums import ImportStatus +from snpdb.models.models_enums import ImportStatus, CohortGenotypeCollectionType from snpdb.models.models_genome import GenomeBuild from snpdb.models.models_variant import Variant from snpdb.models.models_vcf import VCF, Sample @@ -376,13 +376,23 @@ class CohortGenotypeCollection(RelatedModelsPartitionModel): celery_task = models.CharField(max_length=36, null=True) task_version = models.ForeignKey(CohortGenotypeTaskVersion, null=True, on_delete=CASCADE) marked_for_deletion = models.BooleanField(null=False, default=False) - # common_collection will be set on the 'interesting/rare' CGC + collection_type = models.CharField(max_length=1, choices=CohortGenotypeCollectionType.choices, + default=CohortGenotypeCollectionType.UNCOMMON) + # common_collection will be set on the 'uncommon' (interesting/rare) CGC common_collection = models.OneToOneField('self', null=True, related_name="uncommon", on_delete=CASCADE) # common filter will be set on the 'common' CGC common_filter = models.ForeignKey(CohortGenotypeCommonFilterVersion, null=True, on_delete=PROTECT) class Meta: - unique_together = ('cohort', 'cohort_version') + unique_together = ('cohort', 'cohort_version', 'collection_type') + + def __str__(self) -> str: + parts = [f"CohortGenotypeCollection: {self.cohort}"] + if self.cohort_version != self.cohort.version: + 
parts.append(f"(v.{self.cohort.version} != {self.cohort_version})") + if self.common_filter: + parts.append(str(self.common_filter)) + return " ".join(parts) def percent_common(self) -> float: common = self.common_collection.cohortgenotype_set.count() diff --git a/snpdb/models/models_enums.py b/snpdb/models/models_enums.py index 3c067649c..9b4b2ea08 100644 --- a/snpdb/models/models_enums.py +++ b/snpdb/models/models_enums.py @@ -227,3 +227,8 @@ class DataState(models.TextChoices): @staticmethod def should_create_new_record(data_state): return data_state not in [DataState.DELETED, DataState.SKIPPED] + + +class CohortGenotypeCollectionType(models.TextChoices): + COMMON = "C", "Common" + UNCOMMON = "U", "Uncommon" diff --git a/snpdb/tasks/cohort_genotype_tasks.py b/snpdb/tasks/cohort_genotype_tasks.py index 4e97c12ef..3e1b48233 100644 --- a/snpdb/tasks/cohort_genotype_tasks.py +++ b/snpdb/tasks/cohort_genotype_tasks.py @@ -75,19 +75,23 @@ def create_cohort_genotype_collection(cohort): name = f"{cohort.name} ({cohort.pk}:{cohort.version})" num_samples = cohort.cohortsample_set.count() common_collection = None + kwargs = { + "cohort": cohort, + "cohort_version": cohort.version, + "num_samples": num_samples, + } + if common_filter := get_common_filter(cohort.genome_build): common_collection = CohortGenotypeCollection.objects.create(name=f"{name} common", - cohort=cohort, - cohort_version=0, # so it isn't retrieved common_filter=common_filter, - num_samples=num_samples) + collection_type=CohortGenotypeCollectionType.COMMON, + **kwargs) logging.info(f"Created common collection: {common_collection}") collection = CohortGenotypeCollection.objects.create(name=name, - cohort=cohort, - cohort_version=cohort.version, common_collection=common_collection, - num_samples=num_samples) + collection_type=CohortGenotypeCollectionType.UNCOMMON, + **kwargs) logging.info(f"Created {collection}") return collection @@ -96,6 +100,7 @@ def create_cohort_genotype_collection(cohort): def 
_get_sample_zygosity_count_sql(sample_value, zygosity): return f'CASE WHEN (({sample_value}) = \'{zygosity}\') THEN 1 ELSE 0 END' + def _get_left_outer_join_on_variant(partition_table): return f'LEFT OUTER JOIN "{partition_table}" ON ("snpdb_variant"."id" = "{partition_table}"."variant_id")'