From e151e95d4e5de7ba25f5c7a8e8a2b3081261ad3e Mon Sep 17 00:00:00 2001 From: hiro-mishima <90213198+hiro-mishima@users.noreply.github.com> Date: Thu, 4 Apr 2024 10:53:09 -0400 Subject: [PATCH] [DC-3779] Add aou_death to EhrSubmissionDataCutoff --- .../ehr_submission_data_cutoff.py | 4 +-- .../ehr_submission_data_cutoff_test.py | 33 +++++++++++++++++-- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/data_steward/cdr_cleaner/cleaning_rules/ehr_submission_data_cutoff.py b/data_steward/cdr_cleaner/cleaning_rules/ehr_submission_data_cutoff.py index cbc65508d3..46e01a2c3e 100644 --- a/data_steward/cdr_cleaner/cleaning_rules/ehr_submission_data_cutoff.py +++ b/data_steward/cdr_cleaner/cleaning_rules/ehr_submission_data_cutoff.py @@ -13,7 +13,7 @@ # Project imports from cdr_cleaner.cleaning_rules.base_cleaning_rule import BaseCleaningRule -from common import JINJA_ENV, AOU_REQUIRED +from common import JINJA_ENV, AOU_REQUIRED, AOU_DEATH from constants import bq_utils as bq_consts from utils import pipeline_logging from resources import fields_for, validate_date_string @@ -86,7 +86,7 @@ def get_affected_tables(self): :return: list of affected tables """ tables = [] - for table in AOU_REQUIRED: + for table in AOU_REQUIRED + [AOU_DEATH]: # skips the person table if table == 'person': diff --git a/tests/integration_tests/data_steward/cdr_cleaner/cleaning_rules/ehr_submission_data_cutoff_test.py b/tests/integration_tests/data_steward/cdr_cleaner/cleaning_rules/ehr_submission_data_cutoff_test.py index 8604dcc439..6b645d966b 100644 --- a/tests/integration_tests/data_steward/cdr_cleaner/cleaning_rules/ehr_submission_data_cutoff_test.py +++ b/tests/integration_tests/data_steward/cdr_cleaner/cleaning_rules/ehr_submission_data_cutoff_test.py @@ -46,13 +46,15 @@ def setUpClass(cls): cls.rule_instance = EhrSubmissionDataCutoff(project_id, dataset_id, sandbox_id) - for table_name in common.VISIT_OCCURRENCE: + for table_name in [common.VISIT_OCCURRENCE, common.AOU_DEATH]: sandbox_table_name = cls.rule_instance.sandbox_table_for(table_name) cls.fq_sandbox_table_names.append( f'{cls.project_id}.{cls.sandbox_id}.{sandbox_table_name}') cls.fq_table_names.append( f'{cls.project_id}.{cls.dataset_id}.{common.VISIT_OCCURRENCE}') + cls.fq_table_names.append( + f'{cls.project_id}.{cls.dataset_id}.{common.AOU_DEATH}') # call super to set up the client, create datasets, and create # empty test tables @@ -82,8 +84,10 @@ def test_ehr_submission_data_cutoff(self, mock_get_affected_tables): statements and the tables_and_counts variable. """ # mocks the return value of get_affected_tables as we only want to loop through the - # visit_occurrence not all of the CDM tables - mock_get_affected_tables.return_value = [common.VISIT_OCCURRENCE] + # visit_occurrence and aou_death, not all of the CDM tables + mock_get_affected_tables.return_value = [ + common.VISIT_OCCURRENCE, common.AOU_DEATH + ] queries = [] visit_occurrence_tmpl = self.jinja_env.from_string(""" @@ -105,6 +109,19 @@ def test_ehr_submission_data_cutoff(self, mock_get_affected_tables): cdm_table=common.VISIT_OCCURRENCE) queries.append(visit_occurrence_tmpl) + aou_death_tmpl = self.jinja_env.from_string(""" + INSERT INTO `{{project}}.{{dataset}}.aou_death` + (aou_death_id, person_id, death_date, death_datetime, death_type_concept_id, + src_id, primary_death_record) + VALUES + ('a1', 1, '2020-01-01', NULL, 0, 'hpo_a', False), + ('h1', 1, '2010-01-01', '2010-01-01 00:00:00', 0, 'healthpro', True), + ('a2', 2, '2020-01-01', '2020-01-01 00:00:00', 0, 'hpo_a', False), + ('b2', 2, '2024-01-01', '2024-01-01 00:00:00', 0, 'hpo_b', True), + ('h3', 3, NULL, NULL, 0, 'Staff Portal: HealthPro', False) + """).render(project=self.project_id, dataset=self.dataset_id) + queries.append(aou_death_tmpl) + self.load_test_data(queries) table_and_counts = [{ @@ -130,6 +147,16 @@ def test_ehr_submission_data_cutoff(self, mock_get_affected_tables): parse('2020-03-06 11:00:00 UTC'), parse('2020-03-07').date(), parse('2020-03-07 11:00:00 UTC'), 4) ] + }, { + 'fq_table_name': + '.'.join([self.fq_dataset_name, 'aou_death']), + 'fq_sandbox_table_name': + f'{self.fq_sandbox_name}.{self.rule_instance.sandbox_table_for(common.AOU_DEATH)}', + 'loaded_ids': ['a1', 'h1', 'a2', 'b2', 'h3'], + 'sandboxed_ids': ['b2'], + 'fields': ['aou_death_id', 'person_id', 'primary_death_record'], + 'cleaned_values': [('a1', 1, False), ('h1', 1, True), + ('a2', 2, False), ('h3', 3, False)] }] self.default_test(table_and_counts)