Skip to content

Commit

Permalink
[DC-3793] Update self_reported_population to self_reported_category (#1868)
Browse files Browse the repository at this point in the history

* [DC-3793] Update self_reported_population to self_reported_category
  • Loading branch information
nishanthpp93 authored Jun 28, 2024
1 parent 24680c1 commit 30fc9c1
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 115 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@
# 3. sex_at_birth_source_value
# 4. state_of_residence_concept_id
# 5. state_of_residence_source_value
# 6. self_reported_population_concept_id
# 7. self_reported_population_source_value
# 8. self_reported_population_source_concept_id<br>
# 6. self_reported_category_concept_id
# 7. self_reported_category_source_value
# 8. self_reported_category_source_concept_id<br>
#
#
# Investigate any failed output.
Expand All @@ -119,12 +119,12 @@
,COUNTIF((p.sex_at_birth_source_value IS NULL AND pe.sex_at_birth_source_value IS NOT NULL)
OR (p.sex_at_birth_source_value IS NOT NULL AND pe.sex_at_birth_source_value IS NULL)) as ne_nulls_sex_at_birth_source_value
-- check self reported category columns --
,COUNTIF((p.self_reported_population_concept_id IS NULL AND pe.self_reported_population_concept_id IS NOT NULL)
OR (p.self_reported_population_concept_id IS NOT NULL AND pe.self_reported_population_concept_id IS NULL)) as ne_nulls_self_reported_population_concept_id
,COUNTIF((p.self_reported_population_source_value IS NULL AND pe.self_reported_population_source_value IS NOT NULL)
OR (p.self_reported_population_source_value IS NOT NULL AND pe.self_reported_population_source_value IS NULL)) as ne_nulls_self_reported_population_source_value
,COUNTIF((p.self_reported_population_source_concept_id IS NULL AND pe.self_reported_population_source_concept_id IS NOT NULL)
OR (p.self_reported_population_source_concept_id IS NOT NULL AND pe.self_reported_population_source_concept_id IS NULL)) as ne_nulls_self_reported_population_source_concept_id
,COUNTIF((p.self_reported_category_concept_id IS NULL AND pe.self_reported_category_concept_id IS NOT NULL)
OR (p.self_reported_category_concept_id IS NOT NULL AND pe.self_reported_category_concept_id IS NULL)) as ne_nulls_self_reported_category_concept_id
,COUNTIF((p.self_reported_category_source_value IS NULL AND pe.self_reported_category_source_value IS NOT NULL)
OR (p.self_reported_category_source_value IS NOT NULL AND pe.self_reported_category_source_value IS NULL)) as ne_nulls_self_reported_category_source_value
,COUNTIF((p.self_reported_category_source_concept_id IS NULL AND pe.self_reported_category_source_concept_id IS NOT NULL)
OR (p.self_reported_category_source_concept_id IS NOT NULL AND pe.self_reported_category_source_concept_id IS NULL)) as ne_nulls_self_reported_category_source_concept_id
FROM `{{dest_project_id}}.{{dest_dataset_id}}.person` p
JOIN `{{dest_project_id}}.{{dest_dataset_id}}.person_ext` pe
USING (person_id)
Expand Down Expand Up @@ -241,9 +241,9 @@
UNION ALL
SELECT
'nulls_self_reported_population_concept_id_check' AS check
'nulls_self_reported_category_concept_id_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_concept_id > 0
WHEN c.ne_nulls_self_reported_category_concept_id > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
Expand All @@ -252,9 +252,9 @@
UNION ALL
SELECT
'self_reported_population_source_value_check' AS check
'self_reported_category_source_value_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_source_value > 0
WHEN c.ne_nulls_self_reported_category_source_value > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
Expand All @@ -263,9 +263,9 @@
UNION ALL
SELECT
'null_self_reported_population_source_concept_id_check' AS check
'null_self_reported_category_source_concept_id_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_source_concept_id > 0
WHEN c.ne_nulls_self_reported_category_source_concept_id > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
Expand All @@ -285,9 +285,9 @@
# 3. sex_at_birth_source_value
# 4. state_of_residence_concept_id
# 5. state_of_residence_source_value
# 6. self_reported_population_concept_id
# 7. self_reported_population_source_value
# 8. self_reported_population_source_concept_id<br>
# 6. self_reported_category_concept_id
# 7. self_reported_category_source_value
# 8. self_reported_category_source_concept_id<br>
#
# Investigate any failed output.

Expand All @@ -312,12 +312,12 @@
,COUNTIF((p.sex_at_birth_source_value IS NULL AND pe.sex_at_birth_source_value IS NOT NULL)
OR(p.sex_at_birth_source_value IS NOT NULL AND pe.sex_at_birth_source_value IS NULL)) AS ne_nulls_sex_at_birth_source_value
-- check self reported category columns --
,COUNTIF((p.self_reported_population_concept_id IS NULL AND pe.self_reported_population_concept_id IS NOT NULL)
OR (p.self_reported_population_concept_id IS NOT NULL AND pe.self_reported_population_concept_id IS NULL)) as ne_nulls_self_reported_population_concept_id
,COUNTIF((p.self_reported_population_source_value IS NULL AND pe.self_reported_population_source_value IS NOT NULL)
OR (p.self_reported_population_source_value IS NOT NULL AND pe.self_reported_population_source_value IS NULL)) as ne_nulls_self_reported_population_source_value
,COUNTIF((p.self_reported_population_source_concept_id IS NULL AND pe.self_reported_population_source_concept_id IS NOT NULL)
OR (p.self_reported_population_source_concept_id IS NOT NULL AND pe.self_reported_population_source_concept_id IS NULL)) as ne_nulls_self_reported_population_source_concept_id
,COUNTIF((p.self_reported_category_concept_id IS NULL AND pe.self_reported_category_concept_id IS NOT NULL)
OR (p.self_reported_category_concept_id IS NOT NULL AND pe.self_reported_category_concept_id IS NULL)) as ne_nulls_self_reported_category_concept_id
,COUNTIF((p.self_reported_category_source_value IS NULL AND pe.self_reported_category_source_value IS NOT NULL)
OR (p.self_reported_category_source_value IS NOT NULL AND pe.self_reported_category_source_value IS NULL)) as ne_nulls_self_reported_category_source_value
,COUNTIF((p.self_reported_category_source_concept_id IS NULL AND pe.self_reported_category_source_concept_id IS NOT NULL)
OR (p.self_reported_category_source_concept_id IS NOT NULL AND pe.self_reported_category_source_concept_id IS NULL)) as ne_nulls_self_reported_category_source_concept_id
FROM `{{dest_project_id}}.{{dest_dataset_id}}.person` p
JOIN `{{src_project_id}}.{{src_dataset_id}}.person_ext` pe
USING(person_id))
Expand Down Expand Up @@ -436,9 +436,9 @@
UNION ALL
SELECT
'nulls_self_reported_population_concept_id_check' AS check
'nulls_self_reported_category_concept_id_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_concept_id > 0
WHEN c.ne_nulls_self_reported_category_concept_id > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
Expand All @@ -447,9 +447,9 @@
UNION ALL
SELECT
'self_reported_population_source_value_check' AS check
'self_reported_category_source_value_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_source_value > 0
WHEN c.ne_nulls_self_reported_category_source_value > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
Expand All @@ -458,9 +458,9 @@
UNION ALL
SELECT
'null_self_reported_population_source_concept_id_check' AS check
'null_self_reported_category_source_concept_id_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_source_concept_id > 0
WHEN c.ne_nulls_self_reported_category_source_concept_id > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
Expand Down Expand Up @@ -623,4 +623,4 @@
Investigation needed.
'''
render_message(df, success_msg, failure_msg)
# -
# -
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from utils import auth
from gcloud.bq import BigQueryClient
from analytics.cdr_ops.notebook_utils import execute, IMPERSONATION_SCOPES

pd.options.display.max_rows = 120

# + tags=["parameters"]
Expand Down Expand Up @@ -77,7 +78,6 @@
AND value_as_concept_id = 45883720)
""")


q = query.render(project_id=project_id, deid_base_cdr=deid_base_cdr)
result = execute(client, q)

Expand Down Expand Up @@ -394,7 +394,7 @@
df1

# Query 2.5 "Race Ethnicity: person_ext self reported population DC-3787"
# Verify that the person_ext self_reported_population fields are populated correctly.
# Verify that the person_ext self_reported_category fields are populated correctly.

# has to be deid_base
query = JINJA_ENV.from_string("""
Expand All @@ -409,35 +409,35 @@
ORDER BY person_id, value_source_concept_id)
GROUP BY 1))
SELECT DISTINCT races, c_races, race_source_value, ethnicity_source_value, race_source_concept_id, race_concept_id, self_reported_population_source_value, self_reported_population_source_concept_id, self_reported_population_concept_id
SELECT DISTINCT races, c_races, race_source_value, ethnicity_source_value, race_source_concept_id, race_concept_id, self_reported_category_source_value, self_reported_category_source_concept_id, self_reported_category_concept_id
FROM obs
LEFT JOIN `{{project_id}}.{{deid_base_cdr}}.person`
USING (person_id)
LEFT JOIN `{{project_id}}.{{deid_base_cdr}}.person_ext`
USING (person_id)
WHERE
-- check self_reported_category column: multiple populations --
(REGEXP_CONTAINS(obs.c_races, r'\|') AND self_reported_population_source_value != 'WhatRaceEthnicity_GeneralizedMultPopulations' )
(REGEXP_CONTAINS(obs.c_races, r'\|') AND self_reported_category_source_value != 'WhatRaceEthnicity_GeneralizedMultPopulations' )
-- check self_reported_category column: single population, not Hispanic --
OR (NOT (REGEXP_CONTAINS(obs.c_races, r'\|') )) AND (race_source_value != self_reported_population_source_value AND (race_source_value = 'AoUDRC_NoneIndicated' AND self_reported_population_source_value != 'WhatRaceEthnicity_Hispanic'))
OR (NOT (REGEXP_CONTAINS(obs.c_races, r'\|') )) AND (race_source_value != self_reported_category_source_value AND (race_source_value = 'AoUDRC_NoneIndicated' AND self_reported_category_source_value != 'WhatRaceEthnicity_Hispanic'))
-- check self_reported_category column: single population, Hispanic --
OR (race_source_value = 'AoUDRC_NoneIndicated' AND self_reported_population_source_value != 'WhatRaceEthnicity_Hispanic')
OR (race_source_value = 'AoUDRC_NoneIndicated' AND self_reported_category_source_value != 'WhatRaceEthnicity_Hispanic')
-- check that only expected self_reported_category_source_value values exist --
OR (self_reported_population_source_value NOT IN ('WhatRaceEthnicity_GeneralizedMultPopulations','WhatRaceEthnicity_GeneralizedPopulation', 'WhatRaceEthnicity_Black','WhatRaceEthnicity_White','WhatRaceEthnicity_Asian' ,'WhatRaceEthnicity_Hispanic','PMI_PreferNotToAnswer', 'PMI_Skip', 'WhatRaceEthnicity_RaceEthnicityNoneOfThese','WhatRaceEthnicity_AIAN',
OR (self_reported_category_source_value NOT IN ('WhatRaceEthnicity_GeneralizedMultPopulations','WhatRaceEthnicity_GeneralizedPopulation', 'WhatRaceEthnicity_Black','WhatRaceEthnicity_White','WhatRaceEthnicity_Asian' ,'WhatRaceEthnicity_Hispanic','PMI_PreferNotToAnswer', 'PMI_Skip', 'WhatRaceEthnicity_RaceEthnicityNoneOfThese','WhatRaceEthnicity_AIAN',
'WhatRaceEthnicity_MENA','WhatRaceEthnicity_NHPI'))
-- check for expected concept_ids per self_reported_category_source_value --
OR (self_reported_population_source_value = 'WhatRaceEthnicity_GeneralizedMultPopulations' AND (self_reported_population_concept_id != 2000000008 OR self_reported_population_source_concept_id != 2000000008))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_GeneralizedPopulation' AND (self_reported_population_concept_id != 2000000001 OR self_reported_population_source_concept_id != 2000000001))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_Black' AND (self_reported_population_concept_id != 8516 OR self_reported_population_source_concept_id != 1586143))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_White' AND (self_reported_population_concept_id != 8527 OR self_reported_population_source_concept_id != 1586146))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_Asian' AND (self_reported_population_concept_id != 8515 OR self_reported_population_source_concept_id != 1586142))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_Hispanic' AND (self_reported_population_concept_id != 1586147 OR self_reported_population_source_concept_id != 1586147))
OR (self_reported_population_source_value = 'PMI_PreferNotToAnswer' AND (self_reported_population_concept_id != 1177221 OR self_reported_population_source_concept_id != 903079))
OR (self_reported_population_source_value = 'PMI_Skip' AND (self_reported_population_concept_id != 903096 OR self_reported_population_source_concept_id != 903096))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_RaceEthnicityNoneOfThese' AND (self_reported_population_concept_id != 45882607 OR self_reported_population_source_concept_id != 1586148))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_AIAN' AND (self_reported_population_concept_id != 8657 OR self_reported_population_source_concept_id != 1586141)) -- ct only --
OR (self_reported_population_source_value = 'WhatRaceEthnicity_MENA' AND (self_reported_population_concept_id != 38003615 OR self_reported_population_source_concept_id != 1586144)) -- ct only --
OR (self_reported_population_source_value = 'WhatRaceEthnicity_NHPI' AND (self_reported_population_concept_id != 8557 OR self_reported_population_source_concept_id != 1586145)) -- ct only --
OR (self_reported_category_source_value = 'WhatRaceEthnicity_GeneralizedMultPopulations' AND (self_reported_category_concept_id != 2000000008 OR self_reported_category_source_concept_id != 2000000008))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_GeneralizedPopulation' AND (self_reported_category_concept_id != 2000000001 OR self_reported_category_source_concept_id != 2000000001))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_Black' AND (self_reported_category_concept_id != 8516 OR self_reported_category_source_concept_id != 1586143))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_White' AND (self_reported_category_concept_id != 8527 OR self_reported_category_source_concept_id != 1586146))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_Asian' AND (self_reported_category_concept_id != 8515 OR self_reported_category_source_concept_id != 1586142))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_Hispanic' AND (self_reported_category_concept_id != 1586147 OR self_reported_category_source_concept_id != 1586147))
OR (self_reported_category_source_value = 'PMI_PreferNotToAnswer' AND (self_reported_category_concept_id != 1177221 OR self_reported_category_source_concept_id != 903079))
OR (self_reported_category_source_value = 'PMI_Skip' AND (self_reported_category_concept_id != 903096 OR self_reported_category_source_concept_id != 903096))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_RaceEthnicityNoneOfThese' AND (self_reported_category_concept_id != 45882607 OR self_reported_category_source_concept_id != 1586148))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_AIAN' AND (self_reported_category_concept_id != 8657 OR self_reported_category_source_concept_id != 1586141)) -- ct only --
OR (self_reported_category_source_value = 'WhatRaceEthnicity_MENA' AND (self_reported_category_concept_id != 38003615 OR self_reported_category_source_concept_id != 1586144)) -- ct only --
OR (self_reported_category_source_value = 'WhatRaceEthnicity_NHPI' AND (self_reported_category_concept_id != 8557 OR self_reported_category_source_concept_id != 1586145)) -- ct only --
ORDER BY 1,2
""")
Expand Down Expand Up @@ -669,13 +669,13 @@
'query': 'Query 4.0 date not shifited',
'result': 'PASS'
},
ignore_index=True)
ignore_index=True)
else:
df = df.append({
'query': 'Query 4.0 date not shifited',
'result': 'Failure'
},
ignore_index=True)
ignore_index=True)
df1

# ## Query 5.0 [DC-1051] Verify that "PPI Drop Duplicates" Rule is excluded COPE responses
Expand Down Expand Up @@ -795,6 +795,7 @@
ignore_index=True)
df1


# # Summary_deid_base_validation


Expand All @@ -806,5 +807,3 @@ def highlight_cells(val):

df.style.applymap(highlight_cells).set_properties(**{'text-align': 'left'})
# -


Loading

0 comments on commit 30fc9c1

Please sign in to comment.