Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DC-3793] Update self_reported_population to self_reported_category #1868

Merged
merged 1 commit into from
Jun 28, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
[DC-3793] Update self_reported_population to self_reported_category
nishanthpp93 committed Jun 24, 2024
commit 36acdd4f43318f283c9cc10bd75d3362443c5098
Original file line number Diff line number Diff line change
@@ -91,9 +91,9 @@
# 3. sex_at_birth_source_value
# 4. state_of_residence_concept_id
# 5. state_of_residence_source_value
# 6. self_reported_population_concept_id
# 7. self_reported_population_source_value
# 8. self_reported_population_source_concept_id<br>
# 6. self_reported_category_concept_id
# 7. self_reported_category_source_value
# 8. self_reported_category_source_concept_id<br>
#
#
# Investigate any failed output.
@@ -119,12 +119,12 @@
,COUNTIF((p.sex_at_birth_source_value IS NULL AND pe.sex_at_birth_source_value IS NOT NULL)
OR (p.sex_at_birth_source_value IS NOT NULL AND pe.sex_at_birth_source_value IS NULL)) as ne_nulls_sex_at_birth_source_value
-- check self reported category columns --
,COUNTIF((p.self_reported_population_concept_id IS NULL AND pe.self_reported_population_concept_id IS NOT NULL)
OR (p.self_reported_population_concept_id IS NOT NULL AND pe.self_reported_population_concept_id IS NULL)) as ne_nulls_self_reported_population_concept_id
,COUNTIF((p.self_reported_population_source_value IS NULL AND pe.self_reported_population_source_value IS NOT NULL)
OR (p.self_reported_population_source_value IS NOT NULL AND pe.self_reported_population_source_value IS NULL)) as ne_nulls_self_reported_population_source_value
,COUNTIF((p.self_reported_population_source_concept_id IS NULL AND pe.self_reported_population_source_concept_id IS NOT NULL)
OR (p.self_reported_population_source_concept_id IS NOT NULL AND pe.self_reported_population_source_concept_id IS NULL)) as ne_nulls_self_reported_population_source_concept_id
,COUNTIF((p.self_reported_category_concept_id IS NULL AND pe.self_reported_category_concept_id IS NOT NULL)
OR (p.self_reported_category_concept_id IS NOT NULL AND pe.self_reported_category_concept_id IS NULL)) as ne_nulls_self_reported_category_concept_id
,COUNTIF((p.self_reported_category_source_value IS NULL AND pe.self_reported_category_source_value IS NOT NULL)
OR (p.self_reported_category_source_value IS NOT NULL AND pe.self_reported_category_source_value IS NULL)) as ne_nulls_self_reported_category_source_value
,COUNTIF((p.self_reported_category_source_concept_id IS NULL AND pe.self_reported_category_source_concept_id IS NOT NULL)
OR (p.self_reported_category_source_concept_id IS NOT NULL AND pe.self_reported_category_source_concept_id IS NULL)) as ne_nulls_self_reported_category_source_concept_id
FROM `{{dest_project_id}}.{{dest_dataset_id}}.person` p
JOIN `{{dest_project_id}}.{{dest_dataset_id}}.person_ext` pe
USING (person_id)
@@ -241,9 +241,9 @@
UNION ALL
SELECT
'nulls_self_reported_population_concept_id_check' AS check
'nulls_self_reported_category_concept_id_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_concept_id > 0
WHEN c.ne_nulls_self_reported_category_concept_id > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
@@ -252,9 +252,9 @@
UNION ALL
SELECT
'self_reported_population_source_value_check' AS check
'self_reported_category_source_value_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_source_value > 0
WHEN c.ne_nulls_self_reported_category_source_value > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
@@ -263,9 +263,9 @@
UNION ALL
SELECT
'null_self_reported_population_source_concept_id_check' AS check
'null_self_reported_category_source_concept_id_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_source_concept_id > 0
WHEN c.ne_nulls_self_reported_category_source_concept_id > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
@@ -285,9 +285,9 @@
# 3. sex_at_birth_source_value
# 4. state_of_residence_concept_id
# 5. state_of_residence_source_value
# 6. self_reported_population_concept_id
# 7. self_reported_population_source_value
# 8. self_reported_population_source_concept_id<br>
# 6. self_reported_category_concept_id
# 7. self_reported_category_source_value
# 8. self_reported_category_source_concept_id<br>
#
# Investigate any failed output.

@@ -312,12 +312,12 @@
,COUNTIF((p.sex_at_birth_source_value IS NULL AND pe.sex_at_birth_source_value IS NOT NULL)
OR(p.sex_at_birth_source_value IS NOT NULL AND pe.sex_at_birth_source_value IS NULL)) AS ne_nulls_sex_at_birth_source_value
-- check self reported category columns --
,COUNTIF((p.self_reported_population_concept_id IS NULL AND pe.self_reported_population_concept_id IS NOT NULL)
OR (p.self_reported_population_concept_id IS NOT NULL AND pe.self_reported_population_concept_id IS NULL)) as ne_nulls_self_reported_population_concept_id
,COUNTIF((p.self_reported_population_source_value IS NULL AND pe.self_reported_population_source_value IS NOT NULL)
OR (p.self_reported_population_source_value IS NOT NULL AND pe.self_reported_population_source_value IS NULL)) as ne_nulls_self_reported_population_source_value
,COUNTIF((p.self_reported_population_source_concept_id IS NULL AND pe.self_reported_population_source_concept_id IS NOT NULL)
OR (p.self_reported_population_source_concept_id IS NOT NULL AND pe.self_reported_population_source_concept_id IS NULL)) as ne_nulls_self_reported_population_source_concept_id
,COUNTIF((p.self_reported_category_concept_id IS NULL AND pe.self_reported_category_concept_id IS NOT NULL)
OR (p.self_reported_category_concept_id IS NOT NULL AND pe.self_reported_category_concept_id IS NULL)) as ne_nulls_self_reported_category_concept_id
,COUNTIF((p.self_reported_category_source_value IS NULL AND pe.self_reported_category_source_value IS NOT NULL)
OR (p.self_reported_category_source_value IS NOT NULL AND pe.self_reported_category_source_value IS NULL)) as ne_nulls_self_reported_category_source_value
,COUNTIF((p.self_reported_category_source_concept_id IS NULL AND pe.self_reported_category_source_concept_id IS NOT NULL)
OR (p.self_reported_category_source_concept_id IS NOT NULL AND pe.self_reported_category_source_concept_id IS NULL)) as ne_nulls_self_reported_category_source_concept_id
FROM `{{dest_project_id}}.{{dest_dataset_id}}.person` p
JOIN `{{src_project_id}}.{{src_dataset_id}}.person_ext` pe
USING(person_id))
@@ -436,9 +436,9 @@
UNION ALL
SELECT
'nulls_self_reported_population_concept_id_check' AS check
'nulls_self_reported_category_concept_id_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_concept_id > 0
WHEN c.ne_nulls_self_reported_category_concept_id > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
@@ -447,9 +447,9 @@
UNION ALL
SELECT
'self_reported_population_source_value_check' AS check
'self_reported_category_source_value_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_source_value > 0
WHEN c.ne_nulls_self_reported_category_source_value > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
@@ -458,9 +458,9 @@
UNION ALL
SELECT
'null_self_reported_population_source_concept_id_check' AS check
'null_self_reported_category_source_concept_id_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_source_concept_id > 0
WHEN c.ne_nulls_self_reported_category_source_concept_id > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
@@ -623,4 +623,4 @@
Investigation needed.
'''
render_message(df, success_msg, failure_msg)
# -
# -
Original file line number Diff line number Diff line change
@@ -20,6 +20,7 @@
from utils import auth
from gcloud.bq import BigQueryClient
from analytics.cdr_ops.notebook_utils import execute, IMPERSONATION_SCOPES

pd.options.display.max_rows = 120

# + tags=["parameters"]
@@ -77,7 +78,6 @@
AND value_as_concept_id = 45883720)
""")


q = query.render(project_id=project_id, deid_base_cdr=deid_base_cdr)
result = execute(client, q)

@@ -394,7 +394,7 @@
df1

# Query 2.5 "Race Ethnicity: person_ext self reported population DC-3787"
# Verify that the person_ext self_reported_population fields are populated correctly.
# Verify that the person_ext self_reported_category fields are populated correctly.

# has to be deid_base
query = JINJA_ENV.from_string("""
@@ -409,35 +409,35 @@
ORDER BY person_id, value_source_concept_id)
GROUP BY 1))
SELECT DISTINCT races, c_races, race_source_value, ethnicity_source_value, race_source_concept_id, race_concept_id, self_reported_population_source_value, self_reported_population_source_concept_id, self_reported_population_concept_id
SELECT DISTINCT races, c_races, race_source_value, ethnicity_source_value, race_source_concept_id, race_concept_id, self_reported_category_source_value, self_reported_category_source_concept_id, self_reported_category_concept_id
FROM obs
LEFT JOIN `{{project_id}}.{{deid_base_cdr}}.person`
USING (person_id)
LEFT JOIN `{{project_id}}.{{deid_base_cdr}}.person_ext`
USING (person_id)
WHERE
-- check srp column multi pop --
(REGEXP_CONTAINS(obs.c_races, r'\|') AND self_reported_population_source_value != 'WhatRaceEthnicity_GeneralizedMultPopulations' )
(REGEXP_CONTAINS(obs.c_races, r'\|') AND self_reported_category_source_value != 'WhatRaceEthnicity_GeneralizedMultPopulations' )
-- check srp column single pop not hispanic--
OR (NOT (REGEXP_CONTAINS(obs.c_races, r'\|') )) AND (race_source_value != self_reported_population_source_value AND (race_source_value = 'AoUDRC_NoneIndicated' AND self_reported_population_source_value != 'WhatRaceEthnicity_Hispanic'))
OR (NOT (REGEXP_CONTAINS(obs.c_races, r'\|') )) AND (race_source_value != self_reported_category_source_value AND (race_source_value = 'AoUDRC_NoneIndicated' AND self_reported_category_source_value != 'WhatRaceEthnicity_Hispanic'))
-- check srp column single pop hispanic--
OR (race_source_value = 'AoUDRC_NoneIndicated' AND self_reported_population_source_value != 'WhatRaceEthnicity_Hispanic')
OR (race_source_value = 'AoUDRC_NoneIndicated' AND self_reported_category_source_value != 'WhatRaceEthnicity_Hispanic')
-- check only expected srpsv exist --
OR (self_reported_population_source_value NOT IN ('WhatRaceEthnicity_GeneralizedMultPopulations','WhatRaceEthnicity_GeneralizedPopulation', 'WhatRaceEthnicity_Black','WhatRaceEthnicity_White','WhatRaceEthnicity_Asian' ,'WhatRaceEthnicity_Hispanic','PMI_PreferNotToAnswer', 'PMI_Skip', 'WhatRaceEthnicity_RaceEthnicityNoneOfThese','WhatRaceEthnicity_AIAN',
OR (self_reported_category_source_value NOT IN ('WhatRaceEthnicity_GeneralizedMultPopulations','WhatRaceEthnicity_GeneralizedPopulation', 'WhatRaceEthnicity_Black','WhatRaceEthnicity_White','WhatRaceEthnicity_Asian' ,'WhatRaceEthnicity_Hispanic','PMI_PreferNotToAnswer', 'PMI_Skip', 'WhatRaceEthnicity_RaceEthnicityNoneOfThese','WhatRaceEthnicity_AIAN',
'WhatRaceEthnicity_MENA','WhatRaceEthnicity_NHPI'))
-- check for expected concept_ids per srpsv --
OR (self_reported_population_source_value = 'WhatRaceEthnicity_GeneralizedMultPopulations' AND (self_reported_population_concept_id != 2000000008 OR self_reported_population_source_concept_id != 2000000008))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_GeneralizedPopulation' AND (self_reported_population_concept_id != 2000000001 OR self_reported_population_source_concept_id != 2000000001))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_Black' AND (self_reported_population_concept_id != 8516 OR self_reported_population_source_concept_id != 1586143))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_White' AND (self_reported_population_concept_id != 8527 OR self_reported_population_source_concept_id != 1586146))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_Asian' AND (self_reported_population_concept_id != 8515 OR self_reported_population_source_concept_id != 1586142))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_Hispanic' AND (self_reported_population_concept_id != 1586147 OR self_reported_population_source_concept_id != 1586147))
OR (self_reported_population_source_value = 'PMI_PreferNotToAnswer' AND (self_reported_population_concept_id != 1177221 OR self_reported_population_source_concept_id != 903079))
OR (self_reported_population_source_value = 'PMI_Skip' AND (self_reported_population_concept_id != 903096 OR self_reported_population_source_concept_id != 903096))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_RaceEthnicityNoneOfThese' AND (self_reported_population_concept_id != 45882607 OR self_reported_population_source_concept_id != 1586148))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_AIAN' AND (self_reported_population_concept_id != 8657 OR self_reported_population_source_concept_id != 1586141)) -- ct only --
OR (self_reported_population_source_value = 'WhatRaceEthnicity_MENA' AND (self_reported_population_concept_id != 38003615 OR self_reported_population_source_concept_id != 1586144)) -- ct only --
OR (self_reported_population_source_value = 'WhatRaceEthnicity_NHPI' AND (self_reported_population_concept_id != 8557 OR self_reported_population_source_concept_id != 1586145)) -- ct only --
OR (self_reported_category_source_value = 'WhatRaceEthnicity_GeneralizedMultPopulations' AND (self_reported_category_concept_id != 2000000008 OR self_reported_category_source_concept_id != 2000000008))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_GeneralizedPopulation' AND (self_reported_category_concept_id != 2000000001 OR self_reported_category_source_concept_id != 2000000001))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_Black' AND (self_reported_category_concept_id != 8516 OR self_reported_category_source_concept_id != 1586143))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_White' AND (self_reported_category_concept_id != 8527 OR self_reported_category_source_concept_id != 1586146))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_Asian' AND (self_reported_category_concept_id != 8515 OR self_reported_category_source_concept_id != 1586142))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_Hispanic' AND (self_reported_category_concept_id != 1586147 OR self_reported_category_source_concept_id != 1586147))
OR (self_reported_category_source_value = 'PMI_PreferNotToAnswer' AND (self_reported_category_concept_id != 1177221 OR self_reported_category_source_concept_id != 903079))
OR (self_reported_category_source_value = 'PMI_Skip' AND (self_reported_category_concept_id != 903096 OR self_reported_category_source_concept_id != 903096))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_RaceEthnicityNoneOfThese' AND (self_reported_category_concept_id != 45882607 OR self_reported_category_source_concept_id != 1586148))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_AIAN' AND (self_reported_category_concept_id != 8657 OR self_reported_category_source_concept_id != 1586141)) -- ct only --
OR (self_reported_category_source_value = 'WhatRaceEthnicity_MENA' AND (self_reported_category_concept_id != 38003615 OR self_reported_category_source_concept_id != 1586144)) -- ct only --
OR (self_reported_category_source_value = 'WhatRaceEthnicity_NHPI' AND (self_reported_category_concept_id != 8557 OR self_reported_category_source_concept_id != 1586145)) -- ct only --
ORDER BY 1,2
""")
@@ -669,13 +669,13 @@
'query': 'Query 4.0 date not shifited',
'result': 'PASS'
},
ignore_index=True)
ignore_index=True)
else:
df = df.append({
'query': 'Query 4.0 date not shifited',
'result': 'Failure'
},
ignore_index=True)
ignore_index=True)
df1

# ## Query 5.0 [DC-1051] Verify that "PPI Drop Duplicates" Rule is excluded COPE responses
@@ -795,6 +795,7 @@
ignore_index=True)
df1


# # Summary_deid_base_validation


@@ -806,5 +807,3 @@ def highlight_cells(val):

df.style.applymap(highlight_cells).set_properties(**{'text-align': 'left'})
# -


Original file line number Diff line number Diff line change
@@ -22,6 +22,7 @@
from utils import auth
from gcloud.bq import BigQueryClient
from analytics.cdr_ops.notebook_utils import execute, IMPERSONATION_SCOPES

pd.options.display.max_rows = 120

# + papermill={"duration": 0.023643, "end_time": "2021-02-02T22:30:31.880820", "exception": false, "start_time": "2021-02-02T22:30:31.857177", "status": "completed"} tags=["parameters"]
@@ -62,13 +63,13 @@
'query': 'Query1 observation',
'result': 'PASS'
},
ignore_index=True)
ignore_index=True)
else:
summary = summary.append({
'query': 'Query1 observation',
'result': 'Failure'
},
ignore_index=True)
summary = summary.append(
{
'query': 'Query1 observation',
'result': 'Failure'
}, ignore_index=True)
result.T

# + [markdown] papermill={"duration": 0.023633, "end_time": "2021-02-02T22:30:36.860798", "exception": false, "start_time": "2021-02-02T22:30:36.837165", "status": "completed"} tags=[]
@@ -95,13 +96,13 @@
'query': 'Query2 condition',
'result': 'PASS'
},
ignore_index=True)
ignore_index=True)
else:
summary = summary.append({
'query': 'Query2 condition',
'result': 'Failure'
},
ignore_index=True)
ignore_index=True)
result.T

# # 3 Verify that any row in the procedure_occurrence table whose procedure_occurrence_source_concept_id AND procedure_occurrence_concept_id are both null OR zero has been removed.
@@ -123,13 +124,13 @@
'query': 'Query3 procedure',
'result': 'PASS'
},
ignore_index=True)
ignore_index=True)
else:
summary = summary.append({
'query': 'Query3 procedure',
'result': 'Failure'
},
ignore_index=True)
ignore_index=True)
result.T

# + [markdown] papermill={"duration": 0.023649, "end_time": "2021-02-02T22:30:39.115495", "exception": false, "start_time": "2021-02-02T22:30:39.091846", "status": "completed"} tags=[]
@@ -152,16 +153,15 @@
'query': 'Query4 visit',
'result': 'PASS'
},
ignore_index=True)
ignore_index=True)
else:
summary = summary.append({
'query': 'Query4 visit',
'result': 'Failure'
},
ignore_index=True)
ignore_index=True)
result.T


# + [markdown] papermill={"duration": 0.023649, "end_time": "2021-02-02T22:30:39.115495", "exception": false, "start_time": "2021-02-02T22:30:39.091846", "status": "completed"} tags=[]
# # 5 Verify that any row in the drug_exposure table whose drug_exposure_source_concept_id AND drug_exposure_concept_id are both null OR zero has been removed.

@@ -182,13 +182,14 @@
'query': 'Query5 drug_exposure',
'result': 'PASS'
},
ignore_index=True)
ignore_index=True)
else:
summary = summary.append({
'query': 'Query5 drug_exposure',
'result': 'Failure'
},
ignore_index=True)
summary = summary.append(
{
'query': 'Query5 drug_exposure',
'result': 'Failure'
},
ignore_index=True)
result.T

# + [markdown] papermill={"duration": 0.023649, "end_time": "2021-02-02T22:30:39.115495", "exception": false, "start_time": "2021-02-02T22:30:39.091846", "status": "completed"} tags=[]
@@ -211,13 +212,13 @@
'query': 'Query6 device',
'result': 'PASS'
},
ignore_index=True)
ignore_index=True)
else:
summary = summary.append({
'query': 'Query6 device',
'result': 'Failure'
},
ignore_index=True)
ignore_index=True)
result.T

# + [markdown] papermill={"duration": 0.023649, "end_time": "2021-02-02T22:30:39.115495", "exception": false, "start_time": "2021-02-02T22:30:39.091846", "status": "completed"} tags=[]
@@ -240,13 +241,14 @@
'query': 'Query7 measurement',
'result': 'PASS'
},
ignore_index=True)
ignore_index=True)
else:
summary = summary.append({
'query': 'Query7, measurement',
'result': 'Failure'
},
ignore_index=True)
summary = summary.append(
{
'query': 'Query7, measurement',
'result': 'Failure'
},
ignore_index=True)
result.T
# -

@@ -287,8 +289,8 @@
result
# -

Query9 "Race Ethnicity: person_ext self reported population DC-3789"
Verify that the person_ext self_reported_population fields are populated correctly.
# Query9 "Race Ethnicity: person_ext self reported population DC-3789"
# Verify that the person_ext self_reported_category fields are populated correctly.

# has to be deid_clean
query = JINJA_ENV.from_string("""
@@ -303,35 +305,35 @@
ORDER BY person_id, value_source_concept_id)
GROUP BY 1))
SELECT DISTINCT races, c_races, race_source_value, ethnicity_source_value, race_source_concept_id, race_concept_id, self_reported_population_source_value, self_reported_population_source_concept_id, self_reported_population_concept_id
SELECT DISTINCT races, c_races, race_source_value, ethnicity_source_value, race_source_concept_id, race_concept_id, self_reported_category_source_value, self_reported_category_source_concept_id, self_reported_category_concept_id
FROM obs
LEFT JOIN `{{project_id}}.{{deid_clean_cdr}}.person`
USING (person_id)
LEFT JOIN `{{project_id}}.{{deid_clean_cdr}}.person_ext`
USING (person_id)
WHERE
-- check srp column multi pop --
(REGEXP_CONTAINS(obs.c_races, r'\|') AND self_reported_population_source_value != 'WhatRaceEthnicity_GeneralizedMultPopulations' )
(REGEXP_CONTAINS(obs.c_races, r'\|') AND self_reported_category_source_value != 'WhatRaceEthnicity_GeneralizedMultPopulations' )
-- check srp column single pop not hispanic--
OR (NOT (REGEXP_CONTAINS(obs.c_races, r'\|') )) AND (race_source_value != self_reported_population_source_value AND (race_source_value = 'AoUDRC_NoneIndicated' AND self_reported_population_source_value != 'WhatRaceEthnicity_Hispanic'))
OR (NOT (REGEXP_CONTAINS(obs.c_races, r'\|') )) AND (race_source_value != self_reported_category_source_value AND (race_source_value = 'AoUDRC_NoneIndicated' AND self_reported_category_source_value != 'WhatRaceEthnicity_Hispanic'))
-- check srp column single pop hispanic--
OR (race_source_value = 'AoUDRC_NoneIndicated' AND self_reported_population_source_value != 'WhatRaceEthnicity_Hispanic')
OR (race_source_value = 'AoUDRC_NoneIndicated' AND self_reported_category_source_value != 'WhatRaceEthnicity_Hispanic')
-- check only expected srpsv exist --
OR (self_reported_population_source_value NOT IN ('WhatRaceEthnicity_GeneralizedMultPopulations','WhatRaceEthnicity_GeneralizedPopulation', 'WhatRaceEthnicity_Black','WhatRaceEthnicity_White','WhatRaceEthnicity_Asian' ,'WhatRaceEthnicity_Hispanic','PMI_PreferNotToAnswer', 'PMI_Skip', 'WhatRaceEthnicity_RaceEthnicityNoneOfThese','WhatRaceEthnicity_AIAN',
OR (self_reported_category_source_value NOT IN ('WhatRaceEthnicity_GeneralizedMultPopulations','WhatRaceEthnicity_GeneralizedPopulation', 'WhatRaceEthnicity_Black','WhatRaceEthnicity_White','WhatRaceEthnicity_Asian' ,'WhatRaceEthnicity_Hispanic','PMI_PreferNotToAnswer', 'PMI_Skip', 'WhatRaceEthnicity_RaceEthnicityNoneOfThese','WhatRaceEthnicity_AIAN',
'WhatRaceEthnicity_MENA','WhatRaceEthnicity_NHPI'))
-- check for expected concept_ids per srpsv --
OR (self_reported_population_source_value = 'WhatRaceEthnicity_GeneralizedMultPopulations' AND (self_reported_population_concept_id != 2000000008 OR self_reported_population_source_concept_id != 2000000008))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_GeneralizedPopulation' AND (self_reported_population_concept_id != 2000000001 OR self_reported_population_source_concept_id != 2000000001))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_Black' AND (self_reported_population_concept_id != 8516 OR self_reported_population_source_concept_id != 1586143))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_White' AND (self_reported_population_concept_id != 8527 OR self_reported_population_source_concept_id != 1586146))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_Asian' AND (self_reported_population_concept_id != 8515 OR self_reported_population_source_concept_id != 1586142))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_Hispanic' AND (self_reported_population_concept_id != 1586147 OR self_reported_population_source_concept_id != 1586147))
OR (self_reported_population_source_value = 'PMI_PreferNotToAnswer' AND (self_reported_population_concept_id != 1177221 OR self_reported_population_source_concept_id != 903079))
OR (self_reported_population_source_value = 'PMI_Skip' AND (self_reported_population_concept_id != 903096 OR self_reported_population_source_concept_id != 903096))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_RaceEthnicityNoneOfThese' AND (self_reported_population_concept_id != 45882607 OR self_reported_population_source_concept_id != 1586148))
OR (self_reported_population_source_value = 'WhatRaceEthnicity_AIAN' AND (self_reported_population_concept_id != 8657 OR self_reported_population_source_concept_id != 1586141)) -- ct only --
OR (self_reported_population_source_value = 'WhatRaceEthnicity_MENA' AND (self_reported_population_concept_id != 38003615 OR self_reported_population_source_concept_id != 1586144)) -- ct only --
OR (self_reported_population_source_value = 'WhatRaceEthnicity_NHPI' AND (self_reported_population_concept_id != 8557 OR self_reported_population_source_concept_id != 1586145)) -- ct only --
OR (self_reported_category_source_value = 'WhatRaceEthnicity_GeneralizedMultPopulations' AND (self_reported_category_concept_id != 2000000008 OR self_reported_category_source_concept_id != 2000000008))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_GeneralizedPopulation' AND (self_reported_category_concept_id != 2000000001 OR self_reported_category_source_concept_id != 2000000001))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_Black' AND (self_reported_category_concept_id != 8516 OR self_reported_category_source_concept_id != 1586143))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_White' AND (self_reported_category_concept_id != 8527 OR self_reported_category_source_concept_id != 1586146))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_Asian' AND (self_reported_category_concept_id != 8515 OR self_reported_category_source_concept_id != 1586142))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_Hispanic' AND (self_reported_category_concept_id != 1586147 OR self_reported_category_source_concept_id != 1586147))
OR (self_reported_category_source_value = 'PMI_PreferNotToAnswer' AND (self_reported_category_concept_id != 1177221 OR self_reported_category_source_concept_id != 903079))
OR (self_reported_category_source_value = 'PMI_Skip' AND (self_reported_category_concept_id != 903096 OR self_reported_category_source_concept_id != 903096))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_RaceEthnicityNoneOfThese' AND (self_reported_category_concept_id != 45882607 OR self_reported_category_source_concept_id != 1586148))
OR (self_reported_category_source_value = 'WhatRaceEthnicity_AIAN' AND (self_reported_category_concept_id != 8657 OR self_reported_category_source_concept_id != 1586141)) -- ct only --
OR (self_reported_category_source_value = 'WhatRaceEthnicity_MENA' AND (self_reported_category_concept_id != 38003615 OR self_reported_category_source_concept_id != 1586144)) -- ct only --
OR (self_reported_category_source_value = 'WhatRaceEthnicity_NHPI' AND (self_reported_category_concept_id != 8557 OR self_reported_category_source_concept_id != 1586145)) -- ct only --
ORDER BY 1,2
""")
Original file line number Diff line number Diff line change
@@ -16,9 +16,9 @@
sex_at_birth_source_concept_id: value_source_concept_id in observation where observation_source_concept_id = 1585845
sex_at_birth_source_value: concept_code in the concept table, joined from observation where
observation_source_concept_id = 1585845
self_reported_population_concept_id: value_as_concept_id in observation where observation_source_concept_id = 1586140
self_reported_population_source_concept_id: value_source_concept_id in observation where observation_source_concept_id = 1586140
self_reported_population_source_value: concept_code in the concept table where joining from observation where
self_reported_category_concept_id: value_as_concept_id in observation where observation_source_concept_id = 1586140
self_reported_category_source_concept_id: value_source_concept_id in observation where observation_source_concept_id = 1586140
self_reported_category_source_value: concept_code in the concept table, joined from observation where
observation_source_concept_id = 1586140
"""
import logging
@@ -40,9 +40,9 @@
t.sex_at_birth_concept_id = COALESCE(os.value_as_concept_id, 0),
t.sex_at_birth_source_concept_id = COALESCE(os.value_source_concept_id, 0),
t.sex_at_birth_source_value = COALESCE(sc.concept_code, 'No matching concept'),
t.self_reported_population_concept_id = COALESCE(srp.value_as_concept_id, 0),
t.self_reported_population_source_concept_id = COALESCE(srp.value_source_concept_id, 0),
t.self_reported_population_source_value = COALESCE(srp.value_source_value, 'No matching concept')
t.self_reported_category_concept_id = COALESCE(srp.value_as_concept_id, 0),
t.self_reported_category_source_concept_id = COALESCE(srp.value_source_concept_id, 0),
t.self_reported_category_source_value = COALESCE(srp.value_source_value, 'No matching concept')
FROM
`{{project}}.{{dataset}}.person` p
LEFT JOIN
Original file line number Diff line number Diff line change
@@ -43,19 +43,19 @@
},
{
"type": "integer",
"name": "self_reported_population_concept_id",
"name": "self_reported_category_concept_id",
"mode": "nullable",
"description": "[All of Us OMOP extension] A foreign key to the participant's self-reported population concept."
},
{
"type": "integer",
"name": "self_reported_population_source_concept_id",
"name": "self_reported_category_source_concept_id",
"mode": "nullable",
"description": "[All of Us OMOP extension] A foreign key to the participant's self-reported population source concept."
},
{
"type": "string",
"name": "self_reported_population_source_value",
"name": "self_reported_category_source_value",
"mode": "nullable",
"description": "[All of Us OMOP extension] The source code for the participant's self reported population."
}
12 changes: 6 additions & 6 deletions data_steward/tools/recreate_person.py
Original file line number Diff line number Diff line change
@@ -22,11 +22,11 @@
OPTIONS(description="[All of Us OMOP extension] A foreign key to the biological sex at birth source concept."),
ADD COLUMN IF NOT EXISTS sex_at_birth_source_value STRING
OPTIONS(description="[All of Us OMOP extension] The source code for the biological sex at birth."),
ADD COLUMN IF NOT EXISTS self_reported_population_concept_id INT64
ADD COLUMN IF NOT EXISTS self_reported_category_concept_id INT64
OPTIONS(description="[All of Us OMOP extension] A foreign key to the participant's self-reported population concept."),
ADD COLUMN IF NOT EXISTS self_reported_population_source_concept_id INT64
ADD COLUMN IF NOT EXISTS self_reported_category_source_concept_id INT64
OPTIONS(description="[All of Us OMOP extension] A foreign key to the participant's self-reported population source concept."),
ADD COLUMN IF NOT EXISTS self_reported_population_source_value STRING
ADD COLUMN IF NOT EXISTS self_reported_category_source_value STRING
OPTIONS(description="[All of Us OMOP extension] The source code for the participant's self reported population.")
""")

@@ -38,9 +38,9 @@
sex_at_birth_concept_id = ext.sex_at_birth_concept_id,
sex_at_birth_source_concept_id = ext.sex_at_birth_source_concept_id,
sex_at_birth_source_value = ext.sex_at_birth_source_value,
self_reported_population_concept_id = ext.self_reported_population_concept_id,
self_reported_population_source_concept_id = ext.self_reported_population_source_concept_id,
self_reported_population_source_value = ext.self_reported_population_source_value
self_reported_category_concept_id = ext.self_reported_category_concept_id,
self_reported_category_source_concept_id = ext.self_reported_category_source_concept_id,
self_reported_category_source_value = ext.self_reported_category_source_value
FROM
`{{person_ext.project}}.{{person_ext.dataset_id}}.{{person_ext.table_id}}` ext
WHERE p.person_id = ext.person_id