Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DC-3632] Remove irrelevant bq_utils import statements #1841

Merged
merged 6 commits into from
Jan 22, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
[DC-3632] Remove five cases of unused imports
Michael Schmidt committed Jan 3, 2024
commit b93180310bb9e4feef45a907ad9e111da0738035
25 changes: 12 additions & 13 deletions data_steward/analytics/cdr_ops/ad_hoc_analyses/cdr_person_id.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# # Person
# ## Person ID validation

import bq_utils
import utils.bq
from notebooks.parameters import RDR_DATASET_ID, EHR_DATASET_ID

@@ -13,7 +12,7 @@
hpo_ids = utils.bq.query("""
SELECT REPLACE(table_id, '_person', '') AS hpo_id
FROM `{EHR_DATASET_ID}.__TABLES__`
WHERE table_id LIKE '%person'
WHERE table_id LIKE '%person'
AND table_id NOT LIKE '%unioned_ehr_%' AND table_id NOT LIKE '\\\_%'
""".format(EHR_DATASET_ID=EHR_DATASET_ID)).hpo_id.tolist()

@@ -31,7 +30,7 @@
(SELECT COUNT(1) AS n
FROM {EHR_DATASET_ID}.{h}_person e
WHERE NOT EXISTS(
SELECT 1
SELECT 1
FROM {RDR_DATASET_ID}.person r
WHERE r.person_id = e.person_id)) not_in_rdr
ON TRUE
@@ -63,31 +62,31 @@
RDR_EHR_NAME_MATCH_QUERY = '''
WITH
rdr_first_name AS
(SELECT DISTINCT person_id,
FIRST_VALUE(value_as_string)
(SELECT DISTINCT person_id,
FIRST_VALUE(value_as_string)
OVER (PARTITION BY person_id, observation_source_value ORDER BY value_as_string) val
FROM {RDR_DATASET_ID}.observation
WHERE observation_source_value = 'PIIName_First'),

rdr_last_name AS
(SELECT DISTINCT person_id,
FIRST_VALUE(value_as_string)
(SELECT DISTINCT person_id,
FIRST_VALUE(value_as_string)
OVER (PARTITION BY person_id, observation_source_value ORDER BY value_as_string) val
FROM {RDR_DATASET_ID}.observation
WHERE observation_source_value = 'PIIName_Last'),

rdr_name AS
(SELECT
(SELECT
f.person_id person_id,
f.val first_name,
f.val first_name,
l.val last_name
FROM rdr_first_name f JOIN rdr_last_name l USING (person_id))

SELECT
'{HPO_ID}' hpo_id,
rdr.person_id rdr_person_id,
rdr.first_name rdr_first_name,
rdr.last_name rdr_last_name,
rdr.person_id rdr_person_id,
rdr.first_name rdr_first_name,
rdr.last_name rdr_last_name,
pii.person_id pii_person_id,
pii.first_name pii_first_name,
pii.middle_name pii_middle_name,
@@ -97,7 +96,7 @@
FROM rdr_name rdr
JOIN `{EHR_DATASET_ID}.{HPO_ID}_pii_name` pii
ON pii.first_name = rdr.first_name
AND pii.last_name = rdr.last_name
AND pii.last_name = rdr.last_name
LEFT JOIN `{EHR_DATASET_ID}.{HPO_ID}_person` p
ON pii.person_id = p.person_id
'''
29 changes: 14 additions & 15 deletions data_steward/analytics/cdr_ops/ad_hoc_analyses/cohort_testing.py
Original file line number Diff line number Diff line change
@@ -21,7 +21,6 @@
# - Record count (condition_occurrence)
#
# - We want to determine if these fluctuations are potentially caused by OMOP vocabulary issues. If this is the case, we should be able to determine similar trends in AoU data.
import bq_utils
import utils.bq
from notebooks import parameters

@@ -42,8 +41,8 @@
q4_2018_hypo_obs_card_query = """
SELECT
DISTINCT
co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as num_records,
co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as num_records,
ROUND(COUNT(DISTINCT co.condition_occurrence_id) / COUNT(DISTINCT p.person_id), 2) as records_per_capita

FROM
@@ -190,8 +189,8 @@
q2_2019_hypo_obs_card_query = """
SELECT
DISTINCT
co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as num_records,
co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as num_records,
ROUND(COUNT(DISTINCT co.condition_occurrence_id) / COUNT(DISTINCT p.person_id), 2) as records_per_capita

FROM
@@ -339,14 +338,14 @@
SELECT
DISTINCT
q4.*, q2.*, (SUM(q2.num_persons) - SUM(q4.old_num_persons)) as person_difference,
(SUM(q2.num_records) - SUM(q4.old_num_records)) as record_difference
(SUM(q2.num_records) - SUM(q4.old_num_records)) as record_difference
FROM

(SELECT
DISTINCT
co.condition_concept_id as old_condition_concept_id, c.concept_name as old_concept_name,
COUNT(DISTINCT p.person_id) AS old_num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as old_num_records,
co.condition_concept_id as old_condition_concept_id, c.concept_name as old_concept_name,
COUNT(DISTINCT p.person_id) AS old_num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as old_num_records,
ROUND(COUNT(DISTINCT co.condition_occurrence_id) / COUNT(DISTINCT p.person_id), 2) as old_records_per_capita

FROM
@@ -378,13 +377,13 @@

GROUP BY 1, 2
ORDER BY old_num_persons DESC) q4

LEFT JOIN

(SELECT
DISTINCT
co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as num_records,
co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as num_records,
ROUND(COUNT(DISTINCT co.condition_occurrence_id) / COUNT(DISTINCT p.person_id), 2) as records_per_capita

FROM
@@ -416,10 +415,10 @@

GROUP BY 1, 2
ORDER BY num_persons DESC) q2

ON
q4.old_condition_concept_id = q2.condition_concept_id

GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
ORDER BY old_num_persons DESC

15 changes: 7 additions & 8 deletions data_steward/analytics/cdr_ops/ad_hoc_analyses/deid_race.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
# +
import bq_utils
import utils.bq
from notebooks import render, parameters
import pandas as pd
@@ -27,19 +26,19 @@

MULTIRACIAL_DIST_QUERY = """
WITH race_combo AS
(SELECT o.person_id,
o.questionnaire_response_id,
(SELECT o.person_id,
o.questionnaire_response_id,
STRING_AGG(REPLACE(c.concept_code, 'WhatRaceEthnicity_', ''), ' ' ORDER BY value_source_value) selected_races
FROM {DATASET}.observation o
JOIN {VOCAB}.concept c ON o.value_source_concept_id = c.concept_id
JOIN {VOCAB}.concept c ON o.value_source_concept_id = c.concept_id
WHERE observation_source_concept_id = 1586140
GROUP BY person_id, questionnaire_response_id)
SELECT
selected_races,

SELECT
selected_races,
(LENGTH(selected_races) - LENGTH(REPLACE(selected_races, ' ', '')) + 1) AS selected_count,
COUNT(DISTINCT person_id) row_count
FROM race_combo
FROM race_combo
GROUP BY selected_races
ORDER BY selected_count, selected_races
"""
Original file line number Diff line number Diff line change
@@ -16,7 +16,6 @@

# +
import datetime
import bq_utils
import utils.bq
from notebooks.parameters import RDR_PROJECT_ID, RDR_DATASET_ID, EHR_DATASET_ID

@@ -65,17 +64,17 @@
# ## EHR Site Submission Counts

utils.bq.query('''
SELECT
SELECT
l.Org_ID AS org_id,
l.HPO_ID AS hpo_id,
l.Site_Name AS site_name,
table_id AS table_id,
table_id AS table_id,
row_count AS row_count
FROM `{EHR_DATASET_ID}.__TABLES__` AS t
JOIN `lookup_tables.hpo_site_id_mappings` AS l
JOIN `lookup_tables.hpo_site_id_mappings` AS l
ON STARTS_WITH(table_id,lower(l.HPO_ID))=true
WHERE table_id like '%person%' AND
NOT(table_id like '%unioned_ehr_%') AND
NOT(table_id like '%unioned_ehr_%') AND
l.hpo_id <> ''
ORDER BY Display_Order
'''.format(EHR_DATASET_ID=EHR_DATASET_ID))
@@ -84,7 +83,7 @@
hpo_ids = utils.bq.query("""
SELECT REPLACE(table_id, '_person', '') AS hpo_id
FROM `{EHR_DATASET_ID}.__TABLES__`
WHERE table_id LIKE '%person'
WHERE table_id LIKE '%person'
AND table_id NOT LIKE '%unioned_ehr_%' AND table_id NOT LIKE '\\\_%'
""".format(EHR_DATASET_ID=EHR_DATASET_ID)).hpo_id.tolist()