Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(Study View) Cancer studies chart filter refactoring #5031

Draft
wants to merge 49 commits into
base: clickhouse
Choose a base branch
from
Draft
Changes from 1 commit
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
4901c09
add clickhouse mode
alisman Jun 29, 2023
25db97b
trigger precommit hooks (#4926)
uklineale Jun 18, 2024
5084fff
add new endpoints and use subdomain
alisman Jun 24, 2024
5033e09
conditionally point clickhouse at secondary subdomain of beta
alisman Jun 25, 2024
cba5de7
Add clickhouse endpoint validation tool
alisman Jul 15, 2024
42d7143
Adjust patient treatments
alisman Aug 1, 2024
8c6baf1
Add Sample Treatments endpoint
haynescd Aug 6, 2024
888c586
Merge pull request #4956 from alisman/clickhousePatient
haynescd Aug 6, 2024
2a74c4e
add stuff
alisman Aug 13, 2024
6e3ec5a
Improve validation
alisman Aug 13, 2024
3ec5d87
Improve validation and use CH endpoints on all
alisman Aug 14, 2024
9d8cee8
Fix sorting and improve console report
alisman Aug 15, 2024
9183de0
Clickhouse tests (#4970)
alisman Aug 22, 2024
01d5883
Clickhouse fix (#4971)
alisman Aug 22, 2024
69dde84
fix test maker filterUrl (#4972)
alisman Aug 22, 2024
8e59189
more fixes for test maker (#4973)
alisman Aug 22, 2024
b94ce4d
Clickhouse test generation fix (#4975)
alisman Aug 22, 2024
44aef62
Fix show/hide button
alisman Aug 22, 2024
00a695d
Add stop listening button
alisman Aug 22, 2024
2c97c4c
Improve validator logging
alisman Aug 23, 2024
fea9806
add specs for clinical data filters (#4979)
onursumer Aug 23, 2024
d524c2c
Adding tests for genomic data bin counts filters and generic assay da…
dippindots Aug 23, 2024
281a44c
Add tests for sample lists counts and custom data filters (#4981)
gblaih Aug 27, 2024
ea123f5
test spec (#4978)
fuzhaoyuan Aug 28, 2024
643d153
Clickhouse (#4993)
alisman Sep 6, 2024
074cbaa
Enhance grouping of api test logging
alisman Sep 6, 2024
cecc3a1
Remove errant log
alisman Sep 6, 2024
0960351
Remove errant log
alisman Sep 6, 2024
6fca9f4
Fix loading of test json
alisman Sep 6, 2024
795ceb8
Update watchTests script to use env var for cbio path
haynescd Sep 6, 2024
1b50de4
introduce delete fields mechanism to overcome non-important validatio…
alisman Sep 11, 2024
1775e66
add caselist list delete field
alisman Sep 11, 2024
34ac9df
Uncomment validation
alisman Sep 13, 2024
5a93252
Improvements to clickhouse validation (#5001)
alisman Sep 17, 2024
4a71a05
Add genomic data endpoints
alisman Sep 17, 2024
93cb333
Add support to test new Treatment Endpoints (#5002)
haynescd Sep 17, 2024
8d56559
force sort of object keys to make test pass
alisman Sep 18, 2024
6b95f15
remove clinical data tab
alisman Sep 19, 2024
1d98489
clone data for compare counts
alisman Sep 20, 2024
027ce29
Enhance and fix the api test infrastructure, add ability to run tests…
alisman Sep 27, 2024
6884d03
Assert response (#5014)
alisman Oct 7, 2024
fc4bdd0
Add studyId to validation sorting for filtered samples (#5017)
alisman Oct 7, 2024
0cd3a54
Validate axio (#5021)
alisman Oct 15, 2024
9aa1378
add supress behavior to public.js (#5024)
alisman Oct 18, 2024
c9f1be7
Remove hardcoded filter
alisman Oct 18, 2024
dcfd297
Do not let clickhouse points at secondary subdomain of beta (#5027)
dippindots Oct 22, 2024
b03c0f2
cancer study chart filter refactoring
dippindots Oct 22, 2024
540c12f
Fix default charts get changed when selecting smaller study set issue
dippindots Oct 30, 2024
bf6faf3
revert molecularProfiles to use queriedPhysicalStudyIds
onursumer Nov 14, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add supress behavior to public.js (#5024)
fix provision of column-store body
add public.js to validate data dog csv
  • Loading branch information
alisman authored Oct 18, 2024
commit 9aa1378a20ac3b3c37ede835141b10c2345ad79a
99 changes: 79 additions & 20 deletions api-e2e/clickhouse.js
Original file line number Diff line number Diff line change
@@ -10,27 +10,43 @@ const client = createClient({
request_timeout: 600000,
});

async function main() {
async function mainInsert() {
const studies = await getStudies();

const entities = await getEntities();

let profiles = await getProfiles();

const then = Date.now();

for (const study of studies) {
console.log(
`inserting ${study.cancer_study_id} ${study.cancer_study_identifier}`
);
//profiles = profiles.slice(0,1);

//console.log(profiles);

for (const profile of profiles) {
console.log(`inserting ${profile.stable_id}`);
try {
await insertStudy(study.cancer_study_id);
await insertProfile(profile.genetic_profile_id);
} catch (ex) {
console.log(
`ERROR inserting ${study.cancer_study_id} ${study.cancer_study_identifier}`
);
console.log(`ERROR inserting ${profile.stable_id}`);
console.log(ex);
}
}

// for (const study of studies) {
// console.log(
// `inserting ${study.cancer_study_id} ${study.cancer_study_identifier}`
// );
// try {
// await insertStudy(study.cancer_study_id);
// } catch (ex) {
// console.log(
// `ERROR inserting ${study.cancer_study_id} ${study.cancer_study_identifier}`
// );
// console.log(ex);
// }
// }

// for (const entity of entities) {
// try {
// await insertProfile(2071, entity.genetic_entity_id);
@@ -47,6 +63,10 @@ async function main() {
//console.log(result);
}

/**
 * Entry point for the patient-id report: runs `getPatientIds()` and waits
 * for it to finish.
 *
 * The call is awaited so that the promise returned by `mainPatient()` only
 * settles once the report has completed, and so that a failure inside
 * `getPatientIds()` rejects this promise instead of surfacing as an
 * unhandled rejection.
 *
 * @returns {Promise<void>} Settles after `getPatientIds()` has settled.
 */
async function mainPatient() {
    await getPatientIds();
}

async function insertStudy(cancer_study_id) {
const resultSet = await client.query({
query: queryByStudies({ cancer_study_id }),
@@ -58,9 +78,22 @@ async function insertStudy(cancer_study_id) {
return dataset;
}

async function insertProfile(genetic_profile_id, genetic_entity_id) {
/**
 * Renders the `queryByProfiles` template for a single genetic profile, sends
 * the resulting statement through the shared `client`, and returns the
 * parsed response rows.
 *
 * @param {number|string} genetic_profile_id - Profile id interpolated into
 *   the `queryByProfiles` template.
 * @returns {Promise<*>} Whatever `resultSet.json()` yields for the query.
 */
async function insertProfile(genetic_profile_id) {
    const statement = queryByProfiles({ genetic_profile_id });

    const response = await client.query({
        query: statement,
        format: 'JSONEachRow',
    });

    // `response.text` could be used instead to skip JSON parsing.
    return response.json();
}

async function getProfiles() {
const resultSet = await client.query({
query: queryByProfiles({ genetic_profile_id, genetic_entity_id }),
query: `SELECT * FROM genetic_profile
WHERE genetic_profile.genetic_alteration_type NOT IN ('GENERIC_ASSAY', 'MUTATION_EXTENDED', 'STRUCTURAL_VARIANT')
`,
format: 'JSONEachRow',
});

@@ -81,6 +114,33 @@ async function getEntities() {
return dataset;
}

/**
 * Diagnostic report on patient identifiers.
 *
 * Selects every patient's `stable_id` together with the
 * `cancer_study_identifier` of its study, then buckets the rows by a
 * normalized key: the stable id with all '-' characters removed, a '|'
 * separator, and the upper-cased study identifier.
 *
 * Logs two things to the console:
 *   1. the number of distinct normalized keys, and
 *   2. every bucket that holds more than one row (ids that collide once
 *      normalized).
 *
 * @returns {Promise<void>} Nothing is returned; output goes to the console.
 */
async function getPatientIds() {
    const patientQuery = `SELECT p.stable_id, cs.cancer_study_identifier FROM patient p
JOIN cancer_study cs ON cs.cancer_study_id = p.cancer_study_id

`;

    const response = await client.query({
        query: patientQuery,
        format: 'JSONEachRow',
    });

    // `response.text` could be used instead to skip JSON parsing.
    const patients = await response.json();

    const byNormalizedKey = _.groupBy(patients, patient => {
        const idWithoutDashes = patient.stable_id.replace(/-/g, '');
        const upperStudy = patient.cancer_study_identifier.toUpperCase();
        return `${idWithoutDashes}|${upperStudy}`;
    });

    console.log(_.values(byNormalizedKey).length);

    const collisions = _.values(byNormalizedKey).filter(
        bucket => bucket.length > 1
    );
    console.log(collisions);
}

async function getStudies() {
const resultSet = await client.query({
query: 'select * from cancer_study',
@@ -94,7 +154,7 @@ async function getStudies() {

const queryByStudies = _.template(
`
INSERT INTO TABLE genetic_alteration_derived2
INSERT INTO TABLE genetic_alteration_derivedFIXED
SELECT
sample_unique_id,
cancer_study_identifier,
@@ -111,8 +171,8 @@ FROM
(SELECT
g.hugo_gene_symbol AS hugo_gene_symbol,
gp.stable_id as stable_id,
arrayMap(x -> (x = '' ? NULL : x), splitByString(',', assumeNotNull(trim(trailing ',' from ga.values)))) AS alteration_value,
arrayMap(x -> (x = '' ? NULL : toInt32(x)), splitByString(',', assumeNotNull(trim(trailing ',' from gps.ordered_sample_list)))) AS sample_id
arrayMap(x -> (x = '' ? NULL : x), splitByString(',', assumeNotNull(substring(ga.values, 1, length(ga.values) - 1)))) AS alteration_value,
arrayMap(x -> (x = '' ? NULL : toInt32(x)), splitByString(',', assumeNotNull(substring(gps.ordered_sample_list, 1, length(gps.ordered_sample_list) - 1)))) AS sample_id
FROM
genetic_profile gp
JOIN genetic_profile_samples gps ON gp.genetic_profile_id = gps.genetic_profile_id
@@ -131,7 +191,7 @@ FROM

const queryByProfiles = _.template(
`
INSERT INTO TABLE genetic_alteration_derived2
INSERT INTO TABLE genetic_alteration_derivedFIXED
SELECT
sample_unique_id,
cancer_study_identifier,
@@ -148,16 +208,15 @@ FROM
(SELECT
g.hugo_gene_symbol AS hugo_gene_symbol,
gp.stable_id as stable_id,
arrayMap(x -> (x = '' ? NULL : x), splitByString(',', assumeNotNull(trim(trailing ',' from ga.values)))) AS alteration_value,
arrayMap(x -> (x = '' ? NULL : toInt32(x)), splitByString(',', assumeNotNull(trim(trailing ',' from gps.ordered_sample_list)))) AS sample_id
arrayMap(x -> (x = '' ? NULL : x), splitByString(',', assumeNotNull(substring(ga.values, 1, length(ga.values) - 1)))) AS alteration_value,
arrayMap(x -> (x = '' ? NULL : toInt32(x)), splitByString(',', assumeNotNull(substring(gps.ordered_sample_list, 1, length(gps.ordered_sample_list) - 1)))) AS sample_id
FROM
genetic_profile gp
JOIN genetic_profile_samples gps ON gp.genetic_profile_id = gps.genetic_profile_id
JOIN genetic_alteration ga ON gp.genetic_profile_id = ga.genetic_profile_id
JOIN gene g ON ga.genetic_entity_id = g.genetic_entity_id
WHERE
ga.genetic_profile_id=<%= genetic_profile_id %> AND
ga.genetic_entity_id=<%= genetic_entity_id %>
ga.genetic_profile_id='<%= genetic_profile_id %>'
AND
gp.genetic_alteration_type NOT IN ('GENERIC_ASSAY', 'MUTATION_EXTENDED', 'STRUCTURAL_VARIANT')
)
@@ -167,4 +226,4 @@ FROM
`
);

main();
mainInsert();
Loading