From 79d36e73f1aeff6d0ab4697e77aa210752772ad6 Mon Sep 17 00:00:00 2001 From: Charles Haynes <33608920+haynescd@users.noreply.github.com> Date: Wed, 26 Jun 2024 15:15:36 -0400 Subject: [PATCH] Consolidate SQL files used to create CH Tables (#10867) --- pom.xml | 4 - .../db-scripts/clickhouse/clickhouse.sql | 251 ++++++++++++++++-- .../clickhouse/clickhouse_migration.sql | 144 ---------- .../db-scripts/clickhouse/derived_tables.sql | 47 ---- .../clickhouse/materialized_views.sql | 63 ----- .../resources/db-scripts/clickhouse/views.sql | 27 -- .../mybatisclickhouse/StudyViewMapper.xml | 10 +- .../AbstractTestcontainers.java | 8 +- 8 files changed, 234 insertions(+), 320 deletions(-) delete mode 100644 src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql delete mode 100644 src/main/resources/db-scripts/clickhouse/derived_tables.sql delete mode 100644 src/main/resources/db-scripts/clickhouse/views.sql diff --git a/pom.xml b/pom.xml index a1ff655502c..33883b323d0 100644 --- a/pom.xml +++ b/pom.xml @@ -448,8 +448,6 @@ cgds.sql migration.sql clickhouse/clickhouse.sql - clickhouse/clickhouse_views.sql - clickhouse/clickhouse_migration.sql clickhouse/materialized_views.sql @@ -460,8 +458,6 @@ false clickhouse/clickhouse.sql - clickhouse/views.sql - clickhouse/clickhouse_migration.sql clickhouse/materialized_views.sql diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse.sql b/src/main/resources/db-scripts/clickhouse/clickhouse.sql index 4e9897566bd..fcbe2fe1d01 100644 --- a/src/main/resources/db-scripts/clickhouse/clickhouse.sql +++ b/src/main/resources/db-scripts/clickhouse/clickhouse.sql @@ -1,28 +1,11 @@ +DROP TABLE IF EXISTS sample_to_gene_panel_derived; +DROP TABLE IF EXISTS gene_panel_to_gene_derived; +DROP TABLE IF EXISTS sample_derived; DROP TABLE IF EXISTS genomic_event_derived; -DROP TABLE IF EXISTS sample_to_gene_panel; -DROP TABLE IF EXISTS gene_panel_to_gene; +DROP TABLE IF EXISTS clinical_data_derived; -CREATE TABLE IF NOT EXISTS genomic_event_derived -( - sample_unique_id String, - hugo_gene_symbol String, - gene_panel_stable_id LowCardinality(String), - cancer_study_identifier LowCardinality(String), - genetic_profile_stable_id LowCardinality(String), - variant_type LowCardinality(String), - mutation_variant String, - mutation_type LowCardinality(String), - mutation_status LowCardinality(String), - driver_filter LowCardinality(String), - driver_tiers_filter LowCardinality(String), - cna_alteration Nullable(Int8), - cna_cytoband String, - sv_event_info String, - patient_unique_id String -) ENGINE = MergeTree -ORDER BY ( variant_type, hugo_gene_symbol, genetic_profile_stable_id, sample_unique_id); -CREATE TABLE sample_to_gene_panel +CREATE TABLE sample_to_gene_panel_derived ( sample_unique_id String, alteration_type LowCardinality(String), @@ -31,13 +14,40 @@ CREATE TABLE sample_to_gene_panel ) ENGINE = MergeTree() ORDER BY (gene_panel_id, alteration_type, sample_unique_id); -CREATE TABLE gene_panel_to_gene +INSERT INTO sample_to_gene_panel_derived +SELECT + concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, + genetic_alteration_type AS alteration_type, + -- If a mutation is found in a gene that is not in a gene panel we assume Whole Exome Sequencing WES + ifnull(gene_panel.stable_id, 'WES') AS gene_panel_id, + cs.cancer_study_identifier AS cancer_study_identifier +FROM sample_profile sp + INNER JOIN genetic_profile gp ON sample_profile.genetic_profile_id = gp.genetic_profile_id + LEFT JOIN gene_panel ON sp.panel_id = gene_panel.internal_id + INNER JOIN sample ON sp.sample_id = sample.internal_id + INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id; + +CREATE TABLE gene_panel_to_gene_derived ( gene_panel_id LowCardinality(String), gene String ) ENGINE = MergeTree() ORDER BY (gene_panel_id); +INSERT INTO gene_panel_to_gene_derived +SELECT + gp.stable_id AS gene_panel_id, + g.hugo_gene_symbol AS gene +FROM gene_panel gp + INNER JOIN gene_panel_list gpl ON gp.internal_id = gpl.internal_id + INNER JOIN gene g ON g.entrez_gene_id = gpl.gene_id +UNION ALL +SELECT + 'WES' AS gene_panel_id, + gene.hugo_gene_symbol AS gene +FROM gene +WHERE gene.entrez_gene_id > 0; + CREATE TABLE sample_derived ( sample_unique_id String, @@ -50,4 +60,197 @@ CREATE TABLE sample_derived internal_id Int ) ENGINE = MergeTree - ORDER BY (cancer_study_identifier, sample_unique_id); \ No newline at end of file + ORDER BY (cancer_study_identifier, sample_unique_id); + +INSERT INTO sample_derived +SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, + base64Encode(sample.stable_id) AS sample_unique_id_base64, + sample.stable_id AS sample_stable_id, + concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, + p.stable_id AS patient_stable_id, + base64Encode(p.stable_id) AS patient_unique_id_base64, + cs.cancer_study_identifier AS cancer_study_identifier, + sample.internal_id AS internal_id +FROM sample + INNER JOIN patient AS p ON sample.patient_id = p.internal_id + INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id; + + +CREATE TABLE IF NOT EXISTS genomic_event_derived +( + sample_unique_id String, + hugo_gene_symbol String, + gene_panel_stable_id LowCardinality(String), + cancer_study_identifier LowCardinality(String), + genetic_profile_stable_id LowCardinality(String), + variant_type LowCardinality(String), + mutation_variant String, + mutation_type LowCardinality(String), + mutation_status LowCardinality(String), + driver_filter LowCardinality(String), + driver_tiers_filter LowCardinality(String), + cna_alteration Nullable(Int8), + cna_cytoband String, + sv_event_info String, + patient_unique_id String +) ENGINE = MergeTree + ORDER BY ( variant_type, hugo_gene_symbol, genetic_profile_stable_id, sample_unique_id); + +INSERT INTO genomic_event_derived +-- Insert Mutations +SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, + gene.hugo_gene_symbol AS hugo_gene_symbol, + ifNull(gp.stable_id, 'WES') AS gene_panel_stable_id, + cs.cancer_study_identifier AS cancer_study_identifier, + g.stable_id AS genetic_profile_stable_id, + 'mutation' AS variant_type, + me.protein_change AS mutation_variant, + me.mutation_type AS mutation_type, + mutation.mutation_status AS mutation_status, + 'NA' AS driver_filter, + 'NA' AS drivet_tiers_filter, + NULL AS cna_alteration, + '' AS cna_cytoband, + '' AS sv_event_info, + concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id +FROM mutation + INNER JOIN mutation_event AS me ON mutation.mutation_event_id = me.mutation_event_id + INNER JOIN sample_profile sp + ON mutation.sample_id = sp.sample_id AND mutation.genetic_profile_id = sp.genetic_profile_id + LEFT JOIN gene_panel gp ON sp.panel_id = gp.internal_id + LEFT JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id + INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id + INNER JOIN sample ON mutation.sample_id = sample.internal_id + INNER JOIN patient on sample.patient_id = patient.internal_id + LEFT JOIN gene ON mutation.entrez_gene_id = gene.entrez_gene_id +UNION ALL +-- Insert CNA Genes +SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, + gene.hugo_gene_symbol AS hugo_gene_symbol, + ifNull(gp.stable_id, 'WES') AS gene_panel_stable_id, + cs.cancer_study_identifier AS cancer_study_identifier, + g.stable_id AS genetic_profile_stable_id, + 'cna' AS variant_type, + 'NA' AS mutation_variant, + 'NA' AS mutation_type, + 'NA' AS mutation_status, + 'NA' AS driver_filter, + 'NA' AS drivet_tiers_filter, + ce.alteration AS cna_alteration, + rgg.cytoband AS cna_cytoband, + '' AS sv_event_info, + concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id +FROM cna_event ce + INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id + INNER JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id + LEFT JOIN gene_panel gp ON sp.panel_id = gp.internal_id + INNER JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id + INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id + INNER JOIN sample ON sce.sample_id = sample.internal_id + INNER JOIN patient on sample.patient_id = patient.internal_id + INNER JOIN gene ON ce.entrez_gene_id = gene.entrez_gene_id + INNER JOIN reference_genome_gene rgg ON rgg.entrez_gene_id = ce.entrez_gene_id +UNION ALL +-- Insert Structural Variants Site1 +SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id, + gene.hugo_gene_symbol AS hugo_gene_symbol, + ifNull(gene_panel.stable_id, 'WES') AS gene_panel_stable_id, + cs.cancer_study_identifier AS cancer_study_identifier, + gp.stable_id AS genetic_profile_stable_id, + 'structural_variant' AS variant_type, + 'NA' AS mutation_variant, + 'NA' AS mutation_type, + 'NA' AS mutation_status, + 'NA' AS driver_filter, + 'NA' AS drivet_tiers_filter, + NULL AS cna_alteration, + '' AS cna_cytoband, + event_info AS sv_event_info, + concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id +FROM structural_variant sv + INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id + INNER JOIN sample s ON sv.sample_id = s.internal_id + INNER JOIN patient on s.patient_id = patient.internal_id + INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id + INNER JOIN gene ON sv.site1_entrez_gene_id = gene.entrez_gene_id + INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id + LEFT JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id +UNION ALL +-- Insert Structural Variants Site2 +SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id, + gene.hugo_gene_symbol AS hugo_gene_symbol, + ifNull(gene_panel.stable_id, 'WES') AS gene_panel_stable_id, + cs.cancer_study_identifier AS cancer_study_identifier, + gp.stable_id AS genetic_profile_stable_id, + 'structural_variant' AS variant_type, + 'NA' AS mutation_variant, + 'NA' AS mutation_type, + 'NA' AS mutation_status, + 'NA' AS driver_filter, + 'NA' AS drivet_tiers_filter, + NULL AS cna_alteration, + '' AS cna_cytoband, + event_info AS sv_event_info, + concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id +FROM structural_variant sv + INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id + INNER JOIN sample s ON sv.sample_id = s.internal_id + INNER JOIN patient on s.patient_id = patient.internal_id + INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id + INNER JOIN gene ON sv.site2_entrez_gene_id = gene.entrez_gene_id + INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id + LEFT JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id +WHERE + sv.site2_entrez_gene_id != sv.site1_entrez_gene_id + OR sv.site1_entrez_gene_id IS NULL; + +CREATE TABLE IF NOT EXISTS clinical_data_derived +( + sample_unique_id String, + patient_unique_id String, + attribute_name LowCardinality(String), + attribute_value String, + cancer_study_identifier LowCardinality(String), + type LowCardinality(String) +) + ENGINE=MergeTree + ORDER BY (type, attribute_name, sample_unique_id); + +-- Insert sample attribute data +INSERT INTO TABLE clinical_data_derived +SELECT sm.sample_unique_id AS sample_unique_id, + sm.patient_unique_id AS patient_unique_id, + cam.attr_id AS attribute_name, + ifNull(csamp.attr_value, '') AS attribute_value, + cs.cancer_study_identifier AS cancer_study_identifier, + 'sample' AS type +FROM sample_derived AS sm + INNER JOIN cancer_study AS cs + ON sm.cancer_study_identifier = cs.cancer_study_identifier + FULL OUTER JOIN clinical_attribute_meta AS cam + ON cs.cancer_study_id = cam.cancer_study_id + FULL OUTER JOIN clinical_sample AS csamp + ON (sm.internal_id = csamp.internal_id) AND (csamp.attr_id = cam.attr_id) +WHERE cam.patient_attribute = 0; + +-- INSERT patient attribute data +INSERT INTO TABLE clinical_data_derived +SELECT '' AS sample_unique_id, + concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, + cam.attr_id AS attribute_name, + ifNull(clinpat.attr_value, '') AS attribute_value, + cs.cancer_study_identifier AS cancer_study_identifier, + 'patient' AS type +FROM patient AS p + INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id + FULL OUTER JOIN clinical_attribute_meta AS cam + ON cs.cancer_study_id = cam.cancer_study_id + FULL OUTER JOIN clinical_patient AS clinpat + ON (p.internal_id = clinpat.internal_id) AND (clinpat.attr_id = cam.attr_id) +WHERE cam.patient_attribute = 1; + +OPTIMIZE TABLE sample_to_gene_panel_derived; +OPTIMIZE TABLE gene_panel_to_gene_derived; +OPTIMIZE TABLE sample_derived; +OPTIMIZE TABLE genomic_event_derived; +OPTIMIZE TABLE clinical_data_derived; diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql deleted file mode 100644 index 76223de462d..00000000000 --- a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql +++ /dev/null @@ -1,144 +0,0 @@ - -INSERT INTO sample_to_gene_panel -SELECT - concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, - genetic_alteration_type AS alteration_type, - -- If a mutation is found in a gene that is not in a gene panel we assume Whole Exome Sequencing WES - ifnull(gene_panel.stable_id, 'WES') AS gene_panel_id, - cs.cancer_study_identifier AS cancer_study_identifier -FROM sample_profile sp - INNER JOIN genetic_profile gp ON sample_profile.genetic_profile_id = gp.genetic_profile_id - LEFT JOIN gene_panel ON sp.panel_id = gene_panel.internal_id - INNER JOIN sample ON sp.sample_id = sample.internal_id - INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id; - -INSERT INTO gene_panel_to_gene -SELECT - gp.stable_id AS gene_panel_id, - g.hugo_gene_symbol AS gene -FROM gene_panel gp - INNER JOIN gene_panel_list gpl ON gp.internal_id = gpl.internal_id - INNER JOIN gene g ON g.entrez_gene_id = gpl.gene_id -UNION ALL -SELECT - 'WES' AS gene_panel_id, - gene.hugo_gene_symbol AS gene -FROM gene -WHERE gene.entrez_gene_id > 0; - -INSERT INTO genomic_event_derived -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, - gene.hugo_gene_symbol AS hugo_gene_symbol, - ifNull(gp.stable_id, 'WES') AS gene_panel_stable_id, - cs.cancer_study_identifier AS cancer_study_identifier, - g.stable_id AS genetic_profile_stable_id, - 'mutation' AS variant_type, - me.protein_change AS mutation_variant, - me.mutation_type AS mutation_type, - mutation.mutation_status AS mutation_status, - 'NA' AS driver_filter, - 'NA' AS drivet_tiers_filter, - NULL AS cna_alteration, - '' AS cna_cytoband, - '' AS sv_event_info, - concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id -FROM mutation - INNER JOIN mutation_event AS me ON mutation.mutation_event_id = me.mutation_event_id - INNER JOIN sample_profile sp - ON mutation.sample_id = sp.sample_id AND mutation.genetic_profile_id = sp.genetic_profile_id - LEFT JOIN gene_panel gp ON sp.panel_id = gp.internal_id - LEFT JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id - INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id - INNER JOIN sample ON mutation.sample_id = sample.internal_id - INNER JOIN patient on sample.patient_id = patient.internal_id - LEFT JOIN gene ON mutation.entrez_gene_id = gene.entrez_gene_id -UNION ALL -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, - gene.hugo_gene_symbol AS hugo_gene_symbol, - ifNull(gp.stable_id, 'WES') AS gene_panel_stable_id, - cs.cancer_study_identifier AS cancer_study_identifier, - g.stable_id AS genetic_profile_stable_id, - 'cna' AS variant_type, - 'NA' AS mutation_variant, - 'NA' AS mutation_type, - 'NA' AS mutation_status, - 'NA' AS driver_filter, - 'NA' AS drivet_tiers_filter, - ce.alteration AS cna_alteration, - rgg.cytoband AS cna_cytoband, - '' AS sv_event_info, - concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id -FROM cna_event ce - INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id - INNER JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id - LEFT JOIN gene_panel gp ON sp.panel_id = gp.internal_id - INNER JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id - INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id - INNER JOIN sample ON sce.sample_id = sample.internal_id - INNER JOIN patient on sample.patient_id = patient.internal_id - INNER JOIN gene ON ce.entrez_gene_id = gene.entrez_gene_id - INNER JOIN reference_genome_gene rgg ON rgg.entrez_gene_id = ce.entrez_gene_id -UNION ALL -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id, - gene.hugo_gene_symbol AS hugo_gene_symbol, - ifNull(gene_panel.stable_id, 'WES') AS gene_panel_stable_id, - cs.cancer_study_identifier AS cancer_study_identifier, - gp.stable_id AS genetic_profile_stable_id, - 'structural_variant' AS variant_type, - 'NA' AS mutation_variant, - 'NA' AS mutation_type, - 'NA' AS mutation_status, - 'NA' AS driver_filter, - 'NA' AS drivet_tiers_filter, - NULL AS cna_alteration, - '' AS cna_cytoband, - event_info AS sv_event_info, - concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id -FROM structural_variant sv - INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id - INNER JOIN sample s ON sv.sample_id = s.internal_id - INNER JOIN patient on s.patient_id = patient.internal_id - INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene ON sv.site1_entrez_gene_id = gene.entrez_gene_id - INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id - LEFT JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id -UNION ALL -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id, - gene.hugo_gene_symbol AS hugo_gene_symbol, - ifNull(gene_panel.stable_id, 'WES') AS gene_panel_stable_id, - cs.cancer_study_identifier AS cancer_study_identifier, - gp.stable_id AS genetic_profile_stable_id, - 'structural_variant' AS variant_type, - 'NA' AS mutation_variant, - 'NA' AS mutation_type, - 'NA' AS mutation_status, - 'NA' AS driver_filter, - 'NA' AS drivet_tiers_filter, - NULL AS cna_alteration, - '' AS cna_cytoband, - event_info AS sv_event_info, - concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id -FROM structural_variant sv - INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id - INNER JOIN sample s ON sv.sample_id = s.internal_id - INNER JOIN patient on s.patient_id = patient.internal_id - INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene ON sv.site2_entrez_gene_id = gene.entrez_gene_id - INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id - LEFT JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id -WHERE - sv.site2_entrez_gene_id != sv.site1_entrez_gene_id - OR sv.site1_entrez_gene_id IS NULL; - -INSERT INTO sample_derived -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, - base64Encode(sample.stable_id) AS sample_unique_id_base64, - sample.stable_id AS sample_stable_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, - p.stable_id AS patient_stable_id, - base64Encode(p.stable_id) AS patient_unique_id_base64, - cs.cancer_study_identifier AS cancer_study_identifier, - sample.internal_id AS internal_id -FROM sample - INNER JOIN patient AS p ON sample.patient_id = p.internal_id - INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id \ No newline at end of file diff --git a/src/main/resources/db-scripts/clickhouse/derived_tables.sql b/src/main/resources/db-scripts/clickhouse/derived_tables.sql deleted file mode 100644 index 0e982ed106f..00000000000 --- a/src/main/resources/db-scripts/clickhouse/derived_tables.sql +++ /dev/null @@ -1,47 +0,0 @@ --- clinical_data_derived -DROP TABLE IF EXISTS clinical_data_derived; -CREATE TABLE IF NOT EXISTS clinical_data_derived -( - sample_unique_id String, - patient_unique_id String, - attribute_name String, - attribute_value String, - cancer_study_identifier String, - type String -) - ENGINE=MergeTree - ORDER BY sample_unique_id; - --- Insert sample attribute data -INSERT INTO TABLE clinical_data_derived -SELECT sm.sample_unique_id AS sample_unique_id, - sm.patient_unique_id AS patient_unique_id, - cam.attr_id AS attribute_name, - csamp.attr_value AS attribute_value, - cs.cancer_study_identifier AS cancer_study_identifier, - 'sample' AS type -FROM sample_mv AS sm - INNER JOIN cancer_study AS cs - ON sm.cancer_study_identifier = cs.cancer_study_identifier - FULL OUTER JOIN clinical_attribute_meta AS cam - ON cs.cancer_study_id = cam.cancer_study_id - FULL OUTER JOIN clinical_sample AS csamp - ON (sm.internal_id = csamp.internal_id) AND (csamp.attr_id = cam.attr_id) -WHERE cam.patient_attribute = 0; - --- INSERT patient attribute data -INSERT INTO TABLE clinical_data_derived -SELECT NULL AS sample_unique_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, - cam.attr_id AS attribute_name, - clinpat.attr_value AS attribute_value, - cs.cancer_study_identifier AS cancer_study_identifier, - 'patient' AS type -FROM patient AS p - INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id - FULL OUTER JOIN clinical_attribute_meta AS cam - ON cs.cancer_study_id = cam.cancer_study_id - FULL OUTER JOIN clinical_patient AS clinpat - ON (p.internal_id = clinpat.internal_id) AND (clinpat.attr_id = cam.attr_id) -WHERE cam.patient_attribute = 1; - diff --git a/src/main/resources/db-scripts/clickhouse/materialized_views.sql b/src/main/resources/db-scripts/clickhouse/materialized_views.sql index 8595d6edb4d..0bde005094b 100644 --- a/src/main/resources/db-scripts/clickhouse/materialized_views.sql +++ b/src/main/resources/db-scripts/clickhouse/materialized_views.sql @@ -1,46 +1,6 @@ -DROP TABLE IF EXISTS sample_columnstore; DROP TABLE IF EXISTS sample_list_columnstore; -DROP VIEW IF EXISTS sample_columnstore_mv; DROP VIEW IF EXISTS sample_list_columnstore_mv; --- sample_columnstore -CREATE TABLE IF NOT EXISTS sample_columnstore -( - sample_unique_id VARCHAR(45), - sample_unique_id_base64 VARCHAR(45), - sample_stable_id VARCHAR(45), - patient_unique_id VARCHAR(45), - patient_unique_id_base64 VARCHAR(45), - patient_stable_id VARCHAR(45), - cancer_study_identifier VARCHAR(45) -) - ENGINE = MergeTree - ORDER BY (sample_unique_id, patient_unique_id, cancer_study_identifier); - -INSERT INTO sample_columnstore -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, - base64Encode(sample.stable_id) as sample_unique_id_base64, - sample.stable_id as sample_stable_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - p.stable_id as patient_stable_id, - base64Encode(p.stable_id) as patient_unique_id_base64, - cs.cancer_study_identifier as cancer_study_identifier -FROM sample - INNER JOIN patient p on sample.patient_id = p.internal_id - INNER JOIN cancer_study cs on p.cancer_study_id = cs.cancer_study_id; - -CREATE MATERIALIZED VIEW sample_columnstore_mv TO sample_columnstore AS -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, - sample.stable_id as sample_stable_id, - base64Encode(sample.stable_id) as sample_unique_id_base64, - concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - p.stable_id as patient_stable_id, - base64Encode(p.stable_id) as patient_unique_id_base64, - cs.cancer_study_identifier as cancer_study_identifier -FROM sample - INNER JOIN patient p on sample.patient_id = p.internal_id - INNER JOIN cancer_study cs on p.cancer_study_id = cs.cancer_study_id; - CREATE TABLE IF NOT EXISTS sample_list_columnstore ( sample_unique_id VARCHAR(45), @@ -70,26 +30,3 @@ FROM sample_list as sl INNER JOIN sample_list_list AS sll on sll.list_id = sl.list_id INNER JOIN sample AS s on s.internal_id = sll.sample_id INNER JOIN cancer_study cs on sl.cancer_study_id = cs.cancer_study_id; - - - --- SAMPLE_MV -DROP VIEW IF EXISTS sample_mv; -CREATE MATERIALIZED VIEW sample_mv - ENGINE = AggregatingMergeTree() - ORDER BY internal_id - SETTINGS allow_nullable_key = 1 - POPULATE -AS - -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, - base64Encode(sample.stable_id) AS sample_unique_id_base64, - sample.stable_id AS sample_stable_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, - p.stable_id AS patient_stable_id, - base64Encode(p.stable_id) AS patient_unique_id_base64, - cs.cancer_study_identifier AS cancer_study_identifier, - sample.internal_id AS internal_id -FROM sample - INNER JOIN patient AS p ON sample.patient_id = p.internal_id - INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id; diff --git a/src/main/resources/db-scripts/clickhouse/views.sql b/src/main/resources/db-scripts/clickhouse/views.sql deleted file mode 100644 index 27845d1052b..00000000000 --- a/src/main/resources/db-scripts/clickhouse/views.sql +++ /dev/null @@ -1,27 +0,0 @@ -DROP VIEW IF EXISTS sample_view; -DROP VIEW IF EXISTS sample_list_view; - - -CREATE VIEW sample_view - AS -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, - base64Encode(sample.stable_id) as sample_unique_id_base64, - sample.stable_id as sample_stable_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - p.stable_id as patient_stable_id, - base64Encode(p.stable_id) as patient_unique_id_base64, - cs.cancer_study_identifier as cancer_study_identifier -FROM sample - INNER JOIN patient p on sample.patient_id = p.internal_id - INNER JOIN cancer_study cs on p.cancer_study_id = cs.cancer_study_id; - -CREATE VIEW sample_list_view - AS -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - sl.stable_id as sample_list_stable_id, - sl.name as name, - cs.cancer_study_identifier as cancer_study_identifier -FROM sample_list as sl - INNER JOIN sample_list_list AS sll on sll.list_id = sl.list_id - INNER JOIN sample AS s on s.internal_id = sll.sample_id - INNER JOIN cancer_study cs on sl.cancer_study_id = cs.cancer_study_id; \ No newline at end of file diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index 078afabea47..78926fff053 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -257,8 +257,8 @@ SELECT gene as hugoGeneSymbol, COUNT(*) as numberOfProfiledCases - FROM sample_to_gene_panel stgp - INNER JOIN gene_panel_to_gene gptg on stgp.gene_panel_id = gptg.gene_panel_id + FROM sample_to_gene_panel_derived stgp + INNER JOIN gene_panel_to_gene_derived gptg on stgp.gene_panel_id = gptg.gene_panel_id stgp.alteration_type = '${alterationType}' AND @@ -277,7 +277,7 @@