From 79d36e73f1aeff6d0ab4697e77aa210752772ad6 Mon Sep 17 00:00:00 2001
From: Charles Haynes <33608920+haynescd@users.noreply.github.com>
Date: Wed, 26 Jun 2024 15:15:36 -0400
Subject: [PATCH] Consolidate SQL files used to create CH Tables (#10867)
---
pom.xml | 4 -
.../db-scripts/clickhouse/clickhouse.sql | 251 ++++++++++++++++--
.../clickhouse/clickhouse_migration.sql | 144 ----------
.../db-scripts/clickhouse/derived_tables.sql | 47 ----
.../clickhouse/materialized_views.sql | 63 -----
.../resources/db-scripts/clickhouse/views.sql | 27 --
.../mybatisclickhouse/StudyViewMapper.xml | 10 +-
.../AbstractTestcontainers.java | 8 +-
8 files changed, 234 insertions(+), 320 deletions(-)
delete mode 100644 src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql
delete mode 100644 src/main/resources/db-scripts/clickhouse/derived_tables.sql
delete mode 100644 src/main/resources/db-scripts/clickhouse/views.sql
diff --git a/pom.xml b/pom.xml
index a1ff655502c..33883b323d0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -448,8 +448,6 @@
cgds.sql
migration.sql
clickhouse/clickhouse.sql
- clickhouse/clickhouse_views.sql
- clickhouse/clickhouse_migration.sql
clickhouse/materialized_views.sql
@@ -460,8 +458,6 @@
false
clickhouse/clickhouse.sql
- clickhouse/views.sql
- clickhouse/clickhouse_migration.sql
clickhouse/materialized_views.sql
diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse.sql b/src/main/resources/db-scripts/clickhouse/clickhouse.sql
index 4e9897566bd..fcbe2fe1d01 100644
--- a/src/main/resources/db-scripts/clickhouse/clickhouse.sql
+++ b/src/main/resources/db-scripts/clickhouse/clickhouse.sql
@@ -1,28 +1,11 @@
+DROP TABLE IF EXISTS sample_to_gene_panel_derived;
+DROP TABLE IF EXISTS gene_panel_to_gene_derived;
+DROP TABLE IF EXISTS sample_derived;
DROP TABLE IF EXISTS genomic_event_derived;
-DROP TABLE IF EXISTS sample_to_gene_panel;
-DROP TABLE IF EXISTS gene_panel_to_gene;
+DROP TABLE IF EXISTS clinical_data_derived;
-CREATE TABLE IF NOT EXISTS genomic_event_derived
-(
- sample_unique_id String,
- hugo_gene_symbol String,
- gene_panel_stable_id LowCardinality(String),
- cancer_study_identifier LowCardinality(String),
- genetic_profile_stable_id LowCardinality(String),
- variant_type LowCardinality(String),
- mutation_variant String,
- mutation_type LowCardinality(String),
- mutation_status LowCardinality(String),
- driver_filter LowCardinality(String),
- driver_tiers_filter LowCardinality(String),
- cna_alteration Nullable(Int8),
- cna_cytoband String,
- sv_event_info String,
- patient_unique_id String
-) ENGINE = MergeTree
-ORDER BY ( variant_type, hugo_gene_symbol, genetic_profile_stable_id, sample_unique_id);
-CREATE TABLE sample_to_gene_panel
+CREATE TABLE sample_to_gene_panel_derived
(
sample_unique_id String,
alteration_type LowCardinality(String),
@@ -31,13 +14,40 @@ CREATE TABLE sample_to_gene_panel
) ENGINE = MergeTree()
ORDER BY (gene_panel_id, alteration_type, sample_unique_id);
-CREATE TABLE gene_panel_to_gene
+INSERT INTO sample_to_gene_panel_derived
+SELECT
+ concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id,
+ genetic_alteration_type AS alteration_type,
+ -- If a mutation is found in a gene that is not in a gene panel we assume Whole Exome Sequencing WES
+ ifnull(gene_panel.stable_id, 'WES') AS gene_panel_id,
+ cs.cancer_study_identifier AS cancer_study_identifier
+FROM sample_profile sp
+ INNER JOIN genetic_profile gp ON sample_profile.genetic_profile_id = gp.genetic_profile_id
+ LEFT JOIN gene_panel ON sp.panel_id = gene_panel.internal_id
+ INNER JOIN sample ON sp.sample_id = sample.internal_id
+ INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id;
+
+CREATE TABLE gene_panel_to_gene_derived
(
gene_panel_id LowCardinality(String),
gene String
) ENGINE = MergeTree()
ORDER BY (gene_panel_id);
+INSERT INTO gene_panel_to_gene_derived
+SELECT
+ gp.stable_id AS gene_panel_id,
+ g.hugo_gene_symbol AS gene
+FROM gene_panel gp
+ INNER JOIN gene_panel_list gpl ON gp.internal_id = gpl.internal_id
+ INNER JOIN gene g ON g.entrez_gene_id = gpl.gene_id
+UNION ALL
+SELECT
+ 'WES' AS gene_panel_id,
+ gene.hugo_gene_symbol AS gene
+FROM gene
+WHERE gene.entrez_gene_id > 0;
+
CREATE TABLE sample_derived
(
sample_unique_id String,
@@ -50,4 +60,197 @@ CREATE TABLE sample_derived
internal_id Int
)
ENGINE = MergeTree
- ORDER BY (cancer_study_identifier, sample_unique_id);
\ No newline at end of file
+ ORDER BY (cancer_study_identifier, sample_unique_id);
+
+INSERT INTO sample_derived
+SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id,
+ base64Encode(sample.stable_id) AS sample_unique_id_base64,
+ sample.stable_id AS sample_stable_id,
+ concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id,
+ p.stable_id AS patient_stable_id,
+ base64Encode(p.stable_id) AS patient_unique_id_base64,
+ cs.cancer_study_identifier AS cancer_study_identifier,
+ sample.internal_id AS internal_id
+FROM sample
+ INNER JOIN patient AS p ON sample.patient_id = p.internal_id
+ INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id;
+
+
+CREATE TABLE IF NOT EXISTS genomic_event_derived
+(
+ sample_unique_id String,
+ hugo_gene_symbol String,
+ gene_panel_stable_id LowCardinality(String),
+ cancer_study_identifier LowCardinality(String),
+ genetic_profile_stable_id LowCardinality(String),
+ variant_type LowCardinality(String),
+ mutation_variant String,
+ mutation_type LowCardinality(String),
+ mutation_status LowCardinality(String),
+ driver_filter LowCardinality(String),
+ driver_tiers_filter LowCardinality(String),
+ cna_alteration Nullable(Int8),
+ cna_cytoband String,
+ sv_event_info String,
+ patient_unique_id String
+) ENGINE = MergeTree
+ ORDER BY ( variant_type, hugo_gene_symbol, genetic_profile_stable_id, sample_unique_id);
+
+INSERT INTO genomic_event_derived
+-- Insert Mutations
+SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id,
+ gene.hugo_gene_symbol AS hugo_gene_symbol,
+ ifNull(gp.stable_id, 'WES') AS gene_panel_stable_id,
+ cs.cancer_study_identifier AS cancer_study_identifier,
+ g.stable_id AS genetic_profile_stable_id,
+ 'mutation' AS variant_type,
+ me.protein_change AS mutation_variant,
+ me.mutation_type AS mutation_type,
+ mutation.mutation_status AS mutation_status,
+ 'NA' AS driver_filter,
+ 'NA' AS drivet_tiers_filter,
+ NULL AS cna_alteration,
+ '' AS cna_cytoband,
+ '' AS sv_event_info,
+ concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id
+FROM mutation
+ INNER JOIN mutation_event AS me ON mutation.mutation_event_id = me.mutation_event_id
+ INNER JOIN sample_profile sp
+ ON mutation.sample_id = sp.sample_id AND mutation.genetic_profile_id = sp.genetic_profile_id
+ LEFT JOIN gene_panel gp ON sp.panel_id = gp.internal_id
+ LEFT JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id
+ INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id
+ INNER JOIN sample ON mutation.sample_id = sample.internal_id
+ INNER JOIN patient on sample.patient_id = patient.internal_id
+ LEFT JOIN gene ON mutation.entrez_gene_id = gene.entrez_gene_id
+UNION ALL
+-- Insert CNA Genes
+SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id,
+ gene.hugo_gene_symbol AS hugo_gene_symbol,
+ ifNull(gp.stable_id, 'WES') AS gene_panel_stable_id,
+ cs.cancer_study_identifier AS cancer_study_identifier,
+ g.stable_id AS genetic_profile_stable_id,
+ 'cna' AS variant_type,
+ 'NA' AS mutation_variant,
+ 'NA' AS mutation_type,
+ 'NA' AS mutation_status,
+ 'NA' AS driver_filter,
+ 'NA' AS drivet_tiers_filter,
+ ce.alteration AS cna_alteration,
+ rgg.cytoband AS cna_cytoband,
+ '' AS sv_event_info,
+ concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id
+FROM cna_event ce
+ INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id
+ INNER JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id
+ LEFT JOIN gene_panel gp ON sp.panel_id = gp.internal_id
+ INNER JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id
+ INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id
+ INNER JOIN sample ON sce.sample_id = sample.internal_id
+ INNER JOIN patient on sample.patient_id = patient.internal_id
+ INNER JOIN gene ON ce.entrez_gene_id = gene.entrez_gene_id
+ INNER JOIN reference_genome_gene rgg ON rgg.entrez_gene_id = ce.entrez_gene_id
+UNION ALL
+-- Insert Structural Variants Site1
+SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id,
+ gene.hugo_gene_symbol AS hugo_gene_symbol,
+ ifNull(gene_panel.stable_id, 'WES') AS gene_panel_stable_id,
+ cs.cancer_study_identifier AS cancer_study_identifier,
+ gp.stable_id AS genetic_profile_stable_id,
+ 'structural_variant' AS variant_type,
+ 'NA' AS mutation_variant,
+ 'NA' AS mutation_type,
+ 'NA' AS mutation_status,
+ 'NA' AS driver_filter,
+ 'NA' AS drivet_tiers_filter,
+ NULL AS cna_alteration,
+ '' AS cna_cytoband,
+ event_info AS sv_event_info,
+ concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id
+FROM structural_variant sv
+ INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id
+ INNER JOIN sample s ON sv.sample_id = s.internal_id
+ INNER JOIN patient on s.patient_id = patient.internal_id
+ INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id
+ INNER JOIN gene ON sv.site1_entrez_gene_id = gene.entrez_gene_id
+ INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id
+ LEFT JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id
+UNION ALL
+-- Insert Structural Variants Site2
+SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id,
+ gene.hugo_gene_symbol AS hugo_gene_symbol,
+ ifNull(gene_panel.stable_id, 'WES') AS gene_panel_stable_id,
+ cs.cancer_study_identifier AS cancer_study_identifier,
+ gp.stable_id AS genetic_profile_stable_id,
+ 'structural_variant' AS variant_type,
+ 'NA' AS mutation_variant,
+ 'NA' AS mutation_type,
+ 'NA' AS mutation_status,
+ 'NA' AS driver_filter,
+ 'NA' AS drivet_tiers_filter,
+ NULL AS cna_alteration,
+ '' AS cna_cytoband,
+ event_info AS sv_event_info,
+ concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id
+FROM structural_variant sv
+ INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id
+ INNER JOIN sample s ON sv.sample_id = s.internal_id
+ INNER JOIN patient on s.patient_id = patient.internal_id
+ INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id
+ INNER JOIN gene ON sv.site2_entrez_gene_id = gene.entrez_gene_id
+ INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id
+ LEFT JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id
+WHERE
+ sv.site2_entrez_gene_id != sv.site1_entrez_gene_id
+ OR sv.site1_entrez_gene_id IS NULL;
+
+CREATE TABLE IF NOT EXISTS clinical_data_derived
+(
+ sample_unique_id String,
+ patient_unique_id String,
+ attribute_name LowCardinality(String),
+ attribute_value String,
+ cancer_study_identifier LowCardinality(String),
+ type LowCardinality(String)
+)
+ ENGINE=MergeTree
+ ORDER BY (type, attribute_name, sample_unique_id);
+
+-- Insert sample attribute data
+INSERT INTO TABLE clinical_data_derived
+SELECT sm.sample_unique_id AS sample_unique_id,
+ sm.patient_unique_id AS patient_unique_id,
+ cam.attr_id AS attribute_name,
+ ifNull(csamp.attr_value, '') AS attribute_value,
+ cs.cancer_study_identifier AS cancer_study_identifier,
+ 'sample' AS type
+FROM sample_derived AS sm
+ INNER JOIN cancer_study AS cs
+ ON sm.cancer_study_identifier = cs.cancer_study_identifier
+ FULL OUTER JOIN clinical_attribute_meta AS cam
+ ON cs.cancer_study_id = cam.cancer_study_id
+ FULL OUTER JOIN clinical_sample AS csamp
+ ON (sm.internal_id = csamp.internal_id) AND (csamp.attr_id = cam.attr_id)
+WHERE cam.patient_attribute = 0;
+
+-- INSERT patient attribute data
+INSERT INTO TABLE clinical_data_derived
+SELECT '' AS sample_unique_id,
+ concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id,
+ cam.attr_id AS attribute_name,
+ ifNull(clinpat.attr_value, '') AS attribute_value,
+ cs.cancer_study_identifier AS cancer_study_identifier,
+ 'patient' AS type
+FROM patient AS p
+ INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id
+ FULL OUTER JOIN clinical_attribute_meta AS cam
+ ON cs.cancer_study_id = cam.cancer_study_id
+ FULL OUTER JOIN clinical_patient AS clinpat
+ ON (p.internal_id = clinpat.internal_id) AND (clinpat.attr_id = cam.attr_id)
+WHERE cam.patient_attribute = 1;
+
+OPTIMIZE TABLE sample_to_gene_panel_derived;
+OPTIMIZE TABLE gene_panel_to_gene_derived;
+OPTIMIZE TABLE sample_derived;
+OPTIMIZE TABLE genomic_event_derived;
+OPTIMIZE TABLE clinical_data_derived;
diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql
deleted file mode 100644
index 76223de462d..00000000000
--- a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql
+++ /dev/null
@@ -1,144 +0,0 @@
-
-INSERT INTO sample_to_gene_panel
-SELECT
- concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id,
- genetic_alteration_type AS alteration_type,
- -- If a mutation is found in a gene that is not in a gene panel we assume Whole Exome Sequencing WES
- ifnull(gene_panel.stable_id, 'WES') AS gene_panel_id,
- cs.cancer_study_identifier AS cancer_study_identifier
-FROM sample_profile sp
- INNER JOIN genetic_profile gp ON sample_profile.genetic_profile_id = gp.genetic_profile_id
- LEFT JOIN gene_panel ON sp.panel_id = gene_panel.internal_id
- INNER JOIN sample ON sp.sample_id = sample.internal_id
- INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id;
-
-INSERT INTO gene_panel_to_gene
-SELECT
- gp.stable_id AS gene_panel_id,
- g.hugo_gene_symbol AS gene
-FROM gene_panel gp
- INNER JOIN gene_panel_list gpl ON gp.internal_id = gpl.internal_id
- INNER JOIN gene g ON g.entrez_gene_id = gpl.gene_id
-UNION ALL
-SELECT
- 'WES' AS gene_panel_id,
- gene.hugo_gene_symbol AS gene
-FROM gene
-WHERE gene.entrez_gene_id > 0;
-
-INSERT INTO genomic_event_derived
-SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id,
- gene.hugo_gene_symbol AS hugo_gene_symbol,
- ifNull(gp.stable_id, 'WES') AS gene_panel_stable_id,
- cs.cancer_study_identifier AS cancer_study_identifier,
- g.stable_id AS genetic_profile_stable_id,
- 'mutation' AS variant_type,
- me.protein_change AS mutation_variant,
- me.mutation_type AS mutation_type,
- mutation.mutation_status AS mutation_status,
- 'NA' AS driver_filter,
- 'NA' AS drivet_tiers_filter,
- NULL AS cna_alteration,
- '' AS cna_cytoband,
- '' AS sv_event_info,
- concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id
-FROM mutation
- INNER JOIN mutation_event AS me ON mutation.mutation_event_id = me.mutation_event_id
- INNER JOIN sample_profile sp
- ON mutation.sample_id = sp.sample_id AND mutation.genetic_profile_id = sp.genetic_profile_id
- LEFT JOIN gene_panel gp ON sp.panel_id = gp.internal_id
- LEFT JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id
- INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id
- INNER JOIN sample ON mutation.sample_id = sample.internal_id
- INNER JOIN patient on sample.patient_id = patient.internal_id
- LEFT JOIN gene ON mutation.entrez_gene_id = gene.entrez_gene_id
-UNION ALL
-SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id,
- gene.hugo_gene_symbol AS hugo_gene_symbol,
- ifNull(gp.stable_id, 'WES') AS gene_panel_stable_id,
- cs.cancer_study_identifier AS cancer_study_identifier,
- g.stable_id AS genetic_profile_stable_id,
- 'cna' AS variant_type,
- 'NA' AS mutation_variant,
- 'NA' AS mutation_type,
- 'NA' AS mutation_status,
- 'NA' AS driver_filter,
- 'NA' AS drivet_tiers_filter,
- ce.alteration AS cna_alteration,
- rgg.cytoband AS cna_cytoband,
- '' AS sv_event_info,
- concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id
-FROM cna_event ce
- INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id
- INNER JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id
- LEFT JOIN gene_panel gp ON sp.panel_id = gp.internal_id
- INNER JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id
- INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id
- INNER JOIN sample ON sce.sample_id = sample.internal_id
- INNER JOIN patient on sample.patient_id = patient.internal_id
- INNER JOIN gene ON ce.entrez_gene_id = gene.entrez_gene_id
- INNER JOIN reference_genome_gene rgg ON rgg.entrez_gene_id = ce.entrez_gene_id
-UNION ALL
-SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id,
- gene.hugo_gene_symbol AS hugo_gene_symbol,
- ifNull(gene_panel.stable_id, 'WES') AS gene_panel_stable_id,
- cs.cancer_study_identifier AS cancer_study_identifier,
- gp.stable_id AS genetic_profile_stable_id,
- 'structural_variant' AS variant_type,
- 'NA' AS mutation_variant,
- 'NA' AS mutation_type,
- 'NA' AS mutation_status,
- 'NA' AS driver_filter,
- 'NA' AS drivet_tiers_filter,
- NULL AS cna_alteration,
- '' AS cna_cytoband,
- event_info AS sv_event_info,
- concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id
-FROM structural_variant sv
- INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id
- INNER JOIN sample s ON sv.sample_id = s.internal_id
- INNER JOIN patient on s.patient_id = patient.internal_id
- INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id
- INNER JOIN gene ON sv.site1_entrez_gene_id = gene.entrez_gene_id
- INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id
- LEFT JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id
-UNION ALL
-SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id,
- gene.hugo_gene_symbol AS hugo_gene_symbol,
- ifNull(gene_panel.stable_id, 'WES') AS gene_panel_stable_id,
- cs.cancer_study_identifier AS cancer_study_identifier,
- gp.stable_id AS genetic_profile_stable_id,
- 'structural_variant' AS variant_type,
- 'NA' AS mutation_variant,
- 'NA' AS mutation_type,
- 'NA' AS mutation_status,
- 'NA' AS driver_filter,
- 'NA' AS drivet_tiers_filter,
- NULL AS cna_alteration,
- '' AS cna_cytoband,
- event_info AS sv_event_info,
- concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id
-FROM structural_variant sv
- INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id
- INNER JOIN sample s ON sv.sample_id = s.internal_id
- INNER JOIN patient on s.patient_id = patient.internal_id
- INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id
- INNER JOIN gene ON sv.site2_entrez_gene_id = gene.entrez_gene_id
- INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id
- LEFT JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id
-WHERE
- sv.site2_entrez_gene_id != sv.site1_entrez_gene_id
- OR sv.site1_entrez_gene_id IS NULL;
-
-INSERT INTO sample_derived
-SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id,
- base64Encode(sample.stable_id) AS sample_unique_id_base64,
- sample.stable_id AS sample_stable_id,
- concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id,
- p.stable_id AS patient_stable_id,
- base64Encode(p.stable_id) AS patient_unique_id_base64,
- cs.cancer_study_identifier AS cancer_study_identifier,
- sample.internal_id AS internal_id
-FROM sample
- INNER JOIN patient AS p ON sample.patient_id = p.internal_id
- INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id
\ No newline at end of file
diff --git a/src/main/resources/db-scripts/clickhouse/derived_tables.sql b/src/main/resources/db-scripts/clickhouse/derived_tables.sql
deleted file mode 100644
index 0e982ed106f..00000000000
--- a/src/main/resources/db-scripts/clickhouse/derived_tables.sql
+++ /dev/null
@@ -1,47 +0,0 @@
--- clinical_data_derived
-DROP TABLE IF EXISTS clinical_data_derived;
-CREATE TABLE IF NOT EXISTS clinical_data_derived
-(
- sample_unique_id String,
- patient_unique_id String,
- attribute_name String,
- attribute_value String,
- cancer_study_identifier String,
- type String
-)
- ENGINE=MergeTree
- ORDER BY sample_unique_id;
-
--- Insert sample attribute data
-INSERT INTO TABLE clinical_data_derived
-SELECT sm.sample_unique_id AS sample_unique_id,
- sm.patient_unique_id AS patient_unique_id,
- cam.attr_id AS attribute_name,
- csamp.attr_value AS attribute_value,
- cs.cancer_study_identifier AS cancer_study_identifier,
- 'sample' AS type
-FROM sample_mv AS sm
- INNER JOIN cancer_study AS cs
- ON sm.cancer_study_identifier = cs.cancer_study_identifier
- FULL OUTER JOIN clinical_attribute_meta AS cam
- ON cs.cancer_study_id = cam.cancer_study_id
- FULL OUTER JOIN clinical_sample AS csamp
- ON (sm.internal_id = csamp.internal_id) AND (csamp.attr_id = cam.attr_id)
-WHERE cam.patient_attribute = 0;
-
--- INSERT patient attribute data
-INSERT INTO TABLE clinical_data_derived
-SELECT NULL AS sample_unique_id,
- concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id,
- cam.attr_id AS attribute_name,
- clinpat.attr_value AS attribute_value,
- cs.cancer_study_identifier AS cancer_study_identifier,
- 'patient' AS type
-FROM patient AS p
- INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id
- FULL OUTER JOIN clinical_attribute_meta AS cam
- ON cs.cancer_study_id = cam.cancer_study_id
- FULL OUTER JOIN clinical_patient AS clinpat
- ON (p.internal_id = clinpat.internal_id) AND (clinpat.attr_id = cam.attr_id)
-WHERE cam.patient_attribute = 1;
-
diff --git a/src/main/resources/db-scripts/clickhouse/materialized_views.sql b/src/main/resources/db-scripts/clickhouse/materialized_views.sql
index 8595d6edb4d..0bde005094b 100644
--- a/src/main/resources/db-scripts/clickhouse/materialized_views.sql
+++ b/src/main/resources/db-scripts/clickhouse/materialized_views.sql
@@ -1,46 +1,6 @@
-DROP TABLE IF EXISTS sample_columnstore;
DROP TABLE IF EXISTS sample_list_columnstore;
-DROP VIEW IF EXISTS sample_columnstore_mv;
DROP VIEW IF EXISTS sample_list_columnstore_mv;
--- sample_columnstore
-CREATE TABLE IF NOT EXISTS sample_columnstore
-(
- sample_unique_id VARCHAR(45),
- sample_unique_id_base64 VARCHAR(45),
- sample_stable_id VARCHAR(45),
- patient_unique_id VARCHAR(45),
- patient_unique_id_base64 VARCHAR(45),
- patient_stable_id VARCHAR(45),
- cancer_study_identifier VARCHAR(45)
-)
- ENGINE = MergeTree
- ORDER BY (sample_unique_id, patient_unique_id, cancer_study_identifier);
-
-INSERT INTO sample_columnstore
-SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
- base64Encode(sample.stable_id) as sample_unique_id_base64,
- sample.stable_id as sample_stable_id,
- concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
- p.stable_id as patient_stable_id,
- base64Encode(p.stable_id) as patient_unique_id_base64,
- cs.cancer_study_identifier as cancer_study_identifier
-FROM sample
- INNER JOIN patient p on sample.patient_id = p.internal_id
- INNER JOIN cancer_study cs on p.cancer_study_id = cs.cancer_study_id;
-
-CREATE MATERIALIZED VIEW sample_columnstore_mv TO sample_columnstore AS
-SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
- sample.stable_id as sample_stable_id,
- base64Encode(sample.stable_id) as sample_unique_id_base64,
- concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
- p.stable_id as patient_stable_id,
- base64Encode(p.stable_id) as patient_unique_id_base64,
- cs.cancer_study_identifier as cancer_study_identifier
-FROM sample
- INNER JOIN patient p on sample.patient_id = p.internal_id
- INNER JOIN cancer_study cs on p.cancer_study_id = cs.cancer_study_id;
-
CREATE TABLE IF NOT EXISTS sample_list_columnstore
(
sample_unique_id VARCHAR(45),
@@ -70,26 +30,3 @@ FROM sample_list as sl
INNER JOIN sample_list_list AS sll on sll.list_id = sl.list_id
INNER JOIN sample AS s on s.internal_id = sll.sample_id
INNER JOIN cancer_study cs on sl.cancer_study_id = cs.cancer_study_id;
-
-
-
--- SAMPLE_MV
-DROP VIEW IF EXISTS sample_mv;
-CREATE MATERIALIZED VIEW sample_mv
- ENGINE = AggregatingMergeTree()
- ORDER BY internal_id
- SETTINGS allow_nullable_key = 1
- POPULATE
-AS
-
-SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id,
- base64Encode(sample.stable_id) AS sample_unique_id_base64,
- sample.stable_id AS sample_stable_id,
- concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id,
- p.stable_id AS patient_stable_id,
- base64Encode(p.stable_id) AS patient_unique_id_base64,
- cs.cancer_study_identifier AS cancer_study_identifier,
- sample.internal_id AS internal_id
-FROM sample
- INNER JOIN patient AS p ON sample.patient_id = p.internal_id
- INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id;
diff --git a/src/main/resources/db-scripts/clickhouse/views.sql b/src/main/resources/db-scripts/clickhouse/views.sql
deleted file mode 100644
index 27845d1052b..00000000000
--- a/src/main/resources/db-scripts/clickhouse/views.sql
+++ /dev/null
@@ -1,27 +0,0 @@
-DROP VIEW IF EXISTS sample_view;
-DROP VIEW IF EXISTS sample_list_view;
-
-
-CREATE VIEW sample_view
- AS
-SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
- base64Encode(sample.stable_id) as sample_unique_id_base64,
- sample.stable_id as sample_stable_id,
- concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
- p.stable_id as patient_stable_id,
- base64Encode(p.stable_id) as patient_unique_id_base64,
- cs.cancer_study_identifier as cancer_study_identifier
-FROM sample
- INNER JOIN patient p on sample.patient_id = p.internal_id
- INNER JOIN cancer_study cs on p.cancer_study_id = cs.cancer_study_id;
-
-CREATE VIEW sample_list_view
- AS
-SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
- sl.stable_id as sample_list_stable_id,
- sl.name as name,
- cs.cancer_study_identifier as cancer_study_identifier
-FROM sample_list as sl
- INNER JOIN sample_list_list AS sll on sll.list_id = sl.list_id
- INNER JOIN sample AS s on s.internal_id = sll.sample_id
- INNER JOIN cancer_study cs on sl.cancer_study_id = cs.cancer_study_id;
\ No newline at end of file
diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml
index 078afabea47..78926fff053 100644
--- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml
+++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml
@@ -257,8 +257,8 @@
SELECT
gene as hugoGeneSymbol,
COUNT(*) as numberOfProfiledCases
- FROM sample_to_gene_panel stgp
- INNER JOIN gene_panel_to_gene gptg on stgp.gene_panel_id = gptg.gene_panel_id
+ FROM sample_to_gene_panel_derived stgp
+ INNER JOIN gene_panel_to_gene_derived gptg on stgp.gene_panel_id = gptg.gene_panel_id
stgp.alteration_type = '${alterationType}'
AND
@@ -277,7 +277,7 @@