From 9b1c3ee04dfa08424089af32337b59da4b00746b Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Thu, 7 Sep 2023 14:42:18 -0400 Subject: [PATCH] feat!: add / update to copy number methods from metaschema branch (#508) Close #450 , #458 , #465 , #444 Notes: - feat: add parsed to copy number change endpoint (#453) - fix!: /parsed_to_cn_var should accept number, definite range, and indefinite range (#455) - feat: add do_liftover param in parsed to copy number endpoints (#459) - refactor!: parsed to copy number should accept request body (#460) - fix: Copy Number Count copies should be Number or Indef/Def Range (#468) - fix: allow comparator to be set in start/end values (#469) --- ...cnv.py => test_amplification_to_cx_var.py} | 0 .../test_parsed_to_abs_cnv.py | 1076 +++++++++++++++-- variation/main.py | 120 +- variation/query.py | 2 +- variation/schemas/copy_number_schema.py | 448 +++++++ .../schemas/hgvs_to_copy_number_schema.py | 12 +- variation/schemas/service_schema.py | 143 +-- variation/to_copy_number_variation.py | 665 ++++++---- 8 files changed, 1944 insertions(+), 522 deletions(-) rename tests/to_copy_number_variation/{test_amplification_to_rel_cnv.py => test_amplification_to_cx_var.py} (100%) create mode 100644 variation/schemas/copy_number_schema.py diff --git a/tests/to_copy_number_variation/test_amplification_to_rel_cnv.py b/tests/to_copy_number_variation/test_amplification_to_cx_var.py similarity index 100% rename from tests/to_copy_number_variation/test_amplification_to_rel_cnv.py rename to tests/to_copy_number_variation/test_amplification_to_cx_var.py diff --git a/tests/to_copy_number_variation/test_parsed_to_abs_cnv.py b/tests/to_copy_number_variation/test_parsed_to_abs_cnv.py index 35ec1028..c10c805f 100644 --- a/tests/to_copy_number_variation/test_parsed_to_abs_cnv.py +++ b/tests/to_copy_number_variation/test_parsed_to_abs_cnv.py @@ -1,18 +1,33 @@ -"""Test that parsed_to_cn_var works correctly""" +"""Test that parsed_to_copy_number works correctly""" +from copy import deepcopy + import pytest -from ga4gh.vrsatile.pydantic.vrs_models import CopyNumberCount +from ga4gh.vrs import models +from ga4gh.vrsatile.pydantic.vrs_models import ( + Comparator, + CopyChange, + CopyNumberChange, + CopyNumberCount, + VRSTypes, +) +from pydantic.error_wrappers import ValidationError -from variation.schemas.service_schema import ClinVarAssembly +from variation.schemas.copy_number_schema import ( + ClinVarAssembly, + ParsedToCnVarQuery, + ParsedToCxVarQuery, +) +from variation.to_copy_number_variation import ToCopyNumberError @pytest.fixture(scope="module") -def copy_number_gain1(): +def cn_gain1(): """Create test fixture for clinvar copy number gain. https://www.ncbi.nlm.nih.gov/clinvar/variation/145208/?new_evidence=true """ variation = { "type": "CopyNumberCount", - "id": "ga4gh:CN.N6C9rWBjrNuiIhJkPxdPlRKvSGKoFynr", + "_id": "ga4gh:CN.N6C9rWBjrNuiIhJkPxdPlRKvSGKoFynr", "subject": { "type": "SequenceLocation", "_id": "ga4gh:VSL.JTsxd9PiPZaIPL9Tl3ss78GYYnDeogvf", @@ -37,13 +52,13 @@ def copy_number_gain1(): @pytest.fixture(scope="module") -def copy_number_gain2(): +def cn_gain2(): """Create test fixture for clinvar copy number gain. https://www.ncbi.nlm.nih.gov/clinvar/variation/146181/?new_evidence=true """ variation = { "type": "CopyNumberCount", - "id": "ga4gh:CN.xOEIBXGfoM8TUA2RKNWINRze_hWT1lPP", + "_id": "ga4gh:CN.xOEIBXGfoM8TUA2RKNWINRze_hWT1lPP", "subject": { "type": "SequenceLocation", "_id": "ga4gh:VSL.9moblqAMqfEryr9pRUxqZMiOkqbsy5Ml", @@ -68,13 +83,44 @@ def copy_number_gain2(): @pytest.fixture(scope="module") -def copy_number_loss1(): +def cn_gain2_37(): + """Create test fixture for clinvar copy number gain on GRCh37 assembly. + https://www.ncbi.nlm.nih.gov/clinvar/variation/146181/?new_evidence=true + """ + variation = { + "type": "CopyNumberCount", + "_id": "ga4gh:CN.xBZNtPDxQMQh-YH6dRSNweB5unZOH2Sd", + "subject": { + "type": "SequenceLocation", + "_id": "ga4gh:VSL.ZDrbZBtlCmJShhfLTmaNDsBpDDC3v9_A", + "sequence_id": "ga4gh:SQ.zIMZb3Ft7RdWa5XYq0PxIlezLY2ccCgt", + "interval": { + "type": "SequenceInterval", + "start": { + "type": "IndefiniteRange", + "value": 32031011, + "comparator": "<=", + }, + "end": { + "type": "IndefiniteRange", + "value": 32509926, + "comparator": ">=", + }, + }, + }, + "copies": {"type": "Number", "value": 2}, + } + return CopyNumberCount(**variation) + + +@pytest.fixture(scope="module") +def cn_loss1(): """Create test fixture for clinvar copy number loss. https://www.ncbi.nlm.nih.gov/clinvar/variation/146181/?new_evidence=true """ variation = { "type": "CopyNumberCount", - "id": "ga4gh:CN.IhWQwwhYFtjQAG7BejsVYy-KiM4RMyed", + "_id": "ga4gh:CN.IhWQwwhYFtjQAG7BejsVYy-KiM4RMyed", "subject": { "type": "SequenceLocation", "_id": "ga4gh:VSL.Szlw1t4YMuaO7lLwFJ-T7fGTcXuhNNKB", @@ -99,13 +145,13 @@ def copy_number_loss1(): @pytest.fixture(scope="module") -def copy_number_loss2(): +def cn_loss2(): """Create test fixture for clinvar copy number loss. https://www.ncbi.nlm.nih.gov/clinvar/variation/148425/?new_evidence=true """ variation = { "type": "CopyNumberCount", - "id": "ga4gh:CN.sxRwv8l26F1PdcovpJR5HEpgFOY8J95Q", + "_id": "ga4gh:CN.sxRwv8l26F1PdcovpJR5HEpgFOY8J95Q", "subject": { "type": "SequenceLocation", "_id": "ga4gh:VSL.Bp-86GeYti1DBmrj_Dtz7qNIMF5ygx5y", @@ -129,181 +175,977 @@ def copy_number_loss2(): return CopyNumberCount(**variation) -def test_parsed_copy_number_gain( - test_cnv_handler, copy_number_gain1, copy_number_gain2 -): - """Test that parsed_to_cn_var works for parsed copy number gain queries""" +@pytest.fixture(scope="module") +def cn_definite_number(): + """Create test fixture for copy number count using definite range for start and + number for end + """ + variation = { + "type": "CopyNumberCount", + "_id": "ga4gh:CN.aATLCiOlVCq5BKMCcTDBqYVEliC49tPR", + "subject": { + "type": "SequenceLocation", + "_id": "ga4gh:VSL.A-UPQD0yO3qJX9x24xA08epIwwTl0I0C", + "sequence_id": "ga4gh:SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU", + "interval": { + "start": {"type": "DefiniteRange", "min": 143134062, "max": 143134064}, + "end": {"type": "Number", "value": 143284670}, + }, + }, + "copies": {"type": "Number", "value": 3}, + } + return CopyNumberCount(**variation) + + +@pytest.fixture(scope="module") +def cx_numbers(): + """Create test fixture for copy number change using numbers for start and end""" + variation = { + "type": "CopyNumberChange", + "_id": "ga4gh:CX.KYAQwf8-DQu23LsDbFHP0BiRzrCmu46x", + "subject": { + "type": "SequenceLocation", + "_id": "ga4gh:VSL.djv1Oq_qNjDialZakqQGoUAJvohREPBL", + "sequence_id": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + "interval": { + "start": {"type": "Number", "value": 10000}, + "end": {"type": "Number", "value": 1223133}, + }, + }, + "copy_change": "efo:0030069", + } + return CopyNumberChange(**variation) + + +@pytest.fixture(scope="module") +def cx_definite_ranges(): + """Create test fixture for copy number change using definite ranges for start and + end + """ + variation = { + "type": "CopyNumberChange", + "_id": "ga4gh:CX.W3abRGYwOwJcvk1kezbHrerd3tyF8SxE", + "subject": { + "type": "SequenceLocation", + "_id": "ga4gh:VSL.Fmrkp62nMVu2Ii4ceQyWBEYvdxSL2WiE", + "sequence_id": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + "interval": { + "start": {"type": "DefiniteRange", "min": 10000, "max": 10005}, + "end": {"type": "DefiniteRange", "min": 1223131, "max": 1223134}, + }, + }, + "copy_change": "efo:0030069", + } + return CopyNumberChange(**variation) + + +@pytest.fixture(scope="module") +def cx_indefinite_ranges(): + """Create test fixture for copy number change using indefinite ranges for start and + end + """ + variation = { + "type": "CopyNumberChange", + "_id": "ga4gh:CX.tvy_rL1vob0qbdTYSJYbQX-eSiXD0F7s", + "subject": { + "type": "SequenceLocation", + "_id": "ga4gh:VSL.OORU6dTBtpXb6VPPsMwBF7W-5x4uouHl", + "sequence_id": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + "interval": { + "start": { + "type": "IndefiniteRange", + "comparator": "<=", + "value": 10000, + }, + "end": { + "type": "IndefiniteRange", + "comparator": ">=", + "value": 1223130, + }, + }, + }, + "copy_change": "efo:0030069", + } + return CopyNumberChange(**variation) + + +@pytest.fixture(scope="module") +def cx_number_indefinite(): + """Create test fixture for copy number change using number for start and indefinite + range for end + """ + variation = { + "type": "CopyNumberChange", + "_id": "ga4gh:CX.usMUXixq6VPvzqOL5EyesdZOqMVf9IVP", + "subject": { + "type": "SequenceLocation", + "_id": "ga4gh:VSL.E5SD9yRYzWTLaV2JXGSvM6-pDh8w82Sv", + "sequence_id": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + "interval": { + "start": {"type": "Number", "value": 10000}, + "end": { + "type": "IndefiniteRange", + "comparator": ">=", + "value": 1223130, + }, + }, + }, + "copy_change": "efo:0030069", + } + return CopyNumberChange(**variation) + + +def test_get_parsed_ac(test_cnv_handler): + """Test that _get_parsed_ac works correctly""" + for assembly in [ClinVarAssembly.GRCH37, ClinVarAssembly.HG19]: + resp = test_cnv_handler._get_parsed_ac(assembly, "chr7", use_grch38=False) + assert resp.lifted_over is False + assert resp.accession == "ga4gh:SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86" + + resp = test_cnv_handler._get_parsed_ac(assembly, "chr7", use_grch38=True) + assert resp.lifted_over is True + assert resp.accession == "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" + + for assembly in [ClinVarAssembly.GRCH38, ClinVarAssembly.HG38]: + resp = test_cnv_handler._get_parsed_ac(assembly, "chr7", use_grch38=False) + assert resp.lifted_over is False + assert resp.accession == "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" + + resp = test_cnv_handler._get_parsed_ac(assembly, "chr7", use_grch38=True) + assert resp.lifted_over is False + assert resp.accession == "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" + + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._get_parsed_ac( + ClinVarAssembly.NCBI36, "chr7", use_grch38=False + ) + assert str(e.value) == "NCBI36 assembly is not currently supported" + + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._get_parsed_ac(ClinVarAssembly.HG18, "chr7", use_grch38=False) + assert str(e.value) == "hg18 assembly is not currently supported" + + +def test_get_parsed_ac_chr(test_cnv_handler): + """Test that _get_parsed_ac_chr works correctly""" + resp = test_cnv_handler._get_parsed_ac_chr("NC_000007.13", False) + assert resp.accession == "ga4gh:SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86" + assert resp.chromosome == "chr7" + assert resp.lifted_over is False + + resp = test_cnv_handler._get_parsed_ac_chr("NC_000007.13", True) + assert resp.accession == "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" + assert resp.chromosome == "chr7" + assert resp.lifted_over is True + + for do_liftover in [True, False]: + resp = test_cnv_handler._get_parsed_ac_chr("NC_000007.14", do_liftover) + assert resp.accession == "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" + assert resp.chromosome == "chr7" + assert resp.lifted_over is False + + # if genomic ac not provided + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._get_parsed_ac_chr("NP_000542.1", False) + assert str(e.value) == "Not a supported genomic accession: NP_000542.1" + + # invalid accession + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._get_parsed_ac_chr("NC_00000713", False) + assert ( + str(e.value) == "SeqRepo unable to get translated identifiers for NC_00000713" + ) + + +def test_validate_pos(test_cnv_handler): + """Test that _validate_ac_pos works correctly""" + resp = test_cnv_handler._validate_ac_pos("NC_000007.14", 140753336) + assert resp is None + + # invalid accession + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._validate_ac_pos("NC_00000714", 140753336) + assert str(e.value) == "Accession not found in SeqRepo: NC_00000714" + + # invalid pos + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._validate_ac_pos("NC_000007.14", 159345975) + assert str(e.value) == "Position (159345975) is not valid on NC_000007.14" + + # invalid pos + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._validate_ac_pos("NC_000007.14", 99999999999) + assert str(e.value) == "SeqRepo ValueError: Position out of range (99999999998)" + + +def test_get_vrs_loc_start_or_end(test_cnv_handler): + """Test that _get_vrs_loc_start_or_end works correctly""" + ac = "NC_000007.14" + pos0 = 140753336 + pos1 = 140753350 + + # Number start + resp = test_cnv_handler._get_vrs_loc_start_or_end( + ac, pos0, VRSTypes.NUMBER, is_start=True + ) + assert resp == models.Number(value=140753335, type="Number") + + # Number end + resp = test_cnv_handler._get_vrs_loc_start_or_end( + ac, pos0, VRSTypes.NUMBER, is_start=False + ) + assert resp == models.Number(value=140753336, type="Number") + + # Definite Range start + resp = test_cnv_handler._get_vrs_loc_start_or_end( + ac, pos0, VRSTypes.DEFINITE_RANGE, is_start=True, pos1=pos1 + ) + assert resp == models.DefiniteRange( + min=140753335, max=140753349, type="DefiniteRange" + ) + + # Definite Range end + resp = test_cnv_handler._get_vrs_loc_start_or_end( + ac, pos0, VRSTypes.DEFINITE_RANGE, is_start=False, pos1=pos1 + ) + assert resp == models.DefiniteRange( + min=140753337, max=140753351, type="DefiniteRange" + ) + + # Indefinite Range start + resp = test_cnv_handler._get_vrs_loc_start_or_end( + ac, + pos0, + VRSTypes.INDEFINITE_RANGE, + is_start=True, + comparator=Comparator.LT_OR_EQUAL, + ) + assert resp == models.IndefiniteRange( + comparator="<=", value=140753335, type="IndefiniteRange" + ) + + # Indefinite Range end + resp = test_cnv_handler._get_vrs_loc_start_or_end( + ac, + pos0, + VRSTypes.INDEFINITE_RANGE, + is_start=False, + comparator=Comparator.GT_OR_EQUAL, + ) + assert resp == models.IndefiniteRange( + comparator=">=", value=140753336, type="IndefiniteRange" + ) + + +def test_liftover_pos(test_cnv_handler): + """Test that _liftover_pos works correctly""" + resp = test_cnv_handler._liftover_pos("chr7", 140453136, 140453137, None, None) + assert resp == { + "start0": 140753336, + "end0": 140753337, + "start1": None, + "end1": None, + } + + resp = test_cnv_handler._liftover_pos( + "chr7", 140453136, 140453137, 140453138, 140453139 + ) + assert resp == { + "start0": 140753336, + "end0": 140753337, + "start1": 140753338, + "end1": 140753339, + } + + # invalid pos + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._liftover_pos("chr7", 159345975, 159345976, None, None) + assert str(e.value) == "Unable to liftover: chr7 with pos 159345975" + + +def test_parsed_copy_number_gain(test_cnv_handler, cn_gain1, cn_gain2, cn_gain2_37): + """Test that parsed_to_copy_number works for parsed copy number gain queries""" # https://www.ncbi.nlm.nih.gov/clinvar/variation/145208/?new_evidence=true - resp = test_cnv_handler.parsed_to_cn_var( - 143134063, 143284670, 3, assembly=ClinVarAssembly.GRCH37, chr="chr1" + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, ) - assert resp.copy_number_count.dict() == copy_number_gain1.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_gain1.dict() assert resp.warnings == [] - resp = test_cnv_handler.parsed_to_cn_var( - 143134063, 143284670, 3, assembly=ClinVarAssembly.HG19, chr="chr1" + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + assembly=ClinVarAssembly.HG19, + chromosome="chr1", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, ) - assert resp.copy_number_count.dict() == copy_number_gain1.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_gain1.dict() assert resp.warnings == [] - resp = test_cnv_handler.parsed_to_cn_var( - 143134063, 143284670, 3, accession="NC_000001.10" + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + accession="NC_000001.10", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, ) - assert resp.copy_number_count.dict() == copy_number_gain1.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_gain1.dict() assert resp.warnings == [] # https://www.ncbi.nlm.nih.gov/clinvar/variation/146181/?new_evidence=true - resp = test_cnv_handler.parsed_to_cn_var( - 31738809, 32217725, 2, assembly=ClinVarAssembly.GRCH38, chr="chr15" + # 38 + rb = ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + assembly=ClinVarAssembly.GRCH38, + chromosome="chr15", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, ) - assert resp.copy_number_count.dict() == copy_number_gain2.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_gain2.dict() assert resp.warnings == [] - resp = test_cnv_handler.parsed_to_cn_var( - 31738809, 32217725, 2, assembly=ClinVarAssembly.GRCH38, chr="15" + # 38 with liftover (shouldnt do anything) + rb = ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + assembly=ClinVarAssembly.GRCH38, + chromosome="chr15", + do_liftover=True, + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, ) - assert resp.copy_number_count.dict() == copy_number_gain2.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_gain2.dict() assert resp.warnings == [] - resp = test_cnv_handler.parsed_to_cn_var( - 31738809, 32217725, 2, assembly=ClinVarAssembly.HG38, chr="chr15" + # 38 with liftover (shouldnt do anything) + rb = ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + assembly=ClinVarAssembly.HG38, + chromosome="chr15", + do_liftover=True, + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, ) - assert resp.copy_number_count.dict() == copy_number_gain2.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_gain2.dict() assert resp.warnings == [] - resp = test_cnv_handler.parsed_to_cn_var( - 31738809, 32217725, 2, accession="NC_000015.10" + # 38 + rb = ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + assembly=ClinVarAssembly.HG38, + chromosome="chr15", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, ) - assert resp.copy_number_count.dict() == copy_number_gain2.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_gain2.dict() assert resp.warnings == [] + # 38 accession + rb = ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + accession="NC_000015.10", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_gain2.dict() + assert resp.warnings == [] -def test_parsed_copy_number_loss( - test_cnv_handler, copy_number_loss1, copy_number_loss2 -): - """Test that parsed_to_cn_var works for parsed copy number loss queries""" - # https://www.ncbi.nlm.nih.gov/clinvar/variation/1299222/?new_evidence=true - resp = test_cnv_handler.parsed_to_cn_var( - 10491132, 10535643, 1, assembly=ClinVarAssembly.GRCH37, chr="chrX" + # 38 accession with liftover (shouldnt do anything) + rb = ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + accession="NC_000015.10", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, ) - assert resp.copy_number_count.dict() == copy_number_loss1.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_gain2.dict() assert resp.warnings == [] - resp = test_cnv_handler.parsed_to_cn_var( - 10491132, 10535643, 1, assembly=ClinVarAssembly.HG19, chr="chrX" + # 37 with liftover + rb = ParsedToCnVarQuery( + start0=32031012, + end0=32509926, + copies0=2, + accession="NC_000015.9", + do_liftover=True, + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, ) - assert resp.copy_number_count.dict() == copy_number_loss1.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_gain2.dict() assert resp.warnings == [] - resp = test_cnv_handler.parsed_to_cn_var( - 10491132, 10535643, 1, assembly=ClinVarAssembly.HG19, chr="X" + # 37 chr+accession with liftover + rb = ParsedToCnVarQuery( + start0=32031012, + end0=32509926, + copies0=2, + chromosome="chr15", + assembly=ClinVarAssembly.GRCH37, + do_liftover=True, + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, ) - assert resp.copy_number_count.dict() == copy_number_loss1.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_gain2.dict() assert resp.warnings == [] - resp = test_cnv_handler.parsed_to_cn_var( - 10491132, 10535643, 1, accession="NC_000023.10" + # 37 with no liftover + rb = ParsedToCnVarQuery( + start0=32031012, + end0=32509926, + copies0=2, + accession="NC_000015.9", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_gain2_37.dict() + assert resp.warnings == [] + + # 37 chr+accession with no liftover + rb = ParsedToCnVarQuery( + start0=32031012, + end0=32509926, + copies0=2, + chromosome="chr15", + assembly=ClinVarAssembly.GRCH37, + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_gain2_37.dict() + assert resp.warnings == [] + + +def test_parsed_copy_number_loss(test_cnv_handler, cn_loss1, cn_loss2): + """Test that parsed_to_copy_number works for parsed copy number loss queries""" + # https://www.ncbi.nlm.nih.gov/clinvar/variation/1299222/?new_evidence=true + rb = ParsedToCnVarQuery( + start0=10491132, + end0=10535643, + copies0=1, + assembly=ClinVarAssembly.GRCH37, + chromosome="chrX", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, ) - assert resp.copy_number_count.dict() == copy_number_loss1.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_loss1.dict() + assert resp.warnings == [] + + rb = ParsedToCnVarQuery( + start0=10491132, + end0=10535643, + copies0=1, + assembly=ClinVarAssembly.HG19, + chromosome="chrX", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_loss1.dict() + assert resp.warnings == [] + + rb = ParsedToCnVarQuery( + start0=10491132, + end0=10535643, + copies0=1, + accession="NC_000023.10", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_loss1.dict() assert resp.warnings == [] # https://www.ncbi.nlm.nih.gov/clinvar/variation/148425/?new_evidence=true - resp = test_cnv_handler.parsed_to_cn_var( - 10001, 1223133, 0, assembly=ClinVarAssembly.GRCH38, chr="chrY" + rb = ParsedToCnVarQuery( + start0=10001, + end0=1223133, + copies0=0, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_loss2.dict() + assert resp.warnings == [] + + rb = ParsedToCnVarQuery( + start0=10001, + end0=1223133, + copies0=0, + assembly=ClinVarAssembly.HG38, + chromosome="chrY", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_loss2.dict() + assert resp.warnings == [] + + rb = ParsedToCnVarQuery( + start0=10001, + end0=1223133, + copies0=0, + accession="NC_000024.10", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_loss2.dict() + assert resp.warnings == [] + + +def test_to_parsed_cn_var(test_cnv_handler, cn_definite_number): + """Test that parsed_to_copy_number works correctly for copy number count""" + # start uses definite and end uses number + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=VRSTypes.DEFINITE_RANGE, + start1=143134065, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict() == cn_definite_number.dict() + assert resp.warnings == [] + + # copies is definite range + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + copies1=5, + copies_type=VRSTypes.DEFINITE_RANGE, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=VRSTypes.DEFINITE_RANGE, + start1=143134065, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + expected = deepcopy(cn_definite_number.dict(by_alias=True)) + expected["copies"] = {"type": "DefiniteRange", "min": 3, "max": 5} + expected["_id"] = "ga4gh:CN.a4afOf1fvsbTeJchw0-Pu0IKcl3qg4Gx" + assert resp.copy_number_count.dict(by_alias=True) == expected + assert resp.warnings == [] + + # copies is indefinite range <= + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + copies_comparator=Comparator.LT_OR_EQUAL, + copies_type=VRSTypes.INDEFINITE_RANGE, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=VRSTypes.DEFINITE_RANGE, + start1=143134065, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + expected = deepcopy(cn_definite_number.dict(by_alias=True)) + expected["copies"] = {"type": "IndefiniteRange", "comparator": "<=", "value": 3} + expected["_id"] = "ga4gh:CN.-Mzi9_FMDbTCSxaiK_FeScOWL4Hk_ewE" + assert resp.copy_number_count.dict(by_alias=True) == expected + assert resp.warnings == [] + + # copies is indefinite range >= + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + copies_comparator=Comparator.GT_OR_EQUAL, + copies_type=VRSTypes.INDEFINITE_RANGE, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=VRSTypes.DEFINITE_RANGE, + start1=143134065, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + expected = deepcopy(cn_definite_number.dict(by_alias=True)) + expected["copies"] = {"type": "IndefiniteRange", "comparator": ">=", "value": 3} + expected["_id"] = "ga4gh:CN.y6QsEjqm13HDr7OpV0tcHvX_7vTpNiuO" + assert resp.copy_number_count.dict(by_alias=True) == expected + assert resp.warnings == [] + + # start_pos and end_pos indefinite range + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.GT_OR_EQUAL, + end_pos_comparator=Comparator.LT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.dict(by_alias=True) == { + "type": "CopyNumberCount", + "_id": "ga4gh:CN.mrldOoV1cPJScJNU-q9gWxxviJdjYYyA", + "subject": { + "type": "SequenceLocation", + "_id": "ga4gh:VSL.pDAsXSkrLzEor_c1mbo257uKIt0Ii9sX", + "sequence_id": "ga4gh:SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU", + "interval": { + "type": "SequenceInterval", + "start": { + "type": "IndefiniteRange", + "value": 143134062, + "comparator": ">=", + }, + "end": { + "type": "IndefiniteRange", + "value": 143284670, + "comparator": "<=", + }, + }, + }, + "copies": {"type": "Number", "value": 3}, + } + + +def test_parsed_to_cx_var( + test_cnv_handler, + cx_numbers, + cx_definite_ranges, + cx_indefinite_ranges, + cx_number_indefinite, +): + """Test that parsed_to_copy_number works for copy number change""" + # start and end use number + rb = ParsedToCxVarQuery( + start0=10001, + end0=1223133, + copy_change=CopyChange.COMPLETE_GENOMIC_LOSS, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=VRSTypes.NUMBER, + end_pos_type=VRSTypes.NUMBER, ) - assert resp.copy_number_count.dict() == copy_number_loss2.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_change.dict() == cx_numbers.dict() assert resp.warnings == [] - resp = test_cnv_handler.parsed_to_cn_var( - 10001, 1223133, 0, assembly=ClinVarAssembly.HG38, chr="chrY" + # start and end use definite ranges + rb = ParsedToCxVarQuery( + start0=10001, + end0=1223130, + copy_change=CopyChange.COMPLETE_GENOMIC_LOSS, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=VRSTypes.DEFINITE_RANGE, + end_pos_type=VRSTypes.DEFINITE_RANGE, + start1=10006, + end1=1223133, ) - assert resp.copy_number_count.dict() == copy_number_loss2.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_change.dict() == cx_definite_ranges.dict() assert resp.warnings == [] - resp = test_cnv_handler.parsed_to_cn_var( - 10001, 1223133, 0, accession="NC_000024.10" + # start and end use indefinite ranges + rb = ParsedToCxVarQuery( + start0=10001, + end0=1223130, + copy_change=CopyChange.COMPLETE_GENOMIC_LOSS, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, ) - assert resp.copy_number_count.dict() == copy_number_loss2.dict() + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_change.dict() == cx_indefinite_ranges.dict() + assert resp.warnings == [] + + # start uses number and end use indefinite range + rb = ParsedToCxVarQuery( + start0=10001, + end0=1223130, + copy_change=CopyChange.COMPLETE_GENOMIC_LOSS, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=VRSTypes.NUMBER, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_change.dict() == cx_number_indefinite.dict() assert resp.warnings == [] def test_invalid(test_cnv_handler): - """Test invalid queries returns Text variation and warnings""" + """Test invalid copy number queries returns Text variation and warnings""" + # Invalid Copy Change + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=10491132, + end0=10535643, + copy_change="efo:1234", + accession="NC_000001.10", + ) + assert "value is not a valid enumeration member" in str(e.value) + # NCBI36/hg18 assembly - # https://www.ncbi.nlm.nih.gov/clinvar/variation/443961/?new_evidence=true - expected_w = ["NCBI36 assembly is not currently supported"] - resp = test_cnv_handler.parsed_to_cn_var( - 2623228, - 3150942, - 3, + rb = ParsedToCxVarQuery( + start0=2623228, + end0=3150942, + copy_change=CopyChange.GAIN, assembly=ClinVarAssembly.NCBI36, - chr="chr1", + chromosome="chr1", untranslatable_returns_text=True, ) - assert resp.copy_number_count.type == "Text" - assert resp.warnings == expected_w + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_change.type == "Text" + assert resp.warnings == ["NCBI36 assembly is not currently supported"] - resp = test_cnv_handler.parsed_to_cn_var( - 2623228, - 3150942, - 3, + rb = ParsedToCxVarQuery( + start0=2623228, + end0=3150942, + copy_change=CopyChange.GAIN, assembly=ClinVarAssembly.HG18, - chr="chr1", + chromosome="chr1", untranslatable_returns_text=True, ) - assert resp.copy_number_count.type == "Text" - assert resp.warnings == expected_w + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_change.type == "Text" + assert resp.warnings == ["hg18 assembly is not currently supported"] - # Must give both assembly + chr or accession - expected_w = ["Must provide either `accession` or both `assembly` and `chr`."] - resp = test_cnv_handler.parsed_to_cn_var( - 31738809, - 32217725, - 2, - assembly=ClinVarAssembly.HG38, - untranslatable_returns_text=True, - ) - assert resp.copy_number_count.type == "Text" - assert resp.warnings == expected_w + # Must give both assembly + chromosome or accession + ac_assembly_chr_msg = "Must provide either `accession` or both `assembly` and `chromosome`" # noqa: E501 + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=31738809, + end0=32217725, + copy_change=CopyChange.GAIN, + assembly="hg38", + untranslatable_returns_text=True, + ) + assert ac_assembly_chr_msg in str(e.value) - resp = test_cnv_handler.parsed_to_cn_var( - 31738809, 32217725, 2, chr="chr15", untranslatable_returns_text=True - ) - assert resp.copy_number_count.type == "Text" - assert resp.warnings == expected_w + # Must give both assembly + chromosome or accession + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=31738809, + end0=32217725, + copy_change=CopyChange.GAIN, + chromosome="chr15", + untranslatable_returns_text=True, + ) + assert ac_assembly_chr_msg in str(e.value) - resp = test_cnv_handler.parsed_to_cn_var( - 31738809, 32217725, 2, untranslatable_returns_text=True - ) - assert resp.copy_number_count.type == "Text" - assert resp.warnings == expected_w + # Must give both assembly + chromosome or accession + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=31738809, + end0=32217725, + copy_change=CopyChange.GAIN, + untranslatable_returns_text=True, + ) + assert ac_assembly_chr_msg in str(e.value) - # invalid chr - resp = test_cnv_handler.parsed_to_cn_var( - 10001, 1223133, 0, assembly=ClinVarAssembly.GRCH38, chr="z" + # invalid chromosome + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=10001, + end0=1223133, + copy_change=CopyChange.GAIN, + assembly=ClinVarAssembly.GRCH38, + chromosome="z", + ) + assert "`chromosome`, z, does not match r'^chr(X|Y|([1-9]|1[0-9]|2[0-2]))$'" in str( + e.value ) - assert resp.copy_number_count is None - assert resp.warnings == [ - "SeqRepo unable to get translated identifiers for GRCh38:z" - ] # invalid assembly - resp = test_cnv_handler.parsed_to_cn_var(10001, 1223133, 0, assembly="GRCh99") - assert resp.copy_number_count is None - assert resp.warnings + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=10001, end0=1223133, copy_change=CopyChange.GAIN, assembly="GRCh99" + ) + assert "value is not a valid enumeration member" in str(e.value) # invalid accession - resp = test_cnv_handler.parsed_to_cn_var( - 10491132, 10535643, 1, accession="NC_00002310" + rb = ParsedToCxVarQuery( + start0=10491132, + end0=10535643, + copy_change=CopyChange.GAIN, + accession="NC_00002310", ) - assert resp.copy_number_count is None + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_change is None assert resp.warnings == [ "SeqRepo unable to get translated identifiers for NC_00002310" ] # Invalid position - resp = test_cnv_handler.parsed_to_cn_var( - 31738809, 2302991250, 2, accession="NC_000015.10" + rb = ParsedToCxVarQuery( + start0=31738809, + end0=2302991250, + copy_change=CopyChange.GAIN, + accession="NC_000015.10", + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_change is None + assert resp.warnings == ["SeqRepo ValueError: Position out of range (2302991249)"] + + # start must be less than end + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=10001, + end0=1223130, + copy_change=CopyChange.COMPLETE_GENOMIC_LOSS, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=VRSTypes.DEFINITE_RANGE, + end_pos_type=VRSTypes.DEFINITE_RANGE, + start1=1223132, + end1=1223133, + ) + assert "end positions must be greater than start" in str(e.value) + + # start1 not provided + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=10001, + end0=1223130, + copy_change=CopyChange.COMPLETE_GENOMIC_LOSS, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=VRSTypes.DEFINITE_RANGE, + ) + assert "`start1` is required for definite ranges" in str(e.value) + + # copies1 not provided when copies_type is DefiniteRange + with pytest.raises(ValidationError) as e: + ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + copies_type=VRSTypes.DEFINITE_RANGE, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + assert "`copies1` must be provided for `copies_type == DefiniteRange`" in str( + e.value ) - assert resp.copy_number_count is None - assert resp.warnings == ["Position out of range (2302991250)"] + + # copies_comparator not provided when copies_type is IndefiniteRange + with pytest.raises(ValidationError) as e: + ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + copies_type=VRSTypes.INDEFINITE_RANGE, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + assert ( + "`copies_comparator` must be provided for `copies_type == IndefiniteRange`" + in str(e.value) + ) + + # `start_pos_comparator` not provided when start_pos_type is Indefinite Range + with pytest.raises(ValidationError) as e: + ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + assembly=ClinVarAssembly.GRCH38, + chromosome="chr15", + start_pos_type=VRSTypes.INDEFINITE_RANGE, + end_pos_type=VRSTypes.NUMBER, + ) + assert "`start_pos_comparator` is required for indefinite ranges" in str(e.value) + + # `end_pos_comparator` not provided when end_pos_type is Indefinite Range + with pytest.raises(ValidationError) as e: + ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + assembly=ClinVarAssembly.GRCH38, + chromosome="chr15", + start_pos_type=VRSTypes.NUMBER, + end_pos_type=VRSTypes.INDEFINITE_RANGE, + ) + assert "`end_pos_comparator` is required for indefinite ranges" in str(e.value) diff --git a/variation/main.py b/variation/main.py index 9574cc39..6b4be334 100644 --- a/variation/main.py +++ b/variation/main.py @@ -1,4 +1,5 @@ """Main application for FastAPI.""" +import traceback from datetime import datetime from enum import Enum from typing import List, Literal, Optional, Union @@ -17,6 +18,13 @@ from variation import logger from variation.query import QueryHandler from variation.schemas import NormalizeService, ServiceMeta, ToVRSService +from variation.schemas.copy_number_schema import ( + AmplificationToCxVarService, + ParsedToCnVarQuery, + ParsedToCnVarService, + ParsedToCxVarQuery, + ParsedToCxVarService, +) from variation.schemas.hgvs_to_copy_number_schema import ( HgvsToCopyNumberChangeService, HgvsToCopyNumberCountService, @@ -26,9 +34,7 @@ TranslateIdentifierService, ) from variation.schemas.service_schema import ( - AmplificationToCxVarService, ClinVarAssembly, - ParsedToCnVarService, ToCdnaService, ToGenomicService, ) @@ -567,66 +573,70 @@ async def hgvs_to_copy_number_change( return resp -assembly_descr = ( - "Assembly. If `accession` is set, will ignore `assembly` and `chr`. " - "If `accession` not set, must provide both `assembly` and `chr`." -) -chr_descr = "Chromosome. Must set when `assembly` is set." -accession_descr = ( - "Accession. If `accession` is set, will ignore `assembly` and " - "`chr`. If `accession` not set, must provide both `assembly` and `chr`." -) -start_descr = "Start position as residue coordinate" -end_descr = "End position as residue coordinate" -total_copies_descr = "Total copies for Copy Number Count variation object" - - -@app.get( +@app.post( "/variation/parsed_to_cn_var", - summary="Given parsed ClinVar Copy Number Gain/Loss components, return " - "VRS Copy Number Count Variation", + summary="Given parsed genomic components, return VRS Copy Number Count " + "Variation", response_description="A response to a validly-formed query.", description="Return VRS Copy Number Count Variation", response_model=ParsedToCnVarService, tags=[Tag.TO_COPY_NUMBER_VARIATION], ) -def parsed_to_cn_var( - assembly: Optional[ClinVarAssembly] = Query(None, description=assembly_descr), - chr: Optional[str] = Query(None, description=chr_descr), - accession: Optional[str] = Query(None, description=accession_descr), - start: int = Query(..., description=start_descr), - end: int = Query(..., description=end_descr), - total_copies: int = Query(..., description=total_copies_descr), - untranslatable_returns_text: bool = Query(False, description=untranslatable_descr), -) -> ParsedToCnVarService: - """Given parsed ClinVar Copy Number Gain/Loss components, return Copy Number Count - Variation - - :param int start: Start position as residue coordinate - :param int end: End position as residue coordinate - :param int total_copies: Total copies for Copy Number Count variation object - :param Optional[ClinVarAssembly] assembly: Assembly. If `accession` is set, - will ignore `assembly` and `chr`. If `accession` not set, must provide - both `assembly` and `chr`. - :param Optional[str] chr: Chromosome. Must set when `assembly` is set. - :param Optional[str] accession: Accession. If `accession` is set, - will ignore `assembly` and `chr`. If `accession` not set, must provide - both `assembly` and `chr`. - :param bool untranslatable_returns_text: `True` return VRS Text Object when - unable to translate or normalize query. `False` return `None` when - unable to translate or normalize query. - :return: Tuple containing Copy Number Count variation and list of warnings +def parsed_to_cn_var(request_body: ParsedToCnVarQuery) -> ParsedToCnVarService: + """Given parsed genomic components, return Copy Number Count Variation. + + :param request_body: Request body + :return: ParsedToCnVarService containing Copy Number Count variation and list of + warnings """ - resp = query_handler.to_copy_number_handler.parsed_to_cn_var( - start, - end, - total_copies, - assembly, - chr, - accession, - untranslatable_returns_text=untranslatable_returns_text, - ) - return resp + try: + resp = query_handler.to_copy_number_handler.parsed_to_copy_number(request_body) + except Exception: + traceback_resp = traceback.format_exc().splitlines() + logger.exception(traceback_resp) + + return ParsedToCnVarService( + copy_number_count=None, + warnings=["Unhandled exception. See logs for more details."], + service_meta_=ServiceMeta( + version=__version__, response_datetime=datetime.now() + ), + ) + else: + return resp + + +@app.post( + "/variation/parsed_to_cx_var", + summary="Given parsed genomic components, return VRS Copy Number Change " + "Variation", + response_description="A response to a validly-formed query.", + description="Return VRS Copy Number Change Variation", + response_model=ParsedToCxVarService, + tags=[Tag.TO_COPY_NUMBER_VARIATION], +) +def parsed_to_cx_var(request_body: ParsedToCxVarQuery) -> ParsedToCxVarService: + """Given parsed genomic components, return Copy Number Change Variation + + :param request_body: Request body + :return: ParsedToCxVarService containing Copy Number Change variation and list of + warnings + """ + try: + resp = query_handler.to_copy_number_handler.parsed_to_copy_number(request_body) + except Exception: + traceback_resp = traceback.format_exc().splitlines() + logger.exception(traceback_resp) + + return ParsedToCxVarService( + copy_number_count=None, + warnings=["Unhandled exception. See logs for more details."], + service_meta_=ServiceMeta( + version=__version__, response_datetime=datetime.now() + ), + ) + else: + return resp amplification_to_cx_var_descr = ( diff --git a/variation/query.py b/variation/query.py index 6dbda340..f25cff60 100644 --- a/variation/query.py +++ b/variation/query.py @@ -91,5 +91,5 @@ def __init__( *to_protein_params ) self.to_copy_number_handler = ToCopyNumberVariation( - *to_vrs_params + [gene_query_handler] + *to_vrs_params + [gene_query_handler, uta_db] ) diff --git a/variation/schemas/copy_number_schema.py b/variation/schemas/copy_number_schema.py new file mode 100644 index 00000000..8d0ccbe9 --- /dev/null +++ b/variation/schemas/copy_number_schema.py @@ -0,0 +1,448 @@ +"""Module containing schemas for services""" +import re +from enum import Enum +from typing import Any, Dict, Literal, Optional, Type, Union + +from ga4gh.vrsatile.pydantic.vrs_models import ( + Comparator, + CopyChange, + CopyNumberChange, + CopyNumberCount, + SequenceLocation, + Text, + VRSTypes, +) +from pydantic import BaseModel, Field, StrictBool, StrictInt, StrictStr, root_validator +from pydantic.main import ModelMetaclass + +from variation.schemas.normalize_response_schema import ServiceResponse +from variation.version import __version__ + + +class ClinVarAssembly(str, Enum): + """Define assemblies in ClinVar""" + + GRCH38 = "GRCh38" + GRCH37 = "GRCh37" + NCBI36 = "NCBI36" + HG38 = "hg38" + HG19 = "hg19" + HG18 = "hg18" + + +def validate_parsed_fields(cls: ModelMetaclass, v: Dict) -> Dict: + """Validate base copy number query fields + - `accession` or both `assembly` and `chromosome` must be provided + - `start1` is required when `start_pos_type` is a definite + range. + - `end1` is required when `end_pos_type` is a Definite Range. + - `start_pos_comparator` is required when `start_pos_type` is an Indefinite + Range + - `end_pos_comparator` is required when `end_pos_type` is an Indefinite Range + - End positions must be greater than start positions + """ + ac_assembly_chr_msg = "Must provide either `accession` or both `assembly` and `chromosome`" # noqa: E501 + assembly = v.get("assembly") + chromosome = v.get("chromosome") + assembly_chr_set = assembly and chromosome + assert v.get("accession") or assembly_chr_set, ac_assembly_chr_msg + + if assembly_chr_set: + pattern = r"^chr(X|Y|([1-9]|1[0-9]|2[0-2]))$" + assert re.match( + pattern, chromosome + ), f"`chromosome`, {chromosome}, does not match r'{pattern}'" + + start0 = v["start0"] + start1 = v.get("start1") + if v["start_pos_type"] == VRSTypes.DEFINITE_RANGE: + assert start1 is not None, "`start1` is required for definite ranges" + assert start1 > start0, "`start0` must be less than `start1`" + elif v["start_pos_type"] == VRSTypes.INDEFINITE_RANGE: + assert v.get( + "start_pos_comparator" + ), "`start_pos_comparator` is required for indefinite ranges" + + end0 = v["end0"] + end1 = v.get("end1") + if v["end_pos_type"] == VRSTypes.DEFINITE_RANGE: + assert end1 is not None, "`end1` is required for definite ranges" + assert end1 > end0, "`end0` must be less than `end1`" + elif v["end_pos_type"] == VRSTypes.INDEFINITE_RANGE: + assert v.get( + "end_pos_comparator" + ), "`end_pos_comparator` is required for indefinite ranges" + + err_msg = "end positions must be greater than start" + if start1 is None: + assert end0 > start0, err_msg + else: + assert end0 > start1, err_msg + + +class ParsedToCopyNumberQuery(BaseModel): + """Define base model for parsed to copy number queries""" + + assembly: Optional[ClinVarAssembly] = Field( + None, + description=( + "Assembly. Ignored, along with `chromosome`, if `accession` is " "provided." + ), + ) + chromosome: Optional[StrictStr] = Field( + None, + description=( + "Chromosome. Must contain `chr` prefix, i.e. 'chr7'. Must provide " + "when `assembly` is provided." + ), + ) + accession: Optional[StrictStr] = Field( + None, + description=( + "Genomic RefSeq accession. If `accession` is provided, will " + "ignore `assembly` and `chromosome`. If `accession` is not " + "provided, must provide both `assembly` and `chromosome`." + ), + ) + start0: StrictInt = Field( + ..., + description=( + "Start position (residue coords). If `start_pos_type` is a " + "Definite Range, this will be the min start position." + ), + ) + end0: StrictInt = Field( + ..., + description=( + "End position (residue coords). If `end_pos_type` is a definite " + "range, this will be the min end position." + ), + ) + start_pos_comparator: Optional[Comparator] = Field( + None, + description=( + "Must provide when `start_pos_type` is an Indefinite Range. " + "Indicates which direction the range is indefinite. To represent " + "(#_?), set to '<='. To represent (?_#), set to '>='." + ), + ) + end_pos_comparator: Optional[Comparator] = Field( + None, + description=( + "Must provide when `end_pos_type` is an Indefinite Range. " + "Indicates which direction the range is indefinite. To represent " + "(#_?), set to '<='. To represent (?_#), set to '>='." + ), + ) + start_pos_type: Literal[ + VRSTypes.NUMBER, VRSTypes.DEFINITE_RANGE, VRSTypes.INDEFINITE_RANGE + ] = Field( + VRSTypes.NUMBER, + description="The type of the start value in the VRS SequenceLocation", + ) + end_pos_type: Literal[ + VRSTypes.NUMBER, VRSTypes.DEFINITE_RANGE, VRSTypes.INDEFINITE_RANGE + ] = Field( + VRSTypes.NUMBER, description="Type of the end value in the VRS SequenceLocation" + ) + start1: Optional[StrictInt] = Field( + None, + description=( + "Only provided when `start_pos_type` is a Definite Range, this " + "will be the max start position." + ), + ) + end1: Optional[StrictInt] = Field( + None, + description=( + "Only provided when `end_pos_type` is a Definite Range, this " + "will be the max end position." + ), + ) + do_liftover: StrictBool = Field( + False, description="Whether or not to liftover to GRCh38 assembly" + ) + untranslatable_returns_text: StrictBool = Field( + False, + description=( + "When set to `True`, return VRS Text Object when unable to " + "translate or normalize query. When set to `False`, return `None` " + "when unable to translate or normalize query." + ), + ) + + +class ParsedToCnVarQuery(ParsedToCopyNumberQuery): + """Define query for parsed to copy number count variation endpoint""" + + copies0: StrictInt = Field( + ..., + description=( + "Number of copies. When `copies_type` is a Number or Indefinite " + "Range, this will be the `value` for copies. When `copies_type` " + "is an Definite Range, this will be the `min` copies." + ), + ) + copies1: Optional[StrictInt] = Field( + None, + description=( + "Must provide when `copies_type` is a Definite Range. This will " + "be the `max` copies." + ), + ) + copies_type: Literal[ + VRSTypes.NUMBER, VRSTypes.DEFINITE_RANGE, VRSTypes.INDEFINITE_RANGE + ] = Field(VRSTypes.NUMBER, description="Type for the `copies` in the `subject`") + copies_comparator: Optional[Comparator] = Field( + None, + description=( + "Must provide when `copies_type` is an Indefinite Range. " + "Indicates which direction the range is indefinite." + ), + ) + + @root_validator(pre=False, skip_on_failure=True) + def validate_fields(cls: ModelMetaclass, v: Dict) -> Dict: + """Validate fields. + + - `copies1` should exist when `copies_type == VRSTypes.DEFINITE_RANGE` + - `copies_comparator` should exist when + `copies_type == VRSTypes.INDEFINITE_RANGE` + """ + validate_parsed_fields(cls, v) + copies1 = v.get("copies1") + copies_type = v.get("copies_type") + copies_comparator = v.get("copies_comparator") + + if copies_type == VRSTypes.DEFINITE_RANGE: + assert ( + copies1 + ), "`copies1` must be provided for `copies_type == DefiniteRange`" + elif copies_type == VRSTypes.INDEFINITE_RANGE: + assert ( + copies_comparator + ), "`copies_comparator` must be provided for `copies_type == IndefiniteRange`" # noqa: E501 + + return v + + class Config: + """Configure model.""" + + @staticmethod + def schema_extra( + schema: Dict[str, Any], model: Type["ParsedToCnVarQuery"] + ) -> None: + """Configure OpenAPI schema.""" + if "title" in schema.keys(): + schema.pop("title", None) + for prop in schema.get("properties", {}).values(): + prop.pop("title", None) + schema["example"] = { + "assembly": "GRCh37", + "chromosome": "chr1", + "accession": None, + "start0": 143134063, + "end0": 143284670, + "copies0": 3, + "copies1": None, + "copies_comparator": None, + "copies_type": "Number", + "start_pos_comparator": "<=", + "end_pos_comparator": ">=", + "start_pos_type": "IndefiniteRange", + "end_pos_type": "IndefiniteRange", + "start1": None, + "end1": None, + "do_liftover": False, + "untranslatable_returns_text": False, + } + + +class ParsedToCnVarService(ServiceResponse): + """A response for translating parsed components to Copy Number Count""" + + copy_number_count: Optional[Union[Text, CopyNumberCount]] + + class Config: + """Configure model.""" + + @staticmethod + def schema_extra( + schema: Dict[str, Any], model: Type["ParsedToCnVarService"] + ) -> None: + """Configure OpenAPI schema.""" + if "title" in schema.keys(): + schema.pop("title", None) + for prop in schema.get("properties", {}).values(): + prop.pop("title", None) + schema["example"] = { + "copy_number_count": { + "_id": "ga4gh:CN.N6C9rWBjrNuiIhJkPxdPlRKvSGKoFynr", + "type": "CopyNumberCount", + "subject": { + "_id": "ga4gh:VSL.JTsxd9PiPZaIPL9Tl3ss78GYYnDeogvf", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU", + "interval": { + "start": { + "type": "IndefiniteRange", + "value": 143134062, + "comparator": "<=", + }, + "end": { + "type": "IndefiniteRange", + "value": 143284670, + "comparator": ">=", + }, + }, + }, + "copies": {"type": "Number", "value": 3}, + }, + "service_meta_": { + "name": "variation-normalizer", + "version": "0.2.17", + "response_datetime": "2022-01-26T22:23:41.821673", + "url": "https://github.com/cancervariants/variation-normalization", + }, + } + + +class ParsedToCxVarQuery(ParsedToCopyNumberQuery): + """Define query for parsed to copy number change variation endpoint""" + + copy_change: CopyChange + + @root_validator(pre=False, skip_on_failure=True) + def validate_fields(cls: ModelMetaclass, v: Dict) -> Dict: + """Validate fields""" + validate_parsed_fields(cls, v) + return v + + class Config: + """Configure model.""" + + @staticmethod + def schema_extra( + schema: Dict[str, Any], model: Type["ParsedToCxVarQuery"] + ) -> None: + """Configure OpenAPI schema.""" + if "title" in schema.keys(): + schema.pop("title", None) + for prop in schema.get("properties", {}).values(): + prop.pop("title", None) + schema["example"] = { + "assembly": "GRCh38", + "chromosome": "chrY", + "accession": None, + "start0": 10001, + "end0": 1223133, + "copy_change": "efo:0030069", + "start_pos_type": "Number", + "end_pos_type": "Number", + "start1": None, + "end1": None, + "do_liftover": False, + "untranslatable_returns_text": False, + } + + +class ParsedToCxVarService(ServiceResponse): + """A response for translating parsed components to Copy Number Change""" + + copy_number_change: Optional[Union[Text, CopyNumberChange]] + + class Config: + """Configure model.""" + + @staticmethod + def schema_extra( + schema: Dict[str, Any], model: Type["ParsedToCxVarService"] + ) -> None: + """Configure OpenAPI schema.""" + if "title" in schema.keys(): + schema.pop("title", None) + for prop in schema.get("properties", {}).values(): + prop.pop("title", None) + schema["example"] = { + "copy_number_change": { + "type": "CopyNumberChange", + "_id": "ga4gh:CX.KYAQwf8-DQu23LsDbFHP0BiRzrCmu46x", + "subject": { + "type": "SequenceLocation", + "_id": "ga4gh:VSL.djv1Oq_qNjDialZakqQGoUAJvohREPBL", + "sequence_id": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + "interval": { + "start": {"type": "Number", "value": 10000}, + "end": {"type": "Number", "value": 1223133}, + }, + }, + "copy_change": "efo:0030069", + }, + "service_meta_": { + "name": "variation-normalizer", + "version": __version__, + "response_datetime": "2022-01-26T22:23:41.821673", + "url": "https://github.com/cancervariants/variation-normalization", + }, + } + + +class AmplificationToCxVarQuery(BaseModel): + """Define query for amplification to copy number change variation endpoint""" + + gene: str + sequence_id: Optional[str] + start: Optional[int] + end: Optional[int] + sequence_location: Optional[SequenceLocation] + + +class AmplificationToCxVarService(ServiceResponse): + """A response for translating Amplification queries to Copy Number Change""" + + query: Optional[AmplificationToCxVarQuery] = None + amplification_label: Optional[str] + copy_number_change: Optional[Union[Text, CopyNumberChange]] + + class Config: + """Configure model.""" + + @staticmethod + def schema_extra( + schema: Dict[str, Any], model: Type["AmplificationToCxVarService"] + ) -> None: + """Configure OpenAPI schema.""" + if "title" in schema.keys(): + schema.pop("title", None) + for prop in schema.get("properties", {}).values(): + prop.pop("title", None) + schema["example"] = { + "query": { + "gene": "braf", + "sequence_id": None, + "start": None, + "end": None, + "sequence_location": None, + }, + "amplification_label": "BRAF Amplification", + "copy_number_change": { + "_id": "ga4gh:CX.TZBOQe5xFojvFJ1XjQQD0633rStHtGUs", + "type": "CopyNumberChange", + "subject": { + "_id": "ga4gh:VSL.xZU3kL8F6t2ca6WH_26CWKfNW9-owhR4", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "start": {"type": "Number", "value": 140713327}, + "end": {"type": "Number", "value": 140924929}, + }, + }, + "copy_change": "efo:0030072", + }, + "service_meta_": { + "version": "0.7.dev0", + "response_datetime": "2022-09-29T15:08:18.696882", + "name": "variation-normalizer", + "url": "https://github.com/cancervariants/variation-normalization", + }, + } diff --git a/variation/schemas/hgvs_to_copy_number_schema.py b/variation/schemas/hgvs_to_copy_number_schema.py index f56ea39a..38caa404 100644 --- a/variation/schemas/hgvs_to_copy_number_schema.py +++ b/variation/schemas/hgvs_to_copy_number_schema.py @@ -28,15 +28,15 @@ def schema_extra( schema["example"] = { "hgvs_expr": "NC_000003.12:g.49531262dup", "copy_number_count": { - "_id": "ga4gh:CN.wIUwSQ9MQdv-2dsoDo-RjI97iK3Mn5m6", + "_id": "ga4gh:CN.lxbM1jOtrVgrwy_SHSSd3o2QkCRRswyf", "type": "CopyNumberCount", "subject": { - "_id": "ga4gh:VSL.G_J9WrfooiONRgjbmGPuCBYbBYFQnYOg", + "_id": "ga4gh:VSL.0dgeuVKngTm5HWjNjcZ9PO-fnbNmKmBv", "type": "SequenceLocation", "sequence_id": "ga4gh:SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", "interval": { "type": "SequenceInterval", - "start": {"type": "Number", "value": 49531260}, + "start": {"type": "Number", "value": 49531261}, "end": {"type": "Number", "value": 49531262}, }, }, @@ -72,15 +72,15 @@ def schema_extra( schema["example"] = { "hgvs_expr": "NC_000003.12:g.49531262dup", "copy_number_change": { - "_id": "ga4gh:CX.hGuvyiJmDtx4-MRjsLja0fb_DqOE2chN", + "_id": "ga4gh:CX.49PTi3fDMxTdYRLp-svfrHrHc_pIAWT3", "type": "CopyNumberChange", "subject": { - "_id": "ga4gh:VSL.G_J9WrfooiONRgjbmGPuCBYbBYFQnYOg", + "_id": "ga4gh:VSL.0dgeuVKngTm5HWjNjcZ9PO-fnbNmKmBv", "type": "SequenceLocation", "sequence_id": "ga4gh:SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", "interval": { "type": "SequenceInterval", - "start": {"type": "Number", "value": 49531260}, + "start": {"type": "Number", "value": 49531261}, "end": {"type": "Number", "value": 49531262}, }, }, diff --git a/variation/schemas/service_schema.py b/variation/schemas/service_schema.py index 1b1fd999..3f349acc 100644 --- a/variation/schemas/service_schema.py +++ b/variation/schemas/service_schema.py @@ -1,18 +1,11 @@ """Module containing schemas for services""" from enum import Enum -from typing import Any, Dict, Optional, Type, Union +from typing import Any, Dict, Type from cool_seq_tool.schemas import ToCdnaService as ToCdna from cool_seq_tool.schemas import ToGenomicService as ToGenomic -from ga4gh.vrsatile.pydantic.vrs_models import ( - CopyNumberChange, - CopyNumberCount, - SequenceLocation, - Text, -) -from pydantic import BaseModel, StrictStr -from variation.schemas.normalize_response_schema import ServiceMeta, ServiceResponse +from variation.schemas.normalize_response_schema import ServiceMeta class ClinVarAssembly(str, Enum): @@ -26,138 +19,6 @@ class ClinVarAssembly(str, Enum): HG18 = "hg18" -class ParsedToCnVarQuery(BaseModel): - """Define query for parsed to copy number count variation endpoint""" - - assembly: Optional[ClinVarAssembly] = None - chr: Optional[StrictStr] = None - accession: Optional[StrictStr] = None - start: int - end: int - total_copies: int - - -class ParsedToCnVarService(ServiceResponse): - """A response for translating parsed components to Copy Number Count""" - - query: Optional[ParsedToCnVarQuery] = None - copy_number_count: Optional[Union[Text, CopyNumberCount]] - - class Config: - """Configure model.""" - - @staticmethod - def schema_extra( - schema: Dict[str, Any], model: Type["ParsedToCnVarService"] - ) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { - "query": { - "assembly": "GRCh37", - "chr": "1", - "accession": None, - "start": 143134063, - "end": 143284670, - "total_copies": 3, - }, - "copy_number_count": { - "_id": "ga4gh:CN.N6C9rWBjrNuiIhJkPxdPlRKvSGKoFynr", - "type": "CopyNumberCount", - "subject": { - "_id": "ga4gh:VSL.JTsxd9PiPZaIPL9Tl3ss78GYYnDeogvf", - "type": "SequenceLocation", - "sequence_id": "ga4gh:SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU", - "interval": { - "type": "SequenceInterval", - "start": { - "type": "IndefiniteRange", - "value": 143134062, - "comparator": "<=", - }, - "end": { - "type": "IndefiniteRange", - "value": 143284670, - "comparator": ">=", - }, - }, - }, - "copies": {"type": "Number", "value": 3}, - }, - "service_meta_": { - "name": "variation-normalizer", - "version": "0.2.17", - "response_datetime": "2022-01-26T22:23:41.821673", - "url": "https://github.com/cancervariants/variation-normalization", - }, - } - - -class AmplificationToCxVarQuery(BaseModel): - """Define query for amplification to copy number change variation endpoint""" - - gene: str - sequence_id: Optional[str] - start: Optional[int] - end: Optional[int] - sequence_location: Optional[SequenceLocation] - - -class AmplificationToCxVarService(ServiceResponse): - """A response for translating Amplification queries to Copy Number Change""" - - query: Optional[AmplificationToCxVarQuery] = None - amplification_label: Optional[str] - copy_number_change: Optional[Union[Text, CopyNumberChange]] - - class Config: - """Configure model.""" - - @staticmethod - def schema_extra( - schema: Dict[str, Any], model: Type["AmplificationToCxVarService"] - ) -> None: - """Configure OpenAPI schema.""" - if "title" in schema.keys(): - schema.pop("title", None) - for prop in schema.get("properties", {}).values(): - prop.pop("title", None) - schema["example"] = { - "query": { - "gene": "braf", - "sequence_id": None, - "start": None, - "end": None, - "sequence_location": None, - }, - "amplification_label": "BRAF Amplification", - "copy_number_change": { - "_id": "ga4gh:CX.TZBOQe5xFojvFJ1XjQQD0633rStHtGUs", - "type": "CopyNumberChange", - "subject": { - "_id": "ga4gh:VSL.xZU3kL8F6t2ca6WH_26CWKfNW9-owhR4", - "type": "SequenceLocation", - "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", - "interval": { - "type": "SequenceInterval", - "start": {"type": "Number", "value": 140713327}, - "end": {"type": "Number", "value": 140924929}, - }, - }, - "copy_change": "efo:0030072", - }, - "service_meta_": { - "version": "0.7.dev0", - "response_datetime": "2022-09-29T15:08:18.696882", - "name": "variation-normalizer", - "url": "https://github.com/cancervariants/variation-normalization", - }, - } - - class ToCdnaService(ToCdna): """Service model response for protein -> cDNA""" diff --git a/variation/to_copy_number_variation.py b/variation/to_copy_number_variation.py index e5369979..f50fc355 100644 --- a/variation/to_copy_number_variation.py +++ b/variation/to_copy_number_variation.py @@ -1,16 +1,18 @@ """Module for to copy number variation translation""" from datetime import datetime -from typing import List, Optional, Tuple, Union +from typing import Dict, List, NamedTuple, Optional, Tuple, Union from urllib.parse import unquote -from cool_seq_tool.data_sources import SeqRepoAccess +from cool_seq_tool.data_sources import SeqRepoAccess, UTADatabase from ga4gh.core import ga4gh_identify from ga4gh.vrs import models from ga4gh.vrsatile.pydantic.vrs_models import ( + Comparator, CopyChange, CopyNumberChange, CopyNumberCount, Text, + VRSTypes, ) from gene.query import QueryHandler as GeneQueryHandler from gene.schemas import MatchType as GeneMatchType @@ -19,6 +21,14 @@ from variation.classify import Classify from variation.schemas.app_schemas import Endpoint from variation.schemas.classification_response_schema import ClassificationType +from variation.schemas.copy_number_schema import ( + AmplificationToCxVarQuery, + AmplificationToCxVarService, + ParsedToCnVarQuery, + ParsedToCnVarService, + ParsedToCxVarQuery, + ParsedToCxVarService, +) from variation.schemas.hgvs_to_copy_number_schema import ( HgvsToCopyNumberChangeService, HgvsToCopyNumberCountService, @@ -27,13 +37,7 @@ HGVSDupDelModeOption, ServiceMeta, ) -from variation.schemas.service_schema import ( - AmplificationToCxVarQuery, - AmplificationToCxVarService, - ClinVarAssembly, - ParsedToCnVarQuery, - ParsedToCnVarService, -) +from variation.schemas.service_schema import ClinVarAssembly from variation.schemas.token_response_schema import TokenType from variation.schemas.validation_response_schema import ValidationResult from variation.to_vrs import ToVRS @@ -51,6 +55,25 @@ ] +class ToCopyNumberError(Exception): + """Custom exceptions when representing copy number""" + + +class ParsedAccessionSummary(NamedTuple): + """Represents accession for parsed endpoints""" + + accession: str + lifted_over: bool + + +class ParsedChromosomeSummary(NamedTuple): + """Represents chromosome and assembly for parsed endpoints""" + + accession: str + chromosome: str + lifted_over: bool + + class ToCopyNumberVariation(ToVRS): """Class for representing copy number variation""" @@ -62,6 +85,7 @@ def __init__( validator: Validate, translator: Translate, gene_normalizer: GeneQueryHandler, + uta: UTADatabase, ) -> None: """Initialize theToCopyNumberVariation class @@ -71,44 +95,21 @@ def __init__( :param validator: Instance for validating classification :param translator: Instance for translating valid results to VRS representations :param gene_normalizer: Client for normalizing gene concepts + :param uta: Access to UTA queries """ super().__init__(seqrepo_access, tokenizer, classifier, validator, translator) self.gene_normalizer = gene_normalizer + self.uta = uta @staticmethod - def _parsed_to_text( - start: int, - end: int, - total_copies: int, - warnings: List[str], - assembly: Optional[str] = None, - chr: Optional[str] = None, - accession: Optional[str] = None, - ) -> Tuple[Text, List[str]]: + def _parsed_to_text(params: Dict) -> Text: """Return response for invalid query for parsed_to_cn_var - :param int start: Start position as residue coordinate - :param int end: End position as residue coordinate - :param int total_copies: Total copies for Copy Number Count variation object - :param List[str] warnings: List of warnings - :param Optional[ClinVarAssembly] assembly: Assembly. If `accession` is set, - will ignore `assembly` and `chr`. If `accession` not set, must provide - both `assembly` and `chr`. - :param Optional[str] chr: Chromosome. Must set when `assembly` is set. - :param Optional[str] accession: Accession. If `accession` is set, - will ignore `assembly` and `chr`. If `accession` not set, must provide - both `assembly` and `chr`. - :return: Tuple containing text variation and warnings + :param params: Parameters for initial query + :return: Variation represented as VRS Text object """ text_label = "" - for val, name in [ - (assembly, "assembly"), - (chr, "chr"), - (accession, "accession"), - (start, "start"), - (end, "end"), - (total_copies, "total_copies"), - ]: + for name, val in params.items(): val = val if val else "None" text_label += f"{name}={val}&" @@ -116,7 +117,7 @@ def _parsed_to_text( variation = models.Text(definition=definition, type="Text") _id = ga4gh_identify(variation) variation = Text(definition=definition, id=_id) - return variation, warnings + return variation async def _get_valid_results(self, q: str) -> Tuple[List[ValidationResult], List]: """Get valid results for to copy number variation endpoint @@ -226,16 +227,16 @@ async def hgvs_to_copy_number_count( do_liftover: bool = False, untranslatable_returns_text: bool = False, ) -> HgvsToCopyNumberCountService: - """Given hgvs, return abolute copy number variation - - :param str hgvs_expr: HGVS expression - :param int baseline_copies: Baseline copies number - :param bool do_liftover: Whether or not to liftover to GRCh38 assembly - :param bool untranslatable_returns_text: `True` return VRS Text Object when - unable to translate or normalize query. `False` return `None` when - unable to translate or normalize query. - :return: HgvsToCopyNumberCountService containing Copy Number Count - Variation and warnings + """Given hgvs, return absolute copy number variation + + :param hgvs_expr: HGVS expression + :param baseline_copies: Baseline copies number + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :param untranslatable_returns_text: `True` return VRS Text Object when unable to + translate or normalize query. `False` return `None` when unable to translate + or normalize query. + :return: HgvsToCopyNumberCountService containing Copy Number Count Variation and + warnings """ valid_results, warnings = await self._get_valid_results(hgvs_expr) cn_var, warnings = await self._hgvs_to_cnv_resp( @@ -266,14 +267,14 @@ async def hgvs_to_copy_number_change( ) -> HgvsToCopyNumberChangeService: """Given hgvs, return copy number change variation - :param str hgvs_expr: HGVS expression - :param Optional[CopyChange] copy_change: The copy change - :param bool do_liftover: Whether or not to liftover to GRCh38 assembly - :param bool untranslatable_returns_text: `True` return VRS Text Object when - unable to translate or normalize query. `False` return `None` when - unable to translate or normalize query. - :return: HgvsToCopyNumberChangeService containing Copy Number Change - Variation and warnings + :param hgvs_expr: HGVS expression + :param copy_change: The copy change + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :param untranslatable_returns_text: `True` return VRS Text Object when unable to + translate or normalize query. `False` return `None` when unable to translate + or normalize query. + :return: HgvsToCopyNumberChangeService containing Copy Number Change Variation + and warnings """ valid_results, warnings = await self._get_valid_results(hgvs_expr) cx_var, warnings = await self._hgvs_to_cnv_resp( @@ -295,150 +296,406 @@ async def hgvs_to_copy_number_change( copy_number_change=cx_var, ) - def parsed_to_cn_var( - self, - start: int, - end: int, - total_copies: int, - assembly: Optional[ClinVarAssembly] = None, - chr: Optional[str] = None, - accession: Optional[str] = None, - untranslatable_returns_text: bool = False, - ) -> ParsedToCnVarService: - """Given parsed ClinVar Copy Number Gain/Loss components, return Copy Number - Count Variation - - :param int start: Start position as residue coordinate - :param int end: End position as residue coordinate - :param int total_copies: Total copies for Copy Number Count variation object - :param Optional[ClinVarAssembly] assembly: Assembly. If `accession` is set, - will ignore `assembly` and `chr`. If `accession` not set, must provide - both `assembly` and `chr`. - :param Optional[str] chr: Chromosome. Must set when `assembly` is set. - :param Optional[str] accession: Accession. If `accession` is set, - will ignore `assembly` and `chr`. If `accession` not set, must provide - both `assembly` and `chr`. - :param bool untranslatable_returns_text: `True` return VRS Text Object when - unable to translate or normalize query. `False` return `None` when - unable to translate or normalize query. - :return: ParsedToCnVarService containing Copy Number Count variation - and list of warnings + def _get_parsed_ac( + self, assembly: ClinVarAssembly, chromosome: str, use_grch38: bool = False + ) -> ParsedAccessionSummary: + """Get accession for parsed components + + :param assembly: Assembly + :param chromosome: Chromosome + :param use_grch38: Whether or not to use GRCh38 assembly + :raises ToCopyNumberError: If unable to translate assembly and chromosome + to an accession + :return: ParsedAccessionSummary containing accession and whether or not it was + lifted over """ - variation = None - warnings = list() - try: - og_query = ParsedToCnVarQuery( - assembly=assembly, - chr=chr, - accession=accession, - start=start, - end=end, - total_copies=total_copies, - ) - except ValidationError as e: - warnings.append(str(e)) - og_query = None + accession = None + lifted_over = False + og_assembly = assembly + + if assembly == ClinVarAssembly.HG38: + assembly = ClinVarAssembly.GRCH38 + elif assembly == ClinVarAssembly.HG19: + assembly = ClinVarAssembly.GRCH37 + elif assembly == ClinVarAssembly.HG18: + assembly = ClinVarAssembly.NCBI36 + + if use_grch38 and assembly != ClinVarAssembly.GRCH38: + lifted_over = True + assembly = ClinVarAssembly.GRCH38 + + if assembly != ClinVarAssembly.NCBI36: + # Variation Normalizer does not support NCBI36 yet + query = f"{assembly.value}:{chromosome}" + aliases, error = self.seqrepo_access.translate_identifier(query, "ga4gh") + if aliases: + accession = aliases[0] + else: + raise ToCopyNumberError(str(error)) else: - if accession: - pass - elif assembly and chr: - if assembly == ClinVarAssembly.HG38: - assembly = ClinVarAssembly.GRCH38 - elif assembly == ClinVarAssembly.HG19: - assembly = ClinVarAssembly.GRCH37 - elif assembly == ClinVarAssembly.HG18: - assembly = ClinVarAssembly.NCBI36 - - if assembly != ClinVarAssembly.NCBI36: - # Variation Normalizer does not support NCBI36 yet - query = f"{assembly.value}:{chr}" - aliases, w = self.seqrepo_access.translate_identifier(query) - if w: - warnings.append(w) - else: - accession = ( - [a for a in aliases if a.startswith("refseq:")] or [None] - )[0] - if not accession: - warnings.append( - f"Unable to find RefSeq accession for {query}" - ) - else: - warnings.append( - f"{assembly.value} assembly is not currently supported" - ) + raise ToCopyNumberError( + f"{og_assembly.value} assembly is not currently supported" + ) + + return ParsedAccessionSummary(lifted_over=lifted_over, accession=accession) + + def _get_parsed_ac_chr( + self, accession: str, do_liftover: bool + ) -> ParsedChromosomeSummary: + """Get accession and chromosome for parsed components + + :param accession: Genomic accession + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :raises ToCopyNumberError: If unable to translate accession + :return: ParsedChromosomeSummary containing chromosome, accession, and whether + or not it was lifted over + """ + chromosome = None + new_ac = None + lifted_over = False + + aliases, error = self.seqrepo_access.translate_identifier(accession) + if error: + raise ToCopyNumberError(error) + + grch_aliases = [ + a for a in aliases if a.startswith(("GRCh38:chr", "GRCh37:chr")) + ] + + if grch_aliases: + grch_record = grch_aliases[0] + chromosome = grch_record.split(":")[-1] + + if grch_record.startswith("GRCh38") or not do_liftover: + new_ac = [a for a in aliases if a.startswith("ga4gh")][0] else: - warnings.append( - "Must provide either `accession` or both `assembly` " "and `chr`." + grch38_query = grch_record.replace("GRCh37", "GRCh38") + aliases, error = self.seqrepo_access.translate_identifier( + grch38_query, "ga4gh" ) - if warnings: - if untranslatable_returns_text: - variation, warnings = self._parsed_to_text( - start, end, total_copies, warnings, assembly, chr, accession - ) + if error: + raise ToCopyNumberError(error) + + lifted_over = True + new_ac = aliases[0] else: - try: - sequence_id, w = self.seqrepo_access.translate_identifier( - accession, "ga4gh" - ) - except (IndexError, TypeError): - warnings.append( - f"{accession} does not have an associated " f"ga4gh identifier" + raise ToCopyNumberError(f"Not a supported genomic accession: {accession}") + + return ParsedChromosomeSummary( + accession=new_ac, chromosome=chromosome, lifted_over=lifted_over + ) + + def _validate_ac_pos(self, accession: str, pos: int) -> None: + """Validate position for parsed components + + :param accession: Genomic accession + :param pos: Position on accession + :raises ToCopyNumberError: If position is not valid on accession or + if accession is not found in seqrepo + """ + try: + ref = self.seqrepo_access.sr[accession][pos - 1] + except ValueError as e: + raise ToCopyNumberError( + f"SeqRepo ValueError: {str(e).replace('start', 'Position')}" + ) + except KeyError: + raise ToCopyNumberError(f"Accession not found in SeqRepo: {accession}") + else: + if ref == "": + raise ToCopyNumberError(f"Position ({pos}) is not valid on {accession}") + + def _get_vrs_loc_start_or_end( + self, + accession: str, + pos0: int, + pos_type: Union[ + VRSTypes.NUMBER, VRSTypes.DEFINITE_RANGE, VRSTypes.INDEFINITE_RANGE + ], + is_start: bool = True, + pos1: Optional[int] = None, + comparator: Optional[Comparator] = None, + ) -> Union[models.Number, models.DefiniteRange, models.IndefiniteRange]: + """Get VRS Sequence Location start and end values + + :param accession: Genomic accession for sequence + :param pos0: Position (residue coords). If `pos_type` is a definite range, + this will be the min start position + :param pos_type: Type of the pos value in VRS Sequence Location + :param is_start: `True` if position(s) describing VRS start value. `False` if + position(s) describing VRS end value + :param pos1: Only set when end is a definite range, this will be the max end + position + :param comparator: Must provide when `pos_type` is an Indefinite Range. + Indicates which direction the range is indefinite. To represent (#_?), set + to '<='. To represent (?_#), set to '>='. + :raises ToCopyNumberError: If position is not valid on accession when + using definite range + :return: VRS start or end value for sequence location + """ + if pos_type == VRSTypes.DEFINITE_RANGE: + self._validate_ac_pos(accession, pos1) + + vrs_val = models.DefiniteRange( + min=pos0 - 1 if is_start else pos0 + 1, + max=pos1 - 1 if is_start else pos1 + 1, + type="DefiniteRange", + ) + elif pos_type == VRSTypes.NUMBER: + vrs_val = models.Number(value=pos0 - 1 if is_start else pos0, type="Number") + else: + vrs_val = models.IndefiniteRange( + comparator=comparator.value, + value=pos0 - 1 if is_start else pos0, + type="IndefiniteRange", + ) + + return vrs_val + + def _get_parsed_seq_loc( + self, + accession: str, + chromosome: str, + start0: int, + start_pos_type: Union[ + VRSTypes.NUMBER, VRSTypes.DEFINITE_RANGE, VRSTypes.INDEFINITE_RANGE + ], + end0: int, + end_pos_type: Union[ + VRSTypes.NUMBER, VRSTypes.DEFINITE_RANGE, VRSTypes.INDEFINITE_RANGE + ], + start1: Optional[int] = None, + end1: Optional[int] = None, + liftover_pos: bool = False, + start_pos_comparator: Optional[Comparator] = None, + end_pos_comparator: Optional[Comparator] = None, + ) -> Tuple[Optional[Dict], Optional[str]]: + """Get sequence location for parsed components. Accession will be validated. + + :param accession: Genomic accession for sequence + :param chromosome: Chromosome + :param start0: Start position (residue coords). If start is a definite range, + this will be the min start position + :param start_pos_type: Type of the start value in VRS Sequence Location + :param end0: End position (residue coords). If end is a definite range, this + will be the min end position + :param end_pos_type: Type of the end value in VRS Sequence Location + :param start1: Only set when start is a definite range, this will be the max + start position + :param end1: Only set when end is a definite range, this will be the max end + position + :param liftover_pos: Whether or not to liftover positions + :param start_pos_comparator: Must provide when `start_pos_type` is an Indefinite + Range. Indicates which direction the range is indefinite. To represent + (#_?), set to '<='. To represent (?_#), set to '>='. + :param end_pos_comparator: Must provide when `end_pos_type` is an Indefinite + Range. Indicates which direction the range is indefinite. To represent + (#_?), set to '<='. To represent (?_#), set to '>='. + :raises ToCopyNumberError: If error lifting over positions, translating + accession, positions not valid on accession, + :return: Tuple containing VRS sequence location represented as dict (if valid) + and warning (if invalid) + """ + seq_loc = None + + # Liftover pos if needed + if liftover_pos: + liftover_pos = self._liftover_pos(chromosome, start0, end0, start1, end1) + start0 = liftover_pos["start0"] + end0 = liftover_pos["end0"] + start1 = liftover_pos["start1"] + end1 = liftover_pos["end1"] + + sequence_ids, error = self.seqrepo_access.translate_identifier( + accession, "ga4gh" + ) + if error: + raise ToCopyNumberError(error) + + sequence_id = sequence_ids[0] + + for pos in [start0, end0]: + # validate start0 and end0 since they're always required + self._validate_ac_pos(accession, pos) + + start_vrs = self._get_vrs_loc_start_or_end( + accession, + start0, + start_pos_type, + is_start=True, + pos1=start1, + comparator=start_pos_comparator, + ) + + end_vrs = self._get_vrs_loc_start_or_end( + accession, + end0, + end_pos_type, + is_start=False, + pos1=end1, + comparator=end_pos_comparator, + ) + + seq_loc = models.SequenceLocation( + type="SequenceLocation", + sequence_id=sequence_id, + interval=models.SequenceInterval( + start=start_vrs, + end=end_vrs, + ), + ) + seq_loc._id = ga4gh_identify(seq_loc) + + return seq_loc.as_dict() if seq_loc else seq_loc + + def _liftover_pos( + self, + chromosome: str, + start0: int, + end0: int, + start1: Optional[int], + end1: Optional[int], + ) -> Dict: + """Liftover GRCh37 positions to GRCh38 positions + + :param chromosome: Chromosome. Must be contain 'chr' prefix, i.e 'chr7'. + :param start0: Start position (residue coords) GRCh37 assembly. If start is a + definite range, this will be the min start position + :param end0: End position (residue coords) GRCh37 assembly. If end is a definite + range, this will be the min end position + :param start1: Only set when start is a definite range, this will be the max + start position. GRCh37 assembly + :param end1: Only set when end is a definite range, this will be the max end + position. GRCh37 assembly + :raises ToCopyNumberError: If unable to liftover position + :return: Dictionary containing lifted over positions + ('start0', 'end0', 'start1', 'end1') + """ + liftover_pos = {"start0": None, "end0": None, "start1": None, "end1": None} + + for k, pos in [ + ("start0", start0), + ("end0", end0), + ("start1", start1), + ("end1", end1), + ]: + if pos is not None: + liftover = self.uta.liftover_37_to_38.convert_coordinate( + chromosome, pos ) - else: - if w: - warnings.append(w) + if not liftover: + raise ToCopyNumberError( + f"Unable to liftover: {chromosome} with pos {pos}" + ) else: - sequence_id = sequence_id[0] + liftover_pos[k] = liftover[0][1] - if warnings: - if untranslatable_returns_text: - variation, warnings = self._parsed_to_text( - start, end, total_copies, warnings, assembly, chr, accession - ) + return liftover_pos + + def parsed_to_copy_number( + self, request_body: Union[ParsedToCnVarQuery, ParsedToCxVarQuery] + ) -> Union[ParsedToCnVarService, ParsedToCxVarService]: + """Given parsed genomic components, return Copy Number Count or Copy Number + Change Variation + + :param request_body: request body + :return: If `copy_number_type` is Copy Number Count, return ParsedToCnVarService + containing Copy Number Count variation and list of warnings. Else, return + ParsedToCxVarService containing Copy Number Change variation and list of + warnings + """ + variation = None + warnings = [] + + is_cx = isinstance(request_body, ParsedToCxVarQuery) + lifted_over = False + + try: + if not request_body.accession: + accession_summary = self._get_parsed_ac( + request_body.assembly, + request_body.chromosome, + use_grch38=request_body.do_liftover, + ) + chromosome = request_body.chromosome + accession = accession_summary.accession + lifted_over = accession_summary.lifted_over else: - try: - self.seqrepo_access.sr[accession][start - 1] - self.seqrepo_access.sr[accession][end] - except ValueError as e: - warnings.append(str(e).replace("start", "Position")) - if untranslatable_returns_text: - variation, warnings = self._parsed_to_text( - start, end, total_copies, warnings, assembly, chr, accession - ) + chr_summary = self._get_parsed_ac_chr( + request_body.accession, request_body.do_liftover + ) + accession = chr_summary.accession + chromosome = chr_summary.chromosome + lifted_over = chr_summary.lifted_over + + seq_loc = self._get_parsed_seq_loc( + accession, + chromosome, + request_body.start0, + request_body.start_pos_type, + request_body.end0, + request_body.end_pos_type, + start1=request_body.start1, + end1=request_body.end1, + start_pos_comparator=request_body.start_pos_comparator, + end_pos_comparator=request_body.end_pos_comparator, + liftover_pos=request_body.do_liftover and lifted_over, + ) + except ToCopyNumberError as e: + warnings.append(str(e)) + else: + if is_cx: + variation = { + "type": "CopyNumberChange", + "subject": seq_loc, + "copy_change": request_body.copy_change, + } + variation["_id"] = ga4gh_identify(models.CopyNumberChange(**variation)) + variation = CopyNumberChange(**variation) + else: + if request_body.copies_type == VRSTypes.NUMBER: + copies = {"value": request_body.copies0, "type": "Number"} + elif request_body.copies_type == VRSTypes.DEFINITE_RANGE: + copies = { + "min": request_body.copies0, + "max": request_body.copies1, + "type": "DefiniteRange", + } else: - location = models.SequenceLocation( - type="SequenceLocation", - sequence_id=sequence_id, - interval=models.SequenceInterval( - type="SequenceInterval", - start=models.IndefiniteRange( - comparator="<=", value=start - 1, type="IndefiniteRange" - ), - end=models.IndefiniteRange( - comparator=">=", value=end, type="IndefiniteRange" - ), - ), - ) - location._id = ga4gh_identify(location) - variation = { - "type": "CopyNumberCount", - "subject": location.as_dict(), - "copies": {"value": total_copies, "type": "Number"}, + copies = { + "value": request_body.copies0, + "comparator": request_body.copies_comparator.value, + "type": "IndefiniteRange", } - variation["_id"] = ga4gh_identify( - models.CopyNumberCount(**variation) - ) - variation = CopyNumberCount(**variation) - return ParsedToCnVarService( - query=og_query, - copy_number_count=variation, - warnings=warnings, - service_meta_=ServiceMeta( + variation = { + "type": "CopyNumberCount", + "subject": seq_loc, + "copies": copies, + } + variation["_id"] = ga4gh_identify(models.CopyNumberCount(**variation)) + variation = CopyNumberCount(**variation) + + if warnings and request_body.untranslatable_returns_text: + variation = self._parsed_to_text(request_body.dict()) + + service_params = { + "warnings": warnings, + "service_meta_": ServiceMeta( version=__version__, response_datetime=datetime.now() ), + } + + if is_cx: + service_params["copy_number_change"] = variation + else: + service_params["copy_number_count"] = variation + + return ( + ParsedToCxVarService(**service_params) + if is_cx + else ParsedToCnVarService(**service_params) ) def amplification_to_cx_var( @@ -454,20 +711,20 @@ def amplification_to_cx_var( 1. sequence_id, start, end (must provide ALL) 2. use the gene-normalizer to get the SequenceLocation - :param str gene: Gene query - :param Optional[str] sequence_id: Sequence ID for the location. If set, - must also provide `start` and `end` - :param Optional[int] start: Start position as residue coordinate for the - sequence location. If set, must also provide `sequence_id` and `end` - :param Optional[int] end: End position as residue coordinate for the sequence - location. If set, must also provide `sequence_id` and `start` - :param bool untranslatable_returns_text: `True` return VRS Text Object when - unable to translate or normalize query. `False` return `None` when - unable to translate or normalize query. - :return: AmplificationToCxVarService containing Copy Number Change and - list of warnings + :param gene: Gene query + :param sequence_id: Sequence ID for the location. If set, must also provide + `start` and `end` + :param start: Start position as residue coordinate for the sequence location. + If set, must also provide `sequence_id` and `end` + :param end: End position as residue coordinate for the sequence location. If + set, must also provide `sequence_id` and `start` + :param untranslatable_returns_text: `True` return VRS Text Object when unable to + translate or normalize query. `False` return `None` when unable to translate + or normalize query. + :return: AmplificationToCxVarService containing Copy Number Change and list of + warnings """ - warnings = list() + warnings = [] amplification_label = None variation = None try: @@ -537,9 +794,13 @@ def amplification_to_cx_var( warnings.append(f"gene-normalizer returned no match for gene: {gene}") if not variation and untranslatable_returns_text: - text_variation = models.Text(definition=amplification_label, type="Text") - text_variation.id = ga4gh_identify(text_variation) - variation = Text(**text_variation.as_dict()) + params = { + "gene": gene, + "sequence_id": sequence_id, + "start": start, + "end": end, + } + variation = self._parsed_to_text(params) return AmplificationToCxVarService( query=og_query,