Skip to content

Commit

Permalink
Merge pull request #286 from cancervariants/issue-285
Browse files Browse the repository at this point in the history
feat: Add endpoint for parsed clinvar copy number gain/loss
  • Loading branch information
korikuzma authored May 3, 2022
2 parents 0ee02e0 + 55435ae commit 3806317
Show file tree
Hide file tree
Showing 5 changed files with 541 additions and 1 deletion.
268 changes: 268 additions & 0 deletions tests/test_parsed_to_abs_cnv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,268 @@
"""Test that parsed_to_abs_cnv works correctly"""
import pytest
from ga4gh.vrsatile.pydantic.vrs_models import AbsoluteCopyNumber

from variation.schemas.service_schema import ClinVarAssembly


@pytest.fixture(scope="module")
def copy_number_gain1():
    """Build the expected Absolute Copy Number object for a ClinVar gain.

    Source record:
    https://www.ncbi.nlm.nih.gov/clinvar/variation/145208/?new_evidence=true

    :return: AbsoluteCopyNumber with 3 copies whose subject is a
        SequenceLocation bounded by two IndefiniteRange values
    """
    # Both interval bounds are indefinite: "<=" on the start, ">=" on the end.
    lower_bound = {
        "type": "IndefiniteRange",
        "value": 143134062,
        "comparator": "<="
    }
    upper_bound = {
        "type": "IndefiniteRange",
        "value": 143284670,
        "comparator": ">="
    }
    location = {
        "type": "SequenceLocation",
        "id": "ga4gh:VSL.JTsxd9PiPZaIPL9Tl3ss78GYYnDeogvf",
        "sequence_id": "ga4gh:SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU",
        "interval": {
            "type": "SequenceInterval",
            "start": lower_bound,
            "end": upper_bound
        }
    }
    return AbsoluteCopyNumber(
        type="AbsoluteCopyNumber",
        id="ga4gh:VAC.accZJeJtNj0Zqv7KVqkT87ClTlg-4nwa",
        subject=location,
        copies={"type": "Number", "value": 3}
    )


@pytest.fixture(scope="module")
def copy_number_gain2():
    """Build the expected Absolute Copy Number object for a ClinVar gain.

    Source record:
    https://www.ncbi.nlm.nih.gov/clinvar/variation/146181/?new_evidence=true

    :return: AbsoluteCopyNumber with 2 copies whose subject is a
        SequenceLocation bounded by two IndefiniteRange values
    """
    # Both interval bounds are indefinite: "<=" on the start, ">=" on the end.
    lower_bound = {
        "type": "IndefiniteRange",
        "value": 31738808,
        "comparator": "<="
    }
    upper_bound = {
        "type": "IndefiniteRange",
        "value": 32217725,
        "comparator": ">="
    }
    location = {
        "type": "SequenceLocation",
        "id": "ga4gh:VSL.9moblqAMqfEryr9pRUxqZMiOkqbsy5Ml",
        "sequence_id": "ga4gh:SQ.AsXvWL1-2i5U_buw6_niVIxD6zTbAuS6",
        "interval": {
            "type": "SequenceInterval",
            "start": lower_bound,
            "end": upper_bound
        }
    }
    return AbsoluteCopyNumber(
        type="AbsoluteCopyNumber",
        id="ga4gh:VAC.oTO2JUsQdoJ2fudae5uO5uVNvIu7oA8m",
        subject=location,
        copies={"type": "Number", "value": 2}
    )


@pytest.fixture(scope="module")
def copy_number_loss1():
    """Create test fixture for clinvar copy number loss.

    https://www.ncbi.nlm.nih.gov/clinvar/variation/1299222/?new_evidence=true

    :return: AbsoluteCopyNumber with 1 copy whose subject is a
        SequenceLocation bounded by two IndefiniteRange values
    """
    # NOTE(review): the ClinVar reference above was previously 146181, which
    # is the record cited for the chr15 copy number gain. 1299222 is the
    # record cited by the loss test that consumes this fixture -- confirm.
    variation = {
        "type": "AbsoluteCopyNumber",
        "id": "ga4gh:VAC.5QpagPqyrE4vUigmRi58NrPmPhQsI7kM",
        "subject": {
            "type": "SequenceLocation",
            "id": "ga4gh:VSL.Szlw1t4YMuaO7lLwFJ-T7fGTcXuhNNKB",
            "sequence_id": "ga4gh:SQ.v7noePfnNpK8ghYXEqZ9NukMXW7YeNsm",
            "interval": {
                "type": "SequenceInterval",
                # Both bounds are indefinite: "<=" on start, ">=" on end.
                "start": {
                    "type": "IndefiniteRange",
                    "value": 10491131,
                    "comparator": "<="
                },
                "end": {
                    "type": "IndefiniteRange",
                    "value": 10535643,
                    "comparator": ">="
                }
            }
        },
        "copies": {"type": "Number", "value": 1}
    }
    return AbsoluteCopyNumber(**variation)


@pytest.fixture(scope="module")
def copy_number_loss2():
    """Build the expected Absolute Copy Number object for a ClinVar loss.

    Source record:
    https://www.ncbi.nlm.nih.gov/clinvar/variation/148425/?new_evidence=true

    :return: AbsoluteCopyNumber with 0 copies whose subject is a
        SequenceLocation bounded by two IndefiniteRange values
    """
    # Both interval bounds are indefinite: "<=" on the start, ">=" on the end.
    lower_bound = {
        "type": "IndefiniteRange",
        "value": 10000,
        "comparator": "<="
    }
    upper_bound = {
        "type": "IndefiniteRange",
        "value": 1223133,
        "comparator": ">="
    }
    location = {
        "type": "SequenceLocation",
        "id": "ga4gh:VSL.Bp-86GeYti1DBmrj_Dtz7qNIMF5ygx5y",
        "sequence_id": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5",
        "interval": {
            "type": "SequenceInterval",
            "start": lower_bound,
            "end": upper_bound
        }
    }
    return AbsoluteCopyNumber(
        type="AbsoluteCopyNumber",
        id="ga4gh:VAC.ZWN8WnEksqBj4bKFIB60Wag6hGeeobB5",
        subject=location,
        copies={"type": "Number", "value": 0}
    )


def test_parsed_copy_number_gain(test_query_handler, copy_number_gain1,
                                 copy_number_gain2):
    """Check parsed_to_abs_cnv output for parsed copy number gain queries.

    Each ClinVar record is queried several times, varying only how the
    sequence is identified (assembly name + chromosome, or accession).
    Every query must return the expected variation and no warnings.
    """
    # https://www.ncbi.nlm.nih.gov/clinvar/variation/145208/?new_evidence=true
    gain1_contexts = (
        {"assembly": "GRCh37", "chr": "chr1"},
        {"assembly": "hg19", "chr": "chr1"},
        {"accession": "NC_000001.10"},
    )
    # https://www.ncbi.nlm.nih.gov/clinvar/variation/146181/?new_evidence=true
    gain2_contexts = (
        {"assembly": "GRCh38", "chr": "chr15"},
        {"assembly": "GRCh38", "chr": "15"},
        {"assembly": "hg38", "chr": "chr15"},
        {"accession": "NC_000015.10"},
    )
    # (start, end, total_copies), sequence contexts, expected variation
    cases = (
        ((143134063, 143284670, 3), gain1_contexts, copy_number_gain1),
        ((31738809, 32217725, 2), gain2_contexts, copy_number_gain2),
    )
    for coords, contexts, expected in cases:
        for context in contexts:
            variation, warnings = test_query_handler.parsed_to_abs_cnv(
                *coords, **context)
            assert variation.dict() == expected.dict()
            assert warnings == []


def test_parsed_copy_number_loss(test_query_handler, copy_number_loss1,
                                 copy_number_loss2):
    """Check parsed_to_abs_cnv output for parsed copy number loss queries.

    Each ClinVar record is queried several times, varying only how the
    sequence is identified (ClinVarAssembly member + chromosome, or
    accession). Every query must return the expected variation and no
    warnings.
    """
    # https://www.ncbi.nlm.nih.gov/clinvar/variation/1299222/?new_evidence=true
    loss1_contexts = (
        {"assembly": ClinVarAssembly.GRCH37, "chr": "chrX"},
        {"assembly": ClinVarAssembly.HG19, "chr": "chrX"},
        {"assembly": ClinVarAssembly.HG19, "chr": "X"},
        {"accession": "NC_000023.10"},
    )
    # https://www.ncbi.nlm.nih.gov/clinvar/variation/148425/?new_evidence=true
    loss2_contexts = (
        {"assembly": ClinVarAssembly.GRCH38, "chr": "chrY"},
        {"assembly": ClinVarAssembly.HG38, "chr": "chrY"},
        {"accession": "NC_000024.10"},
    )
    # (start, end, total_copies), sequence contexts, expected variation
    cases = (
        ((10491132, 10535643, 1), loss1_contexts, copy_number_loss1),
        ((10001, 1223133, 0), loss2_contexts, copy_number_loss2),
    )
    for coords, contexts, expected in cases:
        for context in contexts:
            variation, warnings = test_query_handler.parsed_to_abs_cnv(
                *coords, **context)
            assert variation.dict() == expected.dict()
            assert warnings == []


def test_invalid(test_query_handler):
    """Test that invalid queries return a Text variation and warnings.

    Every case below must yield a response whose ``type`` is "Text" together
    with exactly the listed warning messages.
    """
    unsupported_w = ["NCBI36 assembly is not current supported"]
    missing_w = ["Must provide either `accession` or both `assembly` and `chr`."]

    # Each entry: positional (start, end, total_copies), keyword args, warnings
    cases = [
        # NCBI36/hg18 assembly
        # https://www.ncbi.nlm.nih.gov/clinvar/variation/443961/?new_evidence=true
        ((2623228, 3150942, 3),
         {"assembly": ClinVarAssembly.NCBI36, "chr": "chr1"},
         unsupported_w),
        ((2623228, 3150942, 3),
         {"assembly": ClinVarAssembly.HG18, "chr": "chr1"},
         unsupported_w),
        # Must give both assembly + chr or accession
        ((31738809, 32217725, 2), {"assembly": "hg38"}, missing_w),
        ((31738809, 32217725, 2), {"chr": "chr15"}, missing_w),
        ((31738809, 32217725, 2), {}, missing_w),
        # invalid chr
        ((10001, 1223133, 0),
         {"assembly": ClinVarAssembly.GRCH38, "chr": "z"},
         ["SeqRepo unable to get translated identifiers for GRCh38:z"]),
        # invalid assembly
        ((10001, 1223133, 0),
         {"assembly": "GRCh99", "chr": "Y"},
         ["SeqRepo unable to get translated identifiers for GRCh99:Y"]),
        # invalid accession
        ((10491132, 10535643, 1),
         {"accession": "NC_00002310"},
         ["NC_00002310 does not exist in SeqRepo"]),
        # Invalid position
        ((31738809, 2302991250, 2),
         {"accession": "NC_000015.10"},
         ["Position out of range (2302991250)"]),
    ]
    for positional, keywords, expected_w in cases:
        resp, w = test_query_handler.parsed_to_abs_cnv(*positional, **keywords)
        assert resp.type == "Text"
        assert w == expected_w
58 changes: 58 additions & 0 deletions variation/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from variation.schemas.normalize_response_schema \
import HGVSDupDelMode as HGVSDupDelModeEnum, ToCanonicalVariationFmt, \
ToCanonicalVariationService, TranslateIdentifierService
from variation.schemas.service_schema import ClinVarAssembly, ParsedToAbsCnvQuery, \
ParsedToAbsCnvService
from .version import __version__
from .schemas.vrs_python_translator_schema import TranslateFromFormat, \
TranslateFromService, TranslateFromQuery, VrsPythonMeta
Expand Down Expand Up @@ -502,3 +504,59 @@ async def hgvs_to_relative_copy_number(
),
relative_copy_number=variation
)


# Human-readable descriptions attached to the query parameters of the
# /variation/parsed_to_abs_cnv endpoint.
assembly_descr = (
    "Assembly. If `accession` is set, will ignore `assembly` and `chr`. "
    "If `accession` not set, must provide both `assembly` and `chr`."
)
chr_descr = "Chromosome. Must set when `assembly` is set."
accession_descr = (
    "Accession. If `accession` is set, will ignore `assembly` and "
    "`chr`. If `accession` not set, must provide both `assembly` and `chr`."
)
start_descr = "Start position as residue coordinate"
end_descr = "End position as residue coordinate"
total_copies_descr = "Total copies for Absolute Copy Number variation object"


@app.get("/variation/parsed_to_abs_cnv",
         summary="Given parsed ClinVar Copy Number Gain/Loss components, return "
                 "absolute copy number variation",
         response_description="A response to a validly-formed query.",
         description="Return VRS Absolute Copy Number Variation",
         response_model=ParsedToAbsCnvService,
         tags=[Tags.TO_COPY_NUMBER_VARIATION]
         )
def parsed_to_abs_cnv(
    assembly: Optional[ClinVarAssembly] = Query(None, description=assembly_descr),
    chr: Optional[str] = Query(None, description=chr_descr),
    accession: Optional[str] = Query(None, description=accession_descr),
    start: int = Query(..., description=start_descr),
    end: int = Query(..., description=end_descr),
    total_copies: int = Query(..., description=total_copies_descr)
) -> ParsedToAbsCnvService:
    """Given parsed ClinVar Copy Number Gain/Loss components, return Absolute
    Copy Number Variation

    :param Optional[ClinVarAssembly] assembly: Assembly. If `accession` is set,
        will ignore `assembly` and `chr`. If `accession` not set, must provide
        both `assembly` and `chr`.
    :param Optional[str] chr: Chromosome. Must set when `assembly` is set.
    :param Optional[str] accession: Accession. If `accession` is set,
        will ignore `assembly` and `chr`. If `accession` not set, must provide
        both `assembly` and `chr`.
    :param int start: Start position as residue coordinate
    :param int end: End position as residue coordinate
    :param int total_copies: Total copies for Absolute Copy Number variation object
    :return: ParsedToAbsCnvService holding the echoed query, the variation
        returned by the query handler, the list of warnings, and service
        metadata (version and response datetime)
    """
    # `chr` shadows the builtin, but it is part of this endpoint's public
    # signature, so the name is left unchanged.
    # The optional arguments are passed by keyword so the call does not depend
    # on the positional order of the handler's optional parameters.
    variation, warnings = query_handler.parsed_to_abs_cnv(
        start, end, total_copies,
        assembly=assembly, chr=chr, accession=accession)

    # Echo the caller's inputs back in the response.
    query = ParsedToAbsCnvQuery(assembly=assembly, chr=chr, accession=accession,
                                start=start, end=end, total_copies=total_copies)
    return ParsedToAbsCnvService(
        query=query,
        absolute_copy_number=variation,
        warnings=warnings,
        service_meta_=ServiceMeta(
            version=__version__,
            response_datetime=datetime.now()
        )
    )
Loading

0 comments on commit 3806317

Please sign in to comment.