From c8f541917dd9aedcad8878a5d220e6d4d8ff8fe6 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 23 Aug 2023 22:20:19 -0400 Subject: [PATCH 01/16] fix: allele normalization + pin pydantic version --- setup.cfg | 2 +- src/ga4gh/vrs/normalize.py | 152 ++++- tests/cassettes/test_normalize_allele.yaml | 707 ++++----------------- tests/test_vrs_normalize.py | 32 +- 4 files changed, 265 insertions(+), 628 deletions(-) diff --git a/setup.cfg b/setup.cfg index 599734cc..bad405ee 100644 --- a/setup.cfg +++ b/setup.cfg @@ -54,7 +54,7 @@ install_requires = jsonschema>=4.17.3 numpy pyyaml - pydantic>=2.0.3 + pydantic == 2.1.1 setup_requires = cython pytest-runner diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index cac2e0a4..c4f98ce2 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -3,8 +3,8 @@ See https://vrs.ga4gh.org/en/stable/impl-guide/normalization.html """ - -import logging +from enum import IntEnum +from typing import NamedTuple, Optional, Union from bioutils.normalize import normalize as _normalize, NormalizationMode from ga4gh.core import is_pydantic_instance, ga4gh_digest, pydantic_copy @@ -12,52 +12,162 @@ from ._internal import models from .dataproxy import SequenceProxy -_logger = logging.getLogger(__name__) +class PosType(IntEnum): + """Define the kind of position on a location""" -def _normalize_allele(input_allele, data_proxy): + INTEGER = 0 + RANGE_LT_OR_EQUAL = 1 + RANGE_GT_OR_EQUAL = 2 + + +class LocationPos(NamedTuple): + """Define Allele location pos value and type""" + + value: int + pos_type: PosType + + +def _get_allele_location_pos( + allele_vo: models.Allele, use_start: bool = True +) -> Optional[LocationPos]: + """Get Allele location start or end value for interval + + :param allele_vo: VRS Allele object + :param use_start: `True` if using `allele_vo.location.start`. `False` if using + `allele_vo.location.end` + :return: A `LocationPos` if using integer or indefinite range. Otherwise return + `None` """ - Converts .location.sequence into an IRI if it is a SequenceReference because it makes the code simpler. - If it started as a sequence reference, put it back as one at the end. + if use_start: + pos = allele_vo.location.start + else: + pos = allele_vo.location.end + + if isinstance(pos, int): + val = pos + pos_type = PosType.INTEGER + else: + pos0_is_none = pos.root[0] is None + pos1_is_none = pos.root[1] is None + + if not pos0_is_none and not pos1_is_none: # definite range + return None + + val = pos.root[0] or pos.root[1] + if pos0_is_none: + pos_type = PosType.RANGE_LT_OR_EQUAL + else: + pos_type = PosType.RANGE_GT_OR_EQUAL + + return LocationPos(value=val, pos_type=pos_type) + + +def _get_new_allele_location_pos( + new_pos_val: int, pos_type: PosType +) -> Union[int, models.Range]: + """Get updated location pos on normalized allele + + :param new_pos_val: New position after normalization + :param pos_type: Original position type used in pre-normalized VRS Allele object + :return: Updated position as integer or VRS Range object + """ + if pos_type == PosType.INTEGER: + val = new_pos_val + elif pos_type == PosType.RANGE_LT_OR_EQUAL: + val = models.Range([None, new_pos_val]) + else: + val = models.Range([new_pos_val, None]) + return val + + +def _normalize_allele(input_allele, data_proxy): + """Normalize Allele using "fully-justified" normalization adapted from NCBI's + VOCA. Fully-justified normalization expands such ambiguous representation over the + entire region of ambiguity, resulting in an unambiguous representation that may be + readily compared with other alleles. + + Does not attempt to normalize Allele's with definite ranges. Will return the + `input_allele` """ allele = pydantic_copy(input_allele) + + # Temporarily convert SequenceReference to IRI because it makes the code simpler. + # This will be changed back to SequenceReference at the end of the method sequence_reference = None if isinstance(allele.location.sequence, models.SequenceReference): sequence_reference = allele.location.sequence allele.location.sequence = models.IRI(sequence_reference.refgetAccession) - sequence = SequenceProxy(data_proxy, allele.location.sequence.root) + # Get reference sequence and interval + ref_seq = SequenceProxy(data_proxy, allele.location.sequence.root) + start = _get_allele_location_pos(allele, use_start=True) + if start is None: + return input_allele - ival = (allele.location.start, allele.location.end) + end = _get_allele_location_pos(allele, use_start=False) + if end is None: + return input_allele - _allele_state = allele.state.type - _states_with_sequence = ["ReferenceLengthExpression", "LiteralSequenceExpression"] - if _allele_state in _states_with_sequence: + ival = (start.value, end.value) + + # Get alleles (the sequences to be normalized) for _normalize + if allele.state.sequence: alleles = (None, allele.state.sequence.root) - elif _allele_state == "RepeatedSequenceExpression" and \ - allele.state.seq_expr.type in _states_with_sequence: - alleles = (None, allele.state.seq_expr.sequence.root) else: alleles = (None, "") - new_allele = pydantic_copy(allele) + # If one of Reference Allele Sequence or Alternate Allele Sequence is empty, + # store the length of the non-empty sequence: this is the Repeat Subunit Length + len_ref_seq = len(ref_seq[ival[0]: ival[1]]) + len_alt_seq = len(alleles[1]) + if not len_ref_seq and len_alt_seq: + # Insertion + repeat_subunit_len = len_alt_seq + elif len_ref_seq and not len_alt_seq: + # Deletion + repeat_subunit_len = len_ref_seq + else: + repeat_subunit_len = 0 + new_allele = pydantic_copy(allele) try: - new_ival, new_alleles = _normalize(sequence, + new_ival, new_alleles = _normalize(ref_seq, ival, alleles=alleles, mode=NormalizationMode.EXPAND, anchor_length=0) - new_allele.location.start = new_ival[0] - new_allele.location.end = new_ival[1] - - if new_allele.state.type in _states_with_sequence: - new_allele.state.sequence = models.SequenceString(new_alleles[1]) + new_allele.location.start = _get_new_allele_location_pos( + new_ival[0], start.pos_type + ) + new_allele.location.end = _get_new_allele_location_pos( + new_ival[1], end.pos_type + ) + new_ref_seq = ref_seq[new_ival[0]: new_ival[1]] + + if not new_ref_seq: + # If the reference sequence is empty this is an unambiguous insertion. + # Return a new Allele with the trimmed alternate sequence as a Literal + # Sequence Expression + new_allele.state = models.LiteralSequenceExpression( + sequence=models.SequenceString(new_alleles[1]) + ) + else: + # Otherwise, return a new Allele using a reference length expression, using + # a Location specified by the coordinates of the new ival, a length + # specified by the length of the alternate allele, and a repeat subunit + # length + allele.state = models.ReferenceLengthExpression( + length=len(new_alleles[1]), + sequence=models.SequenceString(new_ref_seq), + repeat_subunit_length=repeat_subunit_len + ) except ValueError: # Occurs for ref agree Alleles (when alt = ref) pass + # Convert IRI back to SequenceReference if sequence_reference: new_allele.location.sequence = sequence_reference diff --git a/tests/cassettes/test_normalize_allele.yaml b/tests/cassettes/test_normalize_allele.yaml index 4ac50f74..920f284f 100644 --- a/tests/cassettes/test_normalize_allele.yaml +++ b/tests/cassettes/test_normalize_allele.yaml @@ -9,31 +9,34 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.24.0 + - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000019.10 + uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000006.12 response: body: - string: "{\n \"added\": \"2016-08-24T08:19:02Z\",\n \"aliases\": [\n \"Ensembl:19\",\n - \ \"ensembl:19\",\n \"GRCh38:19\",\n \"GRCh38:chr19\",\n \"GRCh38.p1:19\",\n - \ \"GRCh38.p1:chr19\",\n \"GRCh38.p10:19\",\n \"GRCh38.p10:chr19\",\n - \ \"GRCh38.p11:19\",\n \"GRCh38.p11:chr19\",\n \"GRCh38.p12:19\",\n - \ \"GRCh38.p12:chr19\",\n \"GRCh38.p2:19\",\n \"GRCh38.p2:chr19\",\n - \ \"GRCh38.p3:19\",\n \"GRCh38.p3:chr19\",\n \"GRCh38.p4:19\",\n \"GRCh38.p4:chr19\",\n - \ \"GRCh38.p5:19\",\n \"GRCh38.p5:chr19\",\n \"GRCh38.p6:19\",\n \"GRCh38.p6:chr19\",\n - \ \"GRCh38.p7:19\",\n \"GRCh38.p7:chr19\",\n \"GRCh38.p8:19\",\n \"GRCh38.p8:chr19\",\n - \ \"GRCh38.p9:19\",\n \"GRCh38.p9:chr19\",\n \"MD5:b0eba2c7bb5c953d1e06a508b5e487de\",\n - \ \"NCBI:NC_000019.10\",\n \"refseq:NC_000019.10\",\n \"SEGUID:AHxM5/L8jIX08UhBBkKXkiO5rhY\",\n - \ \"SHA1:007c4ce7f2fc8c85f4f148410642979223b9ae16\",\n \"VMC:GS_IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n - \ \"sha512t24u:IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n \"ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\"\n - \ ],\n \"alphabet\": \"ACGNT\",\n \"length\": 58617616\n}\n" + string: "{\n \"added\": \"2016-08-27T21:22:36Z\",\n \"aliases\": [\n \"GRCh38:6\",\n + \ \"GRCh38:chr6\",\n \"GRCh38.p1:6\",\n \"GRCh38.p1:chr6\",\n \"GRCh38.p10:6\",\n + \ \"GRCh38.p10:chr6\",\n \"GRCh38.p11:6\",\n \"GRCh38.p11:chr6\",\n + \ \"GRCh38.p12:6\",\n \"GRCh38.p12:chr6\",\n \"GRCh38.p2:6\",\n \"GRCh38.p2:chr6\",\n + \ \"GRCh38.p3:6\",\n \"GRCh38.p3:chr6\",\n \"GRCh38.p4:6\",\n \"GRCh38.p4:chr6\",\n + \ \"GRCh38.p5:6\",\n \"GRCh38.p5:chr6\",\n \"GRCh38.p6:6\",\n \"GRCh38.p6:chr6\",\n + \ \"GRCh38.p7:6\",\n \"GRCh38.p7:chr6\",\n \"GRCh38.p8:6\",\n \"GRCh38.p8:chr6\",\n + \ \"GRCh38.p9:6\",\n \"GRCh38.p9:chr6\",\n \"MD5:5691468a67c7e7a7b5f2a3a683792c29\",\n + \ \"NCBI:NC_000006.12\",\n \"refseq:NC_000006.12\",\n \"SEGUID:WZuaTlR1qIRxrJ5dpG2Z0ydeqX4\",\n + \ \"SHA1:599b9a4e5475a88471ac9e5da46d99d3275ea97e\",\n \"VMC:GS_0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV\",\n + \ \"sha512t24u:0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV\",\n \"ga4gh:SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV\"\n + \ ],\n \"alphabet\": \"ACGNTY\",\n \"length\": 170805979\n}\n" headers: + Connection: + - close Content-Length: - - '1035' + - '975' Content-Type: - application/json + Date: + - Thu, 24 Aug 2023 02:16:39 GMT Server: - - TornadoServer/6.0.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -47,97 +50,23 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.24.0 + - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000019.10?start=44908821&end=44908822 + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000006.12?start=26090950&end=26090951 response: body: string: C headers: - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000019.10?start=44908820&end=44908821 - response: - body: - string: G - headers: + - close Content-Length: - '1' Content-Type: - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:16:39 GMT Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000019.10?start=44908822&end=44908823 - response: - body: - string: G - headers: - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000019.10?start=44908821&end=44908821 - response: - body: - string: '' - headers: - Content-Length: - - '0' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -151,173 +80,23 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.24.0 + - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000019.10?start=44908822&end=44908822 - response: - body: - string: '' - headers: - Content-Length: - - '0' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000019.10 - response: - body: - string: "{\n \"added\": \"2016-08-24T08:19:02Z\",\n \"aliases\": [\n \"Ensembl:19\",\n - \ \"ensembl:19\",\n \"GRCh38:19\",\n \"GRCh38:chr19\",\n \"GRCh38.p1:19\",\n - \ \"GRCh38.p1:chr19\",\n \"GRCh38.p10:19\",\n \"GRCh38.p10:chr19\",\n - \ \"GRCh38.p11:19\",\n \"GRCh38.p11:chr19\",\n \"GRCh38.p12:19\",\n - \ \"GRCh38.p12:chr19\",\n \"GRCh38.p2:19\",\n \"GRCh38.p2:chr19\",\n - \ \"GRCh38.p3:19\",\n \"GRCh38.p3:chr19\",\n \"GRCh38.p4:19\",\n \"GRCh38.p4:chr19\",\n - \ \"GRCh38.p5:19\",\n \"GRCh38.p5:chr19\",\n \"GRCh38.p6:19\",\n \"GRCh38.p6:chr19\",\n - \ \"GRCh38.p7:19\",\n \"GRCh38.p7:chr19\",\n \"GRCh38.p8:19\",\n \"GRCh38.p8:chr19\",\n - \ \"GRCh38.p9:19\",\n \"GRCh38.p9:chr19\",\n \"MD5:b0eba2c7bb5c953d1e06a508b5e487de\",\n - \ \"NCBI:NC_000019.10\",\n \"refseq:NC_000019.10\",\n \"SEGUID:AHxM5/L8jIX08UhBBkKXkiO5rhY\",\n - \ \"SHA1:007c4ce7f2fc8c85f4f148410642979223b9ae16\",\n \"VMC:GS_IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n - \ \"sha512t24u:IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n \"ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\"\n - \ ],\n \"alphabet\": \"ACGNT\",\n \"length\": 58617616\n}\n" - headers: - Content-Length: - - '1035' - Content-Type: - - application/json - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000019.10?start=44908821&end=44908822 + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000006.12?start=26090950&end=26090951 response: body: string: C headers: - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000019.10 - response: - body: - string: "{\n \"added\": \"2016-08-24T08:19:02Z\",\n \"aliases\": [\n \"Ensembl:19\",\n - \ \"ensembl:19\",\n \"GRCh38:19\",\n \"GRCh38:chr19\",\n \"GRCh38.p1:19\",\n - \ \"GRCh38.p1:chr19\",\n \"GRCh38.p10:19\",\n \"GRCh38.p10:chr19\",\n - \ \"GRCh38.p11:19\",\n \"GRCh38.p11:chr19\",\n \"GRCh38.p12:19\",\n - \ \"GRCh38.p12:chr19\",\n \"GRCh38.p2:19\",\n \"GRCh38.p2:chr19\",\n - \ \"GRCh38.p3:19\",\n \"GRCh38.p3:chr19\",\n \"GRCh38.p4:19\",\n \"GRCh38.p4:chr19\",\n - \ \"GRCh38.p5:19\",\n \"GRCh38.p5:chr19\",\n \"GRCh38.p6:19\",\n \"GRCh38.p6:chr19\",\n - \ \"GRCh38.p7:19\",\n \"GRCh38.p7:chr19\",\n \"GRCh38.p8:19\",\n \"GRCh38.p8:chr19\",\n - \ \"GRCh38.p9:19\",\n \"GRCh38.p9:chr19\",\n \"MD5:b0eba2c7bb5c953d1e06a508b5e487de\",\n - \ \"NCBI:NC_000019.10\",\n \"refseq:NC_000019.10\",\n \"SEGUID:AHxM5/L8jIX08UhBBkKXkiO5rhY\",\n - \ \"SHA1:007c4ce7f2fc8c85f4f148410642979223b9ae16\",\n \"VMC:GS_IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n - \ \"sha512t24u:IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n \"ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\"\n - \ ],\n \"alphabet\": \"ACGNT\",\n \"length\": 58617616\n}\n" - headers: - Content-Length: - - '1035' - Content-Type: - - application/json - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000019.10?start=44908821&end=44908823 - response: - body: - string: CG - headers: - Content-Length: - - '2' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000019.10?start=44908820&end=44908821 - response: - body: - string: G - headers: + - close Content-Length: - '1' Content-Type: - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:16:39 GMT Server: - - TornadoServer/6.0.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -331,71 +110,34 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.24.0 + - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000019.10?start=44908819&end=44908820 + uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000023.11 response: body: - string: A - headers: - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null + string: "{\n \"added\": \"2016-08-27T23:57:18Z\",\n \"aliases\": [\n \"GRCh38:X\",\n + \ \"GRCh38:chrX\",\n \"GRCh38.p1:X\",\n \"GRCh38.p1:chrX\",\n \"GRCh38.p10:X\",\n + \ \"GRCh38.p10:chrX\",\n \"GRCh38.p11:X\",\n \"GRCh38.p11:chrX\",\n + \ \"GRCh38.p12:X\",\n \"GRCh38.p12:chrX\",\n \"GRCh38.p2:X\",\n \"GRCh38.p2:chrX\",\n + \ \"GRCh38.p3:X\",\n \"GRCh38.p3:chrX\",\n \"GRCh38.p4:X\",\n \"GRCh38.p4:chrX\",\n + \ \"GRCh38.p5:X\",\n \"GRCh38.p5:chrX\",\n \"GRCh38.p6:X\",\n \"GRCh38.p6:chrX\",\n + \ \"GRCh38.p7:X\",\n \"GRCh38.p7:chrX\",\n \"GRCh38.p8:X\",\n \"GRCh38.p8:chrX\",\n + \ \"GRCh38.p9:X\",\n \"GRCh38.p9:chrX\",\n \"MD5:2b3a55ff7f58eb308420c8a9b11cac50\",\n + \ \"NCBI:NC_000023.11\",\n \"refseq:NC_000023.11\",\n \"SEGUID:Z9QbQrrPjpjXSMJesDYqC3A43lA\",\n + \ \"SHA1:67d41b42bacf8e98d748c25eb0362a0b7038de50\",\n \"VMC:GS_w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\",\n + \ \"sha512t24u:w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\",\n \"ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\"\n + \ ],\n \"alphabet\": \"ACGNRSTWY\",\n \"length\": 156040895\n}\n" headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000019.10?start=44908823&end=44908824 - response: - body: - string: C - headers: + - close Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000019.10?start=44908824&end=44908825 - response: - body: - string: C - headers: - Content-Length: - - '1' + - '978' Content-Type: - - text/plain; charset=utf-8 + - application/json + Date: + - Thu, 24 Aug 2023 02:16:39 GMT Server: - - TornadoServer/6.0.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -409,45 +151,23 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.24.0 + - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000019.10?start=44908820&end=44908821 + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980377 response: body: - string: G + string: TA headers: - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000019.10?start=44908823&end=44908824 - response: - body: - string: C - headers: + - close Content-Length: - - '1' + - '2' Content-Type: - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:16:39 GMT Server: - - TornadoServer/6.0.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -461,56 +181,23 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000023.11 - response: - body: - string: "{\n \"added\": \"2016-08-27T23:57:18Z\",\n \"aliases\": [\n \ - \"GRCh38:X\",\n \"GRCh38:chrX\",\n \"GRCh38.p1:X\",\n \"GRCh38.p1:chrX\",\n \" - GRCh38.p10:X\",\n \"GRCh38.p10:chrX\",\n \"GRCh38.p11:X\",\n \"GRCh38.p11:chrX\",\n \" - GRCh38.p12:X\",\n \"GRCh38.p12:chrX\",\n \"GRCh38.p2:X\",\n \"GRCh38.p2:chrX\",\n \" - GRCh38.p3:X\",\n \"GRCh38.p3:chrX\",\n \"GRCh38.p4:X\",\n \"GRCh38.p4:chrX\",\n \" - GRCh38.p5:X\",\n \"GRCh38.p5:chrX\",\n \"GRCh38.p6:X\",\n \"GRCh38.p6:chrX\",\n \" - GRCh38.p7:X\",\n \"GRCh38.p7:chrX\",\n \"GRCh38.p8:X\",\n \"GRCh38.p8:chrX\",\n \" - GRCh38.p9:X\",\n \"GRCh38.p9:chrX\",\n \"MD5:2b3a55ff7f58eb308420c8a9b11cac50\",\n \" - NCBI:NC_000023.11\",\n \"refseq:NC_000023.11\",\n \"SEGUID:Z9QbQrrPjpjXSMJesDYqC3A43lA\",\n \" - SHA1:67d41b42bacf8e98d748c25eb0362a0b7038de50\",\n \"VMC:GS_w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\",\n \" - sha512t24u:w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\",\n \"ga4gh:GS.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\"\n ],\n \" - alphabet\": \"ACGNRSTWY\",\n \"length\": 156040895\n}\n" - headers: - Content-Length: - - '978' - Content-Type: - - application/json - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980377 response: body: - string: "TA" + string: TA headers: + Connection: + - close Content-Length: - '2' Content-Type: - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:16:39 GMT Server: - - TornadoServer/6.0.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -524,19 +211,23 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.24.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980374&end=155980375 response: body: - string: "T" + string: T headers: + Connection: + - close Content-Length: - '1' Content-Type: - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:16:39 GMT Server: - - TornadoServer/6.0.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -550,19 +241,23 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.24.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980377&end=155980378 response: body: - string: "A" + string: A headers: + Connection: + - close Content-Length: - '1' Content-Type: - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:16:39 GMT Server: - - TornadoServer/6.0.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -576,19 +271,23 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.24.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980375 response: body: - string: "" + string: '' headers: + Connection: + - close Content-Length: - '0' Content-Type: - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:16:39 GMT Server: - - TornadoServer/6.0.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -602,19 +301,23 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.24.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980377&end=155980377 response: body: - string: "" + string: '' headers: + Connection: + - close Content-Length: - '0' Content-Type: - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:16:39 GMT Server: - - TornadoServer/6.0.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -628,86 +331,23 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.24.0 + - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000001.11 + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980377 response: body: - string: "{\n \"added\": \"2016-08-27T21:17:00Z\",\n \"aliases\": [\n \" - GRCh38:1\",\n \"GRCh38:chr1\",\n \"GRCh38.p1:1\",\n \"GRCh38.p1:chr1\" - ,\n \"GRCh38.p10:1\",\n \"GRCh38.p10:chr1\",\n \"GRCh38.p11:1\",\n \" - GRCh38.p11:chr1\",\n \"GRCh38.p12:1\",\n \"GRCh38.p12:chr1\",\n \" - GRCh38.p2:1\",\n \"GRCh38.p2:chr1\",\n \"GRCh38.p3:1\",\n \"GRCh38.p3:chr1\" - ,\n \"GRCh38.p4:1\",\n \"GRCh38.p4:chr1\",\n \"GRCh38.p5:1\",\n \" - GRCh38.p5:chr1\",\n \"GRCh38.p6:1\",\n \"GRCh38.p6:chr1\",\n \" - GRCh38.p7:1\",\n \"GRCh38.p7:chr1\",\n \"GRCh38.p8:1\",\n \" - GRCh38.p8:chr1\",\n \"GRCh38.p9:1\",\n \"GRCh38.p9:chr1\",\n \" - MD5:6aef897c3d6ff0c78aff06ac189178dd\",\n \"NCBI:NC_000001.11\",\n \" - refseq:NC_000001.11\",\n \"SEGUID:FCUd6VJ6uikS/VWLbhGdVmj2rOA\",\n \" - SHA1:14251de9527aba2912fd558b6e119d5668f6ace0\",\n \" - VMC:GS_Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO\",\n \" - sha512t24u:Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO\",\n \" - ga4gh:GS.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO\"\n ],\n \"alphabet\": \" - ACGMNRT\",\n \"length\": 248956422\n}\n" - headers: - Content-Length: - - '976' - Content-Type: - - application/json - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null + string: TA headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000001.11?start=244988599&end=244988601 - response: - body: - string: "GC" - headers: + - close Content-Length: - '2' Content-Type: - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:16:39 GMT Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000001.11?start=244988598&end=244988599 - response: - body: - string: "G" - headers: - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -721,139 +361,34 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.24.0 + - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000001.11?start=244988601&end=244988602 - response: - body: - string: "T" - headers: - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000001.11?start=244988599&end=244988599 - response: - body: - string: "" - headers: - Content-Length: - - '0' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000001.11?start=244988601&end=244988601 + uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000023.11 response: body: - string: "" - headers: - Content-Length: - - '0' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null + string: "{\n \"added\": \"2016-08-27T23:57:18Z\",\n \"aliases\": [\n \"GRCh38:X\",\n + \ \"GRCh38:chrX\",\n \"GRCh38.p1:X\",\n \"GRCh38.p1:chrX\",\n \"GRCh38.p10:X\",\n + \ \"GRCh38.p10:chrX\",\n \"GRCh38.p11:X\",\n \"GRCh38.p11:chrX\",\n + \ \"GRCh38.p12:X\",\n \"GRCh38.p12:chrX\",\n \"GRCh38.p2:X\",\n \"GRCh38.p2:chrX\",\n + \ \"GRCh38.p3:X\",\n \"GRCh38.p3:chrX\",\n \"GRCh38.p4:X\",\n \"GRCh38.p4:chrX\",\n + \ \"GRCh38.p5:X\",\n \"GRCh38.p5:chrX\",\n \"GRCh38.p6:X\",\n \"GRCh38.p6:chrX\",\n + \ \"GRCh38.p7:X\",\n \"GRCh38.p7:chrX\",\n \"GRCh38.p8:X\",\n \"GRCh38.p8:chrX\",\n + \ \"GRCh38.p9:X\",\n \"GRCh38.p9:chrX\",\n \"MD5:2b3a55ff7f58eb308420c8a9b11cac50\",\n + \ \"NCBI:NC_000023.11\",\n \"refseq:NC_000023.11\",\n \"SEGUID:Z9QbQrrPjpjXSMJesDYqC3A43lA\",\n + \ \"SHA1:67d41b42bacf8e98d748c25eb0362a0b7038de50\",\n \"VMC:GS_w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\",\n + \ \"sha512t24u:w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\",\n \"ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\"\n + \ ],\n \"alphabet\": \"ACGNRSTWY\",\n \"length\": 156040895\n}\n" headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000006.12 - response: - body: - string: "{\n \"added\": \"2016-08-27T21:22:36Z\",\n \"aliases\": [\n \" - GRCh38:6\",\n \"GRCh38:chr6\",\n \"GRCh38.p1:6\",\n \"GRCh38.p1:chr6\" - ,\n \"GRCh38.p10:6\",\n \"GRCh38.p10:chr6\",\n \"GRCh38.p11:6\",\n \" - GRCh38.p11:chr6\",\n \"GRCh38.p12:6\",\n \"GRCh38.p12:chr6\",\n \" - GRCh38.p2:6\",\n \"GRCh38.p2:chr6\",\n \"GRCh38.p3:6\",\n \" - GRCh38.p3:chr6\",\n \"GRCh38.p4:6\",\n \"GRCh38.p4:chr6\",\n \" - GRCh38.p5:6\",\n \"GRCh38.p5:chr6\",\n \"GRCh38.p6:6\",\n \" - GRCh38.p6:chr6\",\n \"GRCh38.p7:6\",\n \"GRCh38.p7:chr6\",\n \" - GRCh38.p8:6\",\n \"GRCh38.p8:chr6\",\n \"GRCh38.p9:6\",\n \" - GRCh38.p9:chr6\",\n \"MD5:5691468a67c7e7a7b5f2a3a683792c29\",\n \" - NCBI:NC_000006.12\",\n \"refseq:NC_000006.12\",\n \" - SEGUID:WZuaTlR1qIRxrJ5dpG2Z0ydeqX4\",\n \" - SHA1:599b9a4e5475a88471ac9e5da46d99d3275ea97e\",\n \" - VMC:GS_0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV\",\n \" - sha512t24u:0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV\",\n \" - ga4gh:GS.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV\"\n ],\n \"alphabet\": \" - ACGNTY\",\n \"length\": 170805979\n}\n" - headers: + - close Content-Length: - - '975' + - '978' Content-Type: - application/json + Date: + - Thu, 24 Aug 2023 02:16:39 GMT Server: - - TornadoServer/6.0.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.24.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000006.12?start=26090950&end=26090951 - response: - body: - string: "C" - headers: - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Server: - - TornadoServer/6.0.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK diff --git a/tests/test_vrs_normalize.py b/tests/test_vrs_normalize.py index ab1e54fe..908080bb 100644 --- a/tests/test_vrs_normalize.py +++ b/tests/test_vrs_normalize.py @@ -35,27 +35,18 @@ } } -seq_loc = { - "type": "SequenceLocation", - "sequence": "refseq:NC_000001.11", - "start": [None, 244988599], - "end": [244988601, None] - } allele_dict3 = { "type": "Allele", - "location": seq_loc, + "location": { + "type": "SequenceLocation", + "sequence": "refseq:NC_000023.11", + "start": [155980374, 155980375], + "end": [155980377, 155980378] + }, "state": { - "type": "RepeatedSequenceExpression", - "seq_expr": { - "location": seq_loc, - "type": "DerivedSequenceExpression", - "reverse_complement": False - }, - "count": { - "type": "Number", - "value": 2 - } + "sequence": "", + "type": "LiteralSequenceExpression" } } @@ -70,6 +61,7 @@ def test_normalize_allele(rest_dataproxy): allele2 = normalize(allele1, rest_dataproxy) assert allele1 == allele2 - allele1 = models.Allele(**allele_dict3) - allele2 = normalize(allele1, rest_dataproxy) - assert allele1 == allele2 + # Definite ranges are not normalized + allele3 = models.Allele(**allele_dict3) + allele3_after_norm = normalize(allele3, rest_dataproxy) + assert allele3_after_norm == allele3 From 1ed754c2963e17fb85cec9fe58f79f92fe720189 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 23 Aug 2023 22:36:40 -0400 Subject: [PATCH 02/16] re-run translator tests --- ...NC_000007.14:g.55181220del-expected2].yaml | 189 +++++++-- ...g.55181230_55181231insGGCT-expected3].yaml | 210 ++-------- ...NC_000007.14:g.55181320A>T-expected1].yaml | 132 +++++-- ...NC_000013.11:g.32316467dup-expected5].yaml | 195 +++++++-- ....11:g.32331093_32331094dup-expected4].yaml | 371 ++++-------------- ...s[NC_000013.11:g.32936732=-expected0].yaml | 64 ++- ...vs[NM_001331029.1:n.872A>G-expected6].yaml | 130 ++++-- ...hgvs[NM_181798.1:n.1263G>T-expected7].yaml | 130 ++++-- tests/extras/cassettes/test_to_spdi.yaml | 38 +- 9 files changed, 770 insertions(+), 689 deletions(-) diff --git a/tests/extras/cassettes/test_hgvs[NC_000007.14:g.55181220del-expected2].yaml b/tests/extras/cassettes/test_hgvs[NC_000007.14:g.55181220del-expected2].yaml index fcaf9952..a8d77d1b 100644 --- a/tests/extras/cassettes/test_hgvs[NC_000007.14:g.55181220del-expected2].yaml +++ b/tests/extras/cassettes/test_hgvs[NC_000007.14:g.55181220del-expected2].yaml @@ -9,7 +9,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul response: @@ -34,7 +34,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:34 GMT + - Thu, 24 Aug 2023 02:35:38 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -50,7 +50,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181219&end=55181220 response: @@ -64,7 +64,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:34 GMT + - Thu, 24 Aug 2023 02:35:38 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -80,7 +80,37 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181219&end=55181220 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:35:38 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181218&end=55181219 response: @@ -94,7 +124,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:34 GMT + - Thu, 24 Aug 2023 02:35:38 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -110,7 +140,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181220&end=55181221 response: @@ -124,7 +154,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:34 GMT + - Thu, 24 Aug 2023 02:35:38 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -140,7 +170,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181219&end=55181219 response: @@ -154,7 +184,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:34 GMT + - Thu, 24 Aug 2023 02:35:38 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -170,7 +200,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181220&end=55181220 response: @@ -184,7 +214,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:34 GMT + - Thu, 24 Aug 2023 02:35:38 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -200,7 +230,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181219&end=55181220 response: @@ -214,7 +244,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:34 GMT + - Thu, 24 Aug 2023 02:35:38 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -230,7 +260,102 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181219&end=55181220 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:35:38 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181219&seq_stop=55181220&tool=bioutils&email=biocommons-dev@googlegroups.com + response: + body: + string: !!binary | + H4sIAAAAAAAAALLzc443AAFzPUMTK1NTQwtDI0NLXQjDyEDBIz83X6E4sSAzNa9YITmjCMgtzs9N + VTDXUXAPcs4wttArMDRRCCjKzE0sqlRwLC5OzU3KqeRyDuHiAgAAAP//AwCZNMWOWwAAAA== + headers: + Access-Control-Allow-Origin: + - '*' + Access-Control-Expose-Headers: + - X-RateLimit-Limit,X-RateLimit-Remaining + Cache-Control: + - private + Connection: + - Keep-Alive + Content-Disposition: + - attachment; filename="sequence.fasta" + Content-Security-Policy: + - upgrade-insecure-requests + Content-Type: + - text/plain + Date: + - Thu, 24 Aug 2023 02:35:39 GMT + Keep-Alive: + - timeout=4, max=40 + NCBI-PHID: + - D0BD0EC90D3A84D50000545904B66F2D.1.1.m_5 + NCBI-SID: + - DD107D042EF5EC1F_991ASID + Referrer-Policy: + - origin-when-cross-origin + Server: + - Finatra + Set-Cookie: + - ncbi_sid=DD107D042EF5EC1F_991ASID; domain=.nih.gov; path=/; expires=Sat, 24 + Aug 2024 02:35:39 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-RateLimit-Limit: + - '3' + X-RateLimit-Remaining: + - '2' + X-UA-Compatible: + - IE=Edge + X-XSS-Protection: + - 1; mode=block + content-encoding: + - gzip + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 method: GET uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181220&seq_stop=55181220&tool=bioutils&email=biocommons-dev@googlegroups.com response: @@ -254,20 +379,20 @@ interactions: Content-Type: - text/plain Date: - - Sat, 17 Jun 2023 01:04:34 GMT + - Thu, 24 Aug 2023 02:35:40 GMT Keep-Alive: - timeout=4, max=40 NCBI-PHID: - - D0BD7A022C7C6FC500006196F8D78473.1.1.m_5 + - 939B3D945261CB950000438BDBB0F197.1.1.m_5 NCBI-SID: - - 91F987A90C4FE16A_3574SID + - EC13E242331DA962_27A7SID Referrer-Policy: - origin-when-cross-origin Server: - Finatra Set-Cookie: - - ncbi_sid=91F987A90C4FE16A_3574SID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:34 GMT + - ncbi_sid=EC13E242331DA962_27A7SID; domain=.nih.gov; path=/; expires=Sat, 24 + Aug 2024 02:35:40 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Transfer-Encoding: @@ -275,7 +400,7 @@ interactions: X-RateLimit-Limit: - '3' X-RateLimit-Remaining: - - '1' + - '2' X-UA-Compatible: - IE=Edge X-XSS-Protection: @@ -295,15 +420,15 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET - uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181220&seq_stop=55181240&tool=bioutils&email=biocommons-dev@googlegroups.com + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181219&seq_stop=55181240&tool=bioutils&email=biocommons-dev@googlegroups.com response: body: string: !!binary | - H4sIAAAAAAAAALLzc443AAFzPUMTK1NTQwtDIyMDXQjDxEDBIz83X6E4sSAzNa9YITmjCMgtzs9N - VTDXUXAPcs4wttArMDRRCCjKzE0sqlRwLC5OzU3KqeQKcXd3dHQHghBnZ8cQ9xB3ZyDg4gIAAAD/ - /wMAmg0v4W4AAAA= + H4sIAAAAAAAAALLzc443AAFzPUMTK1NTQwtDI0NLXQjDxEDBIz83X6E4sSAzNa9YITmjCMgtzs9N + VTDXUXAPcs4wttArMDRRCCjKzE0sqlRwLC5OzU3KqeRyDnF3d3R0B4IQZ2fHEPcQd2cg4OICAAAA + //8DAAHwlzhvAAAA headers: Access-Control-Allow-Origin: - '*' @@ -320,20 +445,20 @@ interactions: Content-Type: - text/plain Date: - - Sat, 17 Jun 2023 01:04:34 GMT + - Thu, 24 Aug 2023 02:35:41 GMT Keep-Alive: - timeout=4, max=40 NCBI-PHID: - - D0BD7A022C7C6FC500003D96FBDCD3A4.1.1.m_5 + - D0BD0EC90D3A84D500003B590E9810AC.1.1.m_5 NCBI-SID: - - 06B6CAAF520F975A_7A87SID + - BAC346314D8AB27C_B674SID Referrer-Policy: - origin-when-cross-origin Server: - Finatra Set-Cookie: - - ncbi_sid=06B6CAAF520F975A_7A87SID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:34 GMT + - ncbi_sid=BAC346314D8AB27C_B674SID; domain=.nih.gov; path=/; expires=Sat, 24 + Aug 2024 02:35:42 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Transfer-Encoding: @@ -341,7 +466,7 @@ interactions: X-RateLimit-Limit: - '3' X-RateLimit-Remaining: - - '1' + - '2' X-UA-Compatible: - IE=Edge X-XSS-Protection: diff --git a/tests/extras/cassettes/test_hgvs[NC_000007.14:g.55181230_55181231insGGCT-expected3].yaml b/tests/extras/cassettes/test_hgvs[NC_000007.14:g.55181230_55181231insGGCT-expected3].yaml index b727a5de..2b07229a 100644 --- a/tests/extras/cassettes/test_hgvs[NC_000007.14:g.55181230_55181231insGGCT-expected3].yaml +++ b/tests/extras/cassettes/test_hgvs[NC_000007.14:g.55181230_55181231insGGCT-expected3].yaml @@ -9,7 +9,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul response: @@ -34,7 +34,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:35 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -50,7 +50,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181230&end=55181230 response: @@ -64,7 +64,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:35 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -80,21 +80,21 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181229&end=55181230 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181230&end=55181230 response: body: - string: C + string: '' headers: Connection: - close Content-Length: - - '1' + - '0' Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:35 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -110,9 +110,9 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181230&end=55181231 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181229&end=55181230 response: body: string: C @@ -124,7 +124,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:35 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -140,21 +140,21 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181230&end=55181230 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181230&end=55181231 response: body: - string: '' + string: C headers: Connection: - close Content-Length: - - '0' + - '1' Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:35 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -170,7 +170,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181230&end=55181230 response: @@ -184,7 +184,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:35 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -200,124 +200,23 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 - method: GET - uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181231&seq_stop=55181231&tool=bioutils&email=biocommons-dev@googlegroups.com - response: - body: - string: !!binary | - H4sIAAAAAAAAALLzc443AAFzPUMTK1NTQwtDI2NDXRhDwSM/N1+hOLEgMzWvWCE5owjILc7PTVUw - 11FwD3LOMLbQKzA0UQgoysxNLKpUcCwuTs1NyqnkcubiAgAAAP//AwCoL8VLWgAAAA== - headers: - Access-Control-Allow-Origin: - - '*' - Access-Control-Expose-Headers: - - X-RateLimit-Limit,X-RateLimit-Remaining - Cache-Control: - - private - Connection: - - Keep-Alive - Content-Disposition: - - attachment; filename="sequence.fasta" - Content-Security-Policy: - - upgrade-insecure-requests - Content-Type: - - text/plain - Date: - - Sat, 17 Jun 2023 01:04:35 GMT - Keep-Alive: - - timeout=4, max=40 - NCBI-PHID: - - 322C88C3B864A66500005B882C71FAF6.1.1.m_5 - NCBI-SID: - - 9D0BE2B460CC0B21_E5FFSID - Referrer-Policy: - - origin-when-cross-origin - Server: - - Finatra - Set-Cookie: - - ncbi_sid=9D0BE2B460CC0B21_E5FFSID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:35 GMT - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Transfer-Encoding: - - chunked - X-RateLimit-Limit: - - '3' - X-RateLimit-Remaining: - - '1' - X-UA-Compatible: - - IE=Edge - X-XSS-Protection: - - 1; mode=block - content-encoding: - - gzip - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET - uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181230&seq_stop=55181250&tool=bioutils&email=biocommons-dev@googlegroups.com + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181230&end=55181230 response: body: - string: !!binary | - H4sIAAAAAAAAALLzc443AAFzPUMTK1NTQwtDI2MDXQjD1EDBIz83X6E4sSAzNa9YITmjCMgtzs9N - VTDXUXAPcs4wttArMDRRCCjKzE0sqlRwLC5OzU3KqeRydnYMcQ9xdwaCECAKcQ5xd3fm4gIAAAD/ - /wMAmUGUnW4AAAA= + string: '' headers: - Access-Control-Allow-Origin: - - '*' - Access-Control-Expose-Headers: - - X-RateLimit-Limit,X-RateLimit-Remaining - Cache-Control: - - private Connection: - - Keep-Alive - Content-Disposition: - - attachment; filename="sequence.fasta" - Content-Security-Policy: - - upgrade-insecure-requests + - close + Content-Length: + - '0' Content-Type: - - text/plain + - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:36 GMT - Keep-Alive: - - timeout=4, max=40 - NCBI-PHID: - - 322C88C3B864A66500004E882E0A4658.1.1.m_5 - NCBI-SID: - - C594BF4A99D6CFF8_948ASID - Referrer-Policy: - - origin-when-cross-origin + - Thu, 24 Aug 2023 02:35:42 GMT Server: - - Finatra - Set-Cookie: - - ncbi_sid=C594BF4A99D6CFF8_948ASID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:36 GMT - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Transfer-Encoding: - - chunked - X-RateLimit-Limit: - - '3' - X-RateLimit-Remaining: - - '1' - X-UA-Compatible: - - IE=Edge - X-XSS-Protection: - - 1; mode=block - content-encoding: - - gzip + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -331,58 +230,23 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET - uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181227&seq_stop=55181230&tool=bioutils&email=biocommons-dev@googlegroups.com + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181230&end=55181230 response: body: - string: !!binary | - H4sIAAAAAAAAALLzc443AAFzPUMTK1NTQwtDIyNzXQjD2EDBIz83X6E4sSAzNa9YITmjCMgtzs9N - VTDXUXAPcs4wttArMDRRCCjKzE0sqlRwLC5OzU3KqeRydw9x5uICAAAA//8DAImcWFxdAAAA + string: '' headers: - Access-Control-Allow-Origin: - - '*' - Access-Control-Expose-Headers: - - X-RateLimit-Limit,X-RateLimit-Remaining - Cache-Control: - - private Connection: - - Keep-Alive - Content-Disposition: - - attachment; filename="sequence.fasta" - Content-Security-Policy: - - upgrade-insecure-requests + - close + Content-Length: + - '0' Content-Type: - - text/plain + - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:36 GMT - Keep-Alive: - - timeout=4, max=40 - NCBI-PHID: - - 939B236CFCF1AA15000058A14C241824.1.1.m_5 - NCBI-SID: - - B00CB2AA37D87866_4816SID - Referrer-Policy: - - origin-when-cross-origin + - Thu, 24 Aug 2023 02:35:42 GMT Server: - - Finatra - Set-Cookie: - - ncbi_sid=B00CB2AA37D87866_4816SID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:37 GMT - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Transfer-Encoding: - - chunked - X-RateLimit-Limit: - - '3' - X-RateLimit-Remaining: - - '1' - X-UA-Compatible: - - IE=Edge - X-XSS-Protection: - - 1; mode=block - content-encoding: - - gzip + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK diff --git a/tests/extras/cassettes/test_hgvs[NC_000007.14:g.55181320A>T-expected1].yaml b/tests/extras/cassettes/test_hgvs[NC_000007.14:g.55181320A>T-expected1].yaml index 94c259de..d2a1ca35 100644 --- a/tests/extras/cassettes/test_hgvs[NC_000007.14:g.55181320A>T-expected1].yaml +++ b/tests/extras/cassettes/test_hgvs[NC_000007.14:g.55181320A>T-expected1].yaml @@ -9,7 +9,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000007.14 response: @@ -34,7 +34,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:32 GMT + - Thu, 24 Aug 2023 02:35:36 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -50,7 +50,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul response: @@ -75,7 +75,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:32 GMT + - Thu, 24 Aug 2023 02:35:36 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -91,7 +91,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181319&end=55181320 response: @@ -105,7 +105,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:32 GMT + - Thu, 24 Aug 2023 02:35:36 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -121,7 +121,37 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181319&end=55181320 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:35:36 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181318&end=55181319 response: @@ -135,7 +165,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:32 GMT + - Thu, 24 Aug 2023 02:35:36 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -151,7 +181,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181320&end=55181321 response: @@ -165,7 +195,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:32 GMT + - Thu, 24 Aug 2023 02:35:36 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -181,7 +211,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181320&end=55181321 response: @@ -195,7 +225,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:32 GMT + - Thu, 24 Aug 2023 02:35:36 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -211,7 +241,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181319&end=55181319 response: @@ -225,7 +255,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:32 GMT + - Thu, 24 Aug 2023 02:35:36 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -241,7 +271,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181320&end=55181320 response: @@ -255,7 +285,37 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:32 GMT + - Thu, 24 Aug 2023 02:35:36 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181319&end=55181320 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:35:37 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -271,7 +331,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul response: @@ -296,7 +356,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:32 GMT + - Thu, 24 Aug 2023 02:35:37 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -312,7 +372,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul?start=55181319&end=55181320 response: @@ -326,7 +386,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:33 GMT + - Thu, 24 Aug 2023 02:35:37 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -342,7 +402,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181320&seq_stop=55181320&tool=bioutils&email=biocommons-dev@googlegroups.com response: @@ -366,20 +426,20 @@ interactions: Content-Type: - text/plain Date: - - Sat, 17 Jun 2023 01:04:32 GMT + - Thu, 24 Aug 2023 02:35:37 GMT Keep-Alive: - timeout=4, max=40 NCBI-PHID: - - D0BD7A022C7C6FC500004D96F27FB4D6.1.1.m_5 + - 939B3D945261CB9500002C8BCA1E5615.1.1.m_5 NCBI-SID: - - 2DACA0C3DC694FC7_FC49SID + - 631E263434F06885_DB6DSID Referrer-Policy: - origin-when-cross-origin Server: - Finatra Set-Cookie: - - ncbi_sid=2DACA0C3DC694FC7_FC49SID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:32 GMT + - ncbi_sid=631E263434F06885_DB6DSID; domain=.nih.gov; path=/; expires=Sat, 24 + Aug 2024 02:35:37 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Transfer-Encoding: @@ -407,15 +467,15 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET - uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181320&seq_stop=55181340&tool=bioutils&email=biocommons-dev@googlegroups.com + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000007.14&rettype=fasta&seq_start=55181319&seq_stop=55181340&tool=bioutils&email=biocommons-dev@googlegroups.com response: body: string: !!binary | - H4sIAAAAAAAAALLzc443AAFzPUMTK1NTQwtDYyMDXQjDxEDBIz83X6E4sSAzNa9YITmjCMgtzs9N - VTDXUXAPcs4wttArMDRRCCjKzE0sqlRwLC5OzU3KqeRydHQGAUdn9xAgdHYGohB3Li4AAAAA//8D - AKrmuS9uAAAA + H4sIAAAAAAAAALLzc443AAFzPUMTK1NTQwtDY0NLXQjDxEDBIz83X6E4sSAzNa9YITmjCMgtzs9N + VTDXUXAPcs4wttArMDRRCCjKzE0sqlRwLC5OzU3KqeRydnR0BgFHZ/cQIHR2BqIQdy4uAAAAAP// + AwAhZlzpbwAAAA== headers: Access-Control-Allow-Origin: - '*' @@ -432,20 +492,20 @@ interactions: Content-Type: - text/plain Date: - - Sat, 17 Jun 2023 01:04:32 GMT + - Thu, 24 Aug 2023 02:35:38 GMT Keep-Alive: - timeout=4, max=40 NCBI-PHID: - - 939B236CFCF1AA15000053A143205DFE.1.1.m_5 + - 939B3D945261CB950000638BCE6A4EAF.1.1.m_5 NCBI-SID: - - DDC9B27ECA7F1CC1_1C9ESID + - 94AF026551B096FB_BE1FSID Referrer-Policy: - origin-when-cross-origin Server: - Finatra Set-Cookie: - - ncbi_sid=DDC9B27ECA7F1CC1_1C9ESID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:33 GMT + - ncbi_sid=94AF026551B096FB_BE1FSID; domain=.nih.gov; path=/; expires=Sat, 24 + Aug 2024 02:35:38 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Transfer-Encoding: diff --git a/tests/extras/cassettes/test_hgvs[NC_000013.11:g.32316467dup-expected5].yaml b/tests/extras/cassettes/test_hgvs[NC_000013.11:g.32316467dup-expected5].yaml index c438cbb0..a7e8c307 100644 --- a/tests/extras/cassettes/test_hgvs[NC_000013.11:g.32316467dup-expected5].yaml +++ b/tests/extras/cassettes/test_hgvs[NC_000013.11:g.32316467dup-expected5].yaml @@ -9,7 +9,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/NC_000013.11?start=32316466&end=32316467 response: @@ -23,7 +23,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:41 GMT + - Thu, 24 Aug 2023 02:35:43 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -39,7 +39,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT response: @@ -65,7 +65,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:41 GMT + - Thu, 24 Aug 2023 02:35:43 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -81,7 +81,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32316466&end=32316467 response: @@ -95,7 +95,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:41 GMT + - Thu, 24 Aug 2023 02:35:43 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -111,7 +111,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32316466&end=32316467 response: @@ -125,7 +125,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:41 GMT + - Thu, 24 Aug 2023 02:35:43 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -141,7 +141,37 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32316466&end=32316467 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:35:43 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32316465&end=32316466 response: @@ -155,7 +185,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:41 GMT + - Thu, 24 Aug 2023 02:35:43 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -171,7 +201,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32316467&end=32316468 response: @@ -185,7 +215,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:41 GMT + - Thu, 24 Aug 2023 02:35:43 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -201,7 +231,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32316466&end=32316467 response: @@ -215,7 +245,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:41 GMT + - Thu, 24 Aug 2023 02:35:43 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -231,7 +261,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32316467&end=32316467 response: @@ -245,7 +275,37 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:41 GMT + - Thu, 24 Aug 2023 02:35:43 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32316466&end=32316467 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:35:43 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -261,7 +321,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32316466&end=32316467 response: @@ -275,7 +335,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:41 GMT + - Thu, 24 Aug 2023 02:35:43 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -291,7 +351,72 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 + method: GET + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000013.11&rettype=fasta&seq_start=32316466&seq_stop=32316467&tool=bioutils&email=biocommons-dev@googlegroups.com + response: + body: + string: !!binary | + H4sIAAAAAAAAALLzc443AAJDYz1DQytjI2NDMxMzM10ow1zBIz83X6E4sSAzNa9YITmjCMgtzs9N + VTA01lFwD3LOMLbQKzA0UQgoysxNLKpUcCwuTs1NyqnkCnHk4gIAAAD//wMAfY62d1wAAAA= + headers: + Access-Control-Allow-Origin: + - '*' + Access-Control-Expose-Headers: + - X-RateLimit-Limit,X-RateLimit-Remaining + Cache-Control: + - private + Connection: + - Keep-Alive + Content-Disposition: + - attachment; filename="sequence.fasta" + Content-Security-Policy: + - upgrade-insecure-requests + Content-Type: + - text/plain + Date: + - Thu, 24 Aug 2023 02:35:43 GMT + Keep-Alive: + - timeout=4, max=40 + NCBI-PHID: + - 939B3D945261CB950000318BE8266158.1.1.m_5 + NCBI-SID: + - 03EB3C85C6972737_8AD3SID + Referrer-Policy: + - origin-when-cross-origin + Server: + - Finatra + Set-Cookie: + - ncbi_sid=03EB3C85C6972737_8AD3SID; domain=.nih.gov; path=/; expires=Sat, 24 + Aug 2024 02:35:43 GMT + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-RateLimit-Limit: + - '3' + X-RateLimit-Remaining: + - '2' + X-UA-Compatible: + - IE=Edge + X-XSS-Protection: + - 1; mode=block + content-encoding: + - gzip + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 method: GET uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000013.11&rettype=fasta&seq_start=32316467&seq_stop=32316467&tool=bioutils&email=biocommons-dev@googlegroups.com response: @@ -315,20 +440,20 @@ interactions: Content-Type: - text/plain Date: - - Sat, 17 Jun 2023 01:04:40 GMT + - Thu, 24 Aug 2023 02:35:44 GMT Keep-Alive: - timeout=4, max=40 NCBI-PHID: - - 322C88C3B864A665000024883973E939.1.1.m_5 + - D0BD0EC90D3A84D5000023591BC2630E.1.1.m_5 NCBI-SID: - - 21345483F31464C3_317ESID + - 5F270D49B2084566_9803SID Referrer-Policy: - origin-when-cross-origin Server: - Finatra Set-Cookie: - - ncbi_sid=21345483F31464C3_317ESID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:41 GMT + - ncbi_sid=5F270D49B2084566_9803SID; domain=.nih.gov; path=/; expires=Sat, 24 + Aug 2024 02:35:44 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Transfer-Encoding: @@ -356,15 +481,15 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET - uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000013.11&rettype=fasta&seq_start=32316467&seq_stop=32316487&tool=bioutils&email=biocommons-dev@googlegroups.com + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000013.11&rettype=fasta&seq_start=32316466&seq_stop=32316487&tool=bioutils&email=biocommons-dev@googlegroups.com response: body: string: !!binary | - H4sIAAAAAAAAALLzc443AAJDYz1DQytjI2NDMxMzc10Iw8JcwSM/N1+hOLEgMzWvWCE5owjILc7P - TVUwNNZRcA9yzjC20CswNFEIKMrMTSyqVHAsLk7NTcqp5HIMCXF3dwxxdnZ0dHQHQXcgk4sLAAAA - //8DAG14V2hvAAAA + H4sIAAAAAAAAALLzc443AAJDYz1DQytjI2NDMxMzM10Iw8JcwSM/N1+hOLEgMzWvWCE5owjILc7P + TVUwNNZRcA9yzjC20CswNFEIKMrMTSyqVHAsLk7NTcqp5ApxDAlxd3cMcXZ2dHR0B0F3IJOLCwAA + AP//AwDLG37GcAAAAA== headers: Access-Control-Allow-Origin: - '*' @@ -381,20 +506,20 @@ interactions: Content-Type: - text/plain Date: - - Sat, 17 Jun 2023 01:04:41 GMT + - Thu, 24 Aug 2023 02:35:45 GMT Keep-Alive: - timeout=4, max=40 NCBI-PHID: - - 939B236CFCF1AA15000027A15AE8E37B.1.1.m_5 + - D0BD0EC90D3A84D500002B59205FA11B.1.1.m_5 NCBI-SID: - - 22EACE5C291E1D28_BD4CSID + - D10289F9B420159A_7BECSID Referrer-Policy: - origin-when-cross-origin Server: - Finatra Set-Cookie: - - ncbi_sid=22EACE5C291E1D28_BD4CSID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:41 GMT + - ncbi_sid=D10289F9B420159A_7BECSID; domain=.nih.gov; path=/; expires=Sat, 24 + Aug 2024 02:35:45 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Transfer-Encoding: @@ -402,7 +527,7 @@ interactions: X-RateLimit-Limit: - '3' X-RateLimit-Remaining: - - '1' + - '2' X-UA-Compatible: - IE=Edge X-XSS-Protection: diff --git a/tests/extras/cassettes/test_hgvs[NC_000013.11:g.32331093_32331094dup-expected4].yaml b/tests/extras/cassettes/test_hgvs[NC_000013.11:g.32331093_32331094dup-expected4].yaml index 05906d33..d61d2e4a 100644 --- a/tests/extras/cassettes/test_hgvs[NC_000013.11:g.32331093_32331094dup-expected4].yaml +++ b/tests/extras/cassettes/test_hgvs[NC_000013.11:g.32331093_32331094dup-expected4].yaml @@ -9,7 +9,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/NC_000013.11?start=32331092&end=32331094 response: @@ -23,7 +23,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:37 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -39,7 +39,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT response: @@ -65,7 +65,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:37 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -81,7 +81,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331092&end=32331094 response: @@ -95,7 +95,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:37 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -111,7 +111,37 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331092&end=32331094 + response: + body: + string: TT + headers: + Connection: + - close + Content-Length: + - '2' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:35:42 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331093&end=32331094 response: @@ -125,7 +155,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:37 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -141,7 +171,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331092&end=32331093 response: @@ -155,7 +185,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:37 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -171,7 +201,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331091&end=32331092 response: @@ -185,7 +215,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:37 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -201,7 +231,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331090&end=32331091 response: @@ -215,7 +245,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:37 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -231,7 +261,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331089&end=32331090 response: @@ -245,7 +275,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:38 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -261,7 +291,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331088&end=32331089 response: @@ -275,7 +305,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:38 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -291,7 +321,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331087&end=32331088 response: @@ -305,7 +335,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:38 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -321,7 +351,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331086&end=32331087 response: @@ -335,7 +365,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:38 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -351,7 +381,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331085&end=32331086 response: @@ -365,7 +395,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:38 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -381,7 +411,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331084&end=32331085 response: @@ -395,7 +425,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:38 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -411,7 +441,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331083&end=32331084 response: @@ -425,7 +455,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:38 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -441,7 +471,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331082&end=32331083 response: @@ -455,7 +485,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:38 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -471,7 +501,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331081&end=32331082 response: @@ -485,7 +515,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:38 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -501,7 +531,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331094&end=32331095 response: @@ -515,7 +545,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:38 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -531,7 +561,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331082&end=32331094 response: @@ -545,7 +575,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:38 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -561,7 +591,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331094&end=32331094 response: @@ -575,7 +605,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:38 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -591,7 +621,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32331082&end=32331094 response: @@ -605,271 +635,10 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:38 GMT + - Thu, 24 Aug 2023 02:35:42 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.29.0 - method: GET - uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000013.11&rettype=fasta&seq_start=32331083&seq_stop=32331094&tool=bioutils&email=biocommons-dev@googlegroups.com - response: - body: - string: !!binary | - H4sIAAAAAAAAALLzc443AAJDYz1DQytjI2NjQwMLY10Iw9JEwSM/N1+hOLEgMzWvWCE5owjILc7P - TVUwNNZRcA9yzjC20CswNFEIKMrMTSyqVHAsLk7NTcqp5ApBAlxcAAAAAP//AwBz5s6GZgAAAA== - headers: - Access-Control-Allow-Origin: - - '*' - Access-Control-Expose-Headers: - - X-RateLimit-Limit,X-RateLimit-Remaining - Cache-Control: - - private - Connection: - - Keep-Alive - Content-Disposition: - - attachment; filename="sequence.fasta" - Content-Security-Policy: - - upgrade-insecure-requests - Content-Type: - - text/plain - Date: - - Sat, 17 Jun 2023 01:04:37 GMT - Keep-Alive: - - timeout=4, max=40 - NCBI-PHID: - - 322C88C3B864A665000053883184093D.1.1.m_5 - NCBI-SID: - - 7460204ECE221625_1751SID - Referrer-Policy: - - origin-when-cross-origin - Server: - - Finatra - Set-Cookie: - - ncbi_sid=7460204ECE221625_1751SID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:38 GMT - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Transfer-Encoding: - - chunked - X-RateLimit-Limit: - - '3' - X-RateLimit-Remaining: - - '2' - X-UA-Compatible: - - IE=Edge - X-XSS-Protection: - - 1; mode=block - content-encoding: - - gzip - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.29.0 - method: GET - uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000013.11&rettype=fasta&seq_start=32331094&seq_stop=32331094&tool=bioutils&email=biocommons-dev@googlegroups.com - response: - body: - string: !!binary | - H4sIAAAAAAAAALLzc443AAJDYz1DQytjI2NjQwNLE10YQ8EjPzdfoTixIDM1r1ghOaMIyC3Oz01V - MDTWUXAPcs4wttArMDRRCCjKzE0sqlRwLC5OzU3KqeQK4eICAAAA//8DAOcxeKNbAAAA - headers: - Access-Control-Allow-Origin: - - '*' - Access-Control-Expose-Headers: - - X-RateLimit-Limit,X-RateLimit-Remaining - Cache-Control: - - private - Connection: - - Keep-Alive - Content-Disposition: - - attachment; filename="sequence.fasta" - Content-Security-Policy: - - upgrade-insecure-requests - Content-Type: - - text/plain - Date: - - Sat, 17 Jun 2023 01:04:39 GMT - Keep-Alive: - - timeout=4, max=40 - NCBI-PHID: - - 322C88C3B864A66500004C8835631755.1.1.m_5 - NCBI-SID: - - 0787634CFDA8561D_47C9SID - Referrer-Policy: - - origin-when-cross-origin - Server: - - Finatra - Set-Cookie: - - ncbi_sid=0787634CFDA8561D_47C9SID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:39 GMT - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Transfer-Encoding: - - chunked - X-RateLimit-Limit: - - '3' - X-RateLimit-Remaining: - - '2' - X-UA-Compatible: - - IE=Edge - X-XSS-Protection: - - 1; mode=block - content-encoding: - - gzip - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.29.0 - method: GET - uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000013.11&rettype=fasta&seq_start=32331083&seq_stop=32331114&tool=bioutils&email=biocommons-dev@googlegroups.com - response: - body: - string: !!binary | - H4sIAAAAAAAAALLzc443AAJDYz1DQytjI2NjQwMLY10ww9DQRMEjPzdfoTixIDM1r1ghOaMIyC3O - z01VMDTWUXAPcs4wttArACoLKMrMTSyqVHAsLk7NTcqp5ApBAu6O7u4h7kAyxBnIcQaS7iFcXAAA - AAD//wMAlOTJvHoAAAA= - headers: - Access-Control-Allow-Origin: - - '*' - Access-Control-Expose-Headers: - - X-RateLimit-Limit,X-RateLimit-Remaining - Cache-Control: - - private - Connection: - - Keep-Alive - Content-Disposition: - - attachment; filename="sequence.fasta" - Content-Security-Policy: - - upgrade-insecure-requests - Content-Type: - - text/plain - Date: - - Sat, 17 Jun 2023 01:04:40 GMT - Keep-Alive: - - timeout=4, max=40 - NCBI-PHID: - - 939B236CFCF1AA15000044A156FDAE8F.1.1.m_5 - NCBI-SID: - - 69CA492CCCF1D9ED_30ACSID - Referrer-Policy: - - origin-when-cross-origin - Server: - - Finatra - Set-Cookie: - - ncbi_sid=69CA492CCCF1D9ED_30ACSID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:40 GMT - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Transfer-Encoding: - - chunked - X-RateLimit-Limit: - - '3' - X-RateLimit-Remaining: - - '1' - X-UA-Compatible: - - IE=Edge - X-XSS-Protection: - - 1; mode=block - content-encoding: - - gzip - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.29.0 - method: GET - uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000013.11&rettype=fasta&seq_start=32331093&seq_stop=32331094&tool=bioutils&email=biocommons-dev@googlegroups.com - response: - body: - string: !!binary | - H4sIAAAAAAAAALLzc443AAJDYz1DQytjI2NjQwNLY10ow0TBIz83X6E4sSAzNa9YITmjCMgtzs9N - VTA01lFwD3LOMLbQKzA0UQgoysxNLKpUcCwuTs1NyqnkCgnh4gIAAAD//wMAmBgGAVwAAAA= - headers: - Access-Control-Allow-Origin: - - '*' - Access-Control-Expose-Headers: - - X-RateLimit-Limit,X-RateLimit-Remaining - Cache-Control: - - private - Connection: - - Keep-Alive - Content-Disposition: - - attachment; filename="sequence.fasta" - Content-Security-Policy: - - upgrade-insecure-requests - Content-Type: - - text/plain - Date: - - Sat, 17 Jun 2023 01:04:40 GMT - Keep-Alive: - - timeout=4, max=40 - NCBI-PHID: - - 939B236CFCF1AA15000050A158560489.1.1.m_5 - NCBI-SID: - - 6DE6FC530437B9A4_6D0FSID - Referrer-Policy: - - origin-when-cross-origin - Server: - - Finatra - Set-Cookie: - - ncbi_sid=6DE6FC530437B9A4_6D0FSID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:40 GMT - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Transfer-Encoding: - - chunked - X-RateLimit-Limit: - - '3' - X-RateLimit-Remaining: - - '0' - X-UA-Compatible: - - IE=Edge - X-XSS-Protection: - - 1; mode=block - content-encoding: - - gzip - status: - code: 200 - message: OK version: 1 diff --git a/tests/extras/cassettes/test_hgvs[NC_000013.11:g.32936732=-expected0].yaml b/tests/extras/cassettes/test_hgvs[NC_000013.11:g.32936732=-expected0].yaml index 36213f2f..be77c70a 100644 --- a/tests/extras/cassettes/test_hgvs[NC_000013.11:g.32936732=-expected0].yaml +++ b/tests/extras/cassettes/test_hgvs[NC_000013.11:g.32936732=-expected0].yaml @@ -9,7 +9,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/NC_000013.11?start=32936731&end=32936732 response: @@ -23,7 +23,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:29 GMT + - Thu, 24 Aug 2023 02:35:32 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -39,7 +39,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT response: @@ -65,7 +65,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:29 GMT + - Thu, 24 Aug 2023 02:35:32 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -81,7 +81,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32936731&end=32936732 response: @@ -95,7 +95,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:29 GMT + - Thu, 24 Aug 2023 02:35:32 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -111,7 +111,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32936731&end=32936732 response: @@ -125,7 +125,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:29 GMT + - Thu, 24 Aug 2023 02:35:32 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -141,14 +141,44 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET - uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000013.11&rettype=fasta&seq_start=32936732&seq_stop=32936732&tool=bioutils&email=biocommons-dev@googlegroups.com + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32936731&end=32936732 + response: + body: + string: C + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:35:32 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NC_000013.11&rettype=fasta&seq_start=32936731&seq_stop=32936732&tool=bioutils&email=biocommons-dev@googlegroups.com response: body: string: !!binary | - H4sIAAAAAAAAALLzc443AAJDYz1DQytjI0tjM3NjI10YQ8EjPzdfoTixIDM1r1ghOaMIyC3Oz01V - MDTWUXAPcs4wttArMDRRCCjKzE0sqlRwLC5OzU3KqeRy5uICAAAA//8DALFcilZbAAAA + H4sIAAAAAAAAALLzc443AAJDYz1DQytjI0tjM3NjQ10ow0jBIz83X6E4sSAzNa9YITmjCMgtzs9N + VTA01lFwD3LOMLbQKzA0UQgoysxNLKpUcCwuTs1Nyqnkcnbm4gIAAAD//wMA49yXRFwAAAA= headers: Access-Control-Allow-Origin: - '*' @@ -165,20 +195,20 @@ interactions: Content-Type: - text/plain Date: - - Sat, 17 Jun 2023 01:04:31 GMT + - Thu, 24 Aug 2023 02:35:36 GMT Keep-Alive: - timeout=4, max=40 NCBI-PHID: - - D0BD7A022C7C6FC500002E96EE3E6D62.1.1.m_5 + - 939B3D945261CB9500005F8BC3E00CE8.1.1.m_5 NCBI-SID: - - 29CB87EB7B7DD577_9797SID + - 7312853517AD9CE9_1F4ESID Referrer-Policy: - origin-when-cross-origin Server: - Finatra Set-Cookie: - - ncbi_sid=29CB87EB7B7DD577_9797SID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:32 GMT + - ncbi_sid=7312853517AD9CE9_1F4ESID; domain=.nih.gov; path=/; expires=Sat, 24 + Aug 2024 02:35:36 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Transfer-Encoding: diff --git a/tests/extras/cassettes/test_hgvs[NM_001331029.1:n.872A>G-expected6].yaml b/tests/extras/cassettes/test_hgvs[NM_001331029.1:n.872A>G-expected6].yaml index 98366397..a06c5a68 100644 --- a/tests/extras/cassettes/test_hgvs[NM_001331029.1:n.872A>G-expected6].yaml +++ b/tests/extras/cassettes/test_hgvs[NM_001331029.1:n.872A>G-expected6].yaml @@ -9,7 +9,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/refseq:NM_001331029.1 response: @@ -28,7 +28,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:42 GMT + - Thu, 24 Aug 2023 02:35:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -44,7 +44,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK response: @@ -63,7 +63,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:42 GMT + - Thu, 24 Aug 2023 02:35:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -79,7 +79,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK?start=871&end=872 response: @@ -93,7 +93,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:42 GMT + - Thu, 24 Aug 2023 02:35:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -109,7 +109,37 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK?start=871&end=872 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:35:46 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK?start=870&end=871 response: @@ -123,7 +153,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:42 GMT + - Thu, 24 Aug 2023 02:35:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -139,7 +169,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK?start=872&end=873 response: @@ -153,7 +183,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:42 GMT + - Thu, 24 Aug 2023 02:35:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -169,7 +199,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK?start=871&end=871 response: @@ -183,7 +213,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:42 GMT + - Thu, 24 Aug 2023 02:35:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -199,7 +229,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK?start=872&end=872 response: @@ -213,7 +243,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:42 GMT + - Thu, 24 Aug 2023 02:35:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -229,7 +259,37 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK?start=871&end=872 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:35:46 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK response: @@ -248,7 +308,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:42 GMT + - Thu, 24 Aug 2023 02:35:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -264,7 +324,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.MBIgVnoHFw34aFqNUVGM0zgjC3d-v8dK?start=871&end=872 response: @@ -278,7 +338,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:42 GMT + - Thu, 24 Aug 2023 02:35:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -294,7 +354,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NM_001331029.1&rettype=fasta&seq_start=872&seq_stop=872&tool=bioutils&email=biocommons-dev@googlegroups.com response: @@ -319,20 +379,20 @@ interactions: Content-Type: - text/plain Date: - - Sat, 17 Jun 2023 01:04:41 GMT + - Thu, 24 Aug 2023 02:35:46 GMT Keep-Alive: - timeout=4, max=40 NCBI-PHID: - - 322C88C3B864A665000029883CE8D086.1.1.m_5 + - D0BD0EC90D3A84D500004C59259982A0.1.1.m_5 NCBI-SID: - - 80E9370C369020AD_8FD5SID + - 03BEC3B9E76657C4_FA3CSID Referrer-Policy: - origin-when-cross-origin Server: - Finatra Set-Cookie: - - ncbi_sid=80E9370C369020AD_8FD5SID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:42 GMT + - ncbi_sid=03BEC3B9E76657C4_FA3CSID; domain=.nih.gov; path=/; expires=Sat, 24 + Aug 2024 02:35:46 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Transfer-Encoding: @@ -340,7 +400,7 @@ interactions: X-RateLimit-Limit: - '3' X-RateLimit-Remaining: - - '1' + - '2' X-UA-Compatible: - IE=Edge X-XSS-Protection: @@ -360,15 +420,15 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET - uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NM_001331029.1&rettype=fasta&seq_start=872&seq_stop=892&tool=bioutils&email=biocommons-dev@googlegroups.com + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NM_001331029.1&rettype=fasta&seq_start=871&seq_stop=892&tool=bioutils&email=biocommons-dev@googlegroups.com response: body: string: !!binary | - H4sIAAAAAAAAAAzGsQrCMBQF0L1fcUeFKk062DoIsUNdLKFkl6cEG7BJSF4E/95O51ym+6NpRNuK - RvZHce5O8tD1ErewBmSKzvqMmAJb5xGXkONCTNlCINl3+RCH9EMuz+IdQ8grdlprMW/b1+BEPr+S - i4wvJUee0dVY50lVyoxKDWZQW8zGOCpjquoPAAD//wMAQOE1ipAAAAA= + H4sIAAAAAAAAAAzGsQrCMBQF0L1fcUeFKnntYOsgxA51sZSSXZ4SbMAmIXkV/Hs7nXMZ7g+lqK5J + Ve2Rzs2JDk1b4RaWgMzRWZ8RUxDrPOIccpxZOFsQkn2vH5aQfsjrc/VOQNUVu3Ecadq2LyGJfX4l + FwVfTo69oCmxTIMujDa91p3p9Baz0ffamKL4AwAA//8DAJnR/l+RAAAA headers: Access-Control-Allow-Origin: - '*' @@ -385,20 +445,20 @@ interactions: Content-Type: - text/plain Date: - - Sat, 17 Jun 2023 01:04:42 GMT + - Thu, 24 Aug 2023 02:35:47 GMT Keep-Alive: - timeout=4, max=40 NCBI-PHID: - - 322C88C3B864A665000028883F0093E3.1.1.m_5 + - D0BD0EC90D3A84D500002B59296FBF1B.1.1.m_5 NCBI-SID: - - FBC24E5DE2F0B43E_EC52SID + - 6F5BDE5FCA5032DB_F0A8SID Referrer-Policy: - origin-when-cross-origin Server: - Finatra Set-Cookie: - - ncbi_sid=FBC24E5DE2F0B43E_EC52SID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:43 GMT + - ncbi_sid=6F5BDE5FCA5032DB_F0A8SID; domain=.nih.gov; path=/; expires=Sat, 24 + Aug 2024 02:35:47 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Transfer-Encoding: diff --git a/tests/extras/cassettes/test_hgvs[NM_181798.1:n.1263G>T-expected7].yaml b/tests/extras/cassettes/test_hgvs[NM_181798.1:n.1263G>T-expected7].yaml index 37d4f6f3..8bc9925b 100644 --- a/tests/extras/cassettes/test_hgvs[NM_181798.1:n.1263G>T-expected7].yaml +++ b/tests/extras/cassettes/test_hgvs[NM_181798.1:n.1263G>T-expected7].yaml @@ -9,7 +9,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/refseq:NM_181798.1 response: @@ -28,7 +28,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:43 GMT + - Thu, 24 Aug 2023 02:35:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -44,7 +44,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.KN07u-RFqd1dTyOWOG98HnOq87Nq-ZIg response: @@ -63,7 +63,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:43 GMT + - Thu, 24 Aug 2023 02:35:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -79,7 +79,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.KN07u-RFqd1dTyOWOG98HnOq87Nq-ZIg?start=1262&end=1263 response: @@ -93,7 +93,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:43 GMT + - Thu, 24 Aug 2023 02:35:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -109,7 +109,37 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.KN07u-RFqd1dTyOWOG98HnOq87Nq-ZIg?start=1262&end=1263 + response: + body: + string: G + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:35:48 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.KN07u-RFqd1dTyOWOG98HnOq87Nq-ZIg?start=1261&end=1262 response: @@ -123,7 +153,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:43 GMT + - Thu, 24 Aug 2023 02:35:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -139,7 +169,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.KN07u-RFqd1dTyOWOG98HnOq87Nq-ZIg?start=1263&end=1264 response: @@ -153,7 +183,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:43 GMT + - Thu, 24 Aug 2023 02:35:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -169,7 +199,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.KN07u-RFqd1dTyOWOG98HnOq87Nq-ZIg?start=1262&end=1262 response: @@ -183,7 +213,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:44 GMT + - Thu, 24 Aug 2023 02:35:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -199,7 +229,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.KN07u-RFqd1dTyOWOG98HnOq87Nq-ZIg?start=1263&end=1263 response: @@ -213,7 +243,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:44 GMT + - Thu, 24 Aug 2023 02:35:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -229,7 +259,37 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.KN07u-RFqd1dTyOWOG98HnOq87Nq-ZIg?start=1262&end=1263 + response: + body: + string: G + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 02:35:48 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.KN07u-RFqd1dTyOWOG98HnOq87Nq-ZIg response: @@ -248,7 +308,7 @@ interactions: Content-Type: - application/json Date: - - Sat, 17 Jun 2023 01:04:44 GMT + - Thu, 24 Aug 2023 02:35:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -264,7 +324,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.KN07u-RFqd1dTyOWOG98HnOq87Nq-ZIg?start=1262&end=1263 response: @@ -278,7 +338,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Sat, 17 Jun 2023 01:04:44 GMT + - Thu, 24 Aug 2023 02:35:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -294,7 +354,7 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NM_181798.1&rettype=fasta&seq_start=1263&seq_stop=1263&tool=bioutils&email=biocommons-dev@googlegroups.com response: @@ -319,20 +379,20 @@ interactions: Content-Type: - text/plain Date: - - Sat, 17 Jun 2023 01:04:43 GMT + - Thu, 24 Aug 2023 02:35:48 GMT Keep-Alive: - timeout=4, max=40 NCBI-PHID: - - 322C88C3B864A665000064884142F0CA.1.1.m_5 + - 939B3D945261CB9500002B8BFE1352E9.1.1.m_5 NCBI-SID: - - 276009E1E93550F1_B8F9SID + - C37CF93F6F18ED4F_AC89SID Referrer-Policy: - origin-when-cross-origin Server: - Finatra Set-Cookie: - - ncbi_sid=276009E1E93550F1_B8F9SID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:43 GMT + - ncbi_sid=C37CF93F6F18ED4F_AC89SID; domain=.nih.gov; path=/; expires=Sat, 24 + Aug 2024 02:35:49 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Transfer-Encoding: @@ -340,7 +400,7 @@ interactions: X-RateLimit-Limit: - '3' X-RateLimit-Remaining: - - '1' + - '2' X-UA-Compatible: - IE=Edge X-XSS-Protection: @@ -360,15 +420,15 @@ interactions: Connection: - keep-alive User-Agent: - - python-requests/2.29.0 + - python-requests/2.31.0 method: GET - uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NM_181798.1&rettype=fasta&seq_start=1263&seq_stop=1268&tool=bioutils&email=biocommons-dev@googlegroups.com + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=NM_181798.1&rettype=fasta&seq_start=1262&seq_stop=1268&tool=bioutils&email=biocommons-dev@googlegroups.com response: body: string: !!binary | - H4sIAAAAAAAAAAzEuwrCMBgG0D1P8Y0KrZAKGh0E6VBBDFSyy9+a1kBuJGnBt9cznIt8vLjgx5PY - 8TNvDvv6n8AtuIBM0WifEUOhnM3isAZbaNb1TEW/MX7Ie22Rl2EiZ+wXPZx2g07g2Nxb2fNthZLI - 5zGZWLBSMuQLmgruKa+sU6pVHWM/AAAA//8DAATZgc6FAAAA + H4sIAAAAAAAAAAzEuwrCMBgG0D1P8Y0KrZAMWh2E0qGCGKhkl781rYHcSNKCb69nOFf5ePGGn87N + gV+4OIr6X4NbcAGZotE+I4ZCOZvVYQu20KLrhYp+Y/qQ99oir+NMztgvBjjtRp3Asbt3cuD7CiWR + z1MysWCjZMgXiAruKVvW9kp1qmfsBwAA//8DAFMv0XCGAAAA headers: Access-Control-Allow-Origin: - '*' @@ -385,20 +445,20 @@ interactions: Content-Type: - text/plain Date: - - Sat, 17 Jun 2023 01:04:44 GMT + - Thu, 24 Aug 2023 02:35:50 GMT Keep-Alive: - timeout=4, max=40 NCBI-PHID: - - 322C88C3B864A66500005C88433FE3A9.1.1.m_5 + - D0BD0EC90D3A84D500002D5931FC0B2B.1.1.m_5 NCBI-SID: - - FEE1B01EFB3EB9C2_D894SID + - DD21CA9B7A0D8433_87CASID Referrer-Policy: - origin-when-cross-origin Server: - Finatra Set-Cookie: - - ncbi_sid=FEE1B01EFB3EB9C2_D894SID; domain=.nih.gov; path=/; expires=Mon, 17 - Jun 2024 01:04:44 GMT + - ncbi_sid=DD21CA9B7A0D8433_87CASID; domain=.nih.gov; path=/; expires=Sat, 24 + Aug 2024 02:35:50 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Transfer-Encoding: diff --git a/tests/extras/cassettes/test_to_spdi.yaml b/tests/extras/cassettes/test_to_spdi.yaml index dedae948..a1d293b1 100644 --- a/tests/extras/cassettes/test_to_spdi.yaml +++ b/tests/extras/cassettes/test_to_spdi.yaml @@ -11,7 +11,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000013.11 + uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT response: body: string: "{\n \"added\": \"2016-08-27T23:50:14Z\",\n \"aliases\": [\n \"GRCh38:13\",\n @@ -35,9 +35,9 @@ interactions: Content-Type: - application/json Date: - - Thu, 17 Aug 2023 03:03:25 GMT + - Thu, 24 Aug 2023 02:35:32 GMT Server: - - Werkzeug/2.2.3 Python/3.11.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -53,33 +53,21 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT?start=32936731&end=32936732 response: body: - string: "{\n \"added\": \"2016-08-27T23:50:14Z\",\n \"aliases\": [\n \"GRCh38:13\",\n - \ \"GRCh38:chr13\",\n \"GRCh38.p1:13\",\n \"GRCh38.p1:chr13\",\n \"GRCh38.p10:13\",\n - \ \"GRCh38.p10:chr13\",\n \"GRCh38.p11:13\",\n \"GRCh38.p11:chr13\",\n - \ \"GRCh38.p12:13\",\n \"GRCh38.p12:chr13\",\n \"GRCh38.p2:13\",\n - \ \"GRCh38.p2:chr13\",\n \"GRCh38.p3:13\",\n \"GRCh38.p3:chr13\",\n - \ \"GRCh38.p4:13\",\n \"GRCh38.p4:chr13\",\n \"GRCh38.p5:13\",\n \"GRCh38.p5:chr13\",\n - \ \"GRCh38.p6:13\",\n \"GRCh38.p6:chr13\",\n \"GRCh38.p7:13\",\n \"GRCh38.p7:chr13\",\n - \ \"GRCh38.p8:13\",\n \"GRCh38.p8:chr13\",\n \"GRCh38.p9:13\",\n \"GRCh38.p9:chr13\",\n - \ \"MD5:a5437debe2ef9c9ef8f3ea2874ae1d82\",\n \"NCBI:NC_000013.11\",\n - \ \"refseq:NC_000013.11\",\n \"SEGUID:2oDBty0yKV9wHo7gg+Bt+fPgi5o\",\n - \ \"SHA1:da80c1b72d32295f701e8ee083e06df9f3e08b9a\",\n \"VMC:GS__0wi-qoDrvram155UmcSC-zA5ZK4fpLT\",\n - \ \"sha512t24u:_0wi-qoDrvram155UmcSC-zA5ZK4fpLT\",\n \"ga4gh:SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT\"\n - \ ],\n \"alphabet\": \"ACGKNTY\",\n \"length\": 114364328\n}\n" + string: C headers: Connection: - close Content-Length: - - '1002' + - '1' Content-Type: - - application/json + - text/plain; charset=utf-8 Date: - - Thu, 17 Aug 2023 03:03:25 GMT + - Thu, 24 Aug 2023 02:35:32 GMT Server: - - Werkzeug/2.2.3 Python/3.11.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -107,9 +95,9 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 17 Aug 2023 03:03:25 GMT + - Thu, 24 Aug 2023 02:35:32 GMT Server: - - Werkzeug/2.2.3 Python/3.11.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK @@ -149,9 +137,9 @@ interactions: Content-Type: - application/json Date: - - Thu, 17 Aug 2023 03:03:25 GMT + - Thu, 24 Aug 2023 02:35:32 GMT Server: - - Werkzeug/2.2.3 Python/3.11.4 + - Werkzeug/2.2.2 Python/3.10.4 status: code: 200 message: OK From 8ac1649d3d965155cc2978fb9e5955883632ed26 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Thu, 24 Aug 2023 00:35:37 -0400 Subject: [PATCH 03/16] didnt use new allele for state --- src/ga4gh/vrs/normalize.py | 5 ++--- tests/test_vrs_normalize.py | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index c4f98ce2..819fa677 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -158,10 +158,9 @@ def _normalize_allele(input_allele, data_proxy): # a Location specified by the coordinates of the new ival, a length # specified by the length of the alternate allele, and a repeat subunit # length - allele.state = models.ReferenceLengthExpression( + new_allele.state = models.ReferenceLengthExpression( length=len(new_alleles[1]), - sequence=models.SequenceString(new_ref_seq), - repeat_subunit_length=repeat_subunit_len + repeatSubunitLength=repeat_subunit_len ) except ValueError: # Occurs for ref agree Alleles (when alt = ref) diff --git a/tests/test_vrs_normalize.py b/tests/test_vrs_normalize.py index 908080bb..7bdcaa73 100644 --- a/tests/test_vrs_normalize.py +++ b/tests/test_vrs_normalize.py @@ -36,6 +36,22 @@ } +allele_dict2_normalized = { + "type": "Allele", + "location": { + "type": "SequenceLocation", + "sequence": "refseq:NC_000023.11", + "start": [None, 155980375], + "end": [155980377, None] + }, + "state": { + "length": 0, + "repeatSubunitLength": 2, + "type": "ReferenceLengthExpression" + } +} + + allele_dict3 = { "type": "Allele", "location": { @@ -59,7 +75,8 @@ def test_normalize_allele(rest_dataproxy): allele1 = models.Allele(**allele_dict2) allele2 = normalize(allele1, rest_dataproxy) - assert allele1 == allele2 + assert allele1 != allele2 + assert allele2 == models.Allele(**allele_dict2_normalized) # Definite ranges are not normalized allele3 = models.Allele(**allele_dict3) From 21132359c589a52dc5406ad6aafc91bb3bb98502 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Thu, 24 Aug 2023 10:15:19 -0400 Subject: [PATCH 04/16] use one line --- src/ga4gh/vrs/normalize.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index 819fa677..2f4c8aff 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -55,10 +55,7 @@ def _get_allele_location_pos( return None val = pos.root[0] or pos.root[1] - if pos0_is_none: - pos_type = PosType.RANGE_LT_OR_EQUAL - else: - pos_type = PosType.RANGE_GT_OR_EQUAL + pos_type = PosType.RANGE_LT_OR_EQUAL if pos0_is_none else PosType.RANGE_GT_OR_EQUAL return LocationPos(value=val, pos_type=pos_type) From b5797349d47f66d3d272ffa925e703041ef67dc8 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Thu, 24 Aug 2023 12:43:01 -0400 Subject: [PATCH 05/16] include sequence in rle + paramaterize limit --- src/ga4gh/vrs/normalize.py | 35 ++- tests/cassettes/test_normalize_allele.yaml | 335 ++++++++++++++++++++- tests/test_vrs_normalize.py | 38 ++- 3 files changed, 383 insertions(+), 25 deletions(-) diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index 2f4c8aff..341ebb17 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -78,12 +78,17 @@ def _get_new_allele_location_pos( return val -def _normalize_allele(input_allele, data_proxy): +def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): """Normalize Allele using "fully-justified" normalization adapted from NCBI's VOCA. Fully-justified normalization expands such ambiguous representation over the entire region of ambiguity, resulting in an unambiguous representation that may be readily compared with other alleles. + :param input_allele: Input VRS Allele object + :param data_proxy: SeqRepo dataproxy + :param rle_seq_limit: If RLE is set as the new state, set the limit for the length + of the `sequence`. To exclude, set to 0. + Does not attempt to normalize Allele's with definite ranges. Will return the `input_allele` """ @@ -125,7 +130,7 @@ def _normalize_allele(input_allele, data_proxy): # Deletion repeat_subunit_len = len_ref_seq else: - repeat_subunit_len = 0 + repeat_subunit_len = len_alt_seq - len_ref_seq new_allele = pydantic_copy(allele) try: @@ -155,10 +160,17 @@ def _normalize_allele(input_allele, data_proxy): # a Location specified by the coordinates of the new ival, a length # specified by the length of the alternate allele, and a repeat subunit # length + sequence = models.SequenceString(new_alleles[1]) + len_sequence = len(sequence.root) + new_allele.state = models.ReferenceLengthExpression( - length=len(new_alleles[1]), + length=len_sequence, repeatSubunitLength=repeat_subunit_len ) + + if rle_seq_limit and len_sequence < rle_seq_limit: + new_allele.state.sequence = sequence + except ValueError: # Occurs for ref agree Alleles (when alt = ref) pass @@ -179,27 +191,26 @@ def _normalize_haplotype(o, data_proxy=None): return o -def _normalize_variationset(o, data_proxy=None): - o.members = sorted(o.members, key=ga4gh_digest) - return o - - handlers = { "Allele": _normalize_allele, "Haplotype": _normalize_haplotype, - "VariationSet": _normalize_variationset, } -def normalize(vo, data_proxy=None): - """normalize given vrs object, regardless of type""" +def normalize(vo, data_proxy=None, **kwargs): + """normalize given vrs object, regardless of type + + kwargs: + rle_seq_limit: If RLE is set as the new state, set the limit for the length + of the `sequence`. To exclude `state.sequence`, set to 0. + """ assert is_pydantic_instance(vo) vo_type = vo.type if vo_type in handlers: handler = handlers[vo_type] - return handler(vo, data_proxy) + return handler(vo, data_proxy, **kwargs) # No handler for vo_type; pass-through unchanged return vo diff --git a/tests/cassettes/test_normalize_allele.yaml b/tests/cassettes/test_normalize_allele.yaml index 920f284f..aced451c 100644 --- a/tests/cassettes/test_normalize_allele.yaml +++ b/tests/cassettes/test_normalize_allele.yaml @@ -34,7 +34,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 02:16:39 GMT + - Thu, 24 Aug 2023 16:40:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -64,7 +64,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 02:16:39 GMT + - Thu, 24 Aug 2023 16:40:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -94,7 +94,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 02:16:39 GMT + - Thu, 24 Aug 2023 16:40:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -135,7 +135,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 02:16:39 GMT + - Thu, 24 Aug 2023 16:40:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -165,7 +165,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 02:16:39 GMT + - Thu, 24 Aug 2023 16:40:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -195,7 +195,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 02:16:39 GMT + - Thu, 24 Aug 2023 16:40:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -225,7 +225,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 02:16:39 GMT + - Thu, 24 Aug 2023 16:40:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -255,7 +255,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 02:16:39 GMT + - Thu, 24 Aug 2023 16:40:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -285,7 +285,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 02:16:39 GMT + - Thu, 24 Aug 2023 16:40:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -315,7 +315,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 02:16:39 GMT + - Thu, 24 Aug 2023 16:40:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -345,7 +345,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 02:16:39 GMT + - Thu, 24 Aug 2023 16:40:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -386,7 +386,318 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 02:16:39 GMT + - Thu, 24 Aug 2023 16:40:29 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000023.11 + response: + body: + string: "{\n \"added\": \"2016-08-27T23:57:18Z\",\n \"aliases\": [\n \"GRCh38:X\",\n + \ \"GRCh38:chrX\",\n \"GRCh38.p1:X\",\n \"GRCh38.p1:chrX\",\n \"GRCh38.p10:X\",\n + \ \"GRCh38.p10:chrX\",\n \"GRCh38.p11:X\",\n \"GRCh38.p11:chrX\",\n + \ \"GRCh38.p12:X\",\n \"GRCh38.p12:chrX\",\n \"GRCh38.p2:X\",\n \"GRCh38.p2:chrX\",\n + \ \"GRCh38.p3:X\",\n \"GRCh38.p3:chrX\",\n \"GRCh38.p4:X\",\n \"GRCh38.p4:chrX\",\n + \ \"GRCh38.p5:X\",\n \"GRCh38.p5:chrX\",\n \"GRCh38.p6:X\",\n \"GRCh38.p6:chrX\",\n + \ \"GRCh38.p7:X\",\n \"GRCh38.p7:chrX\",\n \"GRCh38.p8:X\",\n \"GRCh38.p8:chrX\",\n + \ \"GRCh38.p9:X\",\n \"GRCh38.p9:chrX\",\n \"MD5:2b3a55ff7f58eb308420c8a9b11cac50\",\n + \ \"NCBI:NC_000023.11\",\n \"refseq:NC_000023.11\",\n \"SEGUID:Z9QbQrrPjpjXSMJesDYqC3A43lA\",\n + \ \"SHA1:67d41b42bacf8e98d748c25eb0362a0b7038de50\",\n \"VMC:GS_w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\",\n + \ \"sha512t24u:w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\",\n \"ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP\"\n + \ ],\n \"alphabet\": \"ACGNRSTWY\",\n \"length\": 156040895\n}\n" + headers: + Connection: + - close + Content-Length: + - '978' + Content-Type: + - application/json + Date: + - Thu, 24 Aug 2023 16:40:29 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980375 + response: + body: + string: GT + headers: + Connection: + - close + Content-Length: + - '2' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 16:40:29 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980375 + response: + body: + string: GT + headers: + Connection: + - close + Content-Length: + - '2' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 16:40:29 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980374&end=155980375 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 16:40:29 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980374 + response: + body: + string: G + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 16:40:29 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980372&end=155980373 + response: + body: + string: G + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 16:40:29 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980376 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 16:40:29 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980375 + response: + body: + string: GT + headers: + Connection: + - close + Content-Length: + - '2' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 16:40:29 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980375 + response: + body: + string: '' + headers: + Connection: + - close + Content-Length: + - '0' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 16:40:29 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980375 + response: + body: + string: GT + headers: + Connection: + - close + Content-Length: + - '2' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 16:40:29 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: diff --git a/tests/test_vrs_normalize.py b/tests/test_vrs_normalize.py index 7bdcaa73..94f946fd 100644 --- a/tests/test_vrs_normalize.py +++ b/tests/test_vrs_normalize.py @@ -67,6 +67,38 @@ } +allele_dict4 = { + "type": "Allele", + "location": { + "type": "SequenceLocation", + "sequence": "refseq:NC_000023.11", + "start": 155980373, + "end": 155980375 + }, + "state": { + "sequence": "GTGT", + "type": "LiteralSequenceExpression" + } +} + + +allele_dict4_normalized = { + "type": "Allele", + "location": { + "type": "SequenceLocation", + "sequence": "refseq:NC_000023.11", + "start": 155980373, + "end": 155980375 + }, + "state": { + "length": 4, + "repeatSubunitLength": 2, + "sequence": "GTGT", + "type": "ReferenceLengthExpression" + } +} + + @pytest.mark.vcr def test_normalize_allele(rest_dataproxy): allele1 = models.Allele(**allele_dict) @@ -74,7 +106,7 @@ def test_normalize_allele(rest_dataproxy): assert allele1 == allele2 allele1 = models.Allele(**allele_dict2) - allele2 = normalize(allele1, rest_dataproxy) + allele2 = normalize(allele1, rest_dataproxy, rle_seq_limit=0) assert allele1 != allele2 assert allele2 == models.Allele(**allele_dict2_normalized) @@ -82,3 +114,7 @@ def test_normalize_allele(rest_dataproxy): allele3 = models.Allele(**allele_dict3) allele3_after_norm = normalize(allele3, rest_dataproxy) assert allele3_after_norm == allele3 + + allele4 = models.Allele(**allele_dict4) + allele4_after_norm = normalize(allele4, rest_dataproxy) + assert allele4_after_norm == models.Allele(**allele_dict4_normalized) From 59424bc5f26a6552a3191e3d93425ebf5347fc98 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Thu, 24 Aug 2023 13:37:59 -0400 Subject: [PATCH 06/16] maybe fixed normalize function?? --- src/ga4gh/vrs/normalize.py | 74 ++-- tests/cassettes/test_normalize_allele.yaml | 408 +-------------------- tests/test_vrs_normalize.py | 24 +- 3 files changed, 56 insertions(+), 450 deletions(-) diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index 341ebb17..ab080eb3 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -112,40 +112,62 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): return input_allele ival = (start.value, end.value) - - # Get alleles (the sequences to be normalized) for _normalize if allele.state.sequence: alleles = (None, allele.state.sequence.root) else: alleles = (None, "") - # If one of Reference Allele Sequence or Alternate Allele Sequence is empty, - # store the length of the non-empty sequence: this is the Repeat Subunit Length - len_ref_seq = len(ref_seq[ival[0]: ival[1]]) - len_alt_seq = len(alleles[1]) - if not len_ref_seq and len_alt_seq: - # Insertion - repeat_subunit_len = len_alt_seq - elif len_ref_seq and not len_alt_seq: - # Deletion - repeat_subunit_len = len_ref_seq + # Trim common flanking sequence from Allele sequences. + try: + trim_ival, trim_alleles = _normalize(ref_seq, ival, alleles, mode=None, trim=True) + except ValueError: + # Occurs for ref agree Alleles (when alt = ref) + len_ref_seq = len_alt_seq = 0 else: - repeat_subunit_len = len_alt_seq - len_ref_seq + trim_ref_seq = ref_seq[trim_ival[0]: trim_ival[1]] + trim_alt_seq = trim_alleles[1] + len_ref_seq = len(trim_ref_seq) + len_alt_seq = len(trim_alt_seq) + + # Compare the two allele sequences + if not len_ref_seq and not len_alt_seq: + return input_allele new_allele = pydantic_copy(allele) - try: - new_ival, new_alleles = _normalize(ref_seq, - ival, - alleles=alleles, - mode=NormalizationMode.EXPAND, - anchor_length=0) + if len_ref_seq and len_alt_seq: + new_allele.location.start = _get_new_allele_location_pos( + trim_ival[0], start.pos_type + ) + new_allele.location.end = _get_new_allele_location_pos( + trim_ival[1], end.pos_type + ) + new_allele.state.sequence = models.SequenceString(trim_alleles[1]) + if sequence_reference: + new_allele.location.sequence = sequence_reference + return new_allele + + # Determine bounds of ambiguity + try: + new_ival, new_alleles = _normalize( + ref_seq, + trim_ival, + alleles=trim_alleles, + mode=NormalizationMode.EXPAND, + anchor_length=0, + trim=False # Don't need to trim, since we already did it above + ) + except ValueError: + # Occurs for ref agree Alleles (when alt = ref) + pass + else: new_allele.location.start = _get_new_allele_location_pos( new_ival[0], start.pos_type ) new_allele.location.end = _get_new_allele_location_pos( new_ival[1], end.pos_type ) + new_ref_seq = ref_seq[new_ival[0]: new_ival[1]] if not new_ref_seq: @@ -156,26 +178,18 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): sequence=models.SequenceString(new_alleles[1]) ) else: - # Otherwise, return a new Allele using a reference length expression, using - # a Location specified by the coordinates of the new ival, a length - # specified by the length of the alternate allele, and a repeat subunit - # length + # Otherwise, return a new Allele using a RLE sequence = models.SequenceString(new_alleles[1]) len_sequence = len(sequence.root) new_allele.state = models.ReferenceLengthExpression( length=len_sequence, - repeatSubunitLength=repeat_subunit_len + repeatSubunitLength=len_ref_seq or len_alt_seq ) if rle_seq_limit and len_sequence < rle_seq_limit: - new_allele.state.sequence = sequence - - except ValueError: - # Occurs for ref agree Alleles (when alt = ref) - pass + new_allele.state.sequence = sequence - # Convert IRI back to SequenceReference if sequence_reference: new_allele.location.sequence = sequence_reference diff --git a/tests/cassettes/test_normalize_allele.yaml b/tests/cassettes/test_normalize_allele.yaml index aced451c..ffea9acc 100644 --- a/tests/cassettes/test_normalize_allele.yaml +++ b/tests/cassettes/test_normalize_allele.yaml @@ -34,7 +34,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 16:40:28 GMT + - Thu, 24 Aug 2023 17:31:27 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -64,37 +64,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 16:40:28 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000006.12?start=26090950&end=26090951 - response: - body: - string: C - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 24 Aug 2023 16:40:28 GMT + - Thu, 24 Aug 2023 17:31:27 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -135,7 +105,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 16:40:28 GMT + - Thu, 24 Aug 2023 17:31:27 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -165,157 +135,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 16:40:28 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980377 - response: - body: - string: TA - headers: - Connection: - - close - Content-Length: - - '2' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 24 Aug 2023 16:40:28 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980374&end=155980375 - response: - body: - string: T - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 24 Aug 2023 16:40:28 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980377&end=155980378 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 24 Aug 2023 16:40:28 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980375 - response: - body: - string: '' - headers: - Connection: - - close - Content-Length: - - '0' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 24 Aug 2023 16:40:28 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980377&end=155980377 - response: - body: - string: '' - headers: - Connection: - - close - Content-Length: - - '0' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 24 Aug 2023 16:40:28 GMT + - Thu, 24 Aug 2023 17:31:27 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -345,7 +165,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 16:40:29 GMT + - Thu, 24 Aug 2023 17:31:27 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -386,7 +206,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 16:40:29 GMT + - Thu, 24 Aug 2023 17:31:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -427,187 +247,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 16:40:29 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980375 - response: - body: - string: GT - headers: - Connection: - - close - Content-Length: - - '2' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 24 Aug 2023 16:40:29 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980375 - response: - body: - string: GT - headers: - Connection: - - close - Content-Length: - - '2' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 24 Aug 2023 16:40:29 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980374&end=155980375 - response: - body: - string: T - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 24 Aug 2023 16:40:29 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980374 - response: - body: - string: G - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 24 Aug 2023 16:40:29 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980372&end=155980373 - response: - body: - string: G - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 24 Aug 2023 16:40:29 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980376 - response: - body: - string: T - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 24 Aug 2023 16:40:29 GMT + - Thu, 24 Aug 2023 17:31:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -637,7 +277,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 16:40:29 GMT + - Thu, 24 Aug 2023 17:31:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -667,37 +307,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 16:40:29 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980375 - response: - body: - string: GT - headers: - Connection: - - close - Content-Length: - - '2' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Thu, 24 Aug 2023 16:40:29 GMT + - Thu, 24 Aug 2023 17:31:28 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: diff --git a/tests/test_vrs_normalize.py b/tests/test_vrs_normalize.py index 94f946fd..5e64e3c8 100644 --- a/tests/test_vrs_normalize.py +++ b/tests/test_vrs_normalize.py @@ -36,22 +36,6 @@ } -allele_dict2_normalized = { - "type": "Allele", - "location": { - "type": "SequenceLocation", - "sequence": "refseq:NC_000023.11", - "start": [None, 155980375], - "end": [155980377, None] - }, - "state": { - "length": 0, - "repeatSubunitLength": 2, - "type": "ReferenceLengthExpression" - } -} - - allele_dict3 = { "type": "Allele", "location": { @@ -91,10 +75,8 @@ "end": 155980375 }, "state": { - "length": 4, - "repeatSubunitLength": 2, "sequence": "GTGT", - "type": "ReferenceLengthExpression" + "type": "LiteralSequenceExpression" } } @@ -107,14 +89,14 @@ def test_normalize_allele(rest_dataproxy): allele1 = models.Allele(**allele_dict2) allele2 = normalize(allele1, rest_dataproxy, rle_seq_limit=0) - assert allele1 != allele2 - assert allele2 == models.Allele(**allele_dict2_normalized) + assert allele1 == allele2 # Definite ranges are not normalized allele3 = models.Allele(**allele_dict3) allele3_after_norm = normalize(allele3, rest_dataproxy) assert allele3_after_norm == allele3 + # Duplication allele4 = models.Allele(**allele_dict4) allele4_after_norm = normalize(allele4, rest_dataproxy) assert allele4_after_norm == models.Allele(**allele_dict4_normalized) From 14e5bc92ea75b1c6b834644ab74bac44a4655156 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Thu, 24 Aug 2023 13:40:24 -0400 Subject: [PATCH 07/16] revert --- tests/test_vrs_normalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_vrs_normalize.py b/tests/test_vrs_normalize.py index 5e64e3c8..3f832014 100644 --- a/tests/test_vrs_normalize.py +++ b/tests/test_vrs_normalize.py @@ -88,7 +88,7 @@ def test_normalize_allele(rest_dataproxy): assert allele1 == allele2 allele1 = models.Allele(**allele_dict2) - allele2 = normalize(allele1, rest_dataproxy, rle_seq_limit=0) + allele2 = normalize(allele1, rest_dataproxy) assert allele1 == allele2 # Definite ranges are not normalized From 67368910551fb5844556251e2ff9e6a7f927a430 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Thu, 24 Aug 2023 14:33:52 -0400 Subject: [PATCH 08/16] fix setting trimmed alleles --- src/ga4gh/vrs/normalize.py | 6 +- tests/cassettes/test_normalize_allele.yaml | 438 ++++++++++++++++++++- tests/test_vrs_normalize.py | 26 +- 3 files changed, 453 insertions(+), 17 deletions(-) diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index ab080eb3..847edf0d 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -152,10 +152,8 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): new_ival, new_alleles = _normalize( ref_seq, trim_ival, - alleles=trim_alleles, - mode=NormalizationMode.EXPAND, - anchor_length=0, - trim=False # Don't need to trim, since we already did it above + (None, trim_alleles[1]), + mode=NormalizationMode.EXPAND ) except ValueError: # Occurs for ref agree Alleles (when alt = ref) diff --git a/tests/cassettes/test_normalize_allele.yaml b/tests/cassettes/test_normalize_allele.yaml index ffea9acc..dd7ced3c 100644 --- a/tests/cassettes/test_normalize_allele.yaml +++ b/tests/cassettes/test_normalize_allele.yaml @@ -34,7 +34,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 17:31:27 GMT + - Thu, 24 Aug 2023 18:29:30 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -64,7 +64,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 17:31:27 GMT + - Thu, 24 Aug 2023 18:29:30 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -105,7 +105,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 17:31:27 GMT + - Thu, 24 Aug 2023 18:29:30 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -135,7 +135,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 17:31:27 GMT + - Thu, 24 Aug 2023 18:29:30 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -165,7 +165,187 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 17:31:27 GMT + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980377 + response: + body: + string: TA + headers: + Connection: + - close + Content-Length: + - '2' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980374&end=155980375 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980377&end=155980378 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980375 + response: + body: + string: '' + headers: + Connection: + - close + Content-Length: + - '0' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980377&end=155980377 + response: + body: + string: '' + headers: + Connection: + - close + Content-Length: + - '0' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980377 + response: + body: + string: TA + headers: + Connection: + - close + Content-Length: + - '2' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -206,7 +386,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 17:31:28 GMT + - Thu, 24 Aug 2023 18:29:30 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -247,7 +427,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 17:31:28 GMT + - Thu, 24 Aug 2023 18:29:30 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -277,7 +457,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 17:31:28 GMT + - Thu, 24 Aug 2023 18:29:30 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -307,7 +487,247 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 17:31:28 GMT + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980375 + response: + body: + string: '' + headers: + Connection: + - close + Content-Length: + - '0' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980374&end=155980375 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980374 + response: + body: + string: G + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980372&end=155980373 + response: + body: + string: G + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980376 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980375 + response: + body: + string: GT + headers: + Connection: + - close + Content-Length: + - '2' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980375 + response: + body: + string: '' + headers: + Connection: + - close + Content-Length: + - '0' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980375 + response: + body: + string: GT + headers: + Connection: + - close + Content-Length: + - '2' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Thu, 24 Aug 2023 18:29:30 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: diff --git a/tests/test_vrs_normalize.py b/tests/test_vrs_normalize.py index 3f832014..771576fc 100644 --- a/tests/test_vrs_normalize.py +++ b/tests/test_vrs_normalize.py @@ -36,6 +36,22 @@ } +allele_dict2_normalized = { + "type": "Allele", + "location": { + "type": "SequenceLocation", + "sequence": "refseq:NC_000023.11", + "start": [None, 155980375], + "end": [155980377, None] + }, + "state": { + "length": 0, + "repeatSubunitLength": 2, + "type": "ReferenceLengthExpression" + } +} + + allele_dict3 = { "type": "Allele", "location": { @@ -65,7 +81,6 @@ } } - allele_dict4_normalized = { "type": "Allele", "location": { @@ -75,8 +90,10 @@ "end": 155980375 }, "state": { + "length": 4, + "repeatSubunitLength": 2, "sequence": "GTGT", - "type": "LiteralSequenceExpression" + "type": "ReferenceLengthExpression" } } @@ -88,8 +105,9 @@ def test_normalize_allele(rest_dataproxy): assert allele1 == allele2 allele1 = models.Allele(**allele_dict2) - allele2 = normalize(allele1, rest_dataproxy) - assert allele1 == allele2 + allele2 = normalize(allele1, rest_dataproxy, rle_seq_limit=0) + assert allele1 != allele2 + assert allele2 == models.Allele(**allele_dict2_normalized) # Definite ranges are not normalized allele3 = models.Allele(**allele_dict3) From 99384620eae5bd9e25d2f1d3084cf94736a2c1b5 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Thu, 24 Aug 2023 19:33:21 -0400 Subject: [PATCH 09/16] allow for no rle_seq_limit + update example --- src/ga4gh/vrs/normalize.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index 847edf0d..c339af35 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -87,7 +87,9 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): :param input_allele: Input VRS Allele object :param data_proxy: SeqRepo dataproxy :param rle_seq_limit: If RLE is set as the new state, set the limit for the length - of the `sequence`. To exclude, set to 0. + of the `sequence`. + To exclude `sequence` from the response, set to 0. + For no limit, set to `None`. Does not attempt to normalize Allele's with definite ranges. Will return the `input_allele` @@ -185,7 +187,7 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): repeatSubunitLength=len_ref_seq or len_alt_seq ) - if rle_seq_limit and len_sequence < rle_seq_limit: + if (rle_seq_limit and len_sequence < rle_seq_limit) or (rle_seq_limit is None): new_allele.state.sequence = sequence if sequence_reference: @@ -251,13 +253,13 @@ def normalize(vo, data_proxy=None, **kwargs): }, "type": "Allele" } - allele = models.Allele(**allele_dict) + a = models.Allele(**allele_dict) - allele2 = normalize(allele, dp) + allele2 = normalize(a, dp) - allele.state.sequence = "C" - allele3 = normalize(allele, dp) + a.state.sequence.root = "C" + allele3 = normalize(a, dp) - allele.location.interval.end = 44908823 - allele.state.sequence = "" - allele4 = normalize(allele, dp) + a.location.end = 44908823 + a.state.sequence.root = "" + allele4 = normalize(a, dp) From 1b7c2ff03b16a4d7cce5219be8b965946ef763c2 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Fri, 25 Aug 2023 19:22:23 -0400 Subject: [PATCH 10/16] Update src/ga4gh/vrs/normalize.py Co-authored-by: Alex H. Wagner, PhD --- src/ga4gh/vrs/normalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index c339af35..749624a0 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -31,7 +31,7 @@ class LocationPos(NamedTuple): def _get_allele_location_pos( allele_vo: models.Allele, use_start: bool = True ) -> Optional[LocationPos]: - """Get Allele location start or end value for interval + """Get a representative position for Alleles with Location start or end defined by Range :param allele_vo: VRS Allele object :param use_start: `True` if using `allele_vo.location.start`. `False` if using From daa21958139215b459dbf82970d5a28089478952 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Fri, 25 Aug 2023 19:22:40 -0400 Subject: [PATCH 11/16] Update src/ga4gh/vrs/normalize.py Co-authored-by: Alex H. Wagner, PhD --- src/ga4gh/vrs/normalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index 749624a0..a4143c59 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -91,7 +91,7 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): To exclude `sequence` from the response, set to 0. For no limit, set to `None`. - Does not attempt to normalize Allele's with definite ranges. Will return the + Does not attempt to normalize Alleles with definite ranges and will instead return the `input_allele` """ allele = pydantic_copy(input_allele) From 812f480ba141c91609bdeea6f566988b5b701e91 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Fri, 25 Aug 2023 19:28:37 -0400 Subject: [PATCH 12/16] Update src/ga4gh/vrs/normalize.py Co-authored-by: Alex H. Wagner, PhD --- src/ga4gh/vrs/normalize.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index a4143c59..b2863b1b 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -179,16 +179,15 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): ) else: # Otherwise, return a new Allele using a RLE - sequence = models.SequenceString(new_alleles[1]) - len_sequence = len(sequence.root) + len_sequence = len(new_alleles[1]) new_allele.state = models.ReferenceLengthExpression( length=len_sequence, repeatSubunitLength=len_ref_seq or len_alt_seq ) - if (rle_seq_limit and len_sequence < rle_seq_limit) or (rle_seq_limit is None): - new_allele.state.sequence = sequence + if (rle_seq_limit and len_sequence <= rle_seq_limit) or (rle_seq_limit is None): + new_allele.state.sequence = models.SequenceString(new_alleles[1]) if sequence_reference: new_allele.location.sequence = sequence_reference From af267f0aab0f63a201aebe3a9a059f2ebba53cba Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 30 Aug 2023 09:50:35 -0400 Subject: [PATCH 13/16] refactor handling getting alias for SequenceProxy --- src/ga4gh/vrs/normalize.py | 16 +-- tests/cassettes/test_normalize_allele.yaml | 117 +++++++++++++++++---- tests/test_vrs_normalize.py | 23 ++++ 3 files changed, 122 insertions(+), 34 deletions(-) diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index b2863b1b..14fcd52c 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -96,15 +96,14 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): """ allele = pydantic_copy(input_allele) - # Temporarily convert SequenceReference to IRI because it makes the code simpler. - # This will be changed back to SequenceReference at the end of the method - sequence_reference = None if isinstance(allele.location.sequence, models.SequenceReference): - sequence_reference = allele.location.sequence - allele.location.sequence = models.IRI(sequence_reference.refgetAccession) + alias = f"ga4gh:{allele.location.sequence.refgetAccession}" + else: + # IRI + alias = allele.location.sequence.root # Get reference sequence and interval - ref_seq = SequenceProxy(data_proxy, allele.location.sequence.root) + ref_seq = SequenceProxy(data_proxy, alias) start = _get_allele_location_pos(allele, use_start=True) if start is None: return input_allele @@ -145,8 +144,6 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): trim_ival[1], end.pos_type ) new_allele.state.sequence = models.SequenceString(trim_alleles[1]) - if sequence_reference: - new_allele.location.sequence = sequence_reference return new_allele # Determine bounds of ambiguity @@ -189,9 +186,6 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): if (rle_seq_limit and len_sequence <= rle_seq_limit) or (rle_seq_limit is None): new_allele.state.sequence = models.SequenceString(new_alleles[1]) - if sequence_reference: - new_allele.location.sequence = sequence_reference - return new_allele diff --git a/tests/cassettes/test_normalize_allele.yaml b/tests/cassettes/test_normalize_allele.yaml index dd7ced3c..13f1e26f 100644 --- a/tests/cassettes/test_normalize_allele.yaml +++ b/tests/cassettes/test_normalize_allele.yaml @@ -34,7 +34,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:45 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -64,7 +64,78 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:45 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV + response: + body: + string: "{\n \"added\": \"2016-08-27T21:22:36Z\",\n \"aliases\": [\n \"GRCh38:6\",\n + \ \"GRCh38:chr6\",\n \"GRCh38.p1:6\",\n \"GRCh38.p1:chr6\",\n \"GRCh38.p10:6\",\n + \ \"GRCh38.p10:chr6\",\n \"GRCh38.p11:6\",\n \"GRCh38.p11:chr6\",\n + \ \"GRCh38.p12:6\",\n \"GRCh38.p12:chr6\",\n \"GRCh38.p2:6\",\n \"GRCh38.p2:chr6\",\n + \ \"GRCh38.p3:6\",\n \"GRCh38.p3:chr6\",\n \"GRCh38.p4:6\",\n \"GRCh38.p4:chr6\",\n + \ \"GRCh38.p5:6\",\n \"GRCh38.p5:chr6\",\n \"GRCh38.p6:6\",\n \"GRCh38.p6:chr6\",\n + \ \"GRCh38.p7:6\",\n \"GRCh38.p7:chr6\",\n \"GRCh38.p8:6\",\n \"GRCh38.p8:chr6\",\n + \ \"GRCh38.p9:6\",\n \"GRCh38.p9:chr6\",\n \"MD5:5691468a67c7e7a7b5f2a3a683792c29\",\n + \ \"NCBI:NC_000006.12\",\n \"refseq:NC_000006.12\",\n \"SEGUID:WZuaTlR1qIRxrJ5dpG2Z0ydeqX4\",\n + \ \"SHA1:599b9a4e5475a88471ac9e5da46d99d3275ea97e\",\n \"VMC:GS_0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV\",\n + \ \"sha512t24u:0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV\",\n \"ga4gh:SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV\"\n + \ ],\n \"alphabet\": \"ACGNTY\",\n \"length\": 170805979\n}\n" + headers: + Connection: + - close + Content-Length: + - '975' + Content-Type: + - application/json + Date: + - Wed, 30 Aug 2023 13:26:45 GMT + Server: + - Werkzeug/2.2.2 Python/3.10.4 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.31.0 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV?start=26090950&end=26090951 + response: + body: + string: C + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Wed, 30 Aug 2023 13:26:45 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -105,7 +176,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:45 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -135,7 +206,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:45 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -165,7 +236,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:45 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -195,7 +266,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:45 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -225,7 +296,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:45 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -255,7 +326,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:45 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -285,7 +356,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:45 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -315,7 +386,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:45 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -345,7 +416,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:45 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -386,7 +457,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:45 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -427,7 +498,7 @@ interactions: Content-Type: - application/json Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:45 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -457,7 +528,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -487,7 +558,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -517,7 +588,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -547,7 +618,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -577,7 +648,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -607,7 +678,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -637,7 +708,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -667,7 +738,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -697,7 +768,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -727,7 +798,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Thu, 24 Aug 2023 18:29:30 GMT + - Wed, 30 Aug 2023 13:26:46 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: diff --git a/tests/test_vrs_normalize.py b/tests/test_vrs_normalize.py index 771576fc..2a34d25d 100644 --- a/tests/test_vrs_normalize.py +++ b/tests/test_vrs_normalize.py @@ -21,6 +21,25 @@ "type": "Allele" } + +allele_dict_sequence_reference = { + "location": { + "end": 26090951, + "start": 26090950, + "sequence": { + "type": "SequenceReference", + "refgetAccession": "SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV" + }, + "type": "SequenceLocation" + }, + "state": { + "sequence": "C", + "type": "LiteralSequenceExpression" + }, + "type": "Allele" +} + + allele_dict2 = { "type": "Allele", "location": { @@ -104,6 +123,10 @@ def test_normalize_allele(rest_dataproxy): allele2 = normalize(allele1, rest_dataproxy) assert allele1 == allele2 + allele1_seq_ref = models.Allele(**allele_dict_sequence_reference) + allele2_seq_ref = normalize(allele1_seq_ref, rest_dataproxy) + assert allele1_seq_ref == allele2_seq_ref + allele1 = models.Allele(**allele_dict2) allele2 = normalize(allele1, rest_dataproxy, rle_seq_limit=0) assert allele1 != allele2 From 05deaca902b46e1d28fd0f2d5552b900d5580f1c Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 30 Aug 2023 10:55:55 -0400 Subject: [PATCH 14/16] iri must be dereferenced + update tests --- src/ga4gh/vrs/normalize.py | 6 ++- tests/cassettes/test_normalize_allele.yaml | 56 +++++++++++----------- tests/test_vrs_normalize.py | 4 +- 3 files changed, 35 insertions(+), 31 deletions(-) diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index 14fcd52c..387ef164 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -84,6 +84,10 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): entire region of ambiguity, resulting in an unambiguous representation that may be readily compared with other alleles. + This function assumes that IRIs are dereferenced, providing either the accession + (refseq:NC_000006.12, NC_000006.12) or ga4gh identifier for a sequence + (ga4gh:SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV). + :param input_allele: Input VRS Allele object :param data_proxy: SeqRepo dataproxy :param rle_seq_limit: If RLE is set as the new state, set the limit for the length @@ -99,7 +103,7 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): if isinstance(allele.location.sequence, models.SequenceReference): alias = f"ga4gh:{allele.location.sequence.refgetAccession}" else: - # IRI + # Dereferenced IRI alias = allele.location.sequence.root # Get reference sequence and interval diff --git a/tests/cassettes/test_normalize_allele.yaml b/tests/cassettes/test_normalize_allele.yaml index 13f1e26f..04b1a7f2 100644 --- a/tests/cassettes/test_normalize_allele.yaml +++ b/tests/cassettes/test_normalize_allele.yaml @@ -11,7 +11,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000006.12 + uri: http://localhost:5000/seqrepo/1/metadata/NC_000006.12 response: body: string: "{\n \"added\": \"2016-08-27T21:22:36Z\",\n \"aliases\": [\n \"GRCh38:6\",\n @@ -34,7 +34,7 @@ interactions: Content-Type: - application/json Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -52,7 +52,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000006.12?start=26090950&end=26090951 + uri: http://localhost:5000/seqrepo/1/sequence/NC_000006.12?start=26090950&end=26090951 response: body: string: C @@ -64,7 +64,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -105,7 +105,7 @@ interactions: Content-Type: - application/json Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -135,7 +135,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -176,7 +176,7 @@ interactions: Content-Type: - application/json Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -206,7 +206,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -236,7 +236,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -266,7 +266,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -296,7 +296,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -326,7 +326,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -356,7 +356,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -386,7 +386,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -416,7 +416,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -434,7 +434,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000023.11 + uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP response: body: string: "{\n \"added\": \"2016-08-27T23:57:18Z\",\n \"aliases\": [\n \"GRCh38:X\",\n @@ -457,7 +457,7 @@ interactions: Content-Type: - application/json Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -498,7 +498,7 @@ interactions: Content-Type: - application/json Date: - - Wed, 30 Aug 2023 13:26:45 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -528,7 +528,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:46 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -558,7 +558,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:46 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -588,7 +588,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:46 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -618,7 +618,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:46 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -648,7 +648,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:46 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -678,7 +678,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:46 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -708,7 +708,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:46 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -738,7 +738,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:46 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -768,7 +768,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:46 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -798,7 +798,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 13:26:46 GMT + - Wed, 30 Aug 2023 14:40:08 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: diff --git a/tests/test_vrs_normalize.py b/tests/test_vrs_normalize.py index 2a34d25d..5490ab9e 100644 --- a/tests/test_vrs_normalize.py +++ b/tests/test_vrs_normalize.py @@ -11,7 +11,7 @@ "location": { "end": 26090951, "start": 26090950, - "sequence": "refseq:NC_000006.12", + "sequence": "NC_000006.12", "type": "SequenceLocation" }, "state": { @@ -75,7 +75,7 @@ "type": "Allele", "location": { "type": "SequenceLocation", - "sequence": "refseq:NC_000023.11", + "sequence": "ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", "start": [155980374, 155980375], "end": [155980377, 155980378] }, From b64aa8be02375f0377fbcc90e016ecadedd0cf27 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 30 Aug 2023 14:18:13 -0400 Subject: [PATCH 15/16] _normalize_allele always expects a SequenceReference --- src/ga4gh/vrs/normalize.py | 9 +- tests/cassettes/test_normalize_allele.yaml | 157 ++++++--------------- tests/test_vrs_normalize.py | 44 +++--- 3 files changed, 67 insertions(+), 143 deletions(-) diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index 387ef164..ae67d20e 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -84,9 +84,9 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): entire region of ambiguity, resulting in an unambiguous representation that may be readily compared with other alleles. - This function assumes that IRIs are dereferenced, providing either the accession - (refseq:NC_000006.12, NC_000006.12) or ga4gh identifier for a sequence - (ga4gh:SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV). + This function assumes that IRIs are dereferenced, providing a `SequenceReference` as + the `allele.location.sequence`. If a `SequenceReference` is not provided, the allele + will be returned as is with no normalization. :param input_allele: Input VRS Allele object :param data_proxy: SeqRepo dataproxy @@ -103,8 +103,7 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): if isinstance(allele.location.sequence, models.SequenceReference): alias = f"ga4gh:{allele.location.sequence.refgetAccession}" else: - # Dereferenced IRI - alias = allele.location.sequence.root + return input_allele # Get reference sequence and interval ref_seq = SequenceProxy(data_proxy, alias) diff --git a/tests/cassettes/test_normalize_allele.yaml b/tests/cassettes/test_normalize_allele.yaml index 04b1a7f2..3d158f65 100644 --- a/tests/cassettes/test_normalize_allele.yaml +++ b/tests/cassettes/test_normalize_allele.yaml @@ -1,75 +1,4 @@ interactions: -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/metadata/NC_000006.12 - response: - body: - string: "{\n \"added\": \"2016-08-27T21:22:36Z\",\n \"aliases\": [\n \"GRCh38:6\",\n - \ \"GRCh38:chr6\",\n \"GRCh38.p1:6\",\n \"GRCh38.p1:chr6\",\n \"GRCh38.p10:6\",\n - \ \"GRCh38.p10:chr6\",\n \"GRCh38.p11:6\",\n \"GRCh38.p11:chr6\",\n - \ \"GRCh38.p12:6\",\n \"GRCh38.p12:chr6\",\n \"GRCh38.p2:6\",\n \"GRCh38.p2:chr6\",\n - \ \"GRCh38.p3:6\",\n \"GRCh38.p3:chr6\",\n \"GRCh38.p4:6\",\n \"GRCh38.p4:chr6\",\n - \ \"GRCh38.p5:6\",\n \"GRCh38.p5:chr6\",\n \"GRCh38.p6:6\",\n \"GRCh38.p6:chr6\",\n - \ \"GRCh38.p7:6\",\n \"GRCh38.p7:chr6\",\n \"GRCh38.p8:6\",\n \"GRCh38.p8:chr6\",\n - \ \"GRCh38.p9:6\",\n \"GRCh38.p9:chr6\",\n \"MD5:5691468a67c7e7a7b5f2a3a683792c29\",\n - \ \"NCBI:NC_000006.12\",\n \"refseq:NC_000006.12\",\n \"SEGUID:WZuaTlR1qIRxrJ5dpG2Z0ydeqX4\",\n - \ \"SHA1:599b9a4e5475a88471ac9e5da46d99d3275ea97e\",\n \"VMC:GS_0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV\",\n - \ \"sha512t24u:0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV\",\n \"ga4gh:SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV\"\n - \ ],\n \"alphabet\": \"ACGNTY\",\n \"length\": 170805979\n}\n" - headers: - Connection: - - close - Content-Length: - - '975' - Content-Type: - - application/json - Date: - - Wed, 30 Aug 2023 14:40:08 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.31.0 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/NC_000006.12?start=26090950&end=26090951 - response: - body: - string: C - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Wed, 30 Aug 2023 14:40:08 GMT - Server: - - Werkzeug/2.2.2 Python/3.10.4 - status: - code: 200 - message: OK - request: body: null headers: @@ -105,7 +34,7 @@ interactions: Content-Type: - application/json Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -135,7 +64,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -153,7 +82,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000023.11 + uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP response: body: string: "{\n \"added\": \"2016-08-27T23:57:18Z\",\n \"aliases\": [\n \"GRCh38:X\",\n @@ -176,7 +105,7 @@ interactions: Content-Type: - application/json Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -194,7 +123,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980377 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980377 response: body: string: TA @@ -206,7 +135,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -224,7 +153,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980377 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980377 response: body: string: TA @@ -236,7 +165,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -254,7 +183,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980377 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980377 response: body: string: TA @@ -266,7 +195,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -284,7 +213,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980374&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980374&end=155980375 response: body: string: T @@ -296,7 +225,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -314,7 +243,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980377&end=155980378 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980377&end=155980378 response: body: string: A @@ -326,7 +255,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -344,7 +273,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980375 response: body: string: '' @@ -356,7 +285,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -374,7 +303,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980377&end=155980377 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980377&end=155980377 response: body: string: '' @@ -386,7 +315,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -404,7 +333,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980377 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980377 response: body: string: TA @@ -416,7 +345,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -457,7 +386,7 @@ interactions: Content-Type: - application/json Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -475,7 +404,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/metadata/refseq:NC_000023.11 + uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP response: body: string: "{\n \"added\": \"2016-08-27T23:57:18Z\",\n \"aliases\": [\n \"GRCh38:X\",\n @@ -498,7 +427,7 @@ interactions: Content-Type: - application/json Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -516,7 +445,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980373&end=155980375 response: body: string: GT @@ -528,7 +457,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -546,7 +475,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980375 response: body: string: '' @@ -558,7 +487,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:48 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -576,7 +505,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980375 response: body: string: '' @@ -588,7 +517,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:49 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -606,7 +535,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980374&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980374&end=155980375 response: body: string: T @@ -618,7 +547,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:49 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -636,7 +565,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980374 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980373&end=155980374 response: body: string: G @@ -648,7 +577,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:49 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -666,7 +595,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980372&end=155980373 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980372&end=155980373 response: body: string: G @@ -678,7 +607,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:49 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -696,7 +625,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980376 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980376 response: body: string: T @@ -708,7 +637,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:49 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -726,7 +655,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980373&end=155980375 response: body: string: GT @@ -738,7 +667,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:49 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -756,7 +685,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980375&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980375&end=155980375 response: body: string: '' @@ -768,7 +697,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:49 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: @@ -786,7 +715,7 @@ interactions: User-Agent: - python-requests/2.31.0 method: GET - uri: http://localhost:5000/seqrepo/1/sequence/refseq:NC_000023.11?start=155980373&end=155980375 + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP?start=155980373&end=155980375 response: body: string: GT @@ -798,7 +727,7 @@ interactions: Content-Type: - text/plain; charset=utf-8 Date: - - Wed, 30 Aug 2023 14:40:08 GMT + - Wed, 30 Aug 2023 18:17:49 GMT Server: - Werkzeug/2.2.2 Python/3.10.4 status: diff --git a/tests/test_vrs_normalize.py b/tests/test_vrs_normalize.py index 5490ab9e..4e5589e8 100644 --- a/tests/test_vrs_normalize.py +++ b/tests/test_vrs_normalize.py @@ -8,21 +8,6 @@ # allele_dict = { - "location": { - "end": 26090951, - "start": 26090950, - "sequence": "NC_000006.12", - "type": "SequenceLocation" - }, - "state": { - "sequence": "C", - "type": "LiteralSequenceExpression" - }, - "type": "Allele" -} - - -allele_dict_sequence_reference = { "location": { "end": 26090951, "start": 26090950, @@ -44,7 +29,10 @@ "type": "Allele", "location": { "type": "SequenceLocation", - "sequence": "refseq:NC_000023.11", + "sequence": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP" + }, "start": [None, 155980375], "end": [155980377, None] }, @@ -59,7 +47,10 @@ "type": "Allele", "location": { "type": "SequenceLocation", - "sequence": "refseq:NC_000023.11", + "sequence": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP" + }, "start": [None, 155980375], "end": [155980377, None] }, @@ -75,7 +66,10 @@ "type": "Allele", "location": { "type": "SequenceLocation", - "sequence": "ga4gh:SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + "sequence": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP" + }, "start": [155980374, 155980375], "end": [155980377, 155980378] }, @@ -90,7 +84,10 @@ "type": "Allele", "location": { "type": "SequenceLocation", - "sequence": "refseq:NC_000023.11", + "sequence": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP" + }, "start": 155980373, "end": 155980375 }, @@ -104,7 +101,10 @@ "type": "Allele", "location": { "type": "SequenceLocation", - "sequence": "refseq:NC_000023.11", + "sequence": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP" + }, "start": 155980373, "end": 155980375 }, @@ -123,10 +123,6 @@ def test_normalize_allele(rest_dataproxy): allele2 = normalize(allele1, rest_dataproxy) assert allele1 == allele2 - allele1_seq_ref = models.Allele(**allele_dict_sequence_reference) - allele2_seq_ref = normalize(allele1_seq_ref, rest_dataproxy) - assert allele1_seq_ref == allele2_seq_ref - allele1 = models.Allele(**allele_dict2) allele2 = normalize(allele1, rest_dataproxy, rle_seq_limit=0) assert allele1 != allele2 From 2e1923a19677af10f53cd29ca7cd8dc70e4b814c Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 30 Aug 2023 15:16:17 -0400 Subject: [PATCH 16/16] add log warning when IRI is passed --- src/ga4gh/vrs/normalize.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/ga4gh/vrs/normalize.py b/src/ga4gh/vrs/normalize.py index ae67d20e..504c5561 100644 --- a/src/ga4gh/vrs/normalize.py +++ b/src/ga4gh/vrs/normalize.py @@ -3,6 +3,7 @@ See https://vrs.ga4gh.org/en/stable/impl-guide/normalization.html """ +import logging from enum import IntEnum from typing import NamedTuple, Optional, Union @@ -13,6 +14,9 @@ from .dataproxy import SequenceProxy +_logger = logging.getLogger(__name__) + + class PosType(IntEnum): """Define the kind of position on a location""" @@ -103,6 +107,10 @@ def _normalize_allele(input_allele, data_proxy, rle_seq_limit=50): if isinstance(allele.location.sequence, models.SequenceReference): alias = f"ga4gh:{allele.location.sequence.refgetAccession}" else: + _logger.warning( + "`input_allele.location.sequence` expects a `SequenceReference`, returning " + "`input_allele` with no normalization." + ) return input_allele # Get reference sequence and interval