Skip to content

Commit

Permalink
Limit bulk coordinate search to build GRCh37
Browse files (browse the repository at this point in the history)
  • Loading branch information
susannasiebert committed May 18, 2020
1 parent 49497c3 commit ef0d0d0
Showing 1 changed file with 44 additions and 41 deletions.
85 changes: 44 additions & 41 deletions civicpy/civic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1226,48 +1226,51 @@ def is_sorted(prev_q, current_q):
def append_match(matches_list, query, ct_row):
    """Append a ``Match`` built from ``ct_row`` to the matches stored for ``query``.

    ``matches_list`` is indexed by ``query`` and the value found there must
    support ``append``. ``ct_row`` must expose ``to_dict()``; the resulting
    mapping is expanded into keyword arguments for ``Match``.
    """
    # Look the bucket up first, as the original expression did, so any
    # lookup side effect on ``matches_list`` happens before the row is converted.
    bucket = matches_list[query]
    row_fields = ct_row.to_dict()
    bucket.append(Match(**row_fields))

while query_pointer < len(sorted_queries) and ct_pointer < len(ct):
if last_query_pointer != query_pointer:
q = sorted_queries[query_pointer]
if match_start is not None:
ct_pointer = match_start
match_start = None
last_query_pointer = query_pointer
c = ct.iloc[ct_pointer]
q_chr = str(q.chr)
c_chr = c.chr
if q_chr < c_chr:
query_pointer += 1
continue
if q_chr > c_chr:
ct_pointer += 1
continue
q_start = int(q.start)
c_start = c.start
q_stop = int(q.stop)
c_stop = c.stop
if q_start > c_stop:
ct_pointer += 1
continue
if q_stop < c_start:
query_pointer += 1
continue
if search_mode == 'any':
append_match(matches, q, c)
elif search_mode == 'exact' and q_start == c_start and q_stop == c_stop:
q_alt = q.alt
c_alt = c.alt
q_ref = q.ref
c_ref = c.ref
if (not (q_alt != '*' and q_alt != c_alt)) and (not (q_ref != '*' and q_ref != c_ref)):
if coordinate_query.build == 'GRCh37':
while query_pointer < len(sorted_queries) and ct_pointer < len(ct):
if last_query_pointer != query_pointer:
q = sorted_queries[query_pointer]
if match_start is not None:
ct_pointer = match_start
match_start = None
last_query_pointer = query_pointer
c = ct.iloc[ct_pointer]
q_chr = str(q.chr)
c_chr = c.chr
if q_chr < c_chr:
query_pointer += 1
continue
if q_chr > c_chr:
ct_pointer += 1
continue
q_start = int(q.start)
c_start = c.start
q_stop = int(q.stop)
c_stop = c.stop
if q_start > c_stop:
ct_pointer += 1
continue
if q_stop < c_start:
query_pointer += 1
continue
if search_mode == 'any':
append_match(matches, q, c)
elif search_mode == 'exact' and q_start == c_start and q_stop == c_stop:
q_alt = q.alt
c_alt = c.alt
q_ref = q.ref
c_ref = c.ref
if (not (q_alt != '*' and q_alt != c_alt)) and (not (q_ref != '*' and q_ref != c_ref)):
append_match(matches, q, c)
elif search_mode == 'query_encompassing' and q_start <= c_start and q_stop >= c_stop:
append_match(matches, q, c)
elif search_mode == 'query_encompassing' and q_start <= c_start and q_stop >= c_stop:
append_match(matches, q, c)
elif search_mode == 'record_encompassing' and c_start <= q_start and c_stop >= q_stop:
append_match(matches, q, c)
if match_start is None:
match_start = ct_pointer
ct_pointer += 1
elif search_mode == 'record_encompassing' and c_start <= q_start and c_stop >= q_stop:
append_match(matches, q, c)
if match_start is None:
match_start = ct_pointer
ct_pointer += 1
else:
raise ValueError("Bulk coordinate search only supports build GRCh37")
return dict(matches)


Expand Down

0 comments on commit ef0d0d0

Please sign in to comment.