Skip to content

Commit

Permalink
Support multiple semicolon-separated strings when filtering by ID
Browse files Browse the repository at this point in the history
Previously the following expressions

    ID="@Testid"
    ID="rs123"
    ID!="rs123"

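were compared against the whole ID field rather than its individual semicolon-separated entries, so the ID string "rs123;rs456" was not recognized as containing "rs123".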

Resolves #2190
  • Loading branch information
pd3 committed May 27, 2024
1 parent 81c2643 commit 62142f3
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 22 deletions.
3 changes: 3 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

Changes affecting the whole of bcftools, or multiple commands:

* Support multiple semicolon-separated strings when filtering by ID using -i/-e (#2190).
For example, `-i 'ID="rs123"'` now correctly matches `rs123;rs456`.

* bcftools query
* bcftools +split-vep

Expand Down
58 changes: 36 additions & 22 deletions filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -706,34 +706,48 @@ static void filters_cmp_id(token_t *atok, token_t *btok, token_t *rtok, bcf1_t *
{
token_t *tmp = atok; atok = btok; btok = tmp;
}
if ( atok->hash )

char *id = line->d.id;
int pass = 0;

while ( id )
{
if ( rtok->tok_type!=TOK_EQ && rtok->tok_type!=TOK_NE )
error("Only == and != operators are supported for strings read from a file\n");
char *ep = strchr(id,';');
if ( ep ) *ep = 0;

int ret = khash_str2int_has_key(atok->hash, line->d.id);
if ( rtok->tok_type==TOK_NE ) ret = ret ? 0 : 1;
rtok->pass_site = ret;
return;
}
if ( atok->hash )
{
if ( rtok->tok_type!=TOK_EQ && rtok->tok_type!=TOK_NE )
error("Only == and != operators are supported for strings read from a file\n");

if ( !btok->str_value.l ) error("Error occurred while evaluating the expression\n");
pass = khash_str2int_has_key(atok->hash, id);
}
else
{
if ( !btok->str_value.l ) error("Error occurred while evaluating the expression\n");

if ( rtok->tok_type==TOK_EQ )
rtok->pass_site = strcmp(btok->str_value.s,line->d.id) ? 0 : 1;
else if ( rtok->tok_type==TOK_NE )
rtok->pass_site = strcmp(btok->str_value.s,line->d.id) ? 1 : 0;
else
{
if ( rtok->tok_type!=TOK_LIKE && rtok->tok_type!=TOK_NLIKE )
error("Only the following operators are supported for querying ID: ==, !=, ~, !~; the operator type %d is not supported (%p %p)\n",
rtok->tok_type,atok->regex,btok->regex);
if ( rtok->tok_type==TOK_EQ || rtok->tok_type==TOK_NE )
pass = strcmp(btok->str_value.s,id) ? 0 : 1;
else
{
if ( rtok->tok_type!=TOK_LIKE && rtok->tok_type!=TOK_NLIKE )
error("Only the following operators are supported for querying ID: ==, !=, ~, !~; the operator type %d is not supported (%p %p)\n",
rtok->tok_type,atok->regex,btok->regex);

regex_t *regex = atok->regex ? atok->regex : (btok->regex ? btok->regex : NULL);
if ( !regex ) error("fixme: regex initialization failed\n");
rtok->pass_site = regexec(regex,line->d.id, 0,NULL,0) ? 0 : 1;
if ( rtok->tok_type==TOK_NLIKE ) rtok->pass_site = rtok->pass_site ? 0 : 1;
regex_t *regex = atok->regex ? atok->regex : (btok->regex ? btok->regex : NULL);
if ( !regex ) error("fixme: regex initialization failed\n");
pass = regexec(regex,id, 0,NULL,0) ? 0 : 1;
}
}
if ( ep )
{
*ep = ';';
id = ep + 1;
}
if ( pass || !ep ) break;
}
// The loop above leaves `pass` set to 1 when any of the semicolon-separated IDs
// matched (exact string, hash lookup, or regex). Both negated operators, != and !~,
// must invert that result; the regex branch no longer negates on its own.
if ( rtok->tok_type==TOK_NE || rtok->tok_type==TOK_NLIKE ) pass = pass ? 0 : 1;
rtok->pass_site = pass;
}

/**
Expand Down
1 change: 1 addition & 0 deletions test/query.filter.id.3.out
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ss124;abc
1 change: 1 addition & 0 deletions test/query.filter.id.3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
abc
2 changes: 2 additions & 0 deletions test/query.filter.id.4.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
rs123
.
4 changes: 4 additions & 0 deletions test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,10 @@
run_test(\&test_vcf_query,$opts,in=>'query.smpl',out=>'query.smpl.6.out',args=>q[-l -S {PATH}/query.smpl.txt]);
run_test(\&test_vcf_query,$opts,in=>'query.filter.id',out=>'query.filter.id.1.out',args=>q[-f'%ID\\n' -i'ID~"s12"']);
run_test(\&test_vcf_query,$opts,in=>'query.filter.id',out=>'query.filter.id.2.out',args=>q[-f'%ID\\n' -i'ID="rs123"']);
run_test(\&test_vcf_query,$opts,in=>'query.filter.id',out=>'query.filter.id.3.out',args=>q[-f'%ID\\n' -i'ID="abc"']);
run_test(\&test_vcf_query,$opts,in=>'query.filter.id',out=>'query.filter.id.3.out',args=>q[-f'%ID\\n' -i'ID=@].$$opts{path}.q[/query.filter.id.3.txt']);
run_test(\&test_vcf_query,$opts,in=>'query.filter.id',out=>'query.filter.id.4.out',args=>q[-f'%ID\\n' -i'ID!="abc"']);
run_test(\&test_vcf_query,$opts,in=>'query.filter.id',out=>'query.filter.id.4.out',args=>q[-f'%ID\\n' -i'ID!=@].$$opts{path}.q[/query.filter.id.3.txt']);
run_test(\&test_vcf_query,$opts,in=>'filter.12',out=>'query.85.out',args=>q[-i'FILTER="A"' -f'%FILTER\\n']);
run_test(\&test_vcf_query,$opts,in=>'filter.12',out=>'query.86.out',args=>q[-i'FILTER~"A"' -f'%FILTER\\n']);
run_test(\&test_vcf_query,$opts,in=>'filter.12',out=>'query.87.out',args=>q[-i'FILTER="A;B"' -f'%FILTER\\n']);
Expand Down

0 comments on commit 62142f3

Please sign in to comment.