Skip to content

Commit

Permalink
Add --disable-automatic-newline option; Improve automatic behavior
Browse files Browse the repository at this point in the history
- Add new `-N, --disable-automatic-newline` option to restore the pre-1.18 query formatting
  behavior, in which a missing newline was not added automatically

- Make the automatic addition of the newline character more predictable: when missing,
  it is now always put at the end of the expression. In version 1.18 it could
  be added at the end of the expression (for per-site expressions) or inside the square
  brackets (for per-sample expressions). The new behavior is:

    - if the formatting expression contains a newline character, do nothing
    - if there is no newline character and -N, --disable-automatic-newline is given, do nothing
    - if there is no newline character and -N is not given, insert newline at the end of the expression

Resolves #1969
  • Loading branch information
pd3 committed Aug 3, 2023
1 parent 122a564 commit c7cbe0b
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 28 deletions.
20 changes: 19 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,7 +1,25 @@
## Release a.b

## Release 1.18 (25th July 2023)

Changes affecting specific commands:

* bcftools query

- Add new `-N, --disable-automatic-newline` option to restore the pre-1.18 query formatting
behavior, in which a missing newline was not added automatically

- Make the automatic addition of the newline character more predictable: when missing,
it is now always put at the end of the expression. In version 1.18 it could
be added at the end of the expression (for per-site expressions) or inside the square
brackets (for per-sample expressions). The new behavior is:

- if the formatting expression contains a newline character, do nothing
- if there is no newline character and -N, --disable-automatic-newline is given, do nothing
- if there is no newline character and -N is not given, insert newline at the end of the expression

See #1969 for details

## Release 1.18 (25th July 2023)

Changes affecting the whole of bcftools, or multiple commands:

Expand Down
35 changes: 12 additions & 23 deletions convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -1709,29 +1709,18 @@ static void force_newline_(convert_t *convert)
}
if ( has_newline ) return;

// A newline is not present, force it. But where to add it?
// Consider
// -f'%CHROM[ %SAMPLE]\n'
// vs
// -f'[%CHROM %SAMPLE\n]'
for (i=0; i<convert->nfmt; i++)
if ( !convert->fmt[i].is_gt_field && convert->fmt[i].key ) break;

if ( i < convert->nfmt )
register_tag(convert, "\n", 0, T_SEP); // the first case
else
{
// the second case
i = convert->nfmt - 1;
if ( !convert->fmt[i].key )
{
convert->fmt[i].key = strdup("\n");
convert->fmt[i].is_gt_field = 1;
register_tag(convert, NULL, 0, T_SEP);
}
else
register_tag(convert, "\n", 1, T_SEP);
}
// A newline is not present, force it. But where to add it? Always at the end.
//
// Briefly, in 1.18, the automatic behavior depended on the expression: for
// per-site output the newline was added at the end of the expression, and for
// per-sample output it was added inside the square brackets:
// -f'%CHROM[ %SAMPLE]\n'
// -f'[%CHROM %SAMPLE\n]'
//
// However, this is an annoyance for users, as it is not entirely clear what
// will happen unless one understands the internals well (#1969)

register_tag(convert, "\n", 0, T_SEP);
}

int convert_set_option(convert_t *convert, enum convert_option opt, ...)
Expand Down
7 changes: 7 additions & 0 deletions doc/bcftools.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2904,6 +2904,13 @@ Extracts fields from VCF or BCF files and outputs them in user-defined format.
*-l, --list-samples*::
list sample names and exit

*-N, --disable-automatic-newline*::
disable automatic addition of a missing newline character at the end of the formatting
expression. By default, the program checks if the expression contains a newline
and appends it if not, to prevent formatting the entire output into a single
line by mistake. Note that versions prior to 1.18 had no automatic check and the newline
had to be included explicitly.

*-o, --output* 'FILE'::
see *<<common_options,Common Options>>*

Expand Down
1 change: 0 additions & 1 deletion test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,6 @@
run_test(\&test_vcf_query,$opts,in=>'filter.12',out=>'query.90.out',args=>q[-i'FILTER!~"A;B"' -f'%FILTER\\n']);
run_test(\&test_vcf_query,$opts,in=>'filter.10',out=>'query.91.out',args=>q[-i'DP%10==2' -f'[ %DP]\\n']);
run_test(\&test_vcf_query,$opts,in=>'query.header',out=>'query.95.out',args=>q[-H -f'[%CHROM %POS %SAMPLE %DP %GT\\n]']);
run_test(\&test_vcf_query,$opts,in=>'query.header',out=>'query.95.out',args=>q[-H -f'[%CHROM %POS %SAMPLE %DP %GT]']);
run_test(\&test_vcf_query,$opts,in=>'query.header',out=>'query.96.out',args=>q[-H -f'[%CHROM %POS %SAMPLE %DP %GT]\\n']);
run_test(\&test_vcf_query,$opts,in=>'query.header',out=>'query.97.out',args=>q[-H -f'%CHROM %POS[ %SAMPLE %DP %GT]\\n']);
run_test(\&test_vcf_query,$opts,in=>'query.header',out=>'query.97.out',args=>q[-H -f'%CHROM %POS[ %SAMPLE %DP %GT]']);
Expand Down
10 changes: 7 additions & 3 deletions vcfquery.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ typedef struct
bcf_hdr_t *header;
int sample_is_file;
char **argv, *format_str, *sample_list, *targets_list, *regions_list, *vcf_list, *fn_out;
int argc, list_columns, print_header, allow_undef_tags, force_samples;
int argc, list_columns, print_header, allow_undef_tags, force_samples, force_newline;
FILE *out;
}
args_t;
Expand Down Expand Up @@ -94,7 +94,7 @@ static void init_data(args_t *args)
smpl_ilist_destroy(ilist);
}
args->convert = convert_init(args->header, samples, nsamples, args->format_str);
convert_set_option(args->convert, force_newline, 1);
if ( args->force_newline ) convert_set_option(args->convert, force_newline, 1);
convert_set_option(args->convert, subset_samples, &args->smpl_pass);
if ( args->allow_undef_tags ) convert_set_option(args->convert, allow_undef_tags, 1);
free(samples);
Expand Down Expand Up @@ -236,6 +236,7 @@ static void usage(void)
fprintf(stderr, " -H, --print-header Print header\n");
fprintf(stderr, " -i, --include EXPR Select sites for which the expression is true (see man page for details)\n");
fprintf(stderr, " -l, --list-samples Print the list of samples and exit\n");
fprintf(stderr, " -N, --disable-automatic-newline Disable automatic addition of newline character when not present\n");
fprintf(stderr, " -o, --output FILE Output file name [stdout]\n");
fprintf(stderr, " -r, --regions REGION Restrict to comma-separated list of regions\n");
fprintf(stderr, " -R, --regions-file FILE Restrict to regions listed in a file\n");
Expand All @@ -259,6 +260,7 @@ int main_vcfquery(int argc, char *argv[])
int c, collapse = 0;
args_t *args = (args_t*) calloc(1,sizeof(args_t));
args->argc = argc; args->argv = argv;
args->force_newline = 1;
int regions_is_file = 0, targets_is_file = 0;
int regions_overlap = 1;
int targets_overlap = 0;
Expand All @@ -267,6 +269,7 @@ int main_vcfquery(int argc, char *argv[])
{
{"help",0,0,'h'},
{"list-samples",0,0,'l'},
{"disable-automatic-newline",no_argument,NULL,'N'},   // flag only: -N takes no value (no ':' after N in the short-option string)
{"include",1,0,'i'},
{"exclude",1,0,'e'},
{"format",1,0,'f'},
Expand All @@ -288,10 +291,11 @@ int main_vcfquery(int argc, char *argv[])
{"allow-undef-tags",0,0,'u'},
{0,0,0,0}
};
while ((c = getopt_long(argc, argv, "hlr:R:f:a:s:S:Ht:T:c:v:i:e:o:u",loptions,NULL)) >= 0) {
while ((c = getopt_long(argc, argv, "hlr:R:f:a:s:S:Ht:T:c:v:i:e:o:uN",loptions,NULL)) >= 0) {
switch (c) {
case 'o': args->fn_out = optarg; break;
case 'f': args->format_str = strdup(optarg); break;
case 'N': args->force_newline = 0; break;
case 'H': args->print_header = 1; break;
case 'v': args->vcf_list = optarg; break;
case 'c':
Expand Down

0 comments on commit c7cbe0b

Please sign in to comment.