Skip to content

Commit

Permalink
Merge branch 'runid-bam' into 'dev'
Browse files Browse the repository at this point in the history
[CW-3963] Add --runids option to bamstats

See merge request epi2melabs/fastcat!57
  • Loading branch information
cjw85 committed Apr 18, 2024
2 parents ff2a239 + 0f81218 commit afc7299
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 10 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [v0.17.1]
### Added
- `--runids` option to `bamstats` for enumerating detected run identifiers.

## [v0.17.0]
### Added
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ fastcat: src/fastcat/main.o src/fastcat/args.o src/fastcat/writer.o src/fastqcom
-o $@


bamstats: src/bamstats/main.o src/bamstats/args.o src/bamstats/readstats.o src/bamstats/bamiter.o src/fastqcomments.o src/common.o src/stats.o $(STATIC_HTSLIB)
bamstats: src/bamstats/main.o src/bamstats/args.o src/bamstats/readstats.o src/bamstats/bamiter.o src/fastqcomments.o src/common.o src/stats.o src/kh_counter.o $(STATIC_HTSLIB)
$(CC) -Isrc -Ihtslib $(WARNINGS) -fstack-protector-strong -D_FORTIFY_SOURCE=2 \
$(CFLAGS) $(EXTRA_CFLAGS) $(EXTRA_LDFLAGS) \
$^ $(ARGP) \
Expand Down
6 changes: 6 additions & 0 deletions src/bamstats/args.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ static struct argp_option options[] = {
"Sample name (if given, adds a 'sample_name' column).", 0},
{"flagstats", 'f', "FLAGSTATS", 0,
"File for outputting alignment flag counts.", 0},
{"runids", 'i', "ID SUMMARY", 0,
"Run ID summary output", 0},
{"histograms", 0x400, "DIRECTORY", 0,
"Directory for outputting histogram information. (default: bamstats-histograms)", 0},
{0, 0, 0, 0,
Expand Down Expand Up @@ -63,6 +65,9 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state) {
case 'f':
arguments->flagstats = arg;
break;
case 'i':
arguments->runids = arg;
break;
case 0x400:
arguments->histograms = arg;
break;
Expand Down Expand Up @@ -120,6 +125,7 @@ arguments_t parse_arguments(int argc, char** argv) {
arguments_t args;
args.bam = NULL;
args.flagstats = NULL;
args.runids = NULL;
args.histograms = "bamstats-histograms";
args.sample = NULL;
args.ref = NULL;
Expand Down
1 change: 1 addition & 0 deletions src/bamstats/args.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
typedef struct arguments {
const char** bam;
char* flagstats;
char* runids;
char* histograms;
char *sample;
char* ref;
Expand Down
25 changes: 22 additions & 3 deletions src/bamstats/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ int main(int argc, char *argv[]) {
}

if (nfile > 1) {
fprintf(stderr, "WARNING: Results from multiple files will not be coordinate sorted.\n");
fprintf(stderr, "ERROR: Multiple input files detected, this program currently supports only a single file.\n");
exit(EXIT_FAILURE);
}

write_header(args.sample);
Expand Down Expand Up @@ -115,6 +116,7 @@ int main(int argc, char *argv[]) {
);
}

kh_counter_t *run_ids = kh_counter_init();
read_stats* length_stats = create_length_stats();
read_stats* qual_stats = create_qual_stats(QUAL_HIST_WIDTH);
read_stats* acc_stats = create_qual_stats(ACC_HIST_WIDTH);
Expand All @@ -132,7 +134,7 @@ int main(int argc, char *argv[]) {
args.read_group, args.tag_name, args.tag_value,
flag_counts, args.unmapped,
length_stats, qual_stats, acc_stats, cov_stats,
length_stats_unmapped, qual_stats_unmapped);
length_stats_unmapped, qual_stats_unmapped, run_ids);

// write flagstat counts if requested
if (flag_counts != NULL) {
Expand Down Expand Up @@ -175,7 +177,7 @@ int main(int argc, char *argv[]) {
args.read_group, args.tag_name, args.tag_value,
flag_counts, args.unmapped,
length_stats, qual_stats, acc_stats, cov_stats,
length_stats_unmapped, qual_stats_unmapped);
length_stats_unmapped, qual_stats_unmapped, run_ids);
if (flag_counts != NULL) {
write_stats(flag_counts->counts[0], chr, args.sample, flagstats);
}
Expand Down Expand Up @@ -223,12 +225,29 @@ int main(int argc, char *argv[]) {
fclose(stats_fp); free(path);
}

// write runids summary
if (args.runids != NULL) {
stats_fp = fopen(args.runids, "w");
fprintf(stats_fp, "filename\t");
if (args.sample != NULL) fprintf(stats_fp, "sample_name\t");
fprintf(stats_fp, "run_id\tcount\n");
for (khiter_t k = 0; k < kh_end(run_ids); ++k) {
if (kh_exist(run_ids, k)) {
fprintf(stats_fp, "%s\t", args.bam[0]);
if (args.sample != NULL) fprintf(stats_fp, "%s\t", args.sample);
fprintf(stats_fp, "%s\t%d\n", kh_key(run_ids, k), kh_val(run_ids, k));
}
}
fclose(stats_fp);
}

destroy_length_stats(length_stats);
destroy_qual_stats(qual_stats);
destroy_qual_stats(acc_stats);
destroy_qual_stats(cov_stats);
destroy_length_stats(length_stats_unmapped);
destroy_qual_stats(qual_stats_unmapped);
kh_counter_destroy(run_ids);

if (flagstats != NULL) {
fclose(flagstats);
Expand Down
9 changes: 5 additions & 4 deletions src/bamstats/readstats.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "../common.h"
#include "../stats.h"
#include "../kh_counter.h"
#include "bamiter.h"
#include "readstats.h"
#include "args.h"
Expand Down Expand Up @@ -166,6 +167,7 @@ int get_duplex_tag(bam1_t* b) {
* @param cov_stats read_stats* for accumulating read alignment coverage information.
* @param length_stats_unmapped read_stats* for accumulating read length information for unmapped reads.
* @param qual_stats_unmapped read_stats* for accumulating read quality information for unmapped reads.
* @param runids kh_counter_t* for accumulating runids.
* @returns void. Prints output to stdout.
*
*/
Expand All @@ -175,7 +177,7 @@ void process_bams(
const char *read_group, const char tag_name[2], const int tag_value,
flag_stats *flag_counts, bool unmapped,
read_stats* length_stats, read_stats* qual_stats, read_stats* acc_stats, read_stats* cov_stats,
read_stats* length_stats_unmapped, read_stats* qual_stats_unmapped) {
read_stats* length_stats_unmapped, read_stats* qual_stats_unmapped, kh_counter_t* runids) {
if (chr != NULL) {
if (strcmp(chr, "*") == 0) {
fprintf(stderr, "Processing: Unplaced reads\n");
Expand Down Expand Up @@ -218,9 +220,8 @@ void process_bams(
}
}
// set NULL runid to empty string
if (runid == NULL) {
runid = "";
}
if (runid == NULL) runid = "";
kh_counter_increment(runids, runid);
// get start time
start_time = "";
tag = bam_get_tag_caseinsensitive((const bam1_t*) b, "st");
Expand Down
4 changes: 3 additions & 1 deletion src/bamstats/readstats.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "args.h"
#include "../stats.h"
#include "../kh_counter.h"


// struct for flagstat counts
Expand Down Expand Up @@ -52,6 +53,7 @@ void destroy_flag_stats(flag_stats* stats);
* @param cov_stats read_stats* for accumulating read alignment coverage information.
* @param length_stats_unmapped read_stats* for accumulating read length information for unmapped reads.
* @param qual_stats_unmapped read_stats* for accumulating read quality information for unmapped reads.
* @param runids kh_counter_t* for accumulating runid information.
* @returns void. Prints output to stdout.
*
*/
Expand All @@ -61,6 +63,6 @@ void process_bams(
const char *read_group, const char tag_name[2], const int tag_value,
flag_stats *flag_counts, bool unmapped,
read_stats* length_stats, read_stats* qual_stats, read_stats* acc_stats, read_stats* cov_stats,
read_stats* length_stats_unmapped, read_stats* qual_stats_unmapped);
read_stats* length_stats_unmapped, read_stats* qual_stats_unmapped, kh_counter_t* runids);

#endif
2 changes: 1 addition & 1 deletion src/version.h
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@

const char *argp_program_version = "0.17.0";
const char *argp_program_version = "0.17.1";

0 comments on commit afc7299

Please sign in to comment.