Skip to content

Commit

Permalink
Merge branch 'comment-header' into 'dev'
Browse files Browse the repository at this point in the history
Rework reheadering to always output valid samtag

See merge request epi2melabs/fastcat!17
  • Loading branch information
cjw85 committed Jan 30, 2023
2 parents 816f149 + 7d44d4a commit 9b333ba
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 8 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [v0.9.0]
### Fixed
- Ensure reheadered fastq is indeed formatted as valid SAM tag(s).

## [v0.8.0]
### Added
- Option to bamstats to add 'sample_name' column equivalent to fastcat.
Expand Down
12 changes: 11 additions & 1 deletion src/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,16 @@ char *substring(char *string, int position, int length) {
return ptr;
}

/** Replace every occurrence of a character in a string, in place.
 *
 *  @param str NUL-terminated string to modify; NULL is treated as empty.
 *  @param orig character to search for.
 *  @param rep character written over each match.
 *  @returns number of replacements made.
 *
 *  Searching for '\0' is a no-op that returns 0: strchr() matches the
 *  terminating NUL, so replacing it would unterminate the string and send
 *  the scan past the end of the buffer.
 */
int replace_char(char *str, char orig, char rep) {
    if (str == NULL || orig == '\0') {
        return 0;
    }
    int n = 0;
    // Resume each search one past the previous hit so a replacement equal to
    // `orig` is not matched again.
    for (char *ix = strchr(str, orig); ix != NULL; ix = strchr(ix + 1, orig)) {
        *ix = rep;
        n++;
    }
    return n;
}

const double qprobs[100] = {
1.00000000e+00, 7.94328235e-01, 6.30957344e-01, 5.01187234e-01,
3.98107171e-01, 3.16227766e-01, 2.51188643e-01, 1.99526231e-01,
Expand Down Expand Up @@ -125,4 +135,4 @@ float mean_qual_from_bam(u_int8_t* qual, size_t len) {
}
qsum /= len;
return -10 * log10(qsum);
}
}
11 changes: 11 additions & 0 deletions src/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,17 @@ void *xalloc(size_t num, size_t size, char* msg);
*/
char *substring(char *string, int position, int length);

/** Globally replace a char in a char*
 *
 * The string is modified in place; no new string is allocated.
 *
 * @param str char* source string (NUL-terminated, must be writable)
 * @param orig original character to search for; callers should not pass
 *        '\0', since the terminator itself would then count as a match
 * @param rep replacement character written over each match
 * @returns number of times replacement made
 *
 */
int replace_char(char *str, char orig, char rep);


// https://en.wikipedia.org/wiki/Kahan_summation_algorithm
void kahan_sum(double* sum, double term, double* c);

Expand Down
29 changes: 23 additions & 6 deletions src/fastcat/writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <sys/types.h>

#include "writer.h"
#include "common.h"
#include "../fastqcomments.h"

char* strip_path(char* input) {
Expand Down Expand Up @@ -73,12 +74,28 @@ void _write_read(writer writer, kseq_t* seq, read_meta meta, void* handle) {
static const char* wcomment_fmt = "@%s %s\n%s\n+\n%s\n";
static const char* nocomment_fmt = "@%s\n%s\n+\n%s\n";

if (writer->reheader && meta->valid) {
(*write)(
handle, reheader_fmt,
seq->name.s, meta->runid, meta->barcode, meta->barcode_alias,
meta->flow_cell_id, meta->start_time, meta->read_number, meta->channel,
seq->seq.s, seq->qual.s);
if (writer->reheader) {
// If we have all items, write individual tags, else just a generic comment tag.
// We could write out the explicit tags we have but a) that's effort b) any
// files from a device will have all these fields c) doesn't clearly indicate
// something went wrong d) allows us to pass on arbitrary information (so
// is probably how we should have done this in the first place).
if (meta->valid) {
(*write)(
handle, reheader_fmt,
seq->name.s, meta->runid, meta->barcode, meta->barcode_alias,
meta->flow_cell_id, meta->start_time, meta->read_number, meta->channel,
seq->seq.s, seq->qual.s);
}
else {
char* buf = xalloc(seq->comment.l + 6, sizeof(char), "Temporary buffer");
strncpy(buf, "CO:Z:", 5);
strncat(buf, seq->comment.s, seq->comment.l);
// Convert tabs to spaces (because multiple tags are tab delimited); this is fine because the existing comments should only contain space as a delimiter (not as information).
replace_char(buf, '\t', ' ');
(*write)(handle, wcomment_fmt, seq->name.s, buf, seq->seq.s, seq->qual.s);
free(buf);
}
}
else if (seq->comment.l > 0) {
(*write)(handle, wcomment_fmt, seq->name.s, seq->comment.s, seq->seq.s, seq->qual.s);
Expand Down
2 changes: 1 addition & 1 deletion src/version.h
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@

const char *argp_program_version = "0.8.0";
const char *argp_program_version = "0.9.0";
Binary file modified test/data/bcMangled.fastq.gz
Binary file not shown.

0 comments on commit 9b333ba

Please sign in to comment.