Skip to content

Commit

Permalink
Merge branch 'release/1.13.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
David Jones committed Mar 6, 2018
2 parents 3f55a8c + 568ecdf commit 60a8d97
Show file tree
Hide file tree
Showing 8 changed files with 705 additions and 49 deletions.
70 changes: 49 additions & 21 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,69 +1,97 @@
### 1.12.1
# CHANGES

## 1.13.0

* Overlapping reads now handled #78
* Fixes for #64, #65, #61

## 1.12.1

* Resolves #74

### 1.12.0
## 1.12.0

* Now outputs snp and mut vcf files as gzipped.
* Requires [zlib](https://zlib.net/) >= 1.2.3.5

### 1.11.3
## 1.11.3

* Makefile update to cope with central/prefix install of htslib

### 1.11.2
## 1.11.2

* Update htslib version (1.11.1 updated an unused reference to this archive)

### 1.11.0
## 1.11.0

* Rearrangement to htslib pileup code gives large speed increases

### 1.10.1
## 1.10.1

* Correction to fclose checking in estep.c

### 1.10.0
## 1.10.0

* Added checks to all fflush and fclose calls

### 1.9.5
## 1.9.5

* Removed dependency on ENA during compilation

### 1.9.4
## 1.9.4

* Corrected main method to ensure failures from the individual section main methods are passed through.

### 1.9.3
## 1.9.3

* Fix bug in ignore regions file reading where bed file resulted in incorrect coords

### 1.9.2
## 1.9.2

* Resolves #45 - New [samtools/htslib 1.3](https://github.com/samtools/htslib/releases/tag/1.3) to remove need for patch.

### 1.9.1
## 1.9.1

* Removed unnecessary dependency on `rsync` when `cp` will do.

### 1.9.0
## 1.9.0

* Corrections to install methods to ensure all relevant files were installed.

### 1.8.0
## 1.8.0

* Commandline params checked
* Readlength taken into account in split sections
* setup.sh installs perl scripts

### 1.7.3
## 1.7.3

* Fixed generateCavemanUMNormVCF - command line help has errors #28
* Fixed check BAM headers in setup for readgroups #34

### 1.7.2
## 1.7.2

* RG lane id search no longer requires ID to be at the beginning of the RG line.

### 1.7.1
## 1.7.1

* Added header read to bam_access_get_by_position_counts method. Fixes #41

### 1.7.0
## 1.7.0

* Updated merge script to check number of files against splitList.

### 1.6.4
## 1.6.4

* Fixed #37, Fixed bug where error thrown when no CN file, and should be using default.
* Fixed #38, removed need for stack memory usage in array reading and writing.

### 1.6.2
## 1.6.2

* Fixed bug in generateCavemanVCFUnmatchedNormalPanel.c where q was not read at commandline.

### 1.6.0
## 1.6.0

* Cram support added using htslib
* Added setup script to download and compile htslib with patch (required for CRAM support).
* Added 2015 and date additional info to licenses in all files
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CAVEMAN_VERSION=1.12.1
CAVEMAN_VERSION=1.13.0
TEST_REF?=""
#Compiler
CC?=gcc
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ use of an index parameter. The split step is designed to divide the genome into
chunks of adjustable size to optimise for runtime/memory usage requirements.
For simple execution of CaVEMan please see [cgpCaVEManWrapper](https://github.com/cancerit/cgpCaVEManWrapper)

| Master | Dev |
|---|---|
| [![Build Status](https://travis-ci.org/cancerit/CaVEMan.svg?branch=master)](https://travis-ci.org/cancerit/CaVEMan) | [![Build Status](https://travis-ci.org/cancerit/CaVEMan.svg?branch=dev)](https://travis-ci.org/cancerit/CaVEMan) |
| Master | Dev |
| ------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- |
| [![Build Status](https://travis-ci.org/cancerit/CaVEMan.svg?branch=master)](https://travis-ci.org/cancerit/CaVEMan) | [![Build Status](https://travis-ci.org/cancerit/CaVEMan.svg?branch=dev)](https://travis-ci.org/cancerit/CaVEMan) |

Installation
============
Expand Down
16 changes: 16 additions & 0 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@

SOURCE_HTSLIB="https://github.com/samtools/htslib/releases/download/1.3.2/htslib-1.3.2.tar.bz2"

REQUIRED_MIN_LIBZ="1.2.3.5"

# Succeeds (exit 0) when the first argument is NOT the lowest of the supplied
# version strings under `sort -V` ordering, i.e. when $1 is strictly greater
# than the smallest version given. Equal versions therefore yield failure.
function version_gt() {
    local lowest
    lowest="$(printf '%s\n' "$@" | sort -V | head -n 1)"
    test "$lowest" != "$1"
}

get_distro () {
EXT=""
DECOMP="gunzip -f"
Expand Down Expand Up @@ -77,6 +81,18 @@ INIT_DIR=`pwd`
set +x
echo; echo

# Work out the installed zlib version from the dynamic linker cache listing.
# Matching lines are expected to look like "libz.so.1 -> libz.so.<version>";
# the perl substitution strips everything up to and including the final "libz.so.".
# Only the first match is used: chomp removes the newlines, so several matches
# (e.g. more than one libz on the library path) would otherwise be glued
# together into a single meaningless version string.
# perl runs with -p only; the in-place switch (-i) has no meaning on a pipe.
LIBZ_VER=$(ldconfig -v | grep libz.so | head -n 1 | perl -p -e 'chomp($_); $_=~s/^\s+libz\.so\.1\s+.+\s+libz\.so\.//;')
echo "$LIBZ_VER"
# The requirement is ">=", but version_gt is a strict comparison, so an exact
# match with the minimum version is accepted explicitly. Quoting keeps an empty
# LIBZ_VER as its own (empty) argument so it falls through to the error branch.
if [ "$LIBZ_VER" = "$REQUIRED_MIN_LIBZ" ] || version_gt "$LIBZ_VER" "$REQUIRED_MIN_LIBZ" ; then
  echo "Found acceptable libz version $LIBZ_VER."
  echo "Continuing install"
else
  echo "ERROR: CaVEMan requires libz version >= $REQUIRED_MIN_LIBZ"
  echo "Found libz version: $LIBZ_VER"
  echo "Exiting install"
  exit 1
fi

set -ue

# cleanup inst_path
Expand Down
51 changes: 46 additions & 5 deletions src/bam_access.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
#include <dbg.h>
#include <bam_access.h>
#include <time.h>
#include "khash.h"

KHASH_MAP_INIT_STR(strh,uint8_t)

static file_holder *norm = NULL;
static file_holder *tum = NULL;
Expand Down Expand Up @@ -102,6 +105,11 @@ int bam_access_get_avg_readlength_from_bam(htsFile *sf){
bam1_t *b = bam_init1();
int ret;
while ((ret = sam_read1(sf, head, b)) >= 0 && read_count < 100) {
if((b->core.flag & BAM_FSECONDARY)
|| (b->core.flag & BAM_FSUPPLEMENTARY)
|| (b->core.flag & BAM_FQCFAIL)){
continue;
}
read_count++;
read_length_sum += b->core.l_qseq;
}
Expand All @@ -111,12 +119,28 @@ int bam_access_get_avg_readlength_from_bam(htsFile *sf){

int pos_counts_callback(uint32_t tid, uint32_t pos, int n_plp, const bam_pileup1_t *pil, void *data, int strand){
file_holder *norm = (file_holder *) data;
khash_t(strh) *h;
khiter_t k;
h = kh_init(strh);
int i=0;
for(i=0;i<n_plp;i++){
const bam_pileup1_t *p = pil + i;
bam1_t *algn = p->b;

int absent;
uint8_t cbase = bam_seqi(bam_get_seq(algn),p->qpos);
if(!(p->is_del) && bam_get_qual(algn)[p->qpos] >= min_base_qual && (cbase == 1 || cbase == 2 || cbase == 4 || cbase == 8)){//check bases are ACGT
k = kh_put(strh, h, bam_get_qname(p->b), &absent);
uint8_t pre_b;
if(!absent){ //Read already processed to get base processed (we only increment if base is different between overlapping read pairs)
k = kh_get(strh, h, bam_get_qname(p->b));
pre_b = kh_val(h,k);
}else{
//Add the value to the hash
kh_value(h, k) = cbase;
}

if(!(p->is_del) && bam_get_qual(algn)[p->qpos] >= min_base_qual && (cbase == 1 || cbase == 2 || cbase == 4 || cbase == 8)
&& (absent || pre_b != cbase)){//check bases are ACGT and not same base in overlapping read]]
int loc = (pos + 1) - norm->beg;
if(norm->base_counts[loc] == NULL || norm->base_counts[loc] == 0){
if(strand == 1 ){
Expand All @@ -141,9 +165,10 @@ int pos_counts_callback(uint32_t tid, uint32_t pos, int n_plp, const bam_pileup1
}
}//End of checking base is ACGT & fits quality requirements
}//Iteration through pileups at this position

kh_destroy(strh, h);
return 0;
error:
kh_destroy(strh, h);
return 1;
}

Expand Down Expand Up @@ -507,13 +532,28 @@ int bam_access_compare_read_pos_t(const void *in_a, const void *in_b){

int reads_at_pos_callback(uint32_t tid, uint32_t pos, int n_plp, const bam_pileup1_t *pil, void *data, int sorted, int isnorm){
file_holder* bams = (file_holder* )data;
khash_t(strh) *h;
khiter_t k;
char *nom = malloc(sizeof(char) * 350);
h = kh_init(strh);
int i=0;
for(i=0;i<n_plp;i++){
const bam_pileup1_t *p = pil + i;
const bam_pileup1_t *p = pil + i;
int qual = bam_get_qual(p->b)[p->qpos];
uint8_t c = bam_seqi(bam_get_seq(p->b), p->qpos);
if(!(p->is_del) && qual >= min_base_qual && (c == 1 || c == 2 || c == 4 || c == 8)){

int absent;
k = kh_put(strh, h, bam_get_qname(p->b), &absent);
uint8_t pre_b;
if(!absent){ //Read already processed to get base processed (we only increment if base is different between overlapping read pairs)
k = kh_get(strh, h, bam_get_qname(p->b));
pre_b = kh_val(h,k);
}else{
//Add the value to the hash
kh_value(h, k) = c;
}

if(!(p->is_del) && qual >= min_base_qual && (c == 1 || c == 2 || c == 4 || c == 8)&& (absent || pre_b != c)){
//Now we add a new read pos struct to the list since the read is valid.
read_pos_t *rp = malloc(sizeof(struct read_pos_t));
check_mem(rp);
Expand Down Expand Up @@ -551,9 +591,10 @@ int reads_at_pos_callback(uint32_t tid, uint32_t pos, int n_plp, const bam_pileu
}//End of if this is a useful read, ACGT, and within qual boundaries.
}//End iterating through pileups at this position
free(nom);

kh_destroy(strh, h);
return 0;
error:
kh_destroy(strh, h);
return 1;
}

Expand Down
4 changes: 2 additions & 2 deletions src/estep.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ void estep_print_usage (int exit_code){
printf("-w --species [string] Species name (eg Human), required if bam header SQ lines do not contain AS and SP information.\n");
printf("-n --normal-copy-number [int] Copy number to use when filling gaps in the normal copy number file [default:%d].\n",normal_copy_number);
printf("-t --tumour-copy-number [int] Copy number to use when filling gaps in the tumour copy number file [default:%d].\n",tumour_copy_number);
printf("-l --normal-protocol [string] Normal protocol. Ideally this should match -r but not checked (WGS|WGX|RNA) [default:%s].\n",norm_prot);
printf("-r --tumour-protocol [string] Tumour protocol. Ideally this should match -l but not checked (WGS|WGX|RNA) [default:%s].\n",tum_prot);
printf("-l --normal-protocol [string] Normal protocol. Ideally this should match -r but not checked (WGS|WXS|RNA) [default:%s].\n",norm_prot);
printf("-r --tumour-protocol [string] Tumour protocol. Ideally this should match -l but not checked (WGS|WXS|RNA) [default:%s].\n",tum_prot);
printf("-P --normal-platform [string] Normal platform. Overrides the values retrieved from bam header.\n");
printf("-T --tumour-platform [string] Tumour platform. Overrides the values retrieved from bam header.\n");
printf("-M --max-copy-number [int] Maximum copy number permitted. If exceeded the copy number for the offending region will be set to this value. [default:%d].\n",max_copy_number);
Expand Down
Loading

0 comments on commit 60a8d97

Please sign in to comment.