Skip to content

Commit

Permalink
Merge commit 'refs/pull/206/head' of https://github.com/wtsi-npg/npg_…
Browse files Browse the repository at this point in the history
…seq_pipeline into devel
  • Loading branch information
dkj committed Jun 9, 2017
2 parents 24e5163 + ade8b28 commit fc2ad1e
Show file tree
Hide file tree
Showing 9 changed files with 26 additions and 96 deletions.
4 changes: 4 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
LIST OF CHANGES
---------------

- RNA-SeQC execution is now chained to the vtfp/viv alignment command, for RNA-Seq libraries only:
the qc_rna_seqc entries have been removed from the central function lists and from the
parallelisation configuration, and the code that created rna_seqc-specific directories has been
removed, as this is now handled by the check itself using the qc_out argument.
- remove GCLP-specific code and configuration files
- remove unused force_p4 attribute
- OLB analysis removed
Expand Down
1 change: 0 additions & 1 deletion data/config_files/function_list_central.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
- qc_genotype
- qc_verify_bam_id
- qc_upstream_tags
- qc_rna_seqc
- run_analysis_complete
- update_ml_warehouse
- archive_to_irods_samplesheet
Expand Down
1 change: 0 additions & 1 deletion data/config_files/function_list_central_qc_run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
- qc_genotype
- qc_verify_bam_id
- qc_upstream_tags
- qc_rna_seqc
- run_analysis_complete
- run_archival_in_progress
- copy_interop_files_to_irods
Expand Down
1 change: 0 additions & 1 deletion data/config_files/parallelisation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ c:
qc_genotype: 1
qc_upstream_tags: 1
qc_verify_bam_id: 1
qc_rna_seqc: 1
d:
upload_auto_qc_to_qc_database: 1
upload_fastqcheck_to_qc_database: 1
Expand Down
17 changes: 13 additions & 4 deletions lib/npg_pipeline/archive/file/generation/seq_alignment.pm
Original file line number Diff line number Diff line change
Expand Up @@ -392,31 +392,40 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity
q{&&},
_qc_command('bam_flagstats', $archive_path, $qcpath, $l, $is_plex, $nchs_outfile_label),
: q(),
$do_rna ? join q( ),
q{&&},
_qc_command('rna_seqc', $archive_path, $qcpath, $l, $is_plex),
: q()
),
q(');
}

# Builds the command line string for a single autoqc check.
#
# Arguments (positional):
#   $check_name - name of the check, passed on as --check
#   $qc_in      - input directory, used as --qc_in for the bam_flagstats and
#                 rna_seqc checks only
#   $qc_out     - output directory, passed on as --qc_out
#   $l          - lane/plex object; its id_run, position and tag_index
#                 methods are called here
#   $is_plex    - true if the command is for a plex; --tag_index is added
#                 only when this is true and $l->tag_index is defined
#   $subset     - optional; when true it is passed on as --subset for the
#                 bam_flagstats and rna_seqc checks
#
# Returns $QC_SCRIPT_NAME followed by ' --name value' pairs, ordered
# alphabetically by option name.
sub _qc_command {##no critic (Subroutines::ProhibitManyArgs)
  my ($check_name, $qc_in, $qc_out, $l, $is_plex, $subset) = @_;

  # Options common to every check; declared once so that no earlier
  # declaration of $args is masked.
  my $args = {'id_run'   => $l->id_run,
              'position' => $l->position,
              'qc_out'   => $qc_out,
              'check'    => $check_name,};

  if ($is_plex && defined $l->tag_index) {
    $args->{'tag_index'} = $l->tag_index;
  }

  # The alternatives are grouped so that both anchors apply to each of them.
  # Without the group, ^ binds to the first name only and $ to the last one
  # only, so any name merely starting with 'bam_flagstats' or ending with
  # 'rna_seqc' would be accepted.
  if ($check_name =~ /^(?:bam_flagstats|rna_seqc)$/smx) {
    if ($subset) {
      $args->{'subset'} = $subset;
    }
    $args->{'qc_in'} = $qc_in;
  } else {
    # All other checks get the literal string $PWD, which is not expanded here.
    $args->{'qc_in'} = q[$] . 'PWD';
  }

  # Sorting the option names keeps the generated command deterministic.
  my $command = q[];
  foreach my $arg (sort keys %{$args}) {
    $command .= join q[ ], q[ --].$arg, $args->{$arg};
  }

  return $QC_SCRIPT_NAME . $command;
}

Expand Down
63 changes: 3 additions & 60 deletions lib/npg_pipeline/archive/file/qc.pm
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package npg_pipeline::archive::file::qc;
use Moose;
use Readonly;
use File::Spec;
use File::Path qw{make_path};
use Class::Load qw{load_class};

use npg_pipeline::lsf_job;
Expand All @@ -16,10 +15,6 @@ Readonly::Scalar my $QC_SCRIPT_NAME => q{qc};
Readonly::Scalar my $LSF_MEMORY_REQ => 6000;
Readonly::Scalar my $LSF_MEMORY_REQ_ADAPTER => 1500;
Readonly::Scalar my $LSF_INDEX_MULTIPLIER => 10_000;
Readonly::Scalar my $REQUIRES_QC_REPORT_DIR => {
rna_seqc => 'rna_seqc',
};


has q{qc_to_run} => (isa => q{Str},
is => q{ro},
Expand Down Expand Up @@ -52,16 +47,6 @@ sub BUILD {
return;
}

has q{_qc_report_dirs} => (isa => q{HashRef[Str]},
is => q{ro},
traits => [q{Hash}],
default => sub { { } },
handles => {
_set_rpt_qc_report_dir => q{set},
_get_rpt_qc_report_dir => q{get},
},
);

sub run_qc {
my ($self, $arg_refs) = @_;

Expand All @@ -78,28 +63,6 @@ sub run_qc {
}
}

if ($REQUIRES_QC_REPORT_DIR->{$qc_to_run}) {
my @archive_qc_path = ($self->archive_path, q[qc], $REQUIRES_QC_REPORT_DIR->{$qc_to_run});
foreach my $position ($self->positions()) {
my $rp = join q[_], $self->id_run(), $position;
my $qc_report_dir = File::Spec->catdir(@archive_qc_path, $rp);
if (! -d $qc_report_dir) {
make_path($qc_report_dir);
$self->_set_rpt_qc_report_dir($rp, $qc_report_dir);
}
if ($self->is_multiplexed_lane($position)) {
foreach my $tag (@{$self->get_tag_index_list($position)}) {
my $rpt = join q[#], $rp, $tag;
$qc_report_dir = File::Spec->catdir(@archive_qc_path, $rp, $rpt);
if (! -d $qc_report_dir) {
make_path($qc_report_dir);
$self->_set_rpt_qc_report_dir($rpt, $qc_report_dir);
}
}
}
}
}

my $required_job_completion = $arg_refs->{'required_job_completion'};
$required_job_completion ||= q{};

Expand Down Expand Up @@ -198,17 +161,6 @@ sub _qc_command {
}
$c .= qq{ --qc_in=$qc_in --qc_out=$qc_out};

if ($REQUIRES_QC_REPORT_DIR->{$self->qc_to_run()}) {
my @archive_qc_path = ($archive_path, q[qc], $REQUIRES_QC_REPORT_DIR->{$self->qc_to_run()});
my $rptstr = join q[_], $self->id_run(), (defined $indexed ? $lanestr : $self->lsb_jobindex());
my $qc_report_dir = File::Spec->catdir(@archive_qc_path, $rptstr);
if (defined $indexed) {
$rptstr = join q[#], $rptstr, $tagstr;
$qc_report_dir = File::Spec->catdir($qc_report_dir, $rptstr);
}
$c .= qq{ --qc_report_dir=$qc_report_dir};
}

return $c;
}

Expand All @@ -218,9 +170,9 @@ sub _should_run {
my $qc = $self->qc_to_run();

if (($qc =~ /^tag_metrics|upstream_tags|gc_bias|verify_bam_id$/smx) ||
($qc =~ /^genotype|pulldown_metrics|rna_seqc$/smx)) {
($qc =~ /^genotype|pulldown_metrics$/smx)) {
my $is_multiplexed_lane = $self->is_multiplexed_lane($position);
if ($qc =~ /^gc_bias|verify_bam_id|genotype|pulldown_metrics|rna_seqc$/smx) {
if ($qc =~ /^gc_bias|verify_bam_id|genotype|pulldown_metrics$/smx) {
my $can_run = ((!defined $tag_index) && !$is_multiplexed_lane) ||
((defined $tag_index) && $is_multiplexed_lane);
if (!$can_run) {
Expand All @@ -241,13 +193,6 @@ sub _should_run {
if ($self->has_repository && $self->_check_uses_refrepos()) {
$init_hash->{'repository'} = $self->repository;
}
if ($REQUIRES_QC_REPORT_DIR->{$qc}) {
my $qc_report_dir_key = join q[_], $self->id_run(), $position;
if (defined $tag_index) {
$qc_report_dir_key = join q[#], $qc_report_dir_key, $tag_index;
}
$init_hash->{'qc_report_dir'} = $self->_get_rpt_qc_report_dir($qc_report_dir_key);
}

return $self->_qc_module_name()->new($init_hash)->can_run();
}
Expand Down Expand Up @@ -284,7 +229,7 @@ sub _lsf_options {
my ($self, $qc_to_run) = @_;

my $resources;
if ($qc_to_run =~ /insert_size|sequence_error|ref_match|pulldown_metrics|rna_seqc/smx ) {
if ($qc_to_run =~ /insert_size|sequence_error|ref_match|pulldown_metrics/smx ) {
$resources = npg_pipeline::lsf_job->new(memory => $LSF_MEMORY_REQ)->memory_spec();
} elsif ($qc_to_run eq q[adapter]) {
$resources = npg_pipeline::lsf_job->new(memory => $LSF_MEMORY_REQ_ADAPTER)->memory_spec() .
Expand Down Expand Up @@ -352,8 +297,6 @@ Launches the qc jobs.
=item Class::Load
=item File::Path
=back
=head1 INCOMPATIBILITIES
Expand Down
27 changes: 0 additions & 27 deletions lib/npg_pipeline/archive/folder/generation.pm
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ sub create_dir {
my $qc_dir = $self->qc_path();
my $qc_log_dir = $qc_dir . q{/log};
my $tileviz_dir = $qc_dir . q{/tileviz};
my $rna_seqc_dir = $qc_dir . q{/rna_seqc};

#############
# check existence of archive directory
Expand Down Expand Up @@ -56,21 +55,6 @@ sub create_dir {
}
}

#############
# check existence of rna_seqc directory
# create if it doesn't

if ( ! -d $rna_seqc_dir) {
my $mk_rna_seqc_dir_cmd = qq{mkdir -p $rna_seqc_dir};
$self->debug($mk_rna_seqc_dir_cmd);
my $return = qx{$mk_rna_seqc_dir_cmd};
if ( $CHILD_ERROR ) {
$self->logcroak($tileviz_dir,
qq{ does not exist and unable to create: $CHILD_ERROR },
$return);
}
}

#############
# check existence of multiplex lane and qc directory
# create them if they don't exist
Expand Down Expand Up @@ -125,11 +109,6 @@ sub create_dir {
$self->warn("could not chgrp $tileviz_dir\n\t$rc"); # not fatal
}

$self->info("chgrp $owning_group $rna_seqc_dir");
$rc = `chgrp $owning_group $rna_seqc_dir`;
if ( $CHILD_ERROR ) {
$self->warn("could not chgrp $rna_seqc_dir\n\t$rc"); # not fatal
}
############
# ensure that the owning group is what we expect

Expand Down Expand Up @@ -167,12 +146,6 @@ sub create_dir {
$self->warn("could not chmod $tileviz_dir\n\t$rc"); # not fatal
}

$self->info("chmod u=rwx,g=srxw,o=rx $rna_seqc_dir");
$rc = `chmod u=rwx,g=srxw,o=rx $rna_seqc_dir`;
if ( $CHILD_ERROR ) {
$self->warn("could not chmod $rna_seqc_dir\n\t$rc"); # not fatal
}

$self->info("chmod u=rwx,g=srxw,o=rx $archive_log_dir");
$rc = `chmod u=rwx,g=srxw,o=rx $archive_log_dir`;
if ( $CHILD_ERROR ) {
Expand Down
1 change: 0 additions & 1 deletion t/10-pluggable_harold_central.t
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ my $runfolder_path = $util->analysis_runfolder_path();
qc_genotype
qc_verify_bam_id
qc_upstream_tags
qc_rna_seqc
run_analysis_complete
update_ml_warehouse
archive_to_irods_samplesheet
Expand Down
7 changes: 6 additions & 1 deletion t/20-archive_file_generation-seq_alignment.t
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,14 @@ subtest 'test 1' => sub {

my $qc_in = $dir . q[/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/lane4];
my $qc_out = join q[/], $qc_in, q[qc];
my $qc_report_dir = $dir . q[/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/qc/rna_seqc/12597_4/12597_4#3];

my $args = {};
$args->{'40003'} = qq{bash -c '\ mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#3 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#3 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#3_p4s2_pv_in.json -export_param_vals 12597_4#3_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#3.json && viv.pl -s -x -v 3 -o viv_12597_4#3.log run_12597_4#3.json } .
qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 3} .
qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 3} .
q{ && qc --check alignment_filter_metrics --id_run 12597 --position 4 --qc_in $PWD --qc_out } .$qc_out.q{ --tag_index 3}.
qq{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $qc_in --qc_out } . $qc_out . q{ --tag_index 3}.
q{ '};

$args->{'40000'} = qq{bash -c '\ mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#0_p4s2_pv_in.json -export_param_vals 12597_4#0_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#0.json && viv.pl -s -x -v 3 -o viv_12597_4#0.log run_12597_4#0.json } .
Expand Down Expand Up @@ -145,6 +147,7 @@ subtest 'test 1' => sub {
qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 3} .
qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 3} .
q{ && qc --check alignment_filter_metrics --id_run 12597 --position 4 --qc_in $PWD --qc_out } .$qc_out.q{ --tag_index 3}.
qq{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $qc_in --qc_out } . $qc_out . q{ --tag_index 3}.
qq( '","40000":"bash -c ' mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#0_p4s2_pv_in.json -export_param_vals 12597_4#0_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '\\"'\\"'fop.*samtools_stats_F0.*00_bait.*'\\"'\\"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#0.json && viv.pl -s -x -v 3 -o viv_12597_4#0.log run_12597_4#0.json ) .
qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 0} .
qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 0} .
Expand Down Expand Up @@ -268,11 +271,13 @@ subtest 'test 2' => sub {

my $qc_in = qq{$dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive};
my $qc_out = join q[/], $qc_in, q[qc];
my $qc_report_dir = qq[$dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/qc/rna_seqc/13066_8];
my $args = {};
$args->{8} = qq{bash -c ' mkdir -p $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/tmp_\$LSB_JOBID/13066_8 ; cd $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/tmp_\$LSB_JOBID/13066_8 && vtfp.pl -param_vals $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/13066_8_p4s2_pv_in.json -export_param_vals 13066_8_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_13066_8.json && viv.pl -s -x -v 3 -o viv_13066_8.log run_13066_8.json } .
qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out} .
qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out --subset phix} .
q{ && qc --check alignment_filter_metrics --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . q{ '};
q{ && qc --check alignment_filter_metrics --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out .
qq{ && qc --check rna_seqc --id_run 13066 --position 8 --qc_in $qc_in --qc_out } . $qc_out . q{ '};

lives_ok {$rna_gen->_generate_command_arguments([8])}
'no error generating command arguments';
Expand Down

0 comments on commit fc2ad1e

Please sign in to comment.