diff --git a/Changes b/Changes index ee188d218..65187e03d 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,10 @@ LIST OF CHANGES --------------- + - Chained execution of RNA-SeQC to the vtfp/viv alignment cmd for RNA-Seq libraries only: + entries for qc check rna_seqc removed from central function and parallelisation. + code that created rna_seqc-specific directories has been removed as this is + now handled by the check itself using qc_out arg. - remove GCLP-specific code and configuration files - remove unused force_p4 attribute - OLB analysis removed diff --git a/data/config_files/function_list_central.yml b/data/config_files/function_list_central.yml index 6727f31a9..a5b58162d 100644 --- a/data/config_files/function_list_central.yml +++ b/data/config_files/function_list_central.yml @@ -25,7 +25,6 @@ - qc_genotype - qc_verify_bam_id - qc_upstream_tags -- qc_rna_seqc - run_analysis_complete - update_ml_warehouse - archive_to_irods_samplesheet diff --git a/data/config_files/function_list_central_qc_run.yml b/data/config_files/function_list_central_qc_run.yml index fdc5b7089..721e0c2b5 100644 --- a/data/config_files/function_list_central_qc_run.yml +++ b/data/config_files/function_list_central_qc_run.yml @@ -22,7 +22,6 @@ - qc_genotype - qc_verify_bam_id - qc_upstream_tags -- qc_rna_seqc - run_analysis_complete - run_archival_in_progress - copy_interop_files_to_irods diff --git a/data/config_files/parallelisation.yml b/data/config_files/parallelisation.yml index 3a9f96f5b..384e62c8e 100644 --- a/data/config_files/parallelisation.yml +++ b/data/config_files/parallelisation.yml @@ -18,7 +18,6 @@ c: qc_genotype: 1 qc_upstream_tags: 1 qc_verify_bam_id: 1 - qc_rna_seqc: 1 d: upload_auto_qc_to_qc_database: 1 upload_fastqcheck_to_qc_database: 1 diff --git a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm index 16c5b03d2..e84fb6590 100644 --- a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm +++ 
b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm @@ -392,6 +392,10 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity q{&&}, _qc_command('bam_flagstats', $archive_path, $qcpath, $l, $is_plex, $nchs_outfile_label), : q(), + $do_rna ? join q( ), + q{&&}, + _qc_command('rna_seqc', $archive_path, $qcpath, $l, $is_plex), + : q() ), q('); } @@ -399,11 +403,16 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity sub _qc_command {##no critic (Subroutines::ProhibitManyArgs) my ($check_name, $qc_in, $qc_out, $l, $is_plex, $subset) = @_; - my $args = {'id_run' => $l->id_run, 'position' => $l->position}; + my $args = {'id_run' => $l->id_run, + 'position'=> $l->position, + 'qc_out' => $qc_out, + 'check' => $check_name,}; + if ($is_plex && defined $l->tag_index) { $args->{'tag_index'} = $l->tag_index; } - if ($check_name eq 'bam_flagstats') { + + if ($check_name =~ /^(?:bam_flagstats|rna_seqc)$/smx) { if ($subset) { $args->{'subset'} = $subset; } @@ -411,12 +420,12 @@ sub _qc_command {##no critic (Subroutines::ProhibitManyArgs) } else { $args->{'qc_in'} = q[$] . 'PWD'; } - $args->{'qc_out'} = $qc_out; - $args->{'check'} = $check_name; + my $command = q[]; foreach my $arg (sort keys %{$args}) { $command .= join q[ ], q[ --].$arg, $args->{$arg}; } + return $QC_SCRIPT_NAME . 
$command; } diff --git a/lib/npg_pipeline/archive/file/qc.pm b/lib/npg_pipeline/archive/file/qc.pm index ccc966d7d..652263a4f 100644 --- a/lib/npg_pipeline/archive/file/qc.pm +++ b/lib/npg_pipeline/archive/file/qc.pm @@ -3,7 +3,6 @@ package npg_pipeline::archive::file::qc; use Moose; use Readonly; use File::Spec; -use File::Path qw{make_path}; use Class::Load qw{load_class}; use npg_pipeline::lsf_job; @@ -16,10 +15,6 @@ Readonly::Scalar my $QC_SCRIPT_NAME => q{qc}; Readonly::Scalar my $LSF_MEMORY_REQ => 6000; Readonly::Scalar my $LSF_MEMORY_REQ_ADAPTER => 1500; Readonly::Scalar my $LSF_INDEX_MULTIPLIER => 10_000; -Readonly::Scalar my $REQUIRES_QC_REPORT_DIR => { - rna_seqc => 'rna_seqc', -}; - has q{qc_to_run} => (isa => q{Str}, is => q{ro}, @@ -52,16 +47,6 @@ sub BUILD { return; } -has q{_qc_report_dirs} => (isa => q{HashRef[Str]}, - is => q{ro}, - traits => [q{Hash}], - default => sub { { } }, - handles => { - _set_rpt_qc_report_dir => q{set}, - _get_rpt_qc_report_dir => q{get}, - }, - ); - sub run_qc { my ($self, $arg_refs) = @_; @@ -78,28 +63,6 @@ sub run_qc { } } - if ($REQUIRES_QC_REPORT_DIR->{$qc_to_run}) { - my @archive_qc_path = ($self->archive_path, q[qc], $REQUIRES_QC_REPORT_DIR->{$qc_to_run}); - foreach my $position ($self->positions()) { - my $rp = join q[_], $self->id_run(), $position; - my $qc_report_dir = File::Spec->catdir(@archive_qc_path, $rp); - if (! -d $qc_report_dir) { - make_path($qc_report_dir); - $self->_set_rpt_qc_report_dir($rp, $qc_report_dir); - } - if ($self->is_multiplexed_lane($position)) { - foreach my $tag (@{$self->get_tag_index_list($position)}) { - my $rpt = join q[#], $rp, $tag; - $qc_report_dir = File::Spec->catdir(@archive_qc_path, $rp, $rpt); - if (! 
-d $qc_report_dir) { - make_path($qc_report_dir); - $self->_set_rpt_qc_report_dir($rpt, $qc_report_dir); - } - } - } - } - } - my $required_job_completion = $arg_refs->{'required_job_completion'}; $required_job_completion ||= q{}; @@ -198,17 +161,6 @@ sub _qc_command { } $c .= qq{ --qc_in=$qc_in --qc_out=$qc_out}; - if ($REQUIRES_QC_REPORT_DIR->{$self->qc_to_run()}) { - my @archive_qc_path = ($archive_path, q[qc], $REQUIRES_QC_REPORT_DIR->{$self->qc_to_run()}); - my $rptstr = join q[_], $self->id_run(), (defined $indexed ? $lanestr : $self->lsb_jobindex()); - my $qc_report_dir = File::Spec->catdir(@archive_qc_path, $rptstr); - if (defined $indexed) { - $rptstr = join q[#], $rptstr, $tagstr; - $qc_report_dir = File::Spec->catdir($qc_report_dir, $rptstr); - } - $c .= qq{ --qc_report_dir=$qc_report_dir}; - } - return $c; } @@ -218,9 +170,9 @@ sub _should_run { my $qc = $self->qc_to_run(); if (($qc =~ /^tag_metrics|upstream_tags|gc_bias|verify_bam_id$/smx) || - ($qc =~ /^genotype|pulldown_metrics|rna_seqc$/smx)) { + ($qc =~ /^genotype|pulldown_metrics$/smx)) { my $is_multiplexed_lane = $self->is_multiplexed_lane($position); - if ($qc =~ /^gc_bias|verify_bam_id|genotype|pulldown_metrics|rna_seqc$/smx) { + if ($qc =~ /^gc_bias|verify_bam_id|genotype|pulldown_metrics$/smx) { my $can_run = ((!defined $tag_index) && !$is_multiplexed_lane) || ((defined $tag_index) && $is_multiplexed_lane); if (!$can_run) { @@ -241,13 +193,6 @@ sub _should_run { if ($self->has_repository && $self->_check_uses_refrepos()) { $init_hash->{'repository'} = $self->repository; } - if ($REQUIRES_QC_REPORT_DIR->{$qc}) { - my $qc_report_dir_key = join q[_], $self->id_run(), $position; - if (defined $tag_index) { - $qc_report_dir_key = join q[#], $qc_report_dir_key, $tag_index; - } - $init_hash->{'qc_report_dir'} = $self->_get_rpt_qc_report_dir($qc_report_dir_key); - } return $self->_qc_module_name()->new($init_hash)->can_run(); } @@ -284,7 +229,7 @@ sub _lsf_options { my ($self, $qc_to_run) = @_; my 
$resources; - if ($qc_to_run =~ /insert_size|sequence_error|ref_match|pulldown_metrics|rna_seqc/smx ) { + if ($qc_to_run =~ /insert_size|sequence_error|ref_match|pulldown_metrics/smx ) { $resources = npg_pipeline::lsf_job->new(memory => $LSF_MEMORY_REQ)->memory_spec(); } elsif ($qc_to_run eq q[adapter]) { $resources = npg_pipeline::lsf_job->new(memory => $LSF_MEMORY_REQ_ADAPTER)->memory_spec() . @@ -352,8 +297,6 @@ Launches the qc jobs. =item Class::Load -=item File::Path - =back =head1 INCOMPATIBILITIES diff --git a/lib/npg_pipeline/archive/folder/generation.pm b/lib/npg_pipeline/archive/folder/generation.pm index 95a166a23..ce0267aba 100644 --- a/lib/npg_pipeline/archive/folder/generation.pm +++ b/lib/npg_pipeline/archive/folder/generation.pm @@ -19,7 +19,6 @@ sub create_dir { my $qc_dir = $self->qc_path(); my $qc_log_dir = $qc_dir . q{/log}; my $tileviz_dir = $qc_dir . q{/tileviz}; - my $rna_seqc_dir = $qc_dir . q{/rna_seqc}; ############# # check existence of archive directory @@ -56,21 +55,6 @@ sub create_dir { } } - ############# - # check existence of rna_seqc directory - # create if it doesn't - - if ( ! 
-d $rna_seqc_dir) { - my $mk_rna_seqc_dir_cmd = qq{mkdir -p $rna_seqc_dir}; - $self->debug($mk_rna_seqc_dir_cmd); - my $return = qx{$mk_rna_seqc_dir_cmd}; - if ( $CHILD_ERROR ) { - $self->logcroak($tileviz_dir, - qq{ does not exist and unable to create: $CHILD_ERROR }, - $return); - } - } - ############# # check existence of multiplex lane and qc directory # create if they doesn't @@ -125,11 +109,6 @@ sub create_dir { $self->warn("could not chgrp $tileviz_dir\n\t$rc"); # not fatal } - $self->info("chgrp $owning_group $rna_seqc_dir"); - $rc = `chgrp $owning_group $rna_seqc_dir`; - if ( $CHILD_ERROR ) { - $self->warn("could not chgrp $rna_seqc_dir\n\t$rc"); # not fatal - } ############ # ensure that the owning group is what we expect @@ -167,12 +146,6 @@ sub create_dir { $self->warn("could not chmod $tileviz_dir\n\t$rc"); # not fatal } - $self->info("chmod u=rwx,g=srxw,o=rx $rna_seqc_dir"); - $rc = `chmod u=rwx,g=srxw,o=rx $rna_seqc_dir`; - if ( $CHILD_ERROR ) { - $self->warn("could not chmod $rna_seqc_dir\n\t$rc"); # not fatal - } - $self->info("chmod u=rwx,g=srxw,o=rx $archive_log_dir"); $rc = `chmod u=rwx,g=srxw,o=rx $archive_log_dir`; if ( $CHILD_ERROR ) { diff --git a/t/10-pluggable_harold_central.t b/t/10-pluggable_harold_central.t index dd31b65ba..2bf83154a 100644 --- a/t/10-pluggable_harold_central.t +++ b/t/10-pluggable_harold_central.t @@ -75,7 +75,6 @@ my $runfolder_path = $util->analysis_runfolder_path(); qc_genotype qc_verify_bam_id qc_upstream_tags - qc_rna_seqc run_analysis_complete update_ml_warehouse archive_to_irods_samplesheet diff --git a/t/20-archive_file_generation-seq_alignment.t b/t/20-archive_file_generation-seq_alignment.t index 9ba17d73d..531c001ab 100644 --- a/t/20-archive_file_generation-seq_alignment.t +++ b/t/20-archive_file_generation-seq_alignment.t @@ -105,12 +105,14 @@ subtest 'test 1' => sub { my $qc_in = $dir . 
q[/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/lane4]; my $qc_out = join q[/], $qc_in, q[qc]; + my $qc_report_dir = $dir . q[/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/qc/rna_seqc/12597_4/12597_4#3]; my $args = {}; $args->{'40003'} = qq{bash -c '\ mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#3 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#3 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#3_p4s2_pv_in.json -export_param_vals 12597_4#3_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#3.json && viv.pl -s -x -v 3 -o viv_12597_4#3.log run_12597_4#3.json } . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 3} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 3} . q{ && qc --check alignment_filter_metrics --id_run 12597 --position 4 --qc_in $PWD --qc_out } .$qc_out.q{ --tag_index 3}. + qq{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $qc_in --qc_out } . $qc_out . q{ --tag_index 3}. 
q{ '}; $args->{'40000'} = qq{bash -c '\ mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#0_p4s2_pv_in.json -export_param_vals 12597_4#0_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#0.json && viv.pl -s -x -v 3 -o viv_12597_4#0.log run_12597_4#0.json } . @@ -145,6 +147,7 @@ subtest 'test 1' => sub { qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 3} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 3} . q{ && qc --check alignment_filter_metrics --id_run 12597 --position 4 --qc_in $PWD --qc_out } .$qc_out.q{ --tag_index 3}. + qq{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $qc_in --qc_out } . $qc_out . q{ --tag_index 3}. 
qq( '","40000":"bash -c ' mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#0_p4s2_pv_in.json -export_param_vals 12597_4#0_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '\\"'\\"'fop.*samtools_stats_F0.*00_bait.*'\\"'\\"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#0.json && viv.pl -s -x -v 3 -o viv_12597_4#0.log run_12597_4#0.json ) . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 0} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 0} . 
@@ -268,11 +271,13 @@ subtest 'test 2' => sub { my $qc_in = qq{$dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive}; my $qc_out = join q[/], $qc_in, q[qc]; + my $qc_report_dir = qq[$dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/qc/rna_seqc/13066_8]; my $args = {}; $args->{8} = qq{bash -c ' mkdir -p $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/tmp_\$LSB_JOBID/13066_8 ; cd $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/tmp_\$LSB_JOBID/13066_8 && vtfp.pl -param_vals $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/13066_8_p4s2_pv_in.json -export_param_vals 13066_8_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_13066_8.json && viv.pl -s -x -v 3 -o viv_13066_8.log run_13066_8.json } . qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out} . qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out --subset phix} . - q{ && qc --check alignment_filter_metrics --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . q{ '}; + q{ && qc --check alignment_filter_metrics --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . + qq{ && qc --check rna_seqc --id_run 13066 --position 8 --qc_in $qc_in --qc_out } . $qc_out . 
q{ '}; lives_ok {$rna_gen->_generate_command_arguments([8])} 'no error generating command arguments';