From aea45efc30a24289e205802fb304bd2c017fc56f Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Mon, 23 Jan 2017 16:43:25 +0000 Subject: [PATCH 01/27] update sequencescape warehouse loader job --- Changes | 3 +++ lib/npg_pipeline/pluggable/harold/post_qc_review.pm | 9 +++------ t/10-pluggable_harold_central.t | 5 ++--- t/10-pluggable_harold_post_qc_review.t | 6 +++--- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/Changes b/Changes index 46046cb94..8f7739a73 100644 --- a/Changes +++ b/Changes @@ -3,6 +3,9 @@ LIST OF CHANGES - replaces the original log role with the one from DNAP utilities, which provides a Log4perl logger and some convenience methods. + - new signature for the sequencescape warehouse loader so that it uses + samplesheet LIMs driver at the analysis stage and ml_warehouse_fc_cache + LIMs driver at the archival stage release 51.6 - test and code fixes to ensure problem-free tests under Perl 5.22.2 diff --git a/lib/npg_pipeline/pluggable/harold/post_qc_review.pm b/lib/npg_pipeline/pluggable/harold/post_qc_review.pm index 4b71d58c4..223315739 100644 --- a/lib/npg_pipeline/pluggable/harold/post_qc_review.pm +++ b/lib/npg_pipeline/pluggable/harold/post_qc_review.pm @@ -225,14 +225,11 @@ sub _update_warehouse_command { my $post_qc_complete = $option and (ref $option eq 'HASH') and $option->{'post_qc_complete'} ? 1 : 0; my $id_run = $self->id_run; - my $command = q[]; + my $command = qq{$loader_name --verbose --id_run $id_run}; if ($loader_name eq 'warehouse_loader') { - # Currently, we need pool library name and link to plexes in SeqQC. - # Therefore, we need to run live. - $command = join q[], map {q[unset ] . $_ . q[;]} npg_pipeline::cache->env_vars; + $command .= q{ --lims_driver_type }; + $command .= $post_qc_complete ? 
'ml_warehouse_fc_cache' : 'samplesheet'; } - - $command .= qq{$loader_name --verbose --id_run $id_run}; my $job_name = join q{_}, $loader_name, $id_run, $self->pipeline_name; my $path = $self->make_log_dir($self->recalibrated_path()); my $prereq = q[]; diff --git a/t/10-pluggable_harold_central.t b/t/10-pluggable_harold_central.t index f20516c20..d145ce4d8 100644 --- a/t/10-pluggable_harold_central.t +++ b/t/10-pluggable_harold_central.t @@ -163,10 +163,9 @@ my $runfolder_path = $util->analysis_runfolder_path(); my $timestamp = $pb->timestamp; my $recalibrated_path = $pb->recalibrated_path(); my $log_dir = $pb->make_log_dir( $recalibrated_path ); - my $unset_string = 'unset NPG_WEBSERVICE_CACHE_DIR;unset NPG_CACHED_SAMPLESHEET_FILE;'; my $expected_command = q[bsub -q lowload 50 -J warehouse_loader_1234_central ] . - qq[-o $log_dir/warehouse_loader_1234_central_] . $timestamp . - qq[.out '${unset_string}warehouse_loader --verbose --id_run 1234']; + qq[-o $log_dir/warehouse_loader_1234_central_] . $timestamp . q[.out ] . 
+ qq['warehouse_loader --verbose --id_run 1234 --lims_driver_type samplesheet']; is($pb->_update_warehouse_command('warehouse_loader', (50)), $expected_command, 'update warehouse command'); } diff --git a/t/10-pluggable_harold_post_qc_review.t b/t/10-pluggable_harold_post_qc_review.t index 40948cea7..055c636ba 100644 --- a/t/10-pluggable_harold_post_qc_review.t +++ b/t/10-pluggable_harold_post_qc_review.t @@ -34,7 +34,7 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); upload_auto_qc_to_qc_database run_run_archived run_qc_complete - update_warehouse + update_warehouse_post_qc_complete ); my @original = @functions_in_order; unshift @original, 'lsf_start'; @@ -64,10 +64,9 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); my $log_dir_in_outgoing = $log_dir; $log_dir_in_outgoing =~ s{/analysis/}{/outgoing/}smx; my $job_name = 'warehouse_loader_1234_post_qc_review'; - my $unset_string = 'unset NPG_WEBSERVICE_CACHE_DIR;unset NPG_CACHED_SAMPLESHEET_FILE;'; my $prefix = qq[bsub -q lowload 50 -J $job_name ] . qq[-o $log_dir/${job_name}_${timestamp}.out]; - my $command = qq['${unset_string}warehouse_loader --verbose --id_run 1234']; + my $command = q['warehouse_loader --verbose --id_run 1234 --lims_driver_type samplesheet']; is($post_qc_review->_update_warehouse_command('warehouse_loader', (50)), qq[$prefix $command], 'update warehouse command'); @@ -75,6 +74,7 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); $prefix = qq[bsub -q lowload 50 -J $job_name ] . 
qq[-o $log_dir_in_outgoing/${job_name}_${timestamp}.out]; my $preexec = qq(-E "[ -d '${log_dir_in_outgoing}' ]"); + $command = q['warehouse_loader --verbose --id_run 1234 --lims_driver_type ml_warehouse_fc_cache']; is($post_qc_review->_update_warehouse_command( 'warehouse_loader', (50, {}, {'post_qc_complete' => 1})), join(q[ ],$prefix,$preexec,$command), From 1d7c46f580351be2d6cce8e163abdc51501acdc2 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Wed, 26 Apr 2017 23:17:38 +0100 Subject: [PATCH 02/27] initial code deletion --- .../function_list_central_olb.yml | 36 -- data/config_files/illumina_pipeline.ini | 7 - data/config_files/pb_cal_pipeline.ini | 16 - lib/npg_pipeline/analysis/bustard4pbcb.pm | 260 --------- .../analysis/harold_calibration_bam.pm | 518 +----------------- lib/npg_pipeline/pluggable/harold/central.pm | 97 +--- .../roles/business/harold_calibration_reqs.pm | 229 -------- t/25-analysis-bustard4pbcb.t | 141 ----- t/25-harold_calibration_bam.t | 255 --------- 9 files changed, 21 insertions(+), 1538 deletions(-) delete mode 100644 data/config_files/function_list_central_olb.yml delete mode 100644 data/config_files/illumina_pipeline.ini delete mode 100644 data/config_files/pb_cal_pipeline.ini delete mode 100644 lib/npg_pipeline/analysis/bustard4pbcb.pm delete mode 100644 lib/npg_pipeline/roles/business/harold_calibration_reqs.pm delete mode 100644 t/25-analysis-bustard4pbcb.t diff --git a/data/config_files/function_list_central_olb.yml b/data/config_files/function_list_central_olb.yml deleted file mode 100644 index 6cf8a38e7..000000000 --- a/data/config_files/function_list_central_olb.yml +++ /dev/null @@ -1,36 +0,0 @@ ---- -- create_archive_directory -- create_empty_fastq -- create_summary_link_analysis -- run_analysis_in_progress -- lane_analysis_in_progress -- bustard_matrix_lanes -- bustard_matrix_all -- bustard_phasing_lanes -- bustard_phasing_all -- bustard_basecalls_lanes -- bustard_basecalls_all -- p4_stage1_analysis -- 
update_warehouse -- update_ml_warehouse -- run_secondary_analysis_in_progress -- bam2fastqcheck_and_cached_fastq -- qc_qX_yield -- qc_adapter -- qc_insert_size -- qc_sequence_error -- qc_gc_fraction -- qc_ref_match -- seq_alignment -- update_ml_warehouse -- bam_cluster_counter_check -- seqchksum_comparator -- qc_gc_bias -- qc_pulldown_metrics -- qc_genotype -- qc_verify_bam_id -- qc_upstream_tags -- run_analysis_complete -- update_ml_warehouse -- archive_to_irods_samplesheet -- run_qc_review_pending diff --git a/data/config_files/illumina_pipeline.ini b/data/config_files/illumina_pipeline.ini deleted file mode 100644 index ed7e53f24..000000000 --- a/data/config_files/illumina_pipeline.ini +++ /dev/null @@ -1,7 +0,0 @@ -olb=/software/solexa/src/OLB-1.9.4 -bustard_exe=bin/bustard.py -bcl_to_qseq=bin/setupBclToQseq.py -control_based_phasing=autoCONTROL_LANE -lane_based_phasing=lane -control_based_matrix=autoCONTROL_LANE -lane_based_matrix=lane diff --git a/data/config_files/pb_cal_pipeline.ini b/data/config_files/pb_cal_pipeline.ini deleted file mode 100644 index 3e1959eed..000000000 --- a/data/config_files/pb_cal_pipeline.ini +++ /dev/null @@ -1,16 +0,0 @@ -cal_table_script=pb_calibration -recalibration_script=pb_predictor -alignment_script=pb_align -default_directory_for_qseqs=rta -make_stats=makeStats.pl -second_basecall_script=pb_second_basecall -cal_table_suffix=_purity_cycle_caltable.txt -default_aligner=bwa -random=5 -t_filter=2 -mem_calibration=3072 -mem_score=1725 -region_size=200 -region_mismatch_threshold=0.016 -region_insertion_threshold=0.016 -region_deletion_threshold=0.016 diff --git a/lib/npg_pipeline/analysis/bustard4pbcb.pm b/lib/npg_pipeline/analysis/bustard4pbcb.pm deleted file mode 100644 index f4a889079..000000000 --- a/lib/npg_pipeline/analysis/bustard4pbcb.pm +++ /dev/null @@ -1,260 +0,0 @@ -package npg_pipeline::analysis::bustard4pbcb; - -use Moose; -use Moose::Util::TypeConstraints; -use Carp; -use Cwd; -use File::Spec::Functions; -use 
File::Slurp; -use Try::Tiny; -use Readonly; - -use npg_pipeline::lsf_job; -extends q{npg_pipeline::base}; - -our $VERSION = '0'; - -=head1 NAME - -npg_pipeline::analysis::bustard4pbcb - -=head1 SYNOPSIS - -=head1 DESCRIPTION - -OLB bustard preprocessing for the pbcal bam pipeline - -=head1 SUBROUTINES/METHODS - -=cut - -Readonly::Scalar our $MEM_REQ => 13_800; # total MB used by a make -Readonly::Scalar our $CPUS_NUM => q{8,16}; - -subtype 'NpgPipelinePluggableObject' - => as 'Object' - => where { ref =~ /^npg_pipeline::pluggable/smxi; }; - -has q{+id_run} => ( required => 1, ); - -has q{pipeline} => ( isa => q{NpgPipelinePluggableObject}, - is => q{ro}, - ); - -has q{bustard_home} => ( isa => q{Str}, - is => q{ro}, - required => 1, - ); - -has q{script_path} => ( isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); -sub _build_script_path { - my $self = shift; - return catfile($self->illumina_pipeline_conf()->{olb}, - $self->illumina_pipeline_conf()->{bustard_exe}); -} - -has q{bustard_dir} => ( isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); -sub _build_bustard_dir { - my $self = shift; - - $self->info(q[Running Bustard makefile creation]); - my $bustard_command = $self->_bustard_command(); - $self->info("Bustard command: $bustard_command"); - my $rc = system $bustard_command; - my @lines = (); - try { - @lines = read_file($self->_bustard_output_file()); - }; - if ($rc) { - my $error= "Bustard command '$bustard_command' failed with code $rc"; - if (@lines) { - $error .= q[ ] . join q[ ], @lines; - } - $self->logcroak($error); - } - if (!@lines) { - $self->logcroak(q[No bustard output in ] . $self->_bustard_output_file()); - } - return $self->_get_bustard_dir(@lines); -} - -has q{_bustard_output_file} => ( isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); -sub _build__bustard_output_file { - my $self = shift; - return catfile($self->bustard_home, q[bustard_output_] . $self->timestamp() . 
q[.txt]); -} - -sub _get_bustard_dir { - my ($self, @lines) = @_; - my $line = q[]; - ##no critic (RegularExpressions::ProhibitEscapedMetacharacters) - foreach (@lines) { - if (/^Sequence\ folder/ixms) { - $line = $_; - last; - } - } - ## use critic - if (!$line) { - $self->logcroak(q[No record about bustard directory (Sequence folder) in ] . $self->_bustard_output_file()); - } - (my $dir) = $line =~ /:\s+(\S+)$/smx; - return $dir; -} - -sub _bustard_command { - my ($self) = shift; - - my $timestamp = $self->timestamp(); - my ($time) = $timestamp =~ /-(\d+)$/smx; - my $bustard_out = catfile($self->bustard_home, "bustard_output_$timestamp.txt"); - my @command = (); - push @command, "LOGNAME=$time"; - push @command, $self->script_path; - if ( $self->has_override_all_bustard_options() ) { - push @command, $self->override_all_bustard_options(); - } else { - push @command, '--make --CIF --keep-dif-files --no-eamss --phasing=lane --matrix=lane'; - my $tile_list = $self->tile_list() || join q[,], map {"s_$_"} $self->positions(); - push @command, "--tiles=$tile_list"; - } - push @command, $self->bustard_home; - push @command, '> ' . $self->_bustard_output_file(); - push @command, '2>&1'; - return join q[ ], @command; -} - -sub _make_command { - my ($self, $step_name, $deps) = @_; - - my $position_string = ($step_name =~ /lanes$/smx) ? $self->lsb_jobindex() : q{}, - $deps ||= q{}; - (my $target) = $step_name =~ /(matrix|phasing)/smx; - if ($target) { - if ($position_string) { - $target .= "_$position_string"; - } - $target .= q{_finished.txt}; - } else { - $target = $position_string ? "s_$position_string" : 'all'; - } - - my $job_name = join q[_], 'bustard', $step_name, $self->id_run(), $self->timestamp(); - my $index = $position_string ? q{.%I} : q{}; - my $output_name = $job_name . $index . q{.%J.out}; - $output_name = catfile(q{log} , $output_name); - if ($position_string) { - $job_name .= '[' . join(q[,], $self->positions()) . 
']'; - } - - my @command = (); - push @command, 'bsub'; - push @command, "-n $CPUS_NUM"; - push @command, '-q ' . $self->lsf_queue; - push @command, "-o $output_name"; - push @command, "-J $job_name"; - my $memory_spec = npg_pipeline::lsf_job->new(memory => $MEM_REQ)->memory_spec(); - push @command, $self->pipeline->fs_resource_string( { - resource_string => qq{$memory_spec -R 'span[hosts=1]'}, - ##no critic (BuiltinFunctions::ProhibitStringySplit) - counter_slots_per_job => (split q{,}, $CPUS_NUM)[0], - ##use critic - }); - if ($deps) { push @command, $deps; } - push @command, q['make -j `npg_pipeline_job_env_to_threads` ] . qq[$target']; - return join q[ ], @command; -} - -=head2 make - - Submits bustard 'make' jobs for post-run analysis as a single step. - - my @job_ids = $bObj->make($step_name, $required_job_completion); - -=cut - -sub make { - my ($self, $step_name, $required_job_completion) = @_; - if (!$self->pipeline) { - $self->logcroak('To submit a job, pipeline accessor should be set'); - } - my $working = getcwd(); - chdir $self->bustard_dir; - my $command = $self->_make_command($step_name, $required_job_completion); - $self->debug("Bustard make command: $command"); - my @ids = $self->pipeline->submit_bsub_command($command); - chdir $working; - return @ids; -} - -no Moose::Util::TypeConstraints; -no Moose; -__PACKAGE__->meta->make_immutable; - -1; - -__END__ - - -=head1 DIAGNOSTICS - -=head1 CONFIGURATION AND ENVIRONMENT - -=head1 DEPENDENCIES - -=over - -=item Carp - -=item Readonly - -=item Moose - -=item Moose::Util::TypeConstraints - -=item Cwd - -=item File::Spec::Functions - -=item File::Slurp - -=item Try::Tiny - -=item npg_pipeline::base - -=back - -=head1 INCOMPATIBILITIES - -=head1 BUGS AND LIMITATIONS - -=head1 AUTHOR - -Andy Brown -Marina Gourtovaia - -=head1 LICENSE AND COPYRIGHT - -Copyright (C) 2014 Genome Research Ltd - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public 
License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . diff --git a/lib/npg_pipeline/analysis/harold_calibration_bam.pm b/lib/npg_pipeline/analysis/harold_calibration_bam.pm index e748267d0..3d00ec5e9 100644 --- a/lib/npg_pipeline/analysis/harold_calibration_bam.pm +++ b/lib/npg_pipeline/analysis/harold_calibration_bam.pm @@ -1,30 +1,18 @@ package npg_pipeline::analysis::harold_calibration_bam; use Moose; -use Carp; -use English qw{-no_match_vars}; -use Cwd; use Readonly; -use List::MoreUtils qw{any}; -use File::Spec; -use File::Basename; - use npg_tracking::util::types; use npg_pipeline::lsf_job; +extends 'npg_pipeline::base'; +with 'npg_common::roles::software_location'; + our $VERSION = '0'; -Readonly::Scalar our $PB_ALIGN_BAM_PREFIX => q{pb_align_}; Readonly::Scalar our $MAKE_STATS_J => 4; Readonly::Scalar our $MAKE_STATS_MEM => 350; - -extends q{npg_pipeline::base}; -with qw{ - npg_common::roles::software_location - npg_pipeline::roles::business::harold_calibration_reqs -}; - =head1 NAME npg_pipeline::analysis::harold_calibration_bam @@ -35,44 +23,23 @@ npg_pipeline::analysis::harold_calibration_bam =head1 DESCRIPTION -Object runner to launch internal calibration instead of CASAVA based calibration - =head1 SUBROUTINES/METHODS -=head2 spatial_filter_path - -Absolute path to spatial_filter executable - -=cut - -has 'spatial_filter_path' => ( - is => 'ro', - isa => 'NpgCommonResolvedPathExecutable', - coerce => 1, - default => 'spatial_filter', - ); - -=head2 pb_calibration_bin +=head2 bcl2qseq -Directory where pb bcalibration family executables are 
+Absolute path to executable that generates Illumina basecall stats =cut -has 'pb_calibration_bin' => ( - isa => 'NpgTrackingDirectory', - is => 'ro', - lazy => 1, - builder => '_build_pb_calibration_bin', - ); -sub _build_pb_calibration_bin { - my $self = shift; - return dirname($self->spatial_filter_path()); -} +has 'bcl2qseq' => ( isa => 'NpgTrackingExcecutable', + is => 'ro', + default => 'setupBclToQseq.py', +); -sub _generate_illumina_basecall_stats_command { +sub _generate_command { my ( $self, $arg_refs ) = @_; - my $job_dependencies = $arg_refs->{required_job_completion}; + my $job_dependencies = $arg_refs->{'required_job_completion'}; my $basecall_dir = $self->basecall_path(); my $dir = $self->bam_basecall_path(); @@ -91,7 +58,7 @@ sub _generate_illumina_basecall_stats_command { my $hosts = 1; my $memory_spec = join q[], npg_pipeline::lsf_job->new(memory => $MAKE_STATS_MEM)->memory_spec(), " -R 'span[hosts=$hosts]'"; push @command, $self->fs_resource_string( { - resource_string => $memory_spec, + resource_string => $memory_spec, counter_slots_per_job => $MAKE_STATS_J, } ); push @command, q{-n } . $MAKE_STATS_J; @@ -99,8 +66,7 @@ sub _generate_illumina_basecall_stats_command { push @command, q["]; # " enclose command in quotes - my $bcl2qseq_path = join q[/], $self->illumina_pipeline_conf()->{olb}, $self->illumina_pipeline_conf()->{bcl_to_qseq}; - + my $bcl2qseq_path = $self->bcl2qseq; my $cmd = join q[ && ], qq{cd $dir}, q{if [[ -f Makefile ]]; then echo Makefile already present 1>&2; else echo creating bcl2qseq Makefile 1>&2; }. @@ -117,452 +83,14 @@ sub _generate_illumina_basecall_stats_command { =head2 generate_illumina_basecall_stats -Use Illumina tools to generate the (per run) BustardSummary and IVC reports (from on instrument RTA basecalling). +Use Illumina tools to generate the (per run) BustardSummary +and IVC reports (from on instrument RTA basecalling). 
=cut sub generate_illumina_basecall_stats{ my ( $self, $arg_refs ) = @_; - my @id_runs = $self->submit_bsub_command( $self->_generate_illumina_basecall_stats_command($arg_refs) ); - return @id_runs; -} - -=head2 generate_alignment_files - -submit the jobs which will generate bam alignment files ready to pass onto calibration table generator - - my $aJobIds = $oHaroldCalibration->generate_alignment_files({ - required_job_completion => $sJobRequirenmentString, - }); - -=cut - -sub generate_alignment_files { - my ( $self, $arg_refs ) = @_; - - my $job_ids = []; - my $job_dependencies = $arg_refs->{'required_job_completion'}; - - # create the calibration directory - my $pb_cal_dir = $self->create_pb_calibration_directory(); - - $self->_set_recalibrated_path( $self->pb_cal_path() ); - - foreach my $position ( $self->positions ) { - if ( ! $self->is_spiked_lane( $position ) ){ - $self->warn("Lane $position is not spiked with phiX, no PB_cal alignment job needed"); - next; - } - $self->_generate_alignment_file_per_lane({ - position => $position, - job_ids => $job_ids, - job_dependencies => $job_dependencies - }); - } - - return @{ $job_ids }; -} - -=head2 generate_calibration_table - -submit the bsub jobs which will create the calibration tables, returning an array of job_ids. - - my $aJobIds = $oHaroldCalibration->generate_calibration_table({ - required_job_completion => $sJobRequirenmentString, - }); - -=cut - -sub generate_calibration_table { - my ($self, $arg_refs) = @_; - - if ( !$self->recalibration() ) { - $self->warn(q{This has been set to run with no recalibration step}); - return (); - } - - my $job_ids = []; - my $job_dependencies = $arg_refs->{'required_job_completion'}; - - # create the calibration directory - my $pb_cal_dir = $self->create_pb_calibration_directory(); - - $self->_set_recalibrated_path( $self->pb_cal_path() ); - - my $snp_file = $self->control_snp_file(); - - foreach my $position ( $self->positions ) { - if ( ! 
$self->is_spiked_lane( $position ) ){ - $self->warn("Lane $position is not spiked with phiX, no PB_cal calibration table job needed"); - next; - } - $self->_generate_calibration_table_per_lane( { - position => $position, - job_ids => $job_ids, - job_dependencies => $job_dependencies, - snp_file => $snp_file, - } ); - } - - return @{ $job_ids }; -} - -=head2 generate_recalibrated_bam - -submit the bsub jobs which will recalibrate the lanes, returning an array of job_ids. - - my $aJobIds = $oHaroldCalibration->generate_recalibrated_bam({ - required_job_completion => $sJobRequirenmentString, - }); - -=cut - -sub generate_recalibrated_bam { - my ($self, $arg_refs) = @_; - - $self->_bam_merger_cmd(); - - my $pb_cal_dir = $self->pb_cal_path(); - - if ( ! $self->directory_exists($pb_cal_dir) ) { - $self->warn(qq{$pb_cal_dir does not exist, not executing jobs}); - return (); - } - - my $job_ids = []; - my $job_dependencies = $arg_refs->{'required_job_completion'}; - - foreach my $position ( $self->positions ) { - my $arg_ref_hash = { - job_ids => $job_ids, - position => $position, - job_dependencies => $job_dependencies, - }; - $self->_generate_recalibrated_bam_per_lane( $arg_ref_hash ); - } - - return @{ $job_ids }; -} - -########## -# private methods - -sub _generate_recalibrated_bam_per_lane { - my ( $self, $arg_refs ) = @_; - - my $lane = $arg_refs->{'position'}; - - my $cal_table_1_to_use = $self->calibration_table_name( { - id_run => $self->id_run(), - position => $lane, - } ); - - - my $args_bam= { - position => $lane, - job_dependencies => $arg_refs->{'job_dependencies'}, - ct => $cal_table_1_to_use, - }; - - my $bsub_command = $self->_recalibration_bsub_command( $args_bam ); - $self->debug($bsub_command); - - push @{ $arg_refs->{'job_ids'} }, $self->submit_bsub_command( $bsub_command ); - - return; -} - -sub _generate_calibration_table_per_lane { - my ( $self, $arg_refs ) = @_; - - my $args = { - position => $arg_refs->{'position'}, - job_dependencies => 
$arg_refs->{'job_dependencies'}, - is_spiked_phix => 1, - snp_file => $arg_refs->{'snp_file'}, - }; - - my $bsub_command = $self->_calibration_table_bsub_command( $args ); - - $self->debug($bsub_command); - - push @{ $arg_refs->{'job_ids'} }, $self->submit_bsub_command($bsub_command); - - return; -} - -# generate the alignment file -sub _generate_alignment_file_per_lane { - my ( $self, $arg_refs ) = @_; - - my $bsub_command = $self->_alignment_file_bsub_command( { - position => $arg_refs->{'position'}, - job_dependencies => $arg_refs->{'job_dependencies'}, - ref_seq => $self->control_ref(), - is_paired => $self->is_paired_read(), - is_spiked_phix => 1, - } ); - - $self->debug($bsub_command); - - push @{ $arg_refs->{'job_ids'} }, $self->submit_bsub_command($bsub_command); - - return; -} - -# generate bsub command for generating the alignment files required -sub _alignment_file_bsub_command { - my ( $self, $arg_refs ) = @_; - - my $position = $arg_refs->{'position'}; - my $job_dependencies = $arg_refs->{'job_dependencies'}; - my $ref_seq = $arg_refs->{'ref_seq'}; - my $is_paired = $arg_refs->{'is_paired'}; - my $is_spiked_phix = $arg_refs->{'is_spiked_phix'}; - - my $mem_size = $self->general_values_conf()->{bam_creation_memory}; - my $timestamp = $self->timestamp(); - my $bsub_queue = $self->lsf_queue; - my $id_run = $self->id_run(); - - my $job_name = $self->is_paired_read() ? $self->align_job() . q{_} . $id_run . q{_} . $position . q{_paired_} . $timestamp - : $self->align_job() . q{_} . $id_run . q{_} . $position . q{_} . $timestamp - ; - - my @command; - push @command, q{cd}, $self->pb_cal_path(), q{&&}; - push @command, $self->pb_calibration_bin() . q{/} . $self->alignment_script(); - push @command, q{--aln_parms "-t "`npg_pipeline_job_env_to_threads` }; - push @command, q{--sam_parms "-t "`npg_pipeline_job_env_to_threads --maximum 8` }; - if ($self->spatial_filter) { - push @command, q{--spatial_filter}; - push @command, q{--sf_parms "} . q{--region_size } . 
$self->pb_cal_pipeline_conf()->{region_size} . q{ } - . q{--region_mismatch_threshold } . $self->pb_cal_pipeline_conf()->{region_mismatch_threshold} . q{ } - . q{--region_insertion_threshold } . $self->pb_cal_pipeline_conf()->{region_insertion_threshold} . q{ } - . q{--region_deletion_threshold } . $self->pb_cal_pipeline_conf()->{region_deletion_threshold} . q{ } - . q{--tileviz } . $self->qc_path . q{/} . q(tileviz) . q{/} .$id_run. q{_} . $position . q{ } - . q{"}; - push @command, q{--bam_join_jar } . $self->_bam_merger_jar; - }; - push @command, q{--ref } . $ref_seq; - if( $is_paired ) { - push @command, q{--read1 1}; - push @command, q{--read2 2}; - } else { - push @command, q{--read 0}; - } - push @command, q{--bam }.$self->bam_basecall_path().q{/}.$id_run.q{_}.$position.q{.bam}; - push @command, q{--prefix } . $PB_ALIGN_BAM_PREFIX . $id_run.q{_}.$position; - push @command, q{--pf_filter}; - - my $job_command = join q[ ], @command; - $job_command=~s/'/'"'"'/smxg; - - @command = (); - push @command, 'bsub'; - push @command, "-q $bsub_queue"; - push @command, $self->ref_adapter_pre_exec_string(); - push @command, q{-o }.$self->pb_cal_path().q{/log/}. $job_name . q{.%J.out}; - push @command, "-J $job_name"; - - my $hosts = 1; - my $memory_spec = join q[], npg_pipeline::lsf_job->new(memory => $mem_size)->memory_spec(), " -R 'span[hosts=$hosts]'"; - push @command, $self->fs_resource_string( { - resource_string => $memory_spec, - counter_slots_per_job => $self->general_values_conf()->{io_resource_slots}, - } ); - push @command, q{-n } . 
$self->general_values_conf()->{bwa_aln_threads}; - push @command, $job_dependencies || q[]; - push @command, "'$job_command'"; # " enclose command in quotes - - return join q[ ], @command; -} - - - -# generate bsub command for recalibrating the lane qseq data -sub _recalibration_bsub_command { - my ($self, $arg_refs) = @_; - my $position = $arg_refs->{'position'}; - my $job_dependencies = $arg_refs->{'job_dependencies'}; - my $id_run = $self->id_run(); - - my $output_bam = $id_run . q{_} . $position . q{.bam}; - my $output_bam_md5 = $output_bam . q{.md5}; - my $input_bam = q{../} . $output_bam; - my $input_bam_md5 = $input_bam . q{.md5}; - my $phix_bam = $PB_ALIGN_BAM_PREFIX . $id_run . q{_} .$position . q{.bam}; - - #pb_calibration_cmd - my @command_pb_cal; - push @command_pb_cal, $self->pb_calibration_bin() . q{/} . $self->recalibration_script(); - push @command_pb_cal, q{--u}; - push @command_pb_cal, q{--bam } . $input_bam; - if ($self->dif_files_path()) { - push @command_pb_cal, q{--intensity_dir } . 
$self->dif_files_path(); # for dif file location, it should be bustard_dir if OLB - } - - my $cycle_start1 = 1; - my $alims = $self->lims->associated_child_lims_ia; - #if read 1 has an inline index reset cycle_start1 to the first cycle after the index - if ($alims->{$position}->inline_index_exists && $alims->{$position}->inline_index_read == 1) { - $cycle_start1 += $alims->{$position}->inline_index_end; - } - if( !$self->is_paired_read() ){ - push @command_pb_cal, qq{--cstart $cycle_start1}; - }else{ - push @command_pb_cal, qq{--cstart1 $cycle_start1}; - my @r2r = $self->read2_cycle_range(); - my $cycle_start2 = $r2r[0]; - #if read 2 has an inline index reset cycle_start2 to the first cycle after the index - if ($alims->{$position}->inline_index_exists && $alims->{$position}->inline_index_read == 2) { - $cycle_start2 += $alims->{$position}->inline_index_end; - } - push @command_pb_cal, qq{--cstart2 $cycle_start2}; - } - - my $cl_table1 = $arg_refs->{ct}; - push @command_pb_cal, qq{--ct $cl_table1}; - - my $pb_calibration_cmd = join q[ ], @command_pb_cal; - #finish pb_calibration_cmd; - - #bam merge command - my $bam_merge_cmd = q{ } . $self->_bam_merger_cmd() . qq{ O=$output_bam ALIGNED=$phix_bam}; - - #bjob now - my $mem_size = $self->mem_score(); - my $timestamp = $self->timestamp(); - my $bsub_queue = $self->lsf_queue; - my $job_name = $self->score_job() . q{_} . $id_run . q{_} . $position . q{_} . $timestamp; - - my @command; - push @command, 'bsub'; - push @command, "-q $bsub_queue"; - push @command, q{-o }.$self->pb_cal_path().q{/log/}. $job_name . 
q{.%J.out}; - push @command, "-J $job_name"; - - my $hosts = 1; - my $memory_spec = join q[], npg_pipeline::lsf_job->new(memory => $mem_size)->memory_spec(), " -R 'span[hosts=$hosts]'"; - push @command, $self->fs_resource_string( { - resource_string => $memory_spec, - counter_slots_per_job => 2 * $self->general_values_conf()->{io_resource_slots}, - } ); - push @command, $job_dependencies || q[]; - - push @command, q[']; # ' enclose command in quotes - push @command, q{cd}, $self->pb_cal_path(), q{&&}; - - my $check_cl_table = qq{-f $cl_table1}; - - my $check_cmd = qq{if [[ -f $phix_bam ]]; then echo phix alignment so merging alignments with 1>&2; set -o pipefail; (if [ $check_cl_table ]; then echo recalibrated qvals 1>&2; $pb_calibration_cmd ; else echo no recalibration 1>&2; cat $input_bam ; fi;) | }; - if ($self->spatial_filter) { - $check_cmd .= qq{ ( if [[ -f ${phix_bam}.filter ]]; then echo applying spatial filter 1>&2; } . $self->pb_calibration_bin() . q{/} . - qq{spatial_filter -u -a -f -F ${phix_bam}.filter - } . - q{2> >( tee /dev/stderr | } . qq{qc --check spatial_filter --id_run $id_run --position $position --qc_out } . $self->qc_path . q{ );} . - q{ else echo no spatial filter 1>&2; cat; fi;) | }; - } - $check_cmd .= qq{$bam_merge_cmd; else echo symlinking as no phix alignment 1>&2; rm -f $output_bam; ln -s $input_bam $output_bam; rm -f $output_bam_md5; ln -s $input_bam_md5 $output_bam_md5; fi}; - $check_cmd =~ s/'/'"'"'/smxg; # cope with any single ' quote in the command when submitting command within single ' quote in bash -c argument - null op here? - $check_cmd = "bash -c '$check_cmd'"; # >( ...) 
is a bash'ish - - $check_cmd =~ s/'/'"'"'/smxg; # cope with any single ' quote in the command when submitting command within single ' quote in bsub command line argument - push @command,$check_cmd; - - push @command, q[']; # ' closing quote - - my $bsub_command = join q[ ], @command; - - return $bsub_command; -} - -# generate bsub command for generating the calibration table required -sub _calibration_table_bsub_command { - my ($self, $arg_refs) = @_; - my $position = $arg_refs->{'position'}; - my $job_dependencies = $arg_refs->{'job_dependencies'}; - - my $mem_size = $self->mem_calibration(); - my $timestamp = $self->timestamp(); - my $bsub_queue = $self->lsf_queue; - my $id_run = $self->id_run(); - - my $job_name = $self->cal_table_job() . q{_} . $id_run . q{_} . $position . q{_} . $timestamp ; - - my @command; - push @command, 'bsub'; - push @command, "-q $bsub_queue"; - push @command, $self->ref_adapter_pre_exec_string(); - push @command, q{-o }.$self->pb_cal_path().q{/log/}. $job_name . q{.%J.out}; - push @command, "-J $job_name"; - - my $hosts = 1; - my $memory_spec = join q[], npg_pipeline::lsf_job->new(memory => $mem_size)->memory_spec(), " -R 'span[hosts=$hosts]'"; - push @command, $self->fs_resource_string( { - resource_string => $memory_spec, - counter_slots_per_job => 2 * $self->general_values_conf()->{io_resource_slots}, - } ); - push @command, $job_dependencies || q[]; - - push @command, q["]; # " enclose command in quotes - push @command, q{cd}, $self->pb_cal_path(), q{&&}; - push @command, $self->pb_calibration_bin() . q{/} . $self->cal_table_script(); - push @command, q{--intensity_dir }. $self->dif_files_path(); # for dif file location, change to bustard if olb - push @command, q{--t_filter } . $self->t_filter(); - push @command, q{--prefix } . $id_run . q{_} . 
$position ; - - my $cycle_start1 = 1; - #if read 1 has an inline index reset cycle_start1 to the first cycle after the index - my $alims = $self->lims->associated_child_lims_ia; - if ($alims->{$position}->inline_index_exists && $alims->{$position}->inline_index_read == 1) { - $cycle_start1 += $alims->{$position}->inline_index_end; - } - if( !$self->is_paired_read() ){ - push @command, qq{--cstart $cycle_start1}; - }else{ - push @command, qq{--cstart1 $cycle_start1}; - my @r2r = $self->read2_cycle_range(); - my $cycle_start2 = $r2r[0]; - #if read 2 has an inline index reset cycle_start2 to the first cycle after the index - if ($alims->{$position}->inline_index_exists && $alims->{$position}->inline_index_read == 2) { - $cycle_start2 += $alims->{$position}->inline_index_end; - } - push @command, qq{--cstart2 $cycle_start2}; - } - - if ( $arg_refs->{is_spiked_phix} ) { - if (!$arg_refs->{snp_file}) { - $self->logcroak('SNP file not available'); - } - push @command, q{--snp } . $arg_refs->{snp_file}; - } - - push @command, qq{--bam ${PB_ALIGN_BAM_PREFIX}${id_run}_${position}.bam}; - - push @command, q["]; # " closing quote - - my $bsub_command = join q[ ], @command; - return $bsub_command; -} - -has q{_bam_merger_jar} => ( - isa => q{NpgCommonResolvedPathJarFile}, - is => q{ro}, - coerce => 1, - default => q{BamMerger.jar}, - ); - -has q{_bam_merger_cmd} => (isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); - -sub _build__bam_merger_cmd{ - my $self = shift; - - return $self->java_cmd . q{ -Xmx1024m} - . q{ -jar } . $self->_bam_merger_jar() - . 
q{ CREATE_MD5_FILE=true VALIDATION_STRINGENCY=SILENT KEEP=true I=/dev/stdin REPLACE_QUAL=true}; + return $self->submit_bsub_command($self->_generate_command($arg_refs)); } no Moose; @@ -583,18 +111,8 @@ __END__ =item Moose -=item Carp - -=item English -no_match_vars - =item Readonly -=item List::MoreUtils - -=item File::Basename - -=item File::Spec - =item npg_tracking::util::types =item npg_common::roles::software_location @@ -607,11 +125,11 @@ __END__ =head1 AUTHOR -Guoying Qi +Steven Leonard =head1 LICENSE AND COPYRIGHT -Copyright (C) 2015 Genome Research Ltd +Copyright (C) 2017 Genome Research Ltd This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/lib/npg_pipeline/pluggable/harold/central.pm b/lib/npg_pipeline/pluggable/harold/central.pm index 799100ac7..7190aaf23 100644 --- a/lib/npg_pipeline/pluggable/harold/central.pm +++ b/lib/npg_pipeline/pluggable/harold/central.pm @@ -3,7 +3,6 @@ package npg_pipeline::pluggable::harold::central; use Moose; use Carp; use English qw{-no_match_vars}; -use Readonly; use File::Spec; use List::MoreUtils qw/any/; @@ -26,25 +25,8 @@ Pluggable module runner for the main pipeline =cut -Readonly::Array our @OLB_FUNCTIONS => qw/ matrix_lanes matrix_all - phasing_lanes phasing_all - basecalls_lanes basecalls_all - /; =head1 SUBROUTINES/METHODS -=cut - -has '_pbcal_obj' => ( - isa => 'npg_pipeline::analysis::harold_calibration_bam', - is => 'ro', - lazy => 1, - builder => '_build_pbcal_obj', - ); -sub _build_pbcal_obj { - my $self = shift; - return $self->new_with_cloned_attributes(q{npg_pipeline::analysis::harold_calibration_bam}); -} - =head2 prepare Sets all paths needed during the lifetime of the analysis runfolder. @@ -58,7 +40,6 @@ override 'prepare' => sub { my $self = shift; $self->_set_paths(); super(); # Correct order! 
- $self->_inject_bustard_functions(); return; }; @@ -172,34 +153,6 @@ sub _set_bam_basecall_dependent_paths { return; } - -#### -# Dynamically creates functions to run OLB preprocessing. -# -sub _inject_bustard_functions { - my $self = shift; - - foreach my $function (@OLB_FUNCTIONS) { - ##no critic (TestingAndDebugging::ProhibitNoStrict TestingAndDebugging::ProhibitNoWarnings) - no strict 'refs'; - no warnings 'redefine'; - my $fpointer = 'bustard_' . $function; - if ($self->olb) { - *{$fpointer}= sub { my ($self, @args) = @_; - my $job_dep = shift @args; - return npg_pipeline::analysis::bustard4pbcb->new( - pipeline=>$self, - bustard_home=>$self->intensity_path, - bustard_dir=>$self->basecall_path, - id_run=>$self->id_run, - lanes=>$self->lanes)->make($function,$job_dep); }; - } else { - *{$fpointer}= sub { $self->info('OLB preprocessing switched off, not running ' . $function ); return (); } - } - } - return; -} - =head2 illumina_basecall_stats Use Illumina tools to generate the (per run) BustardSummary and IVC reports (from on instrument RTA basecalling). 
@@ -213,50 +166,8 @@ sub illumina_basecall_stats { $self->info(q{HiSeqX sequencing instrument, illumina_basecall_stats will not be run}); return (); } - return $self->_run_harold_steps( q{generate_illumina_basecall_stats}, @args); -} - -=head2 harold_alignment_files - -Generate the alignment files to now be used for generating calibration tables - -=cut - -sub harold_alignment_files { - my ($self, @args) = @_; - return $self->_run_harold_steps( q{generate_alignment_files}, @args); -} - -=head2 harold_calibration_tables - -Generate the calibration tables used for harold recalibration - -=cut - -sub harold_calibration_tables { - my ($self, @args) = @_; - if ( !$self->recalibration() ) { - $self->info(q{recalibration is false, no recalibration will be performed}); - return (); - } - return $self->_run_harold_steps( q{generate_calibration_table}, @args); -} - -=head2 harold_recalibration - -submit the recalibration jobs - -=cut - -sub harold_recalibration { - my ($self, @args) = @_; - return $self->_run_harold_steps( q{generate_recalibrated_bam}, @args); -} - -sub _run_harold_steps { - my ($self, $method, @args) = @_; - my $required_job_completion = shift @args; - return $self->_pbcal_obj->$method({required_job_completion => $required_job_completion,}); + return $self->new_with_cloned_attributes(q{npg_pipeline::analysis::harold_calibration_bam}) + ->generate_illumina_basecall_stats(@args); } =head2 split_bam_by_tag @@ -395,8 +306,6 @@ __END__ =item English -no_match_vars -=item Readonly - =item File::Spec =item List::MoreUtils @@ -413,7 +322,7 @@ Guoying Qi =head1 LICENSE AND COPYRIGHT -Copyright (C) 2014 Genome Research Limited +Copyright (C) 2017 Genome Research Limited This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/lib/npg_pipeline/roles/business/harold_calibration_reqs.pm b/lib/npg_pipeline/roles/business/harold_calibration_reqs.pm deleted file mode 100644 index 
551a4317c..000000000 --- a/lib/npg_pipeline/roles/business/harold_calibration_reqs.pm +++ /dev/null @@ -1,229 +0,0 @@ -package npg_pipeline::roles::business::harold_calibration_reqs; - -use Moose::Role; -use English qw{-no_match_vars}; -use Carp; -use Readonly; - -requires qw{directory_exists}; - -our $VERSION = '0'; - -# hard-coded default parameters for running harold_calibration steps, can be overriden on the command line -# whilst these can be overridden, they are requested to be how the pipeline operates, so we don't -# want them in a config file -Readonly::Scalar our $CAL_TABLE_JOB => q{PB_cal_table}; -Readonly::Scalar our $SCORE_JOB => q{PB_cal_score}; -Readonly::Scalar our $ALIGN_JOB => q{PB_cal_align}; -Readonly::Scalar our $PB_DIRECTORY => q{PB_cal}; - -=head1 NAME - -npg_pipeline::roles::business::harold_calibration_reqs - -=head1 SYNOPSIS - - package MyPackage; - use Moose; - ... - with qw{npg_pipeline::roles::business::harold_calibration_reqs}; - -=head1 DESCRIPTION - -This role is designed to be able to apply all the harold calibration variables, including lsf requirements, -for internal running of the harold calibration steps. 
- -Note, your class must provide the following methods - - 'directory_exists' - -=head1 SUBROUTINES/METHODS -=cut - -has q{random} => (isa => q{Int}, is => q{ro}, lazy_build => 1, - documentation => q{Default from pb_cal_pipeline.ini},); - -sub _build_random { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{random}; -} - -has q{t_filter} => (isa => q{Int}, is => q{ro}, lazy_build => 1, - documentation => q{t_filter value},); - -sub _build_t_filter { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{t_filter}; -} - -has q{mem_calibration} => (isa => q{Int}, is => q{ro}, lazy_build => 1, - documentation => q{memory to be used for calibration table creation jobs},); - -sub _build_mem_calibration { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{mem_calibration}; -} - -has q{mem_score} => (isa => q{Int}, is => q{ro}, lazy_build => 1, - documentation => q{memory to be used for scoring jobs},); - -sub _build_mem_score { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{mem_score}; -} - -has q{cal_table_job} => (isa => q{Str}, is => q{ro}, lazy_build => 1, - documentation => qq{Default : $CAL_TABLE_JOB},); - -sub _build_cal_table_job { return $CAL_TABLE_JOB; } - -has q{cal_table_script} => (isa => q{Str}, is => q{ro}, lazy_build => 1, - documentation => q{Default from pb_cal_pipeline.ini},); - -sub _build_cal_table_script { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{cal_table_script}; -} - -has q{align_job} => (isa => q{Str}, is => q{ro}, lazy_build => 1, - documentation => qq{Default : $ALIGN_JOB},); - -sub _build_align_job { return $ALIGN_JOB; } - -has q{alignment_script} => (isa => q{Str}, is => q{ro}, lazy_build => 1, - documentation => q{Default from pb_cal_pipeline.ini},); - -sub _build_alignment_script { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{alignment_script}; -} - -has q{recalibration_script} => (isa => q{Str}, is => q{ro}, lazy_build => 1, - documentation => q{Default 
from pb_cal_pipeline.ini},); - -sub _build_recalibration_script { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{recalibration_script}; -} - -has q{pb_directory} => (isa => q{Str}, is => q{ro}, lazy_build => 1, init_arg => undef); - -sub _build_pb_directory { return $PB_DIRECTORY; } - -has q{score_job} => (isa => q{Str}, is => q{ro}, lazy_build => 1, - documentation => qq{Default : $SCORE_JOB},); - -sub _build_score_job { return $SCORE_JOB; } - -has q{region_size} => (isa => q{Int}, is => q{ro}, lazy_build => 1, - documentation => q{Default in pb_cal_pipeline.ini},); - -sub _build_region_size { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{region_size}; -} - -=head2 calibration_table_name - -generates the calibration table name expected, requiring the id_run and read to be passes in -if no control lane can be worked out, will return an empty string - - my $sCalibrationTableName = $class->calibration_table_name( $iIdRun, $iRead ); - -=cut - -sub calibration_table_name { - my ($self, $arg_refs ) = @_; - my $id_run = $arg_refs->{id_run}; - if( $arg_refs->{read} ) { - $self->logcroak(q{read is a deprecated argument}); - } - my $position = $arg_refs->{position}; - # set the mode - if( $arg_refs->{mode} ) { - $self->logcroak(q{mode is a deprecated argument}); - } - - if ( ! $position ) { - $self->warn(q{no position obtained}); - return q{}; - } - - return $id_run . q{_} . $position . $self->pb_cal_pipeline_conf()->{cal_table_suffix}; -} - - -=head2 create_pb_calibration_directory - -checks for the existence of a pb_calibration directory and if it doesn't exist, will create it - -returns the path of the pb_calibration directory - -=cut - -sub create_pb_calibration_directory { - my ( $self ) = @_; - - my $pb_cal_dir = $self->pb_cal_path(); - - if ( ! 
$self->directory_exists( $pb_cal_dir ) ) { - $self->info(qq{Creating $pb_cal_dir}); - - my $output = qx[mkdir $pb_cal_dir]; - if ($CHILD_ERROR) { - $self->logcroak(qq{Unable to create $pb_cal_dir}); - } - - $self->info(qq{Created : $output}); - } - - $self->make_log_dir( $pb_cal_dir ); - - return $pb_cal_dir; -} - -1; -__END__ - -=head1 DIAGNOSTICS - -=head1 CONFIGURATION AND ENVIRONMENT - -=head1 DEPENDENCIES - -=over - -=item Moose::Role - -=item Carp - -=item English -no_match_vars - -=item Readonly - -=back - -=head1 INCOMPATIBILITIES - -=head1 BUGS AND LIMITATIONS - -=head1 AUTHOR - -Andy Brown - -=head1 LICENSE AND COPYRIGHT - -Copyright (C) 2014 Genome Research Ltd - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . 
diff --git a/t/25-analysis-bustard4pbcb.t b/t/25-analysis-bustard4pbcb.t deleted file mode 100644 index cc7490772..000000000 --- a/t/25-analysis-bustard4pbcb.t +++ /dev/null @@ -1,141 +0,0 @@ -use strict; -use warnings; -use Test::More tests => 18; -use Test::Exception; -use Log::Log4perl qw(:levels); -use t::util; - -my $util = t::util->new(); -my $tmp_dir = $util->temp_directory(); - -Log::Log4perl->easy_init({layout => '%d %-5p %c - %m%n', - level => $DEBUG, - file => join(q[/], $tmp_dir, 'logfile'), - utf8 => 1}); - -$ENV{TEST_DIR} = $tmp_dir; -$ENV{TEST_FS_RESOURCE} = q{nfs_12}; -local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[t/data]; -local $ENV{PATH} = join q[:], q[t/bin], q[t/bin/software/solexa/bin], $ENV{PATH}; -my $mem_units = 'MB'; - -use_ok(q{npg_pipeline::analysis::bustard4pbcb}); - -my $runfolder_path = $util->analysis_runfolder_path(); -my $bustard_home = qq{$runfolder_path/Data/Intensities}; -my $bustard_rta = qq{$bustard_home/Bustard_RTA}; -my $config_path = qq{$runfolder_path/Config}; - -my $req_job_completion = q{-w'done(123) && done(321)'}; - -sub set_staging_analysis_area { - `rm -rf $tmp_dir/nfs/sf45`; - `mkdir -p $bustard_home`; - `mkdir $config_path`; - return 1; -} - -{ - set_staging_analysis_area(); - my $bustard; - lives_ok { - $bustard = npg_pipeline::analysis::bustard4pbcb->new( - id_run => 1234, - bustard_home => $bustard_home, - ); - } q{no croak creating new object with id_run and bustard_home attributes}; - isa_ok($bustard, q{npg_pipeline::analysis::bustard4pbcb}, q{$bustard}); - - require "npg_pipeline/pluggable.pm"; - lives_ok { - $bustard = npg_pipeline::analysis::bustard4pbcb->new( - pipeline => npg_pipeline::pluggable->new(id_run=>1), - id_run => 1234, - bustard_home => $bustard_home, - bustard_dir => join ($bustard_home, 'BUSTARD_NPG'), - ); - } q{no croak creating new object with pipeline, id_run, bustard_home and bustard_dir attributes}; - is($bustard->script_path, '/software/solexa/src/OLB-1.9.4/bin/bustard.py', 'live bustard 
script path'); - - throws_ok { - npg_pipeline::analysis::bustard4pbcb->new( - pipeline => $util, - id_run => 1234, - bustard_home => $bustard_home, - bustard_dir => join ($bustard_home, 'BUSTARD_NPG'), - )} qr/Validation failed for 'NpgPipelinePluggableObject'/, 'error when pipeline object has wrong type'; -} - -{ - my $bustard = npg_pipeline::analysis::bustard4pbcb->new( - pipeline => npg_pipeline::pluggable->new(), - id_run => 1234, - bustard_home => $bustard_home, - bustard_dir => join ($bustard_home, 'BUSTARD_NPG'), - timestamp => '20091028-101635', - script_path => '/bin/true', - ); - - my $lsf_index_string = $bustard->lsb_jobindex(); - - my $expected_cmd = qq{LOGNAME=101635 /bin/true --make --CIF --keep-dif-files --no-eamss --phasing=lane --matrix=lane --tiles=s_1,s_2,s_3,s_4,s_5,s_6,s_7,s_8 $bustard_home > $bustard_home/bustard_output_20091028-101635.txt 2>&1}; - is( $bustard->_bustard_command(), $expected_cmd, q{bustard command}); - - my $mem = 13800; - my $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>$mem_units)->_scale_mem_limit(); - $expected_cmd = q{bsub -n 8,16 -q srpipeline -o log/bustard_basecalls_all_1234_20091028-101635.%J.out -J bustard_basecalls_all_1234_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.q{ -R 'span[hosts=1]' -w'done(123) && done(321)' 'make -j `npg_pipeline_job_env_to_threads` all'}; - is( $bustard->_make_command('basecalls_all', $req_job_completion), $expected_cmd, q{command for basecalls all generated correctly}); - - $expected_cmd = q{bsub -n 8,16 -q srpipeline -o log/bustard_basecalls_lanes_1234_20091028-101635.%I.%J.out -J bustard_basecalls_lanes_1234_20091028-101635[1,2,3,4,5,6,7,8] -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.q{ -R 'span[hosts=1]' -w'done(123) && done(321)' 'make -j `npg_pipeline_job_env_to_threads` s_} . $lsf_index_string . 
q{'}; - is( $bustard->_make_command('basecalls_lanes', $req_job_completion), $expected_cmd, q{command for basecall lanes generated correctly}); - - $bustard = npg_pipeline::analysis::bustard4pbcb->new( - pipeline => npg_pipeline::pluggable->new(), - id_run => 1234, - bustard_home => $bustard_home, - bustard_dir => join ($bustard_home, 'BUSTARD_NPG'), - timestamp => '20091028-101635', - script_path => 'bustard_script', - lanes => [1,3,5], - ); - - $expected_cmd = qq{LOGNAME=101635 bustard_script --make --CIF --keep-dif-files --no-eamss --phasing=lane --matrix=lane --tiles=s_1,s_3,s_5 $bustard_home > $bustard_home/bustard_output_20091028-101635.txt 2>&1}; - is( $bustard->_bustard_command(), $expected_cmd, q{bustard command}); - - $expected_cmd = q{bsub -n 8,16 -q srpipeline -o log/bustard_matrix_lanes_1234_20091028-101635.%I.%J.out -J bustard_matrix_lanes_1234_20091028-101635[1,3,5] -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.q{ -R 'span[hosts=1]' 'make -j `npg_pipeline_job_env_to_threads` matrix_`echo $LSB_JOBINDEX`_finished.txt'}; - is ($bustard->_make_command('matrix_lanes'), $expected_cmd, 'matrix lane command'); - - $expected_cmd = q{bsub -n 8,16 -q srpipeline -o log/bustard_phasing_all_1234_20091028-101635.%J.out -J bustard_phasing_all_1234_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.q{ -R 'span[hosts=1]' 'make -j `npg_pipeline_job_env_to_threads` phasing_finished.txt'}; - is ($bustard->_make_command('phasing_all'), $expected_cmd, 'phasing all command'); -} - -{ - my $bustard = npg_pipeline::analysis::bustard4pbcb->new( - id_run => 1234, - bustard_home => $bustard_home, - timestamp => '20091028-101635', - script_path => 'none', - ); - throws_ok { $bustard->bustard_dir } qr/ not found/, 'error when bustard command not found'; - - $bustard = npg_pipeline::analysis::bustard4pbcb->new( - id_run => 1234, - bustard_home => $bustard_home, - script_path => '/bin/true', - ); - throws_ok { 
$bustard->bustard_dir } qr/No bustard output in/, 'error when bustard output file is empty'; - - $bustard = npg_pipeline::analysis::bustard4pbcb->new( - id_run => 1234, - bustard_home => $bustard_home, - script_path => '/bin/true', - ); - throws_ok { $bustard->_get_bustard_dir(qw/one two three/) } qr/No record about bustard directory/, 'error when bustard output file does not contain the bustard directory name'; - - my $dir; - lives_ok { $dir = $bustard->_get_bustard_dir('one', 'Sequence folder: folder', 'three')} 'parsing bustard output lives'; - is ($dir, 'folder', 'correct bustard directory extracted'); - is($bustard->_get_bustard_dir('one', 'Sequence folder:folder', 'three'), undef, 'undef returned if line format is wrong'); -} - -1; diff --git a/t/25-harold_calibration_bam.t b/t/25-harold_calibration_bam.t index 857d22197..e25b03dcc 100644 --- a/t/25-harold_calibration_bam.t +++ b/t/25-harold_calibration_bam.t @@ -22,14 +22,7 @@ Log::Log4perl->easy_init({layout => '%d %-5p %c - %m%n', file => join(q[/], $tdir, 'logfile'), utf8 => 1}); -my $sp = join q[/], $tdir, 'spatial_filter'; -my $java = join q[/], $tdir, 'java'; -foreach my $tool (($sp, $java)) { - `touch $tool`; - `chmod +x $tool`; -} local $ENV{PATH} = join q[:], qq[$curdir/t/bin], $tdir, $ENV{PATH}; -local $ENV{CLASSPATH} = q{t/bin/software/solexa/jars}; my $id_run; my $mem_units = 'MB'; @@ -51,254 +44,6 @@ sub set_staging_analysis_area { return 1; } -{ - set_staging_analysis_area(); - my $harold; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => 1234, - run_folder => q{123456_IL2_1234}, - runfolder_path => $runfolder_path, - timestamp => q{20091028-101635}, - verbose => 0, - repository => $repos, - no_bsub => 1, - recalibration => 1, - }); - } q{create $harold object ok}; - - isa_ok($harold, q{npg_pipeline::analysis::harold_calibration_bam}, q{$harold}); - is($harold->pb_calibration_bin, $tdir, 'pb calibration bin is correct'); - 
is($harold->spatial_filter_path, $sp, 'spatial filter path is correct'); -} - -{ - set_staging_analysis_area(); - my $harold; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => 1234, - run_folder => q{123456_IL2_1234}, - runfolder_path => $runfolder_path, - timestamp => q{20091028-101635}, - verbose => 0, - repository => $repos, - dif_files_path => $bustard_home, - spatial_filter => 1, - no_bsub => 1, - recalibration => 1, - force_phix_split => 0, - }); - } q{create $harold object ok}; - - my $req_job_completion = q{-w'done(123) && done(321)'}; - my $arg_refs = { - required_job_completion => $req_job_completion, - }; - - my @job_ids; - my $mem = 3072; - my $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>$mem_units)->_scale_mem_limit(); - my $job = qq{bsub -q srpipeline -E 'npg_pipeline_preexec_references --repository $curdir/t/data/sequence' -o $bustard_rta/PB_cal/log/PB_cal_table_1234_4_20091028-101635.%J.out -J PB_cal_table_1234_4_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.qq{ -R 'span[hosts=1]' -w'done(123) && done(321)' " cd $bustard_rta/PB_cal && $tdir/pb_calibration --intensity_dir $bustard_home --t_filter 2 --prefix 1234_4 --cstart 1 --bam pb_align_1234_4.bam "}; - lives_ok { - @job_ids = $harold->generate_calibration_table( $arg_refs ); - } q{no croak submitting calibration table jobs}; - - is( scalar @job_ids, 1, q{8 jobs created}); - - is( $harold->_calibration_table_bsub_command( { - dir => $bustard_rta, - position => 4, - job_dependencies => $req_job_completion, - ref_seq => q{phix-illumina.fa}, - } ), $job, q{generated bsub command is correct} ); - - my $cal_table = q{1234_4_purity_cycle_caltable.txt}; - is( $harold->calibration_table_name( { id_run => 1234, position=>4 } ), $cal_table, q{generated calibration table name is correct}); - $mem = 1725; - $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units 
=>$mem_units)->_scale_mem_limit(); - $job = qq{bsub -q srpipeline -o $bustard_rta/PB_cal/log/PB_cal_score_1234_3_20091028-101635.%J.out -J PB_cal_score_1234_3_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.qq{ -R 'span[hosts=1]' -w'done(123) && done(321)' ' cd $bustard_rta/PB_cal && bash -c '"'"'if [[ -f pb_align_1234_3.bam ]]; then echo phix alignment so merging alignments with 1>&2; set -o pipefail; (if [ -f 1234_4_purity_cycle_caltable.txt ]; then echo recalibrated qvals 1>&2; $tdir/pb_predictor --u --bam ../1234_3.bam --intensity_dir $bustard_home --cstart 1 --ct 1234_4_purity_cycle_caltable.txt ; else echo no recalibration 1>&2; cat ../1234_3.bam ; fi;) | ( if [[ -f pb_align_1234_3.bam.filter ]]; then echo applying spatial filter 1>&2; $sp -u -a -f -F pb_align_1234_3.bam.filter - 2> >( tee /dev/stderr | qc --check spatial_filter --id_run 1234 --position 3 --qc_out $bustard_home/Bustard_RTA/PB_cal/archive/qc ); else echo no spatial filter 1>&2; cat; fi;) | $java -Xmx1024m -jar $curdir/t/bin/software/solexa/jars/BamMerger.jar CREATE_MD5_FILE=true VALIDATION_STRINGENCY=SILENT KEEP=true I=/dev/stdin REPLACE_QUAL=true O=1234_3.bam ALIGNED=pb_align_1234_3.bam; else echo symlinking as no phix alignment 1>&2; rm -f 1234_3.bam; ln -s ../1234_3.bam 1234_3.bam; rm -f 1234_3.bam.md5; ln -s ../1234_3.bam.md5 1234_3.bam.md5; fi'"'"' '}; - my $expect_job = $harold->_recalibration_bsub_command( { - position => 3, - job_dependencies => $req_job_completion, - ct => $cal_table, - } ); - is($expect_job, $job, q{generated bsub command for recalibration job is correct}); - - lives_ok { - @job_ids = $harold->generate_recalibrated_bam($arg_refs); - } q{no croak submitting recalibration jobs}; - is( scalar @job_ids, 8, q{8 jobs created}); -} - -{ - set_staging_analysis_area(); - my $harold; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => 8797, - run_folder => q{121112_HS20_08797_A_C18TEACXX}, - 
runfolder_path => $runfolder_path, - timestamp => q{20121112-123456}, - verbose => 0, - repository => $repos, - dif_files_path => $bustard_home, - spatial_filter => 1, - no_bsub => 1, - recalibration => 1, - }); - } q{create $harold object ok}; - - my $req_job_completion = q{-w'done(123) && done(321)'}; - my $arg_refs = { - required_job_completion => $req_job_completion, - }; - - my @job_ids; - my $mem = 3072; - my $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>$mem_units)->_scale_mem_limit(); - my $job = qq{bsub -q srpipeline -E 'npg_pipeline_preexec_references --repository $curdir/t/data/sequence' -o $bustard_rta/PB_cal/log/PB_cal_table_8797_8_20121112-123456.%J.out -J PB_cal_table_8797_8_20121112-123456 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.qq{ -R 'span[hosts=1]' -w'done(123) && done(321)' " cd $bustard_rta/PB_cal && $tdir/pb_calibration --intensity_dir $bustard_home --t_filter 2 --prefix 8797_8 --cstart 11 --bam pb_align_8797_8.bam "}; - - lives_ok { - @job_ids = $harold->generate_calibration_table( $arg_refs ); - } q{no croak submitting calibration table jobs}; - - is( scalar @job_ids, 8, q{8 jobs created}); - - is( $harold->_calibration_table_bsub_command( { - dir => $bustard_rta, - position => 8, - job_dependencies => $req_job_completion, - ref_seq => q{phix-illumina.fa}, - } ), $job, q{generated bsub command is correct} ); - - my $cal_table = q{8797_7_purity_cycle_caltable.txt}; - is( $harold->calibration_table_name( { id_run => 8797, position=>7 } ), $cal_table, q{generated calibration table name is correct}); - $mem = 1725; - $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>$mem_units)->_scale_mem_limit(); - $job = qq{bsub -q srpipeline -o $bustard_rta/PB_cal/log/PB_cal_score_8797_7_20121112-123456.%J.out -J PB_cal_score_8797_7_20121112-123456 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.qq{ -R 'span[hosts=1]' -w'done(123) && done(321)' ' cd 
$bustard_rta/PB_cal && bash -c '"'"'if [[ -f pb_align_8797_7.bam ]]; then echo phix alignment so merging alignments with 1>&2; set -o pipefail; (if [ -f 8797_7_purity_cycle_caltable.txt ]; then echo recalibrated qvals 1>&2; $tdir/pb_predictor --u --bam ../8797_7.bam --intensity_dir $bustard_home --cstart 11 --ct 8797_7_purity_cycle_caltable.txt ; else echo no recalibration 1>&2; cat ../8797_7.bam ; fi;) | ( if [[ -f pb_align_8797_7.bam.filter ]]; then echo applying spatial filter 1>&2; $sp -u -a -f -F pb_align_8797_7.bam.filter - 2> >( tee /dev/stderr | qc --check spatial_filter --id_run 8797 --position 7 --qc_out $bustard_home/Bustard_RTA/PB_cal/archive/qc ); else echo no spatial filter 1>&2; cat; fi;) | $java -Xmx1024m -jar $curdir/t/bin/software/solexa/jars/BamMerger.jar CREATE_MD5_FILE=true VALIDATION_STRINGENCY=SILENT KEEP=true I=/dev/stdin REPLACE_QUAL=true O=8797_7.bam ALIGNED=pb_align_8797_7.bam; else echo symlinking as no phix alignment 1>&2; rm -f 8797_7.bam; ln -s ../8797_7.bam 8797_7.bam; rm -f 8797_7.bam.md5; ln -s ../8797_7.bam.md5 8797_7.bam.md5; fi'"'"' '} ; - my $expect_job = $harold->_recalibration_bsub_command( { - position => 7, - job_dependencies => $req_job_completion, - ct => $cal_table, - } ); - is($expect_job, $job, q{generated bsub command for recalibration job is correct}); - - lives_ok { - @job_ids = $harold->generate_recalibrated_bam($arg_refs); - } q{no croak submitting recalibration jobs}; - is( scalar @job_ids, 8, q{8 jobs created}); -} - -{ - set_staging_analysis_area(); - my $harold; - $id_run = 4846; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => $id_run, - run_folder => q{123456_IL2_1234}, - runfolder_path => $runfolder_path, - timestamp => q{20091028-101635}, - verbose => 0, - repository => $repos, - bam_basecall_path => $runfolder_path . 
q{/Data/Intensities/BaseCalls}, - no_bsub => 1, - recalibration => 1, - force_phix_split => 0, - }); - } q{create $harold object ok}; - - isa_ok($harold, q{npg_pipeline::analysis::harold_calibration_bam}, q{$harold}); - - my @job_ids = $harold->generate_alignment_files({}); - is( scalar @job_ids, 0, q{no job ids for alignment as no spiked phix lane} ); - - @job_ids = $harold->generate_calibration_table({}); - is( scalar @job_ids, 0, q{no job ids for calibration table as no spiked phix lane} ); - - @job_ids = $harold->generate_recalibrated_bam({}); - is( scalar @job_ids, 8, q{8 job ids for recalibration even if no spiked phix lane} ); -} - -{ - set_staging_analysis_area(); - my $harold; - $id_run = 4846; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => $id_run, - run_folder => q{123456_IL2_1234}, - runfolder_path => $runfolder_path, - timestamp => q{20091028-101635}, - verbose => 0, - repository => $repos, - bam_basecall_path => $runfolder_path . 
q{/Data/Intensities/BaseCalls}, - no_bsub => 1, - recalibration => 1, - force_phix_split => 1, - }); - } q{create $harold object ok}; - - isa_ok($harold, q{npg_pipeline::analysis::harold_calibration_bam}, q{$harold}); - - my @job_ids = $harold->generate_alignment_files({}); - is( scalar @job_ids, 8, q{8 job ids for alignment as no spiked phix lane but force phix split} ); - - @job_ids = $harold->generate_calibration_table({}); - is( scalar @job_ids, 8, q{8 job ids for calibration table as no spiked phix lane but force phix split} ); - - @job_ids = $harold->generate_recalibrated_bam({}); - is( scalar @job_ids, 8, q{8 job ids for recalibration even if no spiked phix lane but force phix split} ); -} - -{ - set_staging_analysis_area(); - my $harold; - $id_run = 1234; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => $id_run, - run_folder => q{123456_IL2_1234}, - runfolder_path => $runfolder_path, - timestamp => q{20091028-101635}, - verbose => 0, - repository => $repos, - dif_files_path => $runfolder_path . q{/Data/Intensities}, - bam_basecall_path => $runfolder_path . 
q{/Data/Intensities/BaseCalls}, - no_bsub => 1, - spatial_filter => 1, - recalibration => 1, - force_phix_split => 0, - }); - } q{create $harold object ok}; - - my $arg_refs = { - timestamp => q{20091028-101635}, - position => 1, - job_dependencies => q{-w 'done(1234) && done(4321)'}, - ref_seq => q{t/data/sequence/references/Human/default/all/bwa/someref.fa.bwt}, - }; - - my $mem = 16000; - my $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>$mem_units)->_scale_mem_limit(); - my $single_read_alignment_command = qq{bsub -q srpipeline -E 'npg_pipeline_preexec_references --repository $curdir/t/data/sequence' -o $bustard_rta/PB_cal/}.q{log/PB_cal_align_1234_1_20091028-101635.%J.out -J PB_cal_align_1234_1_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem_limit.q{ -R 'span[hosts=1]' -n 6,12 -w 'done(1234) && done(4321)' '} . qq{cd $bustard_rta/PB_cal && $tdir} . q{/pb_align --aln_parms "-t "`npg_pipeline_job_env_to_threads` --sam_parms "-t "`npg_pipeline_job_env_to_threads --maximum 8` --spatial_filter --sf_parms "--region_size 200 --region_mismatch_threshold 0.016 --region_insertion_threshold 0.016 --region_deletion_threshold 0.016 --tileviz } . $bustard_home . q{/Bustard_RTA/PB_cal/archive/qc/tileviz/1234_1 " } . qq{--bam_join_jar $curdir/t/bin/software/solexa/jars/BamMerger.jar} . q{ --ref t/data/sequence/references/Human/default/all/bwa/someref.fa.bwt --read 0 --bam } . $bustard_home . q{/BaseCalls/1234_1.bam --prefix pb_align_1234_1 --pf_filter'}; - my $paired_read_alignment_command = qq{bsub -q srpipeline -E 'npg_pipeline_preexec_references --repository $curdir/t/data/sequence' -o $bustard_rta/PB_cal/log/PB_cal_align_1234_1_20091028-101635.%J.out -J PB_cal_align_1234_1_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem_limit.q{ -R 'span[hosts=1]' -n 6,12 -w 'done(1234) && done(4321)' '} . qq{cd $bustard_rta/PB_cal && $tdir} . 
q{/pb_align --aln_parms "-t "`npg_pipeline_job_env_to_threads` --sam_parms "-t "`npg_pipeline_job_env_to_threads --maximum 8` --spatial_filter --sf_parms "--region_size 200 --region_mismatch_threshold 0.016 --region_insertion_threshold 0.016 --region_deletion_threshold 0.016 --tileviz } . $bustard_home . q{/Bustard_RTA/PB_cal/archive/qc/tileviz/1234_1 " } . qq{--bam_join_jar $curdir/t/bin/software/solexa/jars/BamMerger.jar} . qq{ --ref t/data/sequence/references/Human/default/all/bwa/someref.fa.bwt --read1 1 --read2 2 --bam $bustard_home/BaseCalls/1234_1.bam --prefix pb_align_1234_1 --pf_filter'}; - my $spiked_read_alignment_command = qq{bsub -q srpipeline -E 'npg_pipeline_preexec_references --repository $curdir/t/data/sequence' -o $bustard_rta/PB_cal/log/PB_cal_align_1234_1_20091028-101635.%J.out -J PB_cal_align_1234_1_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem_limit.q{ -R 'span[hosts=1]' -n 6,12 -w 'done(1234) && done(4321)' '} . qq{cd $bustard_rta/PB_cal && $tdir} . q{/pb_align --aln_parms "-t "`npg_pipeline_job_env_to_threads` --sam_parms "-t "`npg_pipeline_job_env_to_threads --maximum 8` --spatial_filter --sf_parms "--region_size 200 --region_mismatch_threshold 0.016 --region_insertion_threshold 0.016 --region_deletion_threshold 0.016 --tileviz } . $bustard_home . q{/Bustard_RTA/PB_cal/archive/qc/tileviz/1234_1 " } . qq{--bam_join_jar $curdir/t/bin/software/solexa/jars/BamMerger.jar} . 
qq{ --ref t/data/sequence/references/PhiX/default/all/fasta/phix-illumina.fa --read1 1 --read2 2 --bam $bustard_home/BaseCalls/1234_1.bam --prefix pb_align_1234_1 --pf_filter'}; - - is( $harold->_alignment_file_bsub_command( $arg_refs ), $single_read_alignment_command, q{single read alignment bsub command is correct} ); - - $arg_refs->{is_paired} = 1; - is( $harold->_alignment_file_bsub_command( $arg_refs ), $paired_read_alignment_command, q{paired read alignment bsub command is correct} ); - - $arg_refs->{is_spiked_phix} = 1; - $arg_refs->{ref_seq} = q{t/data/sequence/references/PhiX/default/all/fasta/phix-illumina.fa}; - is( $harold->_alignment_file_bsub_command( $arg_refs ), $spiked_read_alignment_command, q{paired read alignment bsub command is correct} ); - - my @job_ids = $harold->generate_alignment_files({}); - is( scalar @job_ids, 1, q{1 job ids, one spiked phix lane} ); -} - { set_staging_analysis_area(); my $harold; From a58d062262a9a7023681bdb5f400bebe9edd6b78 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Thu, 11 May 2017 11:35:31 +0100 Subject: [PATCH 03/27] further code deletions and fixes to tests --- Changes | 4 + MANIFEST | 10 +- ...tion_bam.pm => illumina_basecall_stats.pm} | 24 ++-- lib/npg_pipeline/base.pm | 22 ++-- lib/npg_pipeline/pluggable/harold/central.pm | 106 ++++-------------- lib/npg_pipeline/roles/business/base.pm | 28 ----- .../roles/business/flag_options.pm | 27 +---- t/10-base.t | 11 +- t/10-pluggable_harold_central.t | 38 +------ t/20-archive_file-to_irods.t | 4 +- t/20-archive_logs.t | 1 - t/25-analysis-illumina_basecall_stats.t | 70 ++++++++++++ t/25-harold_calibration_bam.t | 80 ------------- 13 files changed, 122 insertions(+), 303 deletions(-) rename lib/npg_pipeline/analysis/{harold_calibration_bam.pm => illumina_basecall_stats.pm} (86%) create mode 100644 t/25-analysis-illumina_basecall_stats.t delete mode 100644 t/25-harold_calibration_bam.t diff --git a/Changes b/Changes index ed0719b55..87d54c9c4 100644 --- 
a/Changes +++ b/Changes @@ -1,6 +1,10 @@ LIST OF CHANGES --------------- + - OLB analysis removed + - recalibration removed + - pb_cal_path and dif_files_path accessors disabled + release 51.9 - p4stage2 speed-up by caching references - p4stage2 errors in getting a reference made fatal diff --git a/MANIFEST b/MANIFEST index 99e6486a0..85449d810 100644 --- a/MANIFEST +++ b/MANIFEST @@ -12,18 +12,14 @@ Build.PL Changes data/config_files/function_list_central.yml data/config_files/function_list_central_gclp.yml -data/config_files/function_list_central_olb.yml data/config_files/function_list_central_qc_run.yml data/config_files/function_list_post_qc_review.yml data/config_files/function_list_post_qc_review_gclp.yml data/config_files/general_values.ini -data/config_files/illumina_pipeline.ini data/config_files/parallelisation.yml -data/config_files/pb_cal_pipeline.ini -lib/npg_pipeline/analysis/bustard4pbcb.pm lib/npg_pipeline/analysis/create_lane_tag_file.pm lib/npg_pipeline/analysis/FixConfigFiles.pm -lib/npg_pipeline/analysis/harold_calibration_bam.pm +lib/npg_pipeline/analysis/illumina_basecall_stats.pm lib/npg_pipeline/analysis/split_bam_by_tag.pm lib/npg_pipeline/archive/file/BamClusterCounts.pm lib/npg_pipeline/archive/file/generation.pm @@ -52,7 +48,6 @@ lib/npg_pipeline/pluggable/harold/post_qc_review.pm lib/npg_pipeline/roles/accessor.pm lib/npg_pipeline/roles/business/base.pm lib/npg_pipeline/roles/business/flag_options.pm -lib/npg_pipeline/roles/business/harold_calibration_reqs.pm lib/npg_pipeline/run/folder/link.pm lib/npg_tracking/daemon/analysis.pm lib/npg_tracking/daemon/archival.pm @@ -82,11 +77,10 @@ t/20-archive_illumina_analysis.t t/20-archive_qc.t t/20-archive_logs.t t/21-references_adapters.t -t/25-analysis-bustard4pbcb.t t/25-analysis-create_lane_tag_file.t t/25-analysis-FixConfigFiles.t t/25-analysis-split_bam_by_tag.t -t/25-harold_calibration_bam.t +t/25-analysis-illumina_basecall_stats.t t/30-launcher-status.t t/30-run_folder-link.t 
t/35-archive_file_generation-BamClusterCounts.t diff --git a/lib/npg_pipeline/analysis/harold_calibration_bam.pm b/lib/npg_pipeline/analysis/illumina_basecall_stats.pm similarity index 86% rename from lib/npg_pipeline/analysis/harold_calibration_bam.pm rename to lib/npg_pipeline/analysis/illumina_basecall_stats.pm index 3d00ec5e9..704fd5732 100644 --- a/lib/npg_pipeline/analysis/harold_calibration_bam.pm +++ b/lib/npg_pipeline/analysis/illumina_basecall_stats.pm @@ -1,8 +1,7 @@ -package npg_pipeline::analysis::harold_calibration_bam; +package npg_pipeline::analysis::illumina_basecall_stats; use Moose; use Readonly; -use npg_tracking::util::types; use npg_pipeline::lsf_job; extends 'npg_pipeline::base'; @@ -15,12 +14,10 @@ Readonly::Scalar our $MAKE_STATS_MEM => 350; =head1 NAME -npg_pipeline::analysis::harold_calibration_bam + npg_pipeline::analysis::illumina_basecall_stats =head1 SYNOPSIS - my $oHaroldCalibration = npg_pipeline:analysis::harold_calibration_bam->new(); - =head1 DESCRIPTION =head1 SUBROUTINES/METHODS @@ -31,10 +28,13 @@ Absolute path to executable that generates Illumina basecall stats =cut -has 'bcl2qseq' => ( isa => 'NpgTrackingExcecutable', - is => 'ro', - default => 'setupBclToQseq.py', -); +has 'bcl2qseq' => ( isa => 'NpgCommonResolvedPathExecutable', + is => 'ro', + coerce => 1, + lazy_build => 1,); +sub _build_bcl2qseq { + return 'setupBclToQseq.py' +} sub _generate_command { my ( $self, $arg_refs ) = @_; @@ -81,14 +81,14 @@ sub _generate_command { return join q[ ], @command; } -=head2 generate_illumina_basecall_stats +=head2 generate Use Illumina tools to generate the (per run) BustardSummary and IVC reports (from on instrument RTA basecalling). 
=cut -sub generate_illumina_basecall_stats{ +sub generate { my ( $self, $arg_refs ) = @_; return $self->submit_bsub_command($self->_generate_command($arg_refs)); } @@ -113,8 +113,6 @@ __END__ =item Readonly -=item npg_tracking::util::types - =item npg_common::roles::software_location =back diff --git a/lib/npg_pipeline/base.pm b/lib/npg_pipeline/base.pm index fc07b377e..6d5c6ad6b 100644 --- a/lib/npg_pipeline/base.pm +++ b/lib/npg_pipeline/base.pm @@ -19,16 +19,20 @@ with qw{ MooseX::Getopt MooseX::AttributeCloner WTSI::DNAP::Utilities::Loggable - npg_tracking::illumina::run::short_info - npg_tracking::illumina::run::folder npg_pipeline::roles::accessor + npg_tracking::illumina::run::short_info npg_pipeline::roles::business::base }; + +with 'npg_tracking::illumina::run::folder' => { + -excludes => [qw(pb_cal_path dif_files_path)] + }; + with q{npg_tracking::illumina::run::long_info}; with q{npg_pipeline::roles::business::flag_options}; Readonly::Scalar my $DEFAULT_JOB_ID_FOR_NO_BSUB => 50; -Readonly::Array my @FLAG2FUNCTION_LIST => qw/ olb qc_run gclp /; +Readonly::Array my @FLAG2FUNCTION_LIST => qw/ qc_run gclp /; $ENV{LSB_DEFAULTPROJECT} ||= q{pipeline}; @@ -442,8 +446,6 @@ sub _build_function_list_conf { } =head2 general_values_conf -=head2 illumina_pipeline_conf -=head2 pb_cal_pipeline_conf =head2 parallelisation_conf Returns a hashref of configuration details from the relevant configuration file @@ -451,8 +453,6 @@ Returns a hashref of configuration details from the relevant configuration file =cut has [ qw{ general_values_conf - illumina_pipeline_conf - pb_cal_pipeline_conf parallelisation_conf } ] => ( isa => q{HashRef}, @@ -465,14 +465,6 @@ sub _build_general_values_conf { my ( $self ) = @_; return $self->read_config( $self->conf_file_path(q{general_values.ini}) ); } -sub _build_illumina_pipeline_conf { - my ( $self ) = @_; - return $self->read_config( $self->conf_file_path(q{illumina_pipeline.ini}) ); -} -sub _build_pb_cal_pipeline_conf { - my ( $self ) = 
@_; - return $self->read_config( $self->conf_file_path(q{pb_cal_pipeline.ini}) ); -} sub _build_parallelisation_conf { my ( $self ) = @_; return $self->read_config( $self->conf_file_path(q{parallelisation.yml}) ); diff --git a/lib/npg_pipeline/pluggable/harold/central.pm b/lib/npg_pipeline/pluggable/harold/central.pm index 7190aaf23..44ad67d16 100644 --- a/lib/npg_pipeline/pluggable/harold/central.pm +++ b/lib/npg_pipeline/pluggable/harold/central.pm @@ -32,8 +32,6 @@ Pluggable module runner for the main pipeline Sets all paths needed during the lifetime of the analysis runfolder. Creates any of the paths that do not exist. - Dynamically adds bustard functions to the object; - =cut override 'prepare' => sub { @@ -52,8 +50,10 @@ override 'prepare' => sub { sub _set_paths { my $self = shift; + my $sep = q[/]; + if ( ! $self->has_intensity_path() ) { - my $ipath = $self->runfolder_path() . q{/Data/Intensities}; + my $ipath = join $sep, $self->runfolder_path(), q{Data}, q{Intensities}; if (!-e $ipath) { $self->info(qq{Intensities path $ipath not found}); $ipath = $self->runfolder_path(); @@ -62,94 +62,31 @@ sub _set_paths { } $self->info('Intensities path: ', $self->intensity_path() ); - # If preprocessing with OLB, to set the paths mentioned below, - # one needs to know the name of the bustard directory. - # This name is not known till the bustard scripts is run. - # Therefore, if using OLB, delay creating these directories. - if (!$self->olb) { - if ( ! $self->has_dif_files_path() ) { - $self->set_dif_files_path( $self->intensity_path() ); - } - $self->info('Dif files path: ', $self->dif_files_path() ); - - if ( ! $self->has_basecall_path() ) { - my $bpath = $self->intensity_path() . q{/BaseCalls}; - if (!-e $bpath) { - $self->warn(qq{BaseCalls path $bpath not found}); - $bpath = $self->runfolder_path(); - } - $self->_set_basecall_path( $bpath); + if ( ! $self->has_basecall_path() ) { + my $bpath = join $sep, $self->intensity_path() . 
q{BaseCalls}; + if (!-e $bpath) { + $self->warn(qq{BaseCalls path $bpath not found}); + $bpath = $self->runfolder_path(); } - $self->info('BaseCalls path: ' . $self->basecall_path() ); + $self->_set_basecall_path( $bpath); } + $self->info('BaseCalls path: ' . $self->basecall_path() ); - if( ! $self->has_bam_basecall_path() ) { - my $bam_basecalls_dir = $self->intensity_path() . q{/} .q{BAM_basecalls_} . $self->timestamp(); + if( ! $self->has_bam_basecall_path() ) { + my $bam_basecalls_dir = join $sep, $self->intensity_path(), q{BAM_basecalls_} . $self->timestamp(); $self->make_log_dir( $bam_basecalls_dir ); $self->set_bam_basecall_path( $bam_basecalls_dir ); } $self->info('BAM_basecall path: ' . $self->bam_basecall_path()); - $self->_set_bam_basecall_dependent_paths(); - - - if ($self->olb) { - my $bustard_dir = $self->new_with_cloned_attributes(q{npg_pipeline::analysis::bustard4pbcb}, - {bustard_home => $self->intensity_path,})->bustard_dir(); - $self->set_dif_files_path( $bustard_dir ); - $self->_set_basecall_path( $bustard_dir ); - $self->info("basecall and dif_files paths set to $bustard_dir"); - $self->make_log_dir( $bustard_dir ); - } - - return; -} - -### -# -# If unset, sets recalibrated_path and pb_cal_path. -# - -sub _set_bam_basecall_dependent_paths { - my $self = shift; - my $pathways = { - recalibrated_path => undef, - pb_cal_path => undef, - }; - - # for each of the paths, see if they have been prepopulated - foreach my $path ( keys %{ $pathways } ) { - my $has_method = q{has_} . $path; - if ( $self->$has_method() ) { - $pathways->{$path} = $self->$path(); - } - } - # if recalibrated_path or pb_cal_path are not set, but the other is, match them up - if ( $pathways->{recalibrated_path} && ! $pathways->{pb_cal_path} ) { - $pathways->{pb_cal_path} = $pathways->{recalibrated_path}; - } - if ( ! $pathways->{recalibrated_path} && $pathways->{pb_cal_path} ) { - $pathways->{recalibrated_path} = $pathways->{pb_cal_path}; + if (! 
$self->has_recalibrated_path()) { + $self->_set_recalibrated_path(join $sep, $self->bam_basecall_path(), 'no_cal') } + $self->make_log_dir($self->recalibrated_path()); + $self->info('PB_cal path: ' . $self->recalibrated_path()); - # if there is no recalibrated_path and pb_cal_path, then create them and store - if ( ! $pathways->{recalibrated_path} ) { - my $recalibrated_level_dir = !$self->recalibration() ? q{no_cal} - : q{PB_cal_bam} - ; - $self->make_log_dir( $self->bam_basecall_path() . q{/} . $recalibrated_level_dir ); - $pathways->{recalibrated_path} = $self->bam_basecall_path() . q{/} . $recalibrated_level_dir; - $pathways->{pb_cal_path} = $self->bam_basecall_path() . q{/} . $recalibrated_level_dir; - } - # for each of these, go and set them (we know we must have created them by now) - foreach my $path ( keys %{ $pathways } ) { - my $set_method = q{_set_} . $path; - $self->$set_method( $pathways->{$path} ); - } - - $self->info('PB_cal path: ' . $self->pb_cal_path()); - $self->info('Recalibrated_path: ' . $self->recalibrated_path() ); $self->make_log_dir( $self->status_files_path ); + return; } @@ -166,8 +103,8 @@ sub illumina_basecall_stats { $self->info(q{HiSeqX sequencing instrument, illumina_basecall_stats will not be run}); return (); } - return $self->new_with_cloned_attributes(q{npg_pipeline::analysis::harold_calibration_bam}) - ->generate_illumina_basecall_stats(@args); + return $self->new_with_cloned_attributes(q{npg_pipeline::analysis::illumina_basecall_stats}) + ->generate(@args); } =head2 split_bam_by_tag @@ -260,7 +197,8 @@ sub _bam2fastqcheck_and_cached_fastq_command { my $job_name = join q{_}, q{bam2fastqcheck_and_cached_fastq}, $id_run, $timestamp; my $out = $job_name . q{.%I.%J.out}; - $out = File::Spec->catfile($self->make_log_dir($self->pb_cal_path), $out ); + $out = File::Spec->catfile($self->make_log_dir($self->recalibrated_path), $out ); + $job_name = q{'} . $job_name . npg_pipeline::lsf_job->create_array_string( $self->positions()) . 
q{'}; my $job_sub = q{bsub -q } . $self->lsf_queue() . q{ } . @@ -268,7 +206,7 @@ sub _bam2fastqcheck_and_cached_fastq_command { qq{ $required_job_completion -J $job_name -o $out }; $job_sub .= q{'} . q{generate_cached_fastq --path } . $self->archive_path() . - q{ --file } . $self->pb_cal_path() . q{/} . $id_run . q{_} . $self->lsb_jobindex() . q{.bam} . + q{ --file } . $self->recalibrated_path() . q{/} . $id_run . q{_} . $self->lsb_jobindex() . q{.bam} . q{'}; $self->debug($job_sub); diff --git a/lib/npg_pipeline/roles/business/base.pm b/lib/npg_pipeline/roles/business/base.pm index 3db3a34de..6283468a9 100644 --- a/lib/npg_pipeline/roles/business/base.pm +++ b/lib/npg_pipeline/roles/business/base.pm @@ -253,34 +253,6 @@ sub all_positions { return @position; } -=head2 tile_list - -A string of wildcards for tiles for OLB, defaults to an empty string - -=cut - -has q{tile_list} => (isa => q{Str}, - is => q{ro}, - default => q{}, - documentation => q{string of wildcards for tiles for OLB, defaults to an empty string},); - -=head2 override_all_bustard_options - -Overrides all bustard options (including any given via other options) as a string - it is up to the user to ensure all are correct and given - -=head2 has_override_all_bustard_options - -predicate to ensure that options are available - -=cut - -has q{override_all_bustard_options} => ( - isa => q{Str}, - is => q{ro}, - predicate => q{has_override_all_bustard_options}, - documentation => q{Overrides all bustard options (including any given via other options) as a string - it is up to the user to ensure all are correct and given - i.e. only use if you know what you are doing.}, -); - =head2 repository A custom reference repository root directory. 
diff --git a/lib/npg_pipeline/roles/business/flag_options.pm b/lib/npg_pipeline/roles/business/flag_options.pm index 529cf5115..39e6be45c 100644 --- a/lib/npg_pipeline/roles/business/flag_options.pm +++ b/lib/npg_pipeline/roles/business/flag_options.pm @@ -40,18 +40,6 @@ has q{no_summary_link} => ( documentation => q{Turn off creating a Latest_Summary link}, ); -=head2 recalibration - -Switches recalibration on, false by default - -=cut - -has q{recalibration} => ( - isa => q{Bool}, - is => q{ro}, - documentation => q{Switches recalibration on, false by default}, -); - =head2 no_fix_config_files flag option to request that config files are not checked and fixed @@ -172,19 +160,6 @@ sub _build_local { return $self->no_bsub ? 1 : 0; } -=head2 olb - -Switches on OLB Bustard preprocessing to generate dif and bcl -files and Bustard XML Summary from cif files; false by default. - -=cut - -has q{olb} => ( - isa => q{Bool}, - is => q{ro}, - documentation => q{Switches on OLB Bustard preprocessing}, -); - =head2 spatial_filter Do we want to use the spatial_filter program? @@ -236,7 +211,7 @@ Andy Brown =head1 LICENSE AND COPYRIGHT -Copyright (C) 2015 Genome Research Ltd +Copyright (C) 2017 Genome Research Ltd This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/t/10-base.t b/t/10-base.t index 71d318372..902f0905f 100644 --- a/t/10-base.t +++ b/t/10-base.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 91; +use Test::More tests => 89; use Test::Exception; use File::Temp qw(tempdir tempfile); use File::Copy qw(cp); @@ -51,14 +51,7 @@ use_ok(q{npg_pipeline::base}); { my $base = npg_pipeline::base->new(); - - foreach my $config_group ( qw{ - general_values_conf - illumina_pipeline_conf - pb_cal_pipeline_conf - } ) { - isa_ok( $base->$config_group(), q{HASH}, q{$} . 
qq{base->$config_group} ); - } + isa_ok( $base->general_values_conf(), q{HASH}); } { diff --git a/t/10-pluggable_harold_central.t b/t/10-pluggable_harold_central.t index 5f5f0e385..6e4afc59e 100644 --- a/t/10-pluggable_harold_central.t +++ b/t/10-pluggable_harold_central.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 32; +use Test::More tests => 23; use Test::Exception; use Cwd qw/getcwd/; use List::MoreUtils qw/ any none /; @@ -21,7 +21,6 @@ Log::Log4perl->easy_init({layout => '%d %-5p %c - %m%n', file => join(q[/], $tdir, 'logfile'), utf8 => 1}); -local $ENV{TEST_DIR} = $tdir; local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[t/data]; local $ENV{TEST_FS_RESOURCE} = q{nfs_12}; @@ -86,38 +85,6 @@ my $runfolder_path = $util->analysis_runfolder_path(); is_deeply( $pipeline->function_order() , $expected_function_order, q{Function order correct} ); } -{ - local $ENV{CLASSPATH} = q{t/bin/software/solexa/jars}; - my $pipeline; - lives_ok { - $pipeline = $central->new( - id_run => 1234, - runfolder_path => $runfolder_path, - recalibration => 0, - no_bsub => 1, - spider => 0, - ); - } q{no croak creating new object}; - - ok( !scalar $pipeline->harold_calibration_tables(), q{no calibration tables launched} ); - ok(!$pipeline->olb, 'not olb pipeline'); - lives_ok { $pipeline->prepare() } 'prepare lives'; - ok( $pipeline->illumina_basecall_stats(), q{olb false - illumina_basecall_stats job launched} ); - my $bool = none {$_ =~ /bustard/} @{$pipeline->function_order()}; - ok( $bool, 'bustard functions are out'); - - $pipeline = $central->new( - runfolder_path => $runfolder_path, - no_bsub => 1, - olb => 1, - ); - is ($pipeline->function_list, - abs_path(getcwd() . 
'/data/config_files/function_list_central_olb.yml'), - 'olb function list'); - $bool = any {$_ =~ /bustard/} @{$pipeline->function_order()}; - ok( $bool, 'bustard functions are in'); -} - { my $pb; lives_ok { @@ -189,8 +156,7 @@ mkdir $rf; is ($pb->intensity_path, $rf, 'intensities path is set to runfolder'); is ($pb->basecall_path, $rf, 'basecall path is set to runfolder'); is ($pb->bam_basecall_path, join(q[/],$rf,q{BAM_basecalls_22-May}), 'bam basecall path is created'); - is ($pb->pb_cal_path, join(q[/],$pb->bam_basecall_path, 'no_cal'), 'pb_cal path set'); - is ($pb->recalibrated_path, $pb->pb_cal_path, 'recalibrated directory set'); + is ($pb->recalibrated_path, join(q[/],$pb->bam_basecall_path, 'no_cal'), 'recalibrated path set'); my $status_path = $pb->status_files_path(); is ($status_path, join(q[/],$rf,q{BAM_basecalls_22-May}, q{status}), 'status directory path'); ok(-d $status_path, 'status directory created'); diff --git a/t/20-archive_file-to_irods.t b/t/20-archive_file-to_irods.t index f05aed820..eb60b57c8 100644 --- a/t/20-archive_file-to_irods.t +++ b/t/20-archive_file-to_irods.t @@ -8,7 +8,6 @@ use_ok('npg_pipeline::archive::file::to_irods'); my $util = t::util->new(); -$ENV{TEST_DIR} = $util->temp_directory(); $ENV{TEST_FS_RESOURCE} = q{nfs_12}; local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[t/data]; local $ENV{PATH} = join q[:], q[t/bin], q[t/bin/software/solexa/bin], $ENV{PATH}; @@ -20,9 +19,8 @@ my $pb_cal = q[/Data/Intensities/Bustard1.3.4_09-07-2009_auto/PB_cal]; my $pb_cal_path = $analysis_runfolder_path . 
$pb_cal; sub create_analysis { - `rm -rf $tmp_dir/nfs/sf45`; `mkdir -p $analysis_runfolder_path/$pb_cal/archive`; - `mkdir $analysis_runfolder_path/Config`; + `mkdir -p $analysis_runfolder_path/Config`; `cp t/data/Recipes/Recipe_GA2_37Cycle_PE_v6.1.xml $analysis_runfolder_path/`; `cp t/data/Recipes/TileLayout.xml $analysis_runfolder_path/Config/`; `ln -s $pb_cal $analysis_runfolder_path/Latest_Summary`; diff --git a/t/20-archive_logs.t b/t/20-archive_logs.t index ff16ad1cb..b655c7168 100644 --- a/t/20-archive_logs.t +++ b/t/20-archive_logs.t @@ -21,7 +21,6 @@ my $pb_cal_path = $analysis_runfolder_path . $pb_cal; my $rfpath = '/nfs/sf45/IL2/outgoing/123456_IL2_1234'; sub create_analysis { - `rm -rf $tmp_dir/nfs/sf45`; `mkdir -p $analysis_runfolder_path/$pb_cal/archive`; `mkdir $analysis_runfolder_path/Config`; `cp t/data/Recipes/Recipe_GA2_37Cycle_PE_v6.1.xml $analysis_runfolder_path/`; diff --git a/t/25-analysis-illumina_basecall_stats.t b/t/25-analysis-illumina_basecall_stats.t new file mode 100644 index 000000000..3ee5e3610 --- /dev/null +++ b/t/25-analysis-illumina_basecall_stats.t @@ -0,0 +1,70 @@ +use strict; +use warnings; +use Test::More tests => 4; +use Test::Exception; +use Cwd; +use Log::Log4perl qw(:levels); + +use npg_tracking::util::abs_path qw(abs_path); +use t::util; + +my $util = t::util->new(); + +my $curdir = abs_path(getcwd()); +my $tdir = $util->temp_directory(); + +local $ENV{NPG_WEBSERVICE_CACHE_DIR} = $curdir . 
q{/t/data}; +local $ENV{TEST_FS_RESOURCE} = q{nfs_12}; + +Log::Log4perl->easy_init({layout => '%d %-5p %c - %m%n', + level => $DEBUG, + file => join(q[/], $tdir, 'logfile'), + utf8 => 1}); + +my $e = join q[/], $tdir, 'setupBclToQseq.py'; +open my $fh, '>', $e; +print "#!/usr/bin/env python\n"; +close $fh; +chmod 0755, $e; + +local $ENV{PATH} = join q[:], qq[$curdir/t/bin], $tdir, $ENV{PATH}; + +use_ok(q{npg_pipeline::analysis::illumina_basecall_stats}); + +{ + my $runfolder_path = $util->analysis_runfolder_path(); + my $bustard_rta = qq{$runfolder_path/Data/Intensities/Bustard_RTA}; + `mkdir -p $bustard_rta`; + + my $obj; + my $id_run = 1234; + my $bam_basecall_path = $runfolder_path . q{/Data/Intensities/BAM_basecalls}; + my $basecall_path = $runfolder_path . q{/Data/Intensities/BaseCalls}; + lives_ok { + $obj = npg_pipeline::analysis::illumina_basecall_stats->new({ + id_run => $id_run, + run_folder => q{123456_IL2_1234}, + runfolder_path => $runfolder_path, + timestamp => q{20091028-101635}, + verbose => 0, + bam_basecall_path => $bam_basecall_path, + no_bsub => 1, + }) + } q{create object ok}; + + my $arg_refs = { + timestamp => q{20091028-101635}, + position => 1, + job_dependencies => q{-w 'done(1234) && done(4321)'}, + }; + my $mem = 350; + my $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>'MB')->_scale_mem_limit(); + my $expected_command = qq(bsub -q srpipeline -o $bam_basecall_path/log/basecall_stats_1234_20091028-101635.%J.out -J basecall_stats_1234_20091028-101635 -R 'select[mem>).$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M} . $mem_limit . qq( -R 'span[hosts=1]' -n 4 " cd $bam_basecall_path && if [[ -f Makefile ]]; then echo Makefile already present 1>&2; else echo creating bcl2qseq Makefile 1>&2; ) . 
qq($tdir/setupBclToQseq.py -b $basecall_path -o $bam_basecall_path --overwrite; fi && make -j 4 Matrix Phasing && make -j 4 BustardSummary.x{s,m}l "); + is( $obj->_generate_command( $arg_refs ), $expected_command, + q{Illumina basecalls stats generation bsub command is correct} ); + + my @job_ids = $obj->generate($arg_refs); + is( scalar @job_ids, 1, q{1 job ids, generate Illumina basecall stats} ); +} + +1; diff --git a/t/25-harold_calibration_bam.t b/t/25-harold_calibration_bam.t deleted file mode 100644 index e25b03dcc..000000000 --- a/t/25-harold_calibration_bam.t +++ /dev/null @@ -1,80 +0,0 @@ -use strict; -use warnings; -use Test::More tests => 39; -use Test::Exception; -use t::util; -use Cwd; -use Log::Log4perl qw(:levels); - -use npg_tracking::util::abs_path qw(abs_path); -my $util = t::util->new(); - -my $curdir = abs_path(getcwd()); -my $repos = join q[/], $curdir, 't/data/sequence'; - -my $tdir = $util->temp_directory(); -$ENV{TEST_DIR} = $tdir; -$ENV{TEST_FS_RESOURCE} = q{nfs_12}; -$ENV{NPG_WEBSERVICE_CACHE_DIR} = $curdir . 
q{/t/data}; - -Log::Log4perl->easy_init({layout => '%d %-5p %c - %m%n', - level => $DEBUG, - file => join(q[/], $tdir, 'logfile'), - utf8 => 1}); - -local $ENV{PATH} = join q[:], qq[$curdir/t/bin], $tdir, $ENV{PATH}; - -my $id_run; -my $mem_units = 'MB'; - -use_ok(q{npg_pipeline::analysis::harold_calibration_bam}); - -my $runfolder_path = $util->analysis_runfolder_path(); -my $bustard_home = qq{$runfolder_path/Data/Intensities}; -my $bustard_rta = qq{$bustard_home/Bustard_RTA}; -my $gerald_rta = qq{$bustard_rta/GERALD_RTA}; -my $config_path = qq{$runfolder_path/Config}; - -sub set_staging_analysis_area { - `rm -rf /tmp/nfs/sf45`; - `mkdir -p $bustard_rta`; - `mkdir -p $config_path`; - `cp t/data/Recipes/Recipe_GA2_37Cycle_PE_v6.1.xml $runfolder_path/`; - `cp t/data/Recipes/TileLayout.xml $config_path/`; - return 1; -} - -{ - set_staging_analysis_area(); - my $harold; - $id_run = 1234; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => $id_run, - run_folder => q{123456_IL2_1234}, - runfolder_path => $runfolder_path, - timestamp => q{20091028-101635}, - verbose => 0, - repository => $repos, - bam_basecall_path => $runfolder_path . 
q{/Data/Intensities/BAM_basecalls}, - no_bsub => 1, - recalibration => 1, - }); - } q{create $harold object ok}; - - my $arg_refs = { - timestamp => q{20091028-101635}, - position => 1, - job_dependencies => q{-w 'done(1234) && done(4321)'}, - ref_seq => q{t/data/sequence/references/Human/default/all/bwa/someref.fa.bwt}, - }; - my $mem = 350; - my $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>$mem_units)->_scale_mem_limit(); - my $expected_command = q(bsub -q srpipeline -o /nfs/sf45/IL2/analysis/123456_IL2_1234/Data/Intensities/BAM_basecalls/log/basecall_stats_1234_20091028-101635.%J.out -J basecall_stats_1234_20091028-101635 -R 'select[mem>).$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem_limit.q( -R 'span[hosts=1]' -n 4 " cd /nfs/sf45/IL2/analysis/123456_IL2_1234/Data/Intensities/BAM_basecalls && if [[ -f Makefile ]]; then echo Makefile already present 1>&2; else echo creating bcl2qseq Makefile 1>&2; /software/solexa/src/OLB-1.9.4/bin/setupBclToQseq.py -b /nfs/sf45/IL2/analysis/123456_IL2_1234/Data/Intensities/BaseCalls -o /nfs/sf45/IL2/analysis/123456_IL2_1234/Data/Intensities/BAM_basecalls --overwrite; fi && make -j 4 Matrix Phasing && make -j 4 BustardSummary.x{s,m}l "); - is( $util->drop_temp_part_from_paths( $harold->_generate_illumina_basecall_stats_command( $arg_refs ) ), $expected_command, q{Illumina basecalls stats generation bsub command is correct} ); - - my @job_ids = $harold->generate_illumina_basecall_stats($arg_refs); - is( scalar @job_ids, 1, q{1 job ids, generate Illumina basecall stats} ); -} - -1; From bd8f809377d886b89cc13170da307f7d825e6ea8 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Thu, 11 May 2017 12:57:22 +0100 Subject: [PATCH 04/27] tests simplification --- t/20-archive_qc.t | 14 ------ ...archive_file_generation-BamClusterCounts.t | 3 +- t/util.pm | 44 +++---------------- 3 files changed, 6 insertions(+), 55 deletions(-) diff --git a/t/20-archive_qc.t b/t/20-archive_qc.t index 83617ef15..e9e47b090 
100644 --- a/t/20-archive_qc.t +++ b/t/20-archive_qc.t @@ -10,18 +10,14 @@ use File::Slurp; use Cwd; use t::util; -local $ENV{PATH} = join q[:], q[t/bin], q[t/bin/software/solexa/bin], $ENV{PATH}; - use_ok('npg_pipeline::archive::file::qc'); my $util = t::util->new(); my $tmp = $util->temp_directory(); -$ENV{TEST_DIR} = $tmp; $ENV{TEST_FS_RESOURCE} = q{nfs_12}; local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[t/data]; local $ENV{PATH} = join q[:], q[t/bin], q[t/bin/software/solexa/bin], $ENV{PATH}; -my $run_folder = $util->default_runfolder(); my $pbcal = q{/nfs/sf45/IL2/analysis/123456_IL2_1234/Data/Intensities/Bustard1.3.4_09-07-2009_auto/PB_cal}; my $recalibrated = $util->analysis_runfolder_path() . q{/Data/Intensities/Bustard1.3.4_09-07-2009_auto/PB_cal}; @@ -32,7 +28,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; { throws_ok { npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $util->analysis_runfolder_path(), recalibrated_path => $recalibrated, ) @@ -43,7 +38,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; my $aqc; lives_ok { $aqc = npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $util->analysis_runfolder_path(), recalibrated_path => $recalibrated, qc_to_run => q{adapter}, @@ -68,7 +62,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; my $aqc; lives_ok { $aqc = npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $util->analysis_runfolder_path(), recalibrated_path => $recalibrated, qc_to_run => q{qX_yield}, @@ -90,7 +83,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; { my $aqc = npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $util->analysis_runfolder_path(), recalibrated_path => $recalibrated, qc_to_run => q{qX_yield}, @@ -117,7 +109,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; my $runfolder_path = $util->analysis_runfolder_path(); my $aqc = npg_pipeline::archive::file::qc->new( - 
run_folder => $run_folder, runfolder_path => $runfolder_path, recalibrated_path => $recalibrated, lanes => [7], @@ -132,7 +123,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[]; local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/qc/1234_samplesheet_amended.csv'; $aqc = npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $runfolder_path, recalibrated_path => $recalibrated, lanes => [8], @@ -145,7 +135,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; is(scalar@jids, 2, q{2 job ids returned}); # the lane is a pool $aqc = npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $runfolder_path, recalibrated_path => $recalibrated, lanes => [8], @@ -166,7 +155,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; my $runfolder_path = $util->analysis_runfolder_path(); my $aqc = npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $runfolder_path, recalibrated_path => $recalibrated, lanes => [7], @@ -184,7 +172,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; $aqc = npg_pipeline::archive::file::qc->new( id_run => 14353, - run_folder => $run_folder, runfolder_path => $util->analysis_runfolder_path(), recalibrated_path => $recalibrated, lanes => [1], @@ -276,7 +263,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; my $init = { id_run => 14043, - run_folder => $rf_name, runfolder_path => $rf_path, bam_basecall_path => $analysis_dir, archive_path => $archive_dir, diff --git a/t/35-archive_file_generation-BamClusterCounts.t b/t/35-archive_file_generation-BamClusterCounts.t index 59bdc390e..8727200a4 100644 --- a/t/35-archive_file_generation-BamClusterCounts.t +++ b/t/35-archive_file_generation-BamClusterCounts.t @@ -10,7 +10,6 @@ use_ok( q{npg_pipeline::archive::file::BamClusterCounts} ); my $util = t::util->new({}); my $dir = $util->temp_directory(); -$ENV{TEST_DIR} = $dir; local $ENV{NPG_WEBSERVICE_CACHE_DIR} = 
q[t/data]; local $ENV{PATH} = join q[:], q[t/bin], q[t/bin/software/solexa/bin], $ENV{PATH}; @@ -21,7 +20,7 @@ Log::Log4perl->easy_init({layout => '%d %-5p %c - %m%n', $util->create_multiplex_analysis(); my $analysis_runfolder_path = $util->analysis_runfolder_path(); -my $bam_basecall_path = $util->standard_analysis_bustard_path(); +my $bam_basecall_path = $util->standard_bam_basecall_path(); qx{cp t/data/summary_files/BustardSummary_mp.xml $bam_basecall_path/BustardSummary.xml}; my $recalibrated_path = $util->standard_analysis_recalibrated_path(); my $archive_path = $recalibrated_path . q{/archive}; diff --git a/t/util.pm b/t/util.pm index dcecff44b..9634175da 100644 --- a/t/util.pm +++ b/t/util.pm @@ -8,64 +8,31 @@ use Readonly; use Cwd qw(getcwd); use npg::api::request; -Readonly::Scalar our $TEMP_DIR => q{/tmp}; Readonly::Scalar our $NFS_STAGING_DISK => q{/nfs/sf45}; -has q{cwd} => ( - isa => q{Str}, - is => q{ro}, - lazy_build => 1, -); - -sub _build_cwd { - my ( $self ) = @_; - return getcwd(); -} - -# for getting a temporary directory which will clean up itself, and should not clash with other people attempting to run the tests has q{temp_directory} => ( isa => q{Str}, is => q{ro}, lazy_build => 1, ); sub _build_temp_directory { - my ( $self ) = @_; - - my $tempdir = tempdir( - DIR => $TEMP_DIR, - CLEANUP => 1, - ); - return $tempdir; + return tempdir(CLEANUP => 1); } ############### # path setups Readonly::Scalar our $DEFAULT_RUNFOLDER => q{123456_IL2_1234}; - Readonly::Scalar our $ANALYSIS_RUNFOLDER_PATH => $NFS_STAGING_DISK . q{/IL2/analysis/} . $DEFAULT_RUNFOLDER; -Readonly::Scalar our $OUTGOING_RUNFOLDER_PATH => $NFS_STAGING_DISK . q{/IL2/outgoing/} . 
$DEFAULT_RUNFOLDER; Readonly::Scalar our $BUSTARD_PATH => qq{$ANALYSIS_RUNFOLDER_PATH/Data/Intensities/Bustard1.3.4_09-07-2009_auto}; Readonly::Scalar our $RECALIBRATED_PATH => qq{$BUSTARD_PATH/PB_cal}; -sub default_runfolder { - my ( $self ) = @_; - return $DEFAULT_RUNFOLDER; -} - -sub test_run_folder { - my ($self) = @_; - my $test_run_folder_path = $self->temp_directory() . $ANALYSIS_RUNFOLDER_PATH; - my ($run_folder) = $test_run_folder_path =~ /(\d+_IL\d+_\d+)/xms; - return $run_folder; -} - sub analysis_runfolder_path { my ( $self ) = @_; return $self->temp_directory() . $ANALYSIS_RUNFOLDER_PATH; } -sub standard_analysis_bustard_path { +sub standard_bam_basecall_path { my ( $self ) = @_; return $self->temp_directory() . $BUSTARD_PATH; } @@ -169,7 +136,7 @@ sub set_rta_staging_analysis_area { } `cp t/data/Recipes/TileLayout.xml $analysis_runfolder_path/Config/`; `touch $recalibrated_path/touch_file`; - return {bustard_path => $bustard_path, recalibrated_path => $recalibrated_path, runfolder_path => $analysis_runfolder_path}; + return {recalibrated_path => $recalibrated_path, runfolder_path => $analysis_runfolder_path}; } sub remove_staging { @@ -185,17 +152,16 @@ sub remove_staging { sub drop_temp_part_from_paths { my ( $self, $path ) = @_; my $temp_dir = $self->temp_directory(); - my $cwd = $self->cwd(); + my $cwd = getcwd(); $path =~ s{\Q$temp_dir\E}{}gxms; $path =~ s{\Q$cwd/\E}{}gxms; $path =~ s{\Q$cwd\E}{}gxms; return $path; } -# ensure that the environment variables do not get passed around and that extraneous files do not get left behind +# ensure that the environment variables do not get passed around sub DEMOLISH { $ENV{ npg::api::request->cache_dir_var_name() } = q{}; - unlink 'Latest_Summary'; } 1; From 1a91fcd13fd35768cd2ea0a4016ffc062bd6f12a Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Mon, 15 May 2017 15:32:31 +0100 Subject: [PATCH 05/27] redirect stderr output to a log --- Changes | 3 +++ bin/npg_pipeline_central | 1 + 
bin/npg_pipeline_post_qc_review | 1 + 3 files changed, 5 insertions(+) diff --git a/Changes b/Changes index ed0719b55..caa0c00c7 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,9 @@ LIST OF CHANGES --------------- + - pipeline scripts - redirect stderr output to the log to capture output from all + NPG and CPAN modules in one place + release 51.9 - p4stage2 speed-up by caching references - p4stage2 errors in getting a reference made fatal diff --git a/bin/npg_pipeline_central b/bin/npg_pipeline_central index 1b792e46b..c4ef81024 100755 --- a/bin/npg_pipeline_central +++ b/bin/npg_pipeline_central @@ -23,6 +23,7 @@ Log::Log4perl->easy_init({layout => $layout, file => $logfile, utf8 => 1}); +$p->redirect_stderr(); $p->main(); 0; diff --git a/bin/npg_pipeline_post_qc_review b/bin/npg_pipeline_post_qc_review index 1849b7de8..a7894e270 100755 --- a/bin/npg_pipeline_post_qc_review +++ b/bin/npg_pipeline_post_qc_review @@ -23,6 +23,7 @@ Log::Log4perl->easy_init({layout => $layout, file => $logfile, utf8 => 1}); +$p->redirect_stderr(); $p->main(); 0; From 204180b065834661ce9ad99f90e9f87ce25c3b83 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Wed, 17 May 2017 22:18:45 +0100 Subject: [PATCH 06/27] fix job id propagation --- lib/npg_pipeline/pluggable/harold/central.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/npg_pipeline/pluggable/harold/central.pm b/lib/npg_pipeline/pluggable/harold/central.pm index 44ad67d16..26b6aa45f 100644 --- a/lib/npg_pipeline/pluggable/harold/central.pm +++ b/lib/npg_pipeline/pluggable/harold/central.pm @@ -103,8 +103,9 @@ sub illumina_basecall_stats { $self->info(q{HiSeqX sequencing instrument, illumina_basecall_stats will not be run}); return (); } + my $required_job_completion = shift @args; return $self->new_with_cloned_attributes(q{npg_pipeline::analysis::illumina_basecall_stats}) - ->generate(@args); + ->generate({required_job_completion => $required_job_completion,}); } =head2 split_bam_by_tag From 
09d39d7154967c54274eb571fd9add50194641a0 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Thu, 18 May 2017 10:29:33 +0100 Subject: [PATCH 07/27] fixed bug in computing a path --- lib/npg_pipeline/pluggable/harold/central.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/npg_pipeline/pluggable/harold/central.pm b/lib/npg_pipeline/pluggable/harold/central.pm index 26b6aa45f..0f042f142 100644 --- a/lib/npg_pipeline/pluggable/harold/central.pm +++ b/lib/npg_pipeline/pluggable/harold/central.pm @@ -63,7 +63,7 @@ sub _set_paths { $self->info('Intensities path: ', $self->intensity_path() ); if ( ! $self->has_basecall_path() ) { - my $bpath = join $sep, $self->intensity_path() . q{BaseCalls}; + my $bpath = join $sep, $self->intensity_path() , q{BaseCalls}; if (!-e $bpath) { $self->warn(qq{BaseCalls path $bpath not found}); $bpath = $self->runfolder_path(); From 591b8aaad3092157509ef0a80dbd5ee60beeb85d Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Thu, 18 May 2017 16:36:26 +0100 Subject: [PATCH 08/27] remove gclp-specific code and conf. 
files --- Changes | 2 + MANIFEST | 2 - .../function_list_central_gclp.yml | 26 ------- .../function_list_post_qc_review_gclp.yml | 11 --- .../archive/file/generation/seq_alignment.pm | 4 +- lib/npg_pipeline/archive/file/logs.pm | 21 ++---- lib/npg_pipeline/archive/file/to_irods.pm | 9 --- lib/npg_pipeline/base.pm | 13 +--- lib/npg_pipeline/daemon.pm | 1 - lib/npg_pipeline/daemon/analysis.pm | 36 +++------- lib/npg_pipeline/daemon/archival.pm | 7 +- .../pluggable/harold/post_qc_review.pm | 22 ------ lib/npg_pipeline/roles/business/base.pm | 13 ---- t/10-base.t | 18 +---- t/10-pluggable_harold_central.t | 2 +- t/10-pluggable_harold_post_qc_review.t | 8 +-- t/15-pipeline_launcher_scripts.t | 12 +--- t/20-archive_file-to_irods.t | 23 +------ t/20-archive_logs.t | 23 +------ t/50-npg_pipeline-daemon-analysis.t | 67 ++++--------------- t/50-npg_pipeline-daemon-archival.t | 5 +- t/data/study_analysis_conf/study_analysis.yml | 1 - 22 files changed, 46 insertions(+), 280 deletions(-) delete mode 100644 data/config_files/function_list_central_gclp.yml delete mode 100644 data/config_files/function_list_post_qc_review_gclp.yml diff --git a/Changes b/Changes index ed0719b55..dfbdffb12 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,8 @@ LIST OF CHANGES --------------- + - remove GCLP-specific code and configuration files + release 51.9 - p4stage2 speed-up by caching references - p4stage2 errors in getting a reference made fatal diff --git a/MANIFEST b/MANIFEST index 99e6486a0..4ba5080d6 100644 --- a/MANIFEST +++ b/MANIFEST @@ -11,11 +11,9 @@ bin/script_must_be_unique_runner Build.PL Changes data/config_files/function_list_central.yml -data/config_files/function_list_central_gclp.yml data/config_files/function_list_central_olb.yml data/config_files/function_list_central_qc_run.yml data/config_files/function_list_post_qc_review.yml -data/config_files/function_list_post_qc_review_gclp.yml data/config_files/general_values.ini data/config_files/illumina_pipeline.ini 
data/config_files/parallelisation.yml diff --git a/data/config_files/function_list_central_gclp.yml b/data/config_files/function_list_central_gclp.yml deleted file mode 100644 index c671e8018..000000000 --- a/data/config_files/function_list_central_gclp.yml +++ /dev/null @@ -1,26 +0,0 @@ ---- -- create_archive_directory -- create_empty_fastq -- create_summary_link_analysis -- run_analysis_in_progress -- lane_analysis_in_progress -- illumina_basecall_stats -- p4_stage1_analysis -- update_ml_warehouse -- run_secondary_analysis_in_progress -- bam2fastqcheck_and_cached_fastq -- qc_qX_yield -- qc_insert_size -- qc_sequence_error -- qc_gc_fraction -- qc_ref_match -- seq_alignment -- bam_cluster_counter_check -- seqchksum_comparator -- qc_pulldown_metrics -- qc_genotype -- qc_verify_bam_id -- qc_upstream_tags -- run_analysis_complete -- update_ml_warehouse -- run_qc_review_pending diff --git a/data/config_files/function_list_post_qc_review_gclp.yml b/data/config_files/function_list_post_qc_review_gclp.yml deleted file mode 100644 index c787e846d..000000000 --- a/data/config_files/function_list_post_qc_review_gclp.yml +++ /dev/null @@ -1,11 +0,0 @@ ---- -- run_archival_in_progress -- update_ml_warehouse -- archive_to_irods -- upload_fastqcheck_to_qc_database -- upload_illumina_analysis_to_qc_database -- upload_auto_qc_to_qc_database -- run_run_archived -- run_qc_complete -- update_ml_warehouse_post_qc_complete - diff --git a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm index ebad3ba49..a8a61b2ac 100644 --- a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm +++ b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm @@ -229,8 +229,8 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity my $do_rna = $self->_do_rna_analysis($l); my $hs_bwa = ($self->is_paired_read ? 
'bwa_aln' : 'bwa_aln_se'); - # continue to use the "aln" algorithm from bwa for these older chemistries (where read length <= 100bp) unless GCLP - my $bwa = ($self->gclp or $self->is_hiseqx_run or $self->_has_newer_flowcell or any {$_ >= $FORCE_BWAMEM_MIN_READ_CYCLES } $self->read_cycle_counts) + # continue to use the "aln" algorithm from bwa for these older chemistries (where read length <= 100bp) + my $bwa = ($self->is_hiseqx_run or $self->_has_newer_flowcell or any {$_ >= $FORCE_BWAMEM_MIN_READ_CYCLES } $self->read_cycle_counts) ? 'bwa_mem' : $hs_bwa; diff --git a/lib/npg_pipeline/archive/file/logs.pm b/lib/npg_pipeline/archive/file/logs.pm index 863ad9af1..922222e5d 100644 --- a/lib/npg_pipeline/archive/file/logs.pm +++ b/lib/npg_pipeline/archive/file/logs.pm @@ -6,16 +6,11 @@ extends qw{npg_pipeline::base}; our $VERSION = '0'; -has 'irods_root' => ( isa => 'Str', - is => 'rw', - lazy_build => 1, +has 'irods_root' => ( isa => 'Str', + is => 'rw', + default => '/seq/', ); -sub _build_irods_root { - my $self = shift; - return $self->gclp ? q(/gseq/) : q(/seq/); -} - sub submit_to_lsf { my ($self, $arg_refs) = @_; my $job_sub = $self->_generate_bsub_command($arg_refs); @@ -26,7 +21,6 @@ sub submit_to_lsf { sub _generate_bsub_command { my ($self, $arg_refs) = @_; - my $irodsinstance = $self->gclp ? q(gclp) : q(); my $id_run = $self->id_run(); my $required_job_completion = $arg_refs->{'required_job_completion'}; @@ -47,18 +41,11 @@ sub _generate_bsub_command { $bsub_command .= q{-o } . $location_of_logs . qq{/$job_name.out }; my $future_path = $self->path_in_outgoing($self->runfolder_path()); - $bsub_command .= qq{-E "[ -d '$future_path' ]" }; + $bsub_command .= qq{-E "[ -d '$future_path' ]" }; $bsub_command .= q{'}; - - if ($irodsinstance) { - $bsub_command .= q{irodsEnvFile=$}.q{HOME/.irods/.irodsEnv-} . $irodsinstance . q{-iseq-logs }; - } - $bsub_command .= $archive_script . q{ --runfolder_path } . $future_path . q{ --id_run } . 
$self->id_run(); - $bsub_command .= q{ --irods_root } . $self->irods_root(); - $bsub_command .= q{'}; $self->debug($bsub_command); diff --git a/lib/npg_pipeline/archive/file/to_irods.pm b/lib/npg_pipeline/archive/file/to_irods.pm index eb4d6a8a9..195449bb3 100644 --- a/lib/npg_pipeline/archive/file/to_irods.pm +++ b/lib/npg_pipeline/archive/file/to_irods.pm @@ -20,7 +20,6 @@ sub submit_to_lsf { sub _generate_bsub_command { my ($self, $arg_refs) = @_; - my $irodsinstance = $self->gclp() ? q(gclp) : q(); my $id_run = $self->id_run(); my @positions = $self->positions(); @@ -48,10 +47,6 @@ sub _generate_bsub_command { $bsub_command .= q{-o } . $location_of_logs . qq{/$job_name.out }; $bsub_command .= q{'}; - if($irodsinstance){ - $bsub_command .= q{irodsEnvFile=$}.q{HOME/.irods/.irodsEnv-} . $irodsinstance . q{-iseq }; - } - ##no critic (ValuesAndExpressions::RequireInterpolationOfMetachars) my $publish_process_log_name = q(process_publish_${LSB_JOBID}.json); ##use critic @@ -66,10 +61,6 @@ sub _generate_bsub_command { $bsub_command .= q{ --alt_process qc_run}; } - if($irodsinstance){ - $bsub_command .= q{ --collection /14mg/seq/illumina/run/} . 
$self->id_run(); - } - if($position_list){ $bsub_command .= $position_list } diff --git a/lib/npg_pipeline/base.pm b/lib/npg_pipeline/base.pm index fc07b377e..28de74135 100644 --- a/lib/npg_pipeline/base.pm +++ b/lib/npg_pipeline/base.pm @@ -28,7 +28,7 @@ with q{npg_tracking::illumina::run::long_info}; with q{npg_pipeline::roles::business::flag_options}; Readonly::Scalar my $DEFAULT_JOB_ID_FOR_NO_BSUB => 50; -Readonly::Array my @FLAG2FUNCTION_LIST => qw/ olb qc_run gclp /; +Readonly::Array my @FLAG2FUNCTION_LIST => qw/ olb qc_run /; $ENV{LSB_DEFAULTPROJECT} ||= q{pipeline}; @@ -254,12 +254,9 @@ has q{force_p4} => ( isa => q{Bool}, is => q{ro}, lazy_build => 1, - documentation => q{Boolean decision to force on P4 pipeline usage, default true iff GCLP}, + documentation => q{Boolean decision to force on P4 pipeline usage, default is false}, ); -sub _build_force_p4 { - my ($self) = @_; - return $self->gclp; -} + =head2 verbose @@ -420,10 +417,6 @@ around 'function_list' => sub { return $file; }; -sub _build_gclp { - my ($self) = @_; - return $self->has_function_list && $self->function_list =~ /gclp/ismx; -} =head2 function_list_conf diff --git a/lib/npg_pipeline/daemon.pm b/lib/npg_pipeline/daemon.pm index 589feabf6..b9c956010 100644 --- a/lib/npg_pipeline/daemon.pm +++ b/lib/npg_pipeline/daemon.pm @@ -194,7 +194,6 @@ sub check_lims_link { my $lims = {}; $lims->{'id'} = $batch_id; if ($fcell_row) { - $lims->{'gclp'} = $fcell_row->from_gclp; $lims->{'qc_run'} = (defined $fcell_row->purpose && $fcell_row->purpose eq 'qc') ? 
1 : undef; } else { $lims->{'qc_run'} = diff --git a/lib/npg_pipeline/daemon/analysis.pm b/lib/npg_pipeline/daemon/analysis.pm index f032f0550..3ec35292e 100644 --- a/lib/npg_pipeline/daemon/analysis.pm +++ b/lib/npg_pipeline/daemon/analysis.pm @@ -16,7 +16,6 @@ Readonly::Scalar my $PIPELINE_SCRIPT => q{npg_pipeline_central}; Readonly::Scalar my $DEFAULT_JOB_PRIORITY => 50; Readonly::Scalar my $RAPID_RUN_JOB_PRIORITY => 60; Readonly::Scalar my $ANALYSIS_PENDING => q{analysis pending}; -Readonly::Scalar my $GCLP_STUDY_KEY => q{gclp_all_studies}; Readonly::Scalar my $PATH_DELIM => q{:}; sub build_pipeline_script_name { @@ -82,7 +81,7 @@ sub _process_one_run { $arg_refs->{'job_priority'} += $inherited_priority; } $arg_refs->{'rf_path'} = $self->runfolder_path4run($id_run); - $arg_refs->{'software'} = $self->_software_bundle($arg_refs->{'gclp'} ? 1 : 0, $arg_refs->{'studies'}); + $arg_refs->{'software'} = $self->_software_bundle($arg_refs->{'studies'}); $self->run_command( $id_run, $self->_generate_command( $arg_refs )); @@ -90,16 +89,13 @@ sub _process_one_run { } sub _software_bundle { - my ($self, $is_gclp_run, $studies) = @_; + my ($self, $studies) = @_; - if (!defined $is_gclp_run) { - $self->logcroak('GCLP flag is not defined'); - } if (!$studies) { $self->logcroak('Study ids are missing'); } - my @s = $is_gclp_run ? ($GCLP_STUDY_KEY) : @{$studies}; + my @s = @{$studies}; my $conf = $self->study_analysis_conf(); @@ -109,10 +105,6 @@ sub _software_bundle { } my $software_dir = @software ? 
$software[0] : q[]; - if ($is_gclp_run && !$software_dir) { - $self->logcroak(q{GCLP run needs explicit software bundle}); - } - if ($software_dir && !-d $software_dir) { $self->logcroak(qq{Directory '$software_dir' does not exist}); } @@ -138,21 +130,15 @@ sub _generate_command { $arg_refs->{'job_priority'}, $arg_refs->{'rf_path'}; - if ( $arg_refs->{'gclp'} ) { - $self->info('GCLP run'); - $cmd .= q{ --function_list gclp}; - } else { - $self->info('Non-GCLP run'); - if (!$arg_refs->{'id'}) { - # Batch id is needed for MiSeq runs, including qc runs - $self->logcroak(q{Lims flowcell id is missing}); - } - if ($arg_refs->{'qc_run'}) { - $cmd .= q{ --qc_run}; - $self->info('QC run'); - } - $cmd .= q{ --id_flowcell_lims } . $arg_refs->{'id'}; + if (!$arg_refs->{'id'}) { + # Batch id is needed for MiSeq runs, including qc runs + $self->logcroak(q{Lims flowcell id is missing}); + } + if ($arg_refs->{'qc_run'}) { + $cmd .= q{ --qc_run}; + $self->info('QC run'); } + $cmd .= q{ --id_flowcell_lims } . $arg_refs->{'id'}; my $path = join $PATH_DELIM, $self->local_path(), $ENV{'PATH'}; my $analysis_path_root = $arg_refs->{'software'}; diff --git a/lib/npg_pipeline/daemon/archival.pm b/lib/npg_pipeline/daemon/archival.pm index 8474bdf96..874255734 100644 --- a/lib/npg_pipeline/daemon/archival.pm +++ b/lib/npg_pipeline/daemon/archival.pm @@ -27,7 +27,7 @@ sub run { } else { if ( $self->staging_host_match($run->folder_path_glob)) { my $lims = $self->check_lims_link($run); - $self->run_command($id_run, $self->_generate_command($id_run, $lims->{'gclp'})); + $self->run_command($id_run, $self->_generate_command($id_run)); } } } catch { @@ -39,12 +39,9 @@ sub run { } sub _generate_command { - my ($self, $id_run, $gclp) = @_; - - $self->info($gclp ? 'GCLP run' : 'Non-GCLP run'); + my ($self, $id_run) = @_; my $cmd = $self->pipeline_script_name(); - $cmd = $cmd . ($gclp ? q{ --function_list gclp} : q()); $cmd = $cmd . q{ --verbose --runfolder_path } . 
$self->runfolder_path4run($id_run); my $path = join q[:], $self->local_path(), $ENV{PATH}; my $prefix = $self->daemon_conf()->{'command_prefix'}; diff --git a/lib/npg_pipeline/pluggable/harold/post_qc_review.pm b/lib/npg_pipeline/pluggable/harold/post_qc_review.pm index 4b71d58c4..ed6e86ba4 100644 --- a/lib/npg_pipeline/pluggable/harold/post_qc_review.pm +++ b/lib/npg_pipeline/pluggable/harold/post_qc_review.pm @@ -26,28 +26,6 @@ Pluggable pipeline module for the post_qc_review pipeline =head1 SUBROUTINES/METHODS -=head2 archive_to_irods - - upload all archival files to irods (used by GCLP only) - -=cut - -sub archive_to_irods { - my ($self, @args) = @_; - - if ($self->no_irods_archival) { - $self->warn(q{Archival to iRODS is switched off.}); - return (); - } - my $required_job_completion = shift @args; - my $ats = $self->new_with_cloned_attributes(q{npg_pipeline::archive::file::to_irods}); - my @job_ids = $ats->submit_to_lsf({ - required_job_completion => $required_job_completion, - }); - - return @job_ids; -} - =head2 archive_to_irods_samplesheet upload all archival files using the samplesheet LIMS driver diff --git a/lib/npg_pipeline/roles/business/base.pm b/lib/npg_pipeline/roles/business/base.pm index 3db3a34de..84b8be0ec 100644 --- a/lib/npg_pipeline/roles/business/base.pm +++ b/lib/npg_pipeline/roles/business/base.pm @@ -215,19 +215,6 @@ sub _build_is_hiseqx_run { return $self->run->instrument->name =~ /\AH[XF]/xms; } -=head2 gclp - -Boolean describing whether this analysis is GCLP - -=cut - -has q{gclp} => ( - isa => q{Bool}, - is => q{ro}, - lazy_build => 1, - documentation => q{Boolean describing whether this analysis is GCLP with a default based on the function_list if set}, -); - =head2 positions An array of lane positions for this submission. 
diff --git a/t/10-base.t b/t/10-base.t index 71d318372..fd9a2b833 100644 --- a/t/10-base.t +++ b/t/10-base.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 91; +use Test::More tests => 84; use Test::Exception; use File::Temp qw(tempdir tempfile); use File::Copy qw(cp); @@ -63,7 +63,6 @@ use_ok(q{npg_pipeline::base}); { my $base = npg_pipeline::base->new(); - ok( !$base->gclp, 'function list not set and correctly defaults as not GCLP'); my $path = "${config_dir}/function_list_base.yml"; @@ -79,14 +78,6 @@ use_ok(q{npg_pipeline::base}); $path =~ s/function_list_base/function_list_central/; $base = npg_pipeline::base->new(function_list => $path); is( $base->function_list, $path, 'function list path as given'); - ok(!$base->gclp, 'function list set and correctly identified as not GCLP'); - isa_ok( $base->function_list_conf(), q{ARRAY}, 'function list is read into an array'); - - my $gpath=$path; - $gpath =~ s/function_list_central/function_list_central_gclp/; - $base = npg_pipeline::base->new(function_list => $gpath); - is( $base->function_list, $gpath, 'GCLP function list path as given'); - ok( $base->gclp, 'function list set and correctly identified as GCLP'); isa_ok( $base->function_list_conf(), q{ARRAY}, 'function list is read into an array'); $base = npg_pipeline::base->new(function_list => 'data/config_files/function_list_central.yml'); @@ -251,13 +242,6 @@ package main; my $fl = "${config_dir}/function_list_central_qc_run.yml"; is( $base->function_list, $fl, 'qc function list'); - $base = mytest::central->new(id_flowcell_lims => 3456, gclp => 1); - my $gfl = "${config_dir}/function_list_central_gclp.yml"; - is( $base->function_list, $gfl, 'gclp function list'); - - $base = mytest::central->new(id_flowcell_lims => 3456, function_list => 'gclp'); - is( $base->function_list, $gfl, 'gclp function list'); - $base = npg_pipeline::base->new(id_flowcell_lims => '3980331130775'); my $path = "${config_dir}/function_list_base_qc_run.yml"; throws_ok { 
$base->function_list } diff --git a/t/10-pluggable_harold_central.t b/t/10-pluggable_harold_central.t index 5f5f0e385..c3f24b5ce 100644 --- a/t/10-pluggable_harold_central.t +++ b/t/10-pluggable_harold_central.t @@ -137,7 +137,7 @@ my $runfolder_path = $util->analysis_runfolder_path(); my $pb; $util->set_staging_analysis_area(); my $init = { - function_order => [qw{illumina2bam qc_qX_yield qc_adapter update_warehouse qc_insert_size archive_to_irods}], + function_order => [qw{illumina2bam qc_qX_yield qc_adapter update_warehouse qc_insert_size}], lanes => [4], runfolder_path => $runfolder_path, no_bsub => 1, diff --git a/t/10-pluggable_harold_post_qc_review.t b/t/10-pluggable_harold_post_qc_review.t index 40948cea7..7ae362e67 100644 --- a/t/10-pluggable_harold_post_qc_review.t +++ b/t/10-pluggable_harold_post_qc_review.t @@ -106,8 +106,8 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); no_irods_archival => 1, no_warehouse_update => 1, ); - ok(!($p->archive_to_irods() || $p->archive_to_irods_samplesheet() || - $p->archive_to_irods_ml_warehouse()), 'archival to irods switched off'); + ok(!($p->archive_to_irods_samplesheet() || $p->archive_to_irods_ml_warehouse()), + 'archival to irods switched off'); ok(!$p->update_warehouse(), 'update to warehouse switched off'); } @@ -116,7 +116,7 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); runfolder_path => $runfolder_path, local => 1, ); - ok(! ($p->archive_to_irods() || $p->archive_to_irods_samplesheet() || + ok(! 
($p->archive_to_irods_samplesheet() || $p->archive_to_irods_ml_warehouse()), 'archival to irods switched off'); ok(!$p->update_warehouse(), 'update to warehouse switched off'); is($p->no_summary_link,1, 'summary_link switched off'); @@ -128,7 +128,7 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); local => 1, no_warehouse_update => 0, ); - ok(!($p->archive_to_irods() || $p->archive_to_irods_samplesheet() || + ok(!($p->archive_to_irods_samplesheet() || $p->archive_to_irods_ml_warehouse()), 'archival to irods switched off'); ok($p->update_warehouse(), 'update to warehouse switched on'); is($p->no_summary_link,1, 'summary_link switched off'); diff --git a/t/15-pipeline_launcher_scripts.t b/t/15-pipeline_launcher_scripts.t index b2bfce25e..35bb47ab3 100644 --- a/t/15-pipeline_launcher_scripts.t +++ b/t/15-pipeline_launcher_scripts.t @@ -1,7 +1,7 @@ use strict; use warnings; use English qw{-no_match_vars}; -use Test::More tests => 12; +use Test::More tests => 8; use Test::Exception; use t::util; use Cwd; @@ -57,16 +57,6 @@ my $bin = $curdir . 
q[/bin]; q{ran bin/npg_pipeline_post_qc_review}; ok(!$CHILD_ERROR, qq{Return code of $CHILD_ERROR}); - lives_ok { qx{ - $bin/npg_pipeline_post_qc_review --runfolder_path $tmp_dir/nfs/sf45/IL2/analysis/123456_IL2_1234 --gclp}; } - q{ran bin/npg_pipeline_post_qc_review with gclp flag}; - ok(!$CHILD_ERROR, qq{Return code of $CHILD_ERROR}); - - lives_ok { qx{ - $bin/npg_pipeline_post_qc_review --runfolder_path $tmp_dir/nfs/sf45/IL2/analysis/123456_IL2_1234 --function_list gclp}; } - q{ran bin/npg_pipeline_post_qc_review with gclp function list}; - ok(!$CHILD_ERROR, qq{Return code of $CHILD_ERROR}); - lives_ok { qx{ $bin/npg_pipeline_post_qc_review --runfolder_path $tmp_dir/nfs/sf45/IL2/analysis/123456_IL2_1234 --function_list some}; } q{ran bin/npg_pipeline_post_qc_review with non-exisitng function list}; diff --git a/t/20-archive_file-to_irods.t b/t/20-archive_file-to_irods.t index f05aed820..d3bc50bd2 100644 --- a/t/20-archive_file-to_irods.t +++ b/t/20-archive_file-to_irods.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 15; +use Test::More tests => 13; use Test::Exception; use t::util; @@ -117,26 +117,5 @@ sub create_analysis { is( $bsub_command, $expected_command, q{generated bsub command is correct} ); } -{ - my $bam_irods; - lives_ok { $bam_irods = npg_pipeline::archive::file::to_irods->new( - function_list => q{post_qc_review_gclp}, - run_folder => q{123456_IL2_1234}, - runfolder_path => $analysis_runfolder_path, - id_flowcell_lims => q{1023456789111}, - recalibrated_path => $pb_cal_path, - timestamp => q{20090709-123456}, - verbose => 0, - ); } q{created with run_folder ok}; - - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - my $archive_path = "$pb_cal_path/archive"; - my $bsub_command = $bam_irods ->_generate_bsub_command($arg_refs); - my $expected_command = qq[bsub -q lowload -w'done(123) && done(321)' -J npg_publish_illumina_run.pl_1234_20090709-123456 -R 'rusage[nfs_12=1,seq_irods=15]' -E 
'script_must_be_unique_runner -job_name="npg_publish_illumina_run.pl_1234"' -o $pb_cal_path/log/npg_publish_illumina_run.pl_1234_20090709-123456.out 'irodsEnvFile=\$HOME/.irods/.irodsEnv-gclp-iseq npg_publish_illumina_run.pl --archive_path $archive_path --runfolder_path $analysis_runfolder_path --restart_file ${archive_path}/process_publish_\${LSB_JOBID}.json --max_errors 10 --alt_process qc_run --collection /14mg/seq/illumina/run/1234']; - is( $bsub_command, $expected_command, q{generated bsub command is correct} ); -} - 1; __END__ diff --git a/t/20-archive_logs.t b/t/20-archive_logs.t index ff16ad1cb..2efb55db7 100644 --- a/t/20-archive_logs.t +++ b/t/20-archive_logs.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 13; +use Test::More tests => 11; use Test::Exception; use t::util; @@ -89,26 +89,5 @@ sub create_analysis { is( $bsub_command, $expected_command, q{generated bsub command is correct} ); } -{ - my $bam_irods; - - lives_ok { $bam_irods = npg_pipeline::archive::file::logs->new( - function_list => q{post_qc_review_gclp}, - run_folder => q{123456_IL2_1234}, - runfolder_path => $analysis_runfolder_path, - recalibrated_path => $pb_cal_path, - timestamp => q{20090709-123456}, - verbose => 0, - ); } q{created with run_folder ok}; - - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - my $bsub_command = $util->drop_temp_part_from_paths( $bam_irods ->_generate_bsub_command($arg_refs) ); - my $expected_command = qq{bsub -q lowload -w'done(123) && done(321)' -J npg_irods_log_loader.pl_1234_20090709-123456 -R 'rusage[nfs_12=1,seq_irods=15]' -o ${rfpath}/Data/Intensities/Bustard1.3.4_09-07-2009_auto/PB_cal/log/npg_irods_log_loader.pl_1234_20090709-123456.out -E "[ -d '$rfpath' ]" 'irodsEnvFile=\$HOME/.irods/.irodsEnv-gclp-iseq-logs npg_irods_log_loader.pl --runfolder_path $rfpath --id_run 1234 --irods_root /gseq/'}; - is( $bsub_command, $expected_command, q{generated bsub command is correct} ); -} - 1; __END__ 
diff --git a/t/50-npg_pipeline-daemon-analysis.t b/t/50-npg_pipeline-daemon-analysis.t index 0909e9e1d..35f389f52 100644 --- a/t/50-npg_pipeline-daemon-analysis.t +++ b/t/50-npg_pipeline-daemon-analysis.t @@ -57,7 +57,7 @@ sub runfolder_path4run { return '/some/path' }; package main; subtest 'staging host matching' => sub { - plan tests => 26; + plan tests => 24; my $path49 = '/{export,nfs}/sf49/ILorHSany_sf49/*/'; my $path32 = '/{export,nfs}/sf32/ILorHSany_sf32/*/'; @@ -75,30 +75,13 @@ subtest 'staging host matching' => sub { throws_ok { $runner->_generate_command( { rf_path => $rf_path, job_priority => 50, - }) } qr/Lims flowcell id is missing/, - 'non-gclp run and lims flowcell id is missing - error'; + }) } qr/Lims flowcell id is missing/, 'lims flowcell id is missing - error'; like($runner->_generate_command( { rf_path => $rf_path, job_priority => 50, id => 1480, - } ), qr/$command_start $rf_path/, - q{generated command is correct}); - - like($runner->_generate_command( { - rf_path => $rf_path, - job_priority => 50, - gclp => 1, - } ), qr/$command_start $rf_path --function_list gclp/, - q{generated command is correct}); - - like($runner->_generate_command( { - rf_path => $rf_path, - job_priority => 50, - gclp => 1, - id => 22, - }), qr/$command_start $rf_path --function_list gclp/, - q{generated command is correct}); + } ), qr/$command_start $rf_path/, q{generated command is correct}); ok($runner->green_host,'running on a host in a green datacentre'); ok($runner->staging_host_match($path49), 'staging matches host'); @@ -181,7 +164,7 @@ subtest 'failure to retrive lims data' => sub { }; subtest 'retrieve lims data' => sub { - plan tests => 28; + plan tests => 19; my $runner; lives_ok { $runner = $package->new( @@ -206,7 +189,6 @@ subtest 'retrieve lims data' => sub { my $lims_data = $runner->check_lims_link($test_run); is ($lims_data->{'id'}, '1234567891234', 'lims id'); is ($lims_data->{'qc_run'}, 1, 'is qc run'); - ok(!$lims_data->{'gclp'}, 'gclp flag is 
false'); is_deeply($lims_data->{'studies'}, [], 'studies not retrieved'); $test_run->update({'batch_id' => 55}); @@ -227,14 +209,6 @@ subtest 'retrieve lims data' => sub { $fc_row->update({'id_lims' => 'SSCAPE'}); $lims_data = $runner->check_lims_link($test_run); is ($lims_data->{'id'}, undef, 'lims id is undefined'); - ok(!$lims_data->{'gclp'}, 'gclp flag is false'); - is ($lims_data->{'qc_run'}, undef, 'qc run flag is not set'); - is(join(q[:], @{$lims_data->{'studies'}}), '2967', 'studies retrieved'); - - $fc_row->update({'id_lims' => 'C_GCLP'}); - $lims_data = $runner->check_lims_link($test_run); - is ($lims_data->{'id'}, undef, 'lims id is undefined'); - is ($lims_data->{'gclp'}, 1, 'gclp flag is set to true'); is ($lims_data->{'qc_run'}, undef, 'qc run flag is not set'); is(join(q[:], @{$lims_data->{'studies'}}), '2967', 'studies retrieved'); @@ -242,24 +216,19 @@ subtest 'retrieve lims data' => sub { $fc_row->update({'id_flowcell_lims' => 55}); $lims_data = $runner->check_lims_link($test_run); is ($lims_data->{'id'}, 55, 'lims id is set'); - is ($lims_data->{'gclp'}, 1, 'gclp flag is set to true'); is ($lims_data->{'qc_run'}, undef, 'qc run flag is not set'); is(join(q[:], @{$lims_data->{'studies'}}), '2967', 'studies retrieved'); $fc_row->update({'id_lims' => 'SSCAPE'}); $lims_data = $runner->check_lims_link($test_run); is ($lims_data->{'id'}, 55, 'lims id is set'); - ok (!$lims_data->{'gclp'}, 'gclp flag is false'); is ($lims_data->{'qc_run'}, undef, 'qc run flag is not set'); $fc_row->update({'id_lims' => 'SSCAPE'}); $fc_row->update({'purpose' => 'qc'}); $lims_data = $runner->check_lims_link($test_run); is ($lims_data->{'id'}, 55, 'lims id is set'); - ok (!$lims_data->{'gclp'}, 'gclp flag is false'); is ($lims_data->{'qc_run'}, 1, 'qc run flag is set'); - - }; subtest 'generate command' => sub { @@ -291,7 +260,7 @@ subtest 'generate command' => sub { }; subtest 'retrieve study analysis configuration' => sub { - plan tests => 6; + plan tests => 5; my $d = 
npg_pipeline::daemon::analysis->new(); isa_ok( $d->daemon_conf(), q{HASH}, q{$} . qq{base->daemon_conf} ); @@ -303,13 +272,12 @@ subtest 'retrieve study analysis configuration' => sub { $d = npg_pipeline::daemon::analysis->new(conf_path => 't/data/study_analysis_conf'); my $conf = $d->study_analysis_conf(); isa_ok($conf, 'HASH', 'HASH of study configurations'); - is($conf->{'gclp_all_studies'}, 't/data', 'dated directory name for gclp runs'); is($conf->{'12345'}, 't', 'dated directory name for study 12345'); is($conf->{'XY345'}, '/some/dir', 'dated directory name for study 12345'); }; subtest 'get software bundle' => sub { - plan tests => 11; + plan tests => 7; my $conf_file = join q[/], $temp_directory, 'study_conf.yml'; open my $fh, '>', $conf_file; @@ -325,19 +293,10 @@ subtest 'get software bundle' => sub { ); throws_ok { $runner->_software_bundle() } - qr/GCLP flag is not defined/, - 'error if gclp flag is not defined'; - throws_ok { $runner->_software_bundle(1) } qr/Study ids are missing/, 'error if no study array is given'; - lives_ok { $runner->_software_bundle(0, []) } + lives_ok { $runner->_software_bundle([]) } 'no error if study array is empty'; - throws_ok { $runner->_software_bundle(1, []) } - qr/GCLP run needs explicit software bundle/, - 'GCLP run: no study info - error'; - throws_ok { $runner->_software_bundle(1, [qw/3/]) } - qr/GCLP run needs explicit software bundle/, - 'no GCLP conf - error'; $runner = $package->new( pipeline_script_name => '/bin/true', @@ -346,21 +305,19 @@ subtest 'get software bundle' => sub { conf_path => 't/data/study_analysis_conf', ); - throws_ok { $runner->_software_bundle(0, [qw/3 12345/]) } + throws_ok { $runner->_software_bundle([qw/3 12345/]) } qr/Multiple software bundles for a run/, 'Software and no software - error'; - throws_ok { $runner->_software_bundle(0, [qw/12345 12346/]) } + throws_ok { $runner->_software_bundle([qw/12345 12346/]) } qr/Multiple software bundles for a run/, 'Multiple software bundles - 
error'; - throws_ok { $runner->_software_bundle(0, [qw/XY345/]) } + throws_ok { $runner->_software_bundle([qw/XY345/]) } qr/Directory \'\/some\/dir\' does not exist/, 'directory does not exist - error'; - is($runner->_software_bundle(0, []), q[], 'no study info - no path'); - is($runner->_software_bundle(0, [qw/12346 12347/]), + is($runner->_software_bundle([]), q[], 'no study info - no path'); + is($runner->_software_bundle([qw/12346 12347/]), "${current_dir}/t/data/cache", 'study analysis directory retrieved'); - is($runner->_software_bundle(1, [qw/12346 12347/]), - "${current_dir}/t/data", 'GCLP study analysis directory retrieved'); }; subtest 'mock continious running' => sub { diff --git a/t/50-npg_pipeline-daemon-archival.t b/t/50-npg_pipeline-daemon-archival.t index 9ad7507f3..822ffa066 100644 --- a/t/50-npg_pipeline-daemon-archival.t +++ b/t/50-npg_pipeline-daemon-archival.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 19; +use Test::More tests => 18; use Test::Exception; use Cwd; use List::MoreUtils qw{any}; @@ -58,9 +58,6 @@ package main; like($runner->_generate_command(1234), qr/npg_pipeline_post_qc_review --verbose --runfolder_path \/some\/path/, q{generated command is correct}); - like($runner->_generate_command(1234,1), - qr/npg_pipeline_post_qc_review --function_list gclp --verbose --runfolder_path \/some\/path/, - q{generated gclp command is correct}); ok(!$runner->green_host, 'host is not in green datacentre'); $schema->resultset(q[Run])->find(2)->update_run_status('archival pending', 'pipeline'); diff --git a/t/data/study_analysis_conf/study_analysis.yml b/t/data/study_analysis_conf/study_analysis.yml index 7a9141cb6..43d062e5f 100644 --- a/t/data/study_analysis_conf/study_analysis.yml +++ b/t/data/study_analysis_conf/study_analysis.yml @@ -1,7 +1,6 @@ # Study analysis environment # for gseq farm --- -gclp_all_studies: t/data 12345: t 12346: t/data/cache 12347: t/data/cache From 1559509fe1b36e204a2da8630688f3fb1b78c908 Mon Sep 
17 00:00:00 2001 From: Marina Gourtovaia Date: Thu, 18 May 2017 20:12:26 +0100 Subject: [PATCH 09/27] deleted illumina2bam function --- Changes | 2 + MANIFEST | 3 - data/config_files/parallelisation.yml | 1 - .../archive/file/generation/illumina2bam.pm | 390 ------------------ .../file/generation/seqchksum_comparator.pm | 6 +- lib/npg_pipeline/pluggable/harold.pm | 13 - t/10-pluggable_harold_central.t | 23 +- t/20-archive_file_generation-illumina2bam.t | 299 -------------- ...ive-file-generation-seqchksum_comparator.t | 3 +- .../solexa/bin/aligners/illumina2bam/current | 1 - t/data/illumina2bam/1234_samplesheet.csv | 26 -- t/data/illumina2bam/npg/instrument/21.xml | 15 - t/data/illumina2bam/npg/run/1234.xml | 42 -- 13 files changed, 13 insertions(+), 811 deletions(-) delete mode 100644 lib/npg_pipeline/archive/file/generation/illumina2bam.pm delete mode 100644 t/20-archive_file_generation-illumina2bam.t delete mode 120000 t/bin/software/solexa/bin/aligners/illumina2bam/current delete mode 100644 t/data/illumina2bam/1234_samplesheet.csv delete mode 100644 t/data/illumina2bam/npg/instrument/21.xml delete mode 100644 t/data/illumina2bam/npg/run/1234.xml diff --git a/Changes b/Changes index ed0719b55..95ef8d127 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,8 @@ LIST OF CHANGES --------------- + - illumina2bam function removed + release 51.9 - p4stage2 speed-up by caching references - p4stage2 errors in getting a reference made fatal diff --git a/MANIFEST b/MANIFEST index 99e6486a0..ca7f527e7 100644 --- a/MANIFEST +++ b/MANIFEST @@ -27,7 +27,6 @@ lib/npg_pipeline/analysis/harold_calibration_bam.pm lib/npg_pipeline/analysis/split_bam_by_tag.pm lib/npg_pipeline/archive/file/BamClusterCounts.pm lib/npg_pipeline/archive/file/generation.pm -lib/npg_pipeline/archive/file/generation/illumina2bam.pm lib/npg_pipeline/archive/file/generation/seq_alignment.pm lib/npg_pipeline/archive/file/qc.pm lib/npg_pipeline/archive/file/generation/seqchksum_comparator.pm @@ -75,7 +74,6 @@ 
t/15-pipeline_launcher_scripts.t t/20-archive-fastqcheck.t t/20-archive_auto_qc_to_db.t t/20-archive_file-to_irods.t -t/20-archive_file_generation-illumina2bam.t t/20-archive_file_generation-seq_alignment.t t/20-archive_folder_generation.t t/20-archive_illumina_analysis.t @@ -113,7 +111,6 @@ t/bin/software/solexa/jars/Illumina2bam.jar t/bin/software/solexa/jars/SplitBamByReadGroup.jar t/bin/software/solexa/bin/qc t/data/samplesheet_1234.csv -t/data/illumina2bam/1234_samplesheet.csv t/data/qc/1234_samplesheet_amended.csv t/data/qc/samplesheet_14353.csv t/data/qc/samplesheet_14043.csv diff --git a/data/config_files/parallelisation.yml b/data/config_files/parallelisation.yml index a023b001f..3a9f96f5b 100644 --- a/data/config_files/parallelisation.yml +++ b/data/config_files/parallelisation.yml @@ -1,5 +1,4 @@ a: - illumina2bam: 1 illumina_basecall_stats: 1 p4_stage1_analysis: 1 b: diff --git a/lib/npg_pipeline/archive/file/generation/illumina2bam.pm b/lib/npg_pipeline/archive/file/generation/illumina2bam.pm deleted file mode 100644 index 0eb34cb84..000000000 --- a/lib/npg_pipeline/archive/file/generation/illumina2bam.pm +++ /dev/null @@ -1,390 +0,0 @@ -package npg_pipeline::archive::file::generation::illumina2bam; - -use Moose; -use Carp; -use English qw{-no_match_vars}; -use Readonly; -use Perl6::Slurp; - -use st::api::lims; -use npg_common::roles::software_location; -use npg_pipeline::lsf_job; -use npg_pipeline::analysis::create_lane_tag_file; - -extends q{npg_pipeline::base}; -with q{npg_tracking::illumina::run::long_info}; - -our $VERSION = '0'; - -Readonly::Scalar our $DEFAULT_RESOURCES => npg_pipeline::lsf_job->new(memory => 2500)->memory_spec(); -Readonly::Scalar our $JAVA_CMD => q{java}; - -sub generate { - my ( $self, $arg_refs ) = @_; - - $self->info(q{Creating Jobs to run illumina2bam for run} . 
$self->id_run ); - - my $alims = $self->lims->children_ia; - my @job_ids; - for my $p ($self->positions()){ - my $tag_list_file; - if ($self->is_multiplexed_lane($p)) { - $self->info(qq{Lane $p is indexed, generating tag list}); - my $index_length = $self->_get_index_length( $alims->{$p} ); - $tag_list_file = npg_pipeline::analysis::create_lane_tag_file->new( - location => $self->metadata_cache_dir, - lane_lims => $alims->{$p}, - index_length => $index_length, - hiseqx => $self->is_hiseqx_run, - verbose => $self->verbose - )->generate(); - } - my $bsub_cmd = $self->_generate_bsub_commands( $arg_refs, $alims->{$p}, $tag_list_file); - push @job_ids, $self->submit_bsub_command( $bsub_cmd ); - } - - return @job_ids; -} - -foreach my $jar_name (qw/Illumina2bam BamAdapterFinder BamIndexDecoder/) { - has q{_}.$jar_name.q{_jar} => ( - isa => q{NpgCommonResolvedPathJarFile}, - is => q{ro}, - coerce => 1, - default => $jar_name.q{.jar}, - ); -} - -has q{_illumina2bam_cmd} => (isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); - -sub _build__illumina2bam_cmd { - my $self = shift; - return $JAVA_CMD . q{ -Xmx1024m} . q{ -jar } . $self->_Illumina2bam_jar(); -} - -has q{_bam_adapter_detect_cmd} => (isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); -sub _build__bam_adapter_detect_cmd { - return q(bamadapterfind); -} - -has q{_bam_index_decode_cmd} => (isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); -sub _build__bam_index_decode_cmd { - my $self = shift; - return $JAVA_CMD . q{ -Xmx1024m} - . q{ -jar } . $self->_BamIndexDecoder_jar() - . 
q{ VALIDATION_STRINGENCY=SILENT} -} - -sub _get_index_length { - my ( $self, $lane_lims ) = @_; - - my $index_length = $self->index_length; - - if ($lane_lims->inline_index_exists) { - my $index_start = $lane_lims->inline_index_start; - my $index_end = $lane_lims->inline_index_end; - if ($index_start && $index_end) { - $index_length = $index_end - $index_start + 1; - } - } - - return $index_length; -} - -sub _generate_bsub_commands { - my ( $self, $arg_refs, $lane_lims, $tag_list_file ) = @_; - - my $position = $lane_lims->position; - my $required_job_completion = $arg_refs->{required_job_completion}; - - my $id_run = $self->id_run(); - my $intensity_path = $self->intensity_path(); - my $bam_basecall_path = $self->bam_basecall_path(); - - my $full_bam_name = $bam_basecall_path . q{/}. $id_run . q{_} .$position. q{.bam}; - - my $job_name = q{illumina2bam_} . $id_run . q{_} . $position. q{_} . $self->timestamp(); - - my $log_folder = $self->make_log_dir( $bam_basecall_path ); - my $outfile = $log_folder . q{/} . $job_name . q{.%J.out}; - - $job_name = q{'} . $job_name . q{'}; - - my $last_tool_picard_based = 1; - my $job_command = $self->_illumina2bam_cmd() - . q{ I=} . $intensity_path - . q{ L=} . $position - . q{ B=} . $self->basecall_path() - . q{ RG=}. $id_run.q{_}.$position - . q{ PU=}. join q[_], $self->run_folder, $position; - - my $st_names = $self->_get_library_sample_study_names($lane_lims); - - if($st_names->{library}){ - $job_command .= q{ LIBRARY_NAME="} . $st_names->{library} . q{"}; - } - if($st_names->{sample}){ - $job_command .= q{ SAMPLE_ALIAS="} . $st_names->{sample} . q{"}; - } - if($st_names->{study}){ - my $study = $st_names->{study}; - $study =~ s/"/\\"/gmxs; - $job_command .= q{ STUDY_NAME="} . $study . 
q{"}; - } - if ($self->_extra_tradis_transposon_read) { - $job_command .= ' SEC_BC_SEQ=BC SEC_BC_QUAL=QT BC_SEQ=tr BC_QUAL=tq'; - } - - if ($lane_lims->inline_index_exists) { - my $index_start = $lane_lims->inline_index_start; - my $index_end = $lane_lims->inline_index_end; - my $index_read = $lane_lims->inline_index_read; - - if ($index_start && $index_end && $index_read) { - my($first, $final) = $self->read1_cycle_range(); - if ($index_read == 1) { - $index_start += ($first-1); - $index_end += ($first-1); - $job_command .= qq{ FIRST_INDEX=$index_start FINAL_INDEX=$index_end FIRST_INDEX=$first FINAL_INDEX=}.($index_start-1); - $job_command .= q{ SEC_BC_SEQ=br SEC_BC_QUAL=qr BC_READ=1 SEC_BC_READ=1}; - $job_command .= q{ FIRST=}.($index_end+1).qq{ FINAL=$final}; - if ($self->is_paired_read()) { - ($first, $final) = $self->read2_cycle_range(); - $job_command .= qq{ FIRST=$first FINAL=$final}; - } - } elsif ($index_read == 2) { - $self->is_paired_read() or $self->logcroak(q{Inline index read (2) does not exist}); - $job_command .= qq{ FIRST=$first FINAL=$final}; - ($first, $final) = $self->read2_cycle_range(); - $index_start += ($first-1); - $index_end += ($first-1); - $job_command .= qq{ FIRST_INDEX=$index_start FINAL_INDEX=$index_end FIRST_INDEX=$first FINAL_INDEX=}.($index_start-1); - $job_command .= q{ SEC_BC_SEQ=br SEC_BC_QUAL=qr BC_READ=2 SEC_BC_READ=2}; - $job_command .= q{ FIRST=}.($index_end+1).qq{ FINAL=$final}; - } else { - $self->logcroak("Invalid inline index read ($index_read)"); - } - } - } - - ### TODO: can new bamadapterfind cope without these exclusions? 
- if ( $self->is_paired_read() && !$lane_lims->inline_index_exists){ - # omit BamAdapterFinder for inline index - my @range1 = $self->read1_cycle_range(); - my $read1_length = $range1[1] - $range1[0] + 1; - my @range2 = $self->read2_cycle_range(); - my $read2_length = $range2[1] - $range2[0] + 1; - # omit BamAdapterFinder if reads are different lengths - if( $read1_length == $read2_length ){ - $job_command .= q{ OUTPUT=} . q{/dev/stdout} . q{ COMPRESSION_LEVEL=0}; - $job_command .= q{ | } . $self->_bam_adapter_detect_cmd(); - $last_tool_picard_based = 0; - } - } - - if( $self->is_multiplexed_lane($position) ){ - if (!$tag_list_file) { - $self->logcroak('Tag list file path should be defined'); - } - $job_command .= ($last_tool_picard_based - ? q{ OUTPUT=} . q{/dev/stdout} . q{ COMPRESSION_LEVEL=0} - : q{ level=0}); - $job_command .= q{ | } - . $self->_bam_index_decode_cmd() - . q{ I=/dev/stdin } - . q{ BARCODE_FILE=} . $tag_list_file - . q{ METRICS_FILE=} . $full_bam_name . q{.tag_decode.metrics}; - my $num_of_plexes_per_lane = $self->_get_number_of_plexes_excluding_control($lane_lims); - if( $num_of_plexes_per_lane == 1 ){ - $job_command .= q{ MAX_NO_CALLS=} . $self->general_values_conf()->{single_plex_decode_max_no_calls}; - $job_command .= q{ CONVERT_LOW_QUALITY_TO_NO_CALL=true}; - } - $last_tool_picard_based = 1; - } - - $job_command .= ($last_tool_picard_based ? q{ CREATE_MD5_FILE=false OUTPUT=/dev/stdout} : q{ md5=1 md5filename=}.$full_bam_name.q{.md5} ); - #TODO - shift this seqchksum earlier before any compression.... - #TODO - shift this seqchksum as early as possible - immediately after illuina2bam? (but we need to stop altering read names at deplxing for that) - - my $full_bam_seqchksum_name = $full_bam_name; - $full_bam_seqchksum_name =~ s/[.]bam$/.post_i2b.seqchksum/mxs; - my $full_bam_md5_name = $full_bam_name; - $full_bam_md5_name .= q{.md5}; - - $job_command .= q{| tee >(bamseqchksum > } . $full_bam_seqchksum_name . 
q{)}; - if ($last_tool_picard_based) { - $job_command .= q{ >(md5sum -b | tr -d '\\n *\\-' > } . $full_bam_md5_name . q{)}; - } - $job_command .= q{ > } . $full_bam_name; - - my $resources = ( $self->fs_resource_string( { - counter_slots_per_job => $self->general_values_conf()->{io_resource_slots}, - resource_string => $self->_default_resources(), - } ) ); - - $job_command =~ s/'/'"'"'/smxg;#for the bsub - $job_command =~ s/'/'"'"'/smxg;#for the bash -c - my $job_sub = q{bsub -q } . $self->lsf_queue() . qq{ $resources $required_job_completion -J $job_name -o $outfile /bin/bash -c 'set -o pipefail;$job_command'}; - - $self->debug($job_sub); - - return $job_sub; -} - -sub _default_resources { - my ( $self ) = @_; - my $mem = $self->general_values_conf()->{'illumina2bam_memory'}; - my $cpu = $self->general_values_conf()->{'illumina2bam_cpu'}; - my $hosts = 1; - return (join q[ ], npg_pipeline::lsf_job->new(memory => $mem)->memory_spec(), "-R 'span[hosts=$hosts]'", "-n$cpu"); -} - -sub _get_library_sample_study_names { - my ($self, $lane_lims) = @_; - - my $names = $self->get_study_library_sample_names($lane_lims); - my ($study_names, $library_names, $sample_names); - if($names->{study}){ - $study_names = join q{,}, @{$names->{study}}; - } - if($names->{library}){ - $library_names = join q{,}, @{$names->{library}}; - } - if($names->{sample}){ - $sample_names = join q{,}, @{$names->{sample}}; - } - - return {study=>$study_names, library=>$library_names, sample=>$sample_names}; -} - -sub _get_number_of_plexes_excluding_control { - my ($self, $lane_lims) = @_; - my $number = scalar keys %{$lane_lims->tags}; - if ($lane_lims->spiked_phix_tag_index) { - $number--; - } - return $number; -} - -has q{_extra_tradis_transposon_read} => ( - isa => q{Bool}, - is => q{rw}, - lazy_build => 1, - ); -sub _build__extra_tradis_transposon_read { - my $self = shift; - - $self->is_indexed; - my @i = $self->reads_indexed; - my $reads_indexed = 0; - ## no critic 
(ControlStructures::ProhibitPostfixControls) - foreach (@i) { $reads_indexed++ if $_; } - - my $is_tradis = 0; - foreach my $d ($self->lims->descendants()) { - if ($d->library_type && $d->library_type =~ /^TraDIS/smx) { - $is_tradis = 1; - last; - } - } - - if ($is_tradis) { - if ($self->run->is_multiplexed) { - return 1 if ($reads_indexed > 1); - } else { - return 1 if ($reads_indexed > 0); - } - } - - return 0; -} - - -no Moose; - -__PACKAGE__->meta->make_immutable; - -1; -__END__ - -=head1 NAME - -npg_pipeline::archive::file::generation::illumina2bam - -=head1 SYNOPSIS - - my $oAfgfq = npg_pipeline::archive::file::generation::illumina2bam->new( - run_folder => $sRunFolder, - ); - -=head1 DESCRIPTION - -Object module which knows how to construct and submits the command line to LSF for creating bam files from bcl files. - -=head1 SUBROUTINES/METHODS - -=head2 generate - generates the bsub jobs and submits them for creating the fastq files, returning an array of job_ids. - - my @job_ids = $oAfgfq->generate({ - required_job_completion} => q{-w (123 && 321)}; - }); - -=head1 DIAGNOSTICS - -=head1 CONFIGURATION AND ENVIRONMENT - -=head1 DEPENDENCIES - -=over - -=item Carp - -=item English -no_match_vars - -=item Readonly - -=item Moose - -=item Perl6::Slurp - -=item npg_common::roles::software_location - -=item st::api::lims - -=item npg_tracking::illumina::run::long_info - -=back - -=head1 INCOMPATIBILITIES - -=head1 BUGS AND LIMITATIONS - -=head1 AUTHOR - -Guoying Qi - -=head1 LICENSE AND COPYRIGHT - -Copyright (C) 2014 Genome Research Limited - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . diff --git a/lib/npg_pipeline/archive/file/generation/seqchksum_comparator.pm b/lib/npg_pipeline/archive/file/generation/seqchksum_comparator.pm index 06d7ae1cb..d62c1dee7 100644 --- a/lib/npg_pipeline/archive/file/generation/seqchksum_comparator.pm +++ b/lib/npg_pipeline/archive/file/generation/seqchksum_comparator.pm @@ -75,8 +75,8 @@ sub _generate_bsub_command { =head2 do_comparison Bamcat any plex/split bamfiles back together to perform a bamseqchksum. -Compare it with the one produced by the illumina2bam step, or croak if that has not been done. -Use diff -u rather than cmp and store the file on disk to help work out what has gone wrong +Compare it with the one for the whole lane or croak if that has not been done. +Use diff -u rather than cmp and store the file on disk to help work out what has gone wrong. =cut @@ -112,7 +112,7 @@ sub _compare_lane { my $input_lane_seqchksum_file_name = File::Spec->catfile($input_seqchksum_dir, $input_seqchksum_file_name); if ( ! 
-e $input_lane_seqchksum_file_name ) { - $self->logcroak("Cannot find $input_lane_seqchksum_file_name to compare: please check illumina2bam pipeline step"); + $self->logcroak("Cannot find $input_lane_seqchksum_file_name to compare to"); } my$wd = getcwd(); diff --git a/lib/npg_pipeline/pluggable/harold.pm b/lib/npg_pipeline/pluggable/harold.pm index 03a64ae0a..20c165c75 100644 --- a/lib/npg_pipeline/pluggable/harold.pm +++ b/lib/npg_pipeline/pluggable/harold.pm @@ -256,19 +256,6 @@ sub fix_config_files { return (); } -=head2 illumina2bam - -=cut - -sub illumina2bam { - my ( $self, @args ) = @_; - - my $illumina2bam = $self->new_with_cloned_attributes(q{npg_pipeline::archive::file::generation::illumina2bam}); - my $required_job_completion = shift @args; - my @job_ids = $illumina2bam->generate({required_job_completion => $required_job_completion,}); - return @job_ids; -} - =head2 create_summary_link_analysis function which creates/changes the summary link in the runfolder diff --git a/t/10-pluggable_harold_central.t b/t/10-pluggable_harold_central.t index 5f5f0e385..25eed58c4 100644 --- a/t/10-pluggable_harold_central.t +++ b/t/10-pluggable_harold_central.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 32; +use Test::More tests => 30; use Test::Exception; use Cwd qw/getcwd/; use List::MoreUtils qw/ any none /; @@ -122,12 +122,13 @@ my $runfolder_path = $util->analysis_runfolder_path(); my $pb; lives_ok { $pb = $central->new( - function_order => [qw(qc_qX_yield illumina2bam qc_insert_size)], + function_order => [qw(qc_qX_yield qc_insert_size)], runfolder_path => $runfolder_path, ); } q{no croak on creation}; $util->set_staging_analysis_area({with_latest_summary => 1}); - is(join(q[ ], @{$pb->function_order()}), 'lsf_start qc_qX_yield illumina2bam qc_insert_size lsf_end', 'function_order set on creation'); + is(join(q[ ], @{$pb->function_order()}), 'lsf_start qc_qX_yield qc_insert_size lsf_end', + 'function_order set on creation'); } { @@ -137,7 
+138,7 @@ my $runfolder_path = $util->analysis_runfolder_path(); my $pb; $util->set_staging_analysis_area(); my $init = { - function_order => [qw{illumina2bam qc_qX_yield qc_adapter update_warehouse qc_insert_size archive_to_irods}], + function_order => [qw{qc_qX_yield qc_adapter update_warehouse qc_insert_size archive_to_irods}], lanes => [4], runfolder_path => $runfolder_path, no_bsub => 1, @@ -148,20 +149,8 @@ my $runfolder_path = $util->analysis_runfolder_path(); lives_ok { $pb = $central->new($init); } q{no croak on new creation}; mkdir $pb->archive_path; mkdir $pb->qc_path; - - throws_ok { $pb->main() } - qr/Error submitting jobs: Can\'t find \'BamAdapterFinder\.jar\' because CLASSPATH is not set/, - q{error running qc->main() when CLASSPATH is not set for illumina2bam job}; - - local $ENV{CLASSPATH} = q[t/bin/software]; - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[t/data]; - throws_ok { $pb->main() } - qr/Error submitting jobs: no such file on CLASSPATH: BamAdapterFinder\.jar/, - q{error running qc->main() when CLASSPATH is not set correctly for illumina2bam job}; - - local $ENV{CLASSPATH} = q[t/bin/software/solexa/jars]; local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[t/data]; - lives_ok { $pb->main() } q{no croak running qc->main() when CLASSPATH is set correctly for illumina2bam job}; + lives_ok { $pb->main() } q{no croak running qc->main()}; my $timestamp = $pb->timestamp; my $recalibrated_path = $pb->recalibrated_path(); my $log_dir = $pb->make_log_dir( $recalibrated_path ); diff --git a/t/20-archive_file_generation-illumina2bam.t b/t/20-archive_file_generation-illumina2bam.t deleted file mode 100644 index c0fba9b92..000000000 --- a/t/20-archive_file_generation-illumina2bam.t +++ /dev/null @@ -1,299 +0,0 @@ -use strict; -use warnings; -use Test::More tests => 33; -use Test::Exception; -use Test::Differences; -use File::Copy; -use File::Path qw(make_path); -use Cwd; -use Log::Log4perl qw(:levels); - -use npg_tracking::util::abs_path qw(abs_path); -use t::util; - 
-my $util = t::util->new(); -my $dir = $util->temp_directory(); -my $current = abs_path(getcwd()); -$ENV{TEST_DIR} = $dir; -$ENV{TEST_FS_RESOURCE} = q{nfs_12}; -local $ENV{NPG_WEBSERVICE_CACHE_DIR} = 't/data/illumina2bam'; -local $ENV{CLASSPATH} = q[t/bin/software/solexa/jars]; -local $ENV{PATH} = join q[:], q[t/bin], q[t/bin/software/solexa/bin], $ENV{PATH}; -my $jar_path = join q[/], $current, $ENV{CLASSPATH}; - -Log::Log4perl->easy_init({layout => '%d %-5p %c - %m%n', - level => $DEBUG, - file => join(q[/], $dir, 'logfile'), - utf8 => 1}); - -use_ok('npg_pipeline::archive::file::generation::illumina2bam'); - -my $intensities_dir = $util->analysis_runfolder_path() . q{/Data/Intensities}; -my $pbcal_dir = $intensities_dir . q{/BaseCalls}; - -{ - my $new = "$dir/1234_samplesheet.csv"; - copy 't/data/illumina2bam/1234_samplesheet.csv', $new; - local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = $new; - `cp -R t/data/illumina2bam/npg $dir`; - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = $dir; - - $util->create_analysis(); - my $runfolder = $util->analysis_runfolder_path() . 
'/'; - `cp t/data/runfolder/Data/RunInfo.xml $runfolder`; - - my $bam_generator; - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - run_folder => q{123456_IL2_1234}, - runfolder_path => $util->analysis_runfolder_path(), - timestamp => q{20090709-123456}, - verbose => 0, - no_bsub => 1, - id_run => 1234, - _extra_tradis_transposon_read => 1, - bam_basecall_path => $pbcal_dir, - ); } q{no croak creating bam_generator object}; - - isa_ok($bam_generator, q{npg_pipeline::archive::file::generation::illumina2bam}, q{$bam_generator}); - is($bam_generator->_extra_tradis_transposon_read, 1, 'TraDIS set'); - $bam_generator->_extra_tradis_transposon_read(0); - is($bam_generator->_extra_tradis_transposon_read, 0, 'TraDIS not set'); - isa_ok($bam_generator->lims, 'st::api::lims', 'cached lims object'); - - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - my $mem = 4000; - my $cpu = 2; - my $alims = $bam_generator->lims->children_ia; - my $position = 8; - my $bsub_command = $bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}, 't/data/taglistfile'); - - is( $bam_generator->_get_number_of_plexes_excluding_control($alims->{$position}), - 1, 'correct number of plexes'); - - my $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M} . $mem . - q{ -R 'span[hosts=1]' -n} . $cpu . 
- qq{ -w'done(123) && done(321)' -J 'illumina2bam_1234_8_20090709-123456' -o $pbcal_dir/log/illumina2bam_1234_8_20090709-123456.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar $jar_path/Illumina2bam.jar I=$intensities_dir L=8 B=$pbcal_dir RG=1234_8 PU=123456_IL2_1234_8 LIBRARY_NAME="51021" SAMPLE_ALIAS="SRS000147" STUDY_NAME="SRP000031: 1000Genomes Project Pilot 1" OUTPUT=/dev/stdout COMPRESSION_LEVEL=0 | java -Xmx1024m -jar $jar_path/BamIndexDecoder.jar VALIDATION_STRINGENCY=SILENT I=/dev/stdin BARCODE_FILE=t/data/taglistfile METRICS_FILE=$pbcal_dir/1234_8.bam.tag_decode.metrics MAX_NO_CALLS=6 CONVERT_LOW_QUALITY_TO_NO_CALL=true CREATE_MD5_FILE=false OUTPUT=/dev/stdout}; - $expected_cmd .= qq{| tee >(bamseqchksum > $pbcal_dir/1234_8.post_i2b.seqchksum)}; - $expected_cmd .= q{ >(md5sum -b | tr -d '"'"'"'"'"'"'"'"'\n *\-'"'"'"'"'"'"'"'"' > } . qq{$pbcal_dir/1234_8.bam.md5)}; - $expected_cmd .= qq{ > $pbcal_dir/1234_8.bam'}; - - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for lane 8'); - - my @jids; - lives_ok { @jids = $bam_generator->generate($arg_refs); } q{no croak running generate}; - is(scalar @jids, 8, 'correct number of jobs submitted'); - ok(-f "$dir/lane_8.taglist", 'lane 8 tag list file generated'); - foreach my $lane ((1 .. 
7)) { - ok(!-e "$dir/lane_$lane.taglist", "lane $lane tag list file does not exist"); - } - - local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = q[]; - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = 't/data'; - - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - run_folder => q{123456_IL2_1234}, - runfolder_path => $util->analysis_runfolder_path(), - timestamp => q{20090709-123456}, - verbose => 0, - id_run => 8033, - bam_basecall_path => $pbcal_dir, - ); } q{no croak creating bam_generator object}; - - is($bam_generator->_extra_tradis_transposon_read, 1, 'TraDIS set'); - - $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - $alims = $bam_generator->lims->children_ia; - throws_ok {$bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position})} - qr/Tag list file path should be defined/, - 'error when tag file name is missing for a pool'; - - $bsub_command = $bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}, 't/data/lanetagfile'); - is( $bam_generator->_get_number_of_plexes_excluding_control($alims->{$position}), - 72, 'correct number of plexes'); - - $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M} . $mem . - q{ -R 'span[hosts=1]' -n}. $cpu . 
- qq{ -w'done(123) && done(321)' -J 'illumina2bam_8033_8_20090709-123456' -o $pbcal_dir/log/illumina2bam_8033_8_20090709-123456.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar $jar_path/Illumina2bam.jar I=$intensities_dir L=8 B=$pbcal_dir RG=8033_8 PU=123456_IL2_1234_8 LIBRARY_NAME="5206896" SAMPLE_ALIAS="ERS124385,ERS124386,ERS124387,ERS124388,ERS124389,ERS124390,ERS124391,ERS124392,ERS124393,ERS124394,ERS124395,ERS124396,ERS124397,ERS124398,ERS124399,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124385,ERS124386,ERS124387,ERS124388,ERS124389,ERS124390,ERS124391,ERS124392,ERS124393,ERS124394,ERS124395,ERS124396,ERS124397,ERS124398,ERS124399,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400,ERS124400" STUDY_NAME="mouse PiggyBac sequencing: sites of PiggyBac integration into mouse genome" SEC_BC_SEQ=BC SEC_BC_QUAL=QT BC_SEQ=tr BC_QUAL=tq OUTPUT=/dev/stdout COMPRESSION_LEVEL=0 | java -Xmx1024m -jar $jar_path/BamIndexDecoder.jar VALIDATION_STRINGENCY=SILENT I=/dev/stdin BARCODE_FILE=t/data/lanetagfile METRICS_FILE=$pbcal_dir/8033_8.bam.tag_decode.metrics CREATE_MD5_FILE=false OUTPUT=/dev/stdout}; - $expected_cmd .= qq{| tee >(bamseqchksum > $pbcal_dir/8033_8.post_i2b.seqchksum)}; - $expected_cmd .= q{ >(md5sum -b | tr -d '"'"'"'"'"'"'"'"'\n *\-'"'"'"'"'"'"'"'"' > } . 
qq{$pbcal_dir/8033_8.bam.md5)}; - $expected_cmd .= qq{ > $pbcal_dir/8033_8.bam'}; - - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for lane 8'); - -## test of special 3' pulldown RNAseq read 1 index - - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - run_folder => q{121112_HS20_08797_A_C18TEACXX}, - runfolder_path => $util->analysis_runfolder_path(), - timestamp => q{20121112-123456}, - verbose => 0, - id_run => 8797, - bam_basecall_path => $pbcal_dir, - ); } q{no croak creating bam_generator object for run 8797}; - - $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - $alims = $bam_generator->lims->associated_child_lims_ia; - $position = 8; - $bsub_command = $bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}, 't/data/lanetagfile'); - - $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=} . $mem . q{,nfs_12=4]' -M} . $mem . - q{ -R 'span[hosts=1]' -n} . $cpu . - qq{ -w'done(123) && done(321)' -J 'illumina2bam_8797_8_20121112-123456' -o $pbcal_dir/log/illumina2bam_8797_8_20121112-123456.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar $jar_path/Illumina2bam.jar I=$intensities_dir L=8 B=$pbcal_dir RG=8797_8 PU=121112_HS20_08797_A_C18TEACXX_8 LIBRARY_NAME="6045465" SAMPLE_ALIAS="ERS181250,ERS181251,ERS181252,ERS181253,ERS181254,ERS181255" STUDY_NAME="ERP001656: Total RNA was extracted from morpholically abnormal and sibling wild type embryos identified by the Zebrafish Mutation Project (http://www.sanger.ac.uk/Projects/D_rerio/zmp/). The 3prime end of fragmented RNA was pulled down using polyToligos attached to magnetic beads, reverse transcribed, made into Illumina libraries and sequenced using IlluminaHiSeq paired-end sequencing. Protocol: Total RNA was extracted from mouse embryos using Trizol and DNase treated. 
Chemically fragmented RNA was enriched for the 3prime ends by pulled down using an anchored polyToligo attached to magnetic beads. An RNA oligo comprising part of the Illumina adapter 2 was ligated to the 5prime end of the captured RNA and the RNA was eluted from the beads. Reverse transcription was primed with an anchored polyToligo with part of Illumina adapter 1 at the 5prime end followed by 4 random bases, then an A, C or G base, then one of twelve5 base indexing tags and 14 T bases. An Illumina library with full adapter sequence was produced by 15 cycles of PCR. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/" FIRST_INDEX=6 FINAL_INDEX=10 FIRST_INDEX=1 FINAL_INDEX=5 SEC_BC_SEQ=br SEC_BC_QUAL=qr BC_READ=1 SEC_BC_READ=1 FIRST=11 FINAL=50 OUTPUT=/dev/stdout COMPRESSION_LEVEL=0 | java -Xmx1024m -jar $jar_path/BamIndexDecoder.jar VALIDATION_STRINGENCY=SILENT I=/dev/stdin BARCODE_FILE=t/data/lanetagfile METRICS_FILE=$pbcal_dir/8797_8.bam.tag_decode.metrics CREATE_MD5_FILE=false OUTPUT=/dev/stdout}; - $expected_cmd .= qq{| tee >(bamseqchksum > $pbcal_dir/8797_8.post_i2b.seqchksum)}; - $expected_cmd .= q{ >(md5sum -b | tr -d '"'"'"'"'"'"'"'"'\n *\-'"'"'"'"'"'"'"'"' > } . 
qq{$pbcal_dir/8797_8.bam.md5)}; - $expected_cmd .= qq{ > $pbcal_dir/8797_8.bam'}; - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for run 8797 lane 8, special "jecfoo" read1 index'); -} - -{ ## adapter detection - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = 't/data'; - my $rf = join q[/], $dir, q[131010_HS34_11018_B_H722AADXX]; - my $bc = join q[/], $rf, q[Data/Intensities/BaseCalls]; - my $i = join q[/], $rf, q[Data/Intensities]; - make_path $bc; - copy q[t/data/example_runfolder/131010_HS34_11018_B_H722AADXX/RunInfo.xml], $rf; - - my $bam_generator; - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - runfolder_path => $rf, - is_indexed => 0, - verbose => 0, - timestamp => q{20131028-155757}, - bam_basecall_path => $bc, - ); } q{no croak creating bam_generator object for run 11018}; - - my $alims = $bam_generator->lims->associated_child_lims_ia; - my $position = 1; - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - my $mem = $bam_generator->general_values_conf()->{illumina2bam_memory}; - my $cpu = $bam_generator->general_values_conf()->{illumina2bam_cpu}; - my $bsub_command = $bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}); - #$bsub_command = $util->drop_temp_part_from_paths( $bsub_command ); - - my $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem.q{ -R 'span[hosts=1]' -n} . $cpu . q{ -w'done(123) && done(321)' -J 'illumina2bam_11018_1_20131028-155757' -o } . $bc . q{/log/illumina2bam_11018_1_20131028-155757.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar } . qq{$jar_path/Illumina2bam.jar I=} . 
qq{$i L=1 B=$bc RG=11018_1 PU=131010_HS34_11018_B_H722AADXX_1 LIBRARY_NAME="8314075" SAMPLE_ALIAS="ERS333055,ERS333070,ERS333072,ERS333073,ERS333076,ERS333077" STUDY_NAME="ERP000730: llumina sequencing of various Plasmodium species is being carried out for de novo assembly and comparative genomics. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/" OUTPUT=/dev/stdout COMPRESSION_LEVEL=0 | bamadapterfind md5=1 md5filename=$bc/11018_1.bam.md5}; - $expected_cmd .= qq{| tee >(bamseqchksum > $bc/11018_1.post_i2b.seqchksum)}; - $expected_cmd .= qq{ > $bc/11018_1.bam'}; - - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for lane 1 (with adapter detection)'); -} - -{ ## more testing of special 3' pulldown RNAseq - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = 't/data'; - - my $rf = join q[/], $dir, q[121103_HS29_08747_B_C1BV5ACXX]; - my $bc = join q[/], $rf, q[Data/Intensities/BaseCalls]; - my $i = join q[/], $rf, q[Data/Intensities]; - make_path $bc; - copy q[t/data/example_runfolder/121103_HS29_08747_B_C1BV5ACXX/RunInfo.xml], $rf; - - my $bam_generator; - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - runfolder_path => $rf, - timestamp => q{20121112-123456}, - bam_basecall_path => $bc, - verbose => 0, - ); } q{no croak creating bam_generator object for run 8747}; - - my $alims = $bam_generator->lims->associated_child_lims_ia; - my $position = 4; - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - my $mem = 4000; - my $cpu = 2; - my $bsub_command = $bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}, 't/data/lanetagfile'); - - my $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem.q{ -R 
'span[hosts=1]' -n}.$cpu. q{ -w'done(123) && done(321)' -J 'illumina2bam_8747_4_20121112-123456' -o } . $bc . q{/log/illumina2bam_8747_4_20121112-123456.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar } . qq{$jar_path/Illumina2bam.jar I=} . $i . q{ L=4 B=} . $bc . q{ RG=8747_4 PU=121103_HS29_08747_B_C1BV5ACXX_4 LIBRARY_NAME="6101244" SAMPLE_ALIAS="ERS183138,ERS183139,ERS183140,ERS183141,ERS183142,ERS183143" STUDY_NAME="ERP001559: Total RNA was extracted from wild type and mutant zebrafish embryos. Double stranded cDNA representing the 3'"'"'"'"'"'"'"'"' ends of transcripts was made by a variety of methods, including polyT priming and 3'"'"'"'"'"'"'"'"' pull down on magentic beads. Some samples included indexing test experiments where a sequence barcode was placed within one of the sequence reads.. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/" FIRST_INDEX=5 FINAL_INDEX=10 FIRST_INDEX=1 FINAL_INDEX=4 SEC_BC_SEQ=br SEC_BC_QUAL=qr BC_READ=1 SEC_BC_READ=1 FIRST=11 FINAL=75 FIRST=84 FINAL=158 OUTPUT=/dev/stdout COMPRESSION_LEVEL=0 | java -Xmx1024m -jar } . qq{$jar_path/BamIndexDecoder.jar VALIDATION_STRINGENCY=SILENT I=/dev/stdin BARCODE_FILE=t/data/lanetagfile METRICS_FILE=} . $bc . q{/8747_4.bam.tag_decode.metrics CREATE_MD5_FILE=false OUTPUT=/dev/stdout}; - $expected_cmd .= qq{| tee >(bamseqchksum > $bc/8747_4.post_i2b.seqchksum)}; - $expected_cmd .= q{ >(md5sum -b | tr -d '"'"'"'"'"'"'"'"'\n *\-'"'"'"'"'"'"'"'"' > } . 
qq{$bc/8747_4.bam.md5)}; - $expected_cmd .= qq{ > $bc/8747_4.bam'}; - - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for lane 4 of 3 prime pulldown'); -} - -{ ## more testing of special 3' pulldown RNAseq for non-standard inline index - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = 't/data'; - - my $rf = join q[/], $dir, q[130917_MS6_10808_A_MS2030455-300V2]; - my $bc = join q[/], $rf, q[Data/Intensities/BaseCalls]; - my $i = join q[/], $rf, q[Data/Intensities]; - make_path $bc; - copy q[t/data/example_runfolder/130917_MS6_10808_A_MS2030455-300V2/RunInfo.xml], $rf; - - my $bam_generator; - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - runfolder_path => $rf, - timestamp => q{20130919-132702}, - bam_basecall_path => $bc, - verbose => 0, - ); } q{no croak creating bam_generator object for run 10808}; - - my $alims = $bam_generator->lims->associated_child_lims_ia; - my $position = 1; - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - my $mem = 4000; - my $cpu = 2; - my $bsub_command = $bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}, 't/data/lanetagfile'); - - my $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem.q{ -R 'span[hosts=1]' -n}.$cpu. q{ -w'done(123) && done(321)' -J 'illumina2bam_10808_1_20130919-132702' -o } . $bc . q{/log/illumina2bam_10808_1_20130919-132702.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar } . qq{$jar_path/Illumina2bam.jar I=} . qq{$i L=1 B=$bc} . q{ RG=10808_1 PU=130917_MS6_10808_A_MS2030455-300V2_1 LIBRARY_NAME="8115659" SAMPLE_ALIAS="single_cell_1,single_cell_2,single_cell_3,single_cell_4" STUDY_NAME="Transcriptome profiling protocol development: Various test protocols to improve the 3'"'"'"'"'"'"'"'"' pull down transcript profiling protocol, aiming to produce a pipeline library prep protocol. This data is part of a pre-publication release. 
For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ " FIRST=1 FINAL=150 FIRST_INDEX=168 FINAL_INDEX=172 FIRST_INDEX=156 FINAL_INDEX=167 SEC_BC_SEQ=br SEC_BC_QUAL=qr BC_READ=2 SEC_BC_READ=2 FIRST=173 FINAL=305 OUTPUT=/dev/stdout COMPRESSION_LEVEL=0 | java -Xmx1024m -jar } . qq{$jar_path/BamIndexDecoder.jar VALIDATION_STRINGENCY=SILENT I=/dev/stdin BARCODE_FILE=t/data/lanetagfile METRICS_FILE=} . $bc . q{/10808_1.bam.tag_decode.metrics CREATE_MD5_FILE=false OUTPUT=/dev/stdout}; - $expected_cmd .= qq{| tee >(bamseqchksum > $bc/10808_1.post_i2b.seqchksum)}; - $expected_cmd .= q{ >(md5sum -b | tr -d '"'"'"'"'"'"'"'"'\n *\-'"'"'"'"'"'"'"'"' > } . qq{$bc/10808_1.bam.md5)}; - $expected_cmd .= qq{ > $bc/10808_1.bam'}; - - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for lane 1 of 3 prime pulldown'); -} - -{ ## test of un-equal read lengths - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = 't/data'; - - my $rf = join q[/], $dir, q[131021_MS5_11123_A_MS2000187-150V3]; - my $bc = join q[/], $rf, q[Data/Intensities/BaseCalls]; - my $i = join q[/], $rf, q[Data/Intensities]; - make_path $bc; - copy q[t/data/example_runfolder/131021_MS5_11123_A_MS2000187-150V3/RunInfo.xml], $rf; - - my $bam_generator; - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - runfolder_path => $rf, - is_indexed => 0, - timestamp => q{20131022-114117}, - bam_basecall_path => $bc, - verbose => 0, - ); } q{no croak creating bam_generator object for run 1123}; - - my $alims = $bam_generator->lims->associated_child_lims_ia; - my $position = 1; - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - my $mem = $bam_generator->general_values_conf()->{illumina2bam_memory}; - my $cpu = $bam_generator->general_values_conf()->{illumina2bam_cpu}; - my $bsub_command = 
$bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}); - - my $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}. $mem. q{ -R 'span[hosts=1]' -n}.$cpu. q{ -w'done(123) && done(321)' -J 'illumina2bam_11123_1_20131022-114117' -o } . $bc . q{/log/illumina2bam_11123_1_20131022-114117.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar } . qq{$jar_path/Illumina2bam.jar } . qq{I=$i L=1 B=$bc} . q{ RG=11123_1 PU=131021_MS5_11123_A_MS2000187-150V3_1 LIBRARY_NAME="8111702" SAMPLE_ALIAS="arg404,arg405,arg406,arg407,arg408,arg409,arg410,arg411,arg412,arg413,arg414,arg415,arg416,arg417,arg418,arg419,arg420,arg421,arg422,arg423,arg424,arg425" STUDY_NAME="ERP001151: Data obtained from the sequencing of pools of barcoded P. berghei transgenics is predicted to allow for qualitative and quantitative measurements of individual mutant progeny generated during multiplex transfections. This type of analysis is expected to take P. berghei reverse genetics beyond that of the single-gene level. It aims to explore genetic interactions by measuring the effect on growth rates caused by simultaneous disruption of different genes in diverse genetic backgrounds, as well as potentially becoming a tool to identify essential genes to be prioritised as e.g. potential drug targets, or conversely to be excluded from future gene disruption studies. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/" CREATE_MD5_FILE=false OUTPUT=/dev/stdout}; - $expected_cmd .= qq{| tee >(bamseqchksum > $bc/11123_1.post_i2b.seqchksum)}; - $expected_cmd .= q{ >(md5sum -b | tr -d '"'"'"'"'"'"'"'"'\n *\-'"'"'"'"'"'"'"'"' > } . 
qq{$bc/11123_1.bam.md5)}; - $expected_cmd .= qq{ > $bc/11123_1.bam'}; - - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for run with un-equal read lengths'); -} - -1; -__END__ diff --git a/t/35-archive-file-generation-seqchksum_comparator.t b/t/35-archive-file-generation-seqchksum_comparator.t index 177f75c93..9b525edd5 100644 --- a/t/35-archive-file-generation-seqchksum_comparator.t +++ b/t/35-archive-file-generation-seqchksum_comparator.t @@ -56,7 +56,8 @@ my $archive_path = $recalibrated_path . q{/archive}; my @jids = $object->launch( $arg_refs ); is( scalar @jids, 1, q{1 job id returned} ); - throws_ok{$object->do_comparison()} qr/please check illumina2bam pipeline step/, q{Doing a comparison with no files throws an exception}; + throws_ok{$object->do_comparison()} qr/Cannot find/, + q{Doing a comparison with no files throws an exception}; is($object->archive_path, $archive_path, "Object has correct archive path"); is($object->bam_basecall_path, $bam_basecall_path, "Object has correct bam_basecall path"); diff --git a/t/bin/software/solexa/bin/aligners/illumina2bam/current b/t/bin/software/solexa/bin/aligners/illumina2bam/current deleted file mode 120000 index 08306bac0..000000000 --- a/t/bin/software/solexa/bin/aligners/illumina2bam/current +++ /dev/null @@ -1 +0,0 @@ -Illumina2bam-tools-1.00/ \ No newline at end of file diff --git a/t/data/illumina2bam/1234_samplesheet.csv b/t/data/illumina2bam/1234_samplesheet.csv deleted file mode 100644 index 24a070116..000000000 --- a/t/data/illumina2bam/1234_samplesheet.csv +++ /dev/null @@ -1,26 +0,0 @@ -[Header],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -Investigator Name,pav,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -Project Name,1000Genomes-A1-YRI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -Experiment Name,1234,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -Date,2008-08-17T13:18:30,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 
-Workflow,LibraryQC,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -Chemistry,Default,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -[Reads],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -37,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -37,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -[Settings],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -[Manifests],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -[Data],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -Index,Lane,Sample_ID,Sample_Name,GenomeFolder,bait_name,default_library_type,default_tag_sequence,email_addresses,email_addresses_of_followers,email_addresses_of_managers,email_addresses_of_owners,is_control,is_pool,lane_id,lane_priority,library_name,organism,organism_taxon_id,project_cost_code,project_id,project_name,qc_state,request_id,required_insert_size_range,sample_accession_number,sample_common_name,sample_consent_withdrawn,sample_description,sample_id,sample_name,sample_public_name,sample_reference_genome,spiked_phix_tag_index,study_accession_number,study_alignments_in_bam,study_contains_nonconsented_human,study_contains_nonconsented_xahuman,study_description,study_id,study_name,study_reference_genome,study_separate_y_chromosome_data,study_title,tag_index, -,1,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,,jws@sanger.ac.uk rd@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,0,66206,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2409,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes Project Pilot 1,, -,2,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,,jws@sanger.ac.uk rd@sanger.ac.uk 
thomas.keane@sanger.ac.uk,jws@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,0,66207,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2410,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes Project Pilot 1,, -,3,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,,jws@sanger.ac.uk rd@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,0,66208,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2411,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes Project Pilot 1,, -,4,79570,phiX_SI_SPRI,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\PhiX\Sanger-SNPs\all\fasta\,,,,,,,,1,0,80723,0,phiX_SI_SPRI,,,,,,,41944,,,,,,9829,phiX_SI_SPRI,,,,,,0,0,,,,,,,, -,5,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,,jws@sanger.ac.uk rd@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,0,66209,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2412,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes Project Pilot 1,, -,6,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,,jws@sanger.ac.uk rd@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,0,66210,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2413,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes 
Project Pilot 1,, -,7,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,,jws@sanger.ac.uk rd@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,0,66211,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2414,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes Project Pilot 1,, -ATCAACCG,8,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,ATCAACCG,jws@sanger.ac.uk rd@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,1,66212,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2415,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes Project Pilot 1,154, - diff --git a/t/data/illumina2bam/npg/instrument/21.xml b/t/data/illumina2bam/npg/instrument/21.xml deleted file mode 100644 index cdead6e0d..000000000 --- a/t/data/illumina2bam/npg/instrument/21.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/t/data/illumina2bam/npg/run/1234.xml b/t/data/illumina2bam/npg/run/1234.xml deleted file mode 100644 index cba379613..000000000 --- a/t/data/illumina2bam/npg/run/1234.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From 7a35555e66afe197eb471fe35e66722084fdaa31 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Fri, 19 May 2017 12:03:07 +0100 Subject: [PATCH 10/27] ensure that error in the pipeline can be captured in the pipeline daemon log --- lib/npg_pipeline/pluggable.pm | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/npg_pipeline/pluggable.pm b/lib/npg_pipeline/pluggable.pm index 4458ff468..0c4953e31 
100644 --- a/lib/npg_pipeline/pluggable.pm +++ b/lib/npg_pipeline/pluggable.pm @@ -390,6 +390,15 @@ sub main { }; $self->_clear_env_vars(); if ($error) { + # This is the end of the pipeline script. + # We want to see this error in the pipeline daemon log, + # so it should be printed to standard error, not to + # this script's log, which might be a file. + # We currently tie STDERR so output to standard error + # goes to this script's log file. Hence the need to + # untie. Does not cause an error if STDERR has not been + # tied. + untie *STDERR; croak($error); } return; From 395fb2a6a2c56785c1a430fb54fea8ca8d5eaf44 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Mon, 23 Jan 2017 16:43:25 +0000 Subject: [PATCH 11/27] update sequencescape warehouse loader job --- Changes | 3 +++ lib/npg_pipeline/pluggable/harold/post_qc_review.pm | 9 +++------ t/10-pluggable_harold_central.t | 5 ++--- t/10-pluggable_harold_post_qc_review.t | 6 +++--- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/Changes b/Changes index caa0c00c7..3b73f1cbb 100644 --- a/Changes +++ b/Changes @@ -23,6 +23,9 @@ release 51.8 release 51.7 - replaces the original log role with the one from DNAP utilities, which provides a Log4perl logger and some convenience methods. + - new signature for the sequencescape warehouse loader so that it uses + samplsheet LIMs driver at the analysis stage and ml_warehouse_fc_cache + LIMs driver at the archival stage release 51.6 - test and code fixes to ensure problem-free tests under Perl 5.22.2 diff --git a/lib/npg_pipeline/pluggable/harold/post_qc_review.pm b/lib/npg_pipeline/pluggable/harold/post_qc_review.pm index 4b71d58c4..223315739 100644 --- a/lib/npg_pipeline/pluggable/harold/post_qc_review.pm +++ b/lib/npg_pipeline/pluggable/harold/post_qc_review.pm @@ -225,14 +225,11 @@ sub _update_warehouse_command { my $post_qc_complete = $option and (ref $option eq 'HASH') and $option->{'post_qc_complete'} ?
1 : 0; my $id_run = $self->id_run; - my $command = q[]; + my $command = qq{$loader_name --verbose --id_run $id_run}; if ($loader_name eq 'warehouse_loader') { - # Currently, we need pool library name and link to plexes in SeqQC. - # Therefore, we need to run live. - $command = join q[], map {q[unset ] . $_ . q[;]} npg_pipeline::cache->env_vars; + $command .= q{ --lims_driver_type }; + $command .= $post_qc_complete ? 'ml_warehouse_fc_cache' : 'samplesheet'; } - - $command .= qq{$loader_name --verbose --id_run $id_run}; my $job_name = join q{_}, $loader_name, $id_run, $self->pipeline_name; my $path = $self->make_log_dir($self->recalibrated_path()); my $prereq = q[]; diff --git a/t/10-pluggable_harold_central.t b/t/10-pluggable_harold_central.t index 5f5f0e385..f42e5bf27 100644 --- a/t/10-pluggable_harold_central.t +++ b/t/10-pluggable_harold_central.t @@ -165,10 +165,9 @@ my $runfolder_path = $util->analysis_runfolder_path(); my $timestamp = $pb->timestamp; my $recalibrated_path = $pb->recalibrated_path(); my $log_dir = $pb->make_log_dir( $recalibrated_path ); - my $unset_string = 'unset NPG_WEBSERVICE_CACHE_DIR;unset NPG_CACHED_SAMPLESHEET_FILE;'; my $expected_command = q[bsub -q lowload 50 -J warehouse_loader_1234_central ] . - qq[-o $log_dir/warehouse_loader_1234_central_] . $timestamp . - qq[.out '${unset_string}warehouse_loader --verbose --id_run 1234']; + qq[-o $log_dir/warehouse_loader_1234_central_] . $timestamp . q[.out ] . 
+ qq['warehouse_loader --verbose --id_run 1234 --lims_driver_type samplesheet']; is($pb->_update_warehouse_command('warehouse_loader', (50)), $expected_command, 'update warehouse command'); } diff --git a/t/10-pluggable_harold_post_qc_review.t b/t/10-pluggable_harold_post_qc_review.t index 40948cea7..055c636ba 100644 --- a/t/10-pluggable_harold_post_qc_review.t +++ b/t/10-pluggable_harold_post_qc_review.t @@ -34,7 +34,7 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); upload_auto_qc_to_qc_database run_run_archived run_qc_complete - update_warehouse + update_warehouse_post_qc_complete ); my @original = @functions_in_order; unshift @original, 'lsf_start'; @@ -64,10 +64,9 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); my $log_dir_in_outgoing = $log_dir; $log_dir_in_outgoing =~ s{/analysis/}{/outgoing/}smx; my $job_name = 'warehouse_loader_1234_post_qc_review'; - my $unset_string = 'unset NPG_WEBSERVICE_CACHE_DIR;unset NPG_CACHED_SAMPLESHEET_FILE;'; my $prefix = qq[bsub -q lowload 50 -J $job_name ] . qq[-o $log_dir/${job_name}_${timestamp}.out]; - my $command = qq['${unset_string}warehouse_loader --verbose --id_run 1234']; + my $command = q['warehouse_loader --verbose --id_run 1234 --lims_driver_type samplesheet']; is($post_qc_review->_update_warehouse_command('warehouse_loader', (50)), qq[$prefix $command], 'update warehouse command'); @@ -75,6 +74,7 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); $prefix = qq[bsub -q lowload 50 -J $job_name ] . 
qq[-o $log_dir_in_outgoing/${job_name}_${timestamp}.out]; my $preexec = qq(-E "[ -d '${log_dir_in_outgoing}' ]"); + $command = q['warehouse_loader --verbose --id_run 1234 --lims_driver_type ml_warehouse_fc_cache']; is($post_qc_review->_update_warehouse_command( 'warehouse_loader', (50, {}, {'post_qc_complete' => 1})), join(q[ ],$prefix,$preexec,$command), From b93f038d38a76b7e7af0a3ccba9f9bec7f047c04 Mon Sep 17 00:00:00 2001 From: Steven Leonard Date: Mon, 5 Jun 2017 11:21:41 +0100 Subject: [PATCH 12/27] warn rather than croak in seq_alignment if multiple references for tag 0 --- Changes | 1 + lib/npg_pipeline/archive/file/generation/seq_alignment.pm | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Changes b/Changes index 3b73f1cbb..e8a0a7a09 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,7 @@ LIST OF CHANGES --------------- + - update p4 stage 2 (seq_alignment) warn rather than croak if multiple references for tag 0 - pipeline scripts - redirect stderr output to the log to capture output from all NPG and CPAN modules in one place diff --git a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm index ebad3ba49..26beeb79f 100644 --- a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm +++ b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm @@ -554,7 +554,11 @@ sub _ref { $self->warn(qq{No reference genome set for $lstring}); } else { if (scalar @refs > 1) { - $self->logcroak(qq{Multiple references for $lstring}); + if (defined $l->tag_index && $l->tag_index == 0) { + $self->logwarn(qq{Multiple references for $lstring}); + } else { + $self->logcroak(qq{Multiple references for $lstring}); + } } else { $ref = $refs[0]; if ($ref_name) { From a9521d033bdd027c69852ebde50288359dc5fe8f Mon Sep 17 00:00:00 2001 From: Steven Leonard Date: Mon, 5 Jun 2017 11:37:05 +0100 Subject: [PATCH 13/27] allow p4 stage 1 to analyse runs with different length reads --- Changes | 1 +
lib/npg_pipeline/archive/file/generation/p4_stage1_analysis.pm | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Changes b/Changes index 3b73f1cbb..0b6819550 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,7 @@ LIST OF CHANGES --------------- + - allow p4 stage 1 to analyse runs with different length reads - pipeline scripts - redirect stderr output to the log to capture output from all NPG and CPAN modules in one place diff --git a/lib/npg_pipeline/archive/file/generation/p4_stage1_analysis.pm b/lib/npg_pipeline/archive/file/generation/p4_stage1_analysis.pm index f28e6c68e..17c4a18e8 100644 --- a/lib/npg_pipeline/archive/file/generation/p4_stage1_analysis.pm +++ b/lib/npg_pipeline/archive/file/generation/p4_stage1_analysis.pm @@ -392,7 +392,7 @@ sub _generate_command_params { my @range2 = $self->read2_cycle_range(); my $read2_length = $range2[1] - $range2[0] + 1; if($read1_length != $read2_length) { - $self->logcroak('P4 stage1 analysis will not yet handle different length forward/reverse reads (no optional adapter detection)'); + $self->logwarn('P4 stage1 analysis will not yet handle different length forward/reverse reads (no optional adapter detection)'); } } From ff1b9d5556f36cd490ac49d02cea79d8715b3cbb Mon Sep 17 00:00:00 2001 From: "Ruben E. 
Bautista" Date: Thu, 15 Dec 2016 10:06:53 +0000 Subject: [PATCH 14/27] Chain execution of RNA-SeQC qc check to the vtfp/viv alignment cmd - Add extra argument to _qc_command to support the execution of the rna_seqc qc check in seq_alignment module - Remove every trace of the execution of the rna_seqc check from npg_pipeline::archive::file::qc - Remove rna_seqc from the yaml-formatted central list and parallelisation files - Update tests to reflect changes in seq_alignment --- data/config_files/function_list_central.yml | 1 - .../function_list_central_qc_run.yml | 1 - data/config_files/parallelisation.yml | 1 - .../archive/file/generation/seq_alignment.pm | 38 +++++++++-- lib/npg_pipeline/archive/file/qc.pm | 63 +------------------ t/10-pluggable_harold_central.t | 1 - t/20-archive_file_generation-seq_alignment.t | 7 ++- 7 files changed, 43 insertions(+), 69 deletions(-) diff --git a/data/config_files/function_list_central.yml b/data/config_files/function_list_central.yml index 6727f31a9..a5b58162d 100644 --- a/data/config_files/function_list_central.yml +++ b/data/config_files/function_list_central.yml @@ -25,7 +25,6 @@ - qc_genotype - qc_verify_bam_id - qc_upstream_tags -- qc_rna_seqc - run_analysis_complete - update_ml_warehouse - archive_to_irods_samplesheet diff --git a/data/config_files/function_list_central_qc_run.yml b/data/config_files/function_list_central_qc_run.yml index fdc5b7089..721e0c2b5 100644 --- a/data/config_files/function_list_central_qc_run.yml +++ b/data/config_files/function_list_central_qc_run.yml @@ -22,7 +22,6 @@ - qc_genotype - qc_verify_bam_id - qc_upstream_tags -- qc_rna_seqc - run_analysis_complete - run_archival_in_progress - copy_interop_files_to_irods diff --git a/data/config_files/parallelisation.yml b/data/config_files/parallelisation.yml index a023b001f..cb4bb4377 100644 --- a/data/config_files/parallelisation.yml +++ b/data/config_files/parallelisation.yml @@ -19,7 +19,6 @@ c: qc_genotype: 1 qc_upstream_tags: 1 qc_verify_bam_id: 
1 - qc_rna_seqc: 1 d: upload_auto_qc_to_qc_database: 1 upload_fastqcheck_to_qc_database: 1 diff --git a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm index ebad3ba49..bc6444736 100644 --- a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm +++ b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm @@ -5,6 +5,8 @@ use English qw{-no_match_vars}; use Readonly; use Moose::Meta::Class; use File::Slurp; +use File::Spec; +use File::Path qw{make_path}; use JSON::XS; use List::Util qw(sum); use List::MoreUtils qw(any); @@ -193,6 +195,7 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity my $spike_tag; my $input_path= $self->input_path; my $archive_path= $self->archive_path; + my $archive_qc_path = File::Spec->catdir($self->archive_path, q{qc}); my $qcpath= $self->qc_path; if($is_plex) { $tag_index = $l->tag_index; @@ -393,17 +396,26 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity q{&&}, _qc_command('bam_flagstats', $archive_path, $qcpath, $l, $is_plex, $nchs_outfile_label), : q(), + $do_rna ? join q( ), + q{&&}, + _qc_command('rna_seqc', $archive_path, $qcpath, $l, $is_plex, undef, $archive_qc_path), + : q() ), q('); } sub _qc_command {##no critic (Subroutines::ProhibitManyArgs) - my ($check_name, $qc_in, $qc_out, $l, $is_plex, $subset) = @_; + my ($check_name, $qc_in, $qc_out, $l, $is_plex, $subset, $archive_qc_path) = @_; + + my $args = {'id_run' => $l->id_run, + 'position'=> $l->position, + 'qc_out' => $qc_out, + 'check' => $check_name,}; - my $args = {'id_run' => $l->id_run, 'position' => $l->position}; if ($is_plex && defined $l->tag_index) { $args->{'tag_index'} = $l->tag_index; } + if ($check_name eq 'bam_flagstats') { if ($subset) { $args->{'subset'} = $subset; @@ -412,12 +424,26 @@ sub _qc_command {##no critic (Subroutines::ProhibitManyArgs) } else { $args->{'qc_in'} = q[$] . 
'PWD'; } - $args->{'qc_out'} = $qc_out; - $args->{'check'} = $check_name; + + if ($check_name eq 'rna_seqc') { + my $rpt_dir; + my $rp_dir = join q[_], $l->id_run, $l->position; + my $qc_report_dir = File::Spec->catdir($archive_qc_path, 'rna_seqc', $rp_dir); + if ($is_plex && defined $l->tag_index) { + $rpt_dir = join q[#], $rp_dir, $l->tag_index; + $qc_report_dir = File::Spec->catdir($archive_qc_path, 'rna_seqc', $rp_dir, $rpt_dir); + } + $args->{'qc_report_dir'} = $qc_report_dir; + if (! -d $qc_report_dir) { + make_path($qc_report_dir); + } + } + my $command = q[]; foreach my $arg (sort keys %{$args}) { $command .= join q[ ], q[ --].$arg, $args->{$arg}; } + return $QC_SCRIPT_NAME . $command; } @@ -681,6 +707,10 @@ LSF job creation for alignment =item st::api::lims +=item File::Spec + +=item File::Path + =item npg_tracking::data::reference::find =item npg_tracking::data::bait diff --git a/lib/npg_pipeline/archive/file/qc.pm b/lib/npg_pipeline/archive/file/qc.pm index ccc966d7d..652263a4f 100644 --- a/lib/npg_pipeline/archive/file/qc.pm +++ b/lib/npg_pipeline/archive/file/qc.pm @@ -3,7 +3,6 @@ package npg_pipeline::archive::file::qc; use Moose; use Readonly; use File::Spec; -use File::Path qw{make_path}; use Class::Load qw{load_class}; use npg_pipeline::lsf_job; @@ -16,10 +15,6 @@ Readonly::Scalar my $QC_SCRIPT_NAME => q{qc}; Readonly::Scalar my $LSF_MEMORY_REQ => 6000; Readonly::Scalar my $LSF_MEMORY_REQ_ADAPTER => 1500; Readonly::Scalar my $LSF_INDEX_MULTIPLIER => 10_000; -Readonly::Scalar my $REQUIRES_QC_REPORT_DIR => { - rna_seqc => 'rna_seqc', -}; - has q{qc_to_run} => (isa => q{Str}, is => q{ro}, @@ -52,16 +47,6 @@ sub BUILD { return; } -has q{_qc_report_dirs} => (isa => q{HashRef[Str]}, - is => q{ro}, - traits => [q{Hash}], - default => sub { { } }, - handles => { - _set_rpt_qc_report_dir => q{set}, - _get_rpt_qc_report_dir => q{get}, - }, - ); - sub run_qc { my ($self, $arg_refs) = @_; @@ -78,28 +63,6 @@ sub run_qc { } } - if 
($REQUIRES_QC_REPORT_DIR->{$qc_to_run}) { - my @archive_qc_path = ($self->archive_path, q[qc], $REQUIRES_QC_REPORT_DIR->{$qc_to_run}); - foreach my $position ($self->positions()) { - my $rp = join q[_], $self->id_run(), $position; - my $qc_report_dir = File::Spec->catdir(@archive_qc_path, $rp); - if (! -d $qc_report_dir) { - make_path($qc_report_dir); - $self->_set_rpt_qc_report_dir($rp, $qc_report_dir); - } - if ($self->is_multiplexed_lane($position)) { - foreach my $tag (@{$self->get_tag_index_list($position)}) { - my $rpt = join q[#], $rp, $tag; - $qc_report_dir = File::Spec->catdir(@archive_qc_path, $rp, $rpt); - if (! -d $qc_report_dir) { - make_path($qc_report_dir); - $self->_set_rpt_qc_report_dir($rpt, $qc_report_dir); - } - } - } - } - } - my $required_job_completion = $arg_refs->{'required_job_completion'}; $required_job_completion ||= q{}; @@ -198,17 +161,6 @@ sub _qc_command { } $c .= qq{ --qc_in=$qc_in --qc_out=$qc_out}; - if ($REQUIRES_QC_REPORT_DIR->{$self->qc_to_run()}) { - my @archive_qc_path = ($archive_path, q[qc], $REQUIRES_QC_REPORT_DIR->{$self->qc_to_run()}); - my $rptstr = join q[_], $self->id_run(), (defined $indexed ? 
$lanestr : $self->lsb_jobindex()); - my $qc_report_dir = File::Spec->catdir(@archive_qc_path, $rptstr); - if (defined $indexed) { - $rptstr = join q[#], $rptstr, $tagstr; - $qc_report_dir = File::Spec->catdir($qc_report_dir, $rptstr); - } - $c .= qq{ --qc_report_dir=$qc_report_dir}; - } - return $c; } @@ -218,9 +170,9 @@ sub _should_run { my $qc = $self->qc_to_run(); if (($qc =~ /^tag_metrics|upstream_tags|gc_bias|verify_bam_id$/smx) || - ($qc =~ /^genotype|pulldown_metrics|rna_seqc$/smx)) { + ($qc =~ /^genotype|pulldown_metrics$/smx)) { my $is_multiplexed_lane = $self->is_multiplexed_lane($position); - if ($qc =~ /^gc_bias|verify_bam_id|genotype|pulldown_metrics|rna_seqc$/smx) { + if ($qc =~ /^gc_bias|verify_bam_id|genotype|pulldown_metrics$/smx) { my $can_run = ((!defined $tag_index) && !$is_multiplexed_lane) || ((defined $tag_index) && $is_multiplexed_lane); if (!$can_run) { @@ -241,13 +193,6 @@ sub _should_run { if ($self->has_repository && $self->_check_uses_refrepos()) { $init_hash->{'repository'} = $self->repository; } - if ($REQUIRES_QC_REPORT_DIR->{$qc}) { - my $qc_report_dir_key = join q[_], $self->id_run(), $position; - if (defined $tag_index) { - $qc_report_dir_key = join q[#], $qc_report_dir_key, $tag_index; - } - $init_hash->{'qc_report_dir'} = $self->_get_rpt_qc_report_dir($qc_report_dir_key); - } return $self->_qc_module_name()->new($init_hash)->can_run(); } @@ -284,7 +229,7 @@ sub _lsf_options { my ($self, $qc_to_run) = @_; my $resources; - if ($qc_to_run =~ /insert_size|sequence_error|ref_match|pulldown_metrics|rna_seqc/smx ) { + if ($qc_to_run =~ /insert_size|sequence_error|ref_match|pulldown_metrics/smx ) { $resources = npg_pipeline::lsf_job->new(memory => $LSF_MEMORY_REQ)->memory_spec(); } elsif ($qc_to_run eq q[adapter]) { $resources = npg_pipeline::lsf_job->new(memory => $LSF_MEMORY_REQ_ADAPTER)->memory_spec() . @@ -352,8 +297,6 @@ Launches the qc jobs. 
=item Class::Load -=item File::Path - =back =head1 INCOMPATIBILITIES diff --git a/t/10-pluggable_harold_central.t b/t/10-pluggable_harold_central.t index f42e5bf27..e9a1a324e 100644 --- a/t/10-pluggable_harold_central.t +++ b/t/10-pluggable_harold_central.t @@ -76,7 +76,6 @@ my $runfolder_path = $util->analysis_runfolder_path(); qc_genotype qc_verify_bam_id qc_upstream_tags - qc_rna_seqc run_analysis_complete update_ml_warehouse archive_to_irods_samplesheet diff --git a/t/20-archive_file_generation-seq_alignment.t b/t/20-archive_file_generation-seq_alignment.t index d18cfe85d..a1c542b6c 100644 --- a/t/20-archive_file_generation-seq_alignment.t +++ b/t/20-archive_file_generation-seq_alignment.t @@ -102,12 +102,14 @@ subtest 'test 1' => sub { my $qc_in = $dir . q[/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/lane4]; my $qc_out = join q[/], $qc_in, q[qc]; + my $qc_report_dir = $dir . q[/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/qc/rna_seqc/12597_4/12597_4#3]; my $args = {}; $args->{'40003'} = qq{bash -c '\ mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#3 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#3 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#3_p4s2_pv_in.json -export_param_vals 12597_4#3_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which 
vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#3.json && viv.pl -s -x -v 3 -o viv_12597_4#3.log run_12597_4#3.json } . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 3} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 3} . q{ && qc --check alignment_filter_metrics --id_run 12597 --position 4 --qc_in $PWD --qc_out } .$qc_out.q{ --tag_index 3}. + q{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $PWD --qc_out } . $qc_out . q{ --qc_report_dir } . $qc_report_dir . q{ --tag_index 3}. q{ '}; $args->{'40000'} = qq{bash -c '\ mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#0_p4s2_pv_in.json -export_param_vals 12597_4#0_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#0.json && viv.pl -s -x -v 3 -o viv_12597_4#0.log run_12597_4#0.json } . @@ -142,6 +144,7 @@ subtest 'test 1' => sub { qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 3} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 3} . 
q{ && qc --check alignment_filter_metrics --id_run 12597 --position 4 --qc_in $PWD --qc_out } .$qc_out.q{ --tag_index 3}. + q{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $PWD --qc_out } . $qc_out . q{ --qc_report_dir } . $qc_report_dir . q{ --tag_index 3}. qq( '","40000":"bash -c ' mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#0_p4s2_pv_in.json -export_param_vals 12597_4#0_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '\\"'\\"'fop.*samtools_stats_F0.*00_bait.*'\\"'\\"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#0.json && viv.pl -s -x -v 3 -o viv_12597_4#0.log run_12597_4#0.json ) . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 0} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 0} . 
@@ -265,11 +268,13 @@ subtest 'test 2' => sub { my $qc_in = qq{$dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive}; my $qc_out = join q[/], $qc_in, q[qc]; + my $qc_report_dir = qq[$dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/qc/rna_seqc/13066_8]; my $args = {}; $args->{8} = qq{bash -c ' mkdir -p $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/tmp_\$LSB_JOBID/13066_8 ; cd $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/tmp_\$LSB_JOBID/13066_8 && vtfp.pl -param_vals $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/13066_8_p4s2_pv_in.json -export_param_vals 13066_8_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_13066_8.json && viv.pl -s -x -v 3 -o viv_13066_8.log run_13066_8.json } . qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out} . qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out --subset phix} . - q{ && qc --check alignment_filter_metrics --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . q{ '}; + q{ && qc --check alignment_filter_metrics --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . + q{ && qc --check rna_seqc --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . q{ --qc_report_dir } . $qc_report_dir . 
q{ '}; lives_ok {$rna_gen->_generate_command_arguments([8])} 'no error generating command arguments'; From 243d5d2ab475b77ce558f7fd852a70eb62013712 Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Mon, 9 Jan 2017 14:21:59 +0000 Subject: [PATCH 15/27] Remove use of attribute qc_report_dir for rna_seqc check - The attribute is not required by the check anymore. - Update test units in t/20-archive_file_generation-seq_alignment.t to reflect this change. - Remove extra argument used to create qc_report_dir in _qc_command --- .../archive/file/generation/seq_alignment.pm | 18 ++---------------- t/20-archive_file_generation-seq_alignment.t | 8 ++++---- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm index bc6444736..7379e34f9 100644 --- a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm +++ b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm @@ -398,14 +398,14 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity : q(), $do_rna ? join q( ), q{&&}, - _qc_command('rna_seqc', $archive_path, $qcpath, $l, $is_plex, undef, $archive_qc_path), + _qc_command('rna_seqc', $archive_path, $qcpath, $l, $is_plex), : q() ), q('); } sub _qc_command {##no critic (Subroutines::ProhibitManyArgs) - my ($check_name, $qc_in, $qc_out, $l, $is_plex, $subset, $archive_qc_path) = @_; + my ($check_name, $qc_in, $qc_out, $l, $is_plex, $subset) = @_; my $args = {'id_run' => $l->id_run, 'position'=> $l->position, @@ -425,20 +425,6 @@ sub _qc_command {##no critic (Subroutines::ProhibitManyArgs) $args->{'qc_in'} = q[$] . 
'PWD'; } - if ($check_name eq 'rna_seqc') { - my $rpt_dir; - my $rp_dir = join q[_], $l->id_run, $l->position; - my $qc_report_dir = File::Spec->catdir($archive_qc_path, 'rna_seqc', $rp_dir); - if ($is_plex && defined $l->tag_index) { - $rpt_dir = join q[#], $rp_dir, $l->tag_index; - $qc_report_dir = File::Spec->catdir($archive_qc_path, 'rna_seqc', $rp_dir, $rpt_dir); - } - $args->{'qc_report_dir'} = $qc_report_dir; - if (! -d $qc_report_dir) { - make_path($qc_report_dir); - } - } - my $command = q[]; foreach my $arg (sort keys %{$args}) { $command .= join q[ ], q[ --].$arg, $args->{$arg}; diff --git a/t/20-archive_file_generation-seq_alignment.t b/t/20-archive_file_generation-seq_alignment.t index a1c542b6c..5b3b3ed3e 100644 --- a/t/20-archive_file_generation-seq_alignment.t +++ b/t/20-archive_file_generation-seq_alignment.t @@ -109,7 +109,7 @@ subtest 'test 1' => sub { qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 3} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 3} . q{ && qc --check alignment_filter_metrics --id_run 12597 --position 4 --qc_in $PWD --qc_out } .$qc_out.q{ --tag_index 3}. - q{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $PWD --qc_out } . $qc_out . q{ --qc_report_dir } . $qc_report_dir . q{ --tag_index 3}. + q{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $PWD --qc_out } . $qc_out . q{ --tag_index 3}. 
q{ '}; $args->{'40000'} = qq{bash -c '\ mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#0_p4s2_pv_in.json -export_param_vals 12597_4#0_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#0.json && viv.pl -s -x -v 3 -o viv_12597_4#0.log run_12597_4#0.json } . @@ -144,7 +144,7 @@ subtest 'test 1' => sub { qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 3} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 3} . q{ && qc --check alignment_filter_metrics --id_run 12597 --position 4 --qc_in $PWD --qc_out } .$qc_out.q{ --tag_index 3}. - q{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $PWD --qc_out } . $qc_out . q{ --qc_report_dir } . $qc_report_dir . q{ --tag_index 3}. + q{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $PWD --qc_out } . $qc_out . q{ --tag_index 3}. 
qq( '","40000":"bash -c ' mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#0_p4s2_pv_in.json -export_param_vals 12597_4#0_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '\\"'\\"'fop.*samtools_stats_F0.*00_bait.*'\\"'\\"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#0.json && viv.pl -s -x -v 3 -o viv_12597_4#0.log run_12597_4#0.json ) . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 0} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 0} . 
@@ -273,8 +273,8 @@ subtest 'test 2' => sub { $args->{8} = qq{bash -c ' mkdir -p $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/tmp_\$LSB_JOBID/13066_8 ; cd $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/tmp_\$LSB_JOBID/13066_8 && vtfp.pl -param_vals $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/13066_8_p4s2_pv_in.json -export_param_vals 13066_8_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_13066_8.json && viv.pl -s -x -v 3 -o viv_13066_8.log run_13066_8.json } . qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out} . qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out --subset phix} . - q{ && qc --check alignment_filter_metrics --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . - q{ && qc --check rna_seqc --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . q{ --qc_report_dir } . $qc_report_dir . q{ '}; + q{ && qc --check alignment_filter_metrics --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . + q{ && qc --check rna_seqc --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . q{ '}; lives_ok {$rna_gen->_generate_command_arguments([8])} 'no error generating command arguments'; From 4337198342bcd904be3475f3a4c95f29af9e5aed Mon Sep 17 00:00:00 2001 From: "Ruben E. 
Bautista" Date: Tue, 10 Jan 2017 11:47:22 +0000 Subject: [PATCH 16/27] Fix bug - value of qc_in arg passed to rna_seqc is wrong in seq_alignment --- lib/npg_pipeline/archive/file/generation/seq_alignment.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm index 7379e34f9..4c5117176 100644 --- a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm +++ b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm @@ -416,7 +416,7 @@ sub _qc_command {##no critic (Subroutines::ProhibitManyArgs) $args->{'tag_index'} = $l->tag_index; } - if ($check_name eq 'bam_flagstats') { + if ($check_name =~ /^bam_flagstats|rna_seqc$/smx) { if ($subset) { $args->{'subset'} = $subset; } From 4e858e1ae5858bacd18b680eb2df6a16c190c30b Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Wed, 11 Jan 2017 14:55:20 +0000 Subject: [PATCH 17/27] Remove code that creates the BAM_basecalls_XX-XX/archive/qc/rna_seqc directory --- lib/npg_pipeline/archive/folder/generation.pm | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/lib/npg_pipeline/archive/folder/generation.pm b/lib/npg_pipeline/archive/folder/generation.pm index 95a166a23..ce0267aba 100644 --- a/lib/npg_pipeline/archive/folder/generation.pm +++ b/lib/npg_pipeline/archive/folder/generation.pm @@ -19,7 +19,6 @@ sub create_dir { my $qc_dir = $self->qc_path(); my $qc_log_dir = $qc_dir . q{/log}; my $tileviz_dir = $qc_dir . q{/tileviz}; - my $rna_seqc_dir = $qc_dir . q{/rna_seqc}; ############# # check existence of archive directory @@ -56,21 +55,6 @@ sub create_dir { } } - ############# - # check existence of rna_seqc directory - # create if it doesn't - - if ( ! 
-d $rna_seqc_dir) { - my $mk_rna_seqc_dir_cmd = qq{mkdir -p $rna_seqc_dir}; - $self->debug($mk_rna_seqc_dir_cmd); - my $return = qx{$mk_rna_seqc_dir_cmd}; - if ( $CHILD_ERROR ) { - $self->logcroak($tileviz_dir, - qq{ does not exist and unable to create: $CHILD_ERROR }, - $return); - } - } - ############# # check existence of multiplex lane and qc directory # create if they doesn't @@ -125,11 +109,6 @@ sub create_dir { $self->warn("could not chgrp $tileviz_dir\n\t$rc"); # not fatal } - $self->info("chgrp $owning_group $rna_seqc_dir"); - $rc = `chgrp $owning_group $rna_seqc_dir`; - if ( $CHILD_ERROR ) { - $self->warn("could not chgrp $rna_seqc_dir\n\t$rc"); # not fatal - } ############ # ensure that the owning group is what we expect @@ -167,12 +146,6 @@ sub create_dir { $self->warn("could not chmod $tileviz_dir\n\t$rc"); # not fatal } - $self->info("chmod u=rwx,g=srxw,o=rx $rna_seqc_dir"); - $rc = `chmod u=rwx,g=srxw,o=rx $rna_seqc_dir`; - if ( $CHILD_ERROR ) { - $self->warn("could not chmod $rna_seqc_dir\n\t$rc"); # not fatal - } - $self->info("chmod u=rwx,g=srxw,o=rx $archive_log_dir"); $rc = `chmod u=rwx,g=srxw,o=rx $archive_log_dir`; if ( $CHILD_ERROR ) { From fd7be642f3660ea70a37b9fb1df6781ca240151f Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Wed, 11 Jan 2017 16:01:50 +0000 Subject: [PATCH 18/27] Update seq_alignment tests to reflect latest changes --- t/20-archive_file_generation-seq_alignment.t | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/20-archive_file_generation-seq_alignment.t b/t/20-archive_file_generation-seq_alignment.t index 5b3b3ed3e..ece0cfbe7 100644 --- a/t/20-archive_file_generation-seq_alignment.t +++ b/t/20-archive_file_generation-seq_alignment.t @@ -109,7 +109,7 @@ subtest 'test 1' => sub { qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 3} . 
qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 3} . q{ && qc --check alignment_filter_metrics --id_run 12597 --position 4 --qc_in $PWD --qc_out } .$qc_out.q{ --tag_index 3}. - q{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $PWD --qc_out } . $qc_out . q{ --tag_index 3}. + qq{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $qc_in --qc_out } . $qc_out . q{ --tag_index 3}. q{ '}; $args->{'40000'} = qq{bash -c '\ mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#0_p4s2_pv_in.json -export_param_vals 12597_4#0_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#0.json && viv.pl -s -x -v 3 -o viv_12597_4#0.log run_12597_4#0.json } . @@ -144,7 +144,7 @@ subtest 'test 1' => sub { qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 3} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 3} . q{ && qc --check alignment_filter_metrics --id_run 12597 --position 4 --qc_in $PWD --qc_out } .$qc_out.q{ --tag_index 3}. 
- q{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $PWD --qc_out } . $qc_out . q{ --tag_index 3}. + qq{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $qc_in --qc_out } . $qc_out . q{ --tag_index 3}. qq( '","40000":"bash -c ' mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#0_p4s2_pv_in.json -export_param_vals 12597_4#0_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '\\"'\\"'fop.*samtools_stats_F0.*00_bait.*'\\"'\\"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#0.json && viv.pl -s -x -v 3 -o viv_12597_4#0.log run_12597_4#0.json ) . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 0} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 0} . @@ -274,7 +274,7 @@ subtest 'test 2' => sub { qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out} . qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out --subset phix} . q{ && qc --check alignment_filter_metrics --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . - q{ && qc --check rna_seqc --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . 
q{ '}; + qq{ && qc --check rna_seqc --id_run 13066 --position 8 --qc_in $qc_in --qc_out } . $qc_out . q{ '}; lives_ok {$rna_gen->_generate_command_arguments([8])} 'no error generating command arguments'; From 622ca7cf1d1a5a1ac15431264e443f9c6abaa185 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Mon, 5 Jun 2017 13:27:13 +0100 Subject: [PATCH 19/27] tests with multiple references --- t/20-archive_file_generation-seq_alignment.t | 33 ++++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/t/20-archive_file_generation-seq_alignment.t b/t/20-archive_file_generation-seq_alignment.t index d18cfe85d..9ba17d73d 100644 --- a/t/20-archive_file_generation-seq_alignment.t +++ b/t/20-archive_file_generation-seq_alignment.t @@ -3,6 +3,7 @@ use warnings; use Test::More tests => 11; use Test::Exception; use Test::Deep; +use Test::Warn; use File::Temp qw/tempdir/; use Cwd qw/cwd abs_path/; use Perl6::Slurp; @@ -10,6 +11,8 @@ use File::Copy; use Log::Log4perl qw(:levels); use JSON; +use st::api::lims; + use_ok('npg_pipeline::archive::file::generation::seq_alignment'); local $ENV{'NPG_WEBSERVICE_CACHE_DIR'} = q[t/data/rna_seq]; local $ENV{'TEST_FS_RESOURCE'} = 'nfs-sf3'; @@ -327,7 +330,7 @@ subtest 'test 3' => sub { }; subtest 'test 4' => sub { - plan tests => 5; + plan tests => 8; ##HiSeqX, run 16839_7 my $ref_dir = join q[/],$dir,'references','Homo_sapiens','GRCh38_full_analysis_set_plus_decoy_hla','all'; @@ -342,7 +345,8 @@ subtest 'test 4' => sub { my $cache_dir = join q[/], $runfolder_path, 'Data/Intensities/BAM_basecalls_20150712-121006/metadata_cache_16839'; `mkdir -p $cache_dir`; copy("t/data/hiseqx/16839_RunInfo.xml","$runfolder_path/RunInfo.xml") or die "Copy failed: $!"; #to get information that it is paired end - `touch $ref_dir/fasta/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa`; + my $fasta_ref = "$ref_dir/fasta/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa"; + `touch $fasta_ref`; `touch 
$ref_dir/picard/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa.dict`; `touch $ref_dir/bwa0_6/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa.alt`; `touch $ref_dir/bwa0_6/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa.amb`; @@ -352,7 +356,7 @@ subtest 'test 4' => sub { `touch $ref_dir/bwa0_6/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa.sa`; local $ENV{'NPG_WEBSERVICE_CACHE_DIR'} = q[t/data/hiseqx]; - local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = q[t/data/hiseqx/samplesheet_16839.csv]; + local $ENV{'NPG_CACHED_SAMPLESHEET_FILE'} = q[t/data/hiseqx/samplesheet_16839.csv]; my $hsx_gen; lives_ok { @@ -367,6 +371,29 @@ subtest 'test 4' => sub { } 'no error creating an object'; is ($hsx_gen->id_run, 16839, 'id_run inferred correctly'); + my $l = st::api::lims->new(id_run => 16839, position => 1, tag_index => 0); + is ($hsx_gen->_ref($l, 'fasta'), $fasta_ref, 'reference for tag zero'); + my $old_ss = $ENV{'NPG_CACHED_SAMPLESHEET_FILE'}; + my $ss = slurp $old_ss; + $ss =~ s/GRCh38_full_analysis_set_plus_decoy_hla/GRCh38X/; + my $new_ss = "$dir/multiref_samplesheet_16839.csv"; + open my $fhss, '>', $new_ss or die "Cannot open $new_ss for writing"; + print $fhss $ss or die "Cannot write to $new_ss"; + close $fhss or warn "Failed to close $new_ss"; + # new samplesheet has multiple references in lane 1 + local $ENV{'NPG_CACHED_SAMPLESHEET_FILE'} = $new_ss; + my $other_ref_dir = join q[/],$dir,'references','Homo_sapiens','GRCh38X','all'; + `mkdir -p $other_ref_dir/fasta`; + `touch $other_ref_dir/fasta/Homo_sapiens.GRCh38X.fa`; + $l = st::api::lims->new(id_run => 16839, position => 1, tag_index => 0); + my $other_ref; + warnings_exist { $other_ref = $hsx_gen->_ref($l, 'fasta') } + qr/Multiple references for st::api::lims object, driver - samplesheet/, + 'warning about multiple references'; + is ($other_ref, undef, 'multiple references in a lane - no reference for tag zero returned'); + + # restore old samplesheet + local
$ENV{'NPG_CACHED_SAMPLESHEET_FILE'} = $old_ss; my $qc_in = qq{$dir/150709_HX4_16839_A_H7MHWCCXX/Data/Intensities/BAM_basecalls_20150712-121006/no_cal/archive/lane7}; my $qc_out = qq{$qc_in/qc}; my $args = {}; From e0d123305dfa9def603a3dadc28f304487acb824 Mon Sep 17 00:00:00 2001 From: "Ruben E. Bautista" Date: Mon, 5 Jun 2017 13:28:53 +0100 Subject: [PATCH 20/27] Update changelog and tidy up code - remove unused libraries (update pod accordingly) - remove trailing whitespaces here and there --- Changes | 4 ++++ lib/npg_pipeline/archive/file/generation/seq_alignment.pm | 6 ------ t/20-archive_file_generation-seq_alignment.t | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/Changes b/Changes index 3b73f1cbb..531d6f9ce 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,10 @@ LIST OF CHANGES --------------- + - Chained execution of RNA-SeQC to the vtfp/viv alignment cmd for RNA-Seq libraries only: + entries for qc check rna_seqc removed from central function and parallelisation. + code that created rna_seqc-specific directories has been removed as this is + now handled by the check itself using qc_out arg. 
- pipeline scripts - redirect stderr output to the log to capture output from all NPG and CPAN modules in one place diff --git a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm index 4c5117176..477fd2d1c 100644 --- a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm +++ b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm @@ -5,8 +5,6 @@ use English qw{-no_match_vars}; use Readonly; use Moose::Meta::Class; use File::Slurp; -use File::Spec; -use File::Path qw{make_path}; use JSON::XS; use List::Util qw(sum); use List::MoreUtils qw(any); @@ -693,10 +691,6 @@ LSF job creation for alignment =item st::api::lims -=item File::Spec - -=item File::Path - =item npg_tracking::data::reference::find =item npg_tracking::data::bait diff --git a/t/20-archive_file_generation-seq_alignment.t b/t/20-archive_file_generation-seq_alignment.t index ece0cfbe7..7446497c6 100644 --- a/t/20-archive_file_generation-seq_alignment.t +++ b/t/20-archive_file_generation-seq_alignment.t @@ -273,7 +273,7 @@ subtest 'test 2' => sub { $args->{8} = qq{bash -c ' mkdir -p $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/tmp_\$LSB_JOBID/13066_8 ; cd $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/tmp_\$LSB_JOBID/13066_8 && vtfp.pl -param_vals $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/13066_8_p4s2_pv_in.json -export_param_vals 13066_8_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which 
vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_13066_8.json && viv.pl -s -x -v 3 -o viv_13066_8.log run_13066_8.json } . qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out} . qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out --subset phix} . - q{ && qc --check alignment_filter_metrics --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . + q{ && qc --check alignment_filter_metrics --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . qq{ && qc --check rna_seqc --id_run 13066 --position 8 --qc_in $qc_in --qc_out } . $qc_out . q{ '}; lives_ok {$rna_gen->_generate_command_arguments([8])} From 81ce47666dd75055fddc3628b51d926d38e1c315 Mon Sep 17 00:00:00 2001 From: Marina Gourtovaia Date: Mon, 5 Jun 2017 14:23:27 +0100 Subject: [PATCH 21/27] merged in wtsi/devel --- .../archive/file/generation/seq_alignment.pm | 6 +++- .../pluggable/harold/post_qc_review.pm | 9 ++--- t/10-pluggable_harold_central.t | 5 ++- t/10-pluggable_harold_post_qc_review.t | 6 ++-- t/20-archive_file_generation-seq_alignment.t | 33 +++++++++++++++++-- 5 files changed, 43 insertions(+), 16 deletions(-) diff --git a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm index ebad3ba49..26beeb79f 100644 --- a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm +++ b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm @@ -554,7 +554,11 @@ sub _ref { $self->warn(qq{No reference genome set for $lstring}); } else { if (scalar @refs > 1) { - $self->logcroak(qq{Multiple references for $lstring}); + if (defined $l->tag_index && $l->tag_index == 0) { + $self->logwarn(qq{Multiple references for $lstring}); + } else { + $self->logcroak(qq{Multiple references for $lstring}); + } } else { $ref = $refs[0]; if ($ref_name) { diff --git a/lib/npg_pipeline/pluggable/harold/post_qc_review.pm 
b/lib/npg_pipeline/pluggable/harold/post_qc_review.pm index 4b71d58c4..223315739 100644 --- a/lib/npg_pipeline/pluggable/harold/post_qc_review.pm +++ b/lib/npg_pipeline/pluggable/harold/post_qc_review.pm @@ -225,14 +225,11 @@ sub _update_warehouse_command { my $post_qc_complete = $option and (ref $option eq 'HASH') and $option->{'post_qc_complete'} ? 1 : 0; my $id_run = $self->id_run; - my $command = q[]; + my $command = qq{$loader_name --verbose --id_run $id_run}; if ($loader_name eq 'warehouse_loader') { - # Currently, we need pool library name and link to plexes in SeqQC. - # Therefore, we need to run live. - $command = join q[], map {q[unset ] . $_ . q[;]} npg_pipeline::cache->env_vars; + $command .= q{ --lims_driver_type }; + $command .= $post_qc_complete ? 'ml_warehouse_fc_cache' : 'samplesheet'; } - - $command .= qq{$loader_name --verbose --id_run $id_run}; my $job_name = join q{_}, $loader_name, $id_run, $self->pipeline_name; my $path = $self->make_log_dir($self->recalibrated_path()); my $prereq = q[]; diff --git a/t/10-pluggable_harold_central.t b/t/10-pluggable_harold_central.t index 6e4afc59e..74a75eba0 100644 --- a/t/10-pluggable_harold_central.t +++ b/t/10-pluggable_harold_central.t @@ -132,10 +132,9 @@ my $runfolder_path = $util->analysis_runfolder_path(); my $timestamp = $pb->timestamp; my $recalibrated_path = $pb->recalibrated_path(); my $log_dir = $pb->make_log_dir( $recalibrated_path ); - my $unset_string = 'unset NPG_WEBSERVICE_CACHE_DIR;unset NPG_CACHED_SAMPLESHEET_FILE;'; my $expected_command = q[bsub -q lowload 50 -J warehouse_loader_1234_central ] . - qq[-o $log_dir/warehouse_loader_1234_central_] . $timestamp . - qq[.out '${unset_string}warehouse_loader --verbose --id_run 1234']; + qq[-o $log_dir/warehouse_loader_1234_central_] . $timestamp . q[.out ] . 
+ qq['warehouse_loader --verbose --id_run 1234 --lims_driver_type samplesheet']; is($pb->_update_warehouse_command('warehouse_loader', (50)), $expected_command, 'update warehouse command'); } diff --git a/t/10-pluggable_harold_post_qc_review.t b/t/10-pluggable_harold_post_qc_review.t index 40948cea7..055c636ba 100644 --- a/t/10-pluggable_harold_post_qc_review.t +++ b/t/10-pluggable_harold_post_qc_review.t @@ -34,7 +34,7 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); upload_auto_qc_to_qc_database run_run_archived run_qc_complete - update_warehouse + update_warehouse_post_qc_complete ); my @original = @functions_in_order; unshift @original, 'lsf_start'; @@ -64,10 +64,9 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); my $log_dir_in_outgoing = $log_dir; $log_dir_in_outgoing =~ s{/analysis/}{/outgoing/}smx; my $job_name = 'warehouse_loader_1234_post_qc_review'; - my $unset_string = 'unset NPG_WEBSERVICE_CACHE_DIR;unset NPG_CACHED_SAMPLESHEET_FILE;'; my $prefix = qq[bsub -q lowload 50 -J $job_name ] . qq[-o $log_dir/${job_name}_${timestamp}.out]; - my $command = qq['${unset_string}warehouse_loader --verbose --id_run 1234']; + my $command = q['warehouse_loader --verbose --id_run 1234 --lims_driver_type samplesheet']; is($post_qc_review->_update_warehouse_command('warehouse_loader', (50)), qq[$prefix $command], 'update warehouse command'); @@ -75,6 +74,7 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); $prefix = qq[bsub -q lowload 50 -J $job_name ] . 
qq[-o $log_dir_in_outgoing/${job_name}_${timestamp}.out]; my $preexec = qq(-E "[ -d '${log_dir_in_outgoing}' ]"); + $command = q['warehouse_loader --verbose --id_run 1234 --lims_driver_type ml_warehouse_fc_cache']; is($post_qc_review->_update_warehouse_command( 'warehouse_loader', (50, {}, {'post_qc_complete' => 1})), join(q[ ],$prefix,$preexec,$command), diff --git a/t/20-archive_file_generation-seq_alignment.t b/t/20-archive_file_generation-seq_alignment.t index d18cfe85d..9ba17d73d 100644 --- a/t/20-archive_file_generation-seq_alignment.t +++ b/t/20-archive_file_generation-seq_alignment.t @@ -3,6 +3,7 @@ use warnings; use Test::More tests => 11; use Test::Exception; use Test::Deep; +use Test::Warn; use File::Temp qw/tempdir/; use Cwd qw/cwd abs_path/; use Perl6::Slurp; @@ -10,6 +11,8 @@ use File::Copy; use Log::Log4perl qw(:levels); use JSON; +use st::api::lims; + use_ok('npg_pipeline::archive::file::generation::seq_alignment'); local $ENV{'NPG_WEBSERVICE_CACHE_DIR'} = q[t/data/rna_seq]; local $ENV{'TEST_FS_RESOURCE'} = 'nfs-sf3'; @@ -327,7 +330,7 @@ subtest 'test 3' => sub { }; subtest 'test 4' => sub { - plan tests => 5; + plan tests => 8; ##HiSeqX, run 16839_7 my $ref_dir = join q[/],$dir,'references','Homo_sapiens','GRCh38_full_analysis_set_plus_decoy_hla','all'; @@ -342,7 +345,8 @@ subtest 'test 4' => sub { my $cache_dir = join q[/], $runfolder_path, 'Data/Intensities/BAM_basecalls_20150712-121006/metadata_cache_16839'; `mkdir -p $cache_dir`; copy("t/data/hiseqx/16839_RunInfo.xml","$runfolder_path/RunInfo.xml") or die "Copy failed: $!"; #to get information that it is paired end - `touch $ref_dir/fasta/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa`; + my $fasta_ref = "$ref_dir/fasta/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa"; + `touch $fasta_ref`; `touch $ref_dir/picard/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa.dict`; `touch $ref_dir/bwa0_6/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa.alt`; `touch 
$ref_dir/bwa0_6/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa.amb`; @@ -352,7 +356,7 @@ subtest 'test 4' => sub { `touch $ref_dir/bwa0_6/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa.sa`; local $ENV{'NPG_WEBSERVICE_CACHE_DIR'} = q[t/data/hiseqx]; - local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = q[t/data/hiseqx/samplesheet_16839.csv]; + local $ENV{'NPG_CACHED_SAMPLESHEET_FILE'} = q[t/data/hiseqx/samplesheet_16839.csv]; my $hsx_gen; lives_ok { @@ -367,6 +371,29 @@ subtest 'test 4' => sub { } 'no error creating an object'; is ($hsx_gen->id_run, 16839, 'id_run inferred correctly'); + my $l = st::api::lims->new(id_run => 16839, position => 1, tag_index => 0); + is ($hsx_gen->_ref($l, 'fasta'), $fasta_ref, 'reference for tag zero'); + my $old_ss = $ENV{'NPG_CACHED_SAMPLESHEET_FILE'}; + my $ss = slurp $old_ss; + $ss =~ s/GRCh38_full_analysis_set_plus_decoy_hla/GRCh38X/; + my $new_ss = "$dir/multiref_samplesheet_16839.csv"; + open my $fhss, '>', $new_ss or die "Cannot open $new_ss for writing"; + print $fhss $ss or die "Cannot write to $new_ss"; + close $fhss or warn "Failed to close $new_ss"; + # new samplesheet has multiple references in lane 1 + local $ENV{'NPG_CACHED_SAMPLESHEET_FILE'} = $new_ss; + my $other_ref_dir = join q[/],$dir,'references','Homo_sapiens','GRCh38X','all'; + `mkdir -p $other_ref_dir/fasta`; + `touch $other_ref_dir/fasta/Homo_sapiens.GRCh38X.fa`; + $l = st::api::lims->new(id_run => 16839, position => 1, tag_index => 0); + my $other_ref; + warnings_exist { $other_ref = $hsx_gen->_ref($l, 'fasta') } + qr/Multiple references for st::api::lims object, driver - samplesheet/, + 'warning about multiple references'; + is ($other_ref, undef, 'multiple references in a lane - no reference for tag zero returned'); + + # restore old samplesheet + local $ENV{'NPG_CACHED_SAMPLESHEET_FILE'} = $old_ss; my $qc_in = qq{$dir/150709_HX4_16839_A_H7MHWCCXX/Data/Intensities/BAM_basecalls_20150712-121006/no_cal/archive/lane7}; my $qc_out = qq{$qc_in/qc}; my 
$args = {}; From ddb515de6494ea588bd139c5630b248589ca9d67 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Wed, 7 Jun 2017 21:34:37 +0100 Subject: [PATCH 22/27] removed unused force_p4 attribute from npg_pipeline/base.pm --- lib/npg_pipeline/base.pm | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/lib/npg_pipeline/base.pm b/lib/npg_pipeline/base.pm index f3045476d..4ace95faa 100644 --- a/lib/npg_pipeline/base.pm +++ b/lib/npg_pipeline/base.pm @@ -248,20 +248,6 @@ has q{force_phix_split} => ( default => 1, ); -=head2 force_p4 - -Boolean decision to force on P4 pipeline usage - -=cut - -has q{force_p4} => ( - isa => q{Bool}, - is => q{ro}, - lazy_build => 1, - documentation => q{Boolean decision to force on P4 pipeline usage, default is false}, -); - - =head2 verbose Boolean option to switch on verbose mode From 0df06f935f66b29f6db18ae902d8a2a41b6fa877 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Thu, 8 Jun 2017 10:22:19 +0100 Subject: [PATCH 23/27] update Changes --- Changes | 1 + 1 file changed, 1 insertion(+) diff --git a/Changes b/Changes index 0a2a70082..87f24cc2f 100644 --- a/Changes +++ b/Changes @@ -2,6 +2,7 @@ LIST OF CHANGES --------------- - remove GCLP-specific code and configuration files + - remove unused force_p4 attribute - OLB analysis removed - recalibration removed - pb_cal_path and dif_files_path accessors disabled From 2049e9c17297b1a452eef446ddd0a3beb54b17fe Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Thu, 8 Jun 2017 11:37:52 +0100 Subject: [PATCH 24/27] update p4 stage 2 (seq_alignment) to use bambi chrsplit instead of SplitBamByChromosomes.jar for Y-split runs --- lib/npg_pipeline/archive/file/generation/seq_alignment.pm | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm index 559008311..486e5e11d 100644 --- a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm +++ 
b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm @@ -334,9 +334,8 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity } if($l->separate_y_chromosome_data) { - $p4_param_vals->{split_bam_by_chromosome_flags} = q[S=Y]; - $p4_param_vals->{split_bam_by_chromosome_flags} = q[V=true]; - $p4_param_vals->{split_bam_by_chromosomes_jar} = $self->_SplitBamByChromosomes_jar; + $p4_param_vals->{chrsplit_subset_flag} = ['--subset', 'Y,chrY']; + $p4_param_vals->{chrsplit_invert_flag} = q[--invert]; } # write p4 parameters to file From a00c3c5d2d0ba770fae66d9cb0a4d40e64cdcaa3 Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Thu, 8 Jun 2017 11:38:51 +0100 Subject: [PATCH 25/27] update Changes --- Changes | 1 + 1 file changed, 1 insertion(+) diff --git a/Changes b/Changes index 87f24cc2f..ee188d218 100644 --- a/Changes +++ b/Changes @@ -9,6 +9,7 @@ LIST OF CHANGES - allow p4 stage 1 to analyse runs with different length reads - illumina2bam function removed - update p4 stage 2 (seq_alignment) warn rather than croak if multiple references for tag 0 + - update p4 stage 2 (seq_alignment) to use bambi chrsplit instead of SplitBamByChromosomes.jar for Y-split runs - pipeline scripts - redirect stderr output to the log to capture output from all NPG and CPAN modules in one place From 654c3611e49086d9f8f058c4d843b4c4f06738b2 Mon Sep 17 00:00:00 2001 From: "Ruben E. 
Bautista" Date: Thu, 8 Jun 2017 11:41:12 +0100 Subject: [PATCH 26/27] Tiny change: remove unused variable --- lib/npg_pipeline/archive/file/generation/seq_alignment.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm index 477fd2d1c..1cc08b9ae 100644 --- a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm +++ b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm @@ -193,7 +193,6 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity my $spike_tag; my $input_path= $self->input_path; my $archive_path= $self->archive_path; - my $archive_qc_path = File::Spec->catdir($self->archive_path, q{qc}); my $qcpath= $self->qc_path; if($is_plex) { $tag_index = $l->tag_index; From e645566b2aeddc7eee6c1139fd050cc98035e7dd Mon Sep 17 00:00:00 2001 From: Kevin Lewis Date: Thu, 8 Jun 2017 14:42:15 +0100 Subject: [PATCH 27/27] extend list of names for chromosome Y for y-split to cover all variants in our current set of human reference genomes --- lib/npg_pipeline/archive/file/generation/seq_alignment.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm index 486e5e11d..16c5b03d2 100644 --- a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm +++ b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm @@ -334,7 +334,7 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity } if($l->separate_y_chromosome_data) { - $p4_param_vals->{chrsplit_subset_flag} = ['--subset', 'Y,chrY']; + $p4_param_vals->{chrsplit_subset_flag} = ['--subset', 'Y,chrY,ChrY,chrY_KI270740v1_random']; $p4_param_vals->{chrsplit_invert_flag} = q[--invert]; }