diff --git a/Changes b/Changes index ed0719b55..65187e03d 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,22 @@ LIST OF CHANGES --------------- + - Chained execution of RNA-SeQC to the vtfp/viv alignment cmd for RNA-Seq libraries only: + entries for qc check rna_seqc removed from central function and parallelisation. + code that created rna_seqc-specific directories has been removed as this is + now handled by the check itself using qc_out arg. + - remove GCLP-specific code and configuration files + - remove unused force_p4 attribute + - OLB analysis removed + - recalibration removed + - pb_cal_path and dif_files_path accessors disabled + - allow p4 stage 1 to analyse runs with different length reads + - illumina2bam function removed + - update p4 stage 2 (seq_alignment) warn rather than croak if multiple references for tag 0 + - update p4 stage 2 (seq_alignment) to use bambi chrsplit instead of SplitBamByChromosomes.jar for Y-split runs + - pipeline scripts - redirect stderr output to the log to capture output from all + NPG and CPAN modules in one place + release 51.9 - p4stage2 speed-up by caching references - p4stage2 errors in getting a reference made fatal @@ -20,6 +36,9 @@ release 51.8 release 51.7 - replaces the original log role with the one from DNAP utilities, which provides a Log4perl logger and some convenience methods. 
+ - new signature for the sequencescape warehouse loader so that it uses + samplesheet LIMs driver at the analysis stage and ml_warehouse_fc_cache + LIMs driver at the archival stage release 51.6 - test and code fixes to ensure problem-free tests under Perl 5.22.2 diff --git a/MANIFEST b/MANIFEST index 99e6486a0..4c4203db3 100644 --- a/MANIFEST +++ b/MANIFEST @@ -11,23 +11,16 @@ bin/script_must_be_unique_runner Build.PL Changes data/config_files/function_list_central.yml -data/config_files/function_list_central_gclp.yml -data/config_files/function_list_central_olb.yml data/config_files/function_list_central_qc_run.yml data/config_files/function_list_post_qc_review.yml -data/config_files/function_list_post_qc_review_gclp.yml data/config_files/general_values.ini -data/config_files/illumina_pipeline.ini data/config_files/parallelisation.yml -data/config_files/pb_cal_pipeline.ini -lib/npg_pipeline/analysis/bustard4pbcb.pm lib/npg_pipeline/analysis/create_lane_tag_file.pm lib/npg_pipeline/analysis/FixConfigFiles.pm -lib/npg_pipeline/analysis/harold_calibration_bam.pm +lib/npg_pipeline/analysis/illumina_basecall_stats.pm lib/npg_pipeline/analysis/split_bam_by_tag.pm lib/npg_pipeline/archive/file/BamClusterCounts.pm lib/npg_pipeline/archive/file/generation.pm -lib/npg_pipeline/archive/file/generation/illumina2bam.pm lib/npg_pipeline/archive/file/generation/seq_alignment.pm lib/npg_pipeline/archive/file/qc.pm lib/npg_pipeline/archive/file/generation/seqchksum_comparator.pm @@ -52,7 +45,6 @@ lib/npg_pipeline/pluggable/harold/post_qc_review.pm lib/npg_pipeline/roles/accessor.pm lib/npg_pipeline/roles/business/base.pm lib/npg_pipeline/roles/business/flag_options.pm -lib/npg_pipeline/roles/business/harold_calibration_reqs.pm lib/npg_pipeline/run/folder/link.pm lib/npg_tracking/daemon/analysis.pm lib/npg_tracking/daemon/archival.pm @@ -75,18 +67,16 @@ t/15-pipeline_launcher_scripts.t t/20-archive-fastqcheck.t t/20-archive_auto_qc_to_db.t t/20-archive_file-to_irods.t 
-t/20-archive_file_generation-illumina2bam.t t/20-archive_file_generation-seq_alignment.t t/20-archive_folder_generation.t t/20-archive_illumina_analysis.t t/20-archive_qc.t t/20-archive_logs.t t/21-references_adapters.t -t/25-analysis-bustard4pbcb.t t/25-analysis-create_lane_tag_file.t t/25-analysis-FixConfigFiles.t t/25-analysis-split_bam_by_tag.t -t/25-harold_calibration_bam.t +t/25-analysis-illumina_basecall_stats.t t/30-launcher-status.t t/30-run_folder-link.t t/35-archive_file_generation-BamClusterCounts.t @@ -113,7 +103,6 @@ t/bin/software/solexa/jars/Illumina2bam.jar t/bin/software/solexa/jars/SplitBamByReadGroup.jar t/bin/software/solexa/bin/qc t/data/samplesheet_1234.csv -t/data/illumina2bam/1234_samplesheet.csv t/data/qc/1234_samplesheet_amended.csv t/data/qc/samplesheet_14353.csv t/data/qc/samplesheet_14043.csv diff --git a/bin/npg_pipeline_central b/bin/npg_pipeline_central index 1b792e46b..c4ef81024 100755 --- a/bin/npg_pipeline_central +++ b/bin/npg_pipeline_central @@ -23,6 +23,7 @@ Log::Log4perl->easy_init({layout => $layout, file => $logfile, utf8 => 1}); +$p->redirect_stderr(); $p->main(); 0; diff --git a/bin/npg_pipeline_post_qc_review b/bin/npg_pipeline_post_qc_review index 1849b7de8..a7894e270 100755 --- a/bin/npg_pipeline_post_qc_review +++ b/bin/npg_pipeline_post_qc_review @@ -23,6 +23,7 @@ Log::Log4perl->easy_init({layout => $layout, file => $logfile, utf8 => 1}); +$p->redirect_stderr(); $p->main(); 0; diff --git a/data/config_files/function_list_central.yml b/data/config_files/function_list_central.yml index 6727f31a9..a5b58162d 100644 --- a/data/config_files/function_list_central.yml +++ b/data/config_files/function_list_central.yml @@ -25,7 +25,6 @@ - qc_genotype - qc_verify_bam_id - qc_upstream_tags -- qc_rna_seqc - run_analysis_complete - update_ml_warehouse - archive_to_irods_samplesheet diff --git a/data/config_files/function_list_central_gclp.yml b/data/config_files/function_list_central_gclp.yml deleted file mode 100644 index 
c671e8018..000000000 --- a/data/config_files/function_list_central_gclp.yml +++ /dev/null @@ -1,26 +0,0 @@ ---- -- create_archive_directory -- create_empty_fastq -- create_summary_link_analysis -- run_analysis_in_progress -- lane_analysis_in_progress -- illumina_basecall_stats -- p4_stage1_analysis -- update_ml_warehouse -- run_secondary_analysis_in_progress -- bam2fastqcheck_and_cached_fastq -- qc_qX_yield -- qc_insert_size -- qc_sequence_error -- qc_gc_fraction -- qc_ref_match -- seq_alignment -- bam_cluster_counter_check -- seqchksum_comparator -- qc_pulldown_metrics -- qc_genotype -- qc_verify_bam_id -- qc_upstream_tags -- run_analysis_complete -- update_ml_warehouse -- run_qc_review_pending diff --git a/data/config_files/function_list_central_olb.yml b/data/config_files/function_list_central_olb.yml deleted file mode 100644 index 6cf8a38e7..000000000 --- a/data/config_files/function_list_central_olb.yml +++ /dev/null @@ -1,36 +0,0 @@ ---- -- create_archive_directory -- create_empty_fastq -- create_summary_link_analysis -- run_analysis_in_progress -- lane_analysis_in_progress -- bustard_matrix_lanes -- bustard_matrix_all -- bustard_phasing_lanes -- bustard_phasing_all -- bustard_basecalls_lanes -- bustard_basecalls_all -- p4_stage1_analysis -- update_warehouse -- update_ml_warehouse -- run_secondary_analysis_in_progress -- bam2fastqcheck_and_cached_fastq -- qc_qX_yield -- qc_adapter -- qc_insert_size -- qc_sequence_error -- qc_gc_fraction -- qc_ref_match -- seq_alignment -- update_ml_warehouse -- bam_cluster_counter_check -- seqchksum_comparator -- qc_gc_bias -- qc_pulldown_metrics -- qc_genotype -- qc_verify_bam_id -- qc_upstream_tags -- run_analysis_complete -- update_ml_warehouse -- archive_to_irods_samplesheet -- run_qc_review_pending diff --git a/data/config_files/function_list_central_qc_run.yml b/data/config_files/function_list_central_qc_run.yml index fdc5b7089..721e0c2b5 100644 --- a/data/config_files/function_list_central_qc_run.yml +++ 
b/data/config_files/function_list_central_qc_run.yml @@ -22,7 +22,6 @@ - qc_genotype - qc_verify_bam_id - qc_upstream_tags -- qc_rna_seqc - run_analysis_complete - run_archival_in_progress - copy_interop_files_to_irods diff --git a/data/config_files/function_list_post_qc_review_gclp.yml b/data/config_files/function_list_post_qc_review_gclp.yml deleted file mode 100644 index c787e846d..000000000 --- a/data/config_files/function_list_post_qc_review_gclp.yml +++ /dev/null @@ -1,11 +0,0 @@ ---- -- run_archival_in_progress -- update_ml_warehouse -- archive_to_irods -- upload_fastqcheck_to_qc_database -- upload_illumina_analysis_to_qc_database -- upload_auto_qc_to_qc_database -- run_run_archived -- run_qc_complete -- update_ml_warehouse_post_qc_complete - diff --git a/data/config_files/illumina_pipeline.ini b/data/config_files/illumina_pipeline.ini deleted file mode 100644 index ed7e53f24..000000000 --- a/data/config_files/illumina_pipeline.ini +++ /dev/null @@ -1,7 +0,0 @@ -olb=/software/solexa/src/OLB-1.9.4 -bustard_exe=bin/bustard.py -bcl_to_qseq=bin/setupBclToQseq.py -control_based_phasing=autoCONTROL_LANE -lane_based_phasing=lane -control_based_matrix=autoCONTROL_LANE -lane_based_matrix=lane diff --git a/data/config_files/parallelisation.yml b/data/config_files/parallelisation.yml index a023b001f..384e62c8e 100644 --- a/data/config_files/parallelisation.yml +++ b/data/config_files/parallelisation.yml @@ -1,5 +1,4 @@ a: - illumina2bam: 1 illumina_basecall_stats: 1 p4_stage1_analysis: 1 b: @@ -19,7 +18,6 @@ c: qc_genotype: 1 qc_upstream_tags: 1 qc_verify_bam_id: 1 - qc_rna_seqc: 1 d: upload_auto_qc_to_qc_database: 1 upload_fastqcheck_to_qc_database: 1 diff --git a/data/config_files/pb_cal_pipeline.ini b/data/config_files/pb_cal_pipeline.ini deleted file mode 100644 index 3e1959eed..000000000 --- a/data/config_files/pb_cal_pipeline.ini +++ /dev/null @@ -1,16 +0,0 @@ -cal_table_script=pb_calibration -recalibration_script=pb_predictor -alignment_script=pb_align 
-default_directory_for_qseqs=rta -make_stats=makeStats.pl -second_basecall_script=pb_second_basecall -cal_table_suffix=_purity_cycle_caltable.txt -default_aligner=bwa -random=5 -t_filter=2 -mem_calibration=3072 -mem_score=1725 -region_size=200 -region_mismatch_threshold=0.016 -region_insertion_threshold=0.016 -region_deletion_threshold=0.016 diff --git a/lib/npg_pipeline/analysis/bustard4pbcb.pm b/lib/npg_pipeline/analysis/bustard4pbcb.pm deleted file mode 100644 index f4a889079..000000000 --- a/lib/npg_pipeline/analysis/bustard4pbcb.pm +++ /dev/null @@ -1,260 +0,0 @@ -package npg_pipeline::analysis::bustard4pbcb; - -use Moose; -use Moose::Util::TypeConstraints; -use Carp; -use Cwd; -use File::Spec::Functions; -use File::Slurp; -use Try::Tiny; -use Readonly; - -use npg_pipeline::lsf_job; -extends q{npg_pipeline::base}; - -our $VERSION = '0'; - -=head1 NAME - -npg_pipeline::analysis::bustard4pbcb - -=head1 SYNOPSIS - -=head1 DESCRIPTION - -OLB bustard preprocessing for the pbcal bam pipeline - -=head1 SUBROUTINES/METHODS - -=cut - -Readonly::Scalar our $MEM_REQ => 13_800; # total MB used by a make -Readonly::Scalar our $CPUS_NUM => q{8,16}; - -subtype 'NpgPipelinePluggableObject' - => as 'Object' - => where { ref =~ /^npg_pipeline::pluggable/smxi; }; - -has q{+id_run} => ( required => 1, ); - -has q{pipeline} => ( isa => q{NpgPipelinePluggableObject}, - is => q{ro}, - ); - -has q{bustard_home} => ( isa => q{Str}, - is => q{ro}, - required => 1, - ); - -has q{script_path} => ( isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); -sub _build_script_path { - my $self = shift; - return catfile($self->illumina_pipeline_conf()->{olb}, - $self->illumina_pipeline_conf()->{bustard_exe}); -} - -has q{bustard_dir} => ( isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); -sub _build_bustard_dir { - my $self = shift; - - $self->info(q[Running Bustard makefile creation]); - my $bustard_command = $self->_bustard_command(); - $self->info("Bustard command: $bustard_command"); - 
my $rc = system $bustard_command; - my @lines = (); - try { - @lines = read_file($self->_bustard_output_file()); - }; - if ($rc) { - my $error= "Bustard command '$bustard_command' failed with code $rc"; - if (@lines) { - $error .= q[ ] . join q[ ], @lines; - } - $self->logcroak($error); - } - if (!@lines) { - $self->logcroak(q[No bustard output in ] . $self->_bustard_output_file()); - } - return $self->_get_bustard_dir(@lines); -} - -has q{_bustard_output_file} => ( isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); -sub _build__bustard_output_file { - my $self = shift; - return catfile($self->bustard_home, q[bustard_output_] . $self->timestamp() . q[.txt]); -} - -sub _get_bustard_dir { - my ($self, @lines) = @_; - my $line = q[]; - ##no critic (RegularExpressions::ProhibitEscapedMetacharacters) - foreach (@lines) { - if (/^Sequence\ folder/ixms) { - $line = $_; - last; - } - } - ## use critic - if (!$line) { - $self->logcroak(q[No record about bustard directory (Sequence folder) in ] . $self->_bustard_output_file()); - } - (my $dir) = $line =~ /:\s+(\S+)$/smx; - return $dir; -} - -sub _bustard_command { - my ($self) = shift; - - my $timestamp = $self->timestamp(); - my ($time) = $timestamp =~ /-(\d+)$/smx; - my $bustard_out = catfile($self->bustard_home, "bustard_output_$timestamp.txt"); - my @command = (); - push @command, "LOGNAME=$time"; - push @command, $self->script_path; - if ( $self->has_override_all_bustard_options() ) { - push @command, $self->override_all_bustard_options(); - } else { - push @command, '--make --CIF --keep-dif-files --no-eamss --phasing=lane --matrix=lane'; - my $tile_list = $self->tile_list() || join q[,], map {"s_$_"} $self->positions(); - push @command, "--tiles=$tile_list"; - } - push @command, $self->bustard_home; - push @command, '> ' . 
$self->_bustard_output_file(); - push @command, '2>&1'; - return join q[ ], @command; -} - -sub _make_command { - my ($self, $step_name, $deps) = @_; - - my $position_string = ($step_name =~ /lanes$/smx) ? $self->lsb_jobindex() : q{}, - $deps ||= q{}; - (my $target) = $step_name =~ /(matrix|phasing)/smx; - if ($target) { - if ($position_string) { - $target .= "_$position_string"; - } - $target .= q{_finished.txt}; - } else { - $target = $position_string ? "s_$position_string" : 'all'; - } - - my $job_name = join q[_], 'bustard', $step_name, $self->id_run(), $self->timestamp(); - my $index = $position_string ? q{.%I} : q{}; - my $output_name = $job_name . $index . q{.%J.out}; - $output_name = catfile(q{log} , $output_name); - if ($position_string) { - $job_name .= '[' . join(q[,], $self->positions()) . ']'; - } - - my @command = (); - push @command, 'bsub'; - push @command, "-n $CPUS_NUM"; - push @command, '-q ' . $self->lsf_queue; - push @command, "-o $output_name"; - push @command, "-J $job_name"; - my $memory_spec = npg_pipeline::lsf_job->new(memory => $MEM_REQ)->memory_spec(); - push @command, $self->pipeline->fs_resource_string( { - resource_string => qq{$memory_spec -R 'span[hosts=1]'}, - ##no critic (BuiltinFunctions::ProhibitStringySplit) - counter_slots_per_job => (split q{,}, $CPUS_NUM)[0], - ##use critic - }); - if ($deps) { push @command, $deps; } - push @command, q['make -j `npg_pipeline_job_env_to_threads` ] . qq[$target']; - return join q[ ], @command; -} - -=head2 make - - Submits bustard 'make' jobs for post-run analysis as a single step. 
- - my @job_ids = $bObj->make($step_name, $required_job_completion); - -=cut - -sub make { - my ($self, $step_name, $required_job_completion) = @_; - if (!$self->pipeline) { - $self->logcroak('To submit a job, pipeline accessor should be set'); - } - my $working = getcwd(); - chdir $self->bustard_dir; - my $command = $self->_make_command($step_name, $required_job_completion); - $self->debug("Bustard make command: $command"); - my @ids = $self->pipeline->submit_bsub_command($command); - chdir $working; - return @ids; -} - -no Moose::Util::TypeConstraints; -no Moose; -__PACKAGE__->meta->make_immutable; - -1; - -__END__ - - -=head1 DIAGNOSTICS - -=head1 CONFIGURATION AND ENVIRONMENT - -=head1 DEPENDENCIES - -=over - -=item Carp - -=item Readonly - -=item Moose - -=item Moose::Util::TypeConstraints - -=item Cwd - -=item File::Spec::Functions - -=item File::Slurp - -=item Try::Tiny - -=item npg_pipeline::base - -=back - -=head1 INCOMPATIBILITIES - -=head1 BUGS AND LIMITATIONS - -=head1 AUTHOR - -Andy Brown -Marina Gourtovaia - -=head1 LICENSE AND COPYRIGHT - -Copyright (C) 2014 Genome Research Ltd - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . 
diff --git a/lib/npg_pipeline/analysis/harold_calibration_bam.pm b/lib/npg_pipeline/analysis/harold_calibration_bam.pm deleted file mode 100644 index e748267d0..000000000 --- a/lib/npg_pipeline/analysis/harold_calibration_bam.pm +++ /dev/null @@ -1,627 +0,0 @@ -package npg_pipeline::analysis::harold_calibration_bam; - -use Moose; -use Carp; -use English qw{-no_match_vars}; -use Cwd; -use Readonly; -use List::MoreUtils qw{any}; -use File::Spec; -use File::Basename; - -use npg_tracking::util::types; -use npg_pipeline::lsf_job; - -our $VERSION = '0'; - -Readonly::Scalar our $PB_ALIGN_BAM_PREFIX => q{pb_align_}; -Readonly::Scalar our $MAKE_STATS_J => 4; -Readonly::Scalar our $MAKE_STATS_MEM => 350; - - -extends q{npg_pipeline::base}; -with qw{ - npg_common::roles::software_location - npg_pipeline::roles::business::harold_calibration_reqs -}; - -=head1 NAME - -npg_pipeline::analysis::harold_calibration_bam - -=head1 SYNOPSIS - - my $oHaroldCalibration = npg_pipeline:analysis::harold_calibration_bam->new(); - -=head1 DESCRIPTION - -Object runner to launch internal calibration instead of CASAVA based calibration - -=head1 SUBROUTINES/METHODS - -=head2 spatial_filter_path - -Absolute path to spatial_filter executable - -=cut - -has 'spatial_filter_path' => ( - is => 'ro', - isa => 'NpgCommonResolvedPathExecutable', - coerce => 1, - default => 'spatial_filter', - ); - -=head2 pb_calibration_bin - -Directory where pb bcalibration family executables are - -=cut - -has 'pb_calibration_bin' => ( - isa => 'NpgTrackingDirectory', - is => 'ro', - lazy => 1, - builder => '_build_pb_calibration_bin', - ); -sub _build_pb_calibration_bin { - my $self = shift; - return dirname($self->spatial_filter_path()); -} - -sub _generate_illumina_basecall_stats_command { - my ( $self, $arg_refs ) = @_; - - my $job_dependencies = $arg_refs->{required_job_completion}; - - my $basecall_dir = $self->basecall_path(); - my $dir = $self->bam_basecall_path(); - - $self->make_log_dir( $dir ); # create a 
log directory within bam_basecalls - - my $bsub_queue = $self->lsf_queue; - my $job_name = q{basecall_stats_} . $self->id_run() . q{_} . $self->timestamp(); - - my @command; - push @command, 'bsub'; - push @command, "-q $bsub_queue"; - push @command, qq{-o $dir/log/}. $job_name . q{.%J.out}; - push @command, "-J $job_name"; - - my $hosts = 1; - my $memory_spec = join q[], npg_pipeline::lsf_job->new(memory => $MAKE_STATS_MEM)->memory_spec(), " -R 'span[hosts=$hosts]'"; - push @command, $self->fs_resource_string( { - resource_string => $memory_spec, - counter_slots_per_job => $MAKE_STATS_J, - } ); - push @command, q{-n } . $MAKE_STATS_J; - push @command, $job_dependencies || q[]; - - push @command, q["]; # " enclose command in quotes - - my $bcl2qseq_path = join q[/], $self->illumina_pipeline_conf()->{olb}, $self->illumina_pipeline_conf()->{bcl_to_qseq}; - - my $cmd = join q[ && ], - qq{cd $dir}, - q{if [[ -f Makefile ]]; then echo Makefile already present 1>&2; else echo creating bcl2qseq Makefile 1>&2; }. - qq{$bcl2qseq_path -b $basecall_dir -o $dir --overwrite; fi}, - qq[make -j $MAKE_STATS_J Matrix Phasing], - qq[make -j $MAKE_STATS_J BustardSummary.x{s,m}l]; - - push @command,$cmd; - - push @command, q["]; # " closing quote - - return join q[ ], @command; -} - -=head2 generate_illumina_basecall_stats - -Use Illumina tools to generate the (per run) BustardSummary and IVC reports (from on instrument RTA basecalling). 
- -=cut - -sub generate_illumina_basecall_stats{ - my ( $self, $arg_refs ) = @_; - my @id_runs = $self->submit_bsub_command( $self->_generate_illumina_basecall_stats_command($arg_refs) ); - return @id_runs; -} - -=head2 generate_alignment_files - -submit the jobs which will generate bam alignment files ready to pass onto calibration table generator - - my $aJobIds = $oHaroldCalibration->generate_alignment_files({ - required_job_completion => $sJobRequirenmentString, - }); - -=cut - -sub generate_alignment_files { - my ( $self, $arg_refs ) = @_; - - my $job_ids = []; - my $job_dependencies = $arg_refs->{'required_job_completion'}; - - # create the calibration directory - my $pb_cal_dir = $self->create_pb_calibration_directory(); - - $self->_set_recalibrated_path( $self->pb_cal_path() ); - - foreach my $position ( $self->positions ) { - if ( ! $self->is_spiked_lane( $position ) ){ - $self->warn("Lane $position is not spiked with phiX, no PB_cal alignment job needed"); - next; - } - $self->_generate_alignment_file_per_lane({ - position => $position, - job_ids => $job_ids, - job_dependencies => $job_dependencies - }); - } - - return @{ $job_ids }; -} - -=head2 generate_calibration_table - -submit the bsub jobs which will create the calibration tables, returning an array of job_ids. - - my $aJobIds = $oHaroldCalibration->generate_calibration_table({ - required_job_completion => $sJobRequirenmentString, - }); - -=cut - -sub generate_calibration_table { - my ($self, $arg_refs) = @_; - - if ( !$self->recalibration() ) { - $self->warn(q{This has been set to run with no recalibration step}); - return (); - } - - my $job_ids = []; - my $job_dependencies = $arg_refs->{'required_job_completion'}; - - # create the calibration directory - my $pb_cal_dir = $self->create_pb_calibration_directory(); - - $self->_set_recalibrated_path( $self->pb_cal_path() ); - - my $snp_file = $self->control_snp_file(); - - foreach my $position ( $self->positions ) { - if ( ! 
$self->is_spiked_lane( $position ) ){ - $self->warn("Lane $position is not spiked with phiX, no PB_cal calibration table job needed"); - next; - } - $self->_generate_calibration_table_per_lane( { - position => $position, - job_ids => $job_ids, - job_dependencies => $job_dependencies, - snp_file => $snp_file, - } ); - } - - return @{ $job_ids }; -} - -=head2 generate_recalibrated_bam - -submit the bsub jobs which will recalibrate the lanes, returning an array of job_ids. - - my $aJobIds = $oHaroldCalibration->generate_recalibrated_bam({ - required_job_completion => $sJobRequirenmentString, - }); - -=cut - -sub generate_recalibrated_bam { - my ($self, $arg_refs) = @_; - - $self->_bam_merger_cmd(); - - my $pb_cal_dir = $self->pb_cal_path(); - - if ( ! $self->directory_exists($pb_cal_dir) ) { - $self->warn(qq{$pb_cal_dir does not exist, not executing jobs}); - return (); - } - - my $job_ids = []; - my $job_dependencies = $arg_refs->{'required_job_completion'}; - - foreach my $position ( $self->positions ) { - my $arg_ref_hash = { - job_ids => $job_ids, - position => $position, - job_dependencies => $job_dependencies, - }; - $self->_generate_recalibrated_bam_per_lane( $arg_ref_hash ); - } - - return @{ $job_ids }; -} - -########## -# private methods - -sub _generate_recalibrated_bam_per_lane { - my ( $self, $arg_refs ) = @_; - - my $lane = $arg_refs->{'position'}; - - my $cal_table_1_to_use = $self->calibration_table_name( { - id_run => $self->id_run(), - position => $lane, - } ); - - - my $args_bam= { - position => $lane, - job_dependencies => $arg_refs->{'job_dependencies'}, - ct => $cal_table_1_to_use, - }; - - my $bsub_command = $self->_recalibration_bsub_command( $args_bam ); - $self->debug($bsub_command); - - push @{ $arg_refs->{'job_ids'} }, $self->submit_bsub_command( $bsub_command ); - - return; -} - -sub _generate_calibration_table_per_lane { - my ( $self, $arg_refs ) = @_; - - my $args = { - position => $arg_refs->{'position'}, - job_dependencies => 
$arg_refs->{'job_dependencies'}, - is_spiked_phix => 1, - snp_file => $arg_refs->{'snp_file'}, - }; - - my $bsub_command = $self->_calibration_table_bsub_command( $args ); - - $self->debug($bsub_command); - - push @{ $arg_refs->{'job_ids'} }, $self->submit_bsub_command($bsub_command); - - return; -} - -# generate the alignment file -sub _generate_alignment_file_per_lane { - my ( $self, $arg_refs ) = @_; - - my $bsub_command = $self->_alignment_file_bsub_command( { - position => $arg_refs->{'position'}, - job_dependencies => $arg_refs->{'job_dependencies'}, - ref_seq => $self->control_ref(), - is_paired => $self->is_paired_read(), - is_spiked_phix => 1, - } ); - - $self->debug($bsub_command); - - push @{ $arg_refs->{'job_ids'} }, $self->submit_bsub_command($bsub_command); - - return; -} - -# generate bsub command for generating the alignment files required -sub _alignment_file_bsub_command { - my ( $self, $arg_refs ) = @_; - - my $position = $arg_refs->{'position'}; - my $job_dependencies = $arg_refs->{'job_dependencies'}; - my $ref_seq = $arg_refs->{'ref_seq'}; - my $is_paired = $arg_refs->{'is_paired'}; - my $is_spiked_phix = $arg_refs->{'is_spiked_phix'}; - - my $mem_size = $self->general_values_conf()->{bam_creation_memory}; - my $timestamp = $self->timestamp(); - my $bsub_queue = $self->lsf_queue; - my $id_run = $self->id_run(); - - my $job_name = $self->is_paired_read() ? $self->align_job() . q{_} . $id_run . q{_} . $position . q{_paired_} . $timestamp - : $self->align_job() . q{_} . $id_run . q{_} . $position . q{_} . $timestamp - ; - - my @command; - push @command, q{cd}, $self->pb_cal_path(), q{&&}; - push @command, $self->pb_calibration_bin() . q{/} . $self->alignment_script(); - push @command, q{--aln_parms "-t "`npg_pipeline_job_env_to_threads` }; - push @command, q{--sam_parms "-t "`npg_pipeline_job_env_to_threads --maximum 8` }; - if ($self->spatial_filter) { - push @command, q{--spatial_filter}; - push @command, q{--sf_parms "} . q{--region_size } . 
$self->pb_cal_pipeline_conf()->{region_size} . q{ } - . q{--region_mismatch_threshold } . $self->pb_cal_pipeline_conf()->{region_mismatch_threshold} . q{ } - . q{--region_insertion_threshold } . $self->pb_cal_pipeline_conf()->{region_insertion_threshold} . q{ } - . q{--region_deletion_threshold } . $self->pb_cal_pipeline_conf()->{region_deletion_threshold} . q{ } - . q{--tileviz } . $self->qc_path . q{/} . q(tileviz) . q{/} .$id_run. q{_} . $position . q{ } - . q{"}; - push @command, q{--bam_join_jar } . $self->_bam_merger_jar; - }; - push @command, q{--ref } . $ref_seq; - if( $is_paired ) { - push @command, q{--read1 1}; - push @command, q{--read2 2}; - } else { - push @command, q{--read 0}; - } - push @command, q{--bam }.$self->bam_basecall_path().q{/}.$id_run.q{_}.$position.q{.bam}; - push @command, q{--prefix } . $PB_ALIGN_BAM_PREFIX . $id_run.q{_}.$position; - push @command, q{--pf_filter}; - - my $job_command = join q[ ], @command; - $job_command=~s/'/'"'"'/smxg; - - @command = (); - push @command, 'bsub'; - push @command, "-q $bsub_queue"; - push @command, $self->ref_adapter_pre_exec_string(); - push @command, q{-o }.$self->pb_cal_path().q{/log/}. $job_name . q{.%J.out}; - push @command, "-J $job_name"; - - my $hosts = 1; - my $memory_spec = join q[], npg_pipeline::lsf_job->new(memory => $mem_size)->memory_spec(), " -R 'span[hosts=$hosts]'"; - push @command, $self->fs_resource_string( { - resource_string => $memory_spec, - counter_slots_per_job => $self->general_values_conf()->{io_resource_slots}, - } ); - push @command, q{-n } . 
$self->general_values_conf()->{bwa_aln_threads}; - push @command, $job_dependencies || q[]; - push @command, "'$job_command'"; # " enclose command in quotes - - return join q[ ], @command; -} - - - -# generate bsub command for recalibrating the lane qseq data -sub _recalibration_bsub_command { - my ($self, $arg_refs) = @_; - my $position = $arg_refs->{'position'}; - my $job_dependencies = $arg_refs->{'job_dependencies'}; - my $id_run = $self->id_run(); - - my $output_bam = $id_run . q{_} . $position . q{.bam}; - my $output_bam_md5 = $output_bam . q{.md5}; - my $input_bam = q{../} . $output_bam; - my $input_bam_md5 = $input_bam . q{.md5}; - my $phix_bam = $PB_ALIGN_BAM_PREFIX . $id_run . q{_} .$position . q{.bam}; - - #pb_calibration_cmd - my @command_pb_cal; - push @command_pb_cal, $self->pb_calibration_bin() . q{/} . $self->recalibration_script(); - push @command_pb_cal, q{--u}; - push @command_pb_cal, q{--bam } . $input_bam; - if ($self->dif_files_path()) { - push @command_pb_cal, q{--intensity_dir } . 
$self->dif_files_path(); # for dif file location, it should be bustard_dir if OLB - } - - my $cycle_start1 = 1; - my $alims = $self->lims->associated_child_lims_ia; - #if read 1 has an inline index reset cycle_start1 to the first cycle after the index - if ($alims->{$position}->inline_index_exists && $alims->{$position}->inline_index_read == 1) { - $cycle_start1 += $alims->{$position}->inline_index_end; - } - if( !$self->is_paired_read() ){ - push @command_pb_cal, qq{--cstart $cycle_start1}; - }else{ - push @command_pb_cal, qq{--cstart1 $cycle_start1}; - my @r2r = $self->read2_cycle_range(); - my $cycle_start2 = $r2r[0]; - #if read 2 has an inline index reset cycle_start2 to the first cycle after the index - if ($alims->{$position}->inline_index_exists && $alims->{$position}->inline_index_read == 2) { - $cycle_start2 += $alims->{$position}->inline_index_end; - } - push @command_pb_cal, qq{--cstart2 $cycle_start2}; - } - - my $cl_table1 = $arg_refs->{ct}; - push @command_pb_cal, qq{--ct $cl_table1}; - - my $pb_calibration_cmd = join q[ ], @command_pb_cal; - #finish pb_calibration_cmd; - - #bam merge command - my $bam_merge_cmd = q{ } . $self->_bam_merger_cmd() . qq{ O=$output_bam ALIGNED=$phix_bam}; - - #bjob now - my $mem_size = $self->mem_score(); - my $timestamp = $self->timestamp(); - my $bsub_queue = $self->lsf_queue; - my $job_name = $self->score_job() . q{_} . $id_run . q{_} . $position . q{_} . $timestamp; - - my @command; - push @command, 'bsub'; - push @command, "-q $bsub_queue"; - push @command, q{-o }.$self->pb_cal_path().q{/log/}. $job_name . 
q{.%J.out}; - push @command, "-J $job_name"; - - my $hosts = 1; - my $memory_spec = join q[], npg_pipeline::lsf_job->new(memory => $mem_size)->memory_spec(), " -R 'span[hosts=$hosts]'"; - push @command, $self->fs_resource_string( { - resource_string => $memory_spec, - counter_slots_per_job => 2 * $self->general_values_conf()->{io_resource_slots}, - } ); - push @command, $job_dependencies || q[]; - - push @command, q[']; # ' enclose command in quotes - push @command, q{cd}, $self->pb_cal_path(), q{&&}; - - my $check_cl_table = qq{-f $cl_table1}; - - my $check_cmd = qq{if [[ -f $phix_bam ]]; then echo phix alignment so merging alignments with 1>&2; set -o pipefail; (if [ $check_cl_table ]; then echo recalibrated qvals 1>&2; $pb_calibration_cmd ; else echo no recalibration 1>&2; cat $input_bam ; fi;) | }; - if ($self->spatial_filter) { - $check_cmd .= qq{ ( if [[ -f ${phix_bam}.filter ]]; then echo applying spatial filter 1>&2; } . $self->pb_calibration_bin() . q{/} . - qq{spatial_filter -u -a -f -F ${phix_bam}.filter - } . - q{2> >( tee /dev/stderr | } . qq{qc --check spatial_filter --id_run $id_run --position $position --qc_out } . $self->qc_path . q{ );} . - q{ else echo no spatial filter 1>&2; cat; fi;) | }; - } - $check_cmd .= qq{$bam_merge_cmd; else echo symlinking as no phix alignment 1>&2; rm -f $output_bam; ln -s $input_bam $output_bam; rm -f $output_bam_md5; ln -s $input_bam_md5 $output_bam_md5; fi}; - $check_cmd =~ s/'/'"'"'/smxg; # cope with any single ' quote in the command when submitting command within single ' quote in bash -c argument - null op here? - $check_cmd = "bash -c '$check_cmd'"; # >( ...) 
is a bash'ish - - $check_cmd =~ s/'/'"'"'/smxg; # cope with any single ' quote in the command when submitting command within single ' quote in bsub command line argument - push @command,$check_cmd; - - push @command, q[']; # ' closing quote - - my $bsub_command = join q[ ], @command; - - return $bsub_command; -} - -# generate bsub command for generating the calibration table required -sub _calibration_table_bsub_command { - my ($self, $arg_refs) = @_; - my $position = $arg_refs->{'position'}; - my $job_dependencies = $arg_refs->{'job_dependencies'}; - - my $mem_size = $self->mem_calibration(); - my $timestamp = $self->timestamp(); - my $bsub_queue = $self->lsf_queue; - my $id_run = $self->id_run(); - - my $job_name = $self->cal_table_job() . q{_} . $id_run . q{_} . $position . q{_} . $timestamp ; - - my @command; - push @command, 'bsub'; - push @command, "-q $bsub_queue"; - push @command, $self->ref_adapter_pre_exec_string(); - push @command, q{-o }.$self->pb_cal_path().q{/log/}. $job_name . q{.%J.out}; - push @command, "-J $job_name"; - - my $hosts = 1; - my $memory_spec = join q[], npg_pipeline::lsf_job->new(memory => $mem_size)->memory_spec(), " -R 'span[hosts=$hosts]'"; - push @command, $self->fs_resource_string( { - resource_string => $memory_spec, - counter_slots_per_job => 2 * $self->general_values_conf()->{io_resource_slots}, - } ); - push @command, $job_dependencies || q[]; - - push @command, q["]; # " enclose command in quotes - push @command, q{cd}, $self->pb_cal_path(), q{&&}; - push @command, $self->pb_calibration_bin() . q{/} . $self->cal_table_script(); - push @command, q{--intensity_dir }. $self->dif_files_path(); # for dif file location, change to bustard if olb - push @command, q{--t_filter } . $self->t_filter(); - push @command, q{--prefix } . $id_run . q{_} . 
$position ; - - my $cycle_start1 = 1; - #if read 1 has an inline index reset cycle_start1 to the first cycle after the index - my $alims = $self->lims->associated_child_lims_ia; - if ($alims->{$position}->inline_index_exists && $alims->{$position}->inline_index_read == 1) { - $cycle_start1 += $alims->{$position}->inline_index_end; - } - if( !$self->is_paired_read() ){ - push @command, qq{--cstart $cycle_start1}; - }else{ - push @command, qq{--cstart1 $cycle_start1}; - my @r2r = $self->read2_cycle_range(); - my $cycle_start2 = $r2r[0]; - #if read 2 has an inline index reset cycle_start2 to the first cycle after the index - if ($alims->{$position}->inline_index_exists && $alims->{$position}->inline_index_read == 2) { - $cycle_start2 += $alims->{$position}->inline_index_end; - } - push @command, qq{--cstart2 $cycle_start2}; - } - - if ( $arg_refs->{is_spiked_phix} ) { - if (!$arg_refs->{snp_file}) { - $self->logcroak('SNP file not available'); - } - push @command, q{--snp } . $arg_refs->{snp_file}; - } - - push @command, qq{--bam ${PB_ALIGN_BAM_PREFIX}${id_run}_${position}.bam}; - - push @command, q["]; # " closing quote - - my $bsub_command = join q[ ], @command; - return $bsub_command; -} - -has q{_bam_merger_jar} => ( - isa => q{NpgCommonResolvedPathJarFile}, - is => q{ro}, - coerce => 1, - default => q{BamMerger.jar}, - ); - -has q{_bam_merger_cmd} => (isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); - -sub _build__bam_merger_cmd{ - my $self = shift; - - return $self->java_cmd . q{ -Xmx1024m} - . q{ -jar } . $self->_bam_merger_jar() - . 
q{ CREATE_MD5_FILE=true VALIDATION_STRINGENCY=SILENT KEEP=true I=/dev/stdin REPLACE_QUAL=true}; -} - -no Moose; - -__PACKAGE__->meta->make_immutable; - -1; - -__END__ - -=head1 DIAGNOSTICS - -=head1 CONFIGURATION AND ENVIRONMENT - -=head1 DEPENDENCIES - -=over - -=item Moose - -=item Carp - -=item English -no_match_vars - -=item Readonly - -=item List::MoreUtils - -=item File::Basename - -=item File::Spec - -=item npg_tracking::util::types - -=item npg_common::roles::software_location - -=back - -=head1 INCOMPATIBILITIES - -=head1 BUGS AND LIMITATIONS - -=head1 AUTHOR - -Guoying Qi - -=head1 LICENSE AND COPYRIGHT - -Copyright (C) 2015 Genome Research Ltd - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . 
diff --git a/lib/npg_pipeline/analysis/illumina_basecall_stats.pm b/lib/npg_pipeline/analysis/illumina_basecall_stats.pm new file mode 100644 index 000000000..704fd5732 --- /dev/null +++ b/lib/npg_pipeline/analysis/illumina_basecall_stats.pm @@ -0,0 +1,143 @@ +package npg_pipeline::analysis::illumina_basecall_stats; + +use Moose; +use Readonly; +use npg_pipeline::lsf_job; + +extends 'npg_pipeline::base'; +with 'npg_common::roles::software_location'; + +our $VERSION = '0'; + +Readonly::Scalar our $MAKE_STATS_J => 4; +Readonly::Scalar our $MAKE_STATS_MEM => 350; + +=head1 NAME + + npg_pipeline::analysis::illumina_basecall_stats + +=head1 SYNOPSIS + +=head1 DESCRIPTION + +=head1 SUBROUTINES/METHODS + +=head2 bcl2qseq + +Absolute path to executable that generates Illumina basecall stats + +=cut + +has 'bcl2qseq' => ( isa => 'NpgCommonResolvedPathExecutable', + is => 'ro', + coerce => 1, + lazy_build => 1,); +sub _build_bcl2qseq { + return 'setupBclToQseq.py' +} + +sub _generate_command { + my ( $self, $arg_refs ) = @_; + + my $job_dependencies = $arg_refs->{'required_job_completion'}; + + my $basecall_dir = $self->basecall_path(); + my $dir = $self->bam_basecall_path(); + + $self->make_log_dir( $dir ); # create a log directory within bam_basecalls + + my $bsub_queue = $self->lsf_queue; + my $job_name = q{basecall_stats_} . $self->id_run() . q{_} . $self->timestamp(); + + my @command; + push @command, 'bsub'; + push @command, "-q $bsub_queue"; + push @command, qq{-o $dir/log/}. $job_name . q{.%J.out}; + push @command, "-J $job_name"; + + my $hosts = 1; + my $memory_spec = join q[], npg_pipeline::lsf_job->new(memory => $MAKE_STATS_MEM)->memory_spec(), " -R 'span[hosts=$hosts]'"; + push @command, $self->fs_resource_string( { + resource_string => $memory_spec, + counter_slots_per_job => $MAKE_STATS_J, + } ); + push @command, q{-n } . 
$MAKE_STATS_J; + push @command, $job_dependencies || q[]; + + push @command, q["]; # " enclose command in quotes + + my $bcl2qseq_path = $self->bcl2qseq; + my $cmd = join q[ && ], + qq{cd $dir}, + q{if [[ -f Makefile ]]; then echo Makefile already present 1>&2; else echo creating bcl2qseq Makefile 1>&2; }. + qq{$bcl2qseq_path -b $basecall_dir -o $dir --overwrite; fi}, + qq[make -j $MAKE_STATS_J Matrix Phasing], + qq[make -j $MAKE_STATS_J BustardSummary.x{s,m}l]; + + push @command,$cmd; + + push @command, q["]; # " closing quote + + return join q[ ], @command; +} + +=head2 generate + +Use Illumina tools to generate the (per run) BustardSummary +and IVC reports (from on instrument RTA basecalling). + +=cut + +sub generate { + my ( $self, $arg_refs ) = @_; + return $self->submit_bsub_command($self->_generate_command($arg_refs)); +} + +no Moose; + +__PACKAGE__->meta->make_immutable; + +1; + +__END__ + +=head1 DIAGNOSTICS + +=head1 CONFIGURATION AND ENVIRONMENT + +=head1 DEPENDENCIES + +=over + +=item Moose + +=item Readonly + +=item npg_common::roles::software_location + +=back + +=head1 INCOMPATIBILITIES + +=head1 BUGS AND LIMITATIONS + +=head1 AUTHOR + +Steven Leonard + +=head1 LICENSE AND COPYRIGHT + +Copyright (C) 2017 Genome Research Ltd + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
diff --git a/lib/npg_pipeline/archive/file/generation/illumina2bam.pm b/lib/npg_pipeline/archive/file/generation/illumina2bam.pm deleted file mode 100644 index 0eb34cb84..000000000 --- a/lib/npg_pipeline/archive/file/generation/illumina2bam.pm +++ /dev/null @@ -1,390 +0,0 @@ -package npg_pipeline::archive::file::generation::illumina2bam; - -use Moose; -use Carp; -use English qw{-no_match_vars}; -use Readonly; -use Perl6::Slurp; - -use st::api::lims; -use npg_common::roles::software_location; -use npg_pipeline::lsf_job; -use npg_pipeline::analysis::create_lane_tag_file; - -extends q{npg_pipeline::base}; -with q{npg_tracking::illumina::run::long_info}; - -our $VERSION = '0'; - -Readonly::Scalar our $DEFAULT_RESOURCES => npg_pipeline::lsf_job->new(memory => 2500)->memory_spec(); -Readonly::Scalar our $JAVA_CMD => q{java}; - -sub generate { - my ( $self, $arg_refs ) = @_; - - $self->info(q{Creating Jobs to run illumina2bam for run} . $self->id_run ); - - my $alims = $self->lims->children_ia; - my @job_ids; - for my $p ($self->positions()){ - my $tag_list_file; - if ($self->is_multiplexed_lane($p)) { - $self->info(qq{Lane $p is indexed, generating tag list}); - my $index_length = $self->_get_index_length( $alims->{$p} ); - $tag_list_file = npg_pipeline::analysis::create_lane_tag_file->new( - location => $self->metadata_cache_dir, - lane_lims => $alims->{$p}, - index_length => $index_length, - hiseqx => $self->is_hiseqx_run, - verbose => $self->verbose - )->generate(); - } - my $bsub_cmd = $self->_generate_bsub_commands( $arg_refs, $alims->{$p}, $tag_list_file); - push @job_ids, $self->submit_bsub_command( $bsub_cmd ); - } - - return @job_ids; -} - -foreach my $jar_name (qw/Illumina2bam BamAdapterFinder BamIndexDecoder/) { - has q{_}.$jar_name.q{_jar} => ( - isa => q{NpgCommonResolvedPathJarFile}, - is => q{ro}, - coerce => 1, - default => $jar_name.q{.jar}, - ); -} - -has q{_illumina2bam_cmd} => (isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); - -sub 
_build__illumina2bam_cmd { - my $self = shift; - return $JAVA_CMD . q{ -Xmx1024m} . q{ -jar } . $self->_Illumina2bam_jar(); -} - -has q{_bam_adapter_detect_cmd} => (isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); -sub _build__bam_adapter_detect_cmd { - return q(bamadapterfind); -} - -has q{_bam_index_decode_cmd} => (isa => q{Str}, - is => q{ro}, - lazy_build => 1, - ); -sub _build__bam_index_decode_cmd { - my $self = shift; - return $JAVA_CMD . q{ -Xmx1024m} - . q{ -jar } . $self->_BamIndexDecoder_jar() - . q{ VALIDATION_STRINGENCY=SILENT} -} - -sub _get_index_length { - my ( $self, $lane_lims ) = @_; - - my $index_length = $self->index_length; - - if ($lane_lims->inline_index_exists) { - my $index_start = $lane_lims->inline_index_start; - my $index_end = $lane_lims->inline_index_end; - if ($index_start && $index_end) { - $index_length = $index_end - $index_start + 1; - } - } - - return $index_length; -} - -sub _generate_bsub_commands { - my ( $self, $arg_refs, $lane_lims, $tag_list_file ) = @_; - - my $position = $lane_lims->position; - my $required_job_completion = $arg_refs->{required_job_completion}; - - my $id_run = $self->id_run(); - my $intensity_path = $self->intensity_path(); - my $bam_basecall_path = $self->bam_basecall_path(); - - my $full_bam_name = $bam_basecall_path . q{/}. $id_run . q{_} .$position. q{.bam}; - - my $job_name = q{illumina2bam_} . $id_run . q{_} . $position. q{_} . $self->timestamp(); - - my $log_folder = $self->make_log_dir( $bam_basecall_path ); - my $outfile = $log_folder . q{/} . $job_name . q{.%J.out}; - - $job_name = q{'} . $job_name . q{'}; - - my $last_tool_picard_based = 1; - my $job_command = $self->_illumina2bam_cmd() - . q{ I=} . $intensity_path - . q{ L=} . $position - . q{ B=} . $self->basecall_path() - . q{ RG=}. $id_run.q{_}.$position - . q{ PU=}. 
join q[_], $self->run_folder, $position; - - my $st_names = $self->_get_library_sample_study_names($lane_lims); - - if($st_names->{library}){ - $job_command .= q{ LIBRARY_NAME="} . $st_names->{library} . q{"}; - } - if($st_names->{sample}){ - $job_command .= q{ SAMPLE_ALIAS="} . $st_names->{sample} . q{"}; - } - if($st_names->{study}){ - my $study = $st_names->{study}; - $study =~ s/"/\\"/gmxs; - $job_command .= q{ STUDY_NAME="} . $study . q{"}; - } - if ($self->_extra_tradis_transposon_read) { - $job_command .= ' SEC_BC_SEQ=BC SEC_BC_QUAL=QT BC_SEQ=tr BC_QUAL=tq'; - } - - if ($lane_lims->inline_index_exists) { - my $index_start = $lane_lims->inline_index_start; - my $index_end = $lane_lims->inline_index_end; - my $index_read = $lane_lims->inline_index_read; - - if ($index_start && $index_end && $index_read) { - my($first, $final) = $self->read1_cycle_range(); - if ($index_read == 1) { - $index_start += ($first-1); - $index_end += ($first-1); - $job_command .= qq{ FIRST_INDEX=$index_start FINAL_INDEX=$index_end FIRST_INDEX=$first FINAL_INDEX=}.($index_start-1); - $job_command .= q{ SEC_BC_SEQ=br SEC_BC_QUAL=qr BC_READ=1 SEC_BC_READ=1}; - $job_command .= q{ FIRST=}.($index_end+1).qq{ FINAL=$final}; - if ($self->is_paired_read()) { - ($first, $final) = $self->read2_cycle_range(); - $job_command .= qq{ FIRST=$first FINAL=$final}; - } - } elsif ($index_read == 2) { - $self->is_paired_read() or $self->logcroak(q{Inline index read (2) does not exist}); - $job_command .= qq{ FIRST=$first FINAL=$final}; - ($first, $final) = $self->read2_cycle_range(); - $index_start += ($first-1); - $index_end += ($first-1); - $job_command .= qq{ FIRST_INDEX=$index_start FINAL_INDEX=$index_end FIRST_INDEX=$first FINAL_INDEX=}.($index_start-1); - $job_command .= q{ SEC_BC_SEQ=br SEC_BC_QUAL=qr BC_READ=2 SEC_BC_READ=2}; - $job_command .= q{ FIRST=}.($index_end+1).qq{ FINAL=$final}; - } else { - $self->logcroak("Invalid inline index read ($index_read)"); - } - } - } - - ### TODO: can new 
bamadapterfind cope without these exclusions? - if ( $self->is_paired_read() && !$lane_lims->inline_index_exists){ - # omit BamAdapterFinder for inline index - my @range1 = $self->read1_cycle_range(); - my $read1_length = $range1[1] - $range1[0] + 1; - my @range2 = $self->read2_cycle_range(); - my $read2_length = $range2[1] - $range2[0] + 1; - # omit BamAdapterFinder if reads are different lengths - if( $read1_length == $read2_length ){ - $job_command .= q{ OUTPUT=} . q{/dev/stdout} . q{ COMPRESSION_LEVEL=0}; - $job_command .= q{ | } . $self->_bam_adapter_detect_cmd(); - $last_tool_picard_based = 0; - } - } - - if( $self->is_multiplexed_lane($position) ){ - if (!$tag_list_file) { - $self->logcroak('Tag list file path should be defined'); - } - $job_command .= ($last_tool_picard_based - ? q{ OUTPUT=} . q{/dev/stdout} . q{ COMPRESSION_LEVEL=0} - : q{ level=0}); - $job_command .= q{ | } - . $self->_bam_index_decode_cmd() - . q{ I=/dev/stdin } - . q{ BARCODE_FILE=} . $tag_list_file - . q{ METRICS_FILE=} . $full_bam_name . q{.tag_decode.metrics}; - my $num_of_plexes_per_lane = $self->_get_number_of_plexes_excluding_control($lane_lims); - if( $num_of_plexes_per_lane == 1 ){ - $job_command .= q{ MAX_NO_CALLS=} . $self->general_values_conf()->{single_plex_decode_max_no_calls}; - $job_command .= q{ CONVERT_LOW_QUALITY_TO_NO_CALL=true}; - } - $last_tool_picard_based = 1; - } - - $job_command .= ($last_tool_picard_based ? q{ CREATE_MD5_FILE=false OUTPUT=/dev/stdout} : q{ md5=1 md5filename=}.$full_bam_name.q{.md5} ); - #TODO - shift this seqchksum earlier before any compression.... - #TODO - shift this seqchksum as early as possible - immediately after illuina2bam? (but we need to stop altering read names at deplxing for that) - - my $full_bam_seqchksum_name = $full_bam_name; - $full_bam_seqchksum_name =~ s/[.]bam$/.post_i2b.seqchksum/mxs; - my $full_bam_md5_name = $full_bam_name; - $full_bam_md5_name .= q{.md5}; - - $job_command .= q{| tee >(bamseqchksum > } . 
$full_bam_seqchksum_name . q{)}; - if ($last_tool_picard_based) { - $job_command .= q{ >(md5sum -b | tr -d '\\n *\\-' > } . $full_bam_md5_name . q{)}; - } - $job_command .= q{ > } . $full_bam_name; - - my $resources = ( $self->fs_resource_string( { - counter_slots_per_job => $self->general_values_conf()->{io_resource_slots}, - resource_string => $self->_default_resources(), - } ) ); - - $job_command =~ s/'/'"'"'/smxg;#for the bsub - $job_command =~ s/'/'"'"'/smxg;#for the bash -c - my $job_sub = q{bsub -q } . $self->lsf_queue() . qq{ $resources $required_job_completion -J $job_name -o $outfile /bin/bash -c 'set -o pipefail;$job_command'}; - - $self->debug($job_sub); - - return $job_sub; -} - -sub _default_resources { - my ( $self ) = @_; - my $mem = $self->general_values_conf()->{'illumina2bam_memory'}; - my $cpu = $self->general_values_conf()->{'illumina2bam_cpu'}; - my $hosts = 1; - return (join q[ ], npg_pipeline::lsf_job->new(memory => $mem)->memory_spec(), "-R 'span[hosts=$hosts]'", "-n$cpu"); -} - -sub _get_library_sample_study_names { - my ($self, $lane_lims) = @_; - - my $names = $self->get_study_library_sample_names($lane_lims); - my ($study_names, $library_names, $sample_names); - if($names->{study}){ - $study_names = join q{,}, @{$names->{study}}; - } - if($names->{library}){ - $library_names = join q{,}, @{$names->{library}}; - } - if($names->{sample}){ - $sample_names = join q{,}, @{$names->{sample}}; - } - - return {study=>$study_names, library=>$library_names, sample=>$sample_names}; -} - -sub _get_number_of_plexes_excluding_control { - my ($self, $lane_lims) = @_; - my $number = scalar keys %{$lane_lims->tags}; - if ($lane_lims->spiked_phix_tag_index) { - $number--; - } - return $number; -} - -has q{_extra_tradis_transposon_read} => ( - isa => q{Bool}, - is => q{rw}, - lazy_build => 1, - ); -sub _build__extra_tradis_transposon_read { - my $self = shift; - - $self->is_indexed; - my @i = $self->reads_indexed; - my $reads_indexed = 0; - ## no critic 
(ControlStructures::ProhibitPostfixControls) - foreach (@i) { $reads_indexed++ if $_; } - - my $is_tradis = 0; - foreach my $d ($self->lims->descendants()) { - if ($d->library_type && $d->library_type =~ /^TraDIS/smx) { - $is_tradis = 1; - last; - } - } - - if ($is_tradis) { - if ($self->run->is_multiplexed) { - return 1 if ($reads_indexed > 1); - } else { - return 1 if ($reads_indexed > 0); - } - } - - return 0; -} - - -no Moose; - -__PACKAGE__->meta->make_immutable; - -1; -__END__ - -=head1 NAME - -npg_pipeline::archive::file::generation::illumina2bam - -=head1 SYNOPSIS - - my $oAfgfq = npg_pipeline::archive::file::generation::illumina2bam->new( - run_folder => $sRunFolder, - ); - -=head1 DESCRIPTION - -Object module which knows how to construct and submits the command line to LSF for creating bam files from bcl files. - -=head1 SUBROUTINES/METHODS - -=head2 generate - generates the bsub jobs and submits them for creating the fastq files, returning an array of job_ids. - - my @job_ids = $oAfgfq->generate({ - required_job_completion} => q{-w (123 && 321)}; - }); - -=head1 DIAGNOSTICS - -=head1 CONFIGURATION AND ENVIRONMENT - -=head1 DEPENDENCIES - -=over - -=item Carp - -=item English -no_match_vars - -=item Readonly - -=item Moose - -=item Perl6::Slurp - -=item npg_common::roles::software_location - -=item st::api::lims - -=item npg_tracking::illumina::run::long_info - -=back - -=head1 INCOMPATIBILITIES - -=head1 BUGS AND LIMITATIONS - -=head1 AUTHOR - -Guoying Qi - -=head1 LICENSE AND COPYRIGHT - -Copyright (C) 2014 Genome Research Limited - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . diff --git a/lib/npg_pipeline/archive/file/generation/p4_stage1_analysis.pm b/lib/npg_pipeline/archive/file/generation/p4_stage1_analysis.pm index f28e6c68e..17c4a18e8 100644 --- a/lib/npg_pipeline/archive/file/generation/p4_stage1_analysis.pm +++ b/lib/npg_pipeline/archive/file/generation/p4_stage1_analysis.pm @@ -392,7 +392,7 @@ sub _generate_command_params { my @range2 = $self->read2_cycle_range(); my $read2_length = $range2[1] - $range2[0] + 1; if($read1_length != $read2_length) { - $self->logcroak('P4 stage1 analysis will not yet handle different length forward/reverse reads (no optional adapter detection)'); + $self->logwarn('P4 stage1 analysis will not yet handle different length forward/reverse reads (no optional adapter detection)'); } } diff --git a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm index ebad3ba49..e84fb6590 100644 --- a/lib/npg_pipeline/archive/file/generation/seq_alignment.pm +++ b/lib/npg_pipeline/archive/file/generation/seq_alignment.pm @@ -229,8 +229,8 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity my $do_rna = $self->_do_rna_analysis($l); my $hs_bwa = ($self->is_paired_read ? 'bwa_aln' : 'bwa_aln_se'); - # continue to use the "aln" algorithm from bwa for these older chemistries (where read length <= 100bp) unless GCLP - my $bwa = ($self->gclp or $self->is_hiseqx_run or $self->_has_newer_flowcell or any {$_ >= $FORCE_BWAMEM_MIN_READ_CYCLES } $self->read_cycle_counts) + # continue to use the "aln" algorithm from bwa for these older chemistries (where read length <= 100bp) + my $bwa = ($self->is_hiseqx_run or $self->_has_newer_flowcell or any {$_ >= $FORCE_BWAMEM_MIN_READ_CYCLES } $self->read_cycle_counts) ? 
'bwa_mem' : $hs_bwa; @@ -334,9 +334,8 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity } if($l->separate_y_chromosome_data) { - $p4_param_vals->{split_bam_by_chromosome_flags} = q[S=Y]; - $p4_param_vals->{split_bam_by_chromosome_flags} = q[V=true]; - $p4_param_vals->{split_bam_by_chromosomes_jar} = $self->_SplitBamByChromosomes_jar; + $p4_param_vals->{chrsplit_subset_flag} = ['--subset', 'Y,chrY,ChrY,chrY_KI270740v1_random']; + $p4_param_vals->{chrsplit_invert_flag} = q[--invert]; } # write p4 parameters to file @@ -393,6 +392,10 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity q{&&}, _qc_command('bam_flagstats', $archive_path, $qcpath, $l, $is_plex, $nchs_outfile_label), : q(), + $do_rna ? join q( ), + q{&&}, + _qc_command('rna_seqc', $archive_path, $qcpath, $l, $is_plex), + : q() ), q('); } @@ -400,11 +403,16 @@ sub _lsf_alignment_command { ## no critic (Subroutines::ProhibitExcessComplexity sub _qc_command {##no critic (Subroutines::ProhibitManyArgs) my ($check_name, $qc_in, $qc_out, $l, $is_plex, $subset) = @_; - my $args = {'id_run' => $l->id_run, 'position' => $l->position}; + my $args = {'id_run' => $l->id_run, + 'position'=> $l->position, + 'qc_out' => $qc_out, + 'check' => $check_name,}; + if ($is_plex && defined $l->tag_index) { $args->{'tag_index'} = $l->tag_index; } - if ($check_name eq 'bam_flagstats') { + + if ($check_name =~ /^bam_flagstats|rna_seqc$/smx) { if ($subset) { $args->{'subset'} = $subset; } @@ -412,12 +420,12 @@ sub _qc_command {##no critic (Subroutines::ProhibitManyArgs) } else { $args->{'qc_in'} = q[$] . 'PWD'; } - $args->{'qc_out'} = $qc_out; - $args->{'check'} = $check_name; + my $command = q[]; foreach my $arg (sort keys %{$args}) { $command .= join q[ ], q[ --].$arg, $args->{$arg}; } + return $QC_SCRIPT_NAME . 
$command; } @@ -554,7 +562,11 @@ sub _ref { $self->warn(qq{No reference genome set for $lstring}); } else { if (scalar @refs > 1) { - $self->logcroak(qq{Multiple references for $lstring}); + if (defined $l->tag_index && $l->tag_index == 0) { + $self->logwarn(qq{Multiple references for $lstring}); + } else { + $self->logcroak(qq{Multiple references for $lstring}); + } } else { $ref = $refs[0]; if ($ref_name) { diff --git a/lib/npg_pipeline/archive/file/generation/seqchksum_comparator.pm b/lib/npg_pipeline/archive/file/generation/seqchksum_comparator.pm index 06d7ae1cb..d62c1dee7 100644 --- a/lib/npg_pipeline/archive/file/generation/seqchksum_comparator.pm +++ b/lib/npg_pipeline/archive/file/generation/seqchksum_comparator.pm @@ -75,8 +75,8 @@ sub _generate_bsub_command { =head2 do_comparison Bamcat any plex/split bamfiles back together to perform a bamseqchksum. -Compare it with the one produced by the illumina2bam step, or croak if that has not been done. -Use diff -u rather than cmp and store the file on disk to help work out what has gone wrong +Compare it with the one for the whole lane or croak if that has not been done. +Use diff -u rather than cmp and store the file on disk to help work out what has gone wrong. =cut @@ -112,7 +112,7 @@ sub _compare_lane { my $input_lane_seqchksum_file_name = File::Spec->catfile($input_seqchksum_dir, $input_seqchksum_file_name); if ( ! 
-e $input_lane_seqchksum_file_name ) { - $self->logcroak("Cannot find $input_lane_seqchksum_file_name to compare: please check illumina2bam pipeline step"); + $self->logcroak("Cannot find $input_lane_seqchksum_file_name to compare to"); } my$wd = getcwd(); diff --git a/lib/npg_pipeline/archive/file/logs.pm b/lib/npg_pipeline/archive/file/logs.pm index 863ad9af1..922222e5d 100644 --- a/lib/npg_pipeline/archive/file/logs.pm +++ b/lib/npg_pipeline/archive/file/logs.pm @@ -6,16 +6,11 @@ extends qw{npg_pipeline::base}; our $VERSION = '0'; -has 'irods_root' => ( isa => 'Str', - is => 'rw', - lazy_build => 1, +has 'irods_root' => ( isa => 'Str', + is => 'rw', + default => '/seq/', ); -sub _build_irods_root { - my $self = shift; - return $self->gclp ? q(/gseq/) : q(/seq/); -} - sub submit_to_lsf { my ($self, $arg_refs) = @_; my $job_sub = $self->_generate_bsub_command($arg_refs); @@ -26,7 +21,6 @@ sub submit_to_lsf { sub _generate_bsub_command { my ($self, $arg_refs) = @_; - my $irodsinstance = $self->gclp ? q(gclp) : q(); my $id_run = $self->id_run(); my $required_job_completion = $arg_refs->{'required_job_completion'}; @@ -47,18 +41,11 @@ sub _generate_bsub_command { $bsub_command .= q{-o } . $location_of_logs . qq{/$job_name.out }; my $future_path = $self->path_in_outgoing($self->runfolder_path()); - $bsub_command .= qq{-E "[ -d '$future_path' ]" }; + $bsub_command .= qq{-E "[ -d '$future_path' ]" }; $bsub_command .= q{'}; - - if ($irodsinstance) { - $bsub_command .= q{irodsEnvFile=$}.q{HOME/.irods/.irodsEnv-} . $irodsinstance . q{-iseq-logs }; - } - $bsub_command .= $archive_script . q{ --runfolder_path } . $future_path . q{ --id_run } . $self->id_run(); - $bsub_command .= q{ --irods_root } . 
$self->irods_root(); - $bsub_command .= q{'}; $self->debug($bsub_command); diff --git a/lib/npg_pipeline/archive/file/qc.pm b/lib/npg_pipeline/archive/file/qc.pm index ccc966d7d..652263a4f 100644 --- a/lib/npg_pipeline/archive/file/qc.pm +++ b/lib/npg_pipeline/archive/file/qc.pm @@ -3,7 +3,6 @@ package npg_pipeline::archive::file::qc; use Moose; use Readonly; use File::Spec; -use File::Path qw{make_path}; use Class::Load qw{load_class}; use npg_pipeline::lsf_job; @@ -16,10 +15,6 @@ Readonly::Scalar my $QC_SCRIPT_NAME => q{qc}; Readonly::Scalar my $LSF_MEMORY_REQ => 6000; Readonly::Scalar my $LSF_MEMORY_REQ_ADAPTER => 1500; Readonly::Scalar my $LSF_INDEX_MULTIPLIER => 10_000; -Readonly::Scalar my $REQUIRES_QC_REPORT_DIR => { - rna_seqc => 'rna_seqc', -}; - has q{qc_to_run} => (isa => q{Str}, is => q{ro}, @@ -52,16 +47,6 @@ sub BUILD { return; } -has q{_qc_report_dirs} => (isa => q{HashRef[Str]}, - is => q{ro}, - traits => [q{Hash}], - default => sub { { } }, - handles => { - _set_rpt_qc_report_dir => q{set}, - _get_rpt_qc_report_dir => q{get}, - }, - ); - sub run_qc { my ($self, $arg_refs) = @_; @@ -78,28 +63,6 @@ sub run_qc { } } - if ($REQUIRES_QC_REPORT_DIR->{$qc_to_run}) { - my @archive_qc_path = ($self->archive_path, q[qc], $REQUIRES_QC_REPORT_DIR->{$qc_to_run}); - foreach my $position ($self->positions()) { - my $rp = join q[_], $self->id_run(), $position; - my $qc_report_dir = File::Spec->catdir(@archive_qc_path, $rp); - if (! -d $qc_report_dir) { - make_path($qc_report_dir); - $self->_set_rpt_qc_report_dir($rp, $qc_report_dir); - } - if ($self->is_multiplexed_lane($position)) { - foreach my $tag (@{$self->get_tag_index_list($position)}) { - my $rpt = join q[#], $rp, $tag; - $qc_report_dir = File::Spec->catdir(@archive_qc_path, $rp, $rpt); - if (! 
-d $qc_report_dir) { - make_path($qc_report_dir); - $self->_set_rpt_qc_report_dir($rpt, $qc_report_dir); - } - } - } - } - } - my $required_job_completion = $arg_refs->{'required_job_completion'}; $required_job_completion ||= q{}; @@ -198,17 +161,6 @@ sub _qc_command { } $c .= qq{ --qc_in=$qc_in --qc_out=$qc_out}; - if ($REQUIRES_QC_REPORT_DIR->{$self->qc_to_run()}) { - my @archive_qc_path = ($archive_path, q[qc], $REQUIRES_QC_REPORT_DIR->{$self->qc_to_run()}); - my $rptstr = join q[_], $self->id_run(), (defined $indexed ? $lanestr : $self->lsb_jobindex()); - my $qc_report_dir = File::Spec->catdir(@archive_qc_path, $rptstr); - if (defined $indexed) { - $rptstr = join q[#], $rptstr, $tagstr; - $qc_report_dir = File::Spec->catdir($qc_report_dir, $rptstr); - } - $c .= qq{ --qc_report_dir=$qc_report_dir}; - } - return $c; } @@ -218,9 +170,9 @@ sub _should_run { my $qc = $self->qc_to_run(); if (($qc =~ /^tag_metrics|upstream_tags|gc_bias|verify_bam_id$/smx) || - ($qc =~ /^genotype|pulldown_metrics|rna_seqc$/smx)) { + ($qc =~ /^genotype|pulldown_metrics$/smx)) { my $is_multiplexed_lane = $self->is_multiplexed_lane($position); - if ($qc =~ /^gc_bias|verify_bam_id|genotype|pulldown_metrics|rna_seqc$/smx) { + if ($qc =~ /^gc_bias|verify_bam_id|genotype|pulldown_metrics$/smx) { my $can_run = ((!defined $tag_index) && !$is_multiplexed_lane) || ((defined $tag_index) && $is_multiplexed_lane); if (!$can_run) { @@ -241,13 +193,6 @@ sub _should_run { if ($self->has_repository && $self->_check_uses_refrepos()) { $init_hash->{'repository'} = $self->repository; } - if ($REQUIRES_QC_REPORT_DIR->{$qc}) { - my $qc_report_dir_key = join q[_], $self->id_run(), $position; - if (defined $tag_index) { - $qc_report_dir_key = join q[#], $qc_report_dir_key, $tag_index; - } - $init_hash->{'qc_report_dir'} = $self->_get_rpt_qc_report_dir($qc_report_dir_key); - } return $self->_qc_module_name()->new($init_hash)->can_run(); } @@ -284,7 +229,7 @@ sub _lsf_options { my ($self, $qc_to_run) = @_; my 
$resources; - if ($qc_to_run =~ /insert_size|sequence_error|ref_match|pulldown_metrics|rna_seqc/smx ) { + if ($qc_to_run =~ /insert_size|sequence_error|ref_match|pulldown_metrics/smx ) { $resources = npg_pipeline::lsf_job->new(memory => $LSF_MEMORY_REQ)->memory_spec(); } elsif ($qc_to_run eq q[adapter]) { $resources = npg_pipeline::lsf_job->new(memory => $LSF_MEMORY_REQ_ADAPTER)->memory_spec() . @@ -352,8 +297,6 @@ Launches the qc jobs. =item Class::Load -=item File::Path - =back =head1 INCOMPATIBILITIES diff --git a/lib/npg_pipeline/archive/file/to_irods.pm b/lib/npg_pipeline/archive/file/to_irods.pm index eb4d6a8a9..195449bb3 100644 --- a/lib/npg_pipeline/archive/file/to_irods.pm +++ b/lib/npg_pipeline/archive/file/to_irods.pm @@ -20,7 +20,6 @@ sub submit_to_lsf { sub _generate_bsub_command { my ($self, $arg_refs) = @_; - my $irodsinstance = $self->gclp() ? q(gclp) : q(); my $id_run = $self->id_run(); my @positions = $self->positions(); @@ -48,10 +47,6 @@ sub _generate_bsub_command { $bsub_command .= q{-o } . $location_of_logs . qq{/$job_name.out }; $bsub_command .= q{'}; - if($irodsinstance){ - $bsub_command .= q{irodsEnvFile=$}.q{HOME/.irods/.irodsEnv-} . $irodsinstance . q{-iseq }; - } - ##no critic (ValuesAndExpressions::RequireInterpolationOfMetachars) my $publish_process_log_name = q(process_publish_${LSB_JOBID}.json); ##use critic @@ -66,10 +61,6 @@ sub _generate_bsub_command { $bsub_command .= q{ --alt_process qc_run}; } - if($irodsinstance){ - $bsub_command .= q{ --collection /14mg/seq/illumina/run/} . $self->id_run(); - } - if($position_list){ $bsub_command .= $position_list } diff --git a/lib/npg_pipeline/archive/folder/generation.pm b/lib/npg_pipeline/archive/folder/generation.pm index 95a166a23..ce0267aba 100644 --- a/lib/npg_pipeline/archive/folder/generation.pm +++ b/lib/npg_pipeline/archive/folder/generation.pm @@ -19,7 +19,6 @@ sub create_dir { my $qc_dir = $self->qc_path(); my $qc_log_dir = $qc_dir . q{/log}; my $tileviz_dir = $qc_dir . 
q{/tileviz}; - my $rna_seqc_dir = $qc_dir . q{/rna_seqc}; ############# # check existence of archive directory @@ -56,21 +55,6 @@ sub create_dir { } } - ############# - # check existence of rna_seqc directory - # create if it doesn't - - if ( ! -d $rna_seqc_dir) { - my $mk_rna_seqc_dir_cmd = qq{mkdir -p $rna_seqc_dir}; - $self->debug($mk_rna_seqc_dir_cmd); - my $return = qx{$mk_rna_seqc_dir_cmd}; - if ( $CHILD_ERROR ) { - $self->logcroak($tileviz_dir, - qq{ does not exist and unable to create: $CHILD_ERROR }, - $return); - } - } - ############# # check existence of multiplex lane and qc directory # create if they doesn't @@ -125,11 +109,6 @@ sub create_dir { $self->warn("could not chgrp $tileviz_dir\n\t$rc"); # not fatal } - $self->info("chgrp $owning_group $rna_seqc_dir"); - $rc = `chgrp $owning_group $rna_seqc_dir`; - if ( $CHILD_ERROR ) { - $self->warn("could not chgrp $rna_seqc_dir\n\t$rc"); # not fatal - } ############ # ensure that the owning group is what we expect @@ -167,12 +146,6 @@ sub create_dir { $self->warn("could not chmod $tileviz_dir\n\t$rc"); # not fatal } - $self->info("chmod u=rwx,g=srxw,o=rx $rna_seqc_dir"); - $rc = `chmod u=rwx,g=srxw,o=rx $rna_seqc_dir`; - if ( $CHILD_ERROR ) { - $self->warn("could not chmod $rna_seqc_dir\n\t$rc"); # not fatal - } - $self->info("chmod u=rwx,g=srxw,o=rx $archive_log_dir"); $rc = `chmod u=rwx,g=srxw,o=rx $archive_log_dir`; if ( $CHILD_ERROR ) { diff --git a/lib/npg_pipeline/base.pm b/lib/npg_pipeline/base.pm index fc07b377e..4ace95faa 100644 --- a/lib/npg_pipeline/base.pm +++ b/lib/npg_pipeline/base.pm @@ -19,16 +19,20 @@ with qw{ MooseX::Getopt MooseX::AttributeCloner WTSI::DNAP::Utilities::Loggable - npg_tracking::illumina::run::short_info - npg_tracking::illumina::run::folder npg_pipeline::roles::accessor + npg_tracking::illumina::run::short_info npg_pipeline::roles::business::base }; + +with 'npg_tracking::illumina::run::folder' => { + -excludes => [qw(pb_cal_path dif_files_path)] + }; + with 
q{npg_tracking::illumina::run::long_info}; with q{npg_pipeline::roles::business::flag_options}; Readonly::Scalar my $DEFAULT_JOB_ID_FOR_NO_BSUB => 50; -Readonly::Array my @FLAG2FUNCTION_LIST => qw/ olb qc_run gclp /; +Readonly::Array my @FLAG2FUNCTION_LIST => qw/ qc_run /; $ENV{LSB_DEFAULTPROJECT} ||= q{pipeline}; @@ -244,23 +248,6 @@ has q{force_phix_split} => ( default => 1, ); -=head2 force_p4 - -Boolean decision to force on P4 pipeline usage - -=cut - -has q{force_p4} => ( - isa => q{Bool}, - is => q{ro}, - lazy_build => 1, - documentation => q{Boolean decision to force on P4 pipeline usage, default true iff GCLP}, -); -sub _build_force_p4 { - my ($self) = @_; - return $self->gclp; -} - =head2 verbose Boolean option to switch on verbose mode @@ -420,10 +407,6 @@ around 'function_list' => sub { return $file; }; -sub _build_gclp { - my ($self) = @_; - return $self->has_function_list && $self->function_list =~ /gclp/ismx; -} =head2 function_list_conf @@ -442,8 +425,6 @@ sub _build_function_list_conf { } =head2 general_values_conf -=head2 illumina_pipeline_conf -=head2 pb_cal_pipeline_conf =head2 parallelisation_conf Returns a hashref of configuration details from the relevant configuration file @@ -451,8 +432,6 @@ Returns a hashref of configuration details from the relevant configuration file =cut has [ qw{ general_values_conf - illumina_pipeline_conf - pb_cal_pipeline_conf parallelisation_conf } ] => ( isa => q{HashRef}, @@ -465,14 +444,6 @@ sub _build_general_values_conf { my ( $self ) = @_; return $self->read_config( $self->conf_file_path(q{general_values.ini}) ); } -sub _build_illumina_pipeline_conf { - my ( $self ) = @_; - return $self->read_config( $self->conf_file_path(q{illumina_pipeline.ini}) ); -} -sub _build_pb_cal_pipeline_conf { - my ( $self ) = @_; - return $self->read_config( $self->conf_file_path(q{pb_cal_pipeline.ini}) ); -} sub _build_parallelisation_conf { my ( $self ) = @_; return $self->read_config( 
$self->conf_file_path(q{parallelisation.yml}) ); diff --git a/lib/npg_pipeline/daemon.pm b/lib/npg_pipeline/daemon.pm index 589feabf6..b9c956010 100644 --- a/lib/npg_pipeline/daemon.pm +++ b/lib/npg_pipeline/daemon.pm @@ -194,7 +194,6 @@ sub check_lims_link { my $lims = {}; $lims->{'id'} = $batch_id; if ($fcell_row) { - $lims->{'gclp'} = $fcell_row->from_gclp; $lims->{'qc_run'} = (defined $fcell_row->purpose && $fcell_row->purpose eq 'qc') ? 1 : undef; } else { $lims->{'qc_run'} = diff --git a/lib/npg_pipeline/daemon/analysis.pm b/lib/npg_pipeline/daemon/analysis.pm index f032f0550..3ec35292e 100644 --- a/lib/npg_pipeline/daemon/analysis.pm +++ b/lib/npg_pipeline/daemon/analysis.pm @@ -16,7 +16,6 @@ Readonly::Scalar my $PIPELINE_SCRIPT => q{npg_pipeline_central}; Readonly::Scalar my $DEFAULT_JOB_PRIORITY => 50; Readonly::Scalar my $RAPID_RUN_JOB_PRIORITY => 60; Readonly::Scalar my $ANALYSIS_PENDING => q{analysis pending}; -Readonly::Scalar my $GCLP_STUDY_KEY => q{gclp_all_studies}; Readonly::Scalar my $PATH_DELIM => q{:}; sub build_pipeline_script_name { @@ -82,7 +81,7 @@ sub _process_one_run { $arg_refs->{'job_priority'} += $inherited_priority; } $arg_refs->{'rf_path'} = $self->runfolder_path4run($id_run); - $arg_refs->{'software'} = $self->_software_bundle($arg_refs->{'gclp'} ? 1 : 0, $arg_refs->{'studies'}); + $arg_refs->{'software'} = $self->_software_bundle($arg_refs->{'studies'}); $self->run_command( $id_run, $self->_generate_command( $arg_refs )); @@ -90,16 +89,13 @@ sub _process_one_run { } sub _software_bundle { - my ($self, $is_gclp_run, $studies) = @_; + my ($self, $studies) = @_; - if (!defined $is_gclp_run) { - $self->logcroak('GCLP flag is not defined'); - } if (!$studies) { $self->logcroak('Study ids are missing'); } - my @s = $is_gclp_run ? ($GCLP_STUDY_KEY) : @{$studies}; + my @s = @{$studies}; my $conf = $self->study_analysis_conf(); @@ -109,10 +105,6 @@ sub _software_bundle { } my $software_dir = @software ? 
$software[0] : q[]; - if ($is_gclp_run && !$software_dir) { - $self->logcroak(q{GCLP run needs explicit software bundle}); - } - if ($software_dir && !-d $software_dir) { $self->logcroak(qq{Directory '$software_dir' does not exist}); } @@ -138,21 +130,15 @@ sub _generate_command { $arg_refs->{'job_priority'}, $arg_refs->{'rf_path'}; - if ( $arg_refs->{'gclp'} ) { - $self->info('GCLP run'); - $cmd .= q{ --function_list gclp}; - } else { - $self->info('Non-GCLP run'); - if (!$arg_refs->{'id'}) { - # Batch id is needed for MiSeq runs, including qc runs - $self->logcroak(q{Lims flowcell id is missing}); - } - if ($arg_refs->{'qc_run'}) { - $cmd .= q{ --qc_run}; - $self->info('QC run'); - } - $cmd .= q{ --id_flowcell_lims } . $arg_refs->{'id'}; + if (!$arg_refs->{'id'}) { + # Batch id is needed for MiSeq runs, including qc runs + $self->logcroak(q{Lims flowcell id is missing}); + } + if ($arg_refs->{'qc_run'}) { + $cmd .= q{ --qc_run}; + $self->info('QC run'); } + $cmd .= q{ --id_flowcell_lims } . $arg_refs->{'id'}; my $path = join $PATH_DELIM, $self->local_path(), $ENV{'PATH'}; my $analysis_path_root = $arg_refs->{'software'}; diff --git a/lib/npg_pipeline/daemon/archival.pm b/lib/npg_pipeline/daemon/archival.pm index 8474bdf96..874255734 100644 --- a/lib/npg_pipeline/daemon/archival.pm +++ b/lib/npg_pipeline/daemon/archival.pm @@ -27,7 +27,7 @@ sub run { } else { if ( $self->staging_host_match($run->folder_path_glob)) { my $lims = $self->check_lims_link($run); - $self->run_command($id_run, $self->_generate_command($id_run, $lims->{'gclp'})); + $self->run_command($id_run, $self->_generate_command($id_run)); } } } catch { @@ -39,12 +39,9 @@ sub run { } sub _generate_command { - my ($self, $id_run, $gclp) = @_; - - $self->info($gclp ? 'GCLP run' : 'Non-GCLP run'); + my ($self, $id_run) = @_; my $cmd = $self->pipeline_script_name(); - $cmd = $cmd . ($gclp ? q{ --function_list gclp} : q()); $cmd = $cmd . q{ --verbose --runfolder_path } . 
$self->runfolder_path4run($id_run); my $path = join q[:], $self->local_path(), $ENV{PATH}; my $prefix = $self->daemon_conf()->{'command_prefix'}; diff --git a/lib/npg_pipeline/pluggable.pm b/lib/npg_pipeline/pluggable.pm index 4458ff468..0c4953e31 100644 --- a/lib/npg_pipeline/pluggable.pm +++ b/lib/npg_pipeline/pluggable.pm @@ -390,6 +390,15 @@ sub main { }; $self->_clear_env_vars(); if ($error) { + # This is the end of the pipeline script. + # We want to see this error in the pipeline daemon log, + # so it should be printed to standard error, not to + # this script's log, which might be a file. + # We currently tie STDERR so output to standard error + # goes to this script's log file. Hence the need to + # untie. Does not cause an error if STDERR has not been + # tied. + untie *STDERR; croak($error); } return; diff --git a/lib/npg_pipeline/pluggable/harold.pm b/lib/npg_pipeline/pluggable/harold.pm index 03a64ae0a..20c165c75 100644 --- a/lib/npg_pipeline/pluggable/harold.pm +++ b/lib/npg_pipeline/pluggable/harold.pm @@ -256,19 +256,6 @@ sub fix_config_files { return (); } -=head2 illumina2bam - -=cut - -sub illumina2bam { - my ( $self, @args ) = @_; - - my $illumina2bam = $self->new_with_cloned_attributes(q{npg_pipeline::archive::file::generation::illumina2bam}); - my $required_job_completion = shift @args; - my @job_ids = $illumina2bam->generate({required_job_completion => $required_job_completion,}); - return @job_ids; -} - =head2 create_summary_link_analysis function which creates/changes the summary link in the runfolder diff --git a/lib/npg_pipeline/pluggable/harold/central.pm b/lib/npg_pipeline/pluggable/harold/central.pm index 799100ac7..0f042f142 100644 --- a/lib/npg_pipeline/pluggable/harold/central.pm +++ b/lib/npg_pipeline/pluggable/harold/central.pm @@ -3,7 +3,6 @@ package npg_pipeline::pluggable::harold::central; use Moose; use Carp; use English qw{-no_match_vars}; -use Readonly; use File::Spec; use List::MoreUtils qw/any/; @@ -26,39 +25,19 @@ 
Pluggable module runner for the main pipeline =cut -Readonly::Array our @OLB_FUNCTIONS => qw/ matrix_lanes matrix_all - phasing_lanes phasing_all - basecalls_lanes basecalls_all - /; =head1 SUBROUTINES/METHODS -=cut - -has '_pbcal_obj' => ( - isa => 'npg_pipeline::analysis::harold_calibration_bam', - is => 'ro', - lazy => 1, - builder => '_build_pbcal_obj', - ); -sub _build_pbcal_obj { - my $self = shift; - return $self->new_with_cloned_attributes(q{npg_pipeline::analysis::harold_calibration_bam}); -} - =head2 prepare Sets all paths needed during the lifetime of the analysis runfolder. Creates any of the paths that do not exist. - Dynamically adds bustard functions to the object; - =cut override 'prepare' => sub { my $self = shift; $self->_set_paths(); super(); # Correct order! - $self->_inject_bustard_functions(); return; }; @@ -71,8 +50,10 @@ override 'prepare' => sub { sub _set_paths { my $self = shift; + my $sep = q[/]; + if ( ! $self->has_intensity_path() ) { - my $ipath = $self->runfolder_path() . q{/Data/Intensities}; + my $ipath = join $sep, $self->runfolder_path(), q{Data}, q{Intensities}; if (!-e $ipath) { $self->info(qq{Intensities path $ipath not found}); $ipath = $self->runfolder_path(); @@ -81,122 +62,31 @@ sub _set_paths { } $self->info('Intensities path: ', $self->intensity_path() ); - # If preprocessing with OLB, to set the paths mentioned below, - # one needs to know the name of the bustard directory. - # This name is not known till the bustard scripts is run. - # Therefore, if using OLB, delay creating these directories. - if (!$self->olb) { - if ( ! $self->has_dif_files_path() ) { - $self->set_dif_files_path( $self->intensity_path() ); - } - $self->info('Dif files path: ', $self->dif_files_path() ); - - if ( ! $self->has_basecall_path() ) { - my $bpath = $self->intensity_path() . 
q{/BaseCalls}; - if (!-e $bpath) { - $self->warn(qq{BaseCalls path $bpath not found}); - $bpath = $self->runfolder_path(); - } - $self->_set_basecall_path( $bpath); + if ( ! $self->has_basecall_path() ) { + my $bpath = join $sep, $self->intensity_path() , q{BaseCalls}; + if (!-e $bpath) { + $self->warn(qq{BaseCalls path $bpath not found}); + $bpath = $self->runfolder_path(); } - $self->info('BaseCalls path: ' . $self->basecall_path() ); + $self->_set_basecall_path( $bpath); } + $self->info('BaseCalls path: ' . $self->basecall_path() ); - if( ! $self->has_bam_basecall_path() ) { - my $bam_basecalls_dir = $self->intensity_path() . q{/} .q{BAM_basecalls_} . $self->timestamp(); + if( ! $self->has_bam_basecall_path() ) { + my $bam_basecalls_dir = join $sep, $self->intensity_path(), q{BAM_basecalls_} . $self->timestamp(); $self->make_log_dir( $bam_basecalls_dir ); $self->set_bam_basecall_path( $bam_basecalls_dir ); } $self->info('BAM_basecall path: ' . $self->bam_basecall_path()); - $self->_set_bam_basecall_dependent_paths(); - - if ($self->olb) { - my $bustard_dir = $self->new_with_cloned_attributes(q{npg_pipeline::analysis::bustard4pbcb}, - {bustard_home => $self->intensity_path,})->bustard_dir(); - $self->set_dif_files_path( $bustard_dir ); - $self->_set_basecall_path( $bustard_dir ); - $self->info("basecall and dif_files paths set to $bustard_dir"); - $self->make_log_dir( $bustard_dir ); + if (! $self->has_recalibrated_path()) { + $self->_set_recalibrated_path(join $sep, $self->bam_basecall_path(), 'no_cal') } + $self->make_log_dir($self->recalibrated_path()); + $self->info('PB_cal path: ' . $self->recalibrated_path()); - return; -} - -### -# -# If unset, sets recalibrated_path and pb_cal_path. 
-# - -sub _set_bam_basecall_dependent_paths { - my $self = shift; - my $pathways = { - recalibrated_path => undef, - pb_cal_path => undef, - }; - - # for each of the paths, see if they have been prepopulated - foreach my $path ( keys %{ $pathways } ) { - my $has_method = q{has_} . $path; - if ( $self->$has_method() ) { - $pathways->{$path} = $self->$path(); - } - } - - # if recalibrated_path or pb_cal_path are not set, but the other is, match them up - if ( $pathways->{recalibrated_path} && ! $pathways->{pb_cal_path} ) { - $pathways->{pb_cal_path} = $pathways->{recalibrated_path}; - } - if ( ! $pathways->{recalibrated_path} && $pathways->{pb_cal_path} ) { - $pathways->{recalibrated_path} = $pathways->{pb_cal_path}; - } - - # if there is no recalibrated_path and pb_cal_path, then create them and store - if ( ! $pathways->{recalibrated_path} ) { - my $recalibrated_level_dir = !$self->recalibration() ? q{no_cal} - : q{PB_cal_bam} - ; - $self->make_log_dir( $self->bam_basecall_path() . q{/} . $recalibrated_level_dir ); - $pathways->{recalibrated_path} = $self->bam_basecall_path() . q{/} . $recalibrated_level_dir; - $pathways->{pb_cal_path} = $self->bam_basecall_path() . q{/} . $recalibrated_level_dir; - } - # for each of these, go and set them (we know we must have created them by now) - foreach my $path ( keys %{ $pathways } ) { - my $set_method = q{_set_} . $path; - $self->$set_method( $pathways->{$path} ); - } - - $self->info('PB_cal path: ' . $self->pb_cal_path()); - $self->info('Recalibrated_path: ' . $self->recalibrated_path() ); $self->make_log_dir( $self->status_files_path ); - return; -} - -#### -# Dynamically creates functions to run OLB preprocessing. -# -sub _inject_bustard_functions { - my $self = shift; - - foreach my $function (@OLB_FUNCTIONS) { - ##no critic (TestingAndDebugging::ProhibitNoStrict TestingAndDebugging::ProhibitNoWarnings) - no strict 'refs'; - no warnings 'redefine'; - my $fpointer = 'bustard_' . 
$function; - if ($self->olb) { - *{$fpointer}= sub { my ($self, @args) = @_; - my $job_dep = shift @args; - return npg_pipeline::analysis::bustard4pbcb->new( - pipeline=>$self, - bustard_home=>$self->intensity_path, - bustard_dir=>$self->basecall_path, - id_run=>$self->id_run, - lanes=>$self->lanes)->make($function,$job_dep); }; - } else { - *{$fpointer}= sub { $self->info('OLB preprocessing switched off, not running ' . $function ); return (); } - } - } return; } @@ -213,50 +103,9 @@ sub illumina_basecall_stats { $self->info(q{HiSeqX sequencing instrument, illumina_basecall_stats will not be run}); return (); } - return $self->_run_harold_steps( q{generate_illumina_basecall_stats}, @args); -} - -=head2 harold_alignment_files - -Generate the alignment files to now be used for generating calibration tables - -=cut - -sub harold_alignment_files { - my ($self, @args) = @_; - return $self->_run_harold_steps( q{generate_alignment_files}, @args); -} - -=head2 harold_calibration_tables - -Generate the calibration tables used for harold recalibration - -=cut - -sub harold_calibration_tables { - my ($self, @args) = @_; - if ( !$self->recalibration() ) { - $self->info(q{recalibration is false, no recalibration will be performed}); - return (); - } - return $self->_run_harold_steps( q{generate_calibration_table}, @args); -} - -=head2 harold_recalibration - -submit the recalibration jobs - -=cut - -sub harold_recalibration { - my ($self, @args) = @_; - return $self->_run_harold_steps( q{generate_recalibrated_bam}, @args); -} - -sub _run_harold_steps { - my ($self, $method, @args) = @_; my $required_job_completion = shift @args; - return $self->_pbcal_obj->$method({required_job_completion => $required_job_completion,}); + return $self->new_with_cloned_attributes(q{npg_pipeline::analysis::illumina_basecall_stats}) + ->generate({required_job_completion => $required_job_completion,}); } =head2 split_bam_by_tag @@ -349,7 +198,8 @@ sub _bam2fastqcheck_and_cached_fastq_command { my 
$job_name = join q{_}, q{bam2fastqcheck_and_cached_fastq}, $id_run, $timestamp; my $out = $job_name . q{.%I.%J.out}; - $out = File::Spec->catfile($self->make_log_dir($self->pb_cal_path), $out ); + $out = File::Spec->catfile($self->make_log_dir($self->recalibrated_path), $out ); + $job_name = q{'} . $job_name . npg_pipeline::lsf_job->create_array_string( $self->positions()) . q{'}; my $job_sub = q{bsub -q } . $self->lsf_queue() . q{ } . @@ -357,7 +207,7 @@ sub _bam2fastqcheck_and_cached_fastq_command { qq{ $required_job_completion -J $job_name -o $out }; $job_sub .= q{'} . q{generate_cached_fastq --path } . $self->archive_path() . - q{ --file } . $self->pb_cal_path() . q{/} . $id_run . q{_} . $self->lsb_jobindex() . q{.bam} . + q{ --file } . $self->recalibrated_path() . q{/} . $id_run . q{_} . $self->lsb_jobindex() . q{.bam} . q{'}; $self->debug($job_sub); @@ -395,8 +245,6 @@ __END__ =item English -no_match_vars -=item Readonly - =item File::Spec =item List::MoreUtils @@ -413,7 +261,7 @@ Guoying Qi =head1 LICENSE AND COPYRIGHT -Copyright (C) 2014 Genome Research Limited +Copyright (C) 2017 Genome Research Limited This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/lib/npg_pipeline/pluggable/harold/post_qc_review.pm b/lib/npg_pipeline/pluggable/harold/post_qc_review.pm index 4b71d58c4..f1babe94e 100644 --- a/lib/npg_pipeline/pluggable/harold/post_qc_review.pm +++ b/lib/npg_pipeline/pluggable/harold/post_qc_review.pm @@ -26,28 +26,6 @@ Pluggable pipeline module for the post_qc_review pipeline =head1 SUBROUTINES/METHODS -=head2 archive_to_irods - - upload all archival files to irods (used by GCLP only) - -=cut - -sub archive_to_irods { - my ($self, @args) = @_; - - if ($self->no_irods_archival) { - $self->warn(q{Archival to iRODS is switched off.}); - return (); - } - my $required_job_completion = shift @args; - my $ats = 
$self->new_with_cloned_attributes(q{npg_pipeline::archive::file::to_irods}); - my @job_ids = $ats->submit_to_lsf({ - required_job_completion => $required_job_completion, - }); - - return @job_ids; -} - =head2 archive_to_irods_samplesheet upload all archival files using the samplesheet LIMS driver @@ -225,14 +203,11 @@ sub _update_warehouse_command { my $post_qc_complete = $option and (ref $option eq 'HASH') and $option->{'post_qc_complete'} ? 1 : 0; my $id_run = $self->id_run; - my $command = q[]; + my $command = qq{$loader_name --verbose --id_run $id_run}; if ($loader_name eq 'warehouse_loader') { - # Currently, we need pool library name and link to plexes in SeqQC. - # Therefore, we need to run live. - $command = join q[], map {q[unset ] . $_ . q[;]} npg_pipeline::cache->env_vars; + $command .= q{ --lims_driver_type }; + $command .= $post_qc_complete ? 'ml_warehouse_fc_cache' : 'samplesheet'; } - - $command .= qq{$loader_name --verbose --id_run $id_run}; my $job_name = join q{_}, $loader_name, $id_run, $self->pipeline_name; my $path = $self->make_log_dir($self->recalibrated_path()); my $prereq = q[]; diff --git a/lib/npg_pipeline/roles/business/base.pm b/lib/npg_pipeline/roles/business/base.pm index 3db3a34de..b67445374 100644 --- a/lib/npg_pipeline/roles/business/base.pm +++ b/lib/npg_pipeline/roles/business/base.pm @@ -215,19 +215,6 @@ sub _build_is_hiseqx_run { return $self->run->instrument->name =~ /\AH[XF]/xms; } -=head2 gclp - -Boolean describing whether this analysis is GCLP - -=cut - -has q{gclp} => ( - isa => q{Bool}, - is => q{ro}, - lazy_build => 1, - documentation => q{Boolean describing whether this analysis is GCLP with a default based on the function_list if set}, -); - =head2 positions An array of lane positions for this submission. 
@@ -253,34 +240,6 @@ sub all_positions { return @position; } -=head2 tile_list - -A string of wildcards for tiles for OLB, defaults to an empty string - -=cut - -has q{tile_list} => (isa => q{Str}, - is => q{ro}, - default => q{}, - documentation => q{string of wildcards for tiles for OLB, defaults to an empty string},); - -=head2 override_all_bustard_options - -Overrides all bustard options (including any given via other options) as a string - it is up to the user to ensure all are correct and given - -=head2 has_override_all_bustard_options - -predicate to ensure that options are available - -=cut - -has q{override_all_bustard_options} => ( - isa => q{Str}, - is => q{ro}, - predicate => q{has_override_all_bustard_options}, - documentation => q{Overrides all bustard options (including any given via other options) as a string - it is up to the user to ensure all are correct and given - i.e. only use if you know what you are doing.}, -); - =head2 repository A custom reference repository root directory. diff --git a/lib/npg_pipeline/roles/business/flag_options.pm b/lib/npg_pipeline/roles/business/flag_options.pm index 529cf5115..39e6be45c 100644 --- a/lib/npg_pipeline/roles/business/flag_options.pm +++ b/lib/npg_pipeline/roles/business/flag_options.pm @@ -40,18 +40,6 @@ has q{no_summary_link} => ( documentation => q{Turn off creating a Latest_Summary link}, ); -=head2 recalibration - -Switches recalibration on, false by default - -=cut - -has q{recalibration} => ( - isa => q{Bool}, - is => q{ro}, - documentation => q{Switches recalibration on, false by default}, -); - =head2 no_fix_config_files flag option to request that config files are not checked and fixed @@ -172,19 +160,6 @@ sub _build_local { return $self->no_bsub ? 1 : 0; } -=head2 olb - -Switches on OLB Bustard preprocessing to generate dif and bcl -files and Bustard XML Summary from cif files; false by default. 
- -=cut - -has q{olb} => ( - isa => q{Bool}, - is => q{ro}, - documentation => q{Switches on OLB Bustard preprocessing}, -); - =head2 spatial_filter Do we want to use the spatial_filter program? @@ -236,7 +211,7 @@ Andy Brown =head1 LICENSE AND COPYRIGHT -Copyright (C) 2015 Genome Research Ltd +Copyright (C) 2017 Genome Research Ltd This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/lib/npg_pipeline/roles/business/harold_calibration_reqs.pm b/lib/npg_pipeline/roles/business/harold_calibration_reqs.pm deleted file mode 100644 index 551a4317c..000000000 --- a/lib/npg_pipeline/roles/business/harold_calibration_reqs.pm +++ /dev/null @@ -1,229 +0,0 @@ -package npg_pipeline::roles::business::harold_calibration_reqs; - -use Moose::Role; -use English qw{-no_match_vars}; -use Carp; -use Readonly; - -requires qw{directory_exists}; - -our $VERSION = '0'; - -# hard-coded default parameters for running harold_calibration steps, can be overriden on the command line -# whilst these can be overridden, they are requested to be how the pipeline operates, so we don't -# want them in a config file -Readonly::Scalar our $CAL_TABLE_JOB => q{PB_cal_table}; -Readonly::Scalar our $SCORE_JOB => q{PB_cal_score}; -Readonly::Scalar our $ALIGN_JOB => q{PB_cal_align}; -Readonly::Scalar our $PB_DIRECTORY => q{PB_cal}; - -=head1 NAME - -npg_pipeline::roles::business::harold_calibration_reqs - -=head1 SYNOPSIS - - package MyPackage; - use Moose; - ... - with qw{npg_pipeline::roles::business::harold_calibration_reqs}; - -=head1 DESCRIPTION - -This role is designed to be able to apply all the harold calibration variables, including lsf requirements, -for internal running of the harold calibration steps. 
- -Note, your class must provide the following methods - - 'directory_exists' - -=head1 SUBROUTINES/METHODS -=cut - -has q{random} => (isa => q{Int}, is => q{ro}, lazy_build => 1, - documentation => q{Default from pb_cal_pipeline.ini},); - -sub _build_random { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{random}; -} - -has q{t_filter} => (isa => q{Int}, is => q{ro}, lazy_build => 1, - documentation => q{t_filter value},); - -sub _build_t_filter { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{t_filter}; -} - -has q{mem_calibration} => (isa => q{Int}, is => q{ro}, lazy_build => 1, - documentation => q{memory to be used for calibration table creation jobs},); - -sub _build_mem_calibration { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{mem_calibration}; -} - -has q{mem_score} => (isa => q{Int}, is => q{ro}, lazy_build => 1, - documentation => q{memory to be used for scoring jobs},); - -sub _build_mem_score { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{mem_score}; -} - -has q{cal_table_job} => (isa => q{Str}, is => q{ro}, lazy_build => 1, - documentation => qq{Default : $CAL_TABLE_JOB},); - -sub _build_cal_table_job { return $CAL_TABLE_JOB; } - -has q{cal_table_script} => (isa => q{Str}, is => q{ro}, lazy_build => 1, - documentation => q{Default from pb_cal_pipeline.ini},); - -sub _build_cal_table_script { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{cal_table_script}; -} - -has q{align_job} => (isa => q{Str}, is => q{ro}, lazy_build => 1, - documentation => qq{Default : $ALIGN_JOB},); - -sub _build_align_job { return $ALIGN_JOB; } - -has q{alignment_script} => (isa => q{Str}, is => q{ro}, lazy_build => 1, - documentation => q{Default from pb_cal_pipeline.ini},); - -sub _build_alignment_script { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{alignment_script}; -} - -has q{recalibration_script} => (isa => q{Str}, is => q{ro}, lazy_build => 1, - documentation => q{Default 
from pb_cal_pipeline.ini},); - -sub _build_recalibration_script { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{recalibration_script}; -} - -has q{pb_directory} => (isa => q{Str}, is => q{ro}, lazy_build => 1, init_arg => undef); - -sub _build_pb_directory { return $PB_DIRECTORY; } - -has q{score_job} => (isa => q{Str}, is => q{ro}, lazy_build => 1, - documentation => qq{Default : $SCORE_JOB},); - -sub _build_score_job { return $SCORE_JOB; } - -has q{region_size} => (isa => q{Int}, is => q{ro}, lazy_build => 1, - documentation => q{Default in pb_cal_pipeline.ini},); - -sub _build_region_size { - my ( $self ) = @_; - return $self->pb_cal_pipeline_conf()->{region_size}; -} - -=head2 calibration_table_name - -generates the calibration table name expected, requiring the id_run and read to be passes in -if no control lane can be worked out, will return an empty string - - my $sCalibrationTableName = $class->calibration_table_name( $iIdRun, $iRead ); - -=cut - -sub calibration_table_name { - my ($self, $arg_refs ) = @_; - my $id_run = $arg_refs->{id_run}; - if( $arg_refs->{read} ) { - $self->logcroak(q{read is a deprecated argument}); - } - my $position = $arg_refs->{position}; - # set the mode - if( $arg_refs->{mode} ) { - $self->logcroak(q{mode is a deprecated argument}); - } - - if ( ! $position ) { - $self->warn(q{no position obtained}); - return q{}; - } - - return $id_run . q{_} . $position . $self->pb_cal_pipeline_conf()->{cal_table_suffix}; -} - - -=head2 create_pb_calibration_directory - -checks for the existence of a pb_calibration directory and if it doesn't exist, will create it - -returns the path of the pb_calibration directory - -=cut - -sub create_pb_calibration_directory { - my ( $self ) = @_; - - my $pb_cal_dir = $self->pb_cal_path(); - - if ( ! 
$self->directory_exists( $pb_cal_dir ) ) { - $self->info(qq{Creating $pb_cal_dir}); - - my $output = qx[mkdir $pb_cal_dir]; - if ($CHILD_ERROR) { - $self->logcroak(qq{Unable to create $pb_cal_dir}); - } - - $self->info(qq{Created : $output}); - } - - $self->make_log_dir( $pb_cal_dir ); - - return $pb_cal_dir; -} - -1; -__END__ - -=head1 DIAGNOSTICS - -=head1 CONFIGURATION AND ENVIRONMENT - -=head1 DEPENDENCIES - -=over - -=item Moose::Role - -=item Carp - -=item English -no_match_vars - -=item Readonly - -=back - -=head1 INCOMPATIBILITIES - -=head1 BUGS AND LIMITATIONS - -=head1 AUTHOR - -Andy Brown - -=head1 LICENSE AND COPYRIGHT - -Copyright (C) 2014 Genome Research Ltd - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . diff --git a/t/10-base.t b/t/10-base.t index 71d318372..45f6aaf4d 100644 --- a/t/10-base.t +++ b/t/10-base.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 91; +use Test::More tests => 82; use Test::Exception; use File::Temp qw(tempdir tempfile); use File::Copy qw(cp); @@ -51,19 +51,11 @@ use_ok(q{npg_pipeline::base}); { my $base = npg_pipeline::base->new(); - - foreach my $config_group ( qw{ - general_values_conf - illumina_pipeline_conf - pb_cal_pipeline_conf - } ) { - isa_ok( $base->$config_group(), q{HASH}, q{$} . 
qq{base->$config_group} ); - } + isa_ok( $base->general_values_conf(), q{HASH}); } { my $base = npg_pipeline::base->new(); - ok( !$base->gclp, 'function list not set and correctly defaults as not GCLP'); my $path = "${config_dir}/function_list_base.yml"; @@ -79,14 +71,6 @@ use_ok(q{npg_pipeline::base}); $path =~ s/function_list_base/function_list_central/; $base = npg_pipeline::base->new(function_list => $path); is( $base->function_list, $path, 'function list path as given'); - ok(!$base->gclp, 'function list set and correctly identified as not GCLP'); - isa_ok( $base->function_list_conf(), q{ARRAY}, 'function list is read into an array'); - - my $gpath=$path; - $gpath =~ s/function_list_central/function_list_central_gclp/; - $base = npg_pipeline::base->new(function_list => $gpath); - is( $base->function_list, $gpath, 'GCLP function list path as given'); - ok( $base->gclp, 'function list set and correctly identified as GCLP'); isa_ok( $base->function_list_conf(), q{ARRAY}, 'function list is read into an array'); $base = npg_pipeline::base->new(function_list => 'data/config_files/function_list_central.yml'); @@ -251,13 +235,6 @@ package main; my $fl = "${config_dir}/function_list_central_qc_run.yml"; is( $base->function_list, $fl, 'qc function list'); - $base = mytest::central->new(id_flowcell_lims => 3456, gclp => 1); - my $gfl = "${config_dir}/function_list_central_gclp.yml"; - is( $base->function_list, $gfl, 'gclp function list'); - - $base = mytest::central->new(id_flowcell_lims => 3456, function_list => 'gclp'); - is( $base->function_list, $gfl, 'gclp function list'); - $base = npg_pipeline::base->new(id_flowcell_lims => '3980331130775'); my $path = "${config_dir}/function_list_base_qc_run.yml"; throws_ok { $base->function_list } diff --git a/t/10-pluggable_harold_central.t b/t/10-pluggable_harold_central.t index 5f5f0e385..2bf83154a 100644 --- a/t/10-pluggable_harold_central.t +++ b/t/10-pluggable_harold_central.t @@ -1,6 +1,6 @@ use strict; use warnings; -use 
Test::More tests => 32; +use Test::More tests => 21; use Test::Exception; use Cwd qw/getcwd/; use List::MoreUtils qw/ any none /; @@ -21,7 +21,6 @@ Log::Log4perl->easy_init({layout => '%d %-5p %c - %m%n', file => join(q[/], $tdir, 'logfile'), utf8 => 1}); -local $ENV{TEST_DIR} = $tdir; local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[t/data]; local $ENV{TEST_FS_RESOURCE} = q{nfs_12}; @@ -76,7 +75,6 @@ my $runfolder_path = $util->analysis_runfolder_path(); qc_genotype qc_verify_bam_id qc_upstream_tags - qc_rna_seqc run_analysis_complete update_ml_warehouse archive_to_irods_samplesheet @@ -86,48 +84,17 @@ my $runfolder_path = $util->analysis_runfolder_path(); is_deeply( $pipeline->function_order() , $expected_function_order, q{Function order correct} ); } -{ - local $ENV{CLASSPATH} = q{t/bin/software/solexa/jars}; - my $pipeline; - lives_ok { - $pipeline = $central->new( - id_run => 1234, - runfolder_path => $runfolder_path, - recalibration => 0, - no_bsub => 1, - spider => 0, - ); - } q{no croak creating new object}; - - ok( !scalar $pipeline->harold_calibration_tables(), q{no calibration tables launched} ); - ok(!$pipeline->olb, 'not olb pipeline'); - lives_ok { $pipeline->prepare() } 'prepare lives'; - ok( $pipeline->illumina_basecall_stats(), q{olb false - illumina_basecall_stats job launched} ); - my $bool = none {$_ =~ /bustard/} @{$pipeline->function_order()}; - ok( $bool, 'bustard functions are out'); - - $pipeline = $central->new( - runfolder_path => $runfolder_path, - no_bsub => 1, - olb => 1, - ); - is ($pipeline->function_list, - abs_path(getcwd() . 
'/data/config_files/function_list_central_olb.yml'), - 'olb function list'); - $bool = any {$_ =~ /bustard/} @{$pipeline->function_order()}; - ok( $bool, 'bustard functions are in'); -} - { my $pb; lives_ok { $pb = $central->new( - function_order => [qw(qc_qX_yield illumina2bam qc_insert_size)], + function_order => [qw(qc_qX_yield qc_insert_size)], runfolder_path => $runfolder_path, ); } q{no croak on creation}; $util->set_staging_analysis_area({with_latest_summary => 1}); - is(join(q[ ], @{$pb->function_order()}), 'lsf_start qc_qX_yield illumina2bam qc_insert_size lsf_end', 'function_order set on creation'); + is(join(q[ ], @{$pb->function_order()}), 'lsf_start qc_qX_yield qc_insert_size lsf_end', + 'function_order set on creation'); } { @@ -137,7 +104,7 @@ my $runfolder_path = $util->analysis_runfolder_path(); my $pb; $util->set_staging_analysis_area(); my $init = { - function_order => [qw{illumina2bam qc_qX_yield qc_adapter update_warehouse qc_insert_size archive_to_irods}], + function_order => [qw{qc_qX_yield qc_adapter update_warehouse qc_insert_size}], lanes => [4], runfolder_path => $runfolder_path, no_bsub => 1, @@ -148,27 +115,14 @@ my $runfolder_path = $util->analysis_runfolder_path(); lives_ok { $pb = $central->new($init); } q{no croak on new creation}; mkdir $pb->archive_path; mkdir $pb->qc_path; - - throws_ok { $pb->main() } - qr/Error submitting jobs: Can\'t find \'BamAdapterFinder\.jar\' because CLASSPATH is not set/, - q{error running qc->main() when CLASSPATH is not set for illumina2bam job}; - - local $ENV{CLASSPATH} = q[t/bin/software]; - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[t/data]; - throws_ok { $pb->main() } - qr/Error submitting jobs: no such file on CLASSPATH: BamAdapterFinder\.jar/, - q{error running qc->main() when CLASSPATH is not set correctly for illumina2bam job}; - - local $ENV{CLASSPATH} = q[t/bin/software/solexa/jars]; local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[t/data]; - lives_ok { $pb->main() } q{no croak running qc->main() when 
CLASSPATH is set correctly for illumina2bam job}; + lives_ok { $pb->main() } q{no croak running qc->main()}; my $timestamp = $pb->timestamp; my $recalibrated_path = $pb->recalibrated_path(); my $log_dir = $pb->make_log_dir( $recalibrated_path ); - my $unset_string = 'unset NPG_WEBSERVICE_CACHE_DIR;unset NPG_CACHED_SAMPLESHEET_FILE;'; my $expected_command = q[bsub -q lowload 50 -J warehouse_loader_1234_central ] . - qq[-o $log_dir/warehouse_loader_1234_central_] . $timestamp . - qq[.out '${unset_string}warehouse_loader --verbose --id_run 1234']; + qq[-o $log_dir/warehouse_loader_1234_central_] . $timestamp . q[.out ] . + qq['warehouse_loader --verbose --id_run 1234 --lims_driver_type samplesheet']; is($pb->_update_warehouse_command('warehouse_loader', (50)), $expected_command, 'update warehouse command'); } @@ -189,8 +143,7 @@ mkdir $rf; is ($pb->intensity_path, $rf, 'intensities path is set to runfolder'); is ($pb->basecall_path, $rf, 'basecall path is set to runfolder'); is ($pb->bam_basecall_path, join(q[/],$rf,q{BAM_basecalls_22-May}), 'bam basecall path is created'); - is ($pb->pb_cal_path, join(q[/],$pb->bam_basecall_path, 'no_cal'), 'pb_cal path set'); - is ($pb->recalibrated_path, $pb->pb_cal_path, 'recalibrated directory set'); + is ($pb->recalibrated_path, join(q[/],$pb->bam_basecall_path, 'no_cal'), 'recalibrated path set'); my $status_path = $pb->status_files_path(); is ($status_path, join(q[/],$rf,q{BAM_basecalls_22-May}, q{status}), 'status directory path'); ok(-d $status_path, 'status directory created'); diff --git a/t/10-pluggable_harold_post_qc_review.t b/t/10-pluggable_harold_post_qc_review.t index 40948cea7..1f0d19551 100644 --- a/t/10-pluggable_harold_post_qc_review.t +++ b/t/10-pluggable_harold_post_qc_review.t @@ -34,7 +34,7 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); upload_auto_qc_to_qc_database run_run_archived run_qc_complete - update_warehouse + update_warehouse_post_qc_complete ); my @original = @functions_in_order; 
unshift @original, 'lsf_start'; @@ -64,10 +64,9 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); my $log_dir_in_outgoing = $log_dir; $log_dir_in_outgoing =~ s{/analysis/}{/outgoing/}smx; my $job_name = 'warehouse_loader_1234_post_qc_review'; - my $unset_string = 'unset NPG_WEBSERVICE_CACHE_DIR;unset NPG_CACHED_SAMPLESHEET_FILE;'; my $prefix = qq[bsub -q lowload 50 -J $job_name ] . qq[-o $log_dir/${job_name}_${timestamp}.out]; - my $command = qq['${unset_string}warehouse_loader --verbose --id_run 1234']; + my $command = q['warehouse_loader --verbose --id_run 1234 --lims_driver_type samplesheet']; is($post_qc_review->_update_warehouse_command('warehouse_loader', (50)), qq[$prefix $command], 'update warehouse command'); @@ -75,6 +74,7 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); $prefix = qq[bsub -q lowload 50 -J $job_name ] . qq[-o $log_dir_in_outgoing/${job_name}_${timestamp}.out]; my $preexec = qq(-E "[ -d '${log_dir_in_outgoing}' ]"); + $command = q['warehouse_loader --verbose --id_run 1234 --lims_driver_type ml_warehouse_fc_cache']; is($post_qc_review->_update_warehouse_command( 'warehouse_loader', (50, {}, {'post_qc_complete' => 1})), join(q[ ],$prefix,$preexec,$command), @@ -106,8 +106,8 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); no_irods_archival => 1, no_warehouse_update => 1, ); - ok(!($p->archive_to_irods() || $p->archive_to_irods_samplesheet() || - $p->archive_to_irods_ml_warehouse()), 'archival to irods switched off'); + ok(!($p->archive_to_irods_samplesheet() || $p->archive_to_irods_ml_warehouse()), + 'archival to irods switched off'); ok(!$p->update_warehouse(), 'update to warehouse switched off'); } @@ -116,7 +116,7 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); runfolder_path => $runfolder_path, local => 1, ); - ok(! ($p->archive_to_irods() || $p->archive_to_irods_samplesheet() || + ok(! 
($p->archive_to_irods_samplesheet() || $p->archive_to_irods_ml_warehouse()), 'archival to irods switched off'); ok(!$p->update_warehouse(), 'update to warehouse switched off'); is($p->no_summary_link,1, 'summary_link switched off'); @@ -128,7 +128,7 @@ use_ok('npg_pipeline::pluggable::harold::post_qc_review'); local => 1, no_warehouse_update => 0, ); - ok(!($p->archive_to_irods() || $p->archive_to_irods_samplesheet() || + ok(!($p->archive_to_irods_samplesheet() || $p->archive_to_irods_ml_warehouse()), 'archival to irods switched off'); ok($p->update_warehouse(), 'update to warehouse switched on'); is($p->no_summary_link,1, 'summary_link switched off'); diff --git a/t/15-pipeline_launcher_scripts.t b/t/15-pipeline_launcher_scripts.t index b2bfce25e..35bb47ab3 100644 --- a/t/15-pipeline_launcher_scripts.t +++ b/t/15-pipeline_launcher_scripts.t @@ -1,7 +1,7 @@ use strict; use warnings; use English qw{-no_match_vars}; -use Test::More tests => 12; +use Test::More tests => 8; use Test::Exception; use t::util; use Cwd; @@ -57,16 +57,6 @@ my $bin = $curdir . 
q[/bin]; q{ran bin/npg_pipeline_post_qc_review}; ok(!$CHILD_ERROR, qq{Return code of $CHILD_ERROR}); - lives_ok { qx{ - $bin/npg_pipeline_post_qc_review --runfolder_path $tmp_dir/nfs/sf45/IL2/analysis/123456_IL2_1234 --gclp}; } - q{ran bin/npg_pipeline_post_qc_review with gclp flag}; - ok(!$CHILD_ERROR, qq{Return code of $CHILD_ERROR}); - - lives_ok { qx{ - $bin/npg_pipeline_post_qc_review --runfolder_path $tmp_dir/nfs/sf45/IL2/analysis/123456_IL2_1234 --function_list gclp}; } - q{ran bin/npg_pipeline_post_qc_review with gclp function list}; - ok(!$CHILD_ERROR, qq{Return code of $CHILD_ERROR}); - lives_ok { qx{ $bin/npg_pipeline_post_qc_review --runfolder_path $tmp_dir/nfs/sf45/IL2/analysis/123456_IL2_1234 --function_list some}; } q{ran bin/npg_pipeline_post_qc_review with non-exisitng function list}; diff --git a/t/20-archive_file-to_irods.t b/t/20-archive_file-to_irods.t index f05aed820..58d85b1da 100644 --- a/t/20-archive_file-to_irods.t +++ b/t/20-archive_file-to_irods.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 15; +use Test::More tests => 13; use Test::Exception; use t::util; @@ -8,7 +8,6 @@ use_ok('npg_pipeline::archive::file::to_irods'); my $util = t::util->new(); -$ENV{TEST_DIR} = $util->temp_directory(); $ENV{TEST_FS_RESOURCE} = q{nfs_12}; local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[t/data]; local $ENV{PATH} = join q[:], q[t/bin], q[t/bin/software/solexa/bin], $ENV{PATH}; @@ -20,9 +19,8 @@ my $pb_cal = q[/Data/Intensities/Bustard1.3.4_09-07-2009_auto/PB_cal]; my $pb_cal_path = $analysis_runfolder_path . 
$pb_cal; sub create_analysis { - `rm -rf $tmp_dir/nfs/sf45`; `mkdir -p $analysis_runfolder_path/$pb_cal/archive`; - `mkdir $analysis_runfolder_path/Config`; + `mkdir -p $analysis_runfolder_path/Config`; `cp t/data/Recipes/Recipe_GA2_37Cycle_PE_v6.1.xml $analysis_runfolder_path/`; `cp t/data/Recipes/TileLayout.xml $analysis_runfolder_path/Config/`; `ln -s $pb_cal $analysis_runfolder_path/Latest_Summary`; @@ -117,26 +115,5 @@ sub create_analysis { is( $bsub_command, $expected_command, q{generated bsub command is correct} ); } -{ - my $bam_irods; - lives_ok { $bam_irods = npg_pipeline::archive::file::to_irods->new( - function_list => q{post_qc_review_gclp}, - run_folder => q{123456_IL2_1234}, - runfolder_path => $analysis_runfolder_path, - id_flowcell_lims => q{1023456789111}, - recalibrated_path => $pb_cal_path, - timestamp => q{20090709-123456}, - verbose => 0, - ); } q{created with run_folder ok}; - - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - my $archive_path = "$pb_cal_path/archive"; - my $bsub_command = $bam_irods ->_generate_bsub_command($arg_refs); - my $expected_command = qq[bsub -q lowload -w'done(123) && done(321)' -J npg_publish_illumina_run.pl_1234_20090709-123456 -R 'rusage[nfs_12=1,seq_irods=15]' -E 'script_must_be_unique_runner -job_name="npg_publish_illumina_run.pl_1234"' -o $pb_cal_path/log/npg_publish_illumina_run.pl_1234_20090709-123456.out 'irodsEnvFile=\$HOME/.irods/.irodsEnv-gclp-iseq npg_publish_illumina_run.pl --archive_path $archive_path --runfolder_path $analysis_runfolder_path --restart_file ${archive_path}/process_publish_\${LSB_JOBID}.json --max_errors 10 --alt_process qc_run --collection /14mg/seq/illumina/run/1234']; - is( $bsub_command, $expected_command, q{generated bsub command is correct} ); -} - 1; __END__ diff --git a/t/20-archive_file_generation-illumina2bam.t b/t/20-archive_file_generation-illumina2bam.t deleted file mode 100644 index c0fba9b92..000000000 --- 
a/t/20-archive_file_generation-illumina2bam.t +++ /dev/null @@ -1,299 +0,0 @@ -use strict; -use warnings; -use Test::More tests => 33; -use Test::Exception; -use Test::Differences; -use File::Copy; -use File::Path qw(make_path); -use Cwd; -use Log::Log4perl qw(:levels); - -use npg_tracking::util::abs_path qw(abs_path); -use t::util; - -my $util = t::util->new(); -my $dir = $util->temp_directory(); -my $current = abs_path(getcwd()); -$ENV{TEST_DIR} = $dir; -$ENV{TEST_FS_RESOURCE} = q{nfs_12}; -local $ENV{NPG_WEBSERVICE_CACHE_DIR} = 't/data/illumina2bam'; -local $ENV{CLASSPATH} = q[t/bin/software/solexa/jars]; -local $ENV{PATH} = join q[:], q[t/bin], q[t/bin/software/solexa/bin], $ENV{PATH}; -my $jar_path = join q[/], $current, $ENV{CLASSPATH}; - -Log::Log4perl->easy_init({layout => '%d %-5p %c - %m%n', - level => $DEBUG, - file => join(q[/], $dir, 'logfile'), - utf8 => 1}); - -use_ok('npg_pipeline::archive::file::generation::illumina2bam'); - -my $intensities_dir = $util->analysis_runfolder_path() . q{/Data/Intensities}; -my $pbcal_dir = $intensities_dir . q{/BaseCalls}; - -{ - my $new = "$dir/1234_samplesheet.csv"; - copy 't/data/illumina2bam/1234_samplesheet.csv', $new; - local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = $new; - `cp -R t/data/illumina2bam/npg $dir`; - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = $dir; - - $util->create_analysis(); - my $runfolder = $util->analysis_runfolder_path() . 
'/'; - `cp t/data/runfolder/Data/RunInfo.xml $runfolder`; - - my $bam_generator; - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - run_folder => q{123456_IL2_1234}, - runfolder_path => $util->analysis_runfolder_path(), - timestamp => q{20090709-123456}, - verbose => 0, - no_bsub => 1, - id_run => 1234, - _extra_tradis_transposon_read => 1, - bam_basecall_path => $pbcal_dir, - ); } q{no croak creating bam_generator object}; - - isa_ok($bam_generator, q{npg_pipeline::archive::file::generation::illumina2bam}, q{$bam_generator}); - is($bam_generator->_extra_tradis_transposon_read, 1, 'TraDIS set'); - $bam_generator->_extra_tradis_transposon_read(0); - is($bam_generator->_extra_tradis_transposon_read, 0, 'TraDIS not set'); - isa_ok($bam_generator->lims, 'st::api::lims', 'cached lims object'); - - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - my $mem = 4000; - my $cpu = 2; - my $alims = $bam_generator->lims->children_ia; - my $position = 8; - my $bsub_command = $bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}, 't/data/taglistfile'); - - is( $bam_generator->_get_number_of_plexes_excluding_control($alims->{$position}), - 1, 'correct number of plexes'); - - my $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M} . $mem . - q{ -R 'span[hosts=1]' -n} . $cpu . 
- qq{ -w'done(123) && done(321)' -J 'illumina2bam_1234_8_20090709-123456' -o $pbcal_dir/log/illumina2bam_1234_8_20090709-123456.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar $jar_path/Illumina2bam.jar I=$intensities_dir L=8 B=$pbcal_dir RG=1234_8 PU=123456_IL2_1234_8 LIBRARY_NAME="51021" SAMPLE_ALIAS="SRS000147" STUDY_NAME="SRP000031: 1000Genomes Project Pilot 1" OUTPUT=/dev/stdout COMPRESSION_LEVEL=0 | java -Xmx1024m -jar $jar_path/BamIndexDecoder.jar VALIDATION_STRINGENCY=SILENT I=/dev/stdin BARCODE_FILE=t/data/taglistfile METRICS_FILE=$pbcal_dir/1234_8.bam.tag_decode.metrics MAX_NO_CALLS=6 CONVERT_LOW_QUALITY_TO_NO_CALL=true CREATE_MD5_FILE=false OUTPUT=/dev/stdout}; - $expected_cmd .= qq{| tee >(bamseqchksum > $pbcal_dir/1234_8.post_i2b.seqchksum)}; - $expected_cmd .= q{ >(md5sum -b | tr -d '"'"'"'"'"'"'"'"'\n *\-'"'"'"'"'"'"'"'"' > } . qq{$pbcal_dir/1234_8.bam.md5)}; - $expected_cmd .= qq{ > $pbcal_dir/1234_8.bam'}; - - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for lane 8'); - - my @jids; - lives_ok { @jids = $bam_generator->generate($arg_refs); } q{no croak running generate}; - is(scalar @jids, 8, 'correct number of jobs submitted'); - ok(-f "$dir/lane_8.taglist", 'lane 8 tag list file generated'); - foreach my $lane ((1 .. 
7)) { - ok(!-e "$dir/lane_$lane.taglist", "lane $lane tag list file does not exist"); - } - - local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = q[]; - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = 't/data'; - - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - run_folder => q{123456_IL2_1234}, - runfolder_path => $util->analysis_runfolder_path(), - timestamp => q{20090709-123456}, - verbose => 0, - id_run => 8033, - bam_basecall_path => $pbcal_dir, - ); } q{no croak creating bam_generator object}; - - is($bam_generator->_extra_tradis_transposon_read, 1, 'TraDIS set'); - - $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - $alims = $bam_generator->lims->children_ia; - throws_ok {$bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position})} - qr/Tag list file path should be defined/, - 'error when tag file name is missing for a pool'; - - $bsub_command = $bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}, 't/data/lanetagfile'); - is( $bam_generator->_get_number_of_plexes_excluding_control($alims->{$position}), - 72, 'correct number of plexes'); - - $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M} . $mem . - q{ -R 'span[hosts=1]' -n}. $cpu . 
- qq{ -w'done(123) && done(321)' -J 'illumina2bam_8033_8_20090709-123456' -o $pbcal_dir/log/illumina2bam_8033_8_20090709-123456.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar $jar_path/Illumina2bam.jar I=$intensities_dir L=8 B=$pbcal_dirmouse PiggyBac sequencing: sites of PiggyBac integration into mouse genome" SEC_BC_SEQ=BC SEC_BC_QUAL=QT BC_SEQ=tr BC_QUAL=tq OUTPUT=/dev/stdout COMPRESSION_LEVEL=0 | java -Xmx1024m -jar $jar_path/BamIndexDecoder.jar VALIDATION_STRINGENCY=SILENT I=/dev/stdin BARCODE_FILE=t/data/lanetagfile METRICS_FILE=$pbcal_dir/8033_8.bam.tag_decode.metrics CREATE_MD5_FILE=false OUTPUT=/dev/stdout}; - $expected_cmd .= qq{| tee >(bamseqchksum > $pbcal_dir/8033_8.post_i2b.seqchksum)}; - $expected_cmd .= q{ >(md5sum -b | tr -d '"'"'"'"'"'"'"'"'\n *\-'"'"'"'"'"'"'"'"' > } . qq{$pbcal_dir/8033_8.bam.md5)}; - $expected_cmd .= qq{ > $pbcal_dir/8033_8.bam'}; - - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for lane 8'); - -## test of special 3' pulldown RNAseq read 1 index - - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - run_folder => q{121112_HS20_08797_A_C18TEACXX}, - runfolder_path => $util->analysis_runfolder_path(), - timestamp => q{20121112-123456}, - verbose => 0, - id_run => 8797, - bam_basecall_path => $pbcal_dir, - ); } q{no croak creating bam_generator object for run 8797}; - - $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - $alims = $bam_generator->lims->associated_child_lims_ia; - $position = 8; - $bsub_command = $bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}, 't/data/lanetagfile'); - - $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=} . $mem . q{,nfs_12=4]' -M} . $mem . - q{ -R 'span[hosts=1]' -n} . $cpu . 
- qq{ -w'done(123) && done(321)' -J 'illumina2bam_8797_8_20121112-123456' -o $pbcal_dir/log/illumina2bam_8797_8_20121112-123456.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar $jar_path/Illumina2bam.jar I=$intensities_dir L=8 B=$pbcal_dir RG=8797_8 PU=121112_HS20_08797_A_C18TEACXX_8 LIBRARY_NAME="6045465" SAMPLE_ALIAS="ERS181250,ERS181251,ERS181252,ERS181253,ERS181254,ERS181255" STUDY_NAME="ERP001656: Total RNA was extracted from morpholically abnormal and sibling wild type embryos identified by the Zebrafish Mutation Project (http://www.sanger.ac.uk/Projects/D_rerio/zmp/). The 3prime end of fragmented RNA was pulled down using polyToligos attached to magnetic beads, reverse transcribed, made into Illumina libraries and sequenced using IlluminaHiSeq paired-end sequencing. Protocol: Total RNA was extracted from mouse embryos using Trizol and DNase treated. Chemically fragmented RNA was enriched for the 3prime ends by pulled down using an anchored polyToligo attached to magnetic beads. An RNA oligo comprising part of the Illumina adapter 2 was ligated to the 5prime end of the captured RNA and the RNA was eluted from the beads. Reverse transcription was primed with an anchored polyToligo with part of Illumina adapter 1 at the 5prime end followed by 4 random bases, then an A, C or G base, then one of twelve5 base indexing tags and 14 T bases. An Illumina library with full adapter sequence was produced by 15 cycles of PCR. This data is part of a pre-publication release. 
For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/" FIRST_INDEX=6 FINAL_INDEX=10 FIRST_INDEX=1 FINAL_INDEX=5 SEC_BC_SEQ=br SEC_BC_QUAL=qr BC_READ=1 SEC_BC_READ=1 FIRST=11 FINAL=50 OUTPUT=/dev/stdout COMPRESSION_LEVEL=0 | java -Xmx1024m -jar $jar_path/BamIndexDecoder.jar VALIDATION_STRINGENCY=SILENT I=/dev/stdin BARCODE_FILE=t/data/lanetagfile METRICS_FILE=$pbcal_dir/8797_8.bam.tag_decode.metrics CREATE_MD5_FILE=false OUTPUT=/dev/stdout}; - $expected_cmd .= qq{| tee >(bamseqchksum > $pbcal_dir/8797_8.post_i2b.seqchksum)}; - $expected_cmd .= q{ >(md5sum -b | tr -d '"'"'"'"'"'"'"'"'\n *\-'"'"'"'"'"'"'"'"' > } . qq{$pbcal_dir/8797_8.bam.md5)}; - $expected_cmd .= qq{ > $pbcal_dir/8797_8.bam'}; - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for run 8797 lane 8, special "jecfoo" read1 index'); -} - -{ ## adapter detection - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = 't/data'; - my $rf = join q[/], $dir, q[131010_HS34_11018_B_H722AADXX]; - my $bc = join q[/], $rf, q[Data/Intensities/BaseCalls]; - my $i = join q[/], $rf, q[Data/Intensities]; - make_path $bc; - copy q[t/data/example_runfolder/131010_HS34_11018_B_H722AADXX/RunInfo.xml], $rf; - - my $bam_generator; - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - runfolder_path => $rf, - is_indexed => 0, - verbose => 0, - timestamp => q{20131028-155757}, - bam_basecall_path => $bc, - ); } q{no croak creating bam_generator object for run 11018}; - - my $alims = $bam_generator->lims->associated_child_lims_ia; - my $position = 1; - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - my $mem = $bam_generator->general_values_conf()->{illumina2bam_memory}; - my $cpu = $bam_generator->general_values_conf()->{illumina2bam_cpu}; - my $bsub_command = 
$bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}); - #$bsub_command = $util->drop_temp_part_from_paths( $bsub_command ); - - my $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem.q{ -R 'span[hosts=1]' -n} . $cpu . q{ -w'done(123) && done(321)' -J 'illumina2bam_11018_1_20131028-155757' -o } . $bc . q{/log/illumina2bam_11018_1_20131028-155757.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar } . qq{$jar_path/Illumina2bam.jar I=} . qq{$i L=1 B=$bc RG=11018_1 PU=131010_HS34_11018_B_H722AADXX_1 LIBRARY_NAME="8314075" SAMPLE_ALIAS="ERS333055,ERS333070,ERS333072,ERS333073,ERS333076,ERS333077" STUDY_NAME="ERP000730: llumina sequencing of various Plasmodium species is being carried out for de novo assembly and comparative genomics. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/" OUTPUT=/dev/stdout COMPRESSION_LEVEL=0 | bamadapterfind md5=1 md5filename=$bc/11018_1.bam.md5}; - $expected_cmd .= qq{| tee >(bamseqchksum > $bc/11018_1.post_i2b.seqchksum)}; - $expected_cmd .= qq{ > $bc/11018_1.bam'}; - - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for lane 1 (with adapter detection)'); -} - -{ ## more testing of special 3' pulldown RNAseq - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = 't/data'; - - my $rf = join q[/], $dir, q[121103_HS29_08747_B_C1BV5ACXX]; - my $bc = join q[/], $rf, q[Data/Intensities/BaseCalls]; - my $i = join q[/], $rf, q[Data/Intensities]; - make_path $bc; - copy q[t/data/example_runfolder/121103_HS29_08747_B_C1BV5ACXX/RunInfo.xml], $rf; - - my $bam_generator; - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - runfolder_path => $rf, - timestamp => q{20121112-123456}, - bam_basecall_path => $bc, - verbose 
=> 0, - ); } q{no croak creating bam_generator object for run 8747}; - - my $alims = $bam_generator->lims->associated_child_lims_ia; - my $position = 4; - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - my $mem = 4000; - my $cpu = 2; - my $bsub_command = $bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}, 't/data/lanetagfile'); - - my $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem.q{ -R 'span[hosts=1]' -n}.$cpu. q{ -w'done(123) && done(321)' -J 'illumina2bam_8747_4_20121112-123456' -o } . $bc . q{/log/illumina2bam_8747_4_20121112-123456.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar } . qq{$jar_path/Illumina2bam.jar I=} . $i . q{ L=4 B=} . $bc . q{ RG=8747_4 PU=121103_HS29_08747_B_C1BV5ACXX_4 LIBRARY_NAME="6101244" SAMPLE_ALIAS="ERS183138,ERS183139,ERS183140,ERS183141,ERS183142,ERS183143" STUDY_NAME="ERP001559: Total RNA was extracted from wild type and mutant zebrafish embryos. Double stranded cDNA representing the 3'"'"'"'"'"'"'"'"' ends of transcripts was made by a variety of methods, including polyT priming and 3'"'"'"'"'"'"'"'"' pull down on magentic beads. Some samples included indexing test experiments where a sequence barcode was placed within one of the sequence reads.. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/" FIRST_INDEX=5 FINAL_INDEX=10 FIRST_INDEX=1 FINAL_INDEX=4 SEC_BC_SEQ=br SEC_BC_QUAL=qr BC_READ=1 SEC_BC_READ=1 FIRST=11 FINAL=75 FIRST=84 FINAL=158 OUTPUT=/dev/stdout COMPRESSION_LEVEL=0 | java -Xmx1024m -jar } . qq{$jar_path/BamIndexDecoder.jar VALIDATION_STRINGENCY=SILENT I=/dev/stdin BARCODE_FILE=t/data/lanetagfile METRICS_FILE=} . $bc . 
q{/8747_4.bam.tag_decode.metrics CREATE_MD5_FILE=false OUTPUT=/dev/stdout}; - $expected_cmd .= qq{| tee >(bamseqchksum > $bc/8747_4.post_i2b.seqchksum)}; - $expected_cmd .= q{ >(md5sum -b | tr -d '"'"'"'"'"'"'"'"'\n *\-'"'"'"'"'"'"'"'"' > } . qq{$bc/8747_4.bam.md5)}; - $expected_cmd .= qq{ > $bc/8747_4.bam'}; - - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for lane 4 of 3 prime pulldown'); -} - -{ ## more testing of special 3' pulldown RNAseq for non-standard inline index - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = 't/data'; - - my $rf = join q[/], $dir, q[130917_MS6_10808_A_MS2030455-300V2]; - my $bc = join q[/], $rf, q[Data/Intensities/BaseCalls]; - my $i = join q[/], $rf, q[Data/Intensities]; - make_path $bc; - copy q[t/data/example_runfolder/130917_MS6_10808_A_MS2030455-300V2/RunInfo.xml], $rf; - - my $bam_generator; - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - runfolder_path => $rf, - timestamp => q{20130919-132702}, - bam_basecall_path => $bc, - verbose => 0, - ); } q{no croak creating bam_generator object for run 10808}; - - my $alims = $bam_generator->lims->associated_child_lims_ia; - my $position = 1; - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - my $mem = 4000; - my $cpu = 2; - my $bsub_command = $bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}, 't/data/lanetagfile'); - - my $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem.q{ -R 'span[hosts=1]' -n}.$cpu. q{ -w'done(123) && done(321)' -J 'illumina2bam_10808_1_20130919-132702' -o } . $bc . q{/log/illumina2bam_10808_1_20130919-132702.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar } . qq{$jar_path/Illumina2bam.jar I=} . qq{$i L=1 B=$bc} . 
q{ RG=10808_1 PU=130917_MS6_10808_A_MS2030455-300V2_1 LIBRARY_NAME="8115659" SAMPLE_ALIAS="single_cell_1,single_cell_2,single_cell_3,single_cell_4" STUDY_NAME="Transcriptome profiling protocol development: Various test protocols to improve the 3'"'"'"'"'"'"'"'"' pull down transcript profiling protocol, aiming to produce a pipeline library prep protocol. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ " FIRST=1 FINAL=150 FIRST_INDEX=168 FINAL_INDEX=172 FIRST_INDEX=156 FINAL_INDEX=167 SEC_BC_SEQ=br SEC_BC_QUAL=qr BC_READ=2 SEC_BC_READ=2 FIRST=173 FINAL=305 OUTPUT=/dev/stdout COMPRESSION_LEVEL=0 | java -Xmx1024m -jar } . qq{$jar_path/BamIndexDecoder.jar VALIDATION_STRINGENCY=SILENT I=/dev/stdin BARCODE_FILE=t/data/lanetagfile METRICS_FILE=} . $bc . q{/10808_1.bam.tag_decode.metrics CREATE_MD5_FILE=false OUTPUT=/dev/stdout}; - $expected_cmd .= qq{| tee >(bamseqchksum > $bc/10808_1.post_i2b.seqchksum)}; - $expected_cmd .= q{ >(md5sum -b | tr -d '"'"'"'"'"'"'"'"'\n *\-'"'"'"'"'"'"'"'"' > } . 
qq{$bc/10808_1.bam.md5)}; - $expected_cmd .= qq{ > $bc/10808_1.bam'}; - - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for lane 1 of 3 prime pulldown'); -} - -{ ## test of un-equal read lengths - local $ENV{NPG_WEBSERVICE_CACHE_DIR} = 't/data'; - - my $rf = join q[/], $dir, q[131021_MS5_11123_A_MS2000187-150V3]; - my $bc = join q[/], $rf, q[Data/Intensities/BaseCalls]; - my $i = join q[/], $rf, q[Data/Intensities]; - make_path $bc; - copy q[t/data/example_runfolder/131021_MS5_11123_A_MS2000187-150V3/RunInfo.xml], $rf; - - my $bam_generator; - lives_ok { $bam_generator = npg_pipeline::archive::file::generation::illumina2bam->new( - runfolder_path => $rf, - is_indexed => 0, - timestamp => q{20131022-114117}, - bam_basecall_path => $bc, - verbose => 0, - ); } q{no croak creating bam_generator object for run 1123}; - - my $alims = $bam_generator->lims->associated_child_lims_ia; - my $position = 1; - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - my $mem = $bam_generator->general_values_conf()->{illumina2bam_memory}; - my $cpu = $bam_generator->general_values_conf()->{illumina2bam_cpu}; - my $bsub_command = $bam_generator->_generate_bsub_commands( $arg_refs , $alims->{$position}); - - my $expected_cmd = q{bsub -q srpipeline -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}. $mem. q{ -R 'span[hosts=1]' -n}.$cpu. q{ -w'done(123) && done(321)' -J 'illumina2bam_11123_1_20131022-114117' -o } . $bc . q{/log/illumina2bam_11123_1_20131022-114117.%J.out /bin/bash -c 'set -o pipefail;java -Xmx1024m -jar } . qq{$jar_path/Illumina2bam.jar } . qq{I=$i L=1 B=$bc} . q{ RG=11123_1 PU=131021_MS5_11123_A_MS2000187-150V3_1 LIBRARY_NAME="8111702" SAMPLE_ALIAS="arg404,arg405,arg406,arg407,arg408,arg409,arg410,arg411,arg412,arg413,arg414,arg415,arg416,arg417,arg418,arg419,arg420,arg421,arg422,arg423,arg424,arg425" STUDY_NAME="ERP001151: Data obtained from the sequencing of pools of barcoded P. 
berghei transgenics is predicted to allow for qualitative and quantitative measurements of individual mutant progeny generated during multiplex transfections. This type of analysis is expected to take P. berghei reverse genetics beyond that of the single-gene level. It aims to explore genetic interactions by measuring the effect on growth rates caused by simultaneous disruption of different genes in diverse genetic backgrounds, as well as potentially becoming a tool to identify essential genes to be prioritised as e.g. potential drug targets, or conversely to be excluded from future gene disruption studies. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/" CREATE_MD5_FILE=false OUTPUT=/dev/stdout}; - $expected_cmd .= qq{| tee >(bamseqchksum > $bc/11123_1.post_i2b.seqchksum)}; - $expected_cmd .= q{ >(md5sum -b | tr -d '"'"'"'"'"'"'"'"'\n *\-'"'"'"'"'"'"'"'"' > } . 
qq{$bc/11123_1.bam.md5)}; - $expected_cmd .= qq{ > $bc/11123_1.bam'}; - - eq_or_diff([split"=",$bsub_command], [split"=",$expected_cmd], 'correct bsub command for run with un-equal read lengths'); -} - -1; -__END__ diff --git a/t/20-archive_file_generation-seq_alignment.t b/t/20-archive_file_generation-seq_alignment.t index d18cfe85d..531c001ab 100644 --- a/t/20-archive_file_generation-seq_alignment.t +++ b/t/20-archive_file_generation-seq_alignment.t @@ -3,6 +3,7 @@ use warnings; use Test::More tests => 11; use Test::Exception; use Test::Deep; +use Test::Warn; use File::Temp qw/tempdir/; use Cwd qw/cwd abs_path/; use Perl6::Slurp; @@ -10,6 +11,8 @@ use File::Copy; use Log::Log4perl qw(:levels); use JSON; +use st::api::lims; + use_ok('npg_pipeline::archive::file::generation::seq_alignment'); local $ENV{'NPG_WEBSERVICE_CACHE_DIR'} = q[t/data/rna_seq]; local $ENV{'TEST_FS_RESOURCE'} = 'nfs-sf3'; @@ -102,12 +105,14 @@ subtest 'test 1' => sub { my $qc_in = $dir . q[/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/lane4]; my $qc_out = join q[/], $qc_in, q[qc]; + my $qc_report_dir = $dir . 
q[/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/qc/rna_seqc/12597_4/12597_4#3]; my $args = {}; $args->{'40003'} = qq{bash -c '\ mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#3 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#3 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#3_p4s2_pv_in.json -export_param_vals 12597_4#3_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#3.json && viv.pl -s -x -v 3 -o viv_12597_4#3.log run_12597_4#3.json } . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 3} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 3} . q{ && qc --check alignment_filter_metrics --id_run 12597 --position 4 --qc_in $PWD --qc_out } .$qc_out.q{ --tag_index 3}. + qq{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $qc_in --qc_out } . $qc_out . q{ --tag_index 3}. 
q{ '}; $args->{'40000'} = qq{bash -c '\ mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#0_p4s2_pv_in.json -export_param_vals 12597_4#0_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#0.json && viv.pl -s -x -v 3 -o viv_12597_4#0.log run_12597_4#0.json } . @@ -142,6 +147,7 @@ subtest 'test 1' => sub { qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 3} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 3} . q{ && qc --check alignment_filter_metrics --id_run 12597 --position 4 --qc_in $PWD --qc_out } .$qc_out.q{ --tag_index 3}. + qq{ && qc --check rna_seqc --id_run 12597 --position 4 --qc_in $qc_in --qc_out } . $qc_out . q{ --tag_index 3}. 
qq( '","40000":"bash -c ' mkdir -p $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 ; cd $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/archive/tmp_\$LSB_JOBID/12597_4#0 && vtfp.pl -param_vals $dir/140409_HS34_12597_A_C333TACXX/Data/Intensities/BAM_basecalls_20140515-073611/no_cal/lane4/12597_4#0_p4s2_pv_in.json -export_param_vals 12597_4#0_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '\\"'\\"'fop.*samtools_stats_F0.*00_bait.*'\\"'\\"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_12597_4#0.json && viv.pl -s -x -v 3 -o viv_12597_4#0.log run_12597_4#0.json ) . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --tag_index 0} . qq{ && qc --check bam_flagstats --id_run 12597 --position 4 --qc_in $qc_in --qc_out $qc_out --subset phix --tag_index 0} . 
@@ -265,11 +271,13 @@ subtest 'test 2' => sub { my $qc_in = qq{$dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive}; my $qc_out = join q[/], $qc_in, q[qc]; + my $qc_report_dir = qq[$dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/qc/rna_seqc/13066_8]; my $args = {}; $args->{8} = qq{bash -c ' mkdir -p $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/tmp_\$LSB_JOBID/13066_8 ; cd $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/archive/tmp_\$LSB_JOBID/13066_8 && vtfp.pl -param_vals $dir/140529_HS18_13066_A_C3C3KACXX/Data/Intensities/BAM_basecalls_20140606-133530/no_cal/13066_8_p4s2_pv_in.json -export_param_vals 13066_8_p4s2_pv_out_\${LSB_JOBID}.json -keys cfgdatadir -vals \$(dirname \$(readlink -f \$(which vtfp.pl)))/../data/vtlib/ -keys aligner_numthreads -vals `npg_pipeline_job_env_to_threads` -keys br_numthreads_val -vals `npg_pipeline_job_env_to_threads --exclude 1 --divide 2` -keys b2c_mt_val -vals `npg_pipeline_job_env_to_threads --exclude 2 --divide 2` -prune_nodes '"'"'fop.*samtools_stats_F0.*00_bait.*'"'"' \$(dirname \$(dirname \$(readlink -f \$(which vtfp.pl))))/data/vtlib/alignment_wtsi_stage2_template.json > run_13066_8.json && viv.pl -s -x -v 3 -o viv_13066_8.log run_13066_8.json } . qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out} . qq{ && qc --check bam_flagstats --id_run 13066 --position 8 --qc_in $qc_in --qc_out $qc_out --subset phix} . - q{ && qc --check alignment_filter_metrics --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . q{ '}; + q{ && qc --check alignment_filter_metrics --id_run 13066 --position 8 --qc_in $PWD --qc_out } . $qc_out . + qq{ && qc --check rna_seqc --id_run 13066 --position 8 --qc_in $qc_in --qc_out } . $qc_out . 
q{ '}; lives_ok {$rna_gen->_generate_command_arguments([8])} 'no error generating command arguments'; @@ -327,7 +335,7 @@ subtest 'test 3' => sub { }; subtest 'test 4' => sub { - plan tests => 5; + plan tests => 8; ##HiSeqX, run 16839_7 my $ref_dir = join q[/],$dir,'references','Homo_sapiens','GRCh38_full_analysis_set_plus_decoy_hla','all'; @@ -342,7 +350,8 @@ subtest 'test 4' => sub { my $cache_dir = join q[/], $runfolder_path, 'Data/Intensities/BAM_basecalls_20150712-121006/metadata_cache_16839'; `mkdir -p $cache_dir`; copy("t/data/hiseqx/16839_RunInfo.xml","$runfolder_path/RunInfo.xml") or die "Copy failed: $!"; #to get information that it is paired end - `touch $ref_dir/fasta/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa`; + my $fasta_ref = "$ref_dir/fasta/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa"; + `touch $fasta_ref`; `touch $ref_dir/picard/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa.dict`; `touch $ref_dir/bwa0_6/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa.alt`; `touch $ref_dir/bwa0_6/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa.amb`; @@ -352,7 +361,7 @@ subtest 'test 4' => sub { `touch $ref_dir/bwa0_6/Homo_sapiens.GRCh38_full_analysis_set_plus_decoy_hla.fa.sa`; local $ENV{'NPG_WEBSERVICE_CACHE_DIR'} = q[t/data/hiseqx]; - local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = q[t/data/hiseqx/samplesheet_16839.csv]; + local $ENV{'NPG_CACHED_SAMPLESHEET_FILE'} = q[t/data/hiseqx/samplesheet_16839.csv]; my $hsx_gen; lives_ok { @@ -367,6 +376,29 @@ subtest 'test 4' => sub { } 'no error creating an object'; is ($hsx_gen->id_run, 16839, 'id_run inferred correctly'); + my $l = st::api::lims->new(id_run => 16839, position => 1, tag_index => 0); + is ($hsx_gen->_ref($l, 'fasta'), $fasta_ref, 'reference for tag zero'); + my $old_ss = $ENV{'NPG_CACHED_SAMPLESHEET_FILE'}; + my $ss = slurp $old_ss; + $ss =~ s/GRCh38_full_analysis_set_plus_decoy_hla/GRCh38X/; + my $new_ss = "$dir/multiref_samplesheet_16839.csv"; + open my 
$fhss, '>', $new_ss or die "Cannot open $new_ss for writing"; + print $fhss $ss or die "Cannot write to $new_ss"; + close $fhss or warn "Failed to close $new_ss"; + # new samplesheet has multiple references in lane 1 + local $ENV{'NPG_CACHED_SAMPLESHEET_FILE'} = $new_ss; + my $other_ref_dir = join q[/],$dir,'references','Homo_sapiens','GRCh38X','all'; + `mkdir -p $other_ref_dir/fasta`; + `touch $other_ref_dir/fasta/Homo_sapiens.GRCh38X.fa`; + $l = st::api::lims->new(id_run => 16839, position => 1, tag_index => 0); + my $other_ref; + warnings_exist { $other_ref = $hsx_gen->_ref($l, 'fasta') } + qr/Multiple references for st::api::lims object, driver - samplesheet/, + 'warning about multiple references'; + is ($other_ref, undef, 'multiple references in a lane - no reference for tag zero returned'); + + # restore old samplesheet + local $ENV{'NPG_CACHED_SAMPLESHEET_FILE'} = $old_ss; my $qc_in = qq{$dir/150709_HX4_16839_A_H7MHWCCXX/Data/Intensities/BAM_basecalls_20150712-121006/no_cal/archive/lane7}; my $qc_out = qq{$qc_in/qc}; my $args = {}; diff --git a/t/20-archive_logs.t b/t/20-archive_logs.t index ff16ad1cb..c35d27fba 100644 --- a/t/20-archive_logs.t +++ b/t/20-archive_logs.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 13; +use Test::More tests => 11; use Test::Exception; use t::util; @@ -21,7 +21,6 @@ my $pb_cal_path = $analysis_runfolder_path . 
$pb_cal; my $rfpath = '/nfs/sf45/IL2/outgoing/123456_IL2_1234'; sub create_analysis { - `rm -rf $tmp_dir/nfs/sf45`; `mkdir -p $analysis_runfolder_path/$pb_cal/archive`; `mkdir $analysis_runfolder_path/Config`; `cp t/data/Recipes/Recipe_GA2_37Cycle_PE_v6.1.xml $analysis_runfolder_path/`; @@ -89,26 +88,5 @@ sub create_analysis { is( $bsub_command, $expected_command, q{generated bsub command is correct} ); } -{ - my $bam_irods; - - lives_ok { $bam_irods = npg_pipeline::archive::file::logs->new( - function_list => q{post_qc_review_gclp}, - run_folder => q{123456_IL2_1234}, - runfolder_path => $analysis_runfolder_path, - recalibrated_path => $pb_cal_path, - timestamp => q{20090709-123456}, - verbose => 0, - ); } q{created with run_folder ok}; - - my $arg_refs = { - required_job_completion => q{-w'done(123) && done(321)'}, - }; - - my $bsub_command = $util->drop_temp_part_from_paths( $bam_irods ->_generate_bsub_command($arg_refs) ); - my $expected_command = qq{bsub -q lowload -w'done(123) && done(321)' -J npg_irods_log_loader.pl_1234_20090709-123456 -R 'rusage[nfs_12=1,seq_irods=15]' -o ${rfpath}/Data/Intensities/Bustard1.3.4_09-07-2009_auto/PB_cal/log/npg_irods_log_loader.pl_1234_20090709-123456.out -E "[ -d '$rfpath' ]" 'irodsEnvFile=\$HOME/.irods/.irodsEnv-gclp-iseq-logs npg_irods_log_loader.pl --runfolder_path $rfpath --id_run 1234 --irods_root /gseq/'}; - is( $bsub_command, $expected_command, q{generated bsub command is correct} ); -} - 1; __END__ diff --git a/t/20-archive_qc.t b/t/20-archive_qc.t index 83617ef15..e9e47b090 100644 --- a/t/20-archive_qc.t +++ b/t/20-archive_qc.t @@ -10,18 +10,14 @@ use File::Slurp; use Cwd; use t::util; -local $ENV{PATH} = join q[:], q[t/bin], q[t/bin/software/solexa/bin], $ENV{PATH}; - use_ok('npg_pipeline::archive::file::qc'); my $util = t::util->new(); my $tmp = $util->temp_directory(); -$ENV{TEST_DIR} = $tmp; $ENV{TEST_FS_RESOURCE} = q{nfs_12}; local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[t/data]; local $ENV{PATH} = join q[:], 
q[t/bin], q[t/bin/software/solexa/bin], $ENV{PATH}; -my $run_folder = $util->default_runfolder(); my $pbcal = q{/nfs/sf45/IL2/analysis/123456_IL2_1234/Data/Intensities/Bustard1.3.4_09-07-2009_auto/PB_cal}; my $recalibrated = $util->analysis_runfolder_path() . q{/Data/Intensities/Bustard1.3.4_09-07-2009_auto/PB_cal}; @@ -32,7 +28,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; { throws_ok { npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $util->analysis_runfolder_path(), recalibrated_path => $recalibrated, ) @@ -43,7 +38,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; my $aqc; lives_ok { $aqc = npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $util->analysis_runfolder_path(), recalibrated_path => $recalibrated, qc_to_run => q{adapter}, @@ -68,7 +62,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; my $aqc; lives_ok { $aqc = npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $util->analysis_runfolder_path(), recalibrated_path => $recalibrated, qc_to_run => q{qX_yield}, @@ -90,7 +83,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; { my $aqc = npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $util->analysis_runfolder_path(), recalibrated_path => $recalibrated, qc_to_run => q{qX_yield}, @@ -117,7 +109,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; my $runfolder_path = $util->analysis_runfolder_path(); my $aqc = npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $runfolder_path, recalibrated_path => $recalibrated, lanes => [7], @@ -132,7 +123,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[]; local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = 't/data/qc/1234_samplesheet_amended.csv'; $aqc = npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $runfolder_path, recalibrated_path => 
$recalibrated, lanes => [8], @@ -145,7 +135,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; is(scalar@jids, 2, q{2 job ids returned}); # the lane is a pool $aqc = npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $runfolder_path, recalibrated_path => $recalibrated, lanes => [8], @@ -166,7 +155,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; my $runfolder_path = $util->analysis_runfolder_path(); my $aqc = npg_pipeline::archive::file::qc->new( - run_folder => $run_folder, runfolder_path => $runfolder_path, recalibrated_path => $recalibrated, lanes => [7], @@ -184,7 +172,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; $aqc = npg_pipeline::archive::file::qc->new( id_run => 14353, - run_folder => $run_folder, runfolder_path => $util->analysis_runfolder_path(), recalibrated_path => $recalibrated, lanes => [1], @@ -276,7 +263,6 @@ $arg_refs->{'required_job_completion'} = $job_dep;; my $init = { id_run => 14043, - run_folder => $rf_name, runfolder_path => $rf_path, bam_basecall_path => $analysis_dir, archive_path => $archive_dir, diff --git a/t/25-analysis-bustard4pbcb.t b/t/25-analysis-bustard4pbcb.t deleted file mode 100644 index cc7490772..000000000 --- a/t/25-analysis-bustard4pbcb.t +++ /dev/null @@ -1,141 +0,0 @@ -use strict; -use warnings; -use Test::More tests => 18; -use Test::Exception; -use Log::Log4perl qw(:levels); -use t::util; - -my $util = t::util->new(); -my $tmp_dir = $util->temp_directory(); - -Log::Log4perl->easy_init({layout => '%d %-5p %c - %m%n', - level => $DEBUG, - file => join(q[/], $tmp_dir, 'logfile'), - utf8 => 1}); - -$ENV{TEST_DIR} = $tmp_dir; -$ENV{TEST_FS_RESOURCE} = q{nfs_12}; -local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[t/data]; -local $ENV{PATH} = join q[:], q[t/bin], q[t/bin/software/solexa/bin], $ENV{PATH}; -my $mem_units = 'MB'; - -use_ok(q{npg_pipeline::analysis::bustard4pbcb}); - -my $runfolder_path = $util->analysis_runfolder_path(); -my $bustard_home = 
qq{$runfolder_path/Data/Intensities}; -my $bustard_rta = qq{$bustard_home/Bustard_RTA}; -my $config_path = qq{$runfolder_path/Config}; - -my $req_job_completion = q{-w'done(123) && done(321)'}; - -sub set_staging_analysis_area { - `rm -rf $tmp_dir/nfs/sf45`; - `mkdir -p $bustard_home`; - `mkdir $config_path`; - return 1; -} - -{ - set_staging_analysis_area(); - my $bustard; - lives_ok { - $bustard = npg_pipeline::analysis::bustard4pbcb->new( - id_run => 1234, - bustard_home => $bustard_home, - ); - } q{no croak creating new object with id_run and bustard_home attributes}; - isa_ok($bustard, q{npg_pipeline::analysis::bustard4pbcb}, q{$bustard}); - - require "npg_pipeline/pluggable.pm"; - lives_ok { - $bustard = npg_pipeline::analysis::bustard4pbcb->new( - pipeline => npg_pipeline::pluggable->new(id_run=>1), - id_run => 1234, - bustard_home => $bustard_home, - bustard_dir => join ($bustard_home, 'BUSTARD_NPG'), - ); - } q{no croak creating new object with pipeline, id_run, bustard_home and bustard_dir attributes}; - is($bustard->script_path, '/software/solexa/src/OLB-1.9.4/bin/bustard.py', 'live bustard script path'); - - throws_ok { - npg_pipeline::analysis::bustard4pbcb->new( - pipeline => $util, - id_run => 1234, - bustard_home => $bustard_home, - bustard_dir => join ($bustard_home, 'BUSTARD_NPG'), - )} qr/Validation failed for 'NpgPipelinePluggableObject'/, 'error when pipeline object has wrong type'; -} - -{ - my $bustard = npg_pipeline::analysis::bustard4pbcb->new( - pipeline => npg_pipeline::pluggable->new(), - id_run => 1234, - bustard_home => $bustard_home, - bustard_dir => join ($bustard_home, 'BUSTARD_NPG'), - timestamp => '20091028-101635', - script_path => '/bin/true', - ); - - my $lsf_index_string = $bustard->lsb_jobindex(); - - my $expected_cmd = qq{LOGNAME=101635 /bin/true --make --CIF --keep-dif-files --no-eamss --phasing=lane --matrix=lane --tiles=s_1,s_2,s_3,s_4,s_5,s_6,s_7,s_8 $bustard_home > $bustard_home/bustard_output_20091028-101635.txt 2>&1}; 
- is( $bustard->_bustard_command(), $expected_cmd, q{bustard command}); - - my $mem = 13800; - my $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>$mem_units)->_scale_mem_limit(); - $expected_cmd = q{bsub -n 8,16 -q srpipeline -o log/bustard_basecalls_all_1234_20091028-101635.%J.out -J bustard_basecalls_all_1234_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.q{ -R 'span[hosts=1]' -w'done(123) && done(321)' 'make -j `npg_pipeline_job_env_to_threads` all'}; - is( $bustard->_make_command('basecalls_all', $req_job_completion), $expected_cmd, q{command for basecalls all generated correctly}); - - $expected_cmd = q{bsub -n 8,16 -q srpipeline -o log/bustard_basecalls_lanes_1234_20091028-101635.%I.%J.out -J bustard_basecalls_lanes_1234_20091028-101635[1,2,3,4,5,6,7,8] -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.q{ -R 'span[hosts=1]' -w'done(123) && done(321)' 'make -j `npg_pipeline_job_env_to_threads` s_} . $lsf_index_string . 
q{'}; - is( $bustard->_make_command('basecalls_lanes', $req_job_completion), $expected_cmd, q{command for basecall lanes generated correctly}); - - $bustard = npg_pipeline::analysis::bustard4pbcb->new( - pipeline => npg_pipeline::pluggable->new(), - id_run => 1234, - bustard_home => $bustard_home, - bustard_dir => join ($bustard_home, 'BUSTARD_NPG'), - timestamp => '20091028-101635', - script_path => 'bustard_script', - lanes => [1,3,5], - ); - - $expected_cmd = qq{LOGNAME=101635 bustard_script --make --CIF --keep-dif-files --no-eamss --phasing=lane --matrix=lane --tiles=s_1,s_3,s_5 $bustard_home > $bustard_home/bustard_output_20091028-101635.txt 2>&1}; - is( $bustard->_bustard_command(), $expected_cmd, q{bustard command}); - - $expected_cmd = q{bsub -n 8,16 -q srpipeline -o log/bustard_matrix_lanes_1234_20091028-101635.%I.%J.out -J bustard_matrix_lanes_1234_20091028-101635[1,3,5] -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.q{ -R 'span[hosts=1]' 'make -j `npg_pipeline_job_env_to_threads` matrix_`echo $LSB_JOBINDEX`_finished.txt'}; - is ($bustard->_make_command('matrix_lanes'), $expected_cmd, 'matrix lane command'); - - $expected_cmd = q{bsub -n 8,16 -q srpipeline -o log/bustard_phasing_all_1234_20091028-101635.%J.out -J bustard_phasing_all_1234_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.q{ -R 'span[hosts=1]' 'make -j `npg_pipeline_job_env_to_threads` phasing_finished.txt'}; - is ($bustard->_make_command('phasing_all'), $expected_cmd, 'phasing all command'); -} - -{ - my $bustard = npg_pipeline::analysis::bustard4pbcb->new( - id_run => 1234, - bustard_home => $bustard_home, - timestamp => '20091028-101635', - script_path => 'none', - ); - throws_ok { $bustard->bustard_dir } qr/ not found/, 'error when bustard command not found'; - - $bustard = npg_pipeline::analysis::bustard4pbcb->new( - id_run => 1234, - bustard_home => $bustard_home, - script_path => '/bin/true', - ); - throws_ok { 
$bustard->bustard_dir } qr/No bustard output in/, 'error when bustard output file is empty'; - - $bustard = npg_pipeline::analysis::bustard4pbcb->new( - id_run => 1234, - bustard_home => $bustard_home, - script_path => '/bin/true', - ); - throws_ok { $bustard->_get_bustard_dir(qw/one two three/) } qr/No record about bustard directory/, 'error when bustard output file does not contain the bustard directory name'; - - my $dir; - lives_ok { $dir = $bustard->_get_bustard_dir('one', 'Sequence folder: folder', 'three')} 'parsing bustard output lives'; - is ($dir, 'folder', 'correct bustard directory extracted'); - is($bustard->_get_bustard_dir('one', 'Sequence folder:folder', 'three'), undef, 'undef returned if line format is wrong'); -} - -1; diff --git a/t/25-analysis-illumina_basecall_stats.t b/t/25-analysis-illumina_basecall_stats.t new file mode 100644 index 000000000..3ee5e3610 --- /dev/null +++ b/t/25-analysis-illumina_basecall_stats.t @@ -0,0 +1,70 @@ +use strict; +use warnings; +use Test::More tests => 4; +use Test::Exception; +use Cwd; +use Log::Log4perl qw(:levels); + +use npg_tracking::util::abs_path qw(abs_path); +use t::util; + +my $util = t::util->new(); + +my $curdir = abs_path(getcwd()); +my $tdir = $util->temp_directory(); + +local $ENV{NPG_WEBSERVICE_CACHE_DIR} = $curdir . 
q{/t/data}; +local $ENV{TEST_FS_RESOURCE} = q{nfs_12}; + +Log::Log4perl->easy_init({layout => '%d %-5p %c - %m%n', + level => $DEBUG, + file => join(q[/], $tdir, 'logfile'), + utf8 => 1}); + +my $e = join q[/], $tdir, 'setupBclToQseq.py'; +open my $fh, '>', $e or die "Cannot open $e for writing: $!"; +print {$fh} "#!/usr/bin/env python\n" or die "Cannot write to $e: $!"; # write the stub into the file, not to STDOUT +close $fh or die "Cannot close $e: $!"; +chmod 0755, $e; + +local $ENV{PATH} = join q[:], qq[$curdir/t/bin], $tdir, $ENV{PATH}; + +use_ok(q{npg_pipeline::analysis::illumina_basecall_stats}); + +{ + my $runfolder_path = $util->analysis_runfolder_path(); + my $bustard_rta = qq{$runfolder_path/Data/Intensities/Bustard_RTA}; + `mkdir -p $bustard_rta`; + + my $obj; + my $id_run = 1234; + my $bam_basecall_path = $runfolder_path . q{/Data/Intensities/BAM_basecalls}; + my $basecall_path = $runfolder_path . q{/Data/Intensities/BaseCalls}; + lives_ok { + $obj = npg_pipeline::analysis::illumina_basecall_stats->new({ + id_run => $id_run, + run_folder => q{123456_IL2_1234}, + runfolder_path => $runfolder_path, + timestamp => q{20091028-101635}, + verbose => 0, + bam_basecall_path => $bam_basecall_path, + no_bsub => 1, + }) + } q{create object ok}; + + my $arg_refs = { + timestamp => q{20091028-101635}, + position => 1, + job_dependencies => q{-w 'done(1234) && done(4321)'}, + }; + my $mem = 350; + my $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>'MB')->_scale_mem_limit(); + my $expected_command = qq(bsub -q srpipeline -o $bam_basecall_path/log/basecall_stats_1234_20091028-101635.%J.out -J basecall_stats_1234_20091028-101635 -R 'select[mem>).$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M} . $mem_limit . qq( -R 'span[hosts=1]' -n 4 " cd $bam_basecall_path && if [[ -f Makefile ]]; then echo Makefile already present 1>&2; else echo creating bcl2qseq Makefile 1>&2; ) . 
qq($tdir/setupBclToQseq.py -b $basecall_path -o $bam_basecall_path --overwrite; fi && make -j 4 Matrix Phasing && make -j 4 BustardSummary.x{s,m}l "); + is( $obj->_generate_command( $arg_refs ), $expected_command, + q{Illumina basecalls stats generation bsub command is correct} ); + + my @job_ids = $obj->generate($arg_refs); + is( scalar @job_ids, 1, q{1 job ids, generate Illumina basecall stats} ); +} + +1; diff --git a/t/25-harold_calibration_bam.t b/t/25-harold_calibration_bam.t deleted file mode 100644 index 857d22197..000000000 --- a/t/25-harold_calibration_bam.t +++ /dev/null @@ -1,335 +0,0 @@ -use strict; -use warnings; -use Test::More tests => 39; -use Test::Exception; -use t::util; -use Cwd; -use Log::Log4perl qw(:levels); - -use npg_tracking::util::abs_path qw(abs_path); -my $util = t::util->new(); - -my $curdir = abs_path(getcwd()); -my $repos = join q[/], $curdir, 't/data/sequence'; - -my $tdir = $util->temp_directory(); -$ENV{TEST_DIR} = $tdir; -$ENV{TEST_FS_RESOURCE} = q{nfs_12}; -$ENV{NPG_WEBSERVICE_CACHE_DIR} = $curdir . 
q{/t/data}; - -Log::Log4perl->easy_init({layout => '%d %-5p %c - %m%n', - level => $DEBUG, - file => join(q[/], $tdir, 'logfile'), - utf8 => 1}); - -my $sp = join q[/], $tdir, 'spatial_filter'; -my $java = join q[/], $tdir, 'java'; -foreach my $tool (($sp, $java)) { - `touch $tool`; - `chmod +x $tool`; -} -local $ENV{PATH} = join q[:], qq[$curdir/t/bin], $tdir, $ENV{PATH}; -local $ENV{CLASSPATH} = q{t/bin/software/solexa/jars}; - -my $id_run; -my $mem_units = 'MB'; - -use_ok(q{npg_pipeline::analysis::harold_calibration_bam}); - -my $runfolder_path = $util->analysis_runfolder_path(); -my $bustard_home = qq{$runfolder_path/Data/Intensities}; -my $bustard_rta = qq{$bustard_home/Bustard_RTA}; -my $gerald_rta = qq{$bustard_rta/GERALD_RTA}; -my $config_path = qq{$runfolder_path/Config}; - -sub set_staging_analysis_area { - `rm -rf /tmp/nfs/sf45`; - `mkdir -p $bustard_rta`; - `mkdir -p $config_path`; - `cp t/data/Recipes/Recipe_GA2_37Cycle_PE_v6.1.xml $runfolder_path/`; - `cp t/data/Recipes/TileLayout.xml $config_path/`; - return 1; -} - -{ - set_staging_analysis_area(); - my $harold; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => 1234, - run_folder => q{123456_IL2_1234}, - runfolder_path => $runfolder_path, - timestamp => q{20091028-101635}, - verbose => 0, - repository => $repos, - no_bsub => 1, - recalibration => 1, - }); - } q{create $harold object ok}; - - isa_ok($harold, q{npg_pipeline::analysis::harold_calibration_bam}, q{$harold}); - is($harold->pb_calibration_bin, $tdir, 'pb calibration bin is correct'); - is($harold->spatial_filter_path, $sp, 'spatial filter path is correct'); -} - -{ - set_staging_analysis_area(); - my $harold; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => 1234, - run_folder => q{123456_IL2_1234}, - runfolder_path => $runfolder_path, - timestamp => q{20091028-101635}, - verbose => 0, - repository => $repos, - dif_files_path => $bustard_home, - 
spatial_filter => 1, - no_bsub => 1, - recalibration => 1, - force_phix_split => 0, - }); - } q{create $harold object ok}; - - my $req_job_completion = q{-w'done(123) && done(321)'}; - my $arg_refs = { - required_job_completion => $req_job_completion, - }; - - my @job_ids; - my $mem = 3072; - my $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>$mem_units)->_scale_mem_limit(); - my $job = qq{bsub -q srpipeline -E 'npg_pipeline_preexec_references --repository $curdir/t/data/sequence' -o $bustard_rta/PB_cal/log/PB_cal_table_1234_4_20091028-101635.%J.out -J PB_cal_table_1234_4_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.qq{ -R 'span[hosts=1]' -w'done(123) && done(321)' " cd $bustard_rta/PB_cal && $tdir/pb_calibration --intensity_dir $bustard_home --t_filter 2 --prefix 1234_4 --cstart 1 --bam pb_align_1234_4.bam "}; - lives_ok { - @job_ids = $harold->generate_calibration_table( $arg_refs ); - } q{no croak submitting calibration table jobs}; - - is( scalar @job_ids, 1, q{8 jobs created}); - - is( $harold->_calibration_table_bsub_command( { - dir => $bustard_rta, - position => 4, - job_dependencies => $req_job_completion, - ref_seq => q{phix-illumina.fa}, - } ), $job, q{generated bsub command is correct} ); - - my $cal_table = q{1234_4_purity_cycle_caltable.txt}; - is( $harold->calibration_table_name( { id_run => 1234, position=>4 } ), $cal_table, q{generated calibration table name is correct}); - $mem = 1725; - $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>$mem_units)->_scale_mem_limit(); - $job = qq{bsub -q srpipeline -o $bustard_rta/PB_cal/log/PB_cal_score_1234_3_20091028-101635.%J.out -J PB_cal_score_1234_3_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.qq{ -R 'span[hosts=1]' -w'done(123) && done(321)' ' cd $bustard_rta/PB_cal && bash -c '"'"'if [[ -f pb_align_1234_3.bam ]]; then echo phix alignment so merging alignments with 1>&2; set 
-o pipefail; (if [ -f 1234_4_purity_cycle_caltable.txt ]; then echo recalibrated qvals 1>&2; $tdir/pb_predictor --u --bam ../1234_3.bam --intensity_dir $bustard_home --cstart 1 --ct 1234_4_purity_cycle_caltable.txt ; else echo no recalibration 1>&2; cat ../1234_3.bam ; fi;) | ( if [[ -f pb_align_1234_3.bam.filter ]]; then echo applying spatial filter 1>&2; $sp -u -a -f -F pb_align_1234_3.bam.filter - 2> >( tee /dev/stderr | qc --check spatial_filter --id_run 1234 --position 3 --qc_out $bustard_home/Bustard_RTA/PB_cal/archive/qc ); else echo no spatial filter 1>&2; cat; fi;) | $java -Xmx1024m -jar $curdir/t/bin/software/solexa/jars/BamMerger.jar CREATE_MD5_FILE=true VALIDATION_STRINGENCY=SILENT KEEP=true I=/dev/stdin REPLACE_QUAL=true O=1234_3.bam ALIGNED=pb_align_1234_3.bam; else echo symlinking as no phix alignment 1>&2; rm -f 1234_3.bam; ln -s ../1234_3.bam 1234_3.bam; rm -f 1234_3.bam.md5; ln -s ../1234_3.bam.md5 1234_3.bam.md5; fi'"'"' '}; - my $expect_job = $harold->_recalibration_bsub_command( { - position => 3, - job_dependencies => $req_job_completion, - ct => $cal_table, - } ); - is($expect_job, $job, q{generated bsub command for recalibration job is correct}); - - lives_ok { - @job_ids = $harold->generate_recalibrated_bam($arg_refs); - } q{no croak submitting recalibration jobs}; - is( scalar @job_ids, 8, q{8 jobs created}); -} - -{ - set_staging_analysis_area(); - my $harold; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => 8797, - run_folder => q{121112_HS20_08797_A_C18TEACXX}, - runfolder_path => $runfolder_path, - timestamp => q{20121112-123456}, - verbose => 0, - repository => $repos, - dif_files_path => $bustard_home, - spatial_filter => 1, - no_bsub => 1, - recalibration => 1, - }); - } q{create $harold object ok}; - - my $req_job_completion = q{-w'done(123) && done(321)'}; - my $arg_refs = { - required_job_completion => $req_job_completion, - }; - - my @job_ids; - my $mem = 3072; - my $mem_limit = 
npg_pipeline::lsf_job->new(memory => $mem, memory_units =>$mem_units)->_scale_mem_limit(); - my $job = qq{bsub -q srpipeline -E 'npg_pipeline_preexec_references --repository $curdir/t/data/sequence' -o $bustard_rta/PB_cal/log/PB_cal_table_8797_8_20121112-123456.%J.out -J PB_cal_table_8797_8_20121112-123456 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.qq{ -R 'span[hosts=1]' -w'done(123) && done(321)' " cd $bustard_rta/PB_cal && $tdir/pb_calibration --intensity_dir $bustard_home --t_filter 2 --prefix 8797_8 --cstart 11 --bam pb_align_8797_8.bam "}; - - lives_ok { - @job_ids = $harold->generate_calibration_table( $arg_refs ); - } q{no croak submitting calibration table jobs}; - - is( scalar @job_ids, 8, q{8 jobs created}); - - is( $harold->_calibration_table_bsub_command( { - dir => $bustard_rta, - position => 8, - job_dependencies => $req_job_completion, - ref_seq => q{phix-illumina.fa}, - } ), $job, q{generated bsub command is correct} ); - - my $cal_table = q{8797_7_purity_cycle_caltable.txt}; - is( $harold->calibration_table_name( { id_run => 8797, position=>7 } ), $cal_table, q{generated calibration table name is correct}); - $mem = 1725; - $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>$mem_units)->_scale_mem_limit(); - $job = qq{bsub -q srpipeline -o $bustard_rta/PB_cal/log/PB_cal_score_8797_7_20121112-123456.%J.out -J PB_cal_score_8797_7_20121112-123456 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=8]' -M}.$mem_limit.qq{ -R 'span[hosts=1]' -w'done(123) && done(321)' ' cd $bustard_rta/PB_cal && bash -c '"'"'if [[ -f pb_align_8797_7.bam ]]; then echo phix alignment so merging alignments with 1>&2; set -o pipefail; (if [ -f 8797_7_purity_cycle_caltable.txt ]; then echo recalibrated qvals 1>&2; $tdir/pb_predictor --u --bam ../8797_7.bam --intensity_dir $bustard_home --cstart 11 --ct 8797_7_purity_cycle_caltable.txt ; else echo no recalibration 1>&2; cat ../8797_7.bam ; fi;) | ( if [[ -f 
pb_align_8797_7.bam.filter ]]; then echo applying spatial filter 1>&2; $sp -u -a -f -F pb_align_8797_7.bam.filter - 2> >( tee /dev/stderr | qc --check spatial_filter --id_run 8797 --position 7 --qc_out $bustard_home/Bustard_RTA/PB_cal/archive/qc ); else echo no spatial filter 1>&2; cat; fi;) | $java -Xmx1024m -jar $curdir/t/bin/software/solexa/jars/BamMerger.jar CREATE_MD5_FILE=true VALIDATION_STRINGENCY=SILENT KEEP=true I=/dev/stdin REPLACE_QUAL=true O=8797_7.bam ALIGNED=pb_align_8797_7.bam; else echo symlinking as no phix alignment 1>&2; rm -f 8797_7.bam; ln -s ../8797_7.bam 8797_7.bam; rm -f 8797_7.bam.md5; ln -s ../8797_7.bam.md5 8797_7.bam.md5; fi'"'"' '} ; - my $expect_job = $harold->_recalibration_bsub_command( { - position => 7, - job_dependencies => $req_job_completion, - ct => $cal_table, - } ); - is($expect_job, $job, q{generated bsub command for recalibration job is correct}); - - lives_ok { - @job_ids = $harold->generate_recalibrated_bam($arg_refs); - } q{no croak submitting recalibration jobs}; - is( scalar @job_ids, 8, q{8 jobs created}); -} - -{ - set_staging_analysis_area(); - my $harold; - $id_run = 4846; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => $id_run, - run_folder => q{123456_IL2_1234}, - runfolder_path => $runfolder_path, - timestamp => q{20091028-101635}, - verbose => 0, - repository => $repos, - bam_basecall_path => $runfolder_path . 
q{/Data/Intensities/BaseCalls}, - no_bsub => 1, - recalibration => 1, - force_phix_split => 0, - }); - } q{create $harold object ok}; - - isa_ok($harold, q{npg_pipeline::analysis::harold_calibration_bam}, q{$harold}); - - my @job_ids = $harold->generate_alignment_files({}); - is( scalar @job_ids, 0, q{no job ids for alignment as no spiked phix lane} ); - - @job_ids = $harold->generate_calibration_table({}); - is( scalar @job_ids, 0, q{no job ids for calibration table as no spiked phix lane} ); - - @job_ids = $harold->generate_recalibrated_bam({}); - is( scalar @job_ids, 8, q{8 job ids for recalibration even if no spiked phix lane} ); -} - -{ - set_staging_analysis_area(); - my $harold; - $id_run = 4846; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => $id_run, - run_folder => q{123456_IL2_1234}, - runfolder_path => $runfolder_path, - timestamp => q{20091028-101635}, - verbose => 0, - repository => $repos, - bam_basecall_path => $runfolder_path . 
q{/Data/Intensities/BaseCalls}, - no_bsub => 1, - recalibration => 1, - force_phix_split => 1, - }); - } q{create $harold object ok}; - - isa_ok($harold, q{npg_pipeline::analysis::harold_calibration_bam}, q{$harold}); - - my @job_ids = $harold->generate_alignment_files({}); - is( scalar @job_ids, 8, q{8 job ids for alignment as no spiked phix lane but force phix split} ); - - @job_ids = $harold->generate_calibration_table({}); - is( scalar @job_ids, 8, q{8 job ids for calibration table as no spiked phix lane but force phix split} ); - - @job_ids = $harold->generate_recalibrated_bam({}); - is( scalar @job_ids, 8, q{8 job ids for recalibration even if no spiked phix lane but force phix split} ); -} - -{ - set_staging_analysis_area(); - my $harold; - $id_run = 1234; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => $id_run, - run_folder => q{123456_IL2_1234}, - runfolder_path => $runfolder_path, - timestamp => q{20091028-101635}, - verbose => 0, - repository => $repos, - dif_files_path => $runfolder_path . q{/Data/Intensities}, - bam_basecall_path => $runfolder_path . 
q{/Data/Intensities/BaseCalls}, - no_bsub => 1, - spatial_filter => 1, - recalibration => 1, - force_phix_split => 0, - }); - } q{create $harold object ok}; - - my $arg_refs = { - timestamp => q{20091028-101635}, - position => 1, - job_dependencies => q{-w 'done(1234) && done(4321)'}, - ref_seq => q{t/data/sequence/references/Human/default/all/bwa/someref.fa.bwt}, - }; - - my $mem = 16000; - my $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>$mem_units)->_scale_mem_limit(); - my $single_read_alignment_command = qq{bsub -q srpipeline -E 'npg_pipeline_preexec_references --repository $curdir/t/data/sequence' -o $bustard_rta/PB_cal/}.q{log/PB_cal_align_1234_1_20091028-101635.%J.out -J PB_cal_align_1234_1_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem_limit.q{ -R 'span[hosts=1]' -n 6,12 -w 'done(1234) && done(4321)' '} . qq{cd $bustard_rta/PB_cal && $tdir} . q{/pb_align --aln_parms "-t "`npg_pipeline_job_env_to_threads` --sam_parms "-t "`npg_pipeline_job_env_to_threads --maximum 8` --spatial_filter --sf_parms "--region_size 200 --region_mismatch_threshold 0.016 --region_insertion_threshold 0.016 --region_deletion_threshold 0.016 --tileviz } . $bustard_home . q{/Bustard_RTA/PB_cal/archive/qc/tileviz/1234_1 " } . qq{--bam_join_jar $curdir/t/bin/software/solexa/jars/BamMerger.jar} . q{ --ref t/data/sequence/references/Human/default/all/bwa/someref.fa.bwt --read 0 --bam } . $bustard_home . q{/BaseCalls/1234_1.bam --prefix pb_align_1234_1 --pf_filter'}; - my $paired_read_alignment_command = qq{bsub -q srpipeline -E 'npg_pipeline_preexec_references --repository $curdir/t/data/sequence' -o $bustard_rta/PB_cal/log/PB_cal_align_1234_1_20091028-101635.%J.out -J PB_cal_align_1234_1_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem_limit.q{ -R 'span[hosts=1]' -n 6,12 -w 'done(1234) && done(4321)' '} . qq{cd $bustard_rta/PB_cal && $tdir} . 
q{/pb_align --aln_parms "-t "`npg_pipeline_job_env_to_threads` --sam_parms "-t "`npg_pipeline_job_env_to_threads --maximum 8` --spatial_filter --sf_parms "--region_size 200 --region_mismatch_threshold 0.016 --region_insertion_threshold 0.016 --region_deletion_threshold 0.016 --tileviz } . $bustard_home . q{/Bustard_RTA/PB_cal/archive/qc/tileviz/1234_1 " } . qq{--bam_join_jar $curdir/t/bin/software/solexa/jars/BamMerger.jar} . qq{ --ref t/data/sequence/references/Human/default/all/bwa/someref.fa.bwt --read1 1 --read2 2 --bam $bustard_home/BaseCalls/1234_1.bam --prefix pb_align_1234_1 --pf_filter'}; - my $spiked_read_alignment_command = qq{bsub -q srpipeline -E 'npg_pipeline_preexec_references --repository $curdir/t/data/sequence' -o $bustard_rta/PB_cal/log/PB_cal_align_1234_1_20091028-101635.%J.out -J PB_cal_align_1234_1_20091028-101635 -R 'select[mem>}.$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem_limit.q{ -R 'span[hosts=1]' -n 6,12 -w 'done(1234) && done(4321)' '} . qq{cd $bustard_rta/PB_cal && $tdir} . q{/pb_align --aln_parms "-t "`npg_pipeline_job_env_to_threads` --sam_parms "-t "`npg_pipeline_job_env_to_threads --maximum 8` --spatial_filter --sf_parms "--region_size 200 --region_mismatch_threshold 0.016 --region_insertion_threshold 0.016 --region_deletion_threshold 0.016 --tileviz } . $bustard_home . q{/Bustard_RTA/PB_cal/archive/qc/tileviz/1234_1 " } . qq{--bam_join_jar $curdir/t/bin/software/solexa/jars/BamMerger.jar} . 
qq{ --ref t/data/sequence/references/PhiX/default/all/fasta/phix-illumina.fa --read1 1 --read2 2 --bam $bustard_home/BaseCalls/1234_1.bam --prefix pb_align_1234_1 --pf_filter'}; - - is( $harold->_alignment_file_bsub_command( $arg_refs ), $single_read_alignment_command, q{single read alignment bsub command is correct} ); - - $arg_refs->{is_paired} = 1; - is( $harold->_alignment_file_bsub_command( $arg_refs ), $paired_read_alignment_command, q{paired read alignment bsub command is correct} ); - - $arg_refs->{is_spiked_phix} = 1; - $arg_refs->{ref_seq} = q{t/data/sequence/references/PhiX/default/all/fasta/phix-illumina.fa}; - is( $harold->_alignment_file_bsub_command( $arg_refs ), $spiked_read_alignment_command, q{paired read alignment bsub command is correct} ); - - my @job_ids = $harold->generate_alignment_files({}); - is( scalar @job_ids, 1, q{1 job ids, one spiked phix lane} ); -} - -{ - set_staging_analysis_area(); - my $harold; - $id_run = 1234; - lives_ok { - $harold = npg_pipeline::analysis::harold_calibration_bam->new({ - id_run => $id_run, - run_folder => q{123456_IL2_1234}, - runfolder_path => $runfolder_path, - timestamp => q{20091028-101635}, - verbose => 0, - repository => $repos, - bam_basecall_path => $runfolder_path . 
q{/Data/Intensities/BAM_basecalls}, - no_bsub => 1, - recalibration => 1, - }); - } q{create $harold object ok}; - - my $arg_refs = { - timestamp => q{20091028-101635}, - position => 1, - job_dependencies => q{-w 'done(1234) && done(4321)'}, - ref_seq => q{t/data/sequence/references/Human/default/all/bwa/someref.fa.bwt}, - }; - my $mem = 350; - my $mem_limit = npg_pipeline::lsf_job->new(memory => $mem, memory_units =>$mem_units)->_scale_mem_limit(); - my $expected_command = q(bsub -q srpipeline -o /nfs/sf45/IL2/analysis/123456_IL2_1234/Data/Intensities/BAM_basecalls/log/basecall_stats_1234_20091028-101635.%J.out -J basecall_stats_1234_20091028-101635 -R 'select[mem>).$mem.q{] rusage[mem=}.$mem.q{,nfs_12=4]' -M}.$mem_limit.q( -R 'span[hosts=1]' -n 4 " cd /nfs/sf45/IL2/analysis/123456_IL2_1234/Data/Intensities/BAM_basecalls && if [[ -f Makefile ]]; then echo Makefile already present 1>&2; else echo creating bcl2qseq Makefile 1>&2; /software/solexa/src/OLB-1.9.4/bin/setupBclToQseq.py -b /nfs/sf45/IL2/analysis/123456_IL2_1234/Data/Intensities/BaseCalls -o /nfs/sf45/IL2/analysis/123456_IL2_1234/Data/Intensities/BAM_basecalls --overwrite; fi && make -j 4 Matrix Phasing && make -j 4 BustardSummary.x{s,m}l "); - is( $util->drop_temp_part_from_paths( $harold->_generate_illumina_basecall_stats_command( $arg_refs ) ), $expected_command, q{Illumina basecalls stats generation bsub command is correct} ); - - my @job_ids = $harold->generate_illumina_basecall_stats($arg_refs); - is( scalar @job_ids, 1, q{1 job ids, generate Illumina basecall stats} ); -} - -1; diff --git a/t/35-archive-file-generation-seqchksum_comparator.t b/t/35-archive-file-generation-seqchksum_comparator.t index 177f75c93..9b525edd5 100644 --- a/t/35-archive-file-generation-seqchksum_comparator.t +++ b/t/35-archive-file-generation-seqchksum_comparator.t @@ -56,7 +56,8 @@ my $archive_path = $recalibrated_path . 
q{/archive}; my @jids = $object->launch( $arg_refs ); is( scalar @jids, 1, q{1 job id returned} ); - throws_ok{$object->do_comparison()} qr/please check illumina2bam pipeline step/, q{Doing a comparison with no files throws an exception}; + throws_ok{$object->do_comparison()} qr/Cannot find/, + q{Doing a comparison with no files throws an exception}; is($object->archive_path, $archive_path, "Object has correct archive path"); is($object->bam_basecall_path, $bam_basecall_path, "Object has correct bam_basecall path"); diff --git a/t/35-archive_file_generation-BamClusterCounts.t b/t/35-archive_file_generation-BamClusterCounts.t index 59bdc390e..8727200a4 100644 --- a/t/35-archive_file_generation-BamClusterCounts.t +++ b/t/35-archive_file_generation-BamClusterCounts.t @@ -10,7 +10,6 @@ use_ok( q{npg_pipeline::archive::file::BamClusterCounts} ); my $util = t::util->new({}); my $dir = $util->temp_directory(); -$ENV{TEST_DIR} = $dir; local $ENV{NPG_WEBSERVICE_CACHE_DIR} = q[t/data]; local $ENV{PATH} = join q[:], q[t/bin], q[t/bin/software/solexa/bin], $ENV{PATH}; @@ -21,7 +20,7 @@ Log::Log4perl->easy_init({layout => '%d %-5p %c - %m%n', $util->create_multiplex_analysis(); my $analysis_runfolder_path = $util->analysis_runfolder_path(); -my $bam_basecall_path = $util->standard_analysis_bustard_path(); +my $bam_basecall_path = $util->standard_bam_basecall_path(); qx{cp t/data/summary_files/BustardSummary_mp.xml $bam_basecall_path/BustardSummary.xml}; my $recalibrated_path = $util->standard_analysis_recalibrated_path(); my $archive_path = $recalibrated_path . 
q{/archive}; diff --git a/t/50-npg_pipeline-daemon-analysis.t b/t/50-npg_pipeline-daemon-analysis.t index 0909e9e1d..35f389f52 100644 --- a/t/50-npg_pipeline-daemon-analysis.t +++ b/t/50-npg_pipeline-daemon-analysis.t @@ -57,7 +57,7 @@ sub runfolder_path4run { return '/some/path' }; package main; subtest 'staging host matching' => sub { - plan tests => 26; + plan tests => 24; my $path49 = '/{export,nfs}/sf49/ILorHSany_sf49/*/'; my $path32 = '/{export,nfs}/sf32/ILorHSany_sf32/*/'; @@ -75,30 +75,13 @@ subtest 'staging host matching' => sub { throws_ok { $runner->_generate_command( { rf_path => $rf_path, job_priority => 50, - }) } qr/Lims flowcell id is missing/, - 'non-gclp run and lims flowcell id is missing - error'; + }) } qr/Lims flowcell id is missing/, 'lims flowcell id is missing - error'; like($runner->_generate_command( { rf_path => $rf_path, job_priority => 50, id => 1480, - } ), qr/$command_start $rf_path/, - q{generated command is correct}); - - like($runner->_generate_command( { - rf_path => $rf_path, - job_priority => 50, - gclp => 1, - } ), qr/$command_start $rf_path --function_list gclp/, - q{generated command is correct}); - - like($runner->_generate_command( { - rf_path => $rf_path, - job_priority => 50, - gclp => 1, - id => 22, - }), qr/$command_start $rf_path --function_list gclp/, - q{generated command is correct}); + } ), qr/$command_start $rf_path/, q{generated command is correct}); ok($runner->green_host,'running on a host in a green datacentre'); ok($runner->staging_host_match($path49), 'staging matches host'); @@ -181,7 +164,7 @@ subtest 'failure to retrive lims data' => sub { }; subtest 'retrieve lims data' => sub { - plan tests => 28; + plan tests => 19; my $runner; lives_ok { $runner = $package->new( @@ -206,7 +189,6 @@ subtest 'retrieve lims data' => sub { my $lims_data = $runner->check_lims_link($test_run); is ($lims_data->{'id'}, '1234567891234', 'lims id'); is ($lims_data->{'qc_run'}, 1, 'is qc run'); - ok(!$lims_data->{'gclp'}, 'gclp 
flag is false'); is_deeply($lims_data->{'studies'}, [], 'studies not retrieved'); $test_run->update({'batch_id' => 55}); @@ -227,14 +209,6 @@ subtest 'retrieve lims data' => sub { $fc_row->update({'id_lims' => 'SSCAPE'}); $lims_data = $runner->check_lims_link($test_run); is ($lims_data->{'id'}, undef, 'lims id is undefined'); - ok(!$lims_data->{'gclp'}, 'gclp flag is false'); - is ($lims_data->{'qc_run'}, undef, 'qc run flag is not set'); - is(join(q[:], @{$lims_data->{'studies'}}), '2967', 'studies retrieved'); - - $fc_row->update({'id_lims' => 'C_GCLP'}); - $lims_data = $runner->check_lims_link($test_run); - is ($lims_data->{'id'}, undef, 'lims id is undefined'); - is ($lims_data->{'gclp'}, 1, 'gclp flag is set to true'); is ($lims_data->{'qc_run'}, undef, 'qc run flag is not set'); is(join(q[:], @{$lims_data->{'studies'}}), '2967', 'studies retrieved'); @@ -242,24 +216,19 @@ subtest 'retrieve lims data' => sub { $fc_row->update({'id_flowcell_lims' => 55}); $lims_data = $runner->check_lims_link($test_run); is ($lims_data->{'id'}, 55, 'lims id is set'); - is ($lims_data->{'gclp'}, 1, 'gclp flag is set to true'); is ($lims_data->{'qc_run'}, undef, 'qc run flag is not set'); is(join(q[:], @{$lims_data->{'studies'}}), '2967', 'studies retrieved'); $fc_row->update({'id_lims' => 'SSCAPE'}); $lims_data = $runner->check_lims_link($test_run); is ($lims_data->{'id'}, 55, 'lims id is set'); - ok (!$lims_data->{'gclp'}, 'gclp flag is false'); is ($lims_data->{'qc_run'}, undef, 'qc run flag is not set'); $fc_row->update({'id_lims' => 'SSCAPE'}); $fc_row->update({'purpose' => 'qc'}); $lims_data = $runner->check_lims_link($test_run); is ($lims_data->{'id'}, 55, 'lims id is set'); - ok (!$lims_data->{'gclp'}, 'gclp flag is false'); is ($lims_data->{'qc_run'}, 1, 'qc run flag is set'); - - }; subtest 'generate command' => sub { @@ -291,7 +260,7 @@ subtest 'generate command' => sub { }; subtest 'retrieve study analysis configuration' => sub { - plan tests => 6; + plan tests => 5; 
my $d = npg_pipeline::daemon::analysis->new(); isa_ok( $d->daemon_conf(), q{HASH}, q{$} . qq{base->daemon_conf} ); @@ -303,13 +272,12 @@ subtest 'retrieve study analysis configuration' => sub { $d = npg_pipeline::daemon::analysis->new(conf_path => 't/data/study_analysis_conf'); my $conf = $d->study_analysis_conf(); isa_ok($conf, 'HASH', 'HASH of study configurations'); - is($conf->{'gclp_all_studies'}, 't/data', 'dated directory name for gclp runs'); is($conf->{'12345'}, 't', 'dated directory name for study 12345'); is($conf->{'XY345'}, '/some/dir', 'dated directory name for study 12345'); }; subtest 'get software bundle' => sub { - plan tests => 11; + plan tests => 7; my $conf_file = join q[/], $temp_directory, 'study_conf.yml'; open my $fh, '>', $conf_file; @@ -325,19 +293,10 @@ subtest 'get software bundle' => sub { ); throws_ok { $runner->_software_bundle() } - qr/GCLP flag is not defined/, - 'error if gclp flag is not defined'; - throws_ok { $runner->_software_bundle(1) } qr/Study ids are missing/, 'error if no study array is given'; - lives_ok { $runner->_software_bundle(0, []) } + lives_ok { $runner->_software_bundle([]) } 'no error if study array is empty'; - throws_ok { $runner->_software_bundle(1, []) } - qr/GCLP run needs explicit software bundle/, - 'GCLP run: no study info - error'; - throws_ok { $runner->_software_bundle(1, [qw/3/]) } - qr/GCLP run needs explicit software bundle/, - 'no GCLP conf - error'; $runner = $package->new( pipeline_script_name => '/bin/true', @@ -346,21 +305,19 @@ subtest 'get software bundle' => sub { conf_path => 't/data/study_analysis_conf', ); - throws_ok { $runner->_software_bundle(0, [qw/3 12345/]) } + throws_ok { $runner->_software_bundle([qw/3 12345/]) } qr/Multiple software bundles for a run/, 'Software and no software - error'; - throws_ok { $runner->_software_bundle(0, [qw/12345 12346/]) } + throws_ok { $runner->_software_bundle([qw/12345 12346/]) } qr/Multiple software bundles for a run/, 'Multiple software bundles 
- error'; - throws_ok { $runner->_software_bundle(0, [qw/XY345/]) } + throws_ok { $runner->_software_bundle([qw/XY345/]) } qr/Directory \'\/some\/dir\' does not exist/, 'directory does not exist - error'; - is($runner->_software_bundle(0, []), q[], 'no study info - no path'); - is($runner->_software_bundle(0, [qw/12346 12347/]), + is($runner->_software_bundle([]), q[], 'no study info - no path'); + is($runner->_software_bundle([qw/12346 12347/]), "${current_dir}/t/data/cache", 'study analysis directory retrieved'); - is($runner->_software_bundle(1, [qw/12346 12347/]), - "${current_dir}/t/data", 'GCLP study analysis directory retrieved'); }; subtest 'mock continious running' => sub { diff --git a/t/50-npg_pipeline-daemon-archival.t b/t/50-npg_pipeline-daemon-archival.t index 9ad7507f3..822ffa066 100644 --- a/t/50-npg_pipeline-daemon-archival.t +++ b/t/50-npg_pipeline-daemon-archival.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 19; +use Test::More tests => 18; use Test::Exception; use Cwd; use List::MoreUtils qw{any}; @@ -58,9 +58,6 @@ package main; like($runner->_generate_command(1234), qr/npg_pipeline_post_qc_review --verbose --runfolder_path \/some\/path/, q{generated command is correct}); - like($runner->_generate_command(1234,1), - qr/npg_pipeline_post_qc_review --function_list gclp --verbose --runfolder_path \/some\/path/, - q{generated gclp command is correct}); ok(!$runner->green_host, 'host is not in green datacentre'); $schema->resultset(q[Run])->find(2)->update_run_status('archival pending', 'pipeline'); diff --git a/t/bin/software/solexa/bin/aligners/illumina2bam/current b/t/bin/software/solexa/bin/aligners/illumina2bam/current deleted file mode 120000 index 08306bac0..000000000 --- a/t/bin/software/solexa/bin/aligners/illumina2bam/current +++ /dev/null @@ -1 +0,0 @@ -Illumina2bam-tools-1.00/ \ No newline at end of file diff --git a/t/data/illumina2bam/1234_samplesheet.csv b/t/data/illumina2bam/1234_samplesheet.csv deleted file 
mode 100644 index 24a070116..000000000 --- a/t/data/illumina2bam/1234_samplesheet.csv +++ /dev/null @@ -1,26 +0,0 @@ -[Header],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -Investigator Name,pav,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -Project Name,1000Genomes-A1-YRI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -Experiment Name,1234,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -Date,2008-08-17T13:18:30,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -Workflow,LibraryQC,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -Chemistry,Default,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -[Reads],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -37,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -37,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -[Settings],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -[Manifests],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -[Data],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -Index,Lane,Sample_ID,Sample_Name,GenomeFolder,bait_name,default_library_type,default_tag_sequence,email_addresses,email_addresses_of_followers,email_addresses_of_managers,email_addresses_of_owners,is_control,is_pool,lane_id,lane_priority,library_name,organism,organism_taxon_id,project_cost_code,project_id,project_name,qc_state,request_id,required_insert_size_range,sample_accession_number,sample_common_name,sample_consent_withdrawn,sample_description,sample_id,sample_name,sample_public_name,sample_reference_genome,spiked_phix_tag_index,study_accession_number,study_alignments_in_bam,study_contains_nonconsented_human,study_contains_nonconsented_xahuman,study_description,study_id,study_name,study_reference_genome,study_separate_y_chromosome_data,study_title,tag_index, -,1,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,,jws@sanger.ac.uk rd@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk 
thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,0,66206,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2409,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes Project Pilot 1,, -,2,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,,jws@sanger.ac.uk rd@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,0,66207,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2410,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes Project Pilot 1,, -,3,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,,jws@sanger.ac.uk rd@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,0,66208,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2411,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes Project Pilot 1,, -,4,79570,phiX_SI_SPRI,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\PhiX\Sanger-SNPs\all\fasta\,,,,,,,,1,0,80723,0,phiX_SI_SPRI,,,,,,,41944,,,,,,9829,phiX_SI_SPRI,,,,,,0,0,,,,,,,, -,5,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,,jws@sanger.ac.uk rd@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,0,66209,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2412,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes Project Pilot 1,, 
-,6,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,,jws@sanger.ac.uk rd@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,0,66210,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2413,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes Project Pilot 1,, -,7,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,,jws@sanger.ac.uk rd@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,0,66211,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2414,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes Project Pilot 1,, -ATCAACCG,8,51021,SRS000147,C:\Illumina\MiSeq Reporter\Genomes\WTSI_references\Homo_sapiens\1000Genomes\all\fasta\,,,ATCAACCG,jws@sanger.ac.uk rd@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk thomas.keane@sanger.ac.uk,jws@sanger.ac.uk,rd@sanger.ac.uk,0,1,66212,0,NA18907-YRI-1,human,9606,S0706,185,1000Genomes-A1-YRI,pending,2415,from:150 to:200,SRS000147,Homo sapiens,,,766,NA18907-YRI-1,NA18907,,,SRP000031,1,0,0,1000Genomes Project Pilot 1,185,1000Genomes-A1-YRI,,,1000Genomes Project Pilot 1,154, - diff --git a/t/data/illumina2bam/npg/instrument/21.xml b/t/data/illumina2bam/npg/instrument/21.xml deleted file mode 100644 index cdead6e0d..000000000 --- a/t/data/illumina2bam/npg/instrument/21.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/t/data/illumina2bam/npg/run/1234.xml b/t/data/illumina2bam/npg/run/1234.xml deleted file mode 100644 index cba379613..000000000 --- a/t/data/illumina2bam/npg/run/1234.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/t/data/study_analysis_conf/study_analysis.yml b/t/data/study_analysis_conf/study_analysis.yml index 7a9141cb6..43d062e5f 100644 --- a/t/data/study_analysis_conf/study_analysis.yml +++ b/t/data/study_analysis_conf/study_analysis.yml @@ -1,7 +1,6 @@ # Study analysis environment # for gseq farm --- -gclp_all_studies: t/data 12345: t 12346: t/data/cache 12347: t/data/cache diff --git a/t/util.pm b/t/util.pm index dcecff44b..9634175da 100644 --- a/t/util.pm +++ b/t/util.pm @@ -8,64 +8,31 @@ use Readonly; use Cwd qw(getcwd); use npg::api::request; -Readonly::Scalar our $TEMP_DIR => q{/tmp}; Readonly::Scalar our $NFS_STAGING_DISK => q{/nfs/sf45}; -has q{cwd} => ( - isa => q{Str}, - is => q{ro}, - lazy_build => 1, -); - -sub _build_cwd { - my ( $self ) = @_; - return getcwd(); -} - -# for getting a temporary directory which will clean up itself, and should not clash with other people attempting to run the tests has q{temp_directory} => ( isa => q{Str}, is => q{ro}, lazy_build => 1, ); sub _build_temp_directory { - my ( $self ) = @_; - - my $tempdir = tempdir( - DIR => $TEMP_DIR, - CLEANUP => 1, - ); - return $tempdir; + return tempdir(CLEANUP => 1); } ############### # path setups Readonly::Scalar our $DEFAULT_RUNFOLDER => q{123456_IL2_1234}; - Readonly::Scalar our $ANALYSIS_RUNFOLDER_PATH => $NFS_STAGING_DISK . q{/IL2/analysis/} . $DEFAULT_RUNFOLDER; -Readonly::Scalar our $OUTGOING_RUNFOLDER_PATH => $NFS_STAGING_DISK . q{/IL2/outgoing/} . $DEFAULT_RUNFOLDER; Readonly::Scalar our $BUSTARD_PATH => qq{$ANALYSIS_RUNFOLDER_PATH/Data/Intensities/Bustard1.3.4_09-07-2009_auto}; Readonly::Scalar our $RECALIBRATED_PATH => qq{$BUSTARD_PATH/PB_cal}; -sub default_runfolder { - my ( $self ) = @_; - return $DEFAULT_RUNFOLDER; -} - -sub test_run_folder { - my ($self) = @_; - my $test_run_folder_path = $self->temp_directory() . 
$ANALYSIS_RUNFOLDER_PATH; - my ($run_folder) = $test_run_folder_path =~ /(\d+_IL\d+_\d+)/xms; - return $run_folder; -} - sub analysis_runfolder_path { my ( $self ) = @_; return $self->temp_directory() . $ANALYSIS_RUNFOLDER_PATH; } -sub standard_analysis_bustard_path { +sub standard_bam_basecall_path { my ( $self ) = @_; return $self->temp_directory() . $BUSTARD_PATH; } @@ -169,7 +136,7 @@ sub set_rta_staging_analysis_area { } `cp t/data/Recipes/TileLayout.xml $analysis_runfolder_path/Config/`; `touch $recalibrated_path/touch_file`; - return {bustard_path => $bustard_path, recalibrated_path => $recalibrated_path, runfolder_path => $analysis_runfolder_path}; + return {recalibrated_path => $recalibrated_path, runfolder_path => $analysis_runfolder_path}; } sub remove_staging { @@ -185,17 +152,16 @@ sub remove_staging { sub drop_temp_part_from_paths { my ( $self, $path ) = @_; my $temp_dir = $self->temp_directory(); - my $cwd = $self->cwd(); + my $cwd = getcwd(); $path =~ s{\Q$temp_dir\E}{}gxms; $path =~ s{\Q$cwd/\E}{}gxms; $path =~ s{\Q$cwd\E}{}gxms; return $path; } -# ensure that the environment variables do not get passed around and that extraneous files do not get left behind +# ensure that the environment variables do not get passed around sub DEMOLISH { $ENV{ npg::api::request->cache_dir_var_name() } = q{}; - unlink 'Latest_Summary'; } 1;