Merge pull request sanger-pathogens#53 from andrewjpage/simplify_inst…

…allation POD formatting Former-commit-id: 8c8a192
andrewjpage · Sep 10, 2015 · ea183e7 · ea183e7
2 parents b6c42e1 + f752a43
commit ea183e7
Show file tree

Hide file tree

Showing 11 changed files with 62 additions and 725 deletions.
diff --git a/GPL-LICENCE b/GPL-LICENCE
diff --git a/dist.ini b/dist.ini
@@ -1,9 +1,9 @@
 name    = Bio-Tradis
-author  = Carla Cummins <cc21@sanger.ac.uk>
+author  = Carla Cummins <path-help@sanger.ac.uk>
 license = GPL_3
 copyright_holder = Wellcome Trust Sanger Institute
 copyright_year   = 2013
-version = 1.2
+version = 1.2.1
 
 [MetaResources]
 homepage        = http://www.sanger.ac.uk/

diff --git a/lib/Bio/Tradis.pm b/lib/Bio/Tradis.pm
@@ -7,15 +7,25 @@ package Bio::Tradis;
 =head1 SYNOPSIS
 
 Bio-Tradis provides functionality to:
+
 =over
+
 =item * detect TraDIS tags in a BAM file - L<Bio::Tradis::DetectTags>
+
 =item * add the tags to the reads - L<Bio::Tradis::AddTagsToSeq>
+
 =item * filter reads in a FastQ file containing a user defined tag - L<Bio::Tradis::FilterTags>
+
 =item * remove tags - L<Bio::Tradis::RemoveTags>
+
 =item * map to a reference genome - L<Bio::Tradis::Map>
+
 =item * create an insertion site plot file - L<Bio::Tradis::TradisPlot>
+
 =back
+
 Most of these functions are available as standalone scripts or as perl modules.
 
+
 =cut
 1;
diff --git a/lib/Bio/Tradis/Analysis/InsertSite.pm b/lib/Bio/Tradis/Analysis/InsertSite.pm
@@ -1,5 +1,6 @@
 package Bio::Tradis::Analysis::InsertSite;
 # ABSTRACT: Take in a bam file and plot the start position of each read
+
 =head1 NAME
 
 InsertSite.pm   - Take in a bam file and plot the start position of each read
@@ -8,12 +9,12 @@ InsertSite.pm   - Take in a bam file and plot the start position of each read
 
 Takes in a mapped BAM file and plots the start position of each read
 
-use Bio::Tradis::Analysis::InsertSite;
-my $insertsite_plots_from_bam = Bio::Tradis::Analysis::InsertSite->new(
-   filename => 'my_file.bam',
-   output_base_filename => 'my_output_file'
-  );
-$insertsite_plots_from_bam->create_plots();
+   use Bio::Tradis::Analysis::InsertSite;
+   my $insertsite_plots_from_bam = Bio::Tradis::Analysis::InsertSite->new(
+      filename => 'my_file.bam',
+      output_base_filename => 'my_output_file'
+     );
+   $insertsite_plots_from_bam->create_plots();
 
 
 =cut
@@ -176,39 +177,6 @@ sub _build__frequency_of_read_start {
 	return \%frequency_of_read_start;
 }
 
-#use Bio::DB::Sam;
-#has '_input_file_handle' => ( is => 'rw', lazy_build => 1 );
-#sub _build__input_file_handle {
-#    my ($self) = @_;
-#    return Bio::DB::Bam->open( $self->filename );
-#}
-#sub _build__frequency_of_read_start {
-#    my ($self) = @_;
-#    my %frequency_of_read_start;
-#    my $header       = $self->_input_file_handle->header;
-#    my $target_names = $header->target_name;
-#    while ( my $align = $self->_input_file_handle->read1 ) {
-#        next if ( $align->unmapped );
-#
-#        # check quality score
-#        my $quality = $align->qual;
-#        if ( $quality >= $self->mapping_score ) {
-#            my $seqid = $target_names->[ $align->tid ];
-#            if ( $align->strand == 1 ) {
-#                $frequency_of_read_start{$seqid}{ $align->start }
-#                  { $align->strand }++;
-#            }
-#            else {
-#                $frequency_of_read_start{$seqid}{ $align->end }
-#                  { $align->strand }++;
-#            }
-#
-#        }
-#    }
-#
-#    return \%frequency_of_read_start;
-#}
-
 sub create_plots {
     my ($self) = @_;
     my %read_starts = %{ $self->_frequency_of_read_start };

diff --git a/lib/Bio/Tradis/CommandLine/PlotCombine.pm b/lib/Bio/Tradis/CommandLine/PlotCombine.pm
@@ -10,15 +10,17 @@ plotfile and as an identifier in the stats file, so ensure these are unique.
 
 For example, an input file named plots_to_combine.txt:
 
-tradis1	plot1.1.gz	plot1.2.gz plot1.3.gz
-tradis2 plot2.1.gz	plot2.2.gz
-tradis3	plot3.1.gz	plot3.2.gz plot3.3.gz	plot3.4.gz
+   tradis1	plot1.1.gz	plot1.2.gz plot1.3.gz
+   tradis2 plot2.1.gz	plot2.2.gz
+   tradis3	plot3.1.gz	plot3.2.gz plot3.3.gz	plot3.4.gz
 
 will produce:
+
 =over
 
 =item 1. a directory named combined with 3 files - tradis1.insertion_site_plot.gz,
 tradis2.insertion_site_plot.gz, tradis3.insertion_site_plot.gz
+
 =item 2. a stats file named plots_to_combine.stats
 
 =back

diff --git a/lib/Bio/Tradis/FilterTags.pm b/lib/Bio/Tradis/FilterTags.pm
@@ -18,15 +18,21 @@ Outputs a file *.tag.fastq unless an alternative outfile name is specified
 =head2 Required
 
 =over
+
 =item * C<fastqfile> - path to/name of file to filter. This may be a gzipped fastq file, in which case a temporary unzipped version is used and removed on completion.
+
 =item * C<tag> - TraDIS tag to match
+
 =back
 
 =head2 Optional
 
 =over
+
 =item * C<mismatch> - number of mismatches to allow when matching the tag. Default = 0
+
 =item * C<outfile> - output file name. Defaults to C<file.tag.fastq> for an input file named C<file.fastq>
+
 =back
 
 =head1 METHODS

diff --git a/lib/Bio/Tradis/Map.pm b/lib/Bio/Tradis/Map.pm
@@ -18,33 +18,45 @@ Maps given fastq files to ref.
 =head2 Required
 
 =over
+
 =item * C<fastqfile> - path to/name of file containing reads to map to the reference
+
 =item * C<reference> - path to/name of reference genome in fasta format (.fa)
+
 =back
 
 =head2 Optional
 
 =over
+
 =item * C<refname> - name to assign to the reference index files. Default = ref.index
+
 =item * C<outfile> -  name to assign to the mapped SAM file. Default = mapped.sam
+
 =back
 
 =head1 METHODS
 
 =over
+
 =item * C<index_ref> - create index files of the reference genome. These are required
 			for the mapping step. Only skip this step if index files already
 			exist. -k and -s options for referencing are calculated based
 			on the length of the reads being mapped as per table:
+
 =begin html
+
 <table>
 <tr><th>Read length</th><th>k</th><th>s</th></tr>
 <tr><td>&lt;70</td><td>13</td><td>4</td></tr>
 <tr><td>&gt;70 and &lt;100</td><td>13</td><td>6</td></tr>
 <tr><td>&gt;100</td><td>20</td><td>6</td></tr>
 </table>
+
 =end html
+
 =item * C<do_mapping> - map C<fastqfile> to C<reference>. Options used for mapping are: C<-r -1 -x -y 0.96>
+
 =back
 
 For more information on the mapping and indexing options discussed here, see the L<SMALT manual|ftp://ftp.sanger.ac.uk/pub4/resources/software/smalt/smalt-manual-0.7.4.pdf>

diff --git a/lib/Bio/Tradis/Parser/Fastq.pm b/lib/Bio/Tradis/Parser/Fastq.pm
@@ -5,7 +5,8 @@ package Bio::Tradis::Parser::Fastq;
 =head1 SYNOPSIS
 
 Parses fastq files. 
-use Bio::Tradis::Parser::Fastq;
+
+   use Bio::Tradis::Parser::Fastq;
    
    my $pipeline = Bio::Tradis::Parser::Fastq->new(file => 'abc');
    $pipeline->next_read;

diff --git a/lib/Bio/Tradis/RemoveTags.pm b/lib/Bio/Tradis/RemoveTags.pm
@@ -18,18 +18,25 @@ Outputs a file *.rmtag.fastq unless an out file is specified
 =head2 Required
 
 =over
+
 =item * C<fastqfile> - path to/name of file to filter
+
 =item * C<tag> - TraDIS tag to remove
+
 =back
 
 =head2 Optional
 
 =over
+
 =item * C<mismatch> - number of mismatches to allow when removing the tag. Default = 0
+
 =item * C<outfile> - defaults to C<file.rmtag.fastq> for an input file named C<file.fastq>
+
 =back
 
 =head1 METHODS
+
 C<remove_tags> - output all reads with the tags removed to C<outfile>
 
 =cut

diff --git a/lib/Bio/Tradis/RunTradis.pm b/lib/Bio/Tradis/RunTradis.pm
@@ -23,20 +23,28 @@ Artemis (or other genome browsers), mapped BAM files for each lane and a statist
 =head2 Required
 
 =over
+
 =item * C<fastqfile> - file containing a list of fastqs (gzipped or raw) to run the 
 			complete analysis on. This includes all (including 
 			intermediary format conversion and sorting) steps starting from
 			filtering.
+
 =item * C<tag> - TraDIS tag to filter and then remove
+
 =item * C<reference> - path to/name of reference genome in fasta format (.fa)
+
 =back
 
 =head2 Optional
 
 =over
+
 =item * C<mismatch> - number of mismatches to allow when filtering/removing the tag. Default = 0
+
 =item * C<tagdirection> - direction of the tag, 5' or 3'. Default = 3
+
 =item * C<mapping_score> - cutoff value for mapping score when creating insertion site plots. Default = 30
+
 =back
 
 =head1 METHODS

diff --git a/lib/Bio/Tradis/TradisPlot.pm b/lib/Bio/Tradis/TradisPlot.pm
@@ -21,8 +21,11 @@ C<mappedfile> - mapped and sorted BAM file
 =head2 Optional
 
 =over
+
 =item * C<outfile> - base name to assign to the resulting insertion site plot. Default = tradis.plot
+
 =item * C<mapping_score> - cutoff value for mapping score. Default = 30
+
 =back
 
 =head1 METHODS