multi_seqdoc.pl

#!/usr/bin/perl
=head1 NAME

multi_seqdoc.pl - Perl cgi script to align and subtractively compare multiple sequence
chromatograms in ABI (Applied Biosystems) format to a single reference chromatogram.

=head1 SYNOPSIS

seqdoc.pl?ref_seq=<reference sequence file>&test_seq0=<test_sequence file>&test_seq1=<test_sequence file> etc.

Designed to be called as a cgi script from a web form with the filenames
supplied as parameters.

=head1 DESCRIPTION

This script does the following in order:

=over 3

=item *

Reads in the sequence files

=item *

Normalizes the peak heights for each trace

=item *

Aligns each test trace to the reference trace

=item *

Calculates a difference profile between the traces

=item *

Processes the difference profile to highlight differences characteristic of
single bases

=item *

Outputs aligned images of the input sequences and the difference profiles
in html format

=back
	
All processing is performed automatically, with no user-defined parameters other
than the sequence file names. Settings can be modified by altering values
defined in the code if desired.

The program requires write access to a temporary directory (by default /usr/local/apache/htdocs/Temp/) to store the image files between their generation and display on the web page.

=head1 WEBSITE

http://research.imb.uq.edu.au/seqdoc/

=head1 AUTHOR

Written by Mark Crowe - mcrowe@bioinformatics.org

The Institute for Molecular Bioscience, The University of Queensland, Brisbane, Queensland, Australia.

The single file version of this script was published as Crowe, Mark L "SeqDoC: rapid SNP and mutation detection by direct comparison of DNA sequence chromatograms." BMC Bioinformatics 2005, 6:133  doi:10.1186/1471-2105-6-133
with an attached Creative Commons license: (http://creativecommons.org/licenses/by/2.0).
"Multiple SeqDoC" by Mark Crowe was obtained from website at http://research.imb.uq.edu.au/seqdoc/, specifically http://research.imb.uq.edu.au/seqdoc/multi_seqdoc.pl

Other modifications performed by Matt Pagel to read secondary peak-under-peak

=head1 COPYRIGHT

Copyright 2005, Mark Crowe.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License (http://www.gnu.org/licenses/gpl.html)
for more details.

=cut

use strict;
#use warnings;
use CGI;
use ABI;
use GD::Graph::lines;
use GD::Image;
use GD::Image::Orientation;
#use Data::Dump

my $cgi = new CGI;

#print "Content-type: text/html\n\n";

# Create a list of the four channels to save on having to keep retyping them
# in loops
our @trace_list = qw( a_trace g_trace c_trace t_trace); #tr_bases
our @pos_trace_list = qw( a_trace g_trace c_trace t_trace base_pos);
#our @seq_list = qw( seq );
our $temp_dir = '/var/www/temp/';

# Get input data parameters from the web page
# Can be up to 10 input sequences (numbered 0 to 9)
my $ref_seq = $cgi->param('ref_seq');
my @test_seqs;
for (0..9) {
	$test_seqs[$_] = $cgi->param('test_seq'.$_) if $cgi->param('test_seq'.$_);
}

# Create temporary files to store the chromatograms
# Reference sequence first
open REF, ">".$temp_dir.$$."ref_seq";
my $ref_fh = $cgi->upload('ref_seq');
print REF while <$ref_fh>;
# Need to close file, otherwise ABI.pm can't access them properly
close REF;

# And test sequences
my $testcount = 0; # Incrementing integer to distinguish test files
for my $testfile (@test_seqs) {
	open TEST, ">".$temp_dir.$$.'test_seq_'.$testcount;
	my $test_fh = $cgi->upload('test_seq'.$testcount);
	print TEST while <$test_fh>;
	# Need to close files, otherwise ABI.pm can't access them properly
	close TEST;
	$testcount++;
}
# $testcount is a count of the number of test sequences. Since it's incremented after 
# the last sequence, need to take it back down by one.
$testcount--;

# Mac uploads put in extra header info. Delete it, since all it seems to do
# is break things
demac();


# Extract the data from the reference chromatogram file. Get it as a hash reference
my $ref_data = read_chromatogram($temp_dir.$$.'ref_seq');
# Extract the sequence name
my $ref_name = $ref_data->{name};
# Normalize peak heights to allow valid comparisons between runs
# By passing a hash reference, the changes made in the subroutine will act on
# the original hash (which is what we want in this case)
normalize($ref_data);
my $ref_sequence = $ref_data->{sequence};
my $ref_bps = $ref_data->{base_pos};

# Extract the data from the test chromatogram files. Get it as an array of hash references
my @test_data;
my @differences;
# Set initial alignment length to arbitrarily large value, so all true values are less - final
# used value is smallest, so need to start with largest
my $align_length = 1000000000;
for (0..$testcount) {
	my $test_data = read_chromatogram($temp_dir.$$.'test_seq_'.$_);
	# Extract the sequence names
	my $test_name = $test_data->{name};

	# Normalize peak heights to allow valid comparisons between runs
	# By passing a hash reference, the changes made in the subroutine will act on
	# the original hash (which is what we want in this case)
	normalize($test_data);

	# Now align test trace to reference trace. Reference trace doesn't change.
	get_best_align($ref_data, $test_data);

	# Calculate differences between traces
	my ($al_length, $diffs) = differences($ref_data, $test_data);
	# Only produce output up to length of shortest sequence
	$align_length = $al_length if $al_length < $align_length;
	$test_data[$_] = $test_data;
	$differences[$_] = $diffs;
}

# Finally generate output images - need to create temporary files, as can't
# include image data directly in html
# Pass details about image size and scale to the subroutine, also output file
my @retstring = ();
my $ref_image = get_image($ref_data, 0, 2000, 200, $temp_dir.$$."ref_image.png", $align_length, 1, $ref_data);
my $nll = rot_image($temp_dir.$$."ref_image");

for (0..$testcount) {
	my $test_image = get_image($test_data[$_], 0, 2000, 200, $temp_dir.$$."test_image".$_.".png", $align_length, 1, $ref_data );
	my $diff_image = get_image($differences[$_], -500, 500, 300, $temp_dir.$$."diff_image".$_.".png", $align_length, 0, $ref_data);
	$retstring[$_] = $diff_image;
	$nll = rot_image($temp_dir.$$."test_image".$_);
	$nll = rot_image($temp_dir.$$."diff_image".$_);
}

# Print out page
print $cgi->header;
print "<head><title>Chromatogram comparison</title></head>";
print "<body>";
for (0..$testcount) {
	print "<h3>Alignment ".($_ + 1)."</h3><br>";
	print "Reference sequence";
	if ($ref_data->{name}) {print " - ".$ref_data->{name}."<br>"}
	else {print " - ".$ref_seq."<br>"}
	print "<img src=\"/temp/".$$."ref_image.png\"><br>\n";
  print "Reference sequence call: ".$ref_sequence."<br>\n";
	print "<img src=\"/temp/".$$."diff_image".$_.".png\"><br>\n";
	print "Dif sequence call: ".$retstring[$_]."<br>\n";
	print "Test sequence ".($_ + 1);
	if ($test_data[$_]{name}) {print " - ".$test_data[$_]{name}."<br>"}
	else {print " - ".$test_seqs[$_]."<br>"}
	print "<img src=\"/temp/".$$."test_image".$_.".png\"><br>\n";
	print "Test sequence call: ".$test_data[$_]{sequence}."<br>\n";

  print "<img src=\"/temp/".$$."test_image".$_."-r.png\">\n";
  print "<img src=\"/temp/".$$."diff_image".$_."-r.png\">\n";
  print "<img src=\"/temp/".$$."ref_image-r.png\">\n";
 
}
print "</body></html>";


# Leave output files - they are deleted automatically each night
sub rot_image {
	my ($filebase) = @_;
	my $infn = $filebase.".png";
	open (PNGIN, $infn) || die $infn;
  my $img = GD::Image->newFromPng(\*PNGIN);
	close PNGIN;
	$img = $img->vertical(0,1);
  open OUT, ">".$filebase."-r.png";
  print OUT $img->png();
  close OUT;
 
}

sub error_end {
	# Deals with errors in input file format
	my $error = shift;
	my %error_reply = (abi_format  => 'One of the files does not seem to be a correctly formatted ABI file.',
	                   trace_error => 'Unable to extract trace data from one of the chromatograms.',
										 basecalls   => 'Unable to extract basecall positions from one of the input files.',
										 sequence    => 'Unable to extract called sequence from one of the input files.',
										 short_seq   => 'One of the sequences is too short for a valid comparison to be made',
										);
	my $reply = $error_reply{$error};
	# Print out page
	print $cgi->header;
	print "<head><title>Chromatogram comparison error</title></head>";
	print "<body>";
	print "<h1>Error report</h1>\nAn error has occurred. The error is :-  <br><pre>$reply</pre>\nPlease check that both your input files are valid ABI chromatograms.";
	print "</body></html>";
	exit;
}

										 
sub read_chromatogram {
	# Reads in data from the chromatogram files and returns it as a hash reference
	# Provide filename as the argument to the function call
	my $file = shift;
	# Create a new ABI object pointing to that file
	my $abi = ABI->new(-file=>$file) or error_end('abi_format');
	# Get the sample name from the file
	my $sample_name = $abi->get_sample_name();
	#Now get the four traces
	my @trace_a = $abi->get_trace("A") or error_end('trace_error');
	my @trace_g = $abi->get_trace("G") or error_end('trace_error');
	my @trace_c = $abi->get_trace("C") or error_end('trace_error');
	my @trace_t = $abi->get_trace("T") or error_end('trace_error');
	# Remove blank sequence from end of trace
	# Blank sequence often has noise, so can't use equal to zero. Instead have
	# threshold cutoff. Tried with various values, even as high as 500 seems to
	# work and not delete real sequence, while as low as 5 still removes most
	# blank sequence. 50 therefore seems to be a good compromise.
  my $seq_length = $#trace_a;

#	while ($trace_a[-1] < 50 && $trace_g[-1] < 50 && $trace_c[-1] < 50 && $trace_t[-1] < 50) {
#	pop(@trace_a); pop(@trace_g); pop(@trace_c); pop(@trace_t)}
	
	my @base_pos = $abi->get_corrected_base_calls() or error_end('basecalls');
	my $sequence = $abi->get_corrected_sequence() or error_end('sequence');
#	my $tr_bases1 = $sequence;
#	$tr_bases1 =~ tr/ACTGN/12345/;
#	my @bases_tr2 = split(//, $tr_bases1);
#	my @bases_tr3 = map { ((ord $_) - (ord "0")) } @bases_tr2;
#	my $seq_length = $#trace_a;
#	my @bases_tr;
#	for (0..$seq_length) {
#		push @bases_tr,0;
#	}
#	for (0..$#bases_tr3) {
#		$bases_tr[$base_pos[$_]] = $bases_tr3[$_]*100;
#	}
  while ($trace_a[-1] < 50 && $trace_g[-1] < 50 && $trace_c[-1] < 50 && $trace_t[-1] < 50) {
  pop(@trace_a); pop(@trace_g); pop(@trace_c); pop(@trace_t); 
#  pop(@bases_tr)
}


	# Put all the data together into a hash to return it
	my %return_hash = (name       => $sample_name,
										 a_trace    => \@trace_a,
										 g_trace    => \@trace_g,
										 c_trace    => \@trace_c,
										 t_trace    => \@trace_t,
#										 tr_bases		=> \@bases_tr,
										 seq_length => $seq_length,
										 sequence   => $sequence,
										 base_pos   => \@base_pos,
										);
	return \%return_hash;
}

sub normalize {
	# Takes the raw data from the chromatograms and normalizes each value to a 
	# mean of 100 with respect to all values within 500 datapoints either side
	# N.B. There is a special case for points within 500bp of the start.
		
	my $trace_data = shift;
	# Get the length of good trace data
	my $trace_length = $trace_data->{seq_length};
	# Ensure that the sequence is long enough for normalization to not give any 
	# errors. A shorter sequence probably won't be a lot of use anyway. Use
	# 1100 as first 500 + last 500 plus a short middle buffer. Actual value has
	# no meaning beyond preventing errors
	error_end('short_seq') unless $trace_length > 1100; 
	
	# Need to create new reference arrays, since normalization alters the existing
	# values in the hash
	my %orig_values;
	for (@pos_trace_list) {
		for my $index (0..$trace_length) {
			$orig_values{$_}[$index] = $trace_data->{$_}[$index];
		}
	}
	
	# Do the middle of the trace first - from #500 to 500 before the end
	my $totalsum = 0;
	# Set the normalization sum before starting
	for my $index (0..999) {
		for (@trace_list) {
			$totalsum += $orig_values{$_}[$index];
		}
	}
	for my $datapoint (500..($trace_length - 501)) {
		# Normalize to 100. Divide by sum of all values, multiply by number of 	
		# values, and multiply by 100;
		# First add values for datapoint +500
		for (@trace_list) {
			$totalsum += $orig_values{$_}[$datapoint + 500];
		}
		for (@trace_list) {
			# Blank sequence can cause problems through division by zero errors.
			# Deleting trailing blank sequence helps, but just put in a default value
			# for totalsum in case of problems.
			$totalsum = 1000 unless $totalsum;
			# Calculate normalized data
			$trace_data->{$_}[$datapoint] = int(($orig_values{$_}[$datapoint] / $totalsum) * 4000 * 100);
		}
		# Finally subtract values for datapoint -500
		for (@trace_list) {
			$totalsum -= $orig_values{$_}[$datapoint - 500];
		}
	}
	# Now do first 500 - special case, since can't do 500 before. Instead just 
	# take all points before. Not so critical anyway, since data quality is poor
	# at start so any mismatches will be unreliable anyway.
	# Start by initialising totalsum
	$totalsum = 0;
	for my $index (0..500) {
		for (@trace_list) {
			$totalsum += $orig_values{$_}[$index];
		}
	}
		# Blank sequence can cause problems through division by zero errors.
		# Deleting trailing blank sequence helps, but just put in a default value
		# for totalsum in case of problems.
		$totalsum = 1000 unless $totalsum;
		
	for my $datapoint (0..499) {
		# Can do 500 after though
		my $end = $datapoint + 500;
		# Normalize to 100. Divide by sum of all values, multiply by number of
		# values, and multiply by 100;
		for (@trace_list) {
			# Blank sequence can cause problems through division by zero errors.
			# Deleting trailing blank sequence helps, but just put in a default value
			# for totalsum in case of problems.
			$totalsum = 1000 unless $totalsum;
			# Calculate normalized data
			$trace_data->{$_}[$datapoint] = int(($orig_values{$_}[$datapoint] / $totalsum) * $end * 4 * 100);
		}
		# Add next value to totalsum, to keep 500 values after
		for (@trace_list) {
			$totalsum += $orig_values{$_}[$end + 1];
		}
	}
	
	# Finally the last 500 - again a special case, as can't do 500 after. Instead 
	# just take all points after. Not so critical anyway, since data quality is
	# poor at end so any mismatches will be unreliable anyway.
	# Again start by initialising totalsum
	$totalsum = 0;  
	for my $index (($trace_length-1000)..($trace_length-1)) {
		for (@trace_list) {
			$totalsum += abs($orig_values{$_}[$index]);
		}
	}
	# Identify start and finish points
	my ($first, $last) = (($trace_length-500),($trace_length-1));
	for my $datapoint ($first..$last) {
		my $start = $datapoint - 500;
		# Normalize to 100. Divide by sum of all values, multiply by number of
		# values, and multiply by 100;
		for (@trace_list) {
			# Blank sequence can cause problems through division by zero errors.
			# Deleting trailing blank sequence helps, but just put in a default value
			# for totalsum in case of problems.
			$totalsum = 1000 unless $totalsum;
			# Calculate normalized data
			$trace_data->{$_}[$datapoint] = int(($orig_values{$_}[$datapoint] / $totalsum) * ($last-$start) * 4 * 100);
		}
		# Subtract first value from totalsum, to keep to 500 point before test
		for (@trace_list) {
			$totalsum -= abs($orig_values{$_}[$start]);
		}	
	}

	return 1;
}

		
sub get_best_align {
# This does an alignment of the first 1000 datapoints using a range of offsets
# from -200 to 200. The best alignment is picked on the basis of having the
# lowest score from datapoint 200 to 1000, and is used to allow for any
# variation in start position of the two sequences.
	my ($ref, $test) = @_;
	my %scores;
	for (my $offset=-200;$offset<=200;$offset+=20) {
		# Create temporary hashes, since a hash reference is passed to the function
		# and otherwise the real hash will be modified
		my (%temp_ref, %temp_test);
		# Also need to extract hash values, since they too are references and 
		# can't be copied for the same reason.
		for (@pos_trace_list) {
			my @temp_ref_trace = @{$ref->{$_}};
			$temp_ref{$_} = \@temp_ref_trace;
			my @temp_test_trace = @{$test->{$_}};
			$temp_test{$_} = \@temp_test_trace;
		}
		# Do a partial alignment (first 1000 datapoints)
		align (\%temp_ref, \%temp_test, $offset, 1000);
		# Work out the score for that alignment
		$scores{$offset} = get_score(200, 1000, 0, \%temp_ref, \%temp_test);
	}
	# Sort the scores to find out the lowest, and record the value of that offset
	my @offset = sort { $scores{$a} <=> $scores{$b} } keys %scores;
	# Once the best alignment has been determined, then do it for real
	align($ref, $test, $offset[0], ($#{$test->{a_trace}} + $offset[0]));
	return 1;
}

sub align {
	# This takes the normalized traces and returns a best alignment of the two.
	# Rows are added to or deleted from the test trace to keep the alignment.
	# Best alignment is calculated by minimising the difference between the test
	# and reference sequence over the next 30 datapoints. It is adjusted every five
	# bases. Inserted lines are just a duplicate of the previous line.
	my ($ref, $test, $min_index, $trace_length) = @_;
	
	# Add/delete the appropriate number of lines to the test sequence to correct
	# for the offset value
	if ($min_index < 0) {
		# Need to add lines to test sequence, since it's behind
		for ($min_index..0) {
			for (@trace_list) {
				unshift @{$test->{$_}}, $test->{$_}[0];
			}
#		unshift @{$test->{$base_pos}}, $test->{$base_pos}[0];
		}
	} elsif ($min_index > 0) {
		# Need to delete lines from test sequence, since it's ahead
		for (@trace_list) {
			splice @{$test->{$_}}, 0, $min_index;
		}
#		splice @{$test->{$base_pos}}, 0, $min_index;
	}
	# Make a note of the offset value for datapoint numbering
	$test->{initial_offset} = $min_index;
	$ref->{initial_offset} = 0;
	
	
	# Now check alignments
  my $totaloffset = $min_index;
	my @lenofseq = @{$test->{base_pos}};
	for my $index (0..($trace_length - 1)) {
		# Each third entry (starting from 1), check alignment
		# This used to be every fifth entry, but had some issues with compressed sequences
		# 3 doesn't seem to cause any new problems, and seems to fix compression
		next if $index%3;
		my $offset;
		my $start_pos = $index;
		my $end_pos = $index + 30;
		# Compare the scores in the current alignment with those one data point in 
		# either direction
		my $score = get_score($start_pos, $end_pos, 0, $ref, $test);
		my $pre_score = get_score($start_pos, $end_pos, -1, $ref, $test);
		my $post_score = get_score($start_pos, $end_pos, 1, $ref, $test);
		last if $score eq 'no_score' or $post_score eq 'no_score' or $pre_score eq 'no_score';
		# Work out offset
		# Default is 0; score is the lowest of the three
		if ($score < $pre_score && $score < $post_score) {$offset = 0}
		# if pre-score is the lowest, then offset = -1
		elsif ($pre_score < $score && $pre_score < $post_score) {$offset = -1}
		# if post-score is the lowest, then offset = 1
		elsif ($post_score < $score && $post_score < $pre_score) {$offset = 1}
		# If in doubt, default to no change
		else {$offset = 0}
		# Now insert or delete lines as required
		if ($offset == 1) {
		# The reference sample is behind, need to delete a row from test
			for (@trace_list) {
				splice @{$test->{$_}}, $index, 1;
			}
#			splice @{$test->{$base_pos}}, $index, 1;
			for (0..@{$test->{base_pos}}) {
				if ($index < $test->{base_pos}[$_]-$min_index) {
					--$test->{base_pos}[$_];
				}
			}
		}
		elsif ($offset == -1) {
		# The reference sample is ahead, need to add a row to test
			for (@trace_list) {
				splice @{$test->{$_}}, $index, 0, $test->{$_}[$index];
			}
      for (0..@{$test->{base_pos}}) {
        if ($index < $test->{base_pos}[$_]-$min_index) {
          ++$test->{base_pos}[$_];
        }
			}

#			splice @{$test->{$base_pos}}, $index, 0, $test->{$base_pos}[$index];
		}
	}
	# Reset length of test sequence to correct value
	$test->{seq_length} = $#{$test->{a_trace}};
	return 1;
}
		
sub get_score {
	# Subroutine used in alignment testing - it gets the total difference between
	# the two submitted sections of array and returns it.
	my ($start, $end, $offset, $ref, $test) = @_;
	my $score;
	for my $index($start..$end) {
		return 'no_score' unless defined ($ref->{a_trace}[$index]) and defined($test->{a_trace}[$index + $offset]);
		for (@trace_list) {
			$score += abs($ref->{$_}[$index] - $test->{$_}[$index + $offset]);
		}
	}
	return $score;
}

sub differences {
	# Takes the two traces and calculates the difference between the two. Then 
	# squares this difference to highlight the larger (relevent) changes.
	# Also returns the length of the shortest sequence, so both are aligned on 
	# image generation
	my ($ref, $test) = @_;
	# Just compare up to end of shortest sequence
	my $min_index = $ref->{seq_length} < $test->{seq_length} ? $ref->{seq_length} : $test->{seq_length};
	 
	# Create a hash for the difference values
	my %diffs;
	# Loop through the traces and calculate the differences
	for my $index (0..$min_index) {
		# Need to do all four traces
		for (@trace_list) {
			# Get the difference
			my $diff = $ref->{$_}[$index] - $test->{$_}[$index];
			# Is the difference positive or negative (need to record, otherwise will 
			# be lost on squaring)
			my $sign = $diff < 0 ? -1 : 1;
			# Stop saturation by cutting off to a max value
			if (abs($diff) > 500) {$diff = 500}
			# Highlight differences by multiplying value by total values of OTHER
			# channels
			my $othersum = 1;
			my $samesum = 1;
			$diffs{$_}[$index] = $diff;
		}
		# Have now got difference for all four traces. Can accentuate real diffs
		for (@trace_list) {
			my $diff = $diffs{$_}[$index];
			my $sign = $diff < 0 ? -1 : 1;
			my $otherchannels = 1;
			# Sum all values in the other channels which have the opposite sign
			for my $channel (@trace_list) {
				next if $channel eq $_;
				my $value = $diffs{$channel}[$index];
				# Ignore if the sign is the same
				next if $value * $sign > 0;
				$otherchannels += $value;
			}
			my $finaldiff = ($sign * $diff * $diff * sqrt(abs($otherchannels))) / 5000;
			$finaldiff = $sign * 500 if abs($finaldiff) > 500;
			$diffs{$_}[$index] = $finaldiff;
		}
	}
	
	return $min_index, \%diffs;
}

sub get_image {
	# Routine to convert the supplied hash reference into a png image of a graph
	# This is the slowest section of the program by a long way (typically takes
	# around 75% of the total calculation time). The limiting factor appears to be
	# GD::Graph - optimization suggestions are welcome.
	my ($trace, $min, $max, $size, $output_file, $length, $chromatogram, $reftrace) = @_;
	
	my @labels;
	my @calls;
#	$txtlabs = GD::Graph::Data->new();
	for (0..$length) {
		push @labels, '';
		push @calls, '';
	}
	if ($chromatogram) {
		my @seq = split //, $trace->{sequence};
#		my $txtlabs = GD::Graph::Data->new();

#print "Content-type: text/html\n\n";

		for (0..@{$trace->{base_pos}}) {

			# Only mark every 10th base from 10.
#			next if $_ % 10;
			next unless $_;
			last unless $trace->{base_pos}[$_];
			last if ($trace->{base_pos}[$_] - $trace->{initial_offset}) >= $length;
			# Correct for initial offset - puts numbers in same places as for 
			# chromatogram (otherwise they can get shifted)
			my $adj_ch_ind = $trace->{base_pos}[$_-1] - $trace->{initial_offset};
			my $flat_ch_ind = $trace->{base_pos}[$_-1];
			$adj_ch_ind = $adj_ch_ind*($adj_ch_ind>0);
			$calls[$_-1] = $seq[$_-1];
			if (!($_ % 10)) {$labels[$adj_ch_ind] = $_}
			$labels[($adj_ch_ind)].=" ".$calls[$_-1];
#			print $trace->{base_pos}[$_-1].",";
#			$txtlabs->add_point($ind,$calls[($ind)];
		}
	} else { #dif spectrum
    my @refseq = split //, $reftrace->{sequence};
    for (0..@{$reftrace->{base_pos}}) {

      # Only mark every 10th base from 10.
#     next if $_ % 10;
      next unless $_;
      last unless $reftrace->{base_pos}[$_];
      last if ($reftrace->{base_pos}[$_] - $reftrace->{initial_offset}) >= $length;
      # Correct for initial offset - puts numbers in same places as for 
      # chromatogram (otherwise they can get shifted)
      my $adj_ch_ind = $reftrace->{base_pos}[$_-1] - $reftrace->{initial_offset};
      my $flat_ch_ind = $reftrace->{base_pos}[$_-1];
      $adj_ch_ind = $adj_ch_ind*($adj_ch_ind>0);
      $calls[$_-1] = callmin($trace->{a_trace}[$adj_ch_ind],$trace->{c_trace}[$adj_ch_ind],$trace->{g_trace}[$adj_ch_ind],$trace->{t_trace}[$adj_ch_ind],60);
			if (!($calls[$_-1])) {
				$calls[$_-1] = $refseq[$_-1];
				#$labels[$adj_ch_ind] = ".";
			}
      if (!($_ % 10)) {$labels[$adj_ch_ind] .= $_}
      $labels[($adj_ch_ind)].=" ".($calls[$_-1]);
			$labels[$adj_ch_ind] =~ tr/A-Za-z/a-zA-Z/;
		}
#	$labels =~ tr/A-Za-z/a-zA-Z/;
	}
#print "Content-type: text/html\n\n";
#my @temp = @ { $trace->{t_trace} };
#print $#temp;
#print join(", ", @temp);
#@temp = @ { $trace->{tr_bases} };
#print $#temp;
#print join(", ", @temp);

	
	my @plot_data = (\@labels, $trace->{a_trace}, $trace->{g_trace}, $trace->{c_trace}, $trace->{t_trace});                                    #trace tr_bases
	# Define graph format
	my $mygraph = GD::Graph::lines->new($length, $size);
	$mygraph->set(
    x_label     => '',
		x_ticks     => 0,
	  y1_label     => '',
		y1_max_value => $max,
		y1_min_value => $min,
    y2_label     => '',
    y2_max_value => 2000,
    y2_min_value => 0,
		two_axes => 0,
		# Draw datasets in 'solid', 'dashed' and 'dotted-dashed' lines
		line_types  => [1, 1, 1, 1],
		# Set the thickness of line
		line_width  => 1,
    # Set colors for datasets
		dclrs       => ['green', 'black', 'blue', 'red'],
#		show_values => set_y(1,1,undef),
		x_labels_vertical => 1,
		values_vertical => 1,
	use_axis => [1,1,1,1]
	) or warn $mygraph->error;
#	my $pldata = GD::Graph::Data->new(\@plot_data);
#	my $vals = $pldata->copy;
#	$vals->wanted(5);
#	$vals = map{($_ > 0)?1:undef} \@vals;
#	print "Content-type: text/html\n\n";
#	my $v2 = map {map {($_ > 0)?$_:undef} @$_} @$vals;
#	map {map {print $_} @$_ } @$v2;
#	$mygraph->set(show_values=>$vals);
#	$vals->map(set_y,(1,$_,undef));
#	$graph->set(show_values => set_y(1,1,undef));
#  $values->set_y(1, 1, undef);
#  $values->set_y(2, 0, undef);
	
	my $myimage = $mygraph->plot(\@plot_data) or die $mygraph->error;
	open OUT, ">".$output_file;
	print OUT $myimage->png();
	close OUT;

	return join("",@calls);
}

sub extractNearBasePos {
	my ($trace, $pos) = @_;
#	for (@trace_list) {
	my @valslist = map {$_ - $pos} @{$trace->{base_pos}};
#	for (0..@{$trace->{base_pos}})
#		$trace->{base_pos}[$_-1]
	
}

sub findminmax {
	my (@list) = @_;
	my ($mini,$maxi);
	$mini=$maxi=0;
	my @list=(1,7,2,-2,7,0,-1);
	foreach my $i (1..$#list) {
  	$mini=$i if $list[$i]<$list[$mini];
  	$maxi=$i if $list[$i]>$list[$maxi];
	}
	return ($mini,$maxi);
}

sub callmin {
	my ($a, $c, $g, $t, @thresh) = @_;
	my @list=($a,$c,$g,$t);
	my @list2=("a","c","g","t");
	my $min;
	my $mintext=0;
	foreach my $i (1..$#list) {
  	$min=$i if $list[$i]<$list[$min];
	#  $max=$i if $list[$i]>$list[$max];
	}
	if (abs($list[$min]) > @thresh) {
		$mintext=$list2[$min];
	}
	return $mintext;
	
}

sub y_format_actg { 
	my $value = shift;
	my $ret;
	if ($value >= 0) { 
		$ret = sprintf("%s", $value);
 	}
	#$graph->set( 'y2_number_format' => \&y_format ); 
}

sub demac {
	# Subroutine to strip out extra Mac headers
	for my $file ($temp_dir.$$."ref_seq", glob($temp_dir.$$."test_seq_*") ) {
		open IN, $file;
		open OUT, ">".$file."_new";
		while (<IN>) {
			if ($. == 1) {
				s/^.*ABIF/ABIF/;
				unless (/ABIF/) {error_end('abi_format')}
			}
			print OUT $_;
		}
		close IN;
		close OUT;
		rename $file."_new", $file
	}
}