diff --git a/assemble/skani.py b/assemble/skani.py new file mode 100644 index 00000000..06182cc9 --- /dev/null +++ b/assemble/skani.py @@ -0,0 +1,126 @@ +''' + SKANI - accurate, fast nucleotide identity calculation for MAGs and databases + https://github.com/bluenote-1577/skani +''' + +__author__ = "dpark@broadinstitute.org" + +import logging +import tools +import util.file +import util.misc +import csv +import os +import os.path +import shutil +import subprocess + +TOOL_NAME = "skani" + +_log = logging.getLogger(__name__) + +class UndirectedGraph: + ''' Simple utility class for finding clusters from pairwise relationships + ''' + def __init__(self): + self.edges = {} + + def add_node(self, node): + self.edges.setdefault(node, set()) + + def add_edge(self, node1, node2): + self.edges.setdefault(node1, set()).add(node2) + self.edges.setdefault(node2, set()).add(node1) + + def _dfs(self, node, visited): + visited.add(node) + cluster = set() + cluster.add(node) + for neighbor in self.edges[node]: + if neighbor not in visited: + cluster.update(self._dfs(neighbor, visited)) + return cluster + + def get_clusters(self): + visited = set() + clusters = [] + for node in self.edges.keys(): + if node not in visited: + clusters.append(self._dfs(node, visited)) + return clusters + + +class SkaniTool(tools.Tool): + + def __init__(self, install_methods=None): + if install_methods is None: + install_methods = [tools.PrexistingUnixCommand(shutil.which(TOOL_NAME), require_executability=True)] + super(SkaniTool, self).__init__(install_methods=install_methods) + + def version(self): + if self.tool_version is None: + self._get_tool_version() + return self.tool_version + + def _get_tool_version(self): + self.tool_version = subprocess.check_output([self.install_and_get_path(), '--version']).decode('UTF-8').strip().split()[1] + + def execute(self, subcommand, args, outfile, threads=None): + ''' generic execution of skani + ''' + + # build the skani command + tool_cmd = [self.install_and_get_path(), subcommand] + tool_cmd.extend(map(str, args)) + tool_cmd.extend(["-t", "{}".format(util.misc.sanitize_thread_count(threads))]) + + # run the command + _log.debug(' '.join(tool_cmd) + ' > ' + outfile) + with open(outfile, 'w') as outf: + util.misc.run_and_save(tool_cmd, outf=outf) + + def triangle(self, ref_fastas, outfile_ani, other_args = (), threads=None): + ''' skani triangle computes an all-to-all ANI distance matrix for a set of sequences + ''' + self.execute('triangle', list(ref_fastas) + list(other_args), outfile_ani, threads=threads) + + def dist(self, query_fasta, ref_fastas, outfile, other_args = (), threads=None): + ''' skani dist computes ANI distance between a specified query set of + sequences (MAGs) and reference genomes (database) + ''' + self.execute('dist', ['-q', query_fasta, '-r'] + list(ref_fastas) + list(other_args), outfile, threads=threads) + + def find_reference_clusters(self, ref_fastas, + other_args = ('-m', 50, '--no-learned-ani', '--slow', '--robust', '--detailed', '--ci', '--sparse'), + threads=None): + ''' use skani triangle to define clusters of highly-related genomes + (default settings here are for viral genomes) + ''' + g = UndirectedGraph() + for ref_fasta in ref_fastas: + g.add_node(ref_fasta) + + with util.file.tempfname('.skani_matrix.ani') as tmp_matrix_ani: + # run skani triangle + self.triangle(ref_fastas, tmp_matrix_ani, other_args, threads=threads) + + # parse the skani triangle results and define clusters + with open(tmp_matrix_ani, 'r') as inf: + for row in csv.DictReader(inf, delimiter='\t'): + g.add_edge(row['Ref_file'], row['Query_file']) + + return g.get_clusters() + + def find_closest_reference(self, contigs_fasta, ref_fastas, out_file, + other_args = ('-m', 50, '--no-learned-ani', '--slow', '--robust', '--detailed', '--ci', '-s', 85, '-n', 10, '--no-marker-index'), + threads=None): + ''' use skani dist to find the closest reference genome for each contig + (default settings here are for viral genomes) + ''' + self.dist(contigs_fasta, ref_fastas, out_file, other_args, threads=threads) + with open(out_file, 'r') as inf: + top_row = None + for row in csv.DictReader(inf, delimiter='\t'): + top_row = row + break + return (top_row['Ref_file'], top_row['ANI'], top_row['Align_fraction_ref'], top_row['Total_bases_covered']) if top_row is not None else None diff --git a/assembly.py b/assembly.py index e71bbe94..807ee560 100755 --- a/assembly.py +++ b/assembly.py @@ -41,6 +41,7 @@ import assemble.mummer import assemble.muscle import assemble.gap2seq +import assemble.skani # third-party import numpy @@ -375,10 +376,69 @@ def parser_gapfill_gap2seq(parser=argparse.ArgumentParser(description='Close gap util.cmd.attach_main(parser, gapfill_gap2seq, split_args=True) return parser - __commands__.append(('gapfill_gap2seq', parser_gapfill_gap2seq)) +def cluster_references_ani(inRefs, outClusters, threads=None): + ''' This step uses the skani triangle tool to define clusters of highly-related genomes. + ''' + skani = assemble.skani.SkaniTool() + clusters = skani.find_reference_clusters(inRefs, threads=threads) + with open(outClusters, 'w') as outf: + for cluster in clusters: + outf.write('\t'.join(cluster) + '\n') + +def parser_cluster_references_ani(parser=argparse.ArgumentParser(description='Cluster references')): + parser.add_argument('inRefs', nargs='+', help='FASTA files containing reference genomes') + parser.add_argument('outClusters', help='Output file containing clusters of highly-related genomes. Each line contains the filenames of the genomes in one cluster.') + util.cmd.common_args(parser, (('threads', None), ('loglevel', None), ('version', None), ('tmp_dir', None))) + util.cmd.attach_main(parser, cluster_references_ani, split_args=True) + return parser + +__commands__.append(('cluster_references_ani', parser_cluster_references_ani)) + + +def skani_contigs_to_refs(inContigs, inRefs, out_skani_dist, out_skani_dist_filtered, out_clusters_filtered, threads=None): + + skani = assemble.skani.SkaniTool() + clusters = skani.find_reference_clusters(inRefs, threads=threads) + skani.find_closest_reference(inContigs, inRefs, out_skani_dist, threads=threads) + refs_hit = set() + refs_hit_by_cluster = set() + + dist_header = util.file.readFlatFileHeader(out_skani_dist) + with open(out_skani_dist, 'r') as inf: + with open(out_skani_dist_filtered, 'w') as outf: + writer = csv.DictWriter(outf, dist_header, delimiter='\t', dialect=csv.unix_dialect, quoting=csv.QUOTE_MINIMAL) + for row in csv.DictReader(inf, delimiter='\t'): + refs_hit.add(row['Ref_file']) + if row['Ref_file'] not in refs_hit_by_cluster: + writer.writerow(row) + for cluster in clusters: + if row['Ref_file'] in cluster: + refs_hit_by_cluster.update(cluster) + break + + with open(out_clusters_filtered, 'w') as outf: + for cluster in clusters: + hits = list([ref for ref in cluster if ref in refs_hit]) + if hits: + outf.write('\t'.join(hits) + '\n') + +def parser_skani_contigs_to_refs(parser=argparse.ArgumentParser(description='Find closest references for contigs')): + parser.add_argument('inContigs', help='FASTA file containing contigs') + parser.add_argument('inRefs', nargs='+', help='FASTA files containing reference genomes') + parser.add_argument('out_skani_dist', help='Output file containing ANI distances between contigs and references') + parser.add_argument('out_skani_dist_filtered', help='Output file containing ANI distances between contigs and references, with only the top reference hit per cluster') + parser.add_argument('out_clusters_filtered', help='Output file containing clusters of highly-related genomes, with only clusters that have a hit to the contigs') + util.cmd.common_args(parser, (('threads', None), ('loglevel', None), ('version', None), ('tmp_dir', None))) + util.cmd.attach_main(parser, skani_contigs_to_refs, split_args=True) + return parser + +__commands__.append(('skani_contigs_to_refs', parser_skani_contigs_to_refs)) + + + def _order_and_orient_orig(inFasta, inReference, outFasta, outAlternateContigs=None, breaklen=None, # aligner='nucmer', circular=False, trimmed_contigs=None, diff --git a/test/input/TestOrderAndOrient/ref.ebov.gin.fasta b/test/input/TestOrderAndOrient/ref.ebov.gin.fasta new file mode 100644 index 00000000..9c7ed106 --- /dev/null +++ b/test/input/TestOrderAndOrient/ref.ebov.gin.fasta @@ -0,0 +1,271 @@ +>KT765130.1 Zaire ebolavirus isolate H.sapiens-wt/GIN/2014/Makona-Conakry-CREMS-1022, complete genome +CGGACACACAAAAAGAAAGAAGAATTTTTAGGATCTTTTGTGTGCGAATAACTATGAGGAAGATTAATAA +TTTTCCTCTCATTGAAATTTATATCAGAATTTAAATTGAAATTGTTACTGTAATCATACCTGGTTTGTTT +CAGAGCCATATCACCAAGATAGAGAACAACCTAGGTCTCCGGAGGGGGCAAGGGCATCAGTGTGCTCAGT +TGAAAATCCCTTGTCAACATCTAGGCCTTATCACATCACAAGTTCCGCCTTAAACTCTGCAGGGTGATCC +AACAACCTTAATAGCAACATTATTGTTAAAGGACAGCATTAGTTCACAGTCAAACAAGCAAGATTGGGAA +TTAACTTTGATTTTGAACCTGAACACCCAGAGGACTGGAGACTCAACAACCCTAAAGCCTGGGGTAAAAC +ATTAGAAATAGTTTAAAGACAAATTGCTCGGAATCACAAAATTCCGAGTATGGATTCTCGTCCTCAGAAA +GTCTGGATGACGCCGAGTCTCACTGAATCTGACATGGATTACCACAAGATCTTGACAGCAGGTCTGTCCG +TTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCAGTGTATCAAGTAAACAATCTTGAGGAAATTTGCCA +ACTTATCATACAGGCCTTTGAAGCTGGTGTTGATTTTCAAGAGAGTGCGGACAGTTTCCTTCTCATGCTT +TGTCTTCATCATGCGTACCAAGGAGATTACAAACTTTTCTTGGAAAGTGGCGCAGTCAAGTATTTGGAAG +GGCACGGGTTCCGTTTTGAAGTCAAGAAGTGTGATGGAGTGAAGCGCCTTGAGGAATTGCTGCCAGCAGT +ATCTAGTGGGAGAAACATTAAGAGAACACTTGCTGCCATGCCGGAAGAGGAGACGACTGAAGCTAATGCC +GGTCAGTTCCTCTCCTTTGCAAGTCTATTCCTTCCGAAATTGGTAGTAGGAGAAAAGGCTTGCCTTGAGA +AGGTTCAAAGGCAAATTCAAGTACATGCAGAGCAAGGACTGATACAATATCCAACAGCTTGGCAATCAGT +AGGACACATGATGGTGATTTTCCGTTTGATGCGAACAAATTTTTTGATCAAATTTCTTCTAATACACCAA +GGGATGCACATGGTTGCCGGACATGATGCCAACGATGCTGTGATTTCAAATTCAGTGGCTCAAGCTCGTT +TTTCAGGTCTATTGATTGTCAAAACAGTACTTGATCATATCCTACAAAAGACAGAACGAGGAGTTCGTCT +CCATCCTCTTGCAAGGACCGCCAAGGTAAAAAATGAGGTGAACTCCTTCAAGGCTGCACTCAGCTCCCTG +GCCAAGCATGGAGAGTATGCTCCTTTCGCCCGACTTTTGAACCTTTCTGGAGTAAATAATCTTGAGCATG +GTCTTTTCCCTCAACTGTCGGCAATTGCACTCGGAGTCGCCACAGCCCACGGGAGCACCCTCGCAGGAGT +AAATGTTGGAGAACAGTATCAACGGCTCAGAGAGGCAGCCACTGAGGCTGAGAAGCAACTCCAACAATAT +GCGGAGTCTCGTGAACTTGACCATCTTGGACTTGATGATCAGGAAAAGAAAATTCTTATGAGCTTCCATC +AGAAAAAGAACGAAATCAGCTTCCAGCAAACAAACGCGATGGTAACTCTAAGAAAAGAGCGCCTGGCCAA +GCTGACAGAAGCTATCACTGCTGCATCACTGCCCAAAACAAGTGGACATTACGATGATGATGACGACATT +CCCTTTCCAGGACCCATCAATGATGACAACAATCCTGGCCATCAAGATGATGATCCGACTGACTCACAGG +ATACGACCATTCCCGATGTGGTAGTTGACCCCGATGATGGAGGCTACGGCGAATACCAAAGTTACTCGGA +AAACGGCATGAGTGCACCAGATGACTTGGTCCTATTCGATCTAGACGAGGACGACGAGGACACCAAGCCA +GTGCCTAACAGATCGACCAAGGGTGGACAACAGAAAAACAGTCAAAAGGGCCAGCATACAGAGGGCAGAC +AGACACAATCCACGCCAACTCAAAACGTCACAGGCCCTCGCAGAACAATCCACCATGCCAGTGCTCCACT +CACGGACAATGACAGAAGAAACGAACCCTCCGGCTCAACCAGCCCTCGCATGCTGACCCCAATCAACGAA +GAGGCAGACCCACTGGACGATGCCGACGACGAGACGTCTTGCCTTCCGCCCTTAGAGTCAGATGATGAAG +AACAGGACAGGGACGGAACTTCTAACCGCACACCCACTGTCGCCCCACCGGCTCCCGTATACAGAGATCA +CTCCGAAAAGAAAGAACTCCTGCAAGATGAACAACAAGATCAGGACCACATTCAAGAGGCCAAGAACCAA +GACAGTGACAACACCCAGCCAGAACATTCTTTTGAGGAGATGTATCTCCACATTCTAAGATCACAGGGGC +CATTTGATGCCGTTTTGTATTATCATATGATGAAGGATGAGCCTGTAGTTTTCAGTACCAGTGATGGTAA +AGAGTACACGTATCCGGACTCCCTTGAAGAGGAATATCCACCATGGCTCACTGAAAAAGAGGCCATGAAT +GATGAGAATAGATTTGTTACACTGGATGGTCAACAATTTTATTGGCCAGTAATGAATCACAGGAATAAAT +TCATGGCAATCCTGCAACATCATCAGTGAATGAGCATGTAATAATGGGATGATTTAATCGACAAATAGCT +AACATTAAATAGTCAAGGAACGCAAACAGGAAGAATTTTTGATGTCTAAGGTGTGAATTATTATCACAAT +AAAAGTGATTCTTAGTTTTGAATTTAAAGCTAGCTTATTATTACTAGCCGTTTTTCAAAGTTCAATTTGA +GTCTTAATGCAAATAAGCGTTAAACCACAGTTATAGCCATAATGGTAACTCAATATCTTAGCCAGCGATT +TATCTAAATTAAATTACATTATGCTTTTATAACTTACCTACTAGCCTGCCCAACATTTACACGATCGTTT +TATAATTAAGAAAAAACTAATGATGAAGATTAAAACCTTCATCATCCTTACGTCAATTGAATTCTCTAGC +ACTAGAAGCTTATTGTCTTCAATGTAAAAGAAAAGCTGGCCTAACAAGATGACAACTAGAACAAAGGGCA +GGGGCCATACTGTGGCCACGACTCAAAACGACAGAATGCCAGGCCCTGAGCTTTCGGGCTGGATCTCTGA +GCAGCTAATGACCGGAAGGATTCCTGTAAACGACATCTTCTGTGATATTGAGAACAATCCAGGATTATGC +TACGCATCCCAAATGCAACAAACGAAGCCAAACCCGAAGATGCGCAACAGTCAAACCCAAACGGACCCAA +TTTGCAATCATAGTTTTGAGGAGGTAGTACAAACATTGGCTTCATTGGCTACTGTTGTGCAACAACAAAC +CATCGCATCAGAATCATTAGAACAACGCATTACGAGTCTTGAGAATGGTCTAAAGCCAGTTTATGATATG +GCAAAAACAATCTCCTCATTGAACAGGGTTTGTGCTGAGATGGTTGCAAAATATGATCTTCTGGTGATGA +CAACCGGTCGGGCAACAGCAACCGCTGCGGCAACTGAGGCTTATTGGGCTGAACATGGTCAACCACCACC +TGGACCATCACTTTATGAAGAAAGTGCGATTCGGGGTAAGATTGAATCTAGAGATGAGACTGTCCCTCAA +AGTGTTAGGGAGGCATTCAACAATCTAGACAGTACCACTTCACTAACTGAGGAAAATTTTGGGAAACCTG +ACATTTCGGCAAAGGATTTGAGAAACATTATGTATGATCACTTGCCTGGTTTTGGAACTGCTTTCCACCA +ATTAGTACAAGTGATTTGTAAATTGGGAAAAGATAGCAATTCATTGGACATTATTCATGCTGAGTTCCAG +GCCAGCCTGGCTGAAGGAGACTCCCCTCAATGTGCCCTAATTCAAATTACAAAAAGAGTTCCAATCTTCC +AAGATGCTGCTCCACCTGTCATCCACATCCGCTCTCGAGGTGACATTCCCCGAGCTTGCCAGAAGAGCTT +GCGTCCAGTCCCACCATCACCCAAGATTGATCGAGGTTGGGTATGTGTTTTTCAGCTTCAAGATGGTAAA +ACACTTGGACTCAAAATTTGAGCCAATCTCTTTTCCCTCCGAAAGAGGCAACTAATAGCAGAGGCTTCAA +CTGCTGAACTATAGGGTATGTTACATTAATGATACACTTGTGAGTATCAGCCCTAGATAATATAAGTCAA +TTAAACAACCAAGATAAAATTGTTCATATCCCGCTAGCAGCTTTAAAGATAAATGTAATAGGAGCTATAC +CTCTGACAGTATTATAATTAATTGTTATTAAGTAACCCAAACCAAAAATGATGAAGATTAAGAAAAACCT +ACCTCGACTGAGAGAGTGTTTTTTCATTAACCTTCATCTTGTAAACGTTGAGCAAAATTGTTAAAAATAT +GAGGCGGGTTATATTGCCTACTGCTCCTCCTGAATATATGGAGGCCATATACCCTGCCAGGTCAAATTCA +ACAATTGCTAGGGGTGGCAACAGCAATACAGGCTTCCTGACACCGGAGTCAGTCAATGGAGACACTCCAT +CGAATCCACTCAGGCCAATTGCTGATGACACCATCGACCATGCCAGCCACACACCAGGCAGTGTGTCATC +AGCATTCATCCTCGAAGCTATGGTGAATGTCATATCGGGCCCCAAAGTGCTAATGAAGCAAATTCCAATT +TGGCTTCCTCTAGGTGTCGCTGATCAAAAGACCTACAGCTTTGACTCAACTACGGCCGCCATCATGCTTG +CTTCATATACTATCACCCATTTCGGCAAGGCAACCAATCCGCTTGTCAGAGTCAATCGGCTGGGTCCTGG +AATCCCGGATCACCCCCTCAGGCTCCTGCGAATTGGAAACCAGGCTTTCCTCCAGGAGTTCGTTCTTCCA +CCAGTCCAACTACCCCAGTATTTCACCTTTGATTTGACAGCACTCAAACTGATCACTCAACCACTGCCTG +CTGCAACATGGACCGATGACACTCCAACTGGATCAAATGGAGCGTTGCGTCCAGGAATTTCATTTCATCC +AAAACTTCGCCCCATTCTTTTACCCAACAAAAGTGGGAAGAAGGGGAACAGTGCCGATCTAACATCTCCG +GAGAAAATCCAAGCAATAATGACTTCACTCCAGGACTTTAAGATCGTTCCAATTGATCCAACCAAAAATA +TCATGGGTATCGAAGTGCCAGAAACTCTGGTCCACAAGCTGACCGGTAAGAAGGTGACTTCCAAAAATGG +ACAACCAATCATCCCTGTTCTTTTGCCAAAGTACATTGGGTTGGACCCGGTGGCTCCAGGAGACCTCACC +ATGGTAATCACACAGGATTGTGACACGTGTCATTCTCCTGCAAGTCTTCCAGCTGTGGTTGAGAAGTAAT +TGCAATAATTGACTCAGATCCAGTTTTACAGAATCTTCTCAGGGATAGTGATAACATCTTTTTAATAATC +CGTCTACTAGAAGAGATACTTCTAATTGATCAATATACTAAAGGTGCTTTACACCATTGTCTCTTTTCTT +TCGTAAATGTAGAGCTTAACAAAAGACTCATAATATACCTGTTTTTAAAAGATTGATTGATGAAAGATCA +TGACTAATAACATTACAAACAATCCTACTATAATCAATACGGTGATTCAAATGTCAATCTTTCTCATTGC +ACATACTCTTTGTCCTTATCCTCAAATTGCCTACATGCTTACATCTGAGGACAGCCAGTGTGACTTGGAT +TGGAGATGTGGAGGAAAAATCGGGGCCCATTTCTAAGTTGTTCACAATCTAAGTACAGACATTGCTCTTC +TAATTAAGAAAAAATCGGCGATGAAGATTAAGCCGACAGTGAGCGTAATCTTCATCTCTCTTAGATTATT +TGTCTTCCAGAGTAGGGGTCATCAGGTCCTTTTCAATTGGATAACCAAAATAAGCTTCACTAGAAGGATA +TTGTGAGGCGACAACACAATGGGTGTTACAGGAATATTGCAGTTACCTCGTGATCGATTCAAGAGGACAT +CATTCTTTCTTTGGGTAATTATCCTTTTCCAAAGAACATTTTCCATCCCGCTTGGAGTTATCCACAATAG +TACATTACAGGTTAGTGATGTCGACAAACTAGTTTGTCGTGACAAACTGTCATCCACAAATCAATTGAGA +TCAGTTGGACTGAATCTCGAGGGGAATGGAGTGGCAACTGACGTGCCATCTGTGACTAAAAGATGGGGCT +TCAGGTCCGGTGTCCCACCAAAGGTGGTCAATTATGAAGCTGGTGAATGGGCTGAAAACTGCTACAATCT +TGAAATCAAAAAACCTGACGGGAGTGAGTGTCTACCAGCAGCGCCAGACGGGATTCGGGGCTTCCCCCGG +TGCCGGTATGTGCACAAAGTATCAGGAACGGGACCATGTGCCGGAGACTTTGCCTTCCACAAAGAGGGTG +CTTTCTTCCTGTATGATCGACTTGCTTCCACAGTTATCTACCGAGGAACGACTTTCGCTGAAGGTGTCGT +TGCATTTCTGATACTGCCCCAAGCTAAGAAGGACTTCTTCAGCTCACACCCCTTGAGAGAGCCGGTCAAT +GCAACGGAGGACCCGTCGAGTGGCTATTATTCTACCACAATTAGATATCAGGCTACCGGTTTTGGAACTA +ATGAGACAGAGTACTTGTTCGAGGTTGACAATTTGACCTACGTCCAACTTGAATCAAGATTCACACCACA +GTTTCTGCTCCAGCTGAATGAGACAATATATGCAAGTGGGAAGAGGAGCAACACCACGGGAAAACTAATT +TGGAAGGTCAACCCCGAAATTGATACAACAATCGGGGAGTGGGCCTTCTGGGAAACTAAAAAAACCTCAC +TAGAAAAATTCGCAGTGAAGAGTTGTCTTTCACAGCTGTATCAAACGGACCCAAAAACATCAGTGGTCAG +AGTCCGGCGCGAACTTCTTCCGACCCAGAGACCAACACAACAAATGAAGACCACAAAATCATGGCTTCAG +AAAATTCCTCTGCAATGGTTCAAGTGCACAGTCAAGGAAGGAAAGCTGCAGTGTCGCATCTGACAACCCT +TGCCACAATCTCCACGAGTCCTCAACCTCCCACAACCAAAACAGGTCCGGACAACAGCACCCATAATACA +CCCGTGTATAAACTTGACATCTCTGAGGCAACTCAAGTTGGACAACATCACCGTAGAGCAGACAACGACA +GCACAGCCTCCGACACTCCCCCCGCCACGACCGCAGCCGGACCCTTAAAAGCAGAGAACACCAACACGAG +TAAGAGCGCTGACTCCCTGGACCTCGCCACCACGACAAGCCCCCCAAACTACAGCGAGACTGCTGGCAAC +AACAACACTCATCACCAAGATACCGGAGAAGAGAGTGCCAGCAGCGGGAAGCTAGGCTTAATTACCAATA +CTATTGCTGGAGTAGCAGGACTGATCACAGGCGGGAGAAGGACTCGAAGAGAAGTAATTGTCAATGCTCA +ACCCAAATGCAACCCCAATTTACATTACTGGACTACTCAGGATGAAGGTGCTGCAATCGGATTGGCCTGG +ATACCATATTTCGGGCCAGCAGCCGAAGGAATTTACACAGAGGGGCTAATGCACAACCAAGATGGTTTAA +TCTGTGGGTTGAGGCAGCTGGCCAACGAAACGACTCAAGCTCTCCAACTGTTCCTGAGAGCCACAACTGA +GCTGCGAACCTTTTCAATCCTCAACCGTAAGGCAATTGACTTCCTGCTGCAGCGATGGGGTGGCACATGC +CACATTTTGGGACCGGACTGCTGTATCGAACCACATGATTGGACCAAGAACATAACAGACAAAATTGATC +AGATTATTCATGATTTTGTTGATAAAACTCTTCCGGACCAGGGGGACAATGACAATTGGTGGACAGGATG +GAGACAATGGATACCGGCAGGTATTGGAGTTACAGGTGTTATAATTGCAGTTATCGCTTTATTCTGTATA +TGCAAATTTGTCTTTTAGTCTTTCTTCAGATTGTTTCACGGCAAAACTCAACCTCAAATCAATGAAACTA +GGATTTAATTATATGAATCACTTGAATCTAAGATTACTTGACAAATGATAACATAATACACTGGAGCTTC +AAACATAGCCAATGTGATTCTAACTCCTTTAAACTCACAGTTAATCATAAACAAGGTTTGACATCAATCT +AGCTATATCTTTAAGAATGATAAACTTGATGAAGATTAAGAAAAAGGTAATCTTTCGATTATCTTTAGTC +TTCATCCTTGATTCTACAATCATGACAGTTGTCTTTAATGAAAAAGGAAAAAAGCCTTTTTATTAAGTTG +TAATAATCAGATCTGCAAACCGGTAGAATTTAGTTGTAACCTAACACACACAAAGCATTGGTAAAAAAGT +CAATAGAAATTTAAACAGTGAGTGCAGACAACTCTTAAATGGAAGCTTCATATGAGAGAGGACGCCCCCG +AGCTGCCAGACAGCATTCAAGGGATGGACACGACCACCATGTTCGAGCACGATCATCATCCAGAGAGAAT +TATCGAGGTGAGTACCGTCAATCAAGGAGCGCCTCACAAGTGCGCGTTCCTACTGTATTTCATAAGAAGA +GAGTTGAACCATTAACAGTTCCTCCAGCACCTAAAGACATATGTCCGACCTTGAAAAAAGGATTTTTGTG +TGACAGTAGTTTTTGCAAAAAAGACCACCAGTTAGAAAGTTTAACTGATAGGGAATTACTCCTACTAATC +GCCCGTAAGACTTGTGGATCAGTAGAACAACAATTAAATATAACTGCACCCAAGGACTCGCGCTTAGCAA +ATCCAACGGCTGATGATTTCCAGCAAGAGGAAGGTCCCAAAATTACCTTGTTGACACTGATCAAGACGGC +AGAACACTGGGCGAGACAAGACATCCGAACCATAGAGGATTCCAAATTAAGGGCATTGTTAACTCTATGT +GCTGTGATGACGAGGAAATTCTCAAAATCCCAGCTGAGTCTTTTGTGTGAGACACACCTAAGGCGCGAAG +GGCTTGGGCAAGATCAGGCAGAACCCGTTCTCGAAGTATATCAACGATTACACAGTGATAAAGGAGGCAG +TTTTGAAGCTGCACTATGGCAACAATGGGACCGACAATCCCTAATTATGTTTATCACTGCATTCTTGAAT +ATCGCTCTCCAGTTACCGTGTGAGAGTTCTGCTGTCGTTGTTTCAGGGTTAAGAACATTGGTTCCTCAAT +CAGATAATGAGGAAGCTTCAACCAACCCGGGGACATGCTCATGGTCTGATGAGGGTACCCCTTAATAAGG +CTGACTAAAACACTATATAACCTTCTACTTGATCACAATACTCCGTATACCTATCATCATATATTTAATC +AAGACGATATCCTTTAAAACTTATTCAGTACTATAATCACTCTCATTTCAAATTGATAAGATATGCATAA +TTGCCTTAATATATAAAGAGGTATGATATAACCCAAACATTGACCAAAGAAAATCATAATCTCGTATCGC +TCGCAATATAACCTGCCAAGCATACCTCTTGCACAAAGTGATTCTTGTACACAAATAATGTTTGACTCTA +CAGGAGGTAGCAACGATCCATCTCATCAAAAAATAAGTATTTTATGATTTACTAATGATCTCTTAAAATA +TTAAGAAAAACTGACGGAACATAAATTCTTTCTGCTTCAAGTTGTGGAGGAGGTCTATGGTATTCGCTAT +TGTTATATTACAATCAATAACAAGCTTGTAAAAATATTGTTCTTGTTTCAGGAGGTATATTGTGACCGGA +AAAGCTAAACTAATGATGAAGATTAATGCGGAGGTCTGATGAGAATAAACCTTATTATTCAGATTAGGCC +CCAAGAGGCATTCTTCATCTCCTTTTAGCAAAATACTATTTCAGGATAGTCCAGCTAGTGACACGTCTTT +TAGCTGTATACCAGTTGCCCCTGAGATACGCCACAAAAGTGTCTCTGAGCTAAAGTGGTCTGTACACATC +TCATACATTGTATTAGGGGCAATAATATCTAATTGAACTTAGCCATTTAAAATTTAGTGCATAAATCTGG +GCTAACTCCACCAGGTCAACTCCATTGGCTGAAAAGAAGCCCACCTACAACGAACATTACTTTGAGCACC +CTCACAATTAAAAAATAAGAGCGTCGTTCCAACAATCGAGCGCAAGGTTACAAGGTTGAACTGAGAGTGT +CTAGACAACAAAATATCGATACTCCAGACACCAAGCAAGACCTGAGAAAAAACCATGGCCAAAGCTACGG +GACGATACAATCTAATATCGCCCAAAAAGGACCTGGAGAAAGGGGTTGTCTTAAGCGACCTCTGTAACTT +CTTAGTTAGTCAAACTATTCAAGGGTGGAAAGTTTATTGGGCTGGTATTGAGTTTGATGTGACTCACAAA +GGAATGGCCCTATTGCATAGACTGAAAACTAATGACTTTGCCCCTGCATGGTCAATGACAAGGAACCTAT +TTCCCCATTTATTTCAAAATCCGAATTCCACTATTGAATCACCGCTGTGGGCACTGAGAGTCATCCTTGC +AGCAGGGATACAGGACCAGTTAATTGACCAGTCTTTGATTGAACCCTTAGCAGGAGCCCTTGGTCTGATC +TCTGATTGGCTGCTAACAACCAACACTAACCATTTCAACATGCGAACACAACGTGTCAAGGAACAATTGA +GCCTAAAAATGCTGTCGTTGATTCGATCCAATATTCTCAAGTTTATTAACAAATTGGATGCTCTACATGT +CGTGAACTACAATGGATTATTGAGCAGTATTGAAATTGGAACTCAAAATCATACAATCATCATAACTCGA +ACTAACATGGGTTTTCTGGTGGAGCTCCAAGAACCCGACAAATCGGCAATGAACCGCAAGAAGCCTGGGC +CGGCGAAATTTTCCCTCCTTCATGAGTCCACACTGAAAGCATTTACACAAGGGTCCTCGACACGAATGCA +AAGTTTAATTCTTGAATTCAATAGCTCTCTTGCTATCTAACTAAGATGGAATACTTCATATTGGGCTAAC +TCATATATGCTGACTCAATAGTTAACTTGACATCTCTGCCTTCATAATCAGATATATAAGCATAATAAAT +AAATACTCATATTTCTTGATAATTTGTTTAACCACAGATAAATCCTCACTGTAAGCCAGCTTCCAAGTTG +ACACCCTTACAAAAACCAGGACTCAGAATCCCTCAAATAAGAGATTCCAAGACAACATCATAGAATTGCT +TTATTATATTAATAAGCATTTTATCACTAGAAATCCAATATACGAAATGGTTAATTGTAACTAAACCCGC +AGGTCATGTGTGTTAGGTTTCACAAATTATATATATTACTAACTCCATACTCGTAACTAACATTAGATAA +GTAGGTTAAGAAAAAAGCTTGAGGAAGATTAAGAAAAACTGCTTATTGGGTCTTTCCGTGTTTTAGATGA +AGCAGTTGACATTCTTCCTCTTGATATTAAATGGCTACACAACATACCCAATACCCAGACGCCAGGTTAT +CATCACCAATTGTATTGGACCAATGTGACCTTGTCACTAGAGCTTGCGGGTTGTATTCATCATACTCCCT +TAATCCGCAACTACGCAACTGTAAACTCCCGAAACATATATACCGTTTAAAATATGATGTAACTGTTACC +AAGTTCTTAAGTGATGTACCAGTGGCGACATTGCCCATAGATTTCATAGTCCCAATTCTTCTCAAGGCAC +TATCAGGCAATGGGTTCTGTCCTGTTGAGCCGCGGTGCCAACAGTTCTTAGATGAAATTATTAAGTACAC +AATGCAAGATGCTCTCTTCCTGAAATATTATCTCAAAAATGTGGGTGCTCAAGAAGACTGTGTTGATGAC +CACTTTCAAGAAAAAATCTTATCTTCAATTCAGGGCAATGAATTTTTACATCAAATGTTTTTCTGGTATG +ACCTGGCTATTTTAACTCGAAGGGGTAGATTAAATCGAGGAAACTCTAGATCAACGTGGTTTGTTCATGA +TGATTTAATAGACATCTTAGGCTATGGGGACTATGTTTTTTGGAAGATCCCAATTTCACTGTTACCACTG +AACACACAAGGAATCCCCCATGCTGCTATGGATTGGTATCAGACATCAGTATTCAAAGAAGCGGTTCAAG +GGCATACACACATTGTTTCTGTTTCTACTGCCGATGTCTTGATAATGTGCAAAGATTTAATTACATGTCG +ATTCAACACAACTCTAATCTCAAAAATAGCAGAGGTTGAGGACCCAGTTTGCTCTGATTATCCCAATTTT +AAGATTGTGTCTATGCTTTACCAGAGCGGAGATTACTTACTCTCCATATTAGGGTCTGATGGGTATAAAA +TCATTAAGTTTCTCGAACCATTGTGCTTGGCTAAAATTCAATTGTGCTCAAAGTACACCGAGAGGAAGGG +CCGATTCTTAACACAAATGCATTTAGCTGTAAATCACACCCTGGAAGAAATTACAGAAATACGTGCACTA +AAGCCTTCACAGGCTCACAAGATCCGTGAATTCCATAGAACATTGATAAGGCTGGAGATGACGCCACAAC +AACTTTGTGAGCTATTTTCCATACAAAAACACTGGGGGCATCCTGTGCTACATAGTGAAACAGCAATCCA +AAAAGTTAAAAAACATGCTACGGTGCTAAAAGCATTACGCCCTATCGTGATTTTCGAGACATATTGTGTT +TTTAAATATAGCATTGCAAAACATTATTTTGATAGTCAAGGATCTTGGTACAGTGTTACCTCAGATAGAA +ATCTAACACCAGGTCTTAATTCTTATATCAAAAGAAATCAATTCCCTCCGTTGCCAATGATTAAAGAACT +GCTATGGGAATTTTACCACCTTGACCATCCTCCACTTTTCTCAACCAAAATTATTAGTGACTTAAGTATT +TTTATAAAAGACAGAGCTACTGCAGTAGAAAGGACATGCTGGGATGCAGTATTCGAGCCTAATGTTCTGG +GATATAATCCACCTCACAAATTCAGTACCAAACGTGTACCGGAACAATTTTTAGAGCAAGAAAACTTTTC +TATTGAGAATGTTCTTTCCTACGCGCAAAAACTCGAGTATCTACTACCACAATATCGGAATTTTTCTTTC +TCATTGAAAGAGAAAGAGTTGAATGTAGGTAGAACTTTCGGAAAATTGCCTTATCCGACTCGCAATGTTC +AAACACTTTGTGAAGCTCTGTTAGCTGATGGTCTTGCTAAAGCATTTCCTAGCAATATGATGGTAGTTAC +GGAACGTGAACAAAAAGAAAGCTTATTGCATCAAGCATCATGGCACCACACAAGTGATGATTTCGGTGAG +CATGCCACAGTTAGAGGGAGTAGCTTTGTAACTGATTTAGAGAAATACAATCTTGCATTTAGGTATGAAT +TTACAGCACCTTTTATAGAATATTGCAACCGTTGCTATGGTGTTAAGAATGTTTTTAATTGGATGCATTA +TACAATCCCACAGTGTTATATGCATGTCAGTGATTATTATAATCCACCGCATAACCTCACACTGGAAAAT +CGAAACAACCCCCCTGAAGGGCCTAGTTCATACAGGGGTCATATGGGAGGGATTGAAGGACTGCAACAAA +AACTCTGGACAAGTATTTCATGTGCTCAAATTTCTTTAGTTGAAATTAAGACTGGCTTTAAGTTGCGCTC +AGCTGTGATGGGTGACAATCAGTGCATTACCGTTTTATCAGTCTTCCCCTTAGAGACTGATGCAGGCGAG +CAGGAACAGAGCGCCGAGGACAATGCAGCGAGGGTGGCCGCCAGCCTAGCAAAAGTTACAAGTGCCTGTG +GAATCTTTTTAAAACCTGATGAAACATTTGTACATTCAGGTTTTATCTATTTTGGAAAAAAACAATATTT +GAATGGGGTCCAATTGCCCCAGTCCCTTAAAACGGCTACAAGAATGGCACCATTGTCTGATGCAATTTTT +GATGATCTTCAAGGGACCCTGGCTAGTATAGGTACTGCTTTTGAGCGATCCATCTCTGAGACACGACATA +TCTTTCCTTGCAGAATAACCGCAGCTTTCCATACGTTCTTTTCGGTGAGAATCTTGCAATATCATCACCT +CGGATTTAATAAAGGTTTTGACCTTGGACAGTTAACACTCGGCAAACCTCTGGATTTCGGAACAATATCA +TTGGCACTAGCGGTACCGCAGGTGCTTGGAGGGTTATCCTTCTTGAATCCTGAGAAATGTTTCTACCGGA +ATCTAGGAGATCCAGTTACCTCAGGTTTATTCCAGTTAAAAACTTATCTCCGAATGATTGAGATGGATGA +TTTATTCTTACCTTTAATTGCGAAGAACCCTGGGAACTGCACTGCCATTGACTTTGTGCTAAATCCTAGC +GGATTAAATGTTCCTGGGTCGCAAGACTTAACTTCATTTCTGCGCCAGATTGTACGTAGGACTATCACCC +TAAGTGCGAAAAACAAACTTATTAATACCTTATTTCATGCATCAGCTGACTTCGAAGACGAAATGGTTTG +TAAGTGGCTCTTATCATCAACTCCTGTTATGAGTCGTTTCGCAGCCGATATATTTTCACGCACGCCGAGC +GGGAAGCGATTGCAAATTCTAGGATACTTGGAAGGAACACGCACATTATTAGCCTCTAAGATCATCAACA +ATAATACAGAGACGCCGGTTTTGGACAGACTGAGGAAGATAACATTGCAAAGGTGGAGTCTATGGTTTAG +TTATCTTGATCATTGTGATAATATCCTGGCGGAGGCTTTAACCCAAATAACTTGCACAGTTGATTTAGCA +CAGATCCTGAGGGAATATTCATGGGCACATATTTTAGAGGGGAGACCTCTTATTGGAGCCACACTCCCAT +GTATGATTGAGCAATTCAAAGTGGTTTGGCTGAAACCCTACGAACAATGTCCGCAGTGTTCAAATGCCAA +GCAACCTGGTGGGAAACCATTCGTGTCAGTAGCAGTCAAGAAACATATTGTTAGTGCATGGCCAAATGCA +TCCCGAATAAGCTGGACTATCGGGGATGGAATCCCATACATTGGATCAAGGACAGAAGATAAGATAGGGC +AACCTGCTATTAAACCAAAATGTCCTTCCGCAGCCTTAAGAGAGGCCATTGAATTGGCGTCCCGTTTAAC +ATGGGTAACTCAAGGCAGTTCGAACAGTGACTTGCTAATAAAACCATTTTTGGAAGCACGAGTAAATTTA +AGTGTTCAAGAAATACTTCAAATGACCCCTTCACATTACTCGGGAAATATTGTTCATAGGTACAACGATC +AATACAGTCCTCATTCTTTCATGGCCAATCGTATGAGTAACTCAGCAACGCGATTGATTGTTTCTACAAA +CACTTTAGGTGAGTTTTCAGGAGGTGGCCAATCGGCACGCGACAGCAATATTATTTTCCAGAATGTTATA +AATTATGCAGTTGCACTGTTCGATATTAAATTTAGAAACACTGAGGCTACAGATATCCAGTATAATCGTG +CTCACCTTCATCTAACTAAGTGTTGCACCCGGGAGGTACCAGCTCAGTACTTAACATACACATCTACATT +GGATTTAGATTTAACAAGATACCGAGAAAATGAATTGATTTATGACAATAATCCTCTAAAAGGAGGACTC +AATTGCAATATCTCATTTGATAACCCATTTTTCCAAGGCAAACAGCTGAACATTATAGAAGATGACCTTA +TTCGACTGCCTCACTTATCTGGATGGGAGCTAGCTAAGACCATCATGCAATCAATTATTTCAGATAGCAA +TAATTCGTCTACAGACCCAATTAGCAGTGGAGAAACAAGATCATTCACTACCCATTTCTTAACTTATCCC +AAAATAGGACTTCTGTACAGTTTTGGGGCCTTTGTAAGTTATTATCTTGGCAATACAATTCTTCGGACTA +AGAAATTAACACTTGACAATTTTTTATATTACTTAACTACCCAAATTCATAATCTACCACATCGCTCATT +GCGAATACTTAAGCCAACATTCAAACATGCAAGCGTTATGTCACGATTAATGAGTATTGATCCCCATTTT +TCTATTTACATAGGCGGTGCTGCAGGTGACAGAGGACTCTCAGATGCGGCCAGGTTATTTTTGAGAACGT +CCATTTCATCTTTTCTTACATTTGTAAAGGAATGGATAATTAATCGCGGAACAATTGTCCCTTTATGGAT +AGTATATCCATTAGAGGGTCAAAATCCAACACCTGTTAATAATTTCCTCCATCAGATCGTAGAACTGCTG +GTGCATGATTCATCAAGACACCAGGCTTTTAAAACTACCATAAATGATCATGTACATCCTCACGACAATC +TTGTTTACACATGTAAGAGTACAGCCAGCAATTTCTTCCATGCGTCATTGGCGTACTGGAGGAGCAGGCA +CAGAAACAGCAACCGAAAAGACTTGACAAGAAACTCTTCAACTGGATCAAGCACAAACAACAGTGATGGT +CATATTAAGAGAAGTCAAGAACAAACCACCAGAGATCCACATGATGGCACTGAACGGAGTCTAGTCCTGC +AAATGAGCCATGAAATAAAAAGAACGACAATTCCACAAGAGAACACGCACCAGGGTCCGTCGTTCCAGTC +ATTTCTAAGTGACTCTGCTTGCGGTACAGCAAACCCAAAACTAAATTTCGATAGATCGAGACACAATGTG +AAATCTCAGGATCATAACTCAGCATCCAAGAGGGAAGGTCATCAAATAATCTCACATCGTCTAGTCCTAC +CTTTCTTTACATTATCTCAAGGGACACGCCAATTAACGTCATCCAATGAGTCACAAACCCAAGATGAGAT +ATCAAAGTACTTACGGCAATTGAGATCCGTCATTGATACCACAGTTTATTGTAGGTTTACCGGTATAGTC +TCGTCCATGCATTACAAACTTGATGAGGTCCTTTGGGAAATAGAGAATTTTAAGTCGGCTGTGACGCTGG +CAGAGGGAGAAGGTGCTGGTGCCTTACTATTGATTCAGAAATACCAAGTTAAGACCTTATTCTTCAACAC +GCTAGCTACTGAGTCCAGTATAGAGTCAGAAATAGTATCAGGAATGACTACTCCTAGGATGCTTCTACCT +GTTATGTCAAAATTCCATAATGACCAAATTGAGATTATTCTTAACAACTCAGCAAGCCAAATAACAGACA +TAACAAATCCTACTTGGTTTAAAGACCAAAGAGCAAGGCTACCTAGGCAAGTCGAGGTTATAACCATGGA +TGCAGAGACGACAGAGAATATAAACAGATCGAAATTGTACGAAGCTGTACATAAATTGATCTTACACCAT +GTTGATCCCAGCGTATTGAAAGCAGTGGTCCTTAAAGTCTTTCTAAGTGATACCGAGGGTATGTTATGGC +TAAATGATAATCTAGCCCCGTTTTTTGCCACTGGGTATTTAATTAAGCCAATAACGTCAAGTGCCAGGTC +TAGTGAGTGGTATCTTTGTCTGACGAACTTCTTATCAACTACACGTAAGATGCCACACCAAAACCATCTC +AGTTGTAAGCAGGTAATACTTACGGCATTGCAACTGCAAATTCAACGGAGCCCATACTGGCTAAGTCATT +TAACTCAGTATGCTGACTGCGATTTACATTTAAGCTATATCCGCCTTGGTTTTCCATCATTAGAGAAAGT +ATTATACCACAGGTATAACCTTGTCGATTCAAAAAGAGGTCCACTAGTCTCTGTCACTCAGCACTTAGCA +CATCTTAGGGCAGAGATTCGAGAATTGACCAATGATTATAATCAACAGCGACAAAGTCGGACTCAAACAT +ATCACTTTATTCGTACTGCAAAAGGACGAATCACAAAACTAGTCAATGATTATTTAAAATTCTTTCTTAT +TGTACAAGCATTAAAACATAATGGGACATGGCAAGCTGAGTTTAAGAAATTACCAGAGTTGATTAGTGTG +TGCTATAGGTTCTATCATATTAGAGATTGTAATTGTGAAGAACGTTTCTTAGTTCAAACCTTATATTTAC +ATAGAATGCAGGATTCTGAAGTTAAGCTTATCGAAAGGCTGACAGGGCTTCTGAGTTTATTTCCAGATGG +TCTCTACAGGTTCGATTGAATAACCGTGCATAGTATTTTGATACTTGTAAAGGTTGGTTATCAACATACA +GATTATAAAAAACTCATAAATTGCTCTCATACATCATCTTGATCTGATTTCAATAAATAACTATTTAGAT +AACGAAAGGAGTCCTTACATTATACACTATATTTGGCCTCTCTCCCTGCGTGATAATCAAAAATTCACAA +TACAGCATGTGTGACATATTACTGCTGCAATGAGTCTAACGCAACATAATAAACTCCGCACTCTTTATAA +TTAAGCTTTAACGATAGGTCTGGGCTCATATTGTTATTGATATAGTAATGTTGTATCAATATCTTGCCAG +ATGGAATAGTGCTTTGGTTGATAACACGACTTCTTAAAACAAAACTGATCTTTAAGATTAAGTTTTTTAT +AATTGTCATTGCTTTAATTTGTCGATTTAAAAATGGTGATAGCCTTAATCTTTGTGTAAAATAAGAGATT +AGGTGTAATAACTTTAACATTTTTGTCTAGTAAGCTACTATTCCATTCAGAATGATAAAATTAAAAGAAA +AGACAGGACTGTAAAATCAGAAATACCTTCTTTACAATATACCAGACTAGACAATAATCTTCGTGTTAAT +GATAATTAAGACATTGACCACGCTCATCAGAAGGCTC diff --git a/test/input/TestOrderAndOrient/ref.ebov.lbr.fasta b/test/input/TestOrderAndOrient/ref.ebov.lbr.fasta new file mode 100644 index 00000000..53277862 --- /dev/null +++ b/test/input/TestOrderAndOrient/ref.ebov.lbr.fasta @@ -0,0 +1,271 @@ +>KX009902.1 Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/LBR/2014/Makona-CDC/NIH-1122, partial genome +TTTGTGTGCGAATAACTATGAGGAAGATTAATAATTTTCCTCTCATTGAAATTTATATCGGAATTTAAAT +TGAAATTGTTACTGTAATCATACCTGGTTTGTTTCAGAGCCATATCACCAAGATAGAGAACAACCTAGGT +CTCCGGAGGGGGCAAGGGCATCAGTGTGCTCAGTTGAAAATCCCTTGTCAACATCTAGGCCTTATCACAT +CACAAGTTCCGCCTTAAACTCTGCAGGGTGATCCAACAACCTTAATAGCAACATTATTGTTAAAGGACAG +CATTAGTTCACAGTCAAACAAGCAAGATTGAGAATTAACTTTGATTTTGAACCTGAACACCCAGAGGACT +GGAGACTCAACAACCCTAAAGCCTGGGGTAAAACATTAGAAATAGTTTAAAGACAAATTGCTCGGAATCA +CAAAATTCCGAGTATGGATTCTCGTCCTCAGAAAGTCTGGATGACGCCGAGTCTCACTGAATCTGACATG +GATTACCACAAGATCTTGACAGCAGGTCTGTCCGTTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCAG +TGTATCAAGTAAACAATCTTGAGGAAATTTGCCAACTTATCATACAGGCCTTTGAAGCTGGTGTTGATTT +TCAAGAGAGTGCGGACAGTTTCCTTCTCATGCTTTGTCTTCATCATGCGTACCAAGGAGATTACAAACTT +TTCTTGGAAAGTGGCGCAGTCAAGTATTTGGAAGGGCACGGGTTCCGTTTTGAAGTCAAGAAGTGTGATG +GAGTGAAGCGCCTTGAGGAATTGCTGCCAGCAGTATCTAGTGGGAGAAACATTAAGAGAACACTTGCTGC +CATGCCGGAAGAGGAGACGACTGAAGCTAATGCCGGTCAGTTCCTCTCCTTTGCAAGTCTATTCCTTCCG +AAATTGGTAGTAGGAGAAAAGGCTTGCCTTGAGAAGGTTCAAAGGCAAATTCAAGTACATGCAGAGCAAG +GACTGATACAATATCCAACAGCTTGGCAATCAGTAGGACACATGATGGTGATTTTCCGTTTGATGCGAAC +AAATTTTTTGATCAAATTTCTTCTAATACACCAAGGGATGCACATGGTTGCCGGACATGATGCCAACGAT +GCTGTGATTTCAAATTCAGTGGCTCAAGCTCGTTTTTCAGGTCTATTGATTGTCAAAACAGTACTTGATC +ATATCCTACAAAAGACAGAACGAGGAGTTCGTCTCCATCCTCTTGCAAGGACCGCCAAGGTAAAAAATGA +GGTGAACTCCTTCAAGGCTGCACTCAGCTCCCTGGCCAAGCATGGAGAGTATGCTCCTTTCGCCCGACTT +TTGAACCTTTCTGGAGTAAATAATCTTGAGCATGGTCTTTTCCCTCAACTGTCGGCAATTGCACTCGGAG +TCGCCACAGCCCACGGGAGCACCCTCGCAGGAGTAAATGTTGGAGAACAGTATCAACAGCTCAGAGAGGC +AGCCACTGAGGCTGAGAAGCAACTCCAACAATATGCGGAGTCTCGTGAACTTGACCATCTTGGACTTGAT +GATCAGGAAAAGAAAATTCTTATGAACTTCCATCAGAAAAAGAACGAAATCAGCTTCCAGCAAACAAACG +CGATGGTAACTCTCAGAAAAGAGCGCCTGGCCAAGCTGACAGAAGCTATCACTGCTGCATCACTGCCCAA +AACAAGTGGACATTACGATGATGATGACGACATTCCCTTTCCAGGACCCATCAATGATGACGACAATCCT +GGCCATCAAGATGATGATCCGACTGACTCACAGGATACGACCATTCCCGATGTGGTAGTTGACCCCGATG +ATGGAGGCTACGGCGAATACCAAAGTTACTCGGAAAACGGCATGAGTGCACCAGATGACTTGGTCCTATT +CGATCTAGACGAGGACGACGAGGACACCAAGCCAGTGCCTAACAGATCGACCAAGGGTGGACAACAGAAA +AACAGTCAAAAGGGCCAGCATACAGAGGGCAGACAGACACAATCCACGCCAACTCAAAACGTCACAGGCC +CTCGCAGAACAATCCACCATGCCAGTGCTCCACTCACGGACAATGACAGAAGAAACGAACCCTCCGGCTC +AACCAGCCCTCGCATGCTGACCCCAATCAACGAAGAGGCAGACCCACTGGACGATGCCGACGACGAGACG +TCTAGCCTTCCGCCCTTAGAGTCAGATGATGAAGAACAGGACAGGGACGGAACTTCTAACCGCACACCCA +CTGTCGCCCCACCGGCTCCCGTATACAGAGATCACTCCGAAAAGAAAGAACTCCCGCAAGATGAACAACA +AGATCAGGACCACATTCAAGAGGCCAGGAACCAAGACAGTGACAACACCCAGCCAGAACATTCTTTTGAG +GAGATGTATCGCCACATTCTAAGATCACAGGGGCCATTTGATGCCGTTTTGTATTATCATATGATGAAGG +ATGAGCCTGTAGTTTTCAGTACCAGTGATGGTAAAGAGTACACGTATCCGGACTCCCTTGAAGAGGAATA +TCCACCATGGCTCACTGAAAAAGAGGCCATGAATGATGAGAATAGATTTGTTACACTGGATGGTCAACAA +TTTTATTGGCCAGTAATGAATCACAGGAATAAATTCATGGCAATCCTGCAACATCATCAGTGAATGAGCA +TGTAATAATGGGATGATTTAATCGACAAATAGCTAACATTAAATAGTCAAGGAACGCAAACAGGAAGAAT +TTTTGATGTCTAAGGTGTGAATTATTATCACAATAAAAGTGATTCTTAGTTTTGAATTTAAAGCTAGCTT +ATTATTACTAGCCGTTTTTCAAAGTTCAATTTGAGTCTTAATGCAAATAAGCGTTAAGCCACAGTTATAG +CCATAATGGTAACTCAATATCTTAGCCAGCGATTTATCTAAATTAAATTACATTATGCTTTTATAACTTA +CCTACTAGCCTGCCCAACATTTACACGATCGTTTTATAATTAAGAAAAAACTAATGATGAAGATTAAAAC +CTTCATCATCCTTACGTCAATTGAATTCTCTAGCACTAGAAGCTTATTGTCTTCAATGTAAAAGAAAAGC +TGGCCTAACAAGATGACAACTAGAACAAAGGGCAGGGGCCATACTGTGGCCACGACTCAAAACGACAGAA +TGCCAGGCCCTGAGCTTTCGGGCTGGATCTCTGAGCAGCTAATGACCGGAAGGATTCCTGTAAACGACAT +CTTCTGTGATATTGAGAACAATCCAGGATTATGCTACGCATCCCAAATGCAACAAACGAAGCCAAACCCG +AAGATGCGCAACAGTCAAACCCAAACGGACCCAATTTGCAATCATAGTTTTGAGGAGGTAGTACAAACAT +TGGCTTCATTGGCTACTGTTGTGCAACAACAAACCATCGCATCAGAATCATTAGAACAACGCATTACGAG +TCTTGAGAATGGTCTAAAGCCAGTTTATGATATGGCAAAAACAATCTCCTCATTGAACAGGGTTTGTGCT +GAGATGGTTGCAAAATATGATCTTCTGGTGATGACAACCGGTCGGGCAACAGCAACCGCTGCGGCAACTG +AGGCTTATTGGGCTGAACATGGTCAACCACCACCTGGACCATCACTTTATGAAGAAAGTGCGATTCGGGG +TAAGATTGAATCTAGAGATGAGACTGTCCCTCAAAGTGTTAGGGAGGCATTCAACAATCTAGACAGTACC +ACTTCACTAACTGAGGAAAATTTTGGGAAACCTGACATTTCGGCAAAGGATTTGAGAAACATTATGTATG +ATCACTTGCCTGGTTTTGGAACTGCTTTCCACCAATTAGTACAAGTGATTTGTAAATTGGGAAAAGATAG +CAATTCATTGGACATTATTCATGCTGAGTTCCAGGCCAGCCTGGCTGAAGGAGACTCCCCTCAATGTGCC +CTAATTCAAATTACAAAAAGAGTTCCAATCTTCCAAGATGCTGCTCCACCTGTCATCCACATCCGCTCTC +GAGGTGACATTCCCCGAGCTTGCCAGAAGAGCTTGCGTCCAGTCCCACCATCACCCAAGATTGATCGAGG +TTGGGTATGTGTTTTTCAGCTTCAAGATGGTAAAACACTTGGACTCAAAATTTGAGCCAATCTCTTTTCC +CTCCGAAAGAGGCAACTAATAGCAGAGGCTTCAACTGCTGAACTATAGGGTATGTTACATTAATGATACA +CTTGTGAGTATCAGCCCTAGATAATATAAGTCAATTAAACAACCAAGATAAAATTGTTCATATCCCGCTA +GCAGCTTTAAAGATAAATGTAATAGGAGCTATACCTCTGACAGTATTATAATTAATTGTTATTAAGTAAC +CCAAACCAAAAATGATGAAGATTAAGAAAAACCTACCTCGACTGAGAGAGTGTTTTTTCATTAACCTTCA +TCTTGTAAACGTTGAGCAAAATTGTTAAAAATATGAGGCGGGTTATATTGCCTACTGCTCCTCCTGAATA +TATGGAGGCCATATACCCTGCCAGGTCAAATTCAACAATTGCTAGGGGTGGCAACAGCAATACAGGCTTC +CTGACACCGGAGTCAGTCAATGGAGACACTCCATCGAATCCACTCAGGCCAATTGCTGATGACACCATCG +ACCATGCCAGCCACACACCAGGCAGTGTGTCATCAGCATTCATCCTCGAAGCTATGGTGAATGTCATATC +GGGCCCCAAAGTGCTAATGAAGCAAATTCCAATTTGGCTTCCTCTAGGTGTCGCTGATCAAAAGACCTAC +AGCTTTGACTCAACTACGGCCGCCATCATGCTTGCTTCATATACTATCACCCATTTCGGCAAGGCAACCA +ATCCGCTTGTCAGAGTCAATCGGCTGGGTCCTGGAATCCCGGATCACCCCCTCAGGCTCCTGCGAATTGG +AAACCAGGCTTTCCTCCAGGAGTTCGTTCTTCCACCAGTCCAACTACCCCAGTATTTCACCTTTGATTTG +ACAGCACTCAAACTGATCACTCAACCACTGCCTGCTGCAACATGGACCGATGACACTCCAACTGGATCAA +ATGGAGCGTTGCGTCCAGGAATTTCATTTCATCCAAAACTTCGCCCCATTCTTTTACCCAACAAAAGTGG +GAAGAAGGGGAACAGTGCCGATCTAACATCTCCGGAGAAAATCCAAGCAATAATGACTTCACTCCAGGAC +TTTAAGATCGTTCCAATTGATCCAACCAAAAATATCATGGGTATCGAAGTGCCAGAAACTCTGGTCCACA +AGCTGACCGGTAAGAAGGTGACTTCCAAAAATGGACAACCAATCATCCCTGTTCTTTTGCCAAAGTACAT +TGGGTTGGACCCGGTGGCTCCAGGAGACCTCACCATGGTAATCACACAGGATTGTGACACGTGTCATTCT +CCTGCAAGTCTTCCAGCTGTGGTTGAGAAGTAATTGCAATAATTGACTCAGATCCAGTTTTACAGAATCT +TCTCAGGGATAGTGATAACATCTTTTTAATAATCCGTCTACTAGAAGAGATACTTCTAATTGATCAATAT +ACTAAAGGTGCTTTACACCATTGTCTCTTTTCTCTCCTAAATGTAGAGCTTAACAAAAGACTCATAATAT +ACCTGTTTTTAAAAGATTGATTGATGAAAGATCATGACTAATAACATTACAAACAATCCTACTATAATCA +ATACGGTGATTCAAATGTCAATCTTTCTCATTGCACATACTCTTTGTCCTTATCCTCAAATTGCCTACAT +GCTTACATCTGAGGACAGCCAGTGTGACTTGGATTGGAGATGTGGAGGAAAAATCGGGGCCCATTTCTAA +GTTGTTCACAATCTAAGTACAGACATTGCTCTTCTAATTAAGAAAAAATCGGCGATGAAGATTAAGCCGA +CAGTGAGCGTAATCTTCATCTCTCTTAGATTATTTGTCTTCCAGAGTAGGGGTCATCAGGTCCTTTTCAA +TTGGATAACCAAAATAAGCTTCACTAGAAGGATATTGTGAGGCGACAACACAATGGGTGTTACAGGAATA +TTGCAGTTACCTCGTGATCGATTCAAGAGGACATCATTCTTTCTTTGGGTAATTATCCTTTTCCAAAGAA +CATTTTCCATCCCGCTTGGAGTTATCCACAATAGTACATTACAGGTTAGTGATGTCGACAAACTAGTTTG +TCGTGACAAACTGTCATCCACAAATCAATTGAGATCAGTTGGACTGAATCTCGAGGGGAATGGAGTGGCA +ACTGACGTGCCATCTGTGACTAAAAGATGGGGCTTCAGGTCCGGTGTCCCACCAAAGGTGGTCAATTATG +AAGCTGGTGAATGGGCTGAAAACTGCTACAATCTTGAAATCAAAAAACCTGACGGGAGTGAGTGTCTACC +AGCAGCGCCAGACGGGATTCGGGGCTTCCCCCGGTGCCGGTATGTGCACAAAGTATCAGGAACGGGACCA +TGTGCCGGAGACTTTGCCTTCCACAAAGAGGGTGCTTTCTTCCTGTATGATCGACTTGCTTCCACAGTTA +TCTACCGAGGAACGACTTTCGCTGAAGGTGTCGTTGCATTTCTGATACTGCCCCAAGCTAAGAAGGACTT +CTTCAGCTCACACCCCTTGAGAGAGCCGGTCAATGCAACGGAGGACCCGTCGAGTGGCTATTATTCTACC +ACAATTAGATATCAGGCTACCGGTTTTGGAACTAATGAGACAGAGTACTTGTTCGAGGTTGACAATTTGA +CCTACGTCCAACTTGAATCAAGATTCACACCACAGTTTCTGCTCCAGCTGAATGAGACAATATATGCAAG +TGGGAAGAGGAGCAACACCACGGGAAAACTAATTTGGAAGGTCAACCCCGAAATTGATACAACAATCGGG +GAGTGGGCCTTCTGGGAAACTAAAAAAACCTCACTAGAAAAATTCGCAGTGAAGAGTTGTCTTTCACAGC +TGTATCAAACGGACCCAAAAACATCAGTGGTCAGAGTCCGGCGCGAACTTCTTCCGACCCAGAGACCAAC +ACAACAAATGAAGACCACAAAATCATGGCTTCAGAAAATTCCTCTGCAATGGTTCAAGTGCACAGTCAAG +GAAGGAAAGCTGCAGTGTCGCATCTGACAACCCTTGCCACAATCTCCACGAGTCCTCAACCTCCCACAAC +CAAAACAGGTCCGGACAACAGCACCCATAATACACCCGTGTATAAACTTGACATCTCTGAGGCAACTCAA +GTTGGACAACATCACCGTAGAGCAGACAACGACAGCACAGCCTCCGACACTCCCCCCGCCACGACCGCAG +CCGGACCCTTAAAAGCAGAGAACACCAACACGAGTAAGAGCGCTGACTCCCTGGACCTCGCCACCACGAC +AAGCCCCCAAAACTACAGCGAGACTGCTGGCAACAACAACACTCATCACCAAGATACCGGAGAAGAGAGT +GCCAGCAGCGGGAAGCTAGGCTTAATTACCAATACTATTGCTGGAGTAGCAGGACTGATCACAGGCGGGA +GAAGGACTCGAAGAGAAGTAATTGTCAATGCTCAACCCAAATGCAACCCCAATTTACATTACTGGACTAC +TCAGGATGAAGGTGCTGCAATCGGATTGGCCTGGATACCATATTTCGGGCCAGCAGCCGAAGGAATTTAC +ACAGAGGGGCTAATGCACAACCAAGATGGTTTAATCTGTGGGTTGAGGCAGCTGGCCAACGAAACGACTC +AAGCTCTCCAACTGTTCCTGAGAGCCACAACTGAGCTGCGAACCTTTTCAATCCTCAACCGTAAGGCAAT +TGACTTCCTGCTGCAGCGATGGGGTGGCACATGCCACATTTTGGGACCGGACTGCTGTATCGAACCACAT +GATTGGACCAAGAACATAACAGACAAAATTGATCAGATTATTCATGATTTTGTTGATAAAACCCTTCCGG +ACCAGGGGGACAATGACAATTGGTGGACAGGATGGAGACAATGGATACCGGCAGGTATTGGAGTTACAGG +TGTTATAATTGCAGTTATCGCTTTATTCTGTATATGCAAATTTGTCTTTTAGTCTTTCTTCAGATTGTTT +CACGGCAAAACTCAACCTCAAATCAATGAAACTAGGATTTAATTATATGAATCACTTGAATCTAAGATTA +CTTGACAAATGATAACATAATACACTGGAGCTTCAAACATAGCCAATGTGATTCTAACTCCTTTAAACTC +ACAGTTAATCATAAACAAGGTTTGACATCAATCTAGCTATATCTTTAAGAATGATAAACTTGATGAAGAT +TAAGAAAAAGGTAATCTTTCGATTATCTTTAGTCTTCATCCTTGATTCTACAATCATGACAGTTGTCTTT +AATGAAAAAGGAAAAAAGCCTTTTTATTAAGTTGTAATAATCAGATCTGCAAACCGGTAGAATTTAGTTG +TAACCTAACACACACAAAGCATTGGTAAAAAAGTCAATAGAAATTTAAACAGTGAGTGCAGACAACTCTT +AAATGGAAGCTTCATATGAGAGAGGACGCCCCCGAGCTGCCAGACAGCATTCAAGGGATGGACACGACCA +CCATGTTCGAGCACGATCATCATCCAGAGAGAATTATCGAGGTGAGTACCGTCAATCAAGGAGCGCCTCA +CAAGTGCGCGTTCCTACTGTATTTCATAAGAAGAGAGTTGAACCATTAACAGTTCCTCCAGCACCTAAAG +ACATATGTCCGACCTTGAAAAAAGGATTTTTGTGTGACAGTAGTTTTTGCAAAAAAGACCACCAGTTAGA +AAGTTTAACTGATAGGGAATTACTCCTACTAATCGCCCGTAAGACTTGTGGATCAGTAGAACAACAATTA +AATATAACTGCACCCAAGGACTCGCGCTTAGCAAATCCAACGGCTGATGATTTCCAGCAAGAGGAAGGTC +CCAAAATTACCTTGTTGACACTGATCAAGACGGCAGAACACTGGGCGAGACAAGACATCCGAACCATAGA +GGATTCCAAATTAAGGGCATTGTTAACTCTATGTGCTGTGATGACGAGGAAATTCTCAAAATCCCAGCTG +AGTCTTTTGTGTGAGACACACCTAAGGCGCGAAGGGCTTGGGCAAGATCAGGCAGAACCCGTTCTCGAAG +TATATCAACGATTACACAGTGATAAAGGAGGCAGTTTTGAAGCTGCACTATGGCAACAATGGGACCGACA +ATCCCTAATTATGTTTATCACTGCATTCTTGAATATCGCTCTCCAGTTACCGTGTGAAAGTTCTGCTGTC +GTTGTTTCAGGGTTAAGAACATTGGTTCCTCAATCAGATAATGAGGAAGCTTCAACCAACCCGGGGACAT +GCTCATGATCTGATGAGGGTACCCCTTAATAAGGCTGACTAAAACACTATATAACCTTCTACTTGATCAC +AATACTCCGTATACCTATCATCATATATTTAATCAAGACGATATCCTTTAAAACTTATTCAGTACTATAA +TCACTCTCATTTCAAATTGATAAGATATGCATAATTGCCTTAATATATAAAGAGGTATGATATAACCCAA +ACATTGACCAAAGAAAATCATAATCTCGTATCGCTCGCAATATAACCTGCCAAGCATACCTCTTGCACAA +AGTGATTCTTGTACACAAATAATGTTTGACTCTACAGGAGGTAGCAACGATCCATCTCATCAAAAAATAA +GTATTTTATGATTTACTAATGATCTCTTAAAATATTAAGAAAAACTGACGGAACATAAATTCTTTCTGCT +TCAAGTTGTGGAGGAGGTCTATGGTATTCGCTATTGTTATATTACAATCAATAACAAGCTTGTAAAAATA +TTGTTCTTGTTTCAGGAGGTATATTGTGACCGGAAAAGCTAAACTAATGATGAAGATTAATGCGGAGGTC +TGATGAGAATAAACCTTATTATTCAGATTAGGCCCCAAGAGGCATTCTTCATCTCCTTTTAGCAAAATAC +TATTTCAGGATAGTCCAGCTAGTGACACGTCTTTTAGCTGTATACCAGTTGCCCCTGAGATACGCCACAA +AAGTGTCTCTGAGCTAAAGTGGTCTGTACACATCTCATACATTGTATTAGGGGCAATAATATCTAATTGA +ACTTAGCCATTTAAAATTTAGTGCATAAATCTGGGCTAACTCCACCAGGTCAACTCCATTGGCTGAAAAG +AAGCCCACCTACAACGAACATTACTTTGAGCGCCCTCACAATTAAAAAATAAGAGCGTCGTTCCAACAAT +CGAGCGCAAGGTTACAAGGTTGAACTGAGAGTGTCTAGACAACAAAATATCGATACTCCAGACACCAAGC +AAGACCTGAGAAAAAACCATGGCCAAAGCTACGGGACGATACAATCTAATATCGCCCAAAAAGGACCTGG +AGAAAGGGGTTGTCTTAAGCGACCTCTGTAACTTCTTAGTTAGTCAAACTATTCAAGGGTGGAAAGTTTA +TTGGGCTGGTATTGAGTTTGATGTGACTCACAAAGGAATGGCCCTATTGCATAGACTGAAAACTAATGAC +TTTGCCCCTGCATGGTCAATGACAAGGAACCTATTTCCCCATTTATTTCAAAATCCGAATTCCACTATTG +AATCACCGCTGTGGGCACTGAGAGTCATCCTTGCAGCAGGGATACAGGACCAGTTAATTGACCAGTCTTT +GATTGAACCCTTAGCAGGAGCCCTTGGTCTGATCTCTGATTGGCTGCTAACAACCAACACTAACCATTTC +AACATGCGAACACAACGTGTCAAGGAACAATTGAGCCTAAAAATGCTGTCGTTGATTCGATCCAATATTC +TCAAGTTTATTAACAAATTGGATGCTCTACATGTCGTGAACTACAATGGATTATTGAGCAGTATTGAAAT +TGGAACTCAAAATCATACAATCATCATAACTCGAACTAACATGGGTTTTCTGGTGGAGCTCCAAGAACCC +GACAAATCGGCAATGAACCGCAAGAAGCCTGGGCCGGCGAAATTTTCCCTCCTTCATGAGTCCACACTGA +AAGCATTTACACAAGGGTCCTCGACACGAATGCAAAGTTTAATTCTTGAATTCAATAGCTCTCTTGCTAT +CTAACTAAGATGGAATACTTCATATTGGGCTAACTCATATATGCTGACTCAATAGTTAACTTGACATCTC +TGCCTTCATAATCAGATATATAAGCATAATAAATAAATACTCATATTTCTTGATAATTTGTTTAACCACA +GATAAATCCTCACTGTAAGCCAGCTTCCAAGTTGACACCCTTACAAAAACCAGGACTCAGAATCCCTCAA +ATAAGAGATTCCAAGACAACATCATAGAATTGCTTTATTATATTAATAAGCATTTTATCACTAGAAATCC +AATATACGAAATGGTTAATTGTAACTAAACCCGCAGGTCATGTGTGTTAGGTTTCACAAATTATATATAT +TACTAACTCCATACTCGTAACTAACATTAGATAAGTAGGTTAAGAAAAAAGCTTGAGGAAGATTAAGAAA +AACTGCTTATTGGGTCTTTCCGTGTTTTAGATGAAGCAGTTGACATTCTTCCTCTTGATATTAAATGGCT +ACACAACATACCCAATACCCAGACGCCAGGTTATCATCACCAATTGTATTGGACCAATGTGACCTTGTCA +CTAGAGCTTGCGGGTTGTATTCATCATACTCCCTTAATCCGCAACTACGCAACTGTAAACTCCCGAAACA +TATATACCGTTTAAAATATGATGTAACTGTTACCAAGTTCTTAAGTGATGTACCAGTGGCGACATTGCCC +ATAGATTTCATAGTCCCAATTCTTCTCAAGGCACTATCAGGCAATGGGTTCTGTCCTGTTGAGCCGCGGT +GCCAACAGTTCTTAGATGAAATTATTAAGTACACAATGCAAGATGCTCTCTTCCTGAAATATTATCTCAA +AAATGTGGGTGCTCAAGAAGACTGTGTTGATGACCACTTTCAAGAAAAAATCTTATCTTCAATTCAGGGC +AATGAATTTTTACATCAAATGTTTTTCTGGTATGACCTGGCTATTTTAACTCGAAGGGGTAGATTAAATC +GAGGAAACTCTAGATCAACGTGGTTTGTTCATGATGATTTAATAGACATCTTAGGCTATGGGGACTATGT +TTTTTGGAAGATCCCAATTTCACTGTTACCACTGAACACACAAGGAATCCCCCATGCTGCTATGGATTGG +TATCAGACATCAGTATTCAAAGAAGCGGTTCAAGGGCATACACACATTGTTTCTGTTTCTACTGCCGATG +TCTTGATAATGTGCAAAGATTTAATTACATGTCGATTCAACACAACTCTAATCTCAAAAATAGCAGAGGT +TGAGGACCCAGTTTGCTCTGATTATCCCAATTTTAAGATTGTGTCTATGCTTTACCAGAGCGGAGATTAC +TTACTCTCCATATTAGGGTCTGATGGGTATAAAATCATTAAGTTTCTCGAACCATTGTGCTTGGCTAAAA +TTCAATTGTGCTCAAAGTACACCGAGAGGAAGGGCCGATTCTTAACACAAATGCATTTAGCTGTAAATCA +CACCCTGGAAGAAATTACAGAAATACGTGCACTAAAGCCTTCACAGGCTCACAAGATCCGTGAATTCCAT +AGAACATTGATAAGGCTGGAGATGACGCCACAACAACTTTGTGAGCTATTTTCCATACAAAAACACTGGG +GGCATCCTGTGCTACATAGTGAAACAGCAATCCAAAAAGTTAAAAAACATGCTACGGTGCTAAAAGCATT +ACGCCCTATCGTGATTTTCGAGACATATTGTGTTTTTAAATATAGCATTGCAAAACATTATTTTGATAGT +CAAGGATCTTGGTACAGTGTTACCTCAGATAGAAATCTAACACCAGGTCTTAATTCTTATATCAAAAGAA +ATCAATTCCCTCCGTTGCCAATGATTAAAGAACTGCTATGGGAATTTTACCACCTTGACCATCCTCCACT +TTTCTCAACCAAAATTATTAGTGACTTAAGTATTTTTATAAAAGACAGAGCTACTGCAGTAGAAAGGACA +TGCTGGGATGCAGTATTCGAGCCTAATGTTCTGGGATATAATCCACCTCACAAATTCAGTACCAAACGTG +TACCGGAACAATTTTTAGAGCAAGAAAACTTTTCTATTGAGAATGTTCTTTCCTACGCGCAAAAACTCGA +GTATCTACTACCACAATATCGGAATTTTTCTTTCTCATTGAAAGAGAAAGAGTTGAATGTAGGTAGAACT +TTCGGAAAATTGCCTTATCCGACTCGCAATGTTCAAACACTTTGTGAAGCTCTGTTAGCTGATGGTCTTG +CTAAAGCATTTCCTAGCAATATGATGGTAGTTACGGAACGTGAACAAAAAGAAAGCTTATTGCATCAAGC +ATCATGGCACCACACAAGTGATGATTTCGGTGAGCATGCCACAGTTAGAGGGAGTAGCTTTGTAACTGAT +TTAGAGAAATACAATCTTGCATTTAGGTATGAGTTTACAGCACCTTTTATAGAATATTGCAACCGTTGCT +ATGGTGTTAAGAATGTTTTTAATTGGATGCATTATACAATCCCACAGTGTTATATGCATGTCAGTGATTA +TTATAATCCACCGCATAACCTCACACTGGAAAATCGAAACAACCCCCCTGAAGGGCCTAGTTCATACAGG +GGTCATATGGGAGGGATTGAAGGACTGCAACAAAAACTCTGGACAAGTATTTCATGTGCTCAAATTTCTT +TAGTTGAAATTAAGACTGGTTTTAAGTTGCGCTCAGCTGTGATGGGTGACAATCAGTGCATTACCGTTTT +ATCAGTCTTCCCCTTAGAGACTGATGCAGGCGAGCAGGAACAGAGCGCCGAGGACAATGCAGCGAGGGTG +GCCGCCAGCCTAGCAAAAGTTACAAGTGCCTGTGGAATCTTTTTAAAACCTGATGAAACATTTGTACATT +CAGGTTTTATCTATTTTGGAAAAAAACAATATTTGAATGGGGTCCAATTGCCTCAGTCCCTTAAAACGGC +TACAAGAATGGCACCATTGTCTGATGCAATTTTTGATGATCTTCAAGGGACCCTGGCTAGTATAGGTACT +GCTTTTGAGCGATCCATCTCTGAGACACGACATATCTTTCCTTGCAGAATAACCGCAGCTTTCCATACGT +TCTTTTCGGTGAGAATCTTGCAATATCATCACCTCGGATTTAATAAAGGTTTTGACCTTGGACAGTTAAC +ACTCGGCAAACCTCTGGATTTCGGAACAATATCATTGGCACTAGCGGTACCGCAGGTGCTTGGAGGGTTA +TCCTTCTTGAATCCTGAGAAATGTTTCTACCGGAATCTAGGAGATCCAGTTACCTCAGGTTTATTCCAGT +TAAAAACTTATCTCCGAATGATTGAGATGGATGATTTATTCTTACCTTTAATTGCGAAGAACCCTGGGAA +CTGCACTGCCATTGACTTTGTGCTAAATCCTAGCGGATTAAATGTTCCTGGGTCGCAAGACTTAACTTCA +TTTCTGCGCCAGATTGTACGTAGGACTATCACCCTAAGTGCGAAAAACAAACTTATTAATACCTTATTTC +ATGCATCAGCTGACTTCGAAGACGAAATGGTTTGTAAGTGGCTCTTATCATCAACTCCTGTTATGAGTCG +TTTCGCAGCCGATATATTTTCACGCACGCCGAGCGGGAAGCGATTGCAAATTCTAGGATACTTGGAAGGA +ACACGCACATTATTAGCCTCTAAGATCATCAACAATAATACAGAGACGCCGGTTTTGGACAGACTGAGGA +AGATAACATTGCAAAGGTGGAGTCTATGGTTTAGTTATCTTGATCATTGTGATAATATCCTGGCGGAGGC +TTTAACCCAAATAACTTGCACAGTTGATTTAGCACAGATCCTGAGGGAATATTCATGGGCACATATTTTA +GAGGGGAGACCTCTTATTGGAGCCACACTCCCATGTATGATTGAGCAATTCAAAGTGGTTTGGCTGAAAC +CCTACGAACAATGTCCGCAGTGTTCAAATGCCAAGCAACCTGGTGGGAAACCATTCGTGTCAGTAGCAGT +CAAGAAACATATTGTTAGTGCATGGCCAAATGCATCCCGAATAAGCTGGACTATCGGGGATGGAATCCCA +TACATTGGATCAAGGACAGAAGATAAGATAGGGCAACCTGCTATTAAACCAAAATGTCCTTCCGCAGCCT +TAAGAGAGGCCATTGAATTGGCGTCCCGTTTAACATGGGTAACTCAAGGCAGTTCGAACAGTGACTTGCT +AATAAAACCATTTTTGGAAGCACGAGTAAATTTAAGTGTTCAAGAAATACTTCAAATGACCCCTTCACAT +TACTCGGGAAATATTGTTCATAGGTACAACGATCAATACAGTCCTCATTCTTTCATGGCCAATCGTATGA +GTAACTCAGCAACGCGATTGATTGTTTCTACAAACACTTTAGGTGAGTTTTCAGGAGGTGGCCAATCGGC +ACGCGACAGCAATATTATTTTCCAGAATGTTATAAATTATGCAGTTGCACTGTTCGATATTAAATTTAGA +AACACTGAGGCTACAGATATCCAGTATAATCGTGCTCACCTTCATCTAACTAAGTGTTGCACCCGGGAGG +TACCAGCTCAGTACTTAACATACACATCTACATTGGATTTAGATTTAACAAGATACCGAGAAAATGAATT +GATTTATGACAATAATCCTCTAAAAGGAGGACTCAATTGCAATATCTCATTTGATAACCCATTTTTCCAA +GGCAAACAGCTGAACATTATAGAAGATGACCTTATTCGACTGCCTCACTTATCTGGATGGGAGCTAGCTA +AGACCATCATGCAATCAATTATTTCAGATAGCAATAATTCGTCTACAGACCCAATTAGCAGTGGAGAAAC +AAGATCATTCACTACCCATTTCTTAACTTATCCCAAAATAGGACTTCTGTACAGTTTTGGGGCCTTTGTA +AGTTATTATCTTGGCAATACAATTCTTCGGACTAAGAAATTAACACTTGACAATTTTTTATATTACTTAA +CTACCCAAATTCATAATCTACCACATCGCTCATTGCGAATACTTAAGCCAACATTCAAACATGCAAGCGT +TATGTCACGATTAATGAGTATTGATCCCCATTTTTCTATTTACATAGGCGGTGCTGCAGGTGACAGAGGA +CTCTCAGATGCGGCCAGGTTATTTTTGAGAACGTCCATTTCATCTTTTCTTACATTTGTAAAGGAATGGA +TAATTAATCGCGGAACAATTGTCCCTTTATGGATAGTATATCCATTAGAGGGTCAAAATCCAACACCTGT +TAATAATTTCCTCCATCAGATCGTAGAACTGCTGGTGCATGATTCATCAAGACACCAGGCTTTTAAAACT +ACCATAAATGATCATGTACATCCTCACGACAATCTTGTTTACACATGTAAGAGTACAGCCAGCAATTTCT +TCCATGCGTCATTGGCGTACTGGAGGAGCAGGCACAGAAACAGCAACCGAAAAGACTTGACAAGAAACTC +TTCAACTGGATCAAGCACAAACAACAGTGATGGTCATATTAAGAGAAGTCAAGAACAAACCACCAGAGAT +CCACATGATGGCACTGAACGGAGTCTAGTCCTGCAAATGAGCCATGAAATAAAAAGAACGACAATTCCAC +AAGAGAACACGCACCAGGGTCCGTCGTTCCAGTCATTTCTAAGTGACTCTGCTTGCGGTACAGCAAACCC +AAAACTAAATTTCGATAGATCGAGACACAATGTGAAATCTCAGGATCATAACTCAGCATCCAAGAGGGAA +GGTCATCAAATAATCTCACATCGTCTAGTCCTACCTTTCTTTACATTATCTCAAGGGACACGCCAATTAA +CGTCATCCAATGAGTCACAAACCCAAGATGAGATATCAAAGTACTTACGGCAATTGAGATCCGTCATTGA +TACCACAGTTTATTGTAGGTTTACCGGTATAGTCTCGTCCATGCATTACAAACTTGATGAGGTCCTTTGG +GAAATAGAGAATTTTAAGTCGGCTGTGACGCTGGCAGAGGGAGAAGGTGCTGGTGCCTTACTATTGATTC +AGAAATACCAAGTTAAGACCTTATTCTTCAACACGCTAGCTACTGAGTCCAGTATAGAGTCAGAAATAGT +ATCAGGAATGACTACTCCTAGGATGCTTCTACCTGTTATGTCAAAATTCCATAATGACCAAATTGAGATT +ATTCTTAACAACTCAGCAAGCCAAATAACAGACATAACAAATCCTACTTGGTTTAAAGACCAAAGAGCAA +GGCTACCTAGGCAAGTCGAGGTTATAACCATGGATGCAGAGACGACAGAGAATATAAACAGATCGAAATT +GTACGAAGCTGTACATAAATTGATCTTACACCATGTTGATCCCAGCGTATTGAAAGCAGTGGTCCTTAAA +GTCTTTCTAAGTGATACCGAGGGTATGTTATGGCTAAATGATAATCTAGCCCCGTTTTTTGCCACTGGGT +ATTTAATTAAGCCAATAACGTCAAGTGCCAGGTCTAGTGAGTGGTATCTTTGTCTGACGAACTTCTTATC +AACTACACGTAAGATGCCACACCAAAACCATCTCAGTTGTAAGCAGGTAATACTTACGGCATTGCAACTG +CAAATTCAACGGAGCCCATACTGGCTAAGTCATTTAACTCAGTATGCTGACTGCGATTTACATTTAAGCT +ATATCCGCCTTGGTTTTCCATCATTAGAGAAAGTACTATACCACAGGTATAACCTTGTCGATTCAAAAAG +AGGTCCACTAGTCTCTGTCACTCAGCACTTAGCACATCTTAGGGCAGAGATTCGAGAATTGACCAATGAT +TATAATCAACAGCGACAAAGTCGGACTCAAACATATCACTTTATTCGTACTGCAAAAGGACGAATCACAA +AACTAGTCAATGATTATTTAAAATTCTTTCTTATTGTACAAGCATTAAAACATAATGGGACATGGCAAGC +TGAGTTTAAGAAATTACCAGAGTTGATTAGTGTGTGCAATAGGTTCTATCATATTAGAGATTGTAATTGT +GAAGAACGTTTCTTAGTTCAAACCTTATATTTACATAGAATGCAGGATTCTGAAGTTAAGCTTATCGAAA +GGCTGACAGGGCTTCTGAGTTTATTTCCAGATGGTCTCTACAGGTTCGATTGAATAACCGTGCATAGTAT +TTTGATACTTGTAAAGGTTGGTTATCAACATACAGATTATAAAAAACTCATAAATTGCTCTCATACATCA +TCTTGATCTGATTTCAATAAATAACTATTTAGATAACGAAAGGAGTCCTTACATTATACACTATATTTGG +CCTCTCTCCCTGCGTGATAATCAAAAAATTCACAATACAGCATGTGTGACATATTACTGCTGCAATGAGT +CTAACGCAACATAATAAACTCCGCACTCTTTATAATTAAGCTTTAACGATAGGTCTGGGCTCATATTGTT +ATTGATATAGTAATGTTGTATCAATATCTTGCCAGATGGAATAGTGCTTTGGTTGATAACACGACTTCTT +AAAACAAAACTGATCTTTAAGATTAAGTTTTTTATAATTGTCATTGCTTTAATTTGTCGATTTAAAAATG +GTGATAGCCTTAATCTTTGTGTAAAATAAGAGATTAGGTGTAATAACTTTAACANNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNATGATAAAANTNAAAGANAAGACATNACTGTAAANNNNGAAATACCTTCTTTAC +AATATAGCAGACTAGATAATAATCTTCGTGTTAATGATAATTAAGGCATTGACCACGCTCATCAGAAGGC +TCACTAGAATAAACGTTGC diff --git a/test/input/TestOrderAndOrient/ref.ebov.sle.fasta b/test/input/TestOrderAndOrient/ref.ebov.sle.fasta new file mode 100644 index 00000000..5e53fb13 --- /dev/null +++ b/test/input/TestOrderAndOrient/ref.ebov.sle.fasta @@ -0,0 +1,272 @@ +>KM034550.1 Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/SLE/2014/Makona-EM095, complete genome +GAATAACTATGAGGAAGATTAATAATTTTCCTCTCATTGAAATTTATATCGGAATTTAAATTGAAATTGT +TACTGTAATCATACCTGGTTTGTTTCAGAGCCATATCACCAAGATAGAGAACAACCTAGGTCTCCGGAGG +GGGCAAGGGCATCAGTGTGCTCAGTTGAAAATCCCTTGTCAACATCTAGGCCTTATCACATCACAAGTTC +CGCCTTAAACTCTGCAGGGTGATCCAACAACCTTAATAGCAACATTATTGTTAAAGGACAGCATTAGTTC +ACAGTCAAACAAGCAAGATTGAGAATTAACTTTGATTTTGAACCTGAACACCCAGAGGACTGGAGACTCA +ACAACCCTAAAGCCTGGGGTAAAACATTAGAAATAGTTTAAAGACAAATTGCTCGGAATCACAAAATTCC +GAGTATGGATTCTCGTCCTCAGAAAGTCTGGATGACGCCGAGTCTCACTGAATCTGACATGGATTACCAC +AAGATCTTGACAGCAGGTCTGTCCGTTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCAGTGTATCAAG +TAAACAATCTTGAGGAAATTTGCCAACTTATCATACAGGCCTTTGAAGCTGGTGTTGATTTTCAAGAGAG +TGCGGACAGTTTCCTTCTCATGCTTTGTCTTCATCATGCGTACCAAGGAGATTACAAACTTTTCTTGGAA +AGTGGCGCAGTCAAGTATTTGGAAGGGCACGGGTTCCGTTTTGAAGTCAAGAAGCGTGATGGAGTGAAGC +GCCTTGAGGAATTGCTGCCAGCAGTATCTAGTGGGAGAAACATTAAGAGAACACTTGCTGCCATGCCGGA +AGAGGAGACGACTGAAGCTAATGCCGGTCAGTTCCTCTCCTTTGCAAGTCTATTCCTTCCGAAATTGGTA +GTAGGAGAAAAGGCTTGCCTTGAGAAGGTTCAAAGGCAAATTCAAGTACATGCAGAGCAAGGACTGATAC +AATATCCAACAGCTTGGCAATCAGTAGGACACATGATGGTGATTTTCCGTTTGATGCGAACAAATTTTTT +GATCAAATTTCTTCTAATACACCAAGGGATGCACATGGTTGCCGGACATGATGCCAACGATGCTGTGATT +TCAAATTCAGTGGCTCAAGCTCGTTTTTCAGGTCTATTGATTGTCAAAACAGTACTTGATCATATCCTAC +AAAAGACAGAACGAGGAGTTCGTCTCCATCCTCTTGCAAGGACCGCCAAGGTAAAAAATGAGGTGAACTC +CTTCAAGGCTGCACTCAGCTCCCTGGCCAAGCATGGAGAGTATGCTCCTTTCGCCCGACTTTTGAACCTT +TCTGGAGTAAATAATCTTGAGCATGGTCTTTTCCCTCAACTGTCGGCAATTGCACTCGGAGTCGCCACAG +CCCACGGGAGCACCCTCGCAGGAGTAAATGTTGGAGAACAGTATCAACAGCTCAGAGAGGCAGCCACTGA +GGCTGAGAAGCAACTCCAACAATATGCGGAGTCTCGTGAACTTGACCATCTTGGACTTGATGATCAGGAA +AAGAAAATTCTTATGAACTTCCATCAGAAAAAGAACGAAATCAGCTTCCAGCAAACAAACGCGATGGTAA +CTCTAAGAAAAGAGCGCCTGGCCAAGCTGACAGAAGCTATCACTGCTGCATCACTGCCCAAAACAAGTGG +ACATTACGATGATGATGACGACATTCCCTTTCCAGGACCCATCAATGATGACGACAATCCTGGCCATCAA +GATGATGATCCGACTGACTCACAGGATACGACCATTCCCGATGTGGTAGTTGACCCCGATGATGGAGGCT +ACGGCGAATACCAAAGTTACTCGGAAAACGGCATGAGTGCACCAGATGACTTGGTCCTATTCGATCTAGA +CGAGGACGACGAGGACACCAAGCCAGTGCCTAACAGATCGACCAAGGGTGGACAACAGAAAAACAGTCAA +AAGGGCCAGCATACAGAGGGCAGACAGACACAATCCACGCCAACTCAAAACGTCACAGGCCCTCGCAGAA +CAATCCACCATGCCAGTGCTCCACTCACGGACAATGACAGAAGAAACGAACCCTCCGGCTCAACCAGCCC +TCGCATGCTGACCCCAATCAACGAAGAGGCAGACCCACTGGACGATGCCGACGACGAGACGTCTAGCCTT +CCGCCCTTAGAGTCAGATGATGAAGAACAGGACAGGGACGGAACTTCTAACCGCACACCCACTGTCGCCC +CACCGGCTCCCGTATACAGAGATCACTCCGAAAAGAAAGAACTCCCGCAAGATGAACAACAAGATCAGGA +CCACATTCAAGAGGCCAGGAACCAAGACAGTGACAACACCCAGCCAGAACATTCTTTTGAGGAGATGTAT +CGCCACATTCTAAGATCACAGGGGCCATTTGATGCCGTTTTGTATTATCATATGATGAAGGATGAGCCTG +TAGTTTTCAGTACCAGTGATGGTAAAGAGTACACGTATCCGGACTCCCTTGAAGAGGAATATCCACCATG +GCTCACTGAAAAAGAGGCCATGAATGATGAGAATAGATTTGTTACACTGGATGGTCAACAATTTTATTGG +CCAGTAATGAATCACAGGAATAAATTCATGGCAATCCTGCAACATCATCAGTGAATGAGCATGTAATAAT +GGGATGATTTAATCGACAAATAGCTAACATTAAATAGTCAAGGAACGCAAACAGGAAGAATTTTTGATGT +CTAAGGTGTGAATTATTATCACAATAAAAGTGATTCTTAGTTTTGAATTTAAAGCTAGCTTATTATTACT +AGCCGTTTTTCAAAGTTCAATTTGAGTCTTAATGCAAATAAGCGTTAAGCCACAGTTATAGCCATAATGG +TAACTCAATATCTTAGCCAGCGATTTATCTAAATTAAATTACATTATGCTTTTATAACTTACCTACTAGC +CTGCCCAACATTTACACGATCGTTTTATAATTAAGAAAAAACTAATGATGAAGATTAAAACCTTCATCAT +CCTTACGTCAATTGAATTCTCTAGCACTAGAAGCTTATTGTCTTCAATGTAAAAGAAAAGCTGGCCTAAC +AAGATGACAACTAGAACAAAGGGCAGGGGCCATACTGTGGCCACGACTCAAAACGACAGAATGCCAGGCC +CTGAGCTTTCGGGCTGGATCTCTGAGCAGCTAATGACCGGAAGGATTCCTGTAAACGACATCTTCTGTGA +TATTGAGAACAATCCAGGATTATGCTACGCATCCCAAATGCAACAAACGAAGCCAAACCCGAAGATGCGC +AACAGTCAAACCCAAACGGACCCAATTTGCAATCATAGTTTTGAGGAGGTAGTACAAACATTGGCTTCAT +TGGCTACTGTTGTGCAACAACAAACCATCGCATCAGAATCATTAGAACAACGCATTACGAGTCTTGAGAA +TGGTCTAAAGCCAGTTTATGATATGGCAAAAACAATCTCCTCATTGAACAGGGTTTGTGCTGAGATGGTT +GCAAAATATGATCTTCTGGTGATGACAACCGGTCGGGCAACAGCAACCGCTGCGGCAACTGAGGCTTATT +GGGCTGAACATGGTCAACCACCACCTGGACCATCACTTTATGAAGAAAGTGCGATTCGGGGTAAGATTGA +ATCTAGAGATGAGACTGTCCCTCAAAGTGTTAGGGAGGCATTCAACAATCTAGACAGTACCACTTCACTA +ACTGAGGAAAATTTTGGGAAACCTGACATTTCGGCAAAGGATTTGAGAAACATTATGTATGATCACTTGC +CTGGTTTTGGAACTGCTTTCCACCAATTAGTACAAGTGATTTGTAAATTGGGAAAAGATAGCAATTCATT +GGACATTATTCATGCTGAGTTCCAGGCCAGCCTGGCTGAAGGAGACTCCCCTCAATGTGCCCTAATTCAA +ATTACAAAAAGAGTTCCAATCTTCCAAGATGCTGCTCCACCTGTCATCCACATCCGCTCTCGAGGTGACA +TTCCCCGAGCTTGCCAGAAGAGCTTGCGTCCAGTCCCACCATCACCCAAGATTGATCGAGGTTGGGTATG +TGTTTTTCAGCTTCAAGATGGTAAAACACTTGGACTCAAAATTTGAGCCAATCTCTTTTCCCTCCGAAAG +AGGCAACTAATAGCAGAGGCTTCAACTGCTGAACTATAGGGTATGTTACATTAATGATACACTTGTGAGT +ATCAGCCCTAGATAATATAAGTCAATTAAACAACCAAGATAAAATTGTTCATATCCCGCTAGCAGCTTTA +AAGATAAATGTAATAGGAGCTATACCTCTGACAGTATTATAATTAATTGTTATTAAGTAACCCAAACCAA +AAATGATGAAGATTAAGAAAAACCTACCTCGACTGAGAGAGTGTTTTTTCATTAACCTTCATCTTGTAAA +CGTTGAGCAAAATTGTTAAAAATATGAGGCGGGTTATATTGCCTACTGCTCCTCCTGAATATATGGAGGC +CATATACCCTGCCAGGTCAAATTCAACAATTGCTAGGGGTGGCAACAGCAATACAGGCTTCCTGACACCG +GAGTCAGTCAATGGAGACACTCCATCGAATCCACTCAGGCCAATTGCTGATGACACCATCGACCATGCCA +GCCACACACCAGGCAGTGTGTCATCAGCATTCATCCTCGAAGCTATGGTGAATGTCATATCGGGCCCCAA +AGTGCTAATGAAGCAAATTCCAATTTGGCTTCCTCTAGGTGTCGCTGATCAAAAGACCTACAGCTTTGAC +TCAACTACGGCCGCCATCATGCTTGCTTCATATACTATCACCCATTTCGGCAAGGCAACCAATCCGCTTG +TCAGAGTCAATCGGCTGGGTCCTGGAATCCCGGATCACCCCCTCAGGCTCCTGCGAATTGGAAACCAGGC +TTTCCTCCAGGAGTTCGTTCTTCCACCAGTCCAACTACCCCAGTATTTCACCTTTGATTTGACAGCACTC +AAACTGATCACTCAACCACTGCCTGCTGCAACATGGACCGATGACACTCCAACTGGATCAAATGGAGCGT +TGCGTCCAGGAATTTCATTTCATCCAAAACTTCGCCCCATTCTTTTACCCAACAAAAGTGGGAAGAAGGG +GAACAGTGCCGATCTAACATCTCCGGAGAAAATCCAAGCAATAATGACTTCACTCCAGGACTTTAAGATC +GTTCCAATTGATCCAACCAAAAATATCATGGGTATCGAAGTGCCAGAAACTCTGGTCCACAAGCTGACCG +GTAAGAAGGTGACTTCCAAAAATGGACAACCAATCATCCCTGTTCTTTTGCCAAAGTACATTGGGTTGGA +CCCGGTGGCTCCAGGAGACCTCACCATGGTAATCACACAGGATTGTGACACGTGTCATTCTCCTGCAAGT +CTTCCAGCTGTGGTTGAGAAGTAATTGCAATAATTGACTCAGATCCAGTTTTACAGAATCTTCTCAGGGA +TAGTGATAACATCTTTTTAATAATCCGTCTACTAGAAGAGATACTTCTAATTGATCAATATACTAAAGGT +GCTTTACACCATTGTCTCTTTTCTCTCCTAAATGTAGAGCTTAACAAAAGACTCATAATATACCTGTTTT +TAAAAGATTGATTGATGAAAGATCATGACTAATAACATTACAAACAATCCTACTATAATCAATACGGTGA +TTCAAATGTCAATCTTTCTCATTGCACATACTCTTTGTCCTTATCCTCAAATTGCCTACATGCTTACATC +TGAGGACAGCCAGTGTGACTTGGATTGGAGATGTGGAGGAAAAATCGGGGCCCATTTCTAAGTTGTTCAC +AATCTAAGTACAGACATTGCTCTTCTAATTAAGAAAAAATCGGCGATGAAGATTAAGCCGACAGTGAGCG +TAATCTTCATCTCTCTTAGATTATTTGTCTTCCAGAGTAGGGGTCATCAGGTCCTTTTCAATTGGATAAC +CAAAATAAGCTTCACTAGAAGGATATTGTGAGGCGACAACACAATGGGTGTTACAGGAATATTGCAGTTA +CCTCGTGATCGATTCAAGAGGACATCATTCTTTCTTTGGGTAATTATCCTTTTCCAAAGAACATTTTCCA +TCCCGCTTGGAGTTATCCACAATAGTACATTACAGGTTAGTGATGTCGACAAACTAGTTTGTCGTGACAA +ACTGTCATCCACAAATCAATTGAGATCAGTTGGACTGAATCTCGAGGGGAATGGAGTGGCAACTGACGTG +CCATCTGTGACTAAAAGATGGGGCTTCAGGTCCGGTGTCCCACCAAAGGTGGTCAATTATGAAGCTGGTG +AATGGGCTGAAAACTGCTACAATCTTGAAATCAAAAAACCTGACGGGAGTGAGTGTCTACCAGCAGCGCC +AGACGGGATTCGGGGCTTCCCCCGGTGCCGGTATGTGCACAAAGTATCAGGAACGGGACCATGTGCCGGA +GACTTTGCCTTCCACAAAGAGGGTGCTTTCTTCCTGTATGATCGACTTGCTTCCACAGTTATCTACCGAG +GAACGACTTTCGCTGAAGGTGTCGTTGCATTTCTGATACTGCCCCAAGCTAAGAAGGACTTCTTCAGCTC +ACACCCCTTGAGAGAGCCGGTCAATGCAACGGAGGACCCGTCGAGTGGCTATTATTCTACCACAATTAGA +TATCAGGCTACCGGTTTTGGAACTAATGAGACAGAGTACTTGTTCGAGGTTGACAATTTGACCTACGTCC +AACTTGAATCAAGATTCACACCACAGTTTCTGCTCCAGCTGAATGAGACAATATATGCAAGTGGGAAGAG +GAGCAACACCACGGGAAAACTAATTTGGAAGGTCAACCCCGAAATTGATACAACAATCGGGGAGTGGGCC +TTCTGGGAAACTAAAAAAACCTCACTAGAAAAATTCGCAGTGAAGAGTTGTCTTTCACAGCTGTATCAAA +CGGACCCAAAAACATCAGTGGTCAGAGTCCGGCGCGAACTTCTTCCGACCCAGAGACCAACACAACAAAT +GAAGACCACAAAATCATGGCTTCAGAAAATTCCTCTGCAATGGTTCAAGTGCACAGTCAAGGAAGGAAAG +CTGCAGTGTCGCATCTGACAACCCTTGCCACAATCTCCACGAGTCCTCAACCTCCCACAACCAAAACAGG +TCCGGACAACAGCACCCATAATACACCCGTGTATAAACTTGACATCTCTGAGGCAACTCAAGTTGGACAA +CATCACCGTAGAGCAGACAACGACAGCACAGCCTCCGACACTCCCCCCGCCACGACCGCAGCCGGACCCT +TAAAAGCAGAGAACACCAACACGAGTAAGAGCGCTGACTCCCTGGACCTCGCCACCACGACAAGCCCCCA +AAACTACAGCGAGACTGCTGGCAACAACAACACTCATCACCAAGATACCGGAGAAGAGAGTGCCAGCAGC +GGGAAGCTAGGCTTAATTACCAATACTATTGCTGGAGTAGCAGGACTGATCACAGGCGGGAGAAGGACTC +GAAGAGAAGTAATTGTCAATGCTCAACCCAAATGCAACCCCAATTTACATTACTGGACTACTCAGGATGA +AGGTGCTGCAATCGGATTGGCCTGGATACCATATTTCGGGCCAGCAGCCGAAGGAATTTACACAGAGGGG +CTAATGCACAACCAAGATGGTTTAATCTGTGGGTTGAGGCAGCTGGCCAACGAAACGACTCAAGCTCTCC +AACTGTTCCTGAGAGCCACAACTGAGCTGCGAACCTTTTCAATCCTCAACCGTAAGGCAATTGACTTCCT +GCTGCAGCGATGGGGTGGCACATGCCACATTTTGGGACCGGACTGCTGTATCGAACCACATGATTGGACC +AAGAACATAACAGACAAAATTGATCAGATTATTCATGATTTTGTTGATAAAACCCTTCCGGACCAGGGGG +ACAATGACAATTGGTGGACAGGATGGAGACAATGGATACCGGCAGGTATTGGAGTTACAGGTGTTATAAT +TGCAGTTATCGCTTTATTCTGTATATGCAAATTTGTCTTTTAGTCTTTCTTCAGATTGTTTCACGGCAAA +ACTCAACCTCAAATCAATGAAACTAGGATTTAATTATATGAATCACTTGAATCTAAGATTACTTGACAAA +TGATAACATAATACACTGGAGCTTCAAACATAGCCAATGTGATTCTAACTCCTTTAAACTCACAGTTAAT +CATAAACAAGGTTTGACATCAATCTAGCTATATCTTTAAGAATGATAAACTTGATGAAGATTAAGAAAAA +GGTAATCTTTCGATTATCTTTAGTCTTCATCCTTGATTCTACAATCATGACAGTTGTCTTTAATGAAAAA +GGAAAAAAGCCTTTTTATTAAGTTGTAATAATCAGATCTGCAAACCGGTAGAATTTAGTTGTAACCTAAC +ACACACAAAGCATTGGTAAAAAAGTCAATAGAAATTTAAACAGTGAGTGCAGACAACTCTTAAATGGAAG +CTTCATATGAGAGAGGACGCCCCCGAGCTGCCAGACAGCATTCAAGGGATGGACACGACCACCATGTTCG +AGCACGATCATCATCCAGAGAGAATTATCGAGGTGAGTACCGTCAATCAAGGAGCGCCTCACAAGTGCGC +GTTCCTACTGTATTTCATAAGAAGAGAGTTGAACCATTAACAGTTCCTCCAGCACCTAAAGACATATGTC +CGACCTTGAAAAAAGGATTTTTGTGTGACAGTAGTTTTTGCAAAAAAGACCACCAGTTAGAAAGTTTAAC +TGATAGGGAATTACTCCTACTAATCGCCCGTAAGACTTGTGGATCAGTAGAACAACAATTAAATATAACT +GCACCCAAGGACTCGCGCTTAGCAAATCCAACGGCTGATGATTTCCAGCAAGAGGAAGGTCCAAAAATTA +CCTTGTTGACACTGATCAAGACGGCAGAACACTGGGCGAGACAAGACATCCGAACCATAGAGGATTCCAA +ATTAAGGGCATTGTTAACTCTATGTGCTGTGATGACGAGGAAATTCTCAAAATCCCAGCTGAGTCTTTTG +TGTGAGACACACCTAAGGCGCGAAGGGCTTGGGCAAGATCAGGCAGAACCCGTTCTCGAAGTATATCAAC +GATTACACAGTGATAAAGGAGGCAGTTTTGAAGCTGCACTATGGCAACAATGGGACCGACAATCCCTAAT +TATGTTTATCACTGCATTCTTGAATATCGCTCTCCAGTTACCGTGTGAAAGTTCTGCTGTCGTTGTTTCA +GGGTTAAGAACATTGGTTCCTCAATCAGATAATGAGGAAGCTTCAACCAACCCGGGGACATGCTCATGGT +CTGATGAGGGTACCCCTTAATAAGGCTGACTAAAACACTATATAACCTTCTACTTGATCACAATACTCCG +TATACCTATCATCATATATTTAATCAAGACGATATCCTTTAAAACTTATTCAGTACTATAATCACTCTCA +TTTCAAATTGATAAGATATGCATAATTGCCTTAATATATAAAGAGGTATGATATAACCCAAACATTGACC +AAAGAAAATCATAATCTCGTATCGCTCGCAATATAACCTGCCAAGCATACCTCTTGCACAAAGTGATTCT +TGTACACAAATAATGTTTGACTCTACAGGAGGTAGCAACGATCCATCTCATCAAAAAATAAGTATTTTAT +GATTTACTAATGATCTCTTAAAATATTAAGAAAAACTGACGGAACATAAATTCTTTCTGCTTCAAGTTGT +GGAGGAGGTCTATGGTATTCGCTATTGTTATATTACAATCAATAACAAGCTTGTAAAAATATTGTTCTTG +TTTCAGGAGGTATATTGTGACCGGAAAAGCTAAACTAATGATGAAGATTAATGCGGAGGTCTGATGAGAA +TAAACCTTATTATTCAGATTAGGCCCCAAGAGGCATTCTTCATCTCCTTTTAGCAAAATACTATTTCAGG +ATAGTCCAGCTAGTGACACGTCTTTTAGCTGTATACCAGNNNNNNNNNNNNNNNNNNNNNNAAGTGTCTC +TGAGCTAAAGTGGTCTGTACACATCTCATACATTGTATTAGGGGCAATAATATCTAATTGAACTTAGCCA +TTTAAAATTTAGTGCATAAATCTGGGCTAACTCCACCAGGTCAACTCCATTGGCTGAAAAGAAGCCCACC +TACAACGAACATTACTTTGAGCGCCCTCACAATTAAAAAATAAGAGCGTCGTTCCAACAATCGAGCGCAA +GGTTACAAGGTTGAACTGAGAGTGTCTAGACAACAAAATATCGATACTCCAGACACCAAGCAAGACCTGA +GAAAAAACCATGGCCAAAGCTACGGGACGATACAATCTAATATCGCCCAAAAAGGACCTGGAGAAAGGGG +TTGTCTTAAGCGACCTCTGTAACTTCTTAGTTAGTCAAACTATTCAAGGGTGGAAAGTTTATTGGGCTGG +TATTGAGTTTGATGTGACTCACAAAGGAATGGCCCTATTGCATAGACTGAAAACTAATGACTTTGCCCCT +GCATGGTCAATGACAAGGAACCTATTTCCCCATTTATTTCAAAATCCGAATTCCACTATTGAATCACCGC +TGTGGGCACTGAGAGTCATCCTTGCAGCAGGGATACAGGACCAGTTAATTGACCAGTCTTTGATTGAACC +CTTAGCAGGAGCCCTTGGTCTGATCTCTGATTGGCTGCTAACAACCAACACTAACCATTTCAACATGCGA +ACACAACGTGTCAAGGAACAATTGAGCCTAAAAATGCTGTCGTTGATTCGATCCAATATTCTCAAGTTTA +TTAACAAATTGGATGCTCTACATGTCGTGAACTACAATGGATTATTGAGCAGTATTGAAATTGGAACTCA +AAATCATACAATCATCATAACTCGAACTAACATGGGTTTTCTGGTGGAGCTCCAAGAACCCGACAAATCG +GCAATGAACCGCAAGAAGCCTGGGCCGGCGAAATTTTCCCTCCTTCATGAGTCCACACTGAAAGCATTTA +CACAAGGGTCCTCGACACGAATGCAAAGTTTAATTCTTGAATTCAATAGCTCTCTTGCTATCTAACTAAG +ATGGAATACTTCATATTGGGCTAACTCATATATGCTGACTCAATAGTTAACTTGACATCTCTGCCTTCAT +AATCAGATATATAAGCATAATAAATAAATACTCATATTTCTTGATAATTTGTTTAACCACAGATAAATCC +TCACTGTAAGCCAGCTTCCAAGTTGACACCCTTACAAAAACCAGGACTCAGAATCCCTCAAATAAGAGAT +TCCAAGACAACATCATAGAATTGCTTTATTATATTAATAAGCATTTTATCACTAGAAATCCAATATACGA +AATGGTTAATTGTAACTAAACCCGCAGGTCATGTGTGTTAGGTTTCACAAATTATATATATTACTAACTC +CATACTCGTAACTAACATTAGATAAGTAGGTTAAGAAAAAAGCTTGAGGAAGATTAAGAAAAACTGCTTA +TTGGGTCTTTCCGTGTTTTAGATGAAGCAGTTGACATTCTTCCTCTTGATATTAAATGGCTACACAACAT +ACCCAATACCCAGACGCCAGGTTATCATCACCAATTGTATTGGACCAATGTGACCTTGTCACTAGAGCTT +GCGGGTTGTATTCATCATACTCCCTTAATCCGCAACTACGCAACTGTAAACTCCCGAAACATATATACCG +TTTAAAATATGATGTAACTGTTACCAAGTTCTTAAGTGATGTACCAGTGGCGACATTGCCCATAGATTTC +ATAGTCCCAATTCTTCTCAAGGCACTATCAGGCAATGGGTTCTGTCCTGTTGAGCCGCGGTGCCAACAGT +TCTTAGATGAAATTATTAAGTACACAATGCAAGATGCTCTCTTCCTGAAATATTATCTCAAAAATGTGGG +TGCTCAAGAAGACTGTGTTGATGACCACTTTCAAGAAAAAATCTTATCTTCAATTCAGGGCAATGAATTT +TTACATCAAATGTTTTTCTGGTATGACCTGGCTATTTTAACTCGAAGGGGTAGATTAAATCGAGGAAACT +CTAGATCAACGTGGTTTGTTCATGATGATTTAATAGACATCTTAGGCTATGGGGACTATGTTTTTTGGAA +GATCCCAATTTCACTGTTACCACTGAACACACAAGGAATCCCCCATGCTGCTATGGATTGGTATCAGACA +TCAGTATTCAAAGAAGCGGTTCAAGGGCATACACACATTGTTTCTGTTTCTACTGCCGATGTCTTGATAA +TGTGCAAAGATTTAATTACATGTCGATTCAACACAACTCTAATCTCAAAAATAGCAGAGGTTGAGGACCC +AGTTTGCTCTGATTATCCCAATTTTAAGATTGTGTCTATGCTTTACCAGAGCGGAGATTACTTACTCTCC +ATATTAGGGTCTGATGGGTATAAAATCATTAAGTTTCTCGAACCATTGTGCTTGGCTAAAATTCAATTGT +GCTCAAAGTACACCGAGAGGAAGGGCCGATTCTTAACACAAATGCATTTAGCTGTAAATCACACCCTGGA +AGAAATTACAGAAATACGTGCACTAAAGCCTTCACAGGCTCACAAGATCCGTGAATTCCATAGAACATTG +ATAAGGCTGGAGATGACGCCACAACAACTTTGTGAGCTATTTTCCATACAAAAACACTGGGGGCATCCTG +TGCTACATAGTGAAACAGCAATCCAAAAAGTTAAAAAACATGCTACGGTGCTAAAAGCATTACGCCCTAT +CGTGATTTTCGAGACATATTGTGTTTTTAAATATAGCATTGCAAAACATTATTTTGATAGTCAAGGATCT +TGGTACAGTGTTACCTCAGATAGAAATCTAACACCAGGTCTTAATTCTTATATCAAAAGAAATCAATTCC +CTCCGTTGCCAATGATTAAAGAACTGCTATGGGAATTTTACCACCTTGACCATCCTCCACTTTTCTCAAC +CAAAATTATTAGTGACTTAAGTATTTTTATAAAAGACAGAGCTACTGCAGTAGAAAGGACATGCTGGGAT +GCAGTATTCGAGCCTAATGTTCTGGGATATAATCCACCTCACAAATTCAGTACCAAACGTGTACCGGAAC +AATTTTTAGAGCAAGAAAACTTTTCTATTGAGAATGTTCTTTCCTACGCGCAAAAACTCGAGTATCTACT +ACCACAATATCGGAATTTTTCTTTCTCATTGAAAGAGAAAGAGTTGAATGTAGGTAGAACTTTCGGAAAA +TTGCCTTATCCGACTCGCAATGTTCAAACACTTTGTGAAGCTCTGTTAGCTGATGGTCTTGCTAAAGCAT +TTCCTAGCAATATGATGGTAGTTACGGAACGTGAACAAAAAGAAAGCTTATTGCATCAAGCATCATGGCA +CCACACAAGTGATGATTTCGGTGAGCATGCCACAGTTAGAGGGAGTAGCTTTGTAACTGATTTAGAGAAA +TACAATCTTGCATTTAGGTATGAGTTTACAGCACCTTTTATAGAATATTGCAACCGTTGCTATGGTGTTA +AGAATGTTTTTAATTGGATGCATTATACAATCCCACAGTGTTATATGCATGTCAGTGATTATTATAATCC +ACCGCATAACCTCACACTGGAAAATCGAAACAACCCCCCTGAAGGGCCTAGTTCATACAGGGGTCATATG +GGAGGGATTGAAGGACTGCAACAAAAACTCTGGACAAGTATTTCATGTGCTCAAATTTCTTTAGTTGAAA +TTAAGACTGGTTTTAAGTTGCGCTCAGCTGTGATGGGTGACAATCAGTGCATTACCGTTTTATCAGTCTT +CCCCTTAGAGACTGATGCAGGCGAGCAGGAACAGAGCGCCGAGGACAATGCAGCGAGGGTGGCCGCCAGC +CTAGCAAAAGTTACAAGTGCCTGTGGAATCTTTTTAAAACCTGATGAAACATTTGTACATTCAGGTTTTA +TCTATTTTGGAAAAAAACAATATTTGAATGGGGTCCAATTGCCTCAGTCCCTTAAAACGGCTACAAGAAT +GGCACCATTGTCTGATGCAATTTTTGATGATCTTCAAGGGACCCTGGCTAGTATAGGTACTGCTTTTGAG +CGATCCATCTCTGAGACACGACATATCTTTCCTTGCAGAATAACCGCAGCTTTCCATACGTTCTTTTCGG +TGAGAATCTTGCAATATCATCACCTCGGATTTAATAAAGGTTTTGACCTTGGACAGTTAACACTCGGCAA +ACCTCTGGATTTCGGAACAATATCATTGGCACTAGCGGTACCGCAGGTGCTTGGAGGGTTATCCTTCTTG +AATCCTGAGAAATGTTTCTACCGGAATCTAGGAGATCCAGTTACCTCAGGTTTATTCCAGTTAAAAACTT +ATCTCCGAATGATTGAGATGGATGATTTATTCTTACCTTTAATTGCGAAGAACCCTGGGAACTGCACTGC +CATTGACTTTGTGCTAAATCCTAGCGGATTAAATGTTCCTGGGTCGCAAGACTTAACTTCATTTCTGCGC +CAGATTGTACGTAGGACTATCACCCTAAGTGCGAAAAACAAACTTATTAATACCTTATTTCATGCATCAG +CTGACTTCGAAGACGAAATGGTTTGTAAGTGGCTCTTATCATCAACTCCTGTTATGAGTCGTTTCGCAGC +CGATATATTTTCACGCACGCCGAGCGGGAAGCGATTGCAAATTCTAGGATACTTGGAAGGAACACGCACA +TTATTAGCCTCTAAGATCATCAACAATAATACAGAGACGCCGGTTTTGGACAGACTGAGGAAGATAACAT +TGCAAAGGTGGAGTCTATGGTTTAGTTATCTTGATCATTGTGATAATATCCTGGCGGAGGCTTTAACCCA +AATAACTTGCACAGTTGATTTAGCACAGATCCTGAGGGAATATTCATGGGCACATATTTTAGAGGGGAGA +CCTCTTATTGGAGCCACACTCCCATGTATGATTGAGCAATTCAAAGTGGTTTGGCTGAAACCCTACGAAC +AATGTCCGCAGTGTTCAAATGCCAAGCAACCTGGTGGGAAACCATTCGTGTCAGTAGCAGTCAAGAAACA +TATTGTTAGTGCATGGCCAAATGCATCCCGAATAAGCTGGACTATCGGGGATGGAATCCCATACATTGGA +TCAAGGACAGAAGATAAGATAGGGCAACCTGCTATTAAACCAAAATGTCCTTCCGCAGCCTTAAGAGAGG +CCATTGAATTGGCGTCCCGTTTAACATGGGTAACTCAAGGCAGTTCGAACAGTGACTTGCTAATAAAACC +ATTTTTGGAAGCACGAGTAAATTTAAGTGTTCAAGAAATACTTCAAATGACCCCTTCACATTACTCGGGA +AATATTGTTCATAGGTACAACGATCAATACAGTCCTCATTCTTTCATGGCCAATCGTATGAGTAACTCAG +CAACGCGATTGATTGTTTCTACAAACACTTTAGGTGAGTTTTCAGGAGGTGGCCAATCGGCACGCGACAG +CAATATTATTTTCCAGAATGTTATAAATTATGCAGTTGCACTGTTCGATATTAAATTTAGAAACACTGAG +GCTACAGATATCCAGTATAATCGTGCTCACCTTCATCTAACTAAGTGTTGCACCCGGGAGGTACCAGCTC +AGTACTTAACATACACATCTACATTGGATTTAGATTTAACAAGATACCGAGAAAATGAATTGATTTATGA +CAATAATCCTCTAAAAGGAGGACTCAATTGCAATATCTCATTTGATAACCCATTTTTCCAAGGCAAACAG +CTGAACATTATAGAAGATGACCTTATTCGACTGCCTCACTTATCTGGATGGGAGCTAGCTAAGACCATCA +TGCAATCAATTATTTCAGATAGCAATAATTCGTCTACAGACCCAATTAGCAGTGGAGAAACAAGATCATT +CACTACCCATTTCTTAACTTATCCCAAGATAGGACTTCTGTACAGTTTTGGGGCCTTTGTAAGTTATTAT +CTTGGCAATACAATTCTTCGGACTAAGAAATTAACACTTGACAATTTTTTATATTACTTAACTACCCAAA +TTCATAATCTACCACATCGCTCATTGCGAATACTTAAGCCAACATTCAAACATGCAAGCGTTATGTCACG +ATTAATGAGTATTGATCCCCATTTTTCTATTTACATAGGCGGTGCTGCAGGTGACAGAGGACTCTCAGAT +GCGGCCAGGTTATTTTTGAGAACGTCCATTTCATCTTTTCTTACATTTGTAAAGGAATGGATAATTAATC +GCGGAACAATTGTCCCTTTATGGATAGTATATCCATTAGAGGGTCAAAATCCAACACCTGTTAATAATTT +CCTCCATCAGATCGTAGAACTGCTGGTGCATGATTCATCAAGACACCAGGCTTTTAAAACTACCATAAAT +GATCATGTACATCCTCACGACAATCTTGTTTACACATGTAAGAGTACAGCCAGCAATTTCTTCCATGCGT +CATTGGCGTACTGGAGGAGCAGGCACAGAAACAGCAACCGAAAAGACTTGACAAGAAACTCTTCAACTGG +ATCAAGCACAAACAACAGTGATGGTCATATTAAGAGAAGTCAAGAACAAACCACCAGAGATCCACATGAT +GGCACTGAACGGAGTCTAGTCCTGCAAATGAGCCATGAAATAAAAAGAACGACAATTCCACAAGAGAACA +CGCACCAGGGTCCGTCGTTCCAGTCATTTCTAAGTGACTCTGCTTGCGGTACAGCAAACCCAAAACTAAA +TTTCGATAGATCGAGACACAATGTGAAATCTCAGGATCATAACTCAGCATCCAAGAGGGAAGGTCATCAA +ATAATCTCACATCGTCTAGTCCTACCTTTCTTTACATTATCTCAAGGGACACGCCAATTAACGTCATCCA +ATGAGTCACAAACCCAAGATGAGATATCAAAGTACTTACGGCAATTGAGATCCGTCATTGATACCACAGT +TTATTGTAGGTTTACCGGTATAGTCTCGTCCATGCATTACAAACTTGATGAGGTCCTTTGGGAAATAGAG +AATTTTAAGTCGGCTGTGACGCTGGCAGAGGGAGAAGGTGCTGGTGCCTTACTATTGATTCAGAAATACC +AAGTTAAGACCTTATTTTTCAACACGCTAGCTACTGAGTCCAGTATAGAGTCAGAAATAGTATCAGGAAT +GACTACTCCTAGGATGCTTCTACCTGTTATGTCAAAATTCCATAATGACCAAATTGAGATTATTCTTAAC +AACTCAGCAAGCCAAATAACAGACATAACAAATCCTACTTGGTTTAAAGACCAAAGAGCAAGGCTACCTA +GGCAAGTCGAGGTTATAACCATGGATGCAGAGACGACAGAGAATATAAACAGATCGAAATTGTACGAAGC +TGTACATAAATTGATCTTACACCATGTTGATCCCAGCGTATTGAAAGCAGTGGTCCTTAAAGTCTTTCTA +AGTGATACCGAGGGTATGTTATGGCTAAATGATAATCTAGCCCCGTTTTTTGCCACTGGGTATTTAATTA +AGCCAATAACGTCAAGTGCCAGGTCTAGTGAGTGGTATCTTTGTCTGACGAACTTCTTATCAACTACACG +TAAGATGCCACACCAAAACCATCTCAGTTGTAAGCAGGTAATACTTACGGCATTGCAACTGCAAATTCAA +CGGAGCCCATACTGGCTAAGTCATTTAACTCAGTATGCTGACTGCGATTTACATTTAAGCTATATCCGCC +TTGGTTTTCCATCATTAGAGAAAGTACTATACCACAGGTATAACCTTGTCGATTCAAAAAGAGGTCCACT +AGTCTCTGTCACTCAGCACTTAGCACATCTTAGGGCAGAGATTCGAGAATTGACCAATGATTATAATCAA +CAGCGACAAAGTCGGACTCAAACATATCACTTTATTCGTACTGCAAAAGGACGAATCACAAAACTAGTCA +ATGATTATTTAAAATTCTTTCTTATTGTACAAGCATTAAAACATAATGGGACATGGCAAGCTGAGTTTAA +GAAATTACCAGAGTTGATTAGTGTGTGCAATAGGTTCTATCATATTAGAGATTGTAATTGTGAAGAACGT +TTCTTAGTTCAAACCTTATATTTACATAGAATGCAGGATTCTGAAGTTAAGCTTATCGAAAGGCTGACAG +GGCTTCTGAGTTTATTTCCAGATGGTCTCTACAGGTTCGATTGAATAACCGTGCATAGTATTTTGATACT +TGTAAAGGTTGGTTATCAACATACAGATTATAAAAAACTCATAAATTGCTCTCATACATCATCTTGATCT +GATTTCAATAAATAACTATTTAGATAACGAAAGGAGTCCTTACATTATACACTATATTTGGCCTCTCTCC +CTGCGTGATAATCAAAAAATTCACAATACAGCATGTGTGACATATTACTGCTGCAATGAGTCTAACGCAA +CATAATAAACTCCGCACTCTTTATAATTAAGCTTTAACGATAGGTCTGGGCTCATATTGTTATTGATATA +GTAATGTTGTATCAATATCTTGCCAGATGGAATAGTGCTTTGGTTGATAACACGACTTCTTAAAACAAAA +CTGATCTTTAAGATTAAGTTTTTTATAATTGTCATTGCTTTAATTTGTCGATTTAAAAATGGTGATAGCC +TTAATCTTTGTGTAAAATAAGAGATTAGGTGTAATAACTTTAACATTTTTGTCTAGTAAGCTACTATTCC +ATTCAGAATGATAAAATTAAAAGAAAAGACATGACTGTAAAATCAGAAATACCTTCTTTACAATATAGCA +GACTAGATAATAATCTTCGTGTTAATGATAATTAAGGCATTGACCACGCTCATCAGAAGGCTCACTAGAA +TAAACGTTGCAAAAAGGATCCCTGGAAAAATGGTCGCACACAAAAATTTAAAAATAAATCTATTTCTTCT +TTTTTGTGTGT diff --git a/test/unit/test_assembly.py b/test/unit/test_assembly.py index 6f8affd1..206a3769 100644 --- a/test/unit/test_assembly.py +++ b/test/unit/test_assembly.py @@ -17,6 +17,7 @@ import itertools import pytest import assemble.mummer +import assemble.skani import tools.minimap2 import tools.novoalign import tools.picard @@ -246,6 +247,37 @@ def test_non_failure(self): self.assertEqual(out, out.upper()) +class TestUndirectedGraph(unittest.TestCase): + def test_simple(self): + g = assemble.skani.UndirectedGraph() + g.add_edge('a', 'b') + g.add_edge('a', 'c') + g.add_edge('b', 'd') + actual = list(sorted(g.get_clusters())) + self.assertEqual(actual, [{'a', 'b', 'c', 'd'}]) + + def test_disconnected(self): + g = assemble.skani.UndirectedGraph() + g.add_edge('a', 'b') + g.add_edge('c', 'd') + actual = list(sorted(g.get_clusters())) + self.assertEqual(actual, [{'a', 'b'}, {'c', 'd'}]) + + def test_both(self): + g = assemble.skani.UndirectedGraph() + g.add_edge(1, 2) + g.add_edge(11,12) + g.add_edge(18,15) + g.add_node(12) + g.add_node(22) + g.add_node(55) + g.add_edge(25,22) + g.add_edge(7,2) + g.add_edge(12,18) + actual = list(sorted(g.get_clusters())) + self.assertEqual(actual, [{1, 2, 7}, {11, 12, 15, 18}, {22, 25}, {55}]) + + class TestOrderAndOrient(TestCaseWithTmp): ''' Test the MUMmer-based order_and_orient command ''' @@ -313,11 +345,11 @@ def test_ebov_palindrome(self): def test_ebov_palindrome_refsel(self): # this tests a scenario where show-aligns has more alignments than show-tiling with util.file.tempfnames(('.out.fasta', '.stats.tsv')) as (outFasta, outStats): - contigs, refs, expected, expectedStats = self.inputs('contigs.ebov.doublehit.fasta', - 'refs.ebov.fasta', + contigs, expected, expectedStats = self.inputs('contigs.ebov.doublehit.fasta', 'expected.ebov.doublehit.fasta', 'expected.refsel.ebov.stats.tsv') - assembly.order_and_orient(contigs, refs, outFasta, n_genome_segments=1, outStats=outStats) + refs = self.inputs('ref.ebov.lbr.fasta','ref.ebov.sle.fasta','ref.ebov.gin.fasta') + assembly.order_and_orient(contigs, refs, outFasta, outStats=outStats) self.assertEqualFastaSeqs(outFasta, expected) self.assertEqualContents(outStats, expectedStats) @@ -582,6 +614,36 @@ def test_empty_fasta_input(self): aligner='mummer') self.assertEqualContents(outFasta, empty_fasta) +class TestSkaniReferenceSelection(TestCaseWithTmp): + ''' Test Skani-based reference selection ''' + + def test_skani_contigs_to_refs(self): + ''' + Test the skani_contigs_to_refs function. + Test inputs include LASV MAGs/contigs against various EBOV and LASV references. + The only references that should hit are the LASV Josiah and KGH_G502 references. + Additionally, skani should identify them as being from the same cluster. + No EBOV references should be selected. + ''' + + inDir = os.path.join(util.file.get_test_input_path(), 'TestOrderAndOrient') + with util.file.tempfnames(('.skani.dist.out', '.skani.dist.filtered', '.clusters.filtered')) \ + as (out_skani_dist, out_skani_dist_filtered, out_clusters_filtered): + contigs = os.path.join(inDir, 'contigs.lasv.fasta') + refs = [os.path.join(inDir, 'ref.lasv.{}.fasta'.format(strain)) + for strain in ('josiah', 'pinneo', 'KGH_G502', 'BNI_Nig08_A19', 'nomatch')] + \ + [os.path.join(inDir, 'ref.ebov.{}.fasta'.format(strain)) + for strain in ('lbr', 'sle', 'gin')] + + assembly.skani_contigs_to_refs(contigs, refs, out_skani_dist, out_skani_dist_filtered, out_clusters_filtered, threads=1) + + with open(out_clusters_filtered, 'r') as inf: + clusters = inf.readlines() + self.assertEqual(len(clusters), 1) + actual_cluster = set([os.path.basename(f) for f in clusters[0].strip().split('\t')]) + expected_cluster = set(['ref.lasv.{}.fasta'.format(strain) for strain in ('josiah', 'KGH_G502')]) + self.assertEqual(actual_cluster, expected_cluster) + class TestMutableSequence(unittest.TestCase): ''' Test the MutableSequence class '''