From 917ba609cb9416f01361b1297d7e85c755cd50c7 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 13:37:32 -0400 Subject: [PATCH 01/14] deprecate nGenomeSegments to order_and_orient --- .../TestOrderAndOrient/ref.ebov.gin.fasta | 271 +++++++++++++++++ .../TestOrderAndOrient/ref.ebov.lbr.fasta | 271 +++++++++++++++++ .../TestOrderAndOrient/ref.ebov.sle.fasta | 272 ++++++++++++++++++ test/unit/test_assembly.py | 6 +- 4 files changed, 817 insertions(+), 3 deletions(-) create mode 100644 test/input/TestOrderAndOrient/ref.ebov.gin.fasta create mode 100644 test/input/TestOrderAndOrient/ref.ebov.lbr.fasta create mode 100644 test/input/TestOrderAndOrient/ref.ebov.sle.fasta diff --git a/test/input/TestOrderAndOrient/ref.ebov.gin.fasta b/test/input/TestOrderAndOrient/ref.ebov.gin.fasta new file mode 100644 index 00000000..9c7ed106 --- /dev/null +++ b/test/input/TestOrderAndOrient/ref.ebov.gin.fasta @@ -0,0 +1,271 @@ +>KT765130.1 Zaire ebolavirus isolate H.sapiens-wt/GIN/2014/Makona-Conakry-CREMS-1022, complete genome +CGGACACACAAAAAGAAAGAAGAATTTTTAGGATCTTTTGTGTGCGAATAACTATGAGGAAGATTAATAA +TTTTCCTCTCATTGAAATTTATATCAGAATTTAAATTGAAATTGTTACTGTAATCATACCTGGTTTGTTT +CAGAGCCATATCACCAAGATAGAGAACAACCTAGGTCTCCGGAGGGGGCAAGGGCATCAGTGTGCTCAGT +TGAAAATCCCTTGTCAACATCTAGGCCTTATCACATCACAAGTTCCGCCTTAAACTCTGCAGGGTGATCC +AACAACCTTAATAGCAACATTATTGTTAAAGGACAGCATTAGTTCACAGTCAAACAAGCAAGATTGGGAA +TTAACTTTGATTTTGAACCTGAACACCCAGAGGACTGGAGACTCAACAACCCTAAAGCCTGGGGTAAAAC +ATTAGAAATAGTTTAAAGACAAATTGCTCGGAATCACAAAATTCCGAGTATGGATTCTCGTCCTCAGAAA +GTCTGGATGACGCCGAGTCTCACTGAATCTGACATGGATTACCACAAGATCTTGACAGCAGGTCTGTCCG +TTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCAGTGTATCAAGTAAACAATCTTGAGGAAATTTGCCA +ACTTATCATACAGGCCTTTGAAGCTGGTGTTGATTTTCAAGAGAGTGCGGACAGTTTCCTTCTCATGCTT +TGTCTTCATCATGCGTACCAAGGAGATTACAAACTTTTCTTGGAAAGTGGCGCAGTCAAGTATTTGGAAG +GGCACGGGTTCCGTTTTGAAGTCAAGAAGTGTGATGGAGTGAAGCGCCTTGAGGAATTGCTGCCAGCAGT +ATCTAGTGGGAGAAACATTAAGAGAACACTTGCTGCCATGCCGGAAGAGGAGACGACTGAAGCTAATGCC +GGTCAGTTCCTCTCCTTTGCAAGTCTATTCCTTCCGAAATTGGTAGTAGGAGAAAAGGCTTGCCTTGAGA +AGGTTCAAAGGCAAATTCAAGTACATGCAGAGCAAGGACTGATACAATATCCAACAGCTTGGCAATCAGT +AGGACACATGATGGTGATTTTCCGTTTGATGCGAACAAATTTTTTGATCAAATTTCTTCTAATACACCAA +GGGATGCACATGGTTGCCGGACATGATGCCAACGATGCTGTGATTTCAAATTCAGTGGCTCAAGCTCGTT +TTTCAGGTCTATTGATTGTCAAAACAGTACTTGATCATATCCTACAAAAGACAGAACGAGGAGTTCGTCT +CCATCCTCTTGCAAGGACCGCCAAGGTAAAAAATGAGGTGAACTCCTTCAAGGCTGCACTCAGCTCCCTG +GCCAAGCATGGAGAGTATGCTCCTTTCGCCCGACTTTTGAACCTTTCTGGAGTAAATAATCTTGAGCATG +GTCTTTTCCCTCAACTGTCGGCAATTGCACTCGGAGTCGCCACAGCCCACGGGAGCACCCTCGCAGGAGT +AAATGTTGGAGAACAGTATCAACGGCTCAGAGAGGCAGCCACTGAGGCTGAGAAGCAACTCCAACAATAT +GCGGAGTCTCGTGAACTTGACCATCTTGGACTTGATGATCAGGAAAAGAAAATTCTTATGAGCTTCCATC +AGAAAAAGAACGAAATCAGCTTCCAGCAAACAAACGCGATGGTAACTCTAAGAAAAGAGCGCCTGGCCAA +GCTGACAGAAGCTATCACTGCTGCATCACTGCCCAAAACAAGTGGACATTACGATGATGATGACGACATT +CCCTTTCCAGGACCCATCAATGATGACAACAATCCTGGCCATCAAGATGATGATCCGACTGACTCACAGG +ATACGACCATTCCCGATGTGGTAGTTGACCCCGATGATGGAGGCTACGGCGAATACCAAAGTTACTCGGA +AAACGGCATGAGTGCACCAGATGACTTGGTCCTATTCGATCTAGACGAGGACGACGAGGACACCAAGCCA +GTGCCTAACAGATCGACCAAGGGTGGACAACAGAAAAACAGTCAAAAGGGCCAGCATACAGAGGGCAGAC +AGACACAATCCACGCCAACTCAAAACGTCACAGGCCCTCGCAGAACAATCCACCATGCCAGTGCTCCACT +CACGGACAATGACAGAAGAAACGAACCCTCCGGCTCAACCAGCCCTCGCATGCTGACCCCAATCAACGAA +GAGGCAGACCCACTGGACGATGCCGACGACGAGACGTCTTGCCTTCCGCCCTTAGAGTCAGATGATGAAG +AACAGGACAGGGACGGAACTTCTAACCGCACACCCACTGTCGCCCCACCGGCTCCCGTATACAGAGATCA +CTCCGAAAAGAAAGAACTCCTGCAAGATGAACAACAAGATCAGGACCACATTCAAGAGGCCAAGAACCAA +GACAGTGACAACACCCAGCCAGAACATTCTTTTGAGGAGATGTATCTCCACATTCTAAGATCACAGGGGC +CATTTGATGCCGTTTTGTATTATCATATGATGAAGGATGAGCCTGTAGTTTTCAGTACCAGTGATGGTAA +AGAGTACACGTATCCGGACTCCCTTGAAGAGGAATATCCACCATGGCTCACTGAAAAAGAGGCCATGAAT +GATGAGAATAGATTTGTTACACTGGATGGTCAACAATTTTATTGGCCAGTAATGAATCACAGGAATAAAT +TCATGGCAATCCTGCAACATCATCAGTGAATGAGCATGTAATAATGGGATGATTTAATCGACAAATAGCT +AACATTAAATAGTCAAGGAACGCAAACAGGAAGAATTTTTGATGTCTAAGGTGTGAATTATTATCACAAT +AAAAGTGATTCTTAGTTTTGAATTTAAAGCTAGCTTATTATTACTAGCCGTTTTTCAAAGTTCAATTTGA +GTCTTAATGCAAATAAGCGTTAAACCACAGTTATAGCCATAATGGTAACTCAATATCTTAGCCAGCGATT +TATCTAAATTAAATTACATTATGCTTTTATAACTTACCTACTAGCCTGCCCAACATTTACACGATCGTTT +TATAATTAAGAAAAAACTAATGATGAAGATTAAAACCTTCATCATCCTTACGTCAATTGAATTCTCTAGC +ACTAGAAGCTTATTGTCTTCAATGTAAAAGAAAAGCTGGCCTAACAAGATGACAACTAGAACAAAGGGCA +GGGGCCATACTGTGGCCACGACTCAAAACGACAGAATGCCAGGCCCTGAGCTTTCGGGCTGGATCTCTGA +GCAGCTAATGACCGGAAGGATTCCTGTAAACGACATCTTCTGTGATATTGAGAACAATCCAGGATTATGC +TACGCATCCCAAATGCAACAAACGAAGCCAAACCCGAAGATGCGCAACAGTCAAACCCAAACGGACCCAA +TTTGCAATCATAGTTTTGAGGAGGTAGTACAAACATTGGCTTCATTGGCTACTGTTGTGCAACAACAAAC +CATCGCATCAGAATCATTAGAACAACGCATTACGAGTCTTGAGAATGGTCTAAAGCCAGTTTATGATATG +GCAAAAACAATCTCCTCATTGAACAGGGTTTGTGCTGAGATGGTTGCAAAATATGATCTTCTGGTGATGA +CAACCGGTCGGGCAACAGCAACCGCTGCGGCAACTGAGGCTTATTGGGCTGAACATGGTCAACCACCACC +TGGACCATCACTTTATGAAGAAAGTGCGATTCGGGGTAAGATTGAATCTAGAGATGAGACTGTCCCTCAA +AGTGTTAGGGAGGCATTCAACAATCTAGACAGTACCACTTCACTAACTGAGGAAAATTTTGGGAAACCTG +ACATTTCGGCAAAGGATTTGAGAAACATTATGTATGATCACTTGCCTGGTTTTGGAACTGCTTTCCACCA +ATTAGTACAAGTGATTTGTAAATTGGGAAAAGATAGCAATTCATTGGACATTATTCATGCTGAGTTCCAG +GCCAGCCTGGCTGAAGGAGACTCCCCTCAATGTGCCCTAATTCAAATTACAAAAAGAGTTCCAATCTTCC +AAGATGCTGCTCCACCTGTCATCCACATCCGCTCTCGAGGTGACATTCCCCGAGCTTGCCAGAAGAGCTT +GCGTCCAGTCCCACCATCACCCAAGATTGATCGAGGTTGGGTATGTGTTTTTCAGCTTCAAGATGGTAAA +ACACTTGGACTCAAAATTTGAGCCAATCTCTTTTCCCTCCGAAAGAGGCAACTAATAGCAGAGGCTTCAA +CTGCTGAACTATAGGGTATGTTACATTAATGATACACTTGTGAGTATCAGCCCTAGATAATATAAGTCAA +TTAAACAACCAAGATAAAATTGTTCATATCCCGCTAGCAGCTTTAAAGATAAATGTAATAGGAGCTATAC +CTCTGACAGTATTATAATTAATTGTTATTAAGTAACCCAAACCAAAAATGATGAAGATTAAGAAAAACCT +ACCTCGACTGAGAGAGTGTTTTTTCATTAACCTTCATCTTGTAAACGTTGAGCAAAATTGTTAAAAATAT +GAGGCGGGTTATATTGCCTACTGCTCCTCCTGAATATATGGAGGCCATATACCCTGCCAGGTCAAATTCA +ACAATTGCTAGGGGTGGCAACAGCAATACAGGCTTCCTGACACCGGAGTCAGTCAATGGAGACACTCCAT +CGAATCCACTCAGGCCAATTGCTGATGACACCATCGACCATGCCAGCCACACACCAGGCAGTGTGTCATC +AGCATTCATCCTCGAAGCTATGGTGAATGTCATATCGGGCCCCAAAGTGCTAATGAAGCAAATTCCAATT +TGGCTTCCTCTAGGTGTCGCTGATCAAAAGACCTACAGCTTTGACTCAACTACGGCCGCCATCATGCTTG +CTTCATATACTATCACCCATTTCGGCAAGGCAACCAATCCGCTTGTCAGAGTCAATCGGCTGGGTCCTGG +AATCCCGGATCACCCCCTCAGGCTCCTGCGAATTGGAAACCAGGCTTTCCTCCAGGAGTTCGTTCTTCCA +CCAGTCCAACTACCCCAGTATTTCACCTTTGATTTGACAGCACTCAAACTGATCACTCAACCACTGCCTG +CTGCAACATGGACCGATGACACTCCAACTGGATCAAATGGAGCGTTGCGTCCAGGAATTTCATTTCATCC +AAAACTTCGCCCCATTCTTTTACCCAACAAAAGTGGGAAGAAGGGGAACAGTGCCGATCTAACATCTCCG +GAGAAAATCCAAGCAATAATGACTTCACTCCAGGACTTTAAGATCGTTCCAATTGATCCAACCAAAAATA +TCATGGGTATCGAAGTGCCAGAAACTCTGGTCCACAAGCTGACCGGTAAGAAGGTGACTTCCAAAAATGG +ACAACCAATCATCCCTGTTCTTTTGCCAAAGTACATTGGGTTGGACCCGGTGGCTCCAGGAGACCTCACC +ATGGTAATCACACAGGATTGTGACACGTGTCATTCTCCTGCAAGTCTTCCAGCTGTGGTTGAGAAGTAAT +TGCAATAATTGACTCAGATCCAGTTTTACAGAATCTTCTCAGGGATAGTGATAACATCTTTTTAATAATC +CGTCTACTAGAAGAGATACTTCTAATTGATCAATATACTAAAGGTGCTTTACACCATTGTCTCTTTTCTT +TCGTAAATGTAGAGCTTAACAAAAGACTCATAATATACCTGTTTTTAAAAGATTGATTGATGAAAGATCA +TGACTAATAACATTACAAACAATCCTACTATAATCAATACGGTGATTCAAATGTCAATCTTTCTCATTGC +ACATACTCTTTGTCCTTATCCTCAAATTGCCTACATGCTTACATCTGAGGACAGCCAGTGTGACTTGGAT +TGGAGATGTGGAGGAAAAATCGGGGCCCATTTCTAAGTTGTTCACAATCTAAGTACAGACATTGCTCTTC +TAATTAAGAAAAAATCGGCGATGAAGATTAAGCCGACAGTGAGCGTAATCTTCATCTCTCTTAGATTATT +TGTCTTCCAGAGTAGGGGTCATCAGGTCCTTTTCAATTGGATAACCAAAATAAGCTTCACTAGAAGGATA +TTGTGAGGCGACAACACAATGGGTGTTACAGGAATATTGCAGTTACCTCGTGATCGATTCAAGAGGACAT +CATTCTTTCTTTGGGTAATTATCCTTTTCCAAAGAACATTTTCCATCCCGCTTGGAGTTATCCACAATAG +TACATTACAGGTTAGTGATGTCGACAAACTAGTTTGTCGTGACAAACTGTCATCCACAAATCAATTGAGA +TCAGTTGGACTGAATCTCGAGGGGAATGGAGTGGCAACTGACGTGCCATCTGTGACTAAAAGATGGGGCT +TCAGGTCCGGTGTCCCACCAAAGGTGGTCAATTATGAAGCTGGTGAATGGGCTGAAAACTGCTACAATCT +TGAAATCAAAAAACCTGACGGGAGTGAGTGTCTACCAGCAGCGCCAGACGGGATTCGGGGCTTCCCCCGG +TGCCGGTATGTGCACAAAGTATCAGGAACGGGACCATGTGCCGGAGACTTTGCCTTCCACAAAGAGGGTG +CTTTCTTCCTGTATGATCGACTTGCTTCCACAGTTATCTACCGAGGAACGACTTTCGCTGAAGGTGTCGT +TGCATTTCTGATACTGCCCCAAGCTAAGAAGGACTTCTTCAGCTCACACCCCTTGAGAGAGCCGGTCAAT +GCAACGGAGGACCCGTCGAGTGGCTATTATTCTACCACAATTAGATATCAGGCTACCGGTTTTGGAACTA +ATGAGACAGAGTACTTGTTCGAGGTTGACAATTTGACCTACGTCCAACTTGAATCAAGATTCACACCACA +GTTTCTGCTCCAGCTGAATGAGACAATATATGCAAGTGGGAAGAGGAGCAACACCACGGGAAAACTAATT +TGGAAGGTCAACCCCGAAATTGATACAACAATCGGGGAGTGGGCCTTCTGGGAAACTAAAAAAACCTCAC +TAGAAAAATTCGCAGTGAAGAGTTGTCTTTCACAGCTGTATCAAACGGACCCAAAAACATCAGTGGTCAG +AGTCCGGCGCGAACTTCTTCCGACCCAGAGACCAACACAACAAATGAAGACCACAAAATCATGGCTTCAG +AAAATTCCTCTGCAATGGTTCAAGTGCACAGTCAAGGAAGGAAAGCTGCAGTGTCGCATCTGACAACCCT +TGCCACAATCTCCACGAGTCCTCAACCTCCCACAACCAAAACAGGTCCGGACAACAGCACCCATAATACA +CCCGTGTATAAACTTGACATCTCTGAGGCAACTCAAGTTGGACAACATCACCGTAGAGCAGACAACGACA +GCACAGCCTCCGACACTCCCCCCGCCACGACCGCAGCCGGACCCTTAAAAGCAGAGAACACCAACACGAG +TAAGAGCGCTGACTCCCTGGACCTCGCCACCACGACAAGCCCCCCAAACTACAGCGAGACTGCTGGCAAC +AACAACACTCATCACCAAGATACCGGAGAAGAGAGTGCCAGCAGCGGGAAGCTAGGCTTAATTACCAATA +CTATTGCTGGAGTAGCAGGACTGATCACAGGCGGGAGAAGGACTCGAAGAGAAGTAATTGTCAATGCTCA +ACCCAAATGCAACCCCAATTTACATTACTGGACTACTCAGGATGAAGGTGCTGCAATCGGATTGGCCTGG +ATACCATATTTCGGGCCAGCAGCCGAAGGAATTTACACAGAGGGGCTAATGCACAACCAAGATGGTTTAA +TCTGTGGGTTGAGGCAGCTGGCCAACGAAACGACTCAAGCTCTCCAACTGTTCCTGAGAGCCACAACTGA +GCTGCGAACCTTTTCAATCCTCAACCGTAAGGCAATTGACTTCCTGCTGCAGCGATGGGGTGGCACATGC +CACATTTTGGGACCGGACTGCTGTATCGAACCACATGATTGGACCAAGAACATAACAGACAAAATTGATC +AGATTATTCATGATTTTGTTGATAAAACTCTTCCGGACCAGGGGGACAATGACAATTGGTGGACAGGATG +GAGACAATGGATACCGGCAGGTATTGGAGTTACAGGTGTTATAATTGCAGTTATCGCTTTATTCTGTATA +TGCAAATTTGTCTTTTAGTCTTTCTTCAGATTGTTTCACGGCAAAACTCAACCTCAAATCAATGAAACTA +GGATTTAATTATATGAATCACTTGAATCTAAGATTACTTGACAAATGATAACATAATACACTGGAGCTTC +AAACATAGCCAATGTGATTCTAACTCCTTTAAACTCACAGTTAATCATAAACAAGGTTTGACATCAATCT +AGCTATATCTTTAAGAATGATAAACTTGATGAAGATTAAGAAAAAGGTAATCTTTCGATTATCTTTAGTC +TTCATCCTTGATTCTACAATCATGACAGTTGTCTTTAATGAAAAAGGAAAAAAGCCTTTTTATTAAGTTG +TAATAATCAGATCTGCAAACCGGTAGAATTTAGTTGTAACCTAACACACACAAAGCATTGGTAAAAAAGT +CAATAGAAATTTAAACAGTGAGTGCAGACAACTCTTAAATGGAAGCTTCATATGAGAGAGGACGCCCCCG +AGCTGCCAGACAGCATTCAAGGGATGGACACGACCACCATGTTCGAGCACGATCATCATCCAGAGAGAAT +TATCGAGGTGAGTACCGTCAATCAAGGAGCGCCTCACAAGTGCGCGTTCCTACTGTATTTCATAAGAAGA +GAGTTGAACCATTAACAGTTCCTCCAGCACCTAAAGACATATGTCCGACCTTGAAAAAAGGATTTTTGTG +TGACAGTAGTTTTTGCAAAAAAGACCACCAGTTAGAAAGTTTAACTGATAGGGAATTACTCCTACTAATC +GCCCGTAAGACTTGTGGATCAGTAGAACAACAATTAAATATAACTGCACCCAAGGACTCGCGCTTAGCAA +ATCCAACGGCTGATGATTTCCAGCAAGAGGAAGGTCCCAAAATTACCTTGTTGACACTGATCAAGACGGC +AGAACACTGGGCGAGACAAGACATCCGAACCATAGAGGATTCCAAATTAAGGGCATTGTTAACTCTATGT +GCTGTGATGACGAGGAAATTCTCAAAATCCCAGCTGAGTCTTTTGTGTGAGACACACCTAAGGCGCGAAG +GGCTTGGGCAAGATCAGGCAGAACCCGTTCTCGAAGTATATCAACGATTACACAGTGATAAAGGAGGCAG +TTTTGAAGCTGCACTATGGCAACAATGGGACCGACAATCCCTAATTATGTTTATCACTGCATTCTTGAAT +ATCGCTCTCCAGTTACCGTGTGAGAGTTCTGCTGTCGTTGTTTCAGGGTTAAGAACATTGGTTCCTCAAT +CAGATAATGAGGAAGCTTCAACCAACCCGGGGACATGCTCATGGTCTGATGAGGGTACCCCTTAATAAGG +CTGACTAAAACACTATATAACCTTCTACTTGATCACAATACTCCGTATACCTATCATCATATATTTAATC +AAGACGATATCCTTTAAAACTTATTCAGTACTATAATCACTCTCATTTCAAATTGATAAGATATGCATAA +TTGCCTTAATATATAAAGAGGTATGATATAACCCAAACATTGACCAAAGAAAATCATAATCTCGTATCGC +TCGCAATATAACCTGCCAAGCATACCTCTTGCACAAAGTGATTCTTGTACACAAATAATGTTTGACTCTA +CAGGAGGTAGCAACGATCCATCTCATCAAAAAATAAGTATTTTATGATTTACTAATGATCTCTTAAAATA +TTAAGAAAAACTGACGGAACATAAATTCTTTCTGCTTCAAGTTGTGGAGGAGGTCTATGGTATTCGCTAT +TGTTATATTACAATCAATAACAAGCTTGTAAAAATATTGTTCTTGTTTCAGGAGGTATATTGTGACCGGA +AAAGCTAAACTAATGATGAAGATTAATGCGGAGGTCTGATGAGAATAAACCTTATTATTCAGATTAGGCC +CCAAGAGGCATTCTTCATCTCCTTTTAGCAAAATACTATTTCAGGATAGTCCAGCTAGTGACACGTCTTT +TAGCTGTATACCAGTTGCCCCTGAGATACGCCACAAAAGTGTCTCTGAGCTAAAGTGGTCTGTACACATC +TCATACATTGTATTAGGGGCAATAATATCTAATTGAACTTAGCCATTTAAAATTTAGTGCATAAATCTGG +GCTAACTCCACCAGGTCAACTCCATTGGCTGAAAAGAAGCCCACCTACAACGAACATTACTTTGAGCACC +CTCACAATTAAAAAATAAGAGCGTCGTTCCAACAATCGAGCGCAAGGTTACAAGGTTGAACTGAGAGTGT +CTAGACAACAAAATATCGATACTCCAGACACCAAGCAAGACCTGAGAAAAAACCATGGCCAAAGCTACGG +GACGATACAATCTAATATCGCCCAAAAAGGACCTGGAGAAAGGGGTTGTCTTAAGCGACCTCTGTAACTT +CTTAGTTAGTCAAACTATTCAAGGGTGGAAAGTTTATTGGGCTGGTATTGAGTTTGATGTGACTCACAAA +GGAATGGCCCTATTGCATAGACTGAAAACTAATGACTTTGCCCCTGCATGGTCAATGACAAGGAACCTAT +TTCCCCATTTATTTCAAAATCCGAATTCCACTATTGAATCACCGCTGTGGGCACTGAGAGTCATCCTTGC +AGCAGGGATACAGGACCAGTTAATTGACCAGTCTTTGATTGAACCCTTAGCAGGAGCCCTTGGTCTGATC +TCTGATTGGCTGCTAACAACCAACACTAACCATTTCAACATGCGAACACAACGTGTCAAGGAACAATTGA +GCCTAAAAATGCTGTCGTTGATTCGATCCAATATTCTCAAGTTTATTAACAAATTGGATGCTCTACATGT +CGTGAACTACAATGGATTATTGAGCAGTATTGAAATTGGAACTCAAAATCATACAATCATCATAACTCGA +ACTAACATGGGTTTTCTGGTGGAGCTCCAAGAACCCGACAAATCGGCAATGAACCGCAAGAAGCCTGGGC +CGGCGAAATTTTCCCTCCTTCATGAGTCCACACTGAAAGCATTTACACAAGGGTCCTCGACACGAATGCA +AAGTTTAATTCTTGAATTCAATAGCTCTCTTGCTATCTAACTAAGATGGAATACTTCATATTGGGCTAAC +TCATATATGCTGACTCAATAGTTAACTTGACATCTCTGCCTTCATAATCAGATATATAAGCATAATAAAT +AAATACTCATATTTCTTGATAATTTGTTTAACCACAGATAAATCCTCACTGTAAGCCAGCTTCCAAGTTG +ACACCCTTACAAAAACCAGGACTCAGAATCCCTCAAATAAGAGATTCCAAGACAACATCATAGAATTGCT +TTATTATATTAATAAGCATTTTATCACTAGAAATCCAATATACGAAATGGTTAATTGTAACTAAACCCGC +AGGTCATGTGTGTTAGGTTTCACAAATTATATATATTACTAACTCCATACTCGTAACTAACATTAGATAA +GTAGGTTAAGAAAAAAGCTTGAGGAAGATTAAGAAAAACTGCTTATTGGGTCTTTCCGTGTTTTAGATGA +AGCAGTTGACATTCTTCCTCTTGATATTAAATGGCTACACAACATACCCAATACCCAGACGCCAGGTTAT +CATCACCAATTGTATTGGACCAATGTGACCTTGTCACTAGAGCTTGCGGGTTGTATTCATCATACTCCCT +TAATCCGCAACTACGCAACTGTAAACTCCCGAAACATATATACCGTTTAAAATATGATGTAACTGTTACC +AAGTTCTTAAGTGATGTACCAGTGGCGACATTGCCCATAGATTTCATAGTCCCAATTCTTCTCAAGGCAC +TATCAGGCAATGGGTTCTGTCCTGTTGAGCCGCGGTGCCAACAGTTCTTAGATGAAATTATTAAGTACAC +AATGCAAGATGCTCTCTTCCTGAAATATTATCTCAAAAATGTGGGTGCTCAAGAAGACTGTGTTGATGAC +CACTTTCAAGAAAAAATCTTATCTTCAATTCAGGGCAATGAATTTTTACATCAAATGTTTTTCTGGTATG +ACCTGGCTATTTTAACTCGAAGGGGTAGATTAAATCGAGGAAACTCTAGATCAACGTGGTTTGTTCATGA +TGATTTAATAGACATCTTAGGCTATGGGGACTATGTTTTTTGGAAGATCCCAATTTCACTGTTACCACTG +AACACACAAGGAATCCCCCATGCTGCTATGGATTGGTATCAGACATCAGTATTCAAAGAAGCGGTTCAAG +GGCATACACACATTGTTTCTGTTTCTACTGCCGATGTCTTGATAATGTGCAAAGATTTAATTACATGTCG +ATTCAACACAACTCTAATCTCAAAAATAGCAGAGGTTGAGGACCCAGTTTGCTCTGATTATCCCAATTTT +AAGATTGTGTCTATGCTTTACCAGAGCGGAGATTACTTACTCTCCATATTAGGGTCTGATGGGTATAAAA +TCATTAAGTTTCTCGAACCATTGTGCTTGGCTAAAATTCAATTGTGCTCAAAGTACACCGAGAGGAAGGG +CCGATTCTTAACACAAATGCATTTAGCTGTAAATCACACCCTGGAAGAAATTACAGAAATACGTGCACTA +AAGCCTTCACAGGCTCACAAGATCCGTGAATTCCATAGAACATTGATAAGGCTGGAGATGACGCCACAAC +AACTTTGTGAGCTATTTTCCATACAAAAACACTGGGGGCATCCTGTGCTACATAGTGAAACAGCAATCCA +AAAAGTTAAAAAACATGCTACGGTGCTAAAAGCATTACGCCCTATCGTGATTTTCGAGACATATTGTGTT +TTTAAATATAGCATTGCAAAACATTATTTTGATAGTCAAGGATCTTGGTACAGTGTTACCTCAGATAGAA +ATCTAACACCAGGTCTTAATTCTTATATCAAAAGAAATCAATTCCCTCCGTTGCCAATGATTAAAGAACT +GCTATGGGAATTTTACCACCTTGACCATCCTCCACTTTTCTCAACCAAAATTATTAGTGACTTAAGTATT +TTTATAAAAGACAGAGCTACTGCAGTAGAAAGGACATGCTGGGATGCAGTATTCGAGCCTAATGTTCTGG +GATATAATCCACCTCACAAATTCAGTACCAAACGTGTACCGGAACAATTTTTAGAGCAAGAAAACTTTTC +TATTGAGAATGTTCTTTCCTACGCGCAAAAACTCGAGTATCTACTACCACAATATCGGAATTTTTCTTTC +TCATTGAAAGAGAAAGAGTTGAATGTAGGTAGAACTTTCGGAAAATTGCCTTATCCGACTCGCAATGTTC +AAACACTTTGTGAAGCTCTGTTAGCTGATGGTCTTGCTAAAGCATTTCCTAGCAATATGATGGTAGTTAC +GGAACGTGAACAAAAAGAAAGCTTATTGCATCAAGCATCATGGCACCACACAAGTGATGATTTCGGTGAG +CATGCCACAGTTAGAGGGAGTAGCTTTGTAACTGATTTAGAGAAATACAATCTTGCATTTAGGTATGAAT +TTACAGCACCTTTTATAGAATATTGCAACCGTTGCTATGGTGTTAAGAATGTTTTTAATTGGATGCATTA +TACAATCCCACAGTGTTATATGCATGTCAGTGATTATTATAATCCACCGCATAACCTCACACTGGAAAAT +CGAAACAACCCCCCTGAAGGGCCTAGTTCATACAGGGGTCATATGGGAGGGATTGAAGGACTGCAACAAA +AACTCTGGACAAGTATTTCATGTGCTCAAATTTCTTTAGTTGAAATTAAGACTGGCTTTAAGTTGCGCTC +AGCTGTGATGGGTGACAATCAGTGCATTACCGTTTTATCAGTCTTCCCCTTAGAGACTGATGCAGGCGAG +CAGGAACAGAGCGCCGAGGACAATGCAGCGAGGGTGGCCGCCAGCCTAGCAAAAGTTACAAGTGCCTGTG +GAATCTTTTTAAAACCTGATGAAACATTTGTACATTCAGGTTTTATCTATTTTGGAAAAAAACAATATTT +GAATGGGGTCCAATTGCCCCAGTCCCTTAAAACGGCTACAAGAATGGCACCATTGTCTGATGCAATTTTT +GATGATCTTCAAGGGACCCTGGCTAGTATAGGTACTGCTTTTGAGCGATCCATCTCTGAGACACGACATA +TCTTTCCTTGCAGAATAACCGCAGCTTTCCATACGTTCTTTTCGGTGAGAATCTTGCAATATCATCACCT +CGGATTTAATAAAGGTTTTGACCTTGGACAGTTAACACTCGGCAAACCTCTGGATTTCGGAACAATATCA +TTGGCACTAGCGGTACCGCAGGTGCTTGGAGGGTTATCCTTCTTGAATCCTGAGAAATGTTTCTACCGGA +ATCTAGGAGATCCAGTTACCTCAGGTTTATTCCAGTTAAAAACTTATCTCCGAATGATTGAGATGGATGA +TTTATTCTTACCTTTAATTGCGAAGAACCCTGGGAACTGCACTGCCATTGACTTTGTGCTAAATCCTAGC +GGATTAAATGTTCCTGGGTCGCAAGACTTAACTTCATTTCTGCGCCAGATTGTACGTAGGACTATCACCC +TAAGTGCGAAAAACAAACTTATTAATACCTTATTTCATGCATCAGCTGACTTCGAAGACGAAATGGTTTG +TAAGTGGCTCTTATCATCAACTCCTGTTATGAGTCGTTTCGCAGCCGATATATTTTCACGCACGCCGAGC +GGGAAGCGATTGCAAATTCTAGGATACTTGGAAGGAACACGCACATTATTAGCCTCTAAGATCATCAACA +ATAATACAGAGACGCCGGTTTTGGACAGACTGAGGAAGATAACATTGCAAAGGTGGAGTCTATGGTTTAG +TTATCTTGATCATTGTGATAATATCCTGGCGGAGGCTTTAACCCAAATAACTTGCACAGTTGATTTAGCA +CAGATCCTGAGGGAATATTCATGGGCACATATTTTAGAGGGGAGACCTCTTATTGGAGCCACACTCCCAT +GTATGATTGAGCAATTCAAAGTGGTTTGGCTGAAACCCTACGAACAATGTCCGCAGTGTTCAAATGCCAA +GCAACCTGGTGGGAAACCATTCGTGTCAGTAGCAGTCAAGAAACATATTGTTAGTGCATGGCCAAATGCA +TCCCGAATAAGCTGGACTATCGGGGATGGAATCCCATACATTGGATCAAGGACAGAAGATAAGATAGGGC +AACCTGCTATTAAACCAAAATGTCCTTCCGCAGCCTTAAGAGAGGCCATTGAATTGGCGTCCCGTTTAAC +ATGGGTAACTCAAGGCAGTTCGAACAGTGACTTGCTAATAAAACCATTTTTGGAAGCACGAGTAAATTTA +AGTGTTCAAGAAATACTTCAAATGACCCCTTCACATTACTCGGGAAATATTGTTCATAGGTACAACGATC +AATACAGTCCTCATTCTTTCATGGCCAATCGTATGAGTAACTCAGCAACGCGATTGATTGTTTCTACAAA +CACTTTAGGTGAGTTTTCAGGAGGTGGCCAATCGGCACGCGACAGCAATATTATTTTCCAGAATGTTATA +AATTATGCAGTTGCACTGTTCGATATTAAATTTAGAAACACTGAGGCTACAGATATCCAGTATAATCGTG +CTCACCTTCATCTAACTAAGTGTTGCACCCGGGAGGTACCAGCTCAGTACTTAACATACACATCTACATT +GGATTTAGATTTAACAAGATACCGAGAAAATGAATTGATTTATGACAATAATCCTCTAAAAGGAGGACTC +AATTGCAATATCTCATTTGATAACCCATTTTTCCAAGGCAAACAGCTGAACATTATAGAAGATGACCTTA +TTCGACTGCCTCACTTATCTGGATGGGAGCTAGCTAAGACCATCATGCAATCAATTATTTCAGATAGCAA +TAATTCGTCTACAGACCCAATTAGCAGTGGAGAAACAAGATCATTCACTACCCATTTCTTAACTTATCCC +AAAATAGGACTTCTGTACAGTTTTGGGGCCTTTGTAAGTTATTATCTTGGCAATACAATTCTTCGGACTA +AGAAATTAACACTTGACAATTTTTTATATTACTTAACTACCCAAATTCATAATCTACCACATCGCTCATT +GCGAATACTTAAGCCAACATTCAAACATGCAAGCGTTATGTCACGATTAATGAGTATTGATCCCCATTTT +TCTATTTACATAGGCGGTGCTGCAGGTGACAGAGGACTCTCAGATGCGGCCAGGTTATTTTTGAGAACGT +CCATTTCATCTTTTCTTACATTTGTAAAGGAATGGATAATTAATCGCGGAACAATTGTCCCTTTATGGAT +AGTATATCCATTAGAGGGTCAAAATCCAACACCTGTTAATAATTTCCTCCATCAGATCGTAGAACTGCTG +GTGCATGATTCATCAAGACACCAGGCTTTTAAAACTACCATAAATGATCATGTACATCCTCACGACAATC +TTGTTTACACATGTAAGAGTACAGCCAGCAATTTCTTCCATGCGTCATTGGCGTACTGGAGGAGCAGGCA +CAGAAACAGCAACCGAAAAGACTTGACAAGAAACTCTTCAACTGGATCAAGCACAAACAACAGTGATGGT +CATATTAAGAGAAGTCAAGAACAAACCACCAGAGATCCACATGATGGCACTGAACGGAGTCTAGTCCTGC +AAATGAGCCATGAAATAAAAAGAACGACAATTCCACAAGAGAACACGCACCAGGGTCCGTCGTTCCAGTC +ATTTCTAAGTGACTCTGCTTGCGGTACAGCAAACCCAAAACTAAATTTCGATAGATCGAGACACAATGTG +AAATCTCAGGATCATAACTCAGCATCCAAGAGGGAAGGTCATCAAATAATCTCACATCGTCTAGTCCTAC +CTTTCTTTACATTATCTCAAGGGACACGCCAATTAACGTCATCCAATGAGTCACAAACCCAAGATGAGAT +ATCAAAGTACTTACGGCAATTGAGATCCGTCATTGATACCACAGTTTATTGTAGGTTTACCGGTATAGTC +TCGTCCATGCATTACAAACTTGATGAGGTCCTTTGGGAAATAGAGAATTTTAAGTCGGCTGTGACGCTGG +CAGAGGGAGAAGGTGCTGGTGCCTTACTATTGATTCAGAAATACCAAGTTAAGACCTTATTCTTCAACAC +GCTAGCTACTGAGTCCAGTATAGAGTCAGAAATAGTATCAGGAATGACTACTCCTAGGATGCTTCTACCT +GTTATGTCAAAATTCCATAATGACCAAATTGAGATTATTCTTAACAACTCAGCAAGCCAAATAACAGACA +TAACAAATCCTACTTGGTTTAAAGACCAAAGAGCAAGGCTACCTAGGCAAGTCGAGGTTATAACCATGGA +TGCAGAGACGACAGAGAATATAAACAGATCGAAATTGTACGAAGCTGTACATAAATTGATCTTACACCAT +GTTGATCCCAGCGTATTGAAAGCAGTGGTCCTTAAAGTCTTTCTAAGTGATACCGAGGGTATGTTATGGC +TAAATGATAATCTAGCCCCGTTTTTTGCCACTGGGTATTTAATTAAGCCAATAACGTCAAGTGCCAGGTC +TAGTGAGTGGTATCTTTGTCTGACGAACTTCTTATCAACTACACGTAAGATGCCACACCAAAACCATCTC +AGTTGTAAGCAGGTAATACTTACGGCATTGCAACTGCAAATTCAACGGAGCCCATACTGGCTAAGTCATT +TAACTCAGTATGCTGACTGCGATTTACATTTAAGCTATATCCGCCTTGGTTTTCCATCATTAGAGAAAGT +ATTATACCACAGGTATAACCTTGTCGATTCAAAAAGAGGTCCACTAGTCTCTGTCACTCAGCACTTAGCA +CATCTTAGGGCAGAGATTCGAGAATTGACCAATGATTATAATCAACAGCGACAAAGTCGGACTCAAACAT +ATCACTTTATTCGTACTGCAAAAGGACGAATCACAAAACTAGTCAATGATTATTTAAAATTCTTTCTTAT +TGTACAAGCATTAAAACATAATGGGACATGGCAAGCTGAGTTTAAGAAATTACCAGAGTTGATTAGTGTG +TGCTATAGGTTCTATCATATTAGAGATTGTAATTGTGAAGAACGTTTCTTAGTTCAAACCTTATATTTAC +ATAGAATGCAGGATTCTGAAGTTAAGCTTATCGAAAGGCTGACAGGGCTTCTGAGTTTATTTCCAGATGG +TCTCTACAGGTTCGATTGAATAACCGTGCATAGTATTTTGATACTTGTAAAGGTTGGTTATCAACATACA +GATTATAAAAAACTCATAAATTGCTCTCATACATCATCTTGATCTGATTTCAATAAATAACTATTTAGAT +AACGAAAGGAGTCCTTACATTATACACTATATTTGGCCTCTCTCCCTGCGTGATAATCAAAAATTCACAA +TACAGCATGTGTGACATATTACTGCTGCAATGAGTCTAACGCAACATAATAAACTCCGCACTCTTTATAA +TTAAGCTTTAACGATAGGTCTGGGCTCATATTGTTATTGATATAGTAATGTTGTATCAATATCTTGCCAG +ATGGAATAGTGCTTTGGTTGATAACACGACTTCTTAAAACAAAACTGATCTTTAAGATTAAGTTTTTTAT +AATTGTCATTGCTTTAATTTGTCGATTTAAAAATGGTGATAGCCTTAATCTTTGTGTAAAATAAGAGATT +AGGTGTAATAACTTTAACATTTTTGTCTAGTAAGCTACTATTCCATTCAGAATGATAAAATTAAAAGAAA +AGACAGGACTGTAAAATCAGAAATACCTTCTTTACAATATACCAGACTAGACAATAATCTTCGTGTTAAT +GATAATTAAGACATTGACCACGCTCATCAGAAGGCTC diff --git a/test/input/TestOrderAndOrient/ref.ebov.lbr.fasta b/test/input/TestOrderAndOrient/ref.ebov.lbr.fasta new file mode 100644 index 00000000..53277862 --- /dev/null +++ b/test/input/TestOrderAndOrient/ref.ebov.lbr.fasta @@ -0,0 +1,271 @@ +>KX009902.1 Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/LBR/2014/Makona-CDC/NIH-1122, partial genome +TTTGTGTGCGAATAACTATGAGGAAGATTAATAATTTTCCTCTCATTGAAATTTATATCGGAATTTAAAT +TGAAATTGTTACTGTAATCATACCTGGTTTGTTTCAGAGCCATATCACCAAGATAGAGAACAACCTAGGT +CTCCGGAGGGGGCAAGGGCATCAGTGTGCTCAGTTGAAAATCCCTTGTCAACATCTAGGCCTTATCACAT +CACAAGTTCCGCCTTAAACTCTGCAGGGTGATCCAACAACCTTAATAGCAACATTATTGTTAAAGGACAG +CATTAGTTCACAGTCAAACAAGCAAGATTGAGAATTAACTTTGATTTTGAACCTGAACACCCAGAGGACT +GGAGACTCAACAACCCTAAAGCCTGGGGTAAAACATTAGAAATAGTTTAAAGACAAATTGCTCGGAATCA +CAAAATTCCGAGTATGGATTCTCGTCCTCAGAAAGTCTGGATGACGCCGAGTCTCACTGAATCTGACATG +GATTACCACAAGATCTTGACAGCAGGTCTGTCCGTTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCAG +TGTATCAAGTAAACAATCTTGAGGAAATTTGCCAACTTATCATACAGGCCTTTGAAGCTGGTGTTGATTT +TCAAGAGAGTGCGGACAGTTTCCTTCTCATGCTTTGTCTTCATCATGCGTACCAAGGAGATTACAAACTT +TTCTTGGAAAGTGGCGCAGTCAAGTATTTGGAAGGGCACGGGTTCCGTTTTGAAGTCAAGAAGTGTGATG +GAGTGAAGCGCCTTGAGGAATTGCTGCCAGCAGTATCTAGTGGGAGAAACATTAAGAGAACACTTGCTGC +CATGCCGGAAGAGGAGACGACTGAAGCTAATGCCGGTCAGTTCCTCTCCTTTGCAAGTCTATTCCTTCCG +AAATTGGTAGTAGGAGAAAAGGCTTGCCTTGAGAAGGTTCAAAGGCAAATTCAAGTACATGCAGAGCAAG +GACTGATACAATATCCAACAGCTTGGCAATCAGTAGGACACATGATGGTGATTTTCCGTTTGATGCGAAC +AAATTTTTTGATCAAATTTCTTCTAATACACCAAGGGATGCACATGGTTGCCGGACATGATGCCAACGAT +GCTGTGATTTCAAATTCAGTGGCTCAAGCTCGTTTTTCAGGTCTATTGATTGTCAAAACAGTACTTGATC +ATATCCTACAAAAGACAGAACGAGGAGTTCGTCTCCATCCTCTTGCAAGGACCGCCAAGGTAAAAAATGA +GGTGAACTCCTTCAAGGCTGCACTCAGCTCCCTGGCCAAGCATGGAGAGTATGCTCCTTTCGCCCGACTT +TTGAACCTTTCTGGAGTAAATAATCTTGAGCATGGTCTTTTCCCTCAACTGTCGGCAATTGCACTCGGAG +TCGCCACAGCCCACGGGAGCACCCTCGCAGGAGTAAATGTTGGAGAACAGTATCAACAGCTCAGAGAGGC +AGCCACTGAGGCTGAGAAGCAACTCCAACAATATGCGGAGTCTCGTGAACTTGACCATCTTGGACTTGAT +GATCAGGAAAAGAAAATTCTTATGAACTTCCATCAGAAAAAGAACGAAATCAGCTTCCAGCAAACAAACG +CGATGGTAACTCTCAGAAAAGAGCGCCTGGCCAAGCTGACAGAAGCTATCACTGCTGCATCACTGCCCAA +AACAAGTGGACATTACGATGATGATGACGACATTCCCTTTCCAGGACCCATCAATGATGACGACAATCCT +GGCCATCAAGATGATGATCCGACTGACTCACAGGATACGACCATTCCCGATGTGGTAGTTGACCCCGATG +ATGGAGGCTACGGCGAATACCAAAGTTACTCGGAAAACGGCATGAGTGCACCAGATGACTTGGTCCTATT +CGATCTAGACGAGGACGACGAGGACACCAAGCCAGTGCCTAACAGATCGACCAAGGGTGGACAACAGAAA +AACAGTCAAAAGGGCCAGCATACAGAGGGCAGACAGACACAATCCACGCCAACTCAAAACGTCACAGGCC +CTCGCAGAACAATCCACCATGCCAGTGCTCCACTCACGGACAATGACAGAAGAAACGAACCCTCCGGCTC +AACCAGCCCTCGCATGCTGACCCCAATCAACGAAGAGGCAGACCCACTGGACGATGCCGACGACGAGACG +TCTAGCCTTCCGCCCTTAGAGTCAGATGATGAAGAACAGGACAGGGACGGAACTTCTAACCGCACACCCA +CTGTCGCCCCACCGGCTCCCGTATACAGAGATCACTCCGAAAAGAAAGAACTCCCGCAAGATGAACAACA +AGATCAGGACCACATTCAAGAGGCCAGGAACCAAGACAGTGACAACACCCAGCCAGAACATTCTTTTGAG +GAGATGTATCGCCACATTCTAAGATCACAGGGGCCATTTGATGCCGTTTTGTATTATCATATGATGAAGG +ATGAGCCTGTAGTTTTCAGTACCAGTGATGGTAAAGAGTACACGTATCCGGACTCCCTTGAAGAGGAATA +TCCACCATGGCTCACTGAAAAAGAGGCCATGAATGATGAGAATAGATTTGTTACACTGGATGGTCAACAA +TTTTATTGGCCAGTAATGAATCACAGGAATAAATTCATGGCAATCCTGCAACATCATCAGTGAATGAGCA +TGTAATAATGGGATGATTTAATCGACAAATAGCTAACATTAAATAGTCAAGGAACGCAAACAGGAAGAAT +TTTTGATGTCTAAGGTGTGAATTATTATCACAATAAAAGTGATTCTTAGTTTTGAATTTAAAGCTAGCTT +ATTATTACTAGCCGTTTTTCAAAGTTCAATTTGAGTCTTAATGCAAATAAGCGTTAAGCCACAGTTATAG +CCATAATGGTAACTCAATATCTTAGCCAGCGATTTATCTAAATTAAATTACATTATGCTTTTATAACTTA +CCTACTAGCCTGCCCAACATTTACACGATCGTTTTATAATTAAGAAAAAACTAATGATGAAGATTAAAAC +CTTCATCATCCTTACGTCAATTGAATTCTCTAGCACTAGAAGCTTATTGTCTTCAATGTAAAAGAAAAGC +TGGCCTAACAAGATGACAACTAGAACAAAGGGCAGGGGCCATACTGTGGCCACGACTCAAAACGACAGAA +TGCCAGGCCCTGAGCTTTCGGGCTGGATCTCTGAGCAGCTAATGACCGGAAGGATTCCTGTAAACGACAT +CTTCTGTGATATTGAGAACAATCCAGGATTATGCTACGCATCCCAAATGCAACAAACGAAGCCAAACCCG +AAGATGCGCAACAGTCAAACCCAAACGGACCCAATTTGCAATCATAGTTTTGAGGAGGTAGTACAAACAT +TGGCTTCATTGGCTACTGTTGTGCAACAACAAACCATCGCATCAGAATCATTAGAACAACGCATTACGAG +TCTTGAGAATGGTCTAAAGCCAGTTTATGATATGGCAAAAACAATCTCCTCATTGAACAGGGTTTGTGCT +GAGATGGTTGCAAAATATGATCTTCTGGTGATGACAACCGGTCGGGCAACAGCAACCGCTGCGGCAACTG +AGGCTTATTGGGCTGAACATGGTCAACCACCACCTGGACCATCACTTTATGAAGAAAGTGCGATTCGGGG +TAAGATTGAATCTAGAGATGAGACTGTCCCTCAAAGTGTTAGGGAGGCATTCAACAATCTAGACAGTACC +ACTTCACTAACTGAGGAAAATTTTGGGAAACCTGACATTTCGGCAAAGGATTTGAGAAACATTATGTATG +ATCACTTGCCTGGTTTTGGAACTGCTTTCCACCAATTAGTACAAGTGATTTGTAAATTGGGAAAAGATAG +CAATTCATTGGACATTATTCATGCTGAGTTCCAGGCCAGCCTGGCTGAAGGAGACTCCCCTCAATGTGCC +CTAATTCAAATTACAAAAAGAGTTCCAATCTTCCAAGATGCTGCTCCACCTGTCATCCACATCCGCTCTC +GAGGTGACATTCCCCGAGCTTGCCAGAAGAGCTTGCGTCCAGTCCCACCATCACCCAAGATTGATCGAGG +TTGGGTATGTGTTTTTCAGCTTCAAGATGGTAAAACACTTGGACTCAAAATTTGAGCCAATCTCTTTTCC +CTCCGAAAGAGGCAACTAATAGCAGAGGCTTCAACTGCTGAACTATAGGGTATGTTACATTAATGATACA +CTTGTGAGTATCAGCCCTAGATAATATAAGTCAATTAAACAACCAAGATAAAATTGTTCATATCCCGCTA +GCAGCTTTAAAGATAAATGTAATAGGAGCTATACCTCTGACAGTATTATAATTAATTGTTATTAAGTAAC +CCAAACCAAAAATGATGAAGATTAAGAAAAACCTACCTCGACTGAGAGAGTGTTTTTTCATTAACCTTCA +TCTTGTAAACGTTGAGCAAAATTGTTAAAAATATGAGGCGGGTTATATTGCCTACTGCTCCTCCTGAATA +TATGGAGGCCATATACCCTGCCAGGTCAAATTCAACAATTGCTAGGGGTGGCAACAGCAATACAGGCTTC +CTGACACCGGAGTCAGTCAATGGAGACACTCCATCGAATCCACTCAGGCCAATTGCTGATGACACCATCG +ACCATGCCAGCCACACACCAGGCAGTGTGTCATCAGCATTCATCCTCGAAGCTATGGTGAATGTCATATC +GGGCCCCAAAGTGCTAATGAAGCAAATTCCAATTTGGCTTCCTCTAGGTGTCGCTGATCAAAAGACCTAC +AGCTTTGACTCAACTACGGCCGCCATCATGCTTGCTTCATATACTATCACCCATTTCGGCAAGGCAACCA +ATCCGCTTGTCAGAGTCAATCGGCTGGGTCCTGGAATCCCGGATCACCCCCTCAGGCTCCTGCGAATTGG +AAACCAGGCTTTCCTCCAGGAGTTCGTTCTTCCACCAGTCCAACTACCCCAGTATTTCACCTTTGATTTG +ACAGCACTCAAACTGATCACTCAACCACTGCCTGCTGCAACATGGACCGATGACACTCCAACTGGATCAA +ATGGAGCGTTGCGTCCAGGAATTTCATTTCATCCAAAACTTCGCCCCATTCTTTTACCCAACAAAAGTGG +GAAGAAGGGGAACAGTGCCGATCTAACATCTCCGGAGAAAATCCAAGCAATAATGACTTCACTCCAGGAC +TTTAAGATCGTTCCAATTGATCCAACCAAAAATATCATGGGTATCGAAGTGCCAGAAACTCTGGTCCACA +AGCTGACCGGTAAGAAGGTGACTTCCAAAAATGGACAACCAATCATCCCTGTTCTTTTGCCAAAGTACAT +TGGGTTGGACCCGGTGGCTCCAGGAGACCTCACCATGGTAATCACACAGGATTGTGACACGTGTCATTCT +CCTGCAAGTCTTCCAGCTGTGGTTGAGAAGTAATTGCAATAATTGACTCAGATCCAGTTTTACAGAATCT +TCTCAGGGATAGTGATAACATCTTTTTAATAATCCGTCTACTAGAAGAGATACTTCTAATTGATCAATAT +ACTAAAGGTGCTTTACACCATTGTCTCTTTTCTCTCCTAAATGTAGAGCTTAACAAAAGACTCATAATAT +ACCTGTTTTTAAAAGATTGATTGATGAAAGATCATGACTAATAACATTACAAACAATCCTACTATAATCA +ATACGGTGATTCAAATGTCAATCTTTCTCATTGCACATACTCTTTGTCCTTATCCTCAAATTGCCTACAT +GCTTACATCTGAGGACAGCCAGTGTGACTTGGATTGGAGATGTGGAGGAAAAATCGGGGCCCATTTCTAA +GTTGTTCACAATCTAAGTACAGACATTGCTCTTCTAATTAAGAAAAAATCGGCGATGAAGATTAAGCCGA +CAGTGAGCGTAATCTTCATCTCTCTTAGATTATTTGTCTTCCAGAGTAGGGGTCATCAGGTCCTTTTCAA +TTGGATAACCAAAATAAGCTTCACTAGAAGGATATTGTGAGGCGACAACACAATGGGTGTTACAGGAATA +TTGCAGTTACCTCGTGATCGATTCAAGAGGACATCATTCTTTCTTTGGGTAATTATCCTTTTCCAAAGAA +CATTTTCCATCCCGCTTGGAGTTATCCACAATAGTACATTACAGGTTAGTGATGTCGACAAACTAGTTTG +TCGTGACAAACTGTCATCCACAAATCAATTGAGATCAGTTGGACTGAATCTCGAGGGGAATGGAGTGGCA +ACTGACGTGCCATCTGTGACTAAAAGATGGGGCTTCAGGTCCGGTGTCCCACCAAAGGTGGTCAATTATG +AAGCTGGTGAATGGGCTGAAAACTGCTACAATCTTGAAATCAAAAAACCTGACGGGAGTGAGTGTCTACC +AGCAGCGCCAGACGGGATTCGGGGCTTCCCCCGGTGCCGGTATGTGCACAAAGTATCAGGAACGGGACCA +TGTGCCGGAGACTTTGCCTTCCACAAAGAGGGTGCTTTCTTCCTGTATGATCGACTTGCTTCCACAGTTA +TCTACCGAGGAACGACTTTCGCTGAAGGTGTCGTTGCATTTCTGATACTGCCCCAAGCTAAGAAGGACTT +CTTCAGCTCACACCCCTTGAGAGAGCCGGTCAATGCAACGGAGGACCCGTCGAGTGGCTATTATTCTACC +ACAATTAGATATCAGGCTACCGGTTTTGGAACTAATGAGACAGAGTACTTGTTCGAGGTTGACAATTTGA +CCTACGTCCAACTTGAATCAAGATTCACACCACAGTTTCTGCTCCAGCTGAATGAGACAATATATGCAAG +TGGGAAGAGGAGCAACACCACGGGAAAACTAATTTGGAAGGTCAACCCCGAAATTGATACAACAATCGGG +GAGTGGGCCTTCTGGGAAACTAAAAAAACCTCACTAGAAAAATTCGCAGTGAAGAGTTGTCTTTCACAGC +TGTATCAAACGGACCCAAAAACATCAGTGGTCAGAGTCCGGCGCGAACTTCTTCCGACCCAGAGACCAAC +ACAACAAATGAAGACCACAAAATCATGGCTTCAGAAAATTCCTCTGCAATGGTTCAAGTGCACAGTCAAG +GAAGGAAAGCTGCAGTGTCGCATCTGACAACCCTTGCCACAATCTCCACGAGTCCTCAACCTCCCACAAC +CAAAACAGGTCCGGACAACAGCACCCATAATACACCCGTGTATAAACTTGACATCTCTGAGGCAACTCAA +GTTGGACAACATCACCGTAGAGCAGACAACGACAGCACAGCCTCCGACACTCCCCCCGCCACGACCGCAG +CCGGACCCTTAAAAGCAGAGAACACCAACACGAGTAAGAGCGCTGACTCCCTGGACCTCGCCACCACGAC +AAGCCCCCAAAACTACAGCGAGACTGCTGGCAACAACAACACTCATCACCAAGATACCGGAGAAGAGAGT +GCCAGCAGCGGGAAGCTAGGCTTAATTACCAATACTATTGCTGGAGTAGCAGGACTGATCACAGGCGGGA +GAAGGACTCGAAGAGAAGTAATTGTCAATGCTCAACCCAAATGCAACCCCAATTTACATTACTGGACTAC +TCAGGATGAAGGTGCTGCAATCGGATTGGCCTGGATACCATATTTCGGGCCAGCAGCCGAAGGAATTTAC +ACAGAGGGGCTAATGCACAACCAAGATGGTTTAATCTGTGGGTTGAGGCAGCTGGCCAACGAAACGACTC +AAGCTCTCCAACTGTTCCTGAGAGCCACAACTGAGCTGCGAACCTTTTCAATCCTCAACCGTAAGGCAAT +TGACTTCCTGCTGCAGCGATGGGGTGGCACATGCCACATTTTGGGACCGGACTGCTGTATCGAACCACAT +GATTGGACCAAGAACATAACAGACAAAATTGATCAGATTATTCATGATTTTGTTGATAAAACCCTTCCGG +ACCAGGGGGACAATGACAATTGGTGGACAGGATGGAGACAATGGATACCGGCAGGTATTGGAGTTACAGG +TGTTATAATTGCAGTTATCGCTTTATTCTGTATATGCAAATTTGTCTTTTAGTCTTTCTTCAGATTGTTT +CACGGCAAAACTCAACCTCAAATCAATGAAACTAGGATTTAATTATATGAATCACTTGAATCTAAGATTA +CTTGACAAATGATAACATAATACACTGGAGCTTCAAACATAGCCAATGTGATTCTAACTCCTTTAAACTC +ACAGTTAATCATAAACAAGGTTTGACATCAATCTAGCTATATCTTTAAGAATGATAAACTTGATGAAGAT +TAAGAAAAAGGTAATCTTTCGATTATCTTTAGTCTTCATCCTTGATTCTACAATCATGACAGTTGTCTTT +AATGAAAAAGGAAAAAAGCCTTTTTATTAAGTTGTAATAATCAGATCTGCAAACCGGTAGAATTTAGTTG +TAACCTAACACACACAAAGCATTGGTAAAAAAGTCAATAGAAATTTAAACAGTGAGTGCAGACAACTCTT +AAATGGAAGCTTCATATGAGAGAGGACGCCCCCGAGCTGCCAGACAGCATTCAAGGGATGGACACGACCA +CCATGTTCGAGCACGATCATCATCCAGAGAGAATTATCGAGGTGAGTACCGTCAATCAAGGAGCGCCTCA +CAAGTGCGCGTTCCTACTGTATTTCATAAGAAGAGAGTTGAACCATTAACAGTTCCTCCAGCACCTAAAG +ACATATGTCCGACCTTGAAAAAAGGATTTTTGTGTGACAGTAGTTTTTGCAAAAAAGACCACCAGTTAGA +AAGTTTAACTGATAGGGAATTACTCCTACTAATCGCCCGTAAGACTTGTGGATCAGTAGAACAACAATTA +AATATAACTGCACCCAAGGACTCGCGCTTAGCAAATCCAACGGCTGATGATTTCCAGCAAGAGGAAGGTC +CCAAAATTACCTTGTTGACACTGATCAAGACGGCAGAACACTGGGCGAGACAAGACATCCGAACCATAGA +GGATTCCAAATTAAGGGCATTGTTAACTCTATGTGCTGTGATGACGAGGAAATTCTCAAAATCCCAGCTG +AGTCTTTTGTGTGAGACACACCTAAGGCGCGAAGGGCTTGGGCAAGATCAGGCAGAACCCGTTCTCGAAG +TATATCAACGATTACACAGTGATAAAGGAGGCAGTTTTGAAGCTGCACTATGGCAACAATGGGACCGACA +ATCCCTAATTATGTTTATCACTGCATTCTTGAATATCGCTCTCCAGTTACCGTGTGAAAGTTCTGCTGTC +GTTGTTTCAGGGTTAAGAACATTGGTTCCTCAATCAGATAATGAGGAAGCTTCAACCAACCCGGGGACAT +GCTCATGATCTGATGAGGGTACCCCTTAATAAGGCTGACTAAAACACTATATAACCTTCTACTTGATCAC +AATACTCCGTATACCTATCATCATATATTTAATCAAGACGATATCCTTTAAAACTTATTCAGTACTATAA +TCACTCTCATTTCAAATTGATAAGATATGCATAATTGCCTTAATATATAAAGAGGTATGATATAACCCAA +ACATTGACCAAAGAAAATCATAATCTCGTATCGCTCGCAATATAACCTGCCAAGCATACCTCTTGCACAA +AGTGATTCTTGTACACAAATAATGTTTGACTCTACAGGAGGTAGCAACGATCCATCTCATCAAAAAATAA +GTATTTTATGATTTACTAATGATCTCTTAAAATATTAAGAAAAACTGACGGAACATAAATTCTTTCTGCT +TCAAGTTGTGGAGGAGGTCTATGGTATTCGCTATTGTTATATTACAATCAATAACAAGCTTGTAAAAATA +TTGTTCTTGTTTCAGGAGGTATATTGTGACCGGAAAAGCTAAACTAATGATGAAGATTAATGCGGAGGTC +TGATGAGAATAAACCTTATTATTCAGATTAGGCCCCAAGAGGCATTCTTCATCTCCTTTTAGCAAAATAC +TATTTCAGGATAGTCCAGCTAGTGACACGTCTTTTAGCTGTATACCAGTTGCCCCTGAGATACGCCACAA +AAGTGTCTCTGAGCTAAAGTGGTCTGTACACATCTCATACATTGTATTAGGGGCAATAATATCTAATTGA +ACTTAGCCATTTAAAATTTAGTGCATAAATCTGGGCTAACTCCACCAGGTCAACTCCATTGGCTGAAAAG +AAGCCCACCTACAACGAACATTACTTTGAGCGCCCTCACAATTAAAAAATAAGAGCGTCGTTCCAACAAT +CGAGCGCAAGGTTACAAGGTTGAACTGAGAGTGTCTAGACAACAAAATATCGATACTCCAGACACCAAGC +AAGACCTGAGAAAAAACCATGGCCAAAGCTACGGGACGATACAATCTAATATCGCCCAAAAAGGACCTGG +AGAAAGGGGTTGTCTTAAGCGACCTCTGTAACTTCTTAGTTAGTCAAACTATTCAAGGGTGGAAAGTTTA +TTGGGCTGGTATTGAGTTTGATGTGACTCACAAAGGAATGGCCCTATTGCATAGACTGAAAACTAATGAC +TTTGCCCCTGCATGGTCAATGACAAGGAACCTATTTCCCCATTTATTTCAAAATCCGAATTCCACTATTG +AATCACCGCTGTGGGCACTGAGAGTCATCCTTGCAGCAGGGATACAGGACCAGTTAATTGACCAGTCTTT +GATTGAACCCTTAGCAGGAGCCCTTGGTCTGATCTCTGATTGGCTGCTAACAACCAACACTAACCATTTC +AACATGCGAACACAACGTGTCAAGGAACAATTGAGCCTAAAAATGCTGTCGTTGATTCGATCCAATATTC +TCAAGTTTATTAACAAATTGGATGCTCTACATGTCGTGAACTACAATGGATTATTGAGCAGTATTGAAAT +TGGAACTCAAAATCATACAATCATCATAACTCGAACTAACATGGGTTTTCTGGTGGAGCTCCAAGAACCC +GACAAATCGGCAATGAACCGCAAGAAGCCTGGGCCGGCGAAATTTTCCCTCCTTCATGAGTCCACACTGA +AAGCATTTACACAAGGGTCCTCGACACGAATGCAAAGTTTAATTCTTGAATTCAATAGCTCTCTTGCTAT +CTAACTAAGATGGAATACTTCATATTGGGCTAACTCATATATGCTGACTCAATAGTTAACTTGACATCTC +TGCCTTCATAATCAGATATATAAGCATAATAAATAAATACTCATATTTCTTGATAATTTGTTTAACCACA +GATAAATCCTCACTGTAAGCCAGCTTCCAAGTTGACACCCTTACAAAAACCAGGACTCAGAATCCCTCAA +ATAAGAGATTCCAAGACAACATCATAGAATTGCTTTATTATATTAATAAGCATTTTATCACTAGAAATCC +AATATACGAAATGGTTAATTGTAACTAAACCCGCAGGTCATGTGTGTTAGGTTTCACAAATTATATATAT +TACTAACTCCATACTCGTAACTAACATTAGATAAGTAGGTTAAGAAAAAAGCTTGAGGAAGATTAAGAAA +AACTGCTTATTGGGTCTTTCCGTGTTTTAGATGAAGCAGTTGACATTCTTCCTCTTGATATTAAATGGCT +ACACAACATACCCAATACCCAGACGCCAGGTTATCATCACCAATTGTATTGGACCAATGTGACCTTGTCA +CTAGAGCTTGCGGGTTGTATTCATCATACTCCCTTAATCCGCAACTACGCAACTGTAAACTCCCGAAACA +TATATACCGTTTAAAATATGATGTAACTGTTACCAAGTTCTTAAGTGATGTACCAGTGGCGACATTGCCC +ATAGATTTCATAGTCCCAATTCTTCTCAAGGCACTATCAGGCAATGGGTTCTGTCCTGTTGAGCCGCGGT +GCCAACAGTTCTTAGATGAAATTATTAAGTACACAATGCAAGATGCTCTCTTCCTGAAATATTATCTCAA +AAATGTGGGTGCTCAAGAAGACTGTGTTGATGACCACTTTCAAGAAAAAATCTTATCTTCAATTCAGGGC +AATGAATTTTTACATCAAATGTTTTTCTGGTATGACCTGGCTATTTTAACTCGAAGGGGTAGATTAAATC +GAGGAAACTCTAGATCAACGTGGTTTGTTCATGATGATTTAATAGACATCTTAGGCTATGGGGACTATGT +TTTTTGGAAGATCCCAATTTCACTGTTACCACTGAACACACAAGGAATCCCCCATGCTGCTATGGATTGG +TATCAGACATCAGTATTCAAAGAAGCGGTTCAAGGGCATACACACATTGTTTCTGTTTCTACTGCCGATG +TCTTGATAATGTGCAAAGATTTAATTACATGTCGATTCAACACAACTCTAATCTCAAAAATAGCAGAGGT +TGAGGACCCAGTTTGCTCTGATTATCCCAATTTTAAGATTGTGTCTATGCTTTACCAGAGCGGAGATTAC +TTACTCTCCATATTAGGGTCTGATGGGTATAAAATCATTAAGTTTCTCGAACCATTGTGCTTGGCTAAAA +TTCAATTGTGCTCAAAGTACACCGAGAGGAAGGGCCGATTCTTAACACAAATGCATTTAGCTGTAAATCA +CACCCTGGAAGAAATTACAGAAATACGTGCACTAAAGCCTTCACAGGCTCACAAGATCCGTGAATTCCAT +AGAACATTGATAAGGCTGGAGATGACGCCACAACAACTTTGTGAGCTATTTTCCATACAAAAACACTGGG +GGCATCCTGTGCTACATAGTGAAACAGCAATCCAAAAAGTTAAAAAACATGCTACGGTGCTAAAAGCATT +ACGCCCTATCGTGATTTTCGAGACATATTGTGTTTTTAAATATAGCATTGCAAAACATTATTTTGATAGT +CAAGGATCTTGGTACAGTGTTACCTCAGATAGAAATCTAACACCAGGTCTTAATTCTTATATCAAAAGAA +ATCAATTCCCTCCGTTGCCAATGATTAAAGAACTGCTATGGGAATTTTACCACCTTGACCATCCTCCACT +TTTCTCAACCAAAATTATTAGTGACTTAAGTATTTTTATAAAAGACAGAGCTACTGCAGTAGAAAGGACA +TGCTGGGATGCAGTATTCGAGCCTAATGTTCTGGGATATAATCCACCTCACAAATTCAGTACCAAACGTG +TACCGGAACAATTTTTAGAGCAAGAAAACTTTTCTATTGAGAATGTTCTTTCCTACGCGCAAAAACTCGA +GTATCTACTACCACAATATCGGAATTTTTCTTTCTCATTGAAAGAGAAAGAGTTGAATGTAGGTAGAACT +TTCGGAAAATTGCCTTATCCGACTCGCAATGTTCAAACACTTTGTGAAGCTCTGTTAGCTGATGGTCTTG +CTAAAGCATTTCCTAGCAATATGATGGTAGTTACGGAACGTGAACAAAAAGAAAGCTTATTGCATCAAGC +ATCATGGCACCACACAAGTGATGATTTCGGTGAGCATGCCACAGTTAGAGGGAGTAGCTTTGTAACTGAT +TTAGAGAAATACAATCTTGCATTTAGGTATGAGTTTACAGCACCTTTTATAGAATATTGCAACCGTTGCT +ATGGTGTTAAGAATGTTTTTAATTGGATGCATTATACAATCCCACAGTGTTATATGCATGTCAGTGATTA +TTATAATCCACCGCATAACCTCACACTGGAAAATCGAAACAACCCCCCTGAAGGGCCTAGTTCATACAGG +GGTCATATGGGAGGGATTGAAGGACTGCAACAAAAACTCTGGACAAGTATTTCATGTGCTCAAATTTCTT +TAGTTGAAATTAAGACTGGTTTTAAGTTGCGCTCAGCTGTGATGGGTGACAATCAGTGCATTACCGTTTT +ATCAGTCTTCCCCTTAGAGACTGATGCAGGCGAGCAGGAACAGAGCGCCGAGGACAATGCAGCGAGGGTG +GCCGCCAGCCTAGCAAAAGTTACAAGTGCCTGTGGAATCTTTTTAAAACCTGATGAAACATTTGTACATT +CAGGTTTTATCTATTTTGGAAAAAAACAATATTTGAATGGGGTCCAATTGCCTCAGTCCCTTAAAACGGC +TACAAGAATGGCACCATTGTCTGATGCAATTTTTGATGATCTTCAAGGGACCCTGGCTAGTATAGGTACT +GCTTTTGAGCGATCCATCTCTGAGACACGACATATCTTTCCTTGCAGAATAACCGCAGCTTTCCATACGT +TCTTTTCGGTGAGAATCTTGCAATATCATCACCTCGGATTTAATAAAGGTTTTGACCTTGGACAGTTAAC +ACTCGGCAAACCTCTGGATTTCGGAACAATATCATTGGCACTAGCGGTACCGCAGGTGCTTGGAGGGTTA +TCCTTCTTGAATCCTGAGAAATGTTTCTACCGGAATCTAGGAGATCCAGTTACCTCAGGTTTATTCCAGT +TAAAAACTTATCTCCGAATGATTGAGATGGATGATTTATTCTTACCTTTAATTGCGAAGAACCCTGGGAA +CTGCACTGCCATTGACTTTGTGCTAAATCCTAGCGGATTAAATGTTCCTGGGTCGCAAGACTTAACTTCA +TTTCTGCGCCAGATTGTACGTAGGACTATCACCCTAAGTGCGAAAAACAAACTTATTAATACCTTATTTC +ATGCATCAGCTGACTTCGAAGACGAAATGGTTTGTAAGTGGCTCTTATCATCAACTCCTGTTATGAGTCG +TTTCGCAGCCGATATATTTTCACGCACGCCGAGCGGGAAGCGATTGCAAATTCTAGGATACTTGGAAGGA +ACACGCACATTATTAGCCTCTAAGATCATCAACAATAATACAGAGACGCCGGTTTTGGACAGACTGAGGA +AGATAACATTGCAAAGGTGGAGTCTATGGTTTAGTTATCTTGATCATTGTGATAATATCCTGGCGGAGGC +TTTAACCCAAATAACTTGCACAGTTGATTTAGCACAGATCCTGAGGGAATATTCATGGGCACATATTTTA +GAGGGGAGACCTCTTATTGGAGCCACACTCCCATGTATGATTGAGCAATTCAAAGTGGTTTGGCTGAAAC +CCTACGAACAATGTCCGCAGTGTTCAAATGCCAAGCAACCTGGTGGGAAACCATTCGTGTCAGTAGCAGT +CAAGAAACATATTGTTAGTGCATGGCCAAATGCATCCCGAATAAGCTGGACTATCGGGGATGGAATCCCA +TACATTGGATCAAGGACAGAAGATAAGATAGGGCAACCTGCTATTAAACCAAAATGTCCTTCCGCAGCCT +TAAGAGAGGCCATTGAATTGGCGTCCCGTTTAACATGGGTAACTCAAGGCAGTTCGAACAGTGACTTGCT +AATAAAACCATTTTTGGAAGCACGAGTAAATTTAAGTGTTCAAGAAATACTTCAAATGACCCCTTCACAT +TACTCGGGAAATATTGTTCATAGGTACAACGATCAATACAGTCCTCATTCTTTCATGGCCAATCGTATGA +GTAACTCAGCAACGCGATTGATTGTTTCTACAAACACTTTAGGTGAGTTTTCAGGAGGTGGCCAATCGGC +ACGCGACAGCAATATTATTTTCCAGAATGTTATAAATTATGCAGTTGCACTGTTCGATATTAAATTTAGA +AACACTGAGGCTACAGATATCCAGTATAATCGTGCTCACCTTCATCTAACTAAGTGTTGCACCCGGGAGG +TACCAGCTCAGTACTTAACATACACATCTACATTGGATTTAGATTTAACAAGATACCGAGAAAATGAATT +GATTTATGACAATAATCCTCTAAAAGGAGGACTCAATTGCAATATCTCATTTGATAACCCATTTTTCCAA +GGCAAACAGCTGAACATTATAGAAGATGACCTTATTCGACTGCCTCACTTATCTGGATGGGAGCTAGCTA +AGACCATCATGCAATCAATTATTTCAGATAGCAATAATTCGTCTACAGACCCAATTAGCAGTGGAGAAAC +AAGATCATTCACTACCCATTTCTTAACTTATCCCAAAATAGGACTTCTGTACAGTTTTGGGGCCTTTGTA +AGTTATTATCTTGGCAATACAATTCTTCGGACTAAGAAATTAACACTTGACAATTTTTTATATTACTTAA +CTACCCAAATTCATAATCTACCACATCGCTCATTGCGAATACTTAAGCCAACATTCAAACATGCAAGCGT +TATGTCACGATTAATGAGTATTGATCCCCATTTTTCTATTTACATAGGCGGTGCTGCAGGTGACAGAGGA +CTCTCAGATGCGGCCAGGTTATTTTTGAGAACGTCCATTTCATCTTTTCTTACATTTGTAAAGGAATGGA +TAATTAATCGCGGAACAATTGTCCCTTTATGGATAGTATATCCATTAGAGGGTCAAAATCCAACACCTGT +TAATAATTTCCTCCATCAGATCGTAGAACTGCTGGTGCATGATTCATCAAGACACCAGGCTTTTAAAACT +ACCATAAATGATCATGTACATCCTCACGACAATCTTGTTTACACATGTAAGAGTACAGCCAGCAATTTCT +TCCATGCGTCATTGGCGTACTGGAGGAGCAGGCACAGAAACAGCAACCGAAAAGACTTGACAAGAAACTC +TTCAACTGGATCAAGCACAAACAACAGTGATGGTCATATTAAGAGAAGTCAAGAACAAACCACCAGAGAT +CCACATGATGGCACTGAACGGAGTCTAGTCCTGCAAATGAGCCATGAAATAAAAAGAACGACAATTCCAC +AAGAGAACACGCACCAGGGTCCGTCGTTCCAGTCATTTCTAAGTGACTCTGCTTGCGGTACAGCAAACCC +AAAACTAAATTTCGATAGATCGAGACACAATGTGAAATCTCAGGATCATAACTCAGCATCCAAGAGGGAA +GGTCATCAAATAATCTCACATCGTCTAGTCCTACCTTTCTTTACATTATCTCAAGGGACACGCCAATTAA +CGTCATCCAATGAGTCACAAACCCAAGATGAGATATCAAAGTACTTACGGCAATTGAGATCCGTCATTGA +TACCACAGTTTATTGTAGGTTTACCGGTATAGTCTCGTCCATGCATTACAAACTTGATGAGGTCCTTTGG +GAAATAGAGAATTTTAAGTCGGCTGTGACGCTGGCAGAGGGAGAAGGTGCTGGTGCCTTACTATTGATTC +AGAAATACCAAGTTAAGACCTTATTCTTCAACACGCTAGCTACTGAGTCCAGTATAGAGTCAGAAATAGT +ATCAGGAATGACTACTCCTAGGATGCTTCTACCTGTTATGTCAAAATTCCATAATGACCAAATTGAGATT +ATTCTTAACAACTCAGCAAGCCAAATAACAGACATAACAAATCCTACTTGGTTTAAAGACCAAAGAGCAA +GGCTACCTAGGCAAGTCGAGGTTATAACCATGGATGCAGAGACGACAGAGAATATAAACAGATCGAAATT +GTACGAAGCTGTACATAAATTGATCTTACACCATGTTGATCCCAGCGTATTGAAAGCAGTGGTCCTTAAA +GTCTTTCTAAGTGATACCGAGGGTATGTTATGGCTAAATGATAATCTAGCCCCGTTTTTTGCCACTGGGT +ATTTAATTAAGCCAATAACGTCAAGTGCCAGGTCTAGTGAGTGGTATCTTTGTCTGACGAACTTCTTATC +AACTACACGTAAGATGCCACACCAAAACCATCTCAGTTGTAAGCAGGTAATACTTACGGCATTGCAACTG +CAAATTCAACGGAGCCCATACTGGCTAAGTCATTTAACTCAGTATGCTGACTGCGATTTACATTTAAGCT +ATATCCGCCTTGGTTTTCCATCATTAGAGAAAGTACTATACCACAGGTATAACCTTGTCGATTCAAAAAG +AGGTCCACTAGTCTCTGTCACTCAGCACTTAGCACATCTTAGGGCAGAGATTCGAGAATTGACCAATGAT +TATAATCAACAGCGACAAAGTCGGACTCAAACATATCACTTTATTCGTACTGCAAAAGGACGAATCACAA +AACTAGTCAATGATTATTTAAAATTCTTTCTTATTGTACAAGCATTAAAACATAATGGGACATGGCAAGC +TGAGTTTAAGAAATTACCAGAGTTGATTAGTGTGTGCAATAGGTTCTATCATATTAGAGATTGTAATTGT +GAAGAACGTTTCTTAGTTCAAACCTTATATTTACATAGAATGCAGGATTCTGAAGTTAAGCTTATCGAAA +GGCTGACAGGGCTTCTGAGTTTATTTCCAGATGGTCTCTACAGGTTCGATTGAATAACCGTGCATAGTAT +TTTGATACTTGTAAAGGTTGGTTATCAACATACAGATTATAAAAAACTCATAAATTGCTCTCATACATCA +TCTTGATCTGATTTCAATAAATAACTATTTAGATAACGAAAGGAGTCCTTACATTATACACTATATTTGG +CCTCTCTCCCTGCGTGATAATCAAAAAATTCACAATACAGCATGTGTGACATATTACTGCTGCAATGAGT +CTAACGCAACATAATAAACTCCGCACTCTTTATAATTAAGCTTTAACGATAGGTCTGGGCTCATATTGTT +ATTGATATAGTAATGTTGTATCAATATCTTGCCAGATGGAATAGTGCTTTGGTTGATAACACGACTTCTT +AAAACAAAACTGATCTTTAAGATTAAGTTTTTTATAATTGTCATTGCTTTAATTTGTCGATTTAAAAATG +GTGATAGCCTTAATCTTTGTGTAAAATAAGAGATTAGGTGTAATAACTTTAACANNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNATGATAAAANTNAAAGANAAGACATNACTGTAAANNNNGAAATACCTTCTTTAC +AATATAGCAGACTAGATAATAATCTTCGTGTTAATGATAATTAAGGCATTGACCACGCTCATCAGAAGGC +TCACTAGAATAAACGTTGC diff --git a/test/input/TestOrderAndOrient/ref.ebov.sle.fasta b/test/input/TestOrderAndOrient/ref.ebov.sle.fasta new file mode 100644 index 00000000..5e53fb13 --- /dev/null +++ b/test/input/TestOrderAndOrient/ref.ebov.sle.fasta @@ -0,0 +1,272 @@ +>KM034550.1 Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/SLE/2014/Makona-EM095, complete genome +GAATAACTATGAGGAAGATTAATAATTTTCCTCTCATTGAAATTTATATCGGAATTTAAATTGAAATTGT +TACTGTAATCATACCTGGTTTGTTTCAGAGCCATATCACCAAGATAGAGAACAACCTAGGTCTCCGGAGG +GGGCAAGGGCATCAGTGTGCTCAGTTGAAAATCCCTTGTCAACATCTAGGCCTTATCACATCACAAGTTC +CGCCTTAAACTCTGCAGGGTGATCCAACAACCTTAATAGCAACATTATTGTTAAAGGACAGCATTAGTTC +ACAGTCAAACAAGCAAGATTGAGAATTAACTTTGATTTTGAACCTGAACACCCAGAGGACTGGAGACTCA +ACAACCCTAAAGCCTGGGGTAAAACATTAGAAATAGTTTAAAGACAAATTGCTCGGAATCACAAAATTCC +GAGTATGGATTCTCGTCCTCAGAAAGTCTGGATGACGCCGAGTCTCACTGAATCTGACATGGATTACCAC +AAGATCTTGACAGCAGGTCTGTCCGTTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCAGTGTATCAAG +TAAACAATCTTGAGGAAATTTGCCAACTTATCATACAGGCCTTTGAAGCTGGTGTTGATTTTCAAGAGAG +TGCGGACAGTTTCCTTCTCATGCTTTGTCTTCATCATGCGTACCAAGGAGATTACAAACTTTTCTTGGAA +AGTGGCGCAGTCAAGTATTTGGAAGGGCACGGGTTCCGTTTTGAAGTCAAGAAGCGTGATGGAGTGAAGC +GCCTTGAGGAATTGCTGCCAGCAGTATCTAGTGGGAGAAACATTAAGAGAACACTTGCTGCCATGCCGGA +AGAGGAGACGACTGAAGCTAATGCCGGTCAGTTCCTCTCCTTTGCAAGTCTATTCCTTCCGAAATTGGTA +GTAGGAGAAAAGGCTTGCCTTGAGAAGGTTCAAAGGCAAATTCAAGTACATGCAGAGCAAGGACTGATAC +AATATCCAACAGCTTGGCAATCAGTAGGACACATGATGGTGATTTTCCGTTTGATGCGAACAAATTTTTT +GATCAAATTTCTTCTAATACACCAAGGGATGCACATGGTTGCCGGACATGATGCCAACGATGCTGTGATT +TCAAATTCAGTGGCTCAAGCTCGTTTTTCAGGTCTATTGATTGTCAAAACAGTACTTGATCATATCCTAC +AAAAGACAGAACGAGGAGTTCGTCTCCATCCTCTTGCAAGGACCGCCAAGGTAAAAAATGAGGTGAACTC +CTTCAAGGCTGCACTCAGCTCCCTGGCCAAGCATGGAGAGTATGCTCCTTTCGCCCGACTTTTGAACCTT +TCTGGAGTAAATAATCTTGAGCATGGTCTTTTCCCTCAACTGTCGGCAATTGCACTCGGAGTCGCCACAG +CCCACGGGAGCACCCTCGCAGGAGTAAATGTTGGAGAACAGTATCAACAGCTCAGAGAGGCAGCCACTGA +GGCTGAGAAGCAACTCCAACAATATGCGGAGTCTCGTGAACTTGACCATCTTGGACTTGATGATCAGGAA +AAGAAAATTCTTATGAACTTCCATCAGAAAAAGAACGAAATCAGCTTCCAGCAAACAAACGCGATGGTAA +CTCTAAGAAAAGAGCGCCTGGCCAAGCTGACAGAAGCTATCACTGCTGCATCACTGCCCAAAACAAGTGG +ACATTACGATGATGATGACGACATTCCCTTTCCAGGACCCATCAATGATGACGACAATCCTGGCCATCAA +GATGATGATCCGACTGACTCACAGGATACGACCATTCCCGATGTGGTAGTTGACCCCGATGATGGAGGCT +ACGGCGAATACCAAAGTTACTCGGAAAACGGCATGAGTGCACCAGATGACTTGGTCCTATTCGATCTAGA +CGAGGACGACGAGGACACCAAGCCAGTGCCTAACAGATCGACCAAGGGTGGACAACAGAAAAACAGTCAA +AAGGGCCAGCATACAGAGGGCAGACAGACACAATCCACGCCAACTCAAAACGTCACAGGCCCTCGCAGAA +CAATCCACCATGCCAGTGCTCCACTCACGGACAATGACAGAAGAAACGAACCCTCCGGCTCAACCAGCCC +TCGCATGCTGACCCCAATCAACGAAGAGGCAGACCCACTGGACGATGCCGACGACGAGACGTCTAGCCTT +CCGCCCTTAGAGTCAGATGATGAAGAACAGGACAGGGACGGAACTTCTAACCGCACACCCACTGTCGCCC +CACCGGCTCCCGTATACAGAGATCACTCCGAAAAGAAAGAACTCCCGCAAGATGAACAACAAGATCAGGA +CCACATTCAAGAGGCCAGGAACCAAGACAGTGACAACACCCAGCCAGAACATTCTTTTGAGGAGATGTAT +CGCCACATTCTAAGATCACAGGGGCCATTTGATGCCGTTTTGTATTATCATATGATGAAGGATGAGCCTG +TAGTTTTCAGTACCAGTGATGGTAAAGAGTACACGTATCCGGACTCCCTTGAAGAGGAATATCCACCATG +GCTCACTGAAAAAGAGGCCATGAATGATGAGAATAGATTTGTTACACTGGATGGTCAACAATTTTATTGG +CCAGTAATGAATCACAGGAATAAATTCATGGCAATCCTGCAACATCATCAGTGAATGAGCATGTAATAAT +GGGATGATTTAATCGACAAATAGCTAACATTAAATAGTCAAGGAACGCAAACAGGAAGAATTTTTGATGT +CTAAGGTGTGAATTATTATCACAATAAAAGTGATTCTTAGTTTTGAATTTAAAGCTAGCTTATTATTACT +AGCCGTTTTTCAAAGTTCAATTTGAGTCTTAATGCAAATAAGCGTTAAGCCACAGTTATAGCCATAATGG +TAACTCAATATCTTAGCCAGCGATTTATCTAAATTAAATTACATTATGCTTTTATAACTTACCTACTAGC +CTGCCCAACATTTACACGATCGTTTTATAATTAAGAAAAAACTAATGATGAAGATTAAAACCTTCATCAT +CCTTACGTCAATTGAATTCTCTAGCACTAGAAGCTTATTGTCTTCAATGTAAAAGAAAAGCTGGCCTAAC +AAGATGACAACTAGAACAAAGGGCAGGGGCCATACTGTGGCCACGACTCAAAACGACAGAATGCCAGGCC +CTGAGCTTTCGGGCTGGATCTCTGAGCAGCTAATGACCGGAAGGATTCCTGTAAACGACATCTTCTGTGA +TATTGAGAACAATCCAGGATTATGCTACGCATCCCAAATGCAACAAACGAAGCCAAACCCGAAGATGCGC +AACAGTCAAACCCAAACGGACCCAATTTGCAATCATAGTTTTGAGGAGGTAGTACAAACATTGGCTTCAT +TGGCTACTGTTGTGCAACAACAAACCATCGCATCAGAATCATTAGAACAACGCATTACGAGTCTTGAGAA +TGGTCTAAAGCCAGTTTATGATATGGCAAAAACAATCTCCTCATTGAACAGGGTTTGTGCTGAGATGGTT +GCAAAATATGATCTTCTGGTGATGACAACCGGTCGGGCAACAGCAACCGCTGCGGCAACTGAGGCTTATT +GGGCTGAACATGGTCAACCACCACCTGGACCATCACTTTATGAAGAAAGTGCGATTCGGGGTAAGATTGA +ATCTAGAGATGAGACTGTCCCTCAAAGTGTTAGGGAGGCATTCAACAATCTAGACAGTACCACTTCACTA +ACTGAGGAAAATTTTGGGAAACCTGACATTTCGGCAAAGGATTTGAGAAACATTATGTATGATCACTTGC +CTGGTTTTGGAACTGCTTTCCACCAATTAGTACAAGTGATTTGTAAATTGGGAAAAGATAGCAATTCATT +GGACATTATTCATGCTGAGTTCCAGGCCAGCCTGGCTGAAGGAGACTCCCCTCAATGTGCCCTAATTCAA +ATTACAAAAAGAGTTCCAATCTTCCAAGATGCTGCTCCACCTGTCATCCACATCCGCTCTCGAGGTGACA +TTCCCCGAGCTTGCCAGAAGAGCTTGCGTCCAGTCCCACCATCACCCAAGATTGATCGAGGTTGGGTATG +TGTTTTTCAGCTTCAAGATGGTAAAACACTTGGACTCAAAATTTGAGCCAATCTCTTTTCCCTCCGAAAG +AGGCAACTAATAGCAGAGGCTTCAACTGCTGAACTATAGGGTATGTTACATTAATGATACACTTGTGAGT +ATCAGCCCTAGATAATATAAGTCAATTAAACAACCAAGATAAAATTGTTCATATCCCGCTAGCAGCTTTA +AAGATAAATGTAATAGGAGCTATACCTCTGACAGTATTATAATTAATTGTTATTAAGTAACCCAAACCAA +AAATGATGAAGATTAAGAAAAACCTACCTCGACTGAGAGAGTGTTTTTTCATTAACCTTCATCTTGTAAA +CGTTGAGCAAAATTGTTAAAAATATGAGGCGGGTTATATTGCCTACTGCTCCTCCTGAATATATGGAGGC +CATATACCCTGCCAGGTCAAATTCAACAATTGCTAGGGGTGGCAACAGCAATACAGGCTTCCTGACACCG +GAGTCAGTCAATGGAGACACTCCATCGAATCCACTCAGGCCAATTGCTGATGACACCATCGACCATGCCA +GCCACACACCAGGCAGTGTGTCATCAGCATTCATCCTCGAAGCTATGGTGAATGTCATATCGGGCCCCAA +AGTGCTAATGAAGCAAATTCCAATTTGGCTTCCTCTAGGTGTCGCTGATCAAAAGACCTACAGCTTTGAC +TCAACTACGGCCGCCATCATGCTTGCTTCATATACTATCACCCATTTCGGCAAGGCAACCAATCCGCTTG +TCAGAGTCAATCGGCTGGGTCCTGGAATCCCGGATCACCCCCTCAGGCTCCTGCGAATTGGAAACCAGGC +TTTCCTCCAGGAGTTCGTTCTTCCACCAGTCCAACTACCCCAGTATTTCACCTTTGATTTGACAGCACTC +AAACTGATCACTCAACCACTGCCTGCTGCAACATGGACCGATGACACTCCAACTGGATCAAATGGAGCGT +TGCGTCCAGGAATTTCATTTCATCCAAAACTTCGCCCCATTCTTTTACCCAACAAAAGTGGGAAGAAGGG +GAACAGTGCCGATCTAACATCTCCGGAGAAAATCCAAGCAATAATGACTTCACTCCAGGACTTTAAGATC +GTTCCAATTGATCCAACCAAAAATATCATGGGTATCGAAGTGCCAGAAACTCTGGTCCACAAGCTGACCG +GTAAGAAGGTGACTTCCAAAAATGGACAACCAATCATCCCTGTTCTTTTGCCAAAGTACATTGGGTTGGA +CCCGGTGGCTCCAGGAGACCTCACCATGGTAATCACACAGGATTGTGACACGTGTCATTCTCCTGCAAGT +CTTCCAGCTGTGGTTGAGAAGTAATTGCAATAATTGACTCAGATCCAGTTTTACAGAATCTTCTCAGGGA +TAGTGATAACATCTTTTTAATAATCCGTCTACTAGAAGAGATACTTCTAATTGATCAATATACTAAAGGT +GCTTTACACCATTGTCTCTTTTCTCTCCTAAATGTAGAGCTTAACAAAAGACTCATAATATACCTGTTTT +TAAAAGATTGATTGATGAAAGATCATGACTAATAACATTACAAACAATCCTACTATAATCAATACGGTGA +TTCAAATGTCAATCTTTCTCATTGCACATACTCTTTGTCCTTATCCTCAAATTGCCTACATGCTTACATC +TGAGGACAGCCAGTGTGACTTGGATTGGAGATGTGGAGGAAAAATCGGGGCCCATTTCTAAGTTGTTCAC +AATCTAAGTACAGACATTGCTCTTCTAATTAAGAAAAAATCGGCGATGAAGATTAAGCCGACAGTGAGCG +TAATCTTCATCTCTCTTAGATTATTTGTCTTCCAGAGTAGGGGTCATCAGGTCCTTTTCAATTGGATAAC +CAAAATAAGCTTCACTAGAAGGATATTGTGAGGCGACAACACAATGGGTGTTACAGGAATATTGCAGTTA +CCTCGTGATCGATTCAAGAGGACATCATTCTTTCTTTGGGTAATTATCCTTTTCCAAAGAACATTTTCCA +TCCCGCTTGGAGTTATCCACAATAGTACATTACAGGTTAGTGATGTCGACAAACTAGTTTGTCGTGACAA +ACTGTCATCCACAAATCAATTGAGATCAGTTGGACTGAATCTCGAGGGGAATGGAGTGGCAACTGACGTG +CCATCTGTGACTAAAAGATGGGGCTTCAGGTCCGGTGTCCCACCAAAGGTGGTCAATTATGAAGCTGGTG +AATGGGCTGAAAACTGCTACAATCTTGAAATCAAAAAACCTGACGGGAGTGAGTGTCTACCAGCAGCGCC +AGACGGGATTCGGGGCTTCCCCCGGTGCCGGTATGTGCACAAAGTATCAGGAACGGGACCATGTGCCGGA +GACTTTGCCTTCCACAAAGAGGGTGCTTTCTTCCTGTATGATCGACTTGCTTCCACAGTTATCTACCGAG +GAACGACTTTCGCTGAAGGTGTCGTTGCATTTCTGATACTGCCCCAAGCTAAGAAGGACTTCTTCAGCTC +ACACCCCTTGAGAGAGCCGGTCAATGCAACGGAGGACCCGTCGAGTGGCTATTATTCTACCACAATTAGA +TATCAGGCTACCGGTTTTGGAACTAATGAGACAGAGTACTTGTTCGAGGTTGACAATTTGACCTACGTCC +AACTTGAATCAAGATTCACACCACAGTTTCTGCTCCAGCTGAATGAGACAATATATGCAAGTGGGAAGAG +GAGCAACACCACGGGAAAACTAATTTGGAAGGTCAACCCCGAAATTGATACAACAATCGGGGAGTGGGCC +TTCTGGGAAACTAAAAAAACCTCACTAGAAAAATTCGCAGTGAAGAGTTGTCTTTCACAGCTGTATCAAA +CGGACCCAAAAACATCAGTGGTCAGAGTCCGGCGCGAACTTCTTCCGACCCAGAGACCAACACAACAAAT +GAAGACCACAAAATCATGGCTTCAGAAAATTCCTCTGCAATGGTTCAAGTGCACAGTCAAGGAAGGAAAG +CTGCAGTGTCGCATCTGACAACCCTTGCCACAATCTCCACGAGTCCTCAACCTCCCACAACCAAAACAGG +TCCGGACAACAGCACCCATAATACACCCGTGTATAAACTTGACATCTCTGAGGCAACTCAAGTTGGACAA +CATCACCGTAGAGCAGACAACGACAGCACAGCCTCCGACACTCCCCCCGCCACGACCGCAGCCGGACCCT +TAAAAGCAGAGAACACCAACACGAGTAAGAGCGCTGACTCCCTGGACCTCGCCACCACGACAAGCCCCCA +AAACTACAGCGAGACTGCTGGCAACAACAACACTCATCACCAAGATACCGGAGAAGAGAGTGCCAGCAGC +GGGAAGCTAGGCTTAATTACCAATACTATTGCTGGAGTAGCAGGACTGATCACAGGCGGGAGAAGGACTC +GAAGAGAAGTAATTGTCAATGCTCAACCCAAATGCAACCCCAATTTACATTACTGGACTACTCAGGATGA +AGGTGCTGCAATCGGATTGGCCTGGATACCATATTTCGGGCCAGCAGCCGAAGGAATTTACACAGAGGGG +CTAATGCACAACCAAGATGGTTTAATCTGTGGGTTGAGGCAGCTGGCCAACGAAACGACTCAAGCTCTCC +AACTGTTCCTGAGAGCCACAACTGAGCTGCGAACCTTTTCAATCCTCAACCGTAAGGCAATTGACTTCCT +GCTGCAGCGATGGGGTGGCACATGCCACATTTTGGGACCGGACTGCTGTATCGAACCACATGATTGGACC +AAGAACATAACAGACAAAATTGATCAGATTATTCATGATTTTGTTGATAAAACCCTTCCGGACCAGGGGG +ACAATGACAATTGGTGGACAGGATGGAGACAATGGATACCGGCAGGTATTGGAGTTACAGGTGTTATAAT +TGCAGTTATCGCTTTATTCTGTATATGCAAATTTGTCTTTTAGTCTTTCTTCAGATTGTTTCACGGCAAA +ACTCAACCTCAAATCAATGAAACTAGGATTTAATTATATGAATCACTTGAATCTAAGATTACTTGACAAA +TGATAACATAATACACTGGAGCTTCAAACATAGCCAATGTGATTCTAACTCCTTTAAACTCACAGTTAAT +CATAAACAAGGTTTGACATCAATCTAGCTATATCTTTAAGAATGATAAACTTGATGAAGATTAAGAAAAA +GGTAATCTTTCGATTATCTTTAGTCTTCATCCTTGATTCTACAATCATGACAGTTGTCTTTAATGAAAAA +GGAAAAAAGCCTTTTTATTAAGTTGTAATAATCAGATCTGCAAACCGGTAGAATTTAGTTGTAACCTAAC +ACACACAAAGCATTGGTAAAAAAGTCAATAGAAATTTAAACAGTGAGTGCAGACAACTCTTAAATGGAAG +CTTCATATGAGAGAGGACGCCCCCGAGCTGCCAGACAGCATTCAAGGGATGGACACGACCACCATGTTCG +AGCACGATCATCATCCAGAGAGAATTATCGAGGTGAGTACCGTCAATCAAGGAGCGCCTCACAAGTGCGC +GTTCCTACTGTATTTCATAAGAAGAGAGTTGAACCATTAACAGTTCCTCCAGCACCTAAAGACATATGTC +CGACCTTGAAAAAAGGATTTTTGTGTGACAGTAGTTTTTGCAAAAAAGACCACCAGTTAGAAAGTTTAAC +TGATAGGGAATTACTCCTACTAATCGCCCGTAAGACTTGTGGATCAGTAGAACAACAATTAAATATAACT +GCACCCAAGGACTCGCGCTTAGCAAATCCAACGGCTGATGATTTCCAGCAAGAGGAAGGTCCAAAAATTA +CCTTGTTGACACTGATCAAGACGGCAGAACACTGGGCGAGACAAGACATCCGAACCATAGAGGATTCCAA +ATTAAGGGCATTGTTAACTCTATGTGCTGTGATGACGAGGAAATTCTCAAAATCCCAGCTGAGTCTTTTG +TGTGAGACACACCTAAGGCGCGAAGGGCTTGGGCAAGATCAGGCAGAACCCGTTCTCGAAGTATATCAAC +GATTACACAGTGATAAAGGAGGCAGTTTTGAAGCTGCACTATGGCAACAATGGGACCGACAATCCCTAAT +TATGTTTATCACTGCATTCTTGAATATCGCTCTCCAGTTACCGTGTGAAAGTTCTGCTGTCGTTGTTTCA +GGGTTAAGAACATTGGTTCCTCAATCAGATAATGAGGAAGCTTCAACCAACCCGGGGACATGCTCATGGT +CTGATGAGGGTACCCCTTAATAAGGCTGACTAAAACACTATATAACCTTCTACTTGATCACAATACTCCG +TATACCTATCATCATATATTTAATCAAGACGATATCCTTTAAAACTTATTCAGTACTATAATCACTCTCA +TTTCAAATTGATAAGATATGCATAATTGCCTTAATATATAAAGAGGTATGATATAACCCAAACATTGACC +AAAGAAAATCATAATCTCGTATCGCTCGCAATATAACCTGCCAAGCATACCTCTTGCACAAAGTGATTCT +TGTACACAAATAATGTTTGACTCTACAGGAGGTAGCAACGATCCATCTCATCAAAAAATAAGTATTTTAT +GATTTACTAATGATCTCTTAAAATATTAAGAAAAACTGACGGAACATAAATTCTTTCTGCTTCAAGTTGT +GGAGGAGGTCTATGGTATTCGCTATTGTTATATTACAATCAATAACAAGCTTGTAAAAATATTGTTCTTG +TTTCAGGAGGTATATTGTGACCGGAAAAGCTAAACTAATGATGAAGATTAATGCGGAGGTCTGATGAGAA +TAAACCTTATTATTCAGATTAGGCCCCAAGAGGCATTCTTCATCTCCTTTTAGCAAAATACTATTTCAGG +ATAGTCCAGCTAGTGACACGTCTTTTAGCTGTATACCAGNNNNNNNNNNNNNNNNNNNNNNAAGTGTCTC +TGAGCTAAAGTGGTCTGTACACATCTCATACATTGTATTAGGGGCAATAATATCTAATTGAACTTAGCCA +TTTAAAATTTAGTGCATAAATCTGGGCTAACTCCACCAGGTCAACTCCATTGGCTGAAAAGAAGCCCACC +TACAACGAACATTACTTTGAGCGCCCTCACAATTAAAAAATAAGAGCGTCGTTCCAACAATCGAGCGCAA +GGTTACAAGGTTGAACTGAGAGTGTCTAGACAACAAAATATCGATACTCCAGACACCAAGCAAGACCTGA +GAAAAAACCATGGCCAAAGCTACGGGACGATACAATCTAATATCGCCCAAAAAGGACCTGGAGAAAGGGG +TTGTCTTAAGCGACCTCTGTAACTTCTTAGTTAGTCAAACTATTCAAGGGTGGAAAGTTTATTGGGCTGG +TATTGAGTTTGATGTGACTCACAAAGGAATGGCCCTATTGCATAGACTGAAAACTAATGACTTTGCCCCT +GCATGGTCAATGACAAGGAACCTATTTCCCCATTTATTTCAAAATCCGAATTCCACTATTGAATCACCGC +TGTGGGCACTGAGAGTCATCCTTGCAGCAGGGATACAGGACCAGTTAATTGACCAGTCTTTGATTGAACC +CTTAGCAGGAGCCCTTGGTCTGATCTCTGATTGGCTGCTAACAACCAACACTAACCATTTCAACATGCGA +ACACAACGTGTCAAGGAACAATTGAGCCTAAAAATGCTGTCGTTGATTCGATCCAATATTCTCAAGTTTA +TTAACAAATTGGATGCTCTACATGTCGTGAACTACAATGGATTATTGAGCAGTATTGAAATTGGAACTCA +AAATCATACAATCATCATAACTCGAACTAACATGGGTTTTCTGGTGGAGCTCCAAGAACCCGACAAATCG +GCAATGAACCGCAAGAAGCCTGGGCCGGCGAAATTTTCCCTCCTTCATGAGTCCACACTGAAAGCATTTA +CACAAGGGTCCTCGACACGAATGCAAAGTTTAATTCTTGAATTCAATAGCTCTCTTGCTATCTAACTAAG +ATGGAATACTTCATATTGGGCTAACTCATATATGCTGACTCAATAGTTAACTTGACATCTCTGCCTTCAT +AATCAGATATATAAGCATAATAAATAAATACTCATATTTCTTGATAATTTGTTTAACCACAGATAAATCC +TCACTGTAAGCCAGCTTCCAAGTTGACACCCTTACAAAAACCAGGACTCAGAATCCCTCAAATAAGAGAT +TCCAAGACAACATCATAGAATTGCTTTATTATATTAATAAGCATTTTATCACTAGAAATCCAATATACGA +AATGGTTAATTGTAACTAAACCCGCAGGTCATGTGTGTTAGGTTTCACAAATTATATATATTACTAACTC +CATACTCGTAACTAACATTAGATAAGTAGGTTAAGAAAAAAGCTTGAGGAAGATTAAGAAAAACTGCTTA +TTGGGTCTTTCCGTGTTTTAGATGAAGCAGTTGACATTCTTCCTCTTGATATTAAATGGCTACACAACAT +ACCCAATACCCAGACGCCAGGTTATCATCACCAATTGTATTGGACCAATGTGACCTTGTCACTAGAGCTT +GCGGGTTGTATTCATCATACTCCCTTAATCCGCAACTACGCAACTGTAAACTCCCGAAACATATATACCG +TTTAAAATATGATGTAACTGTTACCAAGTTCTTAAGTGATGTACCAGTGGCGACATTGCCCATAGATTTC +ATAGTCCCAATTCTTCTCAAGGCACTATCAGGCAATGGGTTCTGTCCTGTTGAGCCGCGGTGCCAACAGT +TCTTAGATGAAATTATTAAGTACACAATGCAAGATGCTCTCTTCCTGAAATATTATCTCAAAAATGTGGG +TGCTCAAGAAGACTGTGTTGATGACCACTTTCAAGAAAAAATCTTATCTTCAATTCAGGGCAATGAATTT +TTACATCAAATGTTTTTCTGGTATGACCTGGCTATTTTAACTCGAAGGGGTAGATTAAATCGAGGAAACT +CTAGATCAACGTGGTTTGTTCATGATGATTTAATAGACATCTTAGGCTATGGGGACTATGTTTTTTGGAA +GATCCCAATTTCACTGTTACCACTGAACACACAAGGAATCCCCCATGCTGCTATGGATTGGTATCAGACA +TCAGTATTCAAAGAAGCGGTTCAAGGGCATACACACATTGTTTCTGTTTCTACTGCCGATGTCTTGATAA +TGTGCAAAGATTTAATTACATGTCGATTCAACACAACTCTAATCTCAAAAATAGCAGAGGTTGAGGACCC +AGTTTGCTCTGATTATCCCAATTTTAAGATTGTGTCTATGCTTTACCAGAGCGGAGATTACTTACTCTCC +ATATTAGGGTCTGATGGGTATAAAATCATTAAGTTTCTCGAACCATTGTGCTTGGCTAAAATTCAATTGT +GCTCAAAGTACACCGAGAGGAAGGGCCGATTCTTAACACAAATGCATTTAGCTGTAAATCACACCCTGGA +AGAAATTACAGAAATACGTGCACTAAAGCCTTCACAGGCTCACAAGATCCGTGAATTCCATAGAACATTG +ATAAGGCTGGAGATGACGCCACAACAACTTTGTGAGCTATTTTCCATACAAAAACACTGGGGGCATCCTG +TGCTACATAGTGAAACAGCAATCCAAAAAGTTAAAAAACATGCTACGGTGCTAAAAGCATTACGCCCTAT +CGTGATTTTCGAGACATATTGTGTTTTTAAATATAGCATTGCAAAACATTATTTTGATAGTCAAGGATCT +TGGTACAGTGTTACCTCAGATAGAAATCTAACACCAGGTCTTAATTCTTATATCAAAAGAAATCAATTCC +CTCCGTTGCCAATGATTAAAGAACTGCTATGGGAATTTTACCACCTTGACCATCCTCCACTTTTCTCAAC +CAAAATTATTAGTGACTTAAGTATTTTTATAAAAGACAGAGCTACTGCAGTAGAAAGGACATGCTGGGAT +GCAGTATTCGAGCCTAATGTTCTGGGATATAATCCACCTCACAAATTCAGTACCAAACGTGTACCGGAAC +AATTTTTAGAGCAAGAAAACTTTTCTATTGAGAATGTTCTTTCCTACGCGCAAAAACTCGAGTATCTACT +ACCACAATATCGGAATTTTTCTTTCTCATTGAAAGAGAAAGAGTTGAATGTAGGTAGAACTTTCGGAAAA +TTGCCTTATCCGACTCGCAATGTTCAAACACTTTGTGAAGCTCTGTTAGCTGATGGTCTTGCTAAAGCAT +TTCCTAGCAATATGATGGTAGTTACGGAACGTGAACAAAAAGAAAGCTTATTGCATCAAGCATCATGGCA +CCACACAAGTGATGATTTCGGTGAGCATGCCACAGTTAGAGGGAGTAGCTTTGTAACTGATTTAGAGAAA +TACAATCTTGCATTTAGGTATGAGTTTACAGCACCTTTTATAGAATATTGCAACCGTTGCTATGGTGTTA +AGAATGTTTTTAATTGGATGCATTATACAATCCCACAGTGTTATATGCATGTCAGTGATTATTATAATCC +ACCGCATAACCTCACACTGGAAAATCGAAACAACCCCCCTGAAGGGCCTAGTTCATACAGGGGTCATATG +GGAGGGATTGAAGGACTGCAACAAAAACTCTGGACAAGTATTTCATGTGCTCAAATTTCTTTAGTTGAAA +TTAAGACTGGTTTTAAGTTGCGCTCAGCTGTGATGGGTGACAATCAGTGCATTACCGTTTTATCAGTCTT +CCCCTTAGAGACTGATGCAGGCGAGCAGGAACAGAGCGCCGAGGACAATGCAGCGAGGGTGGCCGCCAGC +CTAGCAAAAGTTACAAGTGCCTGTGGAATCTTTTTAAAACCTGATGAAACATTTGTACATTCAGGTTTTA +TCTATTTTGGAAAAAAACAATATTTGAATGGGGTCCAATTGCCTCAGTCCCTTAAAACGGCTACAAGAAT +GGCACCATTGTCTGATGCAATTTTTGATGATCTTCAAGGGACCCTGGCTAGTATAGGTACTGCTTTTGAG +CGATCCATCTCTGAGACACGACATATCTTTCCTTGCAGAATAACCGCAGCTTTCCATACGTTCTTTTCGG +TGAGAATCTTGCAATATCATCACCTCGGATTTAATAAAGGTTTTGACCTTGGACAGTTAACACTCGGCAA +ACCTCTGGATTTCGGAACAATATCATTGGCACTAGCGGTACCGCAGGTGCTTGGAGGGTTATCCTTCTTG +AATCCTGAGAAATGTTTCTACCGGAATCTAGGAGATCCAGTTACCTCAGGTTTATTCCAGTTAAAAACTT +ATCTCCGAATGATTGAGATGGATGATTTATTCTTACCTTTAATTGCGAAGAACCCTGGGAACTGCACTGC +CATTGACTTTGTGCTAAATCCTAGCGGATTAAATGTTCCTGGGTCGCAAGACTTAACTTCATTTCTGCGC +CAGATTGTACGTAGGACTATCACCCTAAGTGCGAAAAACAAACTTATTAATACCTTATTTCATGCATCAG +CTGACTTCGAAGACGAAATGGTTTGTAAGTGGCTCTTATCATCAACTCCTGTTATGAGTCGTTTCGCAGC +CGATATATTTTCACGCACGCCGAGCGGGAAGCGATTGCAAATTCTAGGATACTTGGAAGGAACACGCACA +TTATTAGCCTCTAAGATCATCAACAATAATACAGAGACGCCGGTTTTGGACAGACTGAGGAAGATAACAT +TGCAAAGGTGGAGTCTATGGTTTAGTTATCTTGATCATTGTGATAATATCCTGGCGGAGGCTTTAACCCA +AATAACTTGCACAGTTGATTTAGCACAGATCCTGAGGGAATATTCATGGGCACATATTTTAGAGGGGAGA +CCTCTTATTGGAGCCACACTCCCATGTATGATTGAGCAATTCAAAGTGGTTTGGCTGAAACCCTACGAAC +AATGTCCGCAGTGTTCAAATGCCAAGCAACCTGGTGGGAAACCATTCGTGTCAGTAGCAGTCAAGAAACA +TATTGTTAGTGCATGGCCAAATGCATCCCGAATAAGCTGGACTATCGGGGATGGAATCCCATACATTGGA +TCAAGGACAGAAGATAAGATAGGGCAACCTGCTATTAAACCAAAATGTCCTTCCGCAGCCTTAAGAGAGG +CCATTGAATTGGCGTCCCGTTTAACATGGGTAACTCAAGGCAGTTCGAACAGTGACTTGCTAATAAAACC +ATTTTTGGAAGCACGAGTAAATTTAAGTGTTCAAGAAATACTTCAAATGACCCCTTCACATTACTCGGGA +AATATTGTTCATAGGTACAACGATCAATACAGTCCTCATTCTTTCATGGCCAATCGTATGAGTAACTCAG +CAACGCGATTGATTGTTTCTACAAACACTTTAGGTGAGTTTTCAGGAGGTGGCCAATCGGCACGCGACAG +CAATATTATTTTCCAGAATGTTATAAATTATGCAGTTGCACTGTTCGATATTAAATTTAGAAACACTGAG +GCTACAGATATCCAGTATAATCGTGCTCACCTTCATCTAACTAAGTGTTGCACCCGGGAGGTACCAGCTC +AGTACTTAACATACACATCTACATTGGATTTAGATTTAACAAGATACCGAGAAAATGAATTGATTTATGA +CAATAATCCTCTAAAAGGAGGACTCAATTGCAATATCTCATTTGATAACCCATTTTTCCAAGGCAAACAG +CTGAACATTATAGAAGATGACCTTATTCGACTGCCTCACTTATCTGGATGGGAGCTAGCTAAGACCATCA +TGCAATCAATTATTTCAGATAGCAATAATTCGTCTACAGACCCAATTAGCAGTGGAGAAACAAGATCATT +CACTACCCATTTCTTAACTTATCCCAAGATAGGACTTCTGTACAGTTTTGGGGCCTTTGTAAGTTATTAT +CTTGGCAATACAATTCTTCGGACTAAGAAATTAACACTTGACAATTTTTTATATTACTTAACTACCCAAA +TTCATAATCTACCACATCGCTCATTGCGAATACTTAAGCCAACATTCAAACATGCAAGCGTTATGTCACG +ATTAATGAGTATTGATCCCCATTTTTCTATTTACATAGGCGGTGCTGCAGGTGACAGAGGACTCTCAGAT +GCGGCCAGGTTATTTTTGAGAACGTCCATTTCATCTTTTCTTACATTTGTAAAGGAATGGATAATTAATC +GCGGAACAATTGTCCCTTTATGGATAGTATATCCATTAGAGGGTCAAAATCCAACACCTGTTAATAATTT +CCTCCATCAGATCGTAGAACTGCTGGTGCATGATTCATCAAGACACCAGGCTTTTAAAACTACCATAAAT +GATCATGTACATCCTCACGACAATCTTGTTTACACATGTAAGAGTACAGCCAGCAATTTCTTCCATGCGT +CATTGGCGTACTGGAGGAGCAGGCACAGAAACAGCAACCGAAAAGACTTGACAAGAAACTCTTCAACTGG +ATCAAGCACAAACAACAGTGATGGTCATATTAAGAGAAGTCAAGAACAAACCACCAGAGATCCACATGAT +GGCACTGAACGGAGTCTAGTCCTGCAAATGAGCCATGAAATAAAAAGAACGACAATTCCACAAGAGAACA +CGCACCAGGGTCCGTCGTTCCAGTCATTTCTAAGTGACTCTGCTTGCGGTACAGCAAACCCAAAACTAAA +TTTCGATAGATCGAGACACAATGTGAAATCTCAGGATCATAACTCAGCATCCAAGAGGGAAGGTCATCAA +ATAATCTCACATCGTCTAGTCCTACCTTTCTTTACATTATCTCAAGGGACACGCCAATTAACGTCATCCA +ATGAGTCACAAACCCAAGATGAGATATCAAAGTACTTACGGCAATTGAGATCCGTCATTGATACCACAGT +TTATTGTAGGTTTACCGGTATAGTCTCGTCCATGCATTACAAACTTGATGAGGTCCTTTGGGAAATAGAG +AATTTTAAGTCGGCTGTGACGCTGGCAGAGGGAGAAGGTGCTGGTGCCTTACTATTGATTCAGAAATACC +AAGTTAAGACCTTATTTTTCAACACGCTAGCTACTGAGTCCAGTATAGAGTCAGAAATAGTATCAGGAAT +GACTACTCCTAGGATGCTTCTACCTGTTATGTCAAAATTCCATAATGACCAAATTGAGATTATTCTTAAC +AACTCAGCAAGCCAAATAACAGACATAACAAATCCTACTTGGTTTAAAGACCAAAGAGCAAGGCTACCTA +GGCAAGTCGAGGTTATAACCATGGATGCAGAGACGACAGAGAATATAAACAGATCGAAATTGTACGAAGC +TGTACATAAATTGATCTTACACCATGTTGATCCCAGCGTATTGAAAGCAGTGGTCCTTAAAGTCTTTCTA +AGTGATACCGAGGGTATGTTATGGCTAAATGATAATCTAGCCCCGTTTTTTGCCACTGGGTATTTAATTA +AGCCAATAACGTCAAGTGCCAGGTCTAGTGAGTGGTATCTTTGTCTGACGAACTTCTTATCAACTACACG +TAAGATGCCACACCAAAACCATCTCAGTTGTAAGCAGGTAATACTTACGGCATTGCAACTGCAAATTCAA +CGGAGCCCATACTGGCTAAGTCATTTAACTCAGTATGCTGACTGCGATTTACATTTAAGCTATATCCGCC +TTGGTTTTCCATCATTAGAGAAAGTACTATACCACAGGTATAACCTTGTCGATTCAAAAAGAGGTCCACT +AGTCTCTGTCACTCAGCACTTAGCACATCTTAGGGCAGAGATTCGAGAATTGACCAATGATTATAATCAA +CAGCGACAAAGTCGGACTCAAACATATCACTTTATTCGTACTGCAAAAGGACGAATCACAAAACTAGTCA +ATGATTATTTAAAATTCTTTCTTATTGTACAAGCATTAAAACATAATGGGACATGGCAAGCTGAGTTTAA +GAAATTACCAGAGTTGATTAGTGTGTGCAATAGGTTCTATCATATTAGAGATTGTAATTGTGAAGAACGT +TTCTTAGTTCAAACCTTATATTTACATAGAATGCAGGATTCTGAAGTTAAGCTTATCGAAAGGCTGACAG +GGCTTCTGAGTTTATTTCCAGATGGTCTCTACAGGTTCGATTGAATAACCGTGCATAGTATTTTGATACT +TGTAAAGGTTGGTTATCAACATACAGATTATAAAAAACTCATAAATTGCTCTCATACATCATCTTGATCT +GATTTCAATAAATAACTATTTAGATAACGAAAGGAGTCCTTACATTATACACTATATTTGGCCTCTCTCC +CTGCGTGATAATCAAAAAATTCACAATACAGCATGTGTGACATATTACTGCTGCAATGAGTCTAACGCAA +CATAATAAACTCCGCACTCTTTATAATTAAGCTTTAACGATAGGTCTGGGCTCATATTGTTATTGATATA +GTAATGTTGTATCAATATCTTGCCAGATGGAATAGTGCTTTGGTTGATAACACGACTTCTTAAAACAAAA +CTGATCTTTAAGATTAAGTTTTTTATAATTGTCATTGCTTTAATTTGTCGATTTAAAAATGGTGATAGCC +TTAATCTTTGTGTAAAATAAGAGATTAGGTGTAATAACTTTAACATTTTTGTCTAGTAAGCTACTATTCC +ATTCAGAATGATAAAATTAAAAGAAAAGACATGACTGTAAAATCAGAAATACCTTCTTTACAATATAGCA +GACTAGATAATAATCTTCGTGTTAATGATAATTAAGGCATTGACCACGCTCATCAGAAGGCTCACTAGAA +TAAACGTTGCAAAAAGGATCCCTGGAAAAATGGTCGCACACAAAAATTTAAAAATAAATCTATTTCTTCT +TTTTTGTGTGT diff --git a/test/unit/test_assembly.py b/test/unit/test_assembly.py index 6f8affd1..84f5479e 100644 --- a/test/unit/test_assembly.py +++ b/test/unit/test_assembly.py @@ -313,11 +313,11 @@ def test_ebov_palindrome(self): def test_ebov_palindrome_refsel(self): # this tests a scenario where show-aligns has more alignments than show-tiling with util.file.tempfnames(('.out.fasta', '.stats.tsv')) as (outFasta, outStats): - contigs, refs, expected, expectedStats = self.inputs('contigs.ebov.doublehit.fasta', - 'refs.ebov.fasta', + contigs, expected, expectedStats = self.inputs('contigs.ebov.doublehit.fasta', 'expected.ebov.doublehit.fasta', 'expected.refsel.ebov.stats.tsv') - assembly.order_and_orient(contigs, refs, outFasta, n_genome_segments=1, outStats=outStats) + refs = self.inputs('ref.ebov.gin.fasta','ref.ebov.sle.fasta','ref.ebov.lbr.fasta') + assembly.order_and_orient(contigs, refs, outFasta, outStats=outStats) self.assertEqualFastaSeqs(outFasta, expected) self.assertEqualContents(outStats, expectedStats) From 46b082b39d16932ebe87eec0ced760631df44d71 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 13:47:56 -0400 Subject: [PATCH 02/14] change input order of refs to match previous --- test/unit/test_assembly.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/test_assembly.py b/test/unit/test_assembly.py index 84f5479e..b06d77f2 100644 --- a/test/unit/test_assembly.py +++ b/test/unit/test_assembly.py @@ -316,7 +316,7 @@ def test_ebov_palindrome_refsel(self): contigs, expected, expectedStats = self.inputs('contigs.ebov.doublehit.fasta', 'expected.ebov.doublehit.fasta', 'expected.refsel.ebov.stats.tsv') - refs = self.inputs('ref.ebov.gin.fasta','ref.ebov.sle.fasta','ref.ebov.lbr.fasta') + refs = self.inputs('ref.ebov.lbr.fasta','ref.ebov.sle.fasta','ref.ebov.gin.fasta') assembly.order_and_orient(contigs, refs, outFasta, outStats=outStats) self.assertEqualFastaSeqs(outFasta, expected) self.assertEqualContents(outStats, expectedStats) From 50f3812d80e260d0af8004403640a4805c159d50 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 17:05:32 -0400 Subject: [PATCH 03/14] add initial implementation of skani-based reference finding tools --- assemble/skani.py | 128 ++++++++++++++++++++++++++++++++++++++++++++++ assembly.py | 61 +++++++++++++++++++++- 2 files changed, 188 insertions(+), 1 deletion(-) create mode 100644 assemble/skani.py diff --git a/assemble/skani.py b/assemble/skani.py new file mode 100644 index 00000000..f49e1984 --- /dev/null +++ b/assemble/skani.py @@ -0,0 +1,128 @@ +''' + SKANI - accurate, fast nucleotide identity calculation for MAGs and databases + https://github.com/bluenote-1577/skani +''' + +__author__ = "dpark@broadinstitute.org" + +import logging +import tools +import util.file +import util.misc +import csv +import os +import os.path +import shutil +import subprocess + +TOOL_NAME = "skani" + +_log = logging.getLogger(__name__) + +class UndirectedGraph: + ''' Simple utility class for finding clusters from pairwise relationships + ''' + def __init__(self): + self.edges = {} + + def add_node(self, node): + self.edges.setdefault(node, set()) + + def add_edge(self, node1, node2): + self.edges.setdefault(node1, set()).add(node2) + self.edges.setdefault(node2, set()).add(node1) + + def _dfs(self, node, visited): + visited.add(node) + cluster = set() + cluster.add(node) + for neighbor in self.edges[node]: + if neighbor not in visited: + cluster.update(self._dfs(neighbor, visited)) + return cluster + + def get_clusters(self): + visited = set() + clusters = [] + for node in self.edges.keys(): + if node not in visited: + clusters.append(self._dfs(node, visited)) + return clusters + + +class SkaniTool(tools.Tool): + + def __init__(self, install_methods=None): + if install_methods is None: + install_methods = [tools.PrexistingUnixCommand(shutil.which(TOOL_NAME), require_executability=True)] + super(SkaniTool, self).__init__(install_methods=install_methods) + + def version(self): + if self.tool_version is None: + self._get_tool_version() + return self.tool_version + + def _get_tool_version(self): + self.tool_version = subprocess.check_output([self.install_and_get_path(), '--version']).decode('UTF-8').strip().split()[1] + + def execute(self, subcommand, args, outfile, threads=None): + ''' generic execution of skani + ''' + + # build the skani command + tool_cmd = [self.install_and_get_path(), subcommand] + tool_cmd.extend(args) + tool_cmd.extend(["-t", "{}".format(util.misc.sanitize_thread_count(threads))]) + + # run the command + _log.debug(' '.join(tool_cmd) + ' > ' + outfile) + with open(outfile, 'w') as outf: + util.misc.run_and_save(tool_cmd, outf=outf) + + def triangle(self, ref_fastas, outfile_ani, outfile_af, other_args = (), threads=None): + ''' skani triangle computes an all-to-all ANI distance matrix for a set of sequences + ''' + self.execute('triangle', ref_fastas + other_args, outfile_ani, threads=threads) + shutil.copyfile('skani_matrix.af', outfile_af) + + def dist(self, query_fasta, ref_fastas, outfile, other_args = (), threads=None): + ''' skani dist computes ANI distance between a specified query set of + sequences (MAGs) and reference genomes (database) + ''' + self.execute('dist', ['-q', query_fasta, '-r'] + ref_fastas + other_args, outfile, threads=threads) + + def find_reference_clusters(self, ref_fastas, + other_args = ('-m', 50, '--no-learned-ani', '--slow', '--robust', '--detailed', '--ci', '--sparse'), + threads=None): + ''' use skani triangle to define clusters of highly-related genomes + (default settings here are for viral genomes) + ''' + g = UndirectedGraph() + for ref_fasta in ref_fastas: + g.add_node(ref_fasta) + + with util.file.tempfnames(('.skani_matrix.ani', '.skani_matrix.af')) \ + as (tmp_matrix_ani, tmp_matrix_af): + # run skani triangle + self.triangle(ref_fastas, 'skani_matrix.ani', 'skani_matrix.af', other_args, threads=threads) + + # parse the skani triangle results and define clusters + with open(tmp_matrix_ani, 'r') as inf: + for row in csv.DictReader(inf, delimiter='\t'): + g.add_edge(row['Ref_file'], row['Query_file']) + + return g.get_clusters() + + def find_closest_reference(self, contigs_fasta, ref_fastas, out_file, + other_args = ('-m', 50, '--no-learned-ani', '--slow', '--robust', '--detailed', '--ci', '-s', 85, '-n', 10, '--no-marker-index'), + threads=None): + ''' use skani dist to find the closest reference genome for each contig + (default settings here are for viral genomes) + ''' + self.dist(contigs_fasta, ref_fastas, out_file, other_args, threads=threads) + with open(out_file, 'r') as inf: + top_row = None + for row in csv.DictReader(inf, delimiter='\t'): + top_row = row + break + return (top_row['Ref_file'], top_row['ANI'], top_row['Align_fraction_ref'], top_row['Total_bases_covered']) if top_row is not None else None diff --git a/assembly.py b/assembly.py index e71bbe94..764cb27e 100755 --- a/assembly.py +++ b/assembly.py @@ -375,10 +375,69 @@ def parser_gapfill_gap2seq(parser=argparse.ArgumentParser(description='Close gap util.cmd.attach_main(parser, gapfill_gap2seq, split_args=True) return parser - __commands__.append(('gapfill_gap2seq', parser_gapfill_gap2seq)) +def cluster_references_ani(inRefs, outClusters, threads=None): + ''' This step uses the skani triangle tool to define clusters of highly-related genomes. + ''' + skani = tools.skani.SkaniTool() + clusters = skani.find_reference_clusters(inRefs, threads=threads) + with open(outClusters, 'w') as outf: + for cluster in clusters: + outf.write('\t'.join(cluster) + '\n') + +def parser_cluster_references_ani(parser=argparse.ArgumentParser(description='Cluster references')): + parser.add_argument('inRefs', nargs='+', help='FASTA files containing reference genomes') + parser.add_argument('outClusters', help='Output file containing clusters of highly-related genomes. Each line contains the filenames of the genomes in one cluster.') + util.cmd.common_args(parser, (('threads', None), ('loglevel', None), ('version', None), ('tmp_dir', None))) + util.cmd.attach_main(parser, cluster_references_ani, split_args=True) + return parser + +__commands__.append(('cluster_references_ani', parser_cluster_references_ani)) + + +def skani_contigs_to_refs(inContigs, inRefs, out_skani_dist, out_skani_dist_filtered, out_clusters_filtered, threads=None): + + skani = tools.skani.SkaniTool() + clusters = skani.find_reference_clusters(inRefs, threads=threads) + skani.find_closest_references(inContigs, inRefs, out_skani_dist, threads=threads) + refs_hit = set() + refs_hit_by_cluster = set() + + dist_header = util.file.readFlatFileHeader(out_skani_dist) + with open(out_skani_dist, 'r') as inf: + with open(out_skani_dist_filtered, 'w') as outf: + writer = csv.DictWriter(outf, dist_header, delimiter='\t', dialect=csv.unix_dialect, quoting=csv.QUOTE_MINIMAL) + for row in csv.DictReader(inf, delimiter='\t'): + refs_hit.add(row['Ref_file']) + if row['Ref_file'] not in refs_hit_by_cluster: + writer.writerow(row) + for cluster in clusters: + if row['Ref_file'] in cluster: + refs_hit_by_cluster.update(cluster) + break + + with open(out_clusters_filtered, 'w') as outf: + for cluster in clusters: + hits = list([ref for ref in cluster if ref in refs_hit]) + if hits: + outf.write('\t'.join(hits) + '\n') + +def parser_skani_contigs_to_refs(parser=argparse.ArgumentParser(description='Find closest references for contigs')): + parser.add_argument('inContigs', help='FASTA file containing contigs') + parser.add_argument('inRefs', nargs='+', help='FASTA files containing reference genomes') + parser.add_argument('out_skani_dist', help='Output file containing distances between contigs and references') + parser.add_argument('out_skani_dist_filtered', help='Output file containing distances between contigs and references, with only references that have a hit') + parser.add_argument('out_clusters_filtered', help='Output file containing clusters of highly-related genomes, with only references that have a hit') + util.cmd.common_args(parser, (('threads', None), ('loglevel', None), ('version', None), ('tmp_dir', None))) + util.cmd.attach_main(parser, skani_contigs_to_refs, split_args=True) + return parser + +__commands__.append(('skani_contigs_to_refs', parser_skani_contigs_to_refs)) + + + def _order_and_orient_orig(inFasta, inReference, outFasta, outAlternateContigs=None, breaklen=None, # aligner='nucmer', circular=False, trimmed_contigs=None, From 2294d78cb486600da3255becf22cf6cc90d703a4 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 17:55:22 -0400 Subject: [PATCH 04/14] add unit test --- test/unit/test_assembly.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/unit/test_assembly.py b/test/unit/test_assembly.py index b06d77f2..61b10c83 100644 --- a/test/unit/test_assembly.py +++ b/test/unit/test_assembly.py @@ -582,6 +582,28 @@ def test_empty_fasta_input(self): aligner='mummer') self.assertEqualContents(outFasta, empty_fasta) +class TestSkaniReferenceSelection(TestCaseWithTmp): + ''' Test Skani-based reference selection ''' + + def test_skani_contigs_to_refs(self): + inDir = os.path.join(util.file.get_test_input_path(), 'TestOrderAndOrient') + with util.file.tempfnames(('.skani.dist.out', '.skani.dist.filtered', '.clusters.filtered')) \ + as (out_skani_dist, out_skani_dist_filtered, out_clusters_filtered): + contigs = os.path.join(inDir, 'contigs.lasv.fasta') + refs = [os.path.join(inDir, 'ref.lasv.{}.fasta'.format(strain)) + for strain in ('josiah', 'pinneo', 'KGH_G502', 'BNI_Nig08_A19', 'nomatch')] + \ + [os.path.join(inDir, 'ref.ebov.{}.fasta'.format(strain)) + for strain in ('lbr', 'sle', 'gin')] + + assembly.skani_contigs_to_refs(contigs, refs, out_skani_dist, out_skani_dist_filtered, out_clusters_filtered, threads=1) + + with open(out_clusters_filtered, 'r') as inf: + clusters = inf.readlines() + self.assertEqual(len(clusters), 1) + clusters = set([os.path.basename(f) for f in clusters.strip().split('\t')]) + expected_clusters = set(['ref.lasv.{}.fasta'.format(strain) for strain in ('pinneo', 'KGH_G502')]) + self.assertEqual(clusters, expected_clusters) + class TestMutableSequence(unittest.TestCase): ''' Test the MutableSequence class ''' From 3c56bbb1d9f3f01b1bbe4e88152854c01a64ea60 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 19:08:29 -0400 Subject: [PATCH 05/14] fix import of skani tool --- assembly.py | 5 +++-- test/unit/test_assembly.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/assembly.py b/assembly.py index 764cb27e..9c28b4bf 100755 --- a/assembly.py +++ b/assembly.py @@ -41,6 +41,7 @@ import assemble.mummer import assemble.muscle import assemble.gap2seq +import assemble.skani # third-party import numpy @@ -381,7 +382,7 @@ def parser_gapfill_gap2seq(parser=argparse.ArgumentParser(description='Close gap def cluster_references_ani(inRefs, outClusters, threads=None): ''' This step uses the skani triangle tool to define clusters of highly-related genomes. ''' - skani = tools.skani.SkaniTool() + skani = assemble.skani.SkaniTool() clusters = skani.find_reference_clusters(inRefs, threads=threads) with open(outClusters, 'w') as outf: for cluster in clusters: @@ -399,7 +400,7 @@ def parser_cluster_references_ani(parser=argparse.ArgumentParser(description='Cl def skani_contigs_to_refs(inContigs, inRefs, out_skani_dist, out_skani_dist_filtered, out_clusters_filtered, threads=None): - skani = tools.skani.SkaniTool() + skani = assemble.skani.SkaniTool() clusters = skani.find_reference_clusters(inRefs, threads=threads) skani.find_closest_references(inContigs, inRefs, out_skani_dist, threads=threads) refs_hit = set() diff --git a/test/unit/test_assembly.py b/test/unit/test_assembly.py index 61b10c83..3d871872 100644 --- a/test/unit/test_assembly.py +++ b/test/unit/test_assembly.py @@ -17,6 +17,7 @@ import itertools import pytest import assemble.mummer +import assemble.skani import tools.minimap2 import tools.novoalign import tools.picard From f5dedea543f428fc1a2ed423e141a3cd49c25265 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 19:18:15 -0400 Subject: [PATCH 06/14] add unit test for UndirectedGraph --- test/unit/test_assembly.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/unit/test_assembly.py b/test/unit/test_assembly.py index 3d871872..a74ab945 100644 --- a/test/unit/test_assembly.py +++ b/test/unit/test_assembly.py @@ -247,6 +247,37 @@ def test_non_failure(self): self.assertEqual(out, out.upper()) +class TestUndirectedGraph(unittest.TestCase): + def test_simple(self): + g = assembly.UndirectedGraph() + g.add_edge('a', 'b') + g.add_edge('a', 'c') + g.add_edge('b', 'd') + actual = list(sorted(g.get_clusters())) + self.assertEqual(actual, [{'a', 'b', 'c', 'd'}]) + + def test_disconnected(self): + g = assembly.UndirectedGraph() + g.add_edge('a', 'b') + g.add_edge('c', 'd') + actual = list(sorted(g.get_clusters())) + self.assertEqual(actual, [{'a', 'b'}, {'c', 'd'}]) + + def test_both(self): + g = assembly.UndirectedGraph() + g.add_edge(1, 2) + g.add_edge(11,12) + g.add_edge(18,15) + g.add_node(12) + g.add_node(22) + g.add_node(55) + g.add_edge(25,22) + g.add_edge(7,2) + g.add_edge(12,18) + actual = list(sorted(g.get_clusters())) + self.assertEqual(actual, [{1, 2, 7}, {11, 12, 15, 18}, {22, 25}, {55}]) + + class TestOrderAndOrient(TestCaseWithTmp): ''' Test the MUMmer-based order_and_orient command ''' From 235f3b01c08d32f793bb71ea0c139d72b015d2a9 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 19:27:29 -0400 Subject: [PATCH 07/14] doc updates and fixes --- assemble/skani.py | 4 ++-- assembly.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/assemble/skani.py b/assemble/skani.py index f49e1984..39238dd5 100644 --- a/assemble/skani.py +++ b/assemble/skani.py @@ -82,14 +82,14 @@ def execute(self, subcommand, args, outfile, threads=None): def triangle(self, ref_fastas, outfile_ani, outfile_af, other_args = (), threads=None): ''' skani triangle computes an all-to-all ANI distance matrix for a set of sequences ''' - self.execute('triangle', ref_fastas + other_args, outfile_ani, threads=threads) + self.execute('triangle', list(ref_fastas) + list(other_args), outfile_ani, threads=threads) shutil.copyfile('skani_matrix.af', outfile_af) def dist(self, query_fasta, ref_fastas, outfile, other_args = (), threads=None): ''' skani dist computes ANI distance between a specified query set of sequences (MAGs) and reference genomes (database) ''' - self.execute('dist', ['-q', query_fasta, '-r'] + ref_fastas + other_args, outfile, threads=threads) + self.execute('dist', ['-q', query_fasta, '-r'] + list(ref_fastas) + list(other_args), outfile, threads=threads) def find_reference_clusters(self, ref_fastas, other_args = ('-m', 50, '--no-learned-ani', '--slow', '--robust', '--detailed', '--ci', '--sparse'), diff --git a/assembly.py b/assembly.py index 9c28b4bf..175b113f 100755 --- a/assembly.py +++ b/assembly.py @@ -428,9 +428,9 @@ def skani_contigs_to_refs(inContigs, inRefs, out_skani_dist, out_skani_dist_filt def parser_skani_contigs_to_refs(parser=argparse.ArgumentParser(description='Find closest references for contigs')): parser.add_argument('inContigs', help='FASTA file containing contigs') parser.add_argument('inRefs', nargs='+', help='FASTA files containing reference genomes') - parser.add_argument('out_skani_dist', help='Output file containing distances between contigs and references') - parser.add_argument('out_skani_dist_filtered', help='Output file containing distances between contigs and references, with only references that have a hit') - parser.add_argument('out_clusters_filtered', help='Output file containing clusters of highly-related genomes, with only references that have a hit') + parser.add_argument('out_skani_dist', help='Output file containing ANI distances between contigs and references') + parser.add_argument('out_skani_dist_filtered', help='Output file containing ANI distances between contigs and references, with only the top reference hit per cluster') + parser.add_argument('out_clusters_filtered', help='Output file containing clusters of highly-related genomes, with only clusters that have a hit to the contigs') util.cmd.common_args(parser, (('threads', None), ('loglevel', None), ('version', None), ('tmp_dir', None))) util.cmd.attach_main(parser, skani_contigs_to_refs, split_args=True) return parser From 2be5c304e584285acd33b54990b8e04f6129b92d Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 19:35:51 -0400 Subject: [PATCH 08/14] string coerce --- assemble/skani.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemble/skani.py b/assemble/skani.py index 39238dd5..7429972c 100644 --- a/assemble/skani.py +++ b/assemble/skani.py @@ -75,7 +75,7 @@ def execute(self, subcommand, args, outfile, threads=None): tool_cmd.extend(["-t", "{}".format(util.misc.sanitize_thread_count(threads))]) # run the command - _log.debug(' '.join(tool_cmd) + ' > ' + outfile) + _log.debug(' '.join(map(str, tool_cmd)) + ' > ' + outfile) with open(outfile, 'w') as outf: util.misc.run_and_save(tool_cmd, outf=outf) From 9fbd1f0711a86984052f7249099c090f03bfa04a Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 19:46:29 -0400 Subject: [PATCH 09/14] fix unit test paths, fix string coercion point for skani execute --- assemble/skani.py | 4 ++-- test/unit/test_assembly.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/assemble/skani.py b/assemble/skani.py index 7429972c..5c0ca108 100644 --- a/assemble/skani.py +++ b/assemble/skani.py @@ -71,11 +71,11 @@ def execute(self, subcommand, args, outfile, threads=None): # build the skani command tool_cmd = [self.install_and_get_path(), subcommand] - tool_cmd.extend(args) + tool_cmd.extend(map(str, args)) tool_cmd.extend(["-t", "{}".format(util.misc.sanitize_thread_count(threads))]) # run the command - _log.debug(' '.join(map(str, tool_cmd)) + ' > ' + outfile) + _log.debug(' '.join(tool_cmd) + ' > ' + outfile) with open(outfile, 'w') as outf: util.misc.run_and_save(tool_cmd, outf=outf) diff --git a/test/unit/test_assembly.py b/test/unit/test_assembly.py index a74ab945..81deb6e0 100644 --- a/test/unit/test_assembly.py +++ b/test/unit/test_assembly.py @@ -249,7 +249,7 @@ def test_non_failure(self): class TestUndirectedGraph(unittest.TestCase): def test_simple(self): - g = assembly.UndirectedGraph() + g = assembly.skani.UndirectedGraph() g.add_edge('a', 'b') g.add_edge('a', 'c') g.add_edge('b', 'd') @@ -257,14 +257,14 @@ def test_simple(self): self.assertEqual(actual, [{'a', 'b', 'c', 'd'}]) def test_disconnected(self): - g = assembly.UndirectedGraph() + g = assembly.skani.UndirectedGraph() g.add_edge('a', 'b') g.add_edge('c', 'd') actual = list(sorted(g.get_clusters())) self.assertEqual(actual, [{'a', 'b'}, {'c', 'd'}]) def test_both(self): - g = assembly.UndirectedGraph() + g = assembly.skani.UndirectedGraph() g.add_edge(1, 2) g.add_edge(11,12) g.add_edge(18,15) From 23462b3fe0a9b841afd52b40a5c8d7a3ef615e1b Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 20:05:20 -0400 Subject: [PATCH 10/14] skip alignedfrac output matrix from skani triangle --- assemble/skani.py | 8 +++----- test/unit/test_assembly.py | 6 +++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/assemble/skani.py b/assemble/skani.py index 5c0ca108..734fe8ae 100644 --- a/assemble/skani.py +++ b/assemble/skani.py @@ -79,11 +79,10 @@ def execute(self, subcommand, args, outfile, threads=None): with open(outfile, 'w') as outf: util.misc.run_and_save(tool_cmd, outf=outf) - def triangle(self, ref_fastas, outfile_ani, outfile_af, other_args = (), threads=None): + def triangle(self, ref_fastas, outfile_ani, other_args = (), threads=None): ''' skani triangle computes an all-to-all ANI distance matrix for a set of sequences ''' self.execute('triangle', list(ref_fastas) + list(other_args), outfile_ani, threads=threads) - shutil.copyfile('skani_matrix.af', outfile_af) def dist(self, query_fasta, ref_fastas, outfile, other_args = (), threads=None): ''' skani dist computes ANI distance between a specified query set of @@ -101,10 +100,9 @@ def find_reference_clusters(self, ref_fastas, for ref_fasta in ref_fastas: g.add_node(ref_fasta) - with util.file.tempfnames(('.skani_matrix.ani', '.skani_matrix.af')) \ - as (tmp_matrix_ani, tmp_matrix_af): + with util.file.tempfnames(('.skani_matrix.ani'),) as (tmp_matrix_ani,): # run skani triangle - self.triangle(ref_fastas, 'skani_matrix.ani', 'skani_matrix.af', other_args, threads=threads) + self.triangle(ref_fastas, tmp_matrix_ani, other_args, threads=threads) # parse the skani triangle results and define clusters with open(tmp_matrix_ani, 'r') as inf: diff --git a/test/unit/test_assembly.py b/test/unit/test_assembly.py index 81deb6e0..ea1bcee8 100644 --- a/test/unit/test_assembly.py +++ b/test/unit/test_assembly.py @@ -249,7 +249,7 @@ def test_non_failure(self): class TestUndirectedGraph(unittest.TestCase): def test_simple(self): - g = assembly.skani.UndirectedGraph() + g = assemble.skani.UndirectedGraph() g.add_edge('a', 'b') g.add_edge('a', 'c') g.add_edge('b', 'd') @@ -257,14 +257,14 @@ def test_simple(self): self.assertEqual(actual, [{'a', 'b', 'c', 'd'}]) def test_disconnected(self): - g = assembly.skani.UndirectedGraph() + g = assemble.skani.UndirectedGraph() g.add_edge('a', 'b') g.add_edge('c', 'd') actual = list(sorted(g.get_clusters())) self.assertEqual(actual, [{'a', 'b'}, {'c', 'd'}]) def test_both(self): - g = assembly.skani.UndirectedGraph() + g = assemble.skani.UndirectedGraph() g.add_edge(1, 2) g.add_edge(11,12) g.add_edge(18,15) From 4c48a3f854b39c0ede9552eb908df4f2d0c99b13 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 20:16:37 -0400 Subject: [PATCH 11/14] single tempfname --- assemble/skani.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemble/skani.py b/assemble/skani.py index 734fe8ae..06182cc9 100644 --- a/assemble/skani.py +++ b/assemble/skani.py @@ -100,7 +100,7 @@ def find_reference_clusters(self, ref_fastas, for ref_fasta in ref_fastas: g.add_node(ref_fasta) - with util.file.tempfnames(('.skani_matrix.ani'),) as (tmp_matrix_ani,): + with util.file.tempfname('.skani_matrix.ani') as tmp_matrix_ani: # run skani triangle self.triangle(ref_fastas, tmp_matrix_ani, other_args, threads=threads) From ab3f1957f1c5fa9488b4284d6e04dc0a046f4eb8 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 20:31:52 -0400 Subject: [PATCH 12/14] typo --- assembly.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assembly.py b/assembly.py index 175b113f..807ee560 100755 --- a/assembly.py +++ b/assembly.py @@ -402,7 +402,7 @@ def skani_contigs_to_refs(inContigs, inRefs, out_skani_dist, out_skani_dist_filt skani = assemble.skani.SkaniTool() clusters = skani.find_reference_clusters(inRefs, threads=threads) - skani.find_closest_references(inContigs, inRefs, out_skani_dist, threads=threads) + skani.find_closest_reference(inContigs, inRefs, out_skani_dist, threads=threads) refs_hit = set() refs_hit_by_cluster = set() From e121329ef0d635308256ca7bd2b6c1a584d90864 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 21:12:44 -0400 Subject: [PATCH 13/14] fix unit test code --- test/unit/test_assembly.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit/test_assembly.py b/test/unit/test_assembly.py index ea1bcee8..cccaef5c 100644 --- a/test/unit/test_assembly.py +++ b/test/unit/test_assembly.py @@ -632,9 +632,9 @@ def test_skani_contigs_to_refs(self): with open(out_clusters_filtered, 'r') as inf: clusters = inf.readlines() self.assertEqual(len(clusters), 1) - clusters = set([os.path.basename(f) for f in clusters.strip().split('\t')]) - expected_clusters = set(['ref.lasv.{}.fasta'.format(strain) for strain in ('pinneo', 'KGH_G502')]) - self.assertEqual(clusters, expected_clusters) + actual_cluster = set([os.path.basename(f) for f in clusters[0].strip().split('\t')]) + expected_cluster = set(['ref.lasv.{}.fasta'.format(strain) for strain in ('pinneo', 'KGH_G502')]) + self.assertEqual(actual_cluster, expected_cluster) class TestMutableSequence(unittest.TestCase): From 10896afdbd2cb7c730dbca310fa551a42de830e9 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Mon, 18 Mar 2024 21:23:17 -0400 Subject: [PATCH 14/14] fix expected list --- test/unit/test_assembly.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test/unit/test_assembly.py b/test/unit/test_assembly.py index cccaef5c..206a3769 100644 --- a/test/unit/test_assembly.py +++ b/test/unit/test_assembly.py @@ -618,6 +618,14 @@ class TestSkaniReferenceSelection(TestCaseWithTmp): ''' Test Skani-based reference selection ''' def test_skani_contigs_to_refs(self): + ''' + Test the skani_contigs_to_refs function. + Test inputs include LASV MAGs/contigs against various EBOV and LASV references. + The only references that should hit are the LASV Josiah and KGH_G502 references. + Additionally, skani should identify them as being from the same cluster. + No EBOV references should be selected. + ''' + inDir = os.path.join(util.file.get_test_input_path(), 'TestOrderAndOrient') with util.file.tempfnames(('.skani.dist.out', '.skani.dist.filtered', '.clusters.filtered')) \ as (out_skani_dist, out_skani_dist_filtered, out_clusters_filtered): @@ -633,7 +641,7 @@ def test_skani_contigs_to_refs(self): clusters = inf.readlines() self.assertEqual(len(clusters), 1) actual_cluster = set([os.path.basename(f) for f in clusters[0].strip().split('\t')]) - expected_cluster = set(['ref.lasv.{}.fasta'.format(strain) for strain in ('pinneo', 'KGH_G502')]) + expected_cluster = set(['ref.lasv.{}.fasta'.format(strain) for strain in ('josiah', 'KGH_G502')]) self.assertEqual(actual_cluster, expected_cluster)