From c304f832b54bde9a986913b6d7cd0d38e0fcc017 Mon Sep 17 00:00:00 2001 From: Svenja Mehringer Date: Thu, 20 Oct 2022 15:19:24 +0200 Subject: [PATCH] [MISC] Remove field::alignment from SAM file. --- doc/cookbook/index.md | 9 +- doc/tutorial/10_sam_file/index.md | 37 ++--- .../sam_file_alignments_with_ref.cpp | 30 ---- .../sam_file_alignments_with_ref.err | 5 - .../sam_file_alignments_without_ref.cpp | 25 ---- .../sam_file_alignments_without_ref.err | 5 - .../10_sam_file/sam_file_solution2.cpp | 10 +- doc/tutorial/10_sam_file/sam_file_writing.cpp | 22 +-- .../io/sam_file/detail/format_sam_base.hpp | 70 --------- include/seqan3/io/sam_file/format_bam.hpp | 141 ++---------------- include/seqan3/io/sam_file/format_sam.hpp | 134 ++--------------- include/seqan3/io/sam_file/input.hpp | 48 ++---- .../io/sam_file/input_format_concept.hpp | 3 - include/seqan3/io/sam_file/output.hpp | 10 +- .../io/sam_file/output_format_concept.hpp | 6 - include/seqan3/io/sam_file/record.hpp | 23 --- ...ction_without_automatic_type_deduction.cpp | 1 - test/unit/io/sam_file/format_bam_test.cpp | 139 +++++++---------- test/unit/io/sam_file/format_sam_test.cpp | 28 +--- .../sam_file_format_test_template.hpp | 122 ++------------- test/unit/io/sam_file/sam_file_input_test.cpp | 128 +--------------- .../unit/io/sam_file/sam_file_output_test.cpp | 5 +- .../unit/io/sam_file/sam_file_record_test.cpp | 46 ++---- test/unit/io/sam_file/sam_file_seek_test.cpp | 3 - 24 files changed, 175 insertions(+), 875 deletions(-) delete mode 100644 doc/tutorial/10_sam_file/sam_file_alignments_with_ref.cpp delete mode 100644 doc/tutorial/10_sam_file/sam_file_alignments_with_ref.err delete mode 100644 doc/tutorial/10_sam_file/sam_file_alignments_without_ref.cpp delete mode 100644 doc/tutorial/10_sam_file/sam_file_alignments_without_ref.err diff --git a/doc/cookbook/index.md b/doc/cookbook/index.md index 763799bf314..52fa869eb32 100644 --- a/doc/cookbook/index.md +++ b/doc/cookbook/index.md @@ -137,14 +137,7 @@ To search for either 1 insertion or 1 deletion you can use the seqan3::search_cf \snippet doc/tutorial/09_search/search_small_snippets.cpp error_search -# Reading the CIGAR information into an actual alignment - -In SeqAn, the conversion from a CIGAR string to an alignment (two `seqan3::aligned_sequence`s) is done automatically for -you. You can access the resulting alignment via seqan3::sam_record::alignment: - -\snippet doc/tutorial/10_sam_file/sam_file_alignments_without_ref.cpp main - -# Combining sequence and alignment files +# Reading the CIGAR information from a SAM file and construct an alignment This recipe can be used to: 1. Read in a FASTA file with the reference and a SAM file with the alignment diff --git a/doc/tutorial/10_sam_file/index.md b/doc/tutorial/10_sam_file/index.md index 1e208ea220e..463a832ba04 100644 --- a/doc/tutorial/10_sam_file/index.md +++ b/doc/tutorial/10_sam_file/index.md @@ -65,7 +65,7 @@ To make things clearer, here is the table of SAM columns and the corresponding f | 3 | RNAME | seqan3::sam_record::reference_id | seqan3::field::ref_id | | 4 | POS | seqan3::sam_record::reference_position | seqan3::field::ref_offset | | 5 | MAPQ | seqan3::sam_record::mapping_quality | seqan3::field::mapq | -| 6 | CIGAR | implicitly stored in seqan3::sam_record::alignment
explicitly stored in seqan3::sam_record::cigar_sequence | seqan3::field::alignment
seqan3::field::cigar | +| 6 | CIGAR | seqan3::sam_record::cigar_sequence | seqan3::field::cigar | | 7 | RNEXT | seqan3::sam_record::mate_reference_id | seqan3::field::mate | | 8 | PNEXT | seqan3::sam_record::mate_position | seqan3::field::mate | | 9 | TLEN | seqan3::sam_record::template_length | seqan3::field::mate | @@ -138,14 +138,12 @@ Average: 27.4 \endsolution -# Alignment representation in the SAM format - -In SeqAn, we represent an alignment as a tuple of two `seqan3::aligned_sequence`s. +# Alignment representation in SAM/BAM files The SAM format is the common output format of read mappers where you align short read sequences to one or more large reference sequences. In fact, the SAM format stores those alignment information only partially: -It **does not store the reference sequence** but only the read sequence and a *CIGAR* string +It **does not store the reference sequence** but only the query/read sequence and a *CIGAR* string representing the alignment based on the read. Take this SAM record as an example: @@ -183,26 +181,17 @@ into a std::vector\: \snippet doc/tutorial/10_sam_file/sam_file_read_cigar.cpp main -## Reading the CIGAR information into an actual alignment - -In SeqAn, the conversion from a CIGAR string to an alignment (two seqan3::aligned_sequence's) is done automatically for you. -You can access it by accessing seqan3::sam_record::alignment from the record: - -\snippet doc/tutorial/10_sam_file/sam_file_alignments_without_ref.cpp main +## Transforming the CIGAR information into an seqan3 alignment -In the example above, you can only safely access the aligned read. - -\attention The **unknown** aligned reference sequence at the first position in the alignment tuple cannot be accessed - (e.g. via the `operator[]`). It is represented by a dummy type that throws on access. +In SeqAn, we represent an alignment as a tuple of two `seqan3::aligned_sequence`s. -Although the SAM format does not handle reference sequence information, -you can provide these information to the seqan3::sam_file_input which automatically fills the alignment object. -You can pass reference ids and reference sequences as additional constructor parameters: +The conversion from a CIGAR string to an alignment can be done with the function `seqan3::alignment_from_cigar`. +You need to pass the reference sequence with the position the read was aligned to and the read sequence: -\snippet doc/tutorial/10_sam_file/sam_file_alignments_with_ref.cpp main +\include snippet/alignment/cigar_conversion/alignment_from_cigar_io.cpp The code will print the following: -\include doc/tutorial/10_sam_file/sam_file_sam_record.err +\include snippet/alignment/cigar_conversion/alignment_from_cigar_io.err \assignment{Assignment 2: Combining sequence and alignment files} @@ -218,7 +207,7 @@ Only use * seqan3::sam_record::id, * seqan3::sam_record::reference_id, * seqan3::sam_record::mapping_quality, and -* seqan3::sam_record::alignment. +* seqan3::sam_record::cigar. With that information do the following: * Filter the alignment records and only take those with a mapping quality >= 30. @@ -226,6 +215,12 @@ With that information do the following: * For the resulting alignments, print which read was mapped against which reference id and the number of `seqan3::gap`s in each sequence (aligned reference and read sequence). +Hint: + +Given your reference sequences, you need to know which reference sequence to use for the alignment. +For this purpose, you can access `record.reference_id()` which gives you the position of the respective reference +sequence. + Your program should print the following: ``` diff --git a/doc/tutorial/10_sam_file/sam_file_alignments_with_ref.cpp b/doc/tutorial/10_sam_file/sam_file_alignments_with_ref.cpp deleted file mode 100644 index 3cee4dcdc6a..00000000000 --- a/doc/tutorial/10_sam_file/sam_file_alignments_with_ref.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include -seqan3::test::create_temporary_snippet_file example_sam{"example.sam", - R"//![sam_file]( -@HD VN:1.6 SO:coordinate -@SQ SN:ref LN:45 -r001 99 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG * -r003 0 ref 9 30 5S6M * 0 0 GCCTAAGCTAA * -r004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC * -r003 2064 ref 29 17 5M * 0 0 TAGGC * -r001 147 ref 37 30 9M = 7 -39 CAGCGGCAT * NM:i:1 -)//![sam_file]"}; // std::filesystem::current_path() / "example.sam" will be deleted after the execution - -//![main] -#include - -int main() -{ - using namespace seqan3::literals; - - auto filename = std::filesystem::current_path() / "example.sam"; - - std::vector ref_ids{"ref"}; // list of one reference name - std::vector ref_sequences{"AGAGTTCGAGATCGAGGACTAGCGACGAGGCAGCGAGCGATCGAT"_dna5}; - - seqan3::sam_file_input fin{filename, ref_ids, ref_sequences}; - - for (auto & record : fin) - seqan3::debug_stream << record.alignment() << '\n'; // Now you can print the whole alignment! -} -//![main] diff --git a/doc/tutorial/10_sam_file/sam_file_alignments_with_ref.err b/doc/tutorial/10_sam_file/sam_file_alignments_with_ref.err deleted file mode 100644 index 7079da13894..00000000000 --- a/doc/tutorial/10_sam_file/sam_file_alignments_with_ref.err +++ /dev/null @@ -1,5 +0,0 @@ -(CGAGATCG--AGGACTAG,TTAGATAAAGGATA-CTG) -(AGATCG,AGCTAA) -(GGACTAGCGACGAGGCAGCGAGCGA,ATAGCT--------------TCAGC) -(GGCAG,TAGGC) -(GCGATCGAT,CAGCGGCAT) diff --git a/doc/tutorial/10_sam_file/sam_file_alignments_without_ref.cpp b/doc/tutorial/10_sam_file/sam_file_alignments_without_ref.cpp deleted file mode 100644 index bab3e97c400..00000000000 --- a/doc/tutorial/10_sam_file/sam_file_alignments_without_ref.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include -seqan3::test::create_temporary_snippet_file example_sam{"example.sam", - R"//![sam_file]( -@HD VN:1.6 SO:coordinate -@SQ SN:ref LN:45 -r001 99 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG * -r003 0 ref 9 30 5S6M * 0 0 GCCTAAGCTAA * -r004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC * -r003 2064 ref 29 17 5M * 0 0 TAGGC * -r001 147 ref 37 30 9M = 7 -39 CAGCGGCAT * NM:i:1 -)//![sam_file]"}; // std::filesystem::current_path() / "example.sam" will be deleted after the execution - -//![main] -#include - -int main() -{ - auto filename = std::filesystem::current_path() / "example.sam"; - - seqan3::sam_file_input fin{filename}; - - for (auto & record : fin) - seqan3::debug_stream << record.id() << ": " << std::get<1>(record.alignment()) << '\n'; -} -//![main] diff --git a/doc/tutorial/10_sam_file/sam_file_alignments_without_ref.err b/doc/tutorial/10_sam_file/sam_file_alignments_without_ref.err deleted file mode 100644 index 1861652a3ba..00000000000 --- a/doc/tutorial/10_sam_file/sam_file_alignments_without_ref.err +++ /dev/null @@ -1,5 +0,0 @@ -r001: TTAGATAAAGGATA-CTG -r003: AGCTAA -r004: ATAGCT--------------TCAGC -r003: TAGGC -r001: CAGCGGCAT diff --git a/doc/tutorial/10_sam_file/sam_file_solution2.cpp b/doc/tutorial/10_sam_file/sam_file_solution2.cpp index 5144fe1cafe..851f23bae50 100644 --- a/doc/tutorial/10_sam_file/sam_file_solution2.cpp +++ b/doc/tutorial/10_sam_file/sam_file_solution2.cpp @@ -25,6 +25,7 @@ r003 2064 chr2 18 10 5M * 0 0 TAGGC * #include #include +#include #include #include #include @@ -59,14 +60,19 @@ int main() for (auto & record : mapping_file | mapq_filter) { + auto alignment = seqan3::alignment_from_cigar(record.cigar_sequence(), + reference_sequences[record.reference_id().value()], + record.reference_position().value(), + record.sequence()); + // as loop size_t sum_reference{}; - for (auto const & char_reference : std::get<0>(record.alignment())) + for (auto const & char_reference : std::get<0>(alignment)) if (char_reference == seqan3::gap{}) ++sum_reference; // or via std::ranges::count - size_t sum_read = std::ranges::count(std::get<1>(record.alignment()), seqan3::gap{}); + size_t sum_read = std::ranges::count(std::get<1>(alignment), seqan3::gap{}); // The reference_id is ZERO based and an optional. -1 is represented by std::nullopt (= reference not known). std::optional reference_id = record.reference_id(); diff --git a/doc/tutorial/10_sam_file/sam_file_writing.cpp b/doc/tutorial/10_sam_file/sam_file_writing.cpp index c2af9ceecd5..591538d5d9e 100644 --- a/doc/tutorial/10_sam_file/sam_file_writing.cpp +++ b/doc/tutorial/10_sam_file/sam_file_writing.cpp @@ -5,12 +5,6 @@ seqan3::test::create_temporary_snippet_file example_sam{"out.sam", ""}; //![main] #include -using aligned_sequence_type = std::vector>; -using alignment_type = std::pair; - -using types = seqan3::type_list, std::string, alignment_type>; -using fields = seqan3::fields; - int main() { using namespace seqan3::literals; @@ -18,6 +12,9 @@ int main() auto filename = std::filesystem::current_path() / "out.sam"; seqan3::sam_file_output fout{filename}; + + using types = seqan3::type_list, std::string, std::vector>; + using fields = seqan3::fields; using sam_record_type = seqan3::sam_record; // write the following to the file @@ -25,15 +22,10 @@ int main() sam_record_type record{}; record.id() = "r001"; record.sequence() = "ACGTACGT"_dna5; - auto & [reference_sequence, read_sequence] = record.alignment(); - - // ACGT--GTTT - seqan3::assign_unaligned(reference_sequence, "ACGTGTTT"_dna5); - seqan3::insert_gap(reference_sequence, reference_sequence.begin() + 4, 2); - - // ACGTACGT-- - seqan3::assign_unaligned(read_sequence, record.sequence()); - seqan3::insert_gap(read_sequence, read_sequence.end(), 2); + record.cigar_sequence() = {{4, 'M'_cigar_operation}, + {2, 'I'_cigar_operation}, + {2, 'M'_cigar_operation}, + {2, 'D'_cigar_operation}}; fout.push_back(record); } diff --git a/include/seqan3/io/sam_file/detail/format_sam_base.hpp b/include/seqan3/io/sam_file/detail/format_sam_base.hpp index 51390a4938d..a76f28af510 100644 --- a/include/seqan3/io/sam_file/detail/format_sam_base.hpp +++ b/include/seqan3/io/sam_file/detail/format_sam_base.hpp @@ -74,14 +74,6 @@ class format_sam_base header_type & header, ref_seqs_type & /*tag*/); - template - void construct_alignment(align_type & align, - std::vector & cigar_vector, - [[maybe_unused]] int32_t rid, - [[maybe_unused]] ref_seqs_type & ref_seqs, - [[maybe_unused]] int32_t ref_start, - size_t ref_length); - void transfer_soft_clipping_to(std::vector const & cigar_vector, int32_t & sc_begin, int32_t & sc_end) const; template @@ -199,68 +191,6 @@ inline void format_sam_base::transfer_soft_clipping_to(std::vector const sc_end = cigar_count_at(second_last_index); } -/*!\brief Construct the field::alignment depending on the given information. - * \tparam align_type The alignment type. - * \tparam ref_seqs_type The type of reference sequences (might decay to ignore). - * \param[in,out] align The alignment (pair of aligned sequences) to fill. - * \param[in] cigar_vector The cigar information to convert to an alignment. - * \param[in] rid The index of the reference sequence in header.ref_ids(). - * \param[in] ref_seqs The reference sequence information. - * \param[in] ref_start The start position of the alignment in the reference sequence. - * \param[in] ref_length The length of the aligned reference sequence. - */ -template -inline void format_sam_base::construct_alignment(align_type & align, - std::vector & cigar_vector, - [[maybe_unused]] int32_t rid, - [[maybe_unused]] ref_seqs_type & ref_seqs, - [[maybe_unused]] int32_t ref_start, - size_t ref_length) -{ - if (rid > -1 && ref_start > -1 && // read is mapped - !cigar_vector.empty() && // alignment field was not empty - !std::ranges::empty(get<1>(align))) // seq field was not empty - { - if constexpr (!detail::decays_to_ignore_v) - { - assert(static_cast(ref_start + ref_length) <= std::ranges::size(ref_seqs[rid])); - // copy over unaligned reference sequence part - assign_unaligned(get<0>(align), ref_seqs[rid] | views::slice(ref_start, ref_start + ref_length)); - } - else - { - using unaligned_t = std::remove_cvref_t(align))>>; - auto dummy_seq = views::repeat_n(std::ranges::range_value_t{}, ref_length) - | std::views::transform(detail::access_restrictor_fn{}); - static_assert(std::same_as, - "No reference information was given so the type of the first alignment tuple position" - "must have an unaligned sequence type of a dummy sequence (" - "views::repeat_n(dna5{}, size_t{}) | " - "std::views::transform(detail::access_restrictor_fn{}))"); - - assign_unaligned(get<0>(align), dummy_seq); // assign dummy sequence - } - - // insert gaps according to the cigar information - detail::alignment_from_cigar(align, cigar_vector); - } - else // not enough information for an alignment, assign an empty view/dummy_sequence - { - if constexpr (!detail::decays_to_ignore_v) // reference info given - { - assert(std::ranges::size(ref_seqs) > 0); // we assume that the given ref info is not empty - assign_unaligned(get<0>(align), ref_seqs[0] | views::slice(0, 0)); - } - else - { - using unaligned_t = std::remove_cvref_t(align))>>; - assign_unaligned(get<0>(align), - views::repeat_n(std::ranges::range_value_t{}, 0) - | std::views::transform(detail::access_restrictor_fn{})); - } - } -} - /*!\brief Reads std::byte fields using std::from_chars. * \tparam stream_view_t The type of the stream as a view. * diff --git a/include/seqan3/io/sam_file/format_bam.hpp b/include/seqan3/io/sam_file/format_bam.hpp index e5719d52660..bd7917006db 100644 --- a/include/seqan3/io/sam_file/format_bam.hpp +++ b/include/seqan3/io/sam_file/format_bam.hpp @@ -77,7 +77,6 @@ class format_bam : private detail::format_sam_base typename ref_seq_type, typename ref_id_type, typename ref_offset_type, - typename align_type, typename cigar_type, typename flag_type, typename mapq_type, @@ -98,7 +97,6 @@ class format_bam : private detail::format_sam_base ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq), ref_id_type & ref_id, ref_offset_type & ref_offset, - align_type & align, cigar_type & cigar_vector, flag_type & flag, mapq_type & mapq, @@ -113,7 +111,6 @@ class format_bam : private detail::format_sam_base typename id_type, typename ref_seq_type, typename ref_id_type, - typename align_type, typename cigar_type, typename qual_type, typename mate_type, @@ -128,7 +125,6 @@ class format_bam : private detail::format_sam_base [[maybe_unused]] ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq), [[maybe_unused]] ref_id_type && ref_id, [[maybe_unused]] std::optional ref_offset, - [[maybe_unused]] align_type && align, [[maybe_unused]] cigar_type && cigar_vector, [[maybe_unused]] sam_flag const flag, [[maybe_unused]] uint8_t const mapq, @@ -251,7 +247,6 @@ template tmp_cigar_vector{}; - [[maybe_unused]] int32_t ref_length{0}, seq_length{0}; // length of aligned part for ref and query + [[maybe_unused]] int32_t offset_tmp{}; // needed in case the cigar string was stored in the tag dictionary + [[maybe_unused]] int32_t ref_length{0}; // needed in case the cigar string was stored in the tag dictionary // Header // ------------------------------------------------------------------------------------------------------------- @@ -430,11 +421,12 @@ format_bam::read_alignment_record(stream_type & stream, // read cigar string // ------------------------------------------------------------------------------------------------------------- - if constexpr (!detail::decays_to_ignore_v || !detail::decays_to_ignore_v) + if constexpr (!detail::decays_to_ignore_v) { - std::tie(tmp_cigar_vector, ref_length, seq_length) = parse_binary_cigar(stream_view, core.n_cigar_op); - transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end); - // the actual cigar_vector is swapped with tmp_cigar_vector at the end to avoid copying + int32_t seq_length{0}; + std::tie(cigar_vector, ref_length, seq_length) = parse_binary_cigar(stream_view, core.n_cigar_op); + int32_t soft_clipping_end{}; + transfer_soft_clipping_to(cigar_vector, offset_tmp, soft_clipping_end); } else { @@ -464,56 +456,7 @@ format_bam::read_alignment_record(stream_type & stream, ++std::ranges::begin(stream_view); }; - if constexpr (!detail::decays_to_ignore_v) - { - static_assert(sequence_container(align))>>, - "If you want to read ALIGNMENT but not SEQ, the alignment" - " object must store a sequence container at the second (query) position."); - - if (!tmp_cigar_vector.empty()) // only parse alignment if cigar information was given - { - assert(core.l_seq == (seq_length + offset_tmp + soft_clipping_end)); // sanity check - using alph_t = std::ranges::range_value_t(align))>; - constexpr auto from_dna16 = detail::convert_through_char_representation; - - get<1>(align).reserve(seq_length); - - auto tmp_iter = std::ranges::begin(seq_stream); - std::ranges::advance(tmp_iter, offset_tmp / 2); // skip soft clipped bases at the beginning - - if (offset_tmp & 1) - { - get<1>(align).push_back(from_dna16[to_rank(get<1>(*tmp_iter))]); - ++tmp_iter; - --seq_length; - } - - for (size_t i = (seq_length / 2); i > 0; --i) - { - get<1>(align).push_back(from_dna16[to_rank(get<0>(*tmp_iter))]); - get<1>(align).push_back(from_dna16[to_rank(get<1>(*tmp_iter))]); - ++tmp_iter; - } - - if (seq_length & 1) - { - get<1>(align).push_back(from_dna16[to_rank(get<0>(*tmp_iter))]); - ++tmp_iter; - --soft_clipping_end; - } - - std::ranges::advance(tmp_iter, (soft_clipping_end + !(seq_length & 1)) / 2); - } - else - { - skip_sequence_bytes(); - get<1>(align) = std::remove_reference_t(align))>{}; // assign empty container - } - } - else - { - skip_sequence_bytes(); - } + skip_sequence_bytes(); } else { @@ -533,14 +476,6 @@ format_bam::read_alignment_record(stream_type & stream, seq.push_back(from_dna16[to_rank(d)]); ++std::ranges::begin(stream_view); } - - if constexpr (!detail::decays_to_ignore_v) - { - assign_unaligned(get<1>(align), - seq - | views::slice(static_cast>(offset_tmp), - std::ranges::distance(seq) - soft_clipping_end)); - } } } @@ -574,7 +509,7 @@ format_bam::read_alignment_record(stream_type & stream, // DONE READING - wrap up // ------------------------------------------------------------------------------------------------------------- - if constexpr (!detail::decays_to_ignore_v || !detail::decays_to_ignore_v) + if constexpr (!detail::decays_to_ignore_v) { // Check cigar, if it matches ‘kSmN’, where ‘k’ equals lseq, ‘m’ is the reference sequence length in the // alignment, and ‘S’ and ‘N’ are the soft-clipping and reference-clip, then the cigar string was larger @@ -607,29 +542,15 @@ format_bam::read_alignment_record(stream_type & stream, "record.")}; auto cigar_view = std::views::all(std::get(it->second)); - std::tie(tmp_cigar_vector, ref_length, seq_length) = detail::parse_cigar(cigar_view); - offset_tmp = soft_clipping_end = 0; - transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end); + int32_t seq_length{0}; + std::tie(cigar_vector, ref_length, seq_length) = detail::parse_cigar(cigar_view); + offset_tmp = 0; + int32_t soft_clipping_end{}; + transfer_soft_clipping_to(cigar_vector, offset_tmp, soft_clipping_end); tag_dict.erase(it); // remove redundant information - - if constexpr (!detail::decays_to_ignore_v) - { - assign_unaligned( - get<1>(align), - seq - | views::slice(static_cast>(offset_tmp), - std::ranges::distance(seq) - soft_clipping_end)); - } } } } - - // Alignment object construction - if constexpr (!detail::decays_to_ignore_v) - construct_alignment(align, tmp_cigar_vector, core.refID, ref_seqs, core.pos, ref_length); // inherited from SAM - - if constexpr (!detail::decays_to_ignore_v) - std::swap(cigar_vector, tmp_cigar_vector); } //!\copydoc seqan3::sam_file_output_format::write_alignment_record @@ -639,7 +560,6 @@ template ref_offset, - [[maybe_unused]] align_type && align, [[maybe_unused]] cigar_type && cigar_vector, [[maybe_unused]] sam_flag const flag, [[maybe_unused]] uint8_t const mapq, @@ -686,16 +605,6 @@ inline void format_bam::write_alignment_record([[maybe_unused]] stream_type & st "over letters that model seqan3::alphabet or an integral or a std::optional."); } - static_assert(tuple_like>, - "The align object must be a std::pair of two ranges whose " - "value_type is comparable to seqan3::gap"); - - static_assert((std::tuple_size_v> == 2 - && std::equality_comparable_with(align))>> - && std::equality_comparable_with(align))>>), - "The align object must be a std::pair of two ranges whose " - "value_type is comparable to seqan3::gap"); - static_assert((std::ranges::forward_range && alphabet>), "The qual object must be a std::ranges::forward_range " "over letters that model seqan3::alphabet."); @@ -775,38 +684,20 @@ inline void format_bam::write_alignment_record([[maybe_unused]] stream_type & st // --------------------------------------------------------------------- int32_t ref_length{}; - // if alignment is non-empty, replace cigar_vector. - // else, compute the ref_length from given cigar_vector which is needed to fill field `bin`. + // Compute the ref_length from given cigar_vector which is needed to fill field `bin`. if (!std::ranges::empty(cigar_vector)) { int32_t dummy_seq_length{}; for (auto & [count, operation] : cigar_vector) detail::update_alignment_lengths(ref_length, dummy_seq_length, operation.to_char(), count); } - else if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align))) - { - ref_length = std::ranges::distance(get<1>(align)); - - // compute possible distance from alignment end to sequence end - // which indicates soft clipping at the end. - // This should be replaced by a free count_gaps function for - // aligned sequences which is more efficient if possible. - int32_t off_end{static_cast(std::ranges::distance(seq)) - offset}; - - for (auto chr : get<1>(align)) - if (chr == gap{}) - ++off_end; - - off_end -= ref_length; - cigar_vector = detail::get_cigar_vector(align, offset, off_end); - } if (cigar_vector.size() >= (1 << 16)) // must be written into the sam tag CG { tag_dict["CG"_tag] = detail::get_cigar_string(cigar_vector); cigar_vector.resize(2); cigar_vector[0] = cigar{static_cast(std::ranges::distance(seq)), 'S'_cigar_operation}; - cigar_vector[1] = cigar{static_cast(std::ranges::distance(get<1>(align))), 'N'_cigar_operation}; + cigar_vector[1] = cigar{static_cast(ref_length), 'N'_cigar_operation}; } std::string tag_dict_binary_str = get_tag_dict_str(tag_dict); diff --git a/include/seqan3/io/sam_file/format_sam.hpp b/include/seqan3/io/sam_file/format_sam.hpp index 07f4958ebf6..5cd86aa11f3 100644 --- a/include/seqan3/io/sam_file/format_sam.hpp +++ b/include/seqan3/io/sam_file/format_sam.hpp @@ -59,7 +59,7 @@ namespace seqan3 * ### fields_specialisation * * The SAM format provides the following fields: - * seqan3::field::alignment, seqan3::field::seq, seqan3::field::qual, + * seqan3::field::seq, seqan3::field::qual, * seqan3::field::id, seqan3::field::ref_seq, seqan3::field::ref_id * seqan3::field::ref_ossfet, seqan3::field::offset, seqan3::field::flag, * seqan3::field::mapq and seqan3::field::mate. @@ -81,7 +81,7 @@ namespace seqan3 * | 3 | RNAME | seqan3::field::ref_id | * | 4 | POS | seqan3::field::ref_offset | * | 5 | MAPQ | seqan3::field::mapq | - * | 6 | CIGAR | implicilty stored in seqan3::field::alignment | + * | 6 | CIGAR | seqan3::field::cigar | * | 7 | RNEXT | seqan3::field::mate (tuple pos 0) | * | 8 | PNEXT | seqan3::field::mate (tuple pos 1) | * | 9 | TLEN | seqan3::field::mate (tuple pos 2) | @@ -93,10 +93,6 @@ namespace seqan3 * deduced by how much the read sequence length + offset is larger than the * alignment length). * - * Note: SeqAn currently does not support hard clipping. When reading SAM, - * hard-clipping is discarded; but the resulting alignment/sequence combination - * is still valid. - * * ### Format Check * * The format checks are implemented according to the official @@ -169,7 +165,6 @@ class format_sam : private detail::format_sam_base typename ref_seq_type, typename ref_id_type, typename ref_offset_type, - typename align_type, typename cigar_type, typename flag_type, typename mapq_type, @@ -190,7 +185,6 @@ class format_sam : private detail::format_sam_base ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq), ref_id_type & ref_id, ref_offset_type & ref_offset, - align_type & align, cigar_type & cigar_vector, flag_type & flag, mapq_type & mapq, @@ -205,7 +199,6 @@ class format_sam : private detail::format_sam_base typename id_type, typename ref_seq_type, typename ref_id_type, - typename align_type, typename qual_type, typename mate_type, typename tag_dict_type, @@ -221,7 +214,6 @@ class format_sam : private detail::format_sam_base ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq), ref_id_type && ref_id, std::optional ref_offset, - align_type && align, std::vector const & cigar_vector, sam_flag const flag, uint8_t const mapq, @@ -311,7 +303,6 @@ inline void format_sam::read_sequence_record(stream_type & stream, std::ignore, std::ignore, std::ignore, - std::ignore, std::ignore); } @@ -337,7 +328,6 @@ inline void format_sam::write_sequence_record(stream_type & stream, id_type && id, qual_type && qualities) { - using default_align_t = std::pair>, std::span>>; using default_mate_t = std::tuple, int32_t>; sam_file_output_options output_options; @@ -352,7 +342,6 @@ inline void format_sam::write_sequence_record(stream_type & stream, /*ref_seq*/ std::string_view{}, /*ref_id*/ std::string_view{}, /*ref_offset*/ -1, - /*align*/ default_align_t{}, /*cigar_vector*/ std::vector{}, /*flag*/ sam_flag::none, /*mapq*/ 0, @@ -374,7 +363,6 @@ template ); - // these variables need to be stored to compute the ALIGNMENT - int32_t ref_offset_tmp{}; - std::ranges::range_value_t ref_id_tmp{}; - [[maybe_unused]] int32_t offset_tmp{}; - [[maybe_unused]] int32_t soft_clipping_end{}; - [[maybe_unused]] std::vector tmp_cigar_vector{}; - [[maybe_unused]] int32_t ref_length{0}, seq_length{0}; // length of aligned part for ref and query + int32_t ref_offset_tmp{}; // needed to read the ref_offset (int) beofre storing it in std::optional + std::ranges::range_value_t ref_id_tmp{}; // in case mate is requested but ref_offset not // Header // ------------------------------------------------------------------------------------------------------------- @@ -464,13 +446,16 @@ format_sam::read_alignment_record(stream_type & stream, // Field 6: CIGAR // ------------------------------------------------------------------------------------------------------------- - if constexpr (!detail::decays_to_ignore_v || !detail::decays_to_ignore_v) + if constexpr (!detail::decays_to_ignore_v) { if (!is_char<'*'>(*std::ranges::begin(stream_view))) // no cigar information given { - std::tie(tmp_cigar_vector, ref_length, seq_length) = detail::parse_cigar(field_view); - transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end); - // the actual cigar_vector is swapped with tmp_cigar_vector at the end to avoid copying + int32_t ref_length{0}, seq_length{0}; // length of aligned part for ref and query + std::tie(cigar_vector, ref_length, seq_length) = detail::parse_cigar(field_view); + int32_t soft_clipping_end{}; + int32_t offset_tmp{}; + transfer_soft_clipping_to(cigar_vector, offset_tmp, soft_clipping_end); + offset = offset_tmp; } else { @@ -482,8 +467,6 @@ format_sam::read_alignment_record(stream_type & stream, detail::consume(field_view); } - offset = offset_tmp; - // Field 7-9: (RNEXT PNEXT TLEN) = MATE // ------------------------------------------------------------------------------------------------------------- if constexpr (!detail::decays_to_ignore_v) @@ -541,52 +524,11 @@ format_sam::read_alignment_record(stream_type & stream, if constexpr (detail::decays_to_ignore_v) { - if constexpr (!detail::decays_to_ignore_v) - { - static_assert(sequence_container(align))>>, - "If you want to read ALIGNMENT but not SEQ, the alignment" - " object must store a sequence container at the second (query) position."); - - if (!tmp_cigar_vector.empty()) // only parse alignment if cigar information was given - { - - auto tmp_iter = std::ranges::begin(seq_stream); - std::ranges::advance(tmp_iter, offset_tmp); - - for (; seq_length > 0; --seq_length) // seq_length is not needed anymore - { - get<1>(align).push_back( - std::ranges::range_value_t(align))>{}.assign_char(*tmp_iter)); - ++tmp_iter; - } - - std::ranges::advance(tmp_iter, soft_clipping_end); - } - else - { - get<1>(align) = std::remove_reference_t(align))>{}; // empty container - } - } - else - { - detail::consume(seq_stream); - } + detail::consume(seq_stream); } else { read_forward_range_field(seq_stream, seq); - - if constexpr (!detail::decays_to_ignore_v) - { - if (!tmp_cigar_vector - .empty()) // if no alignment info is given, the field::alignment should remain empty - { - assign_unaligned(get<1>(align), - seq - | views::slice(static_cast(offset_tmp), - std::ranges::size(seq) - soft_clipping_end)); - } - } } } else @@ -629,29 +571,6 @@ format_sam::read_alignment_record(stream_type & stream, } detail::consume(stream_view | detail::take_until(!(is_char<'\r'> || is_char<'\n'>))); // consume new line - - // DONE READING - wrap up - // ------------------------------------------------------------------------------------------------------------- - // Alignment object construction - // Note that the query sequence in get<1>(align) has already been filled while reading Field 10. - if constexpr (!detail::decays_to_ignore_v) - { - int32_t ref_idx{(ref_id_tmp.empty() /*unmapped read?*/) ? -1 : 0}; - - if constexpr (!detail::decays_to_ignore_v) - { - if (!ref_id_tmp.empty()) - { - assert(header.ref_dict.count(ref_id_tmp) != 0); // taken care of in check_and_assign_ref_id() - ref_idx = header.ref_dict[ref_id_tmp]; // get index for reference sequence - } - } - - construct_alignment(align, tmp_cigar_vector, ref_idx, ref_seqs, ref_offset_tmp, ref_length); - } - - if constexpr (!detail::decays_to_ignore_v) - std::swap(cigar_vector, tmp_cigar_vector); } //!\copydoc sam_file_output_format::write_alignment_record @@ -661,7 +580,6 @@ template ref_offset, - align_type && align, std::vector const & cigar_vector, sam_flag const flag, uint8_t const mapq, @@ -723,16 +640,6 @@ inline void format_sam::write_alignment_record(stream_type & stream, "If you give indices as reference id information the header must also be present."); } - static_assert(tuple_like>, - "The align object must be a std::pair of two ranges whose " - "value_type is comparable to seqan3::gap"); - - static_assert((std::tuple_size_v> == 2 - && std::equality_comparable_with(align))>> - && std::equality_comparable_with(align))>>), - "The align object must be a std::pair of two ranges whose " - "value_type is comparable to seqan3::gap"); - static_assert((std::ranges::forward_range && alphabet>), "The qual object must be a std::ranges::forward_range " "over letters that model seqan3::alphabet."); @@ -867,23 +774,6 @@ inline void format_sam::write_alignment_record(stream_type & stream, for (auto & c : cigar_vector) //TODO THIS IS PROBABLY TERRIBLE PERFORMANCE_WISE stream_it.write_range(c.to_string()); } - else if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align))) - { - // compute possible distance from alignment end to sequence end - // which indicates soft clipping at the end. - // This should be replace by a free count_gaps function for - // aligned sequences which is more efficient if possible. - size_t off_end{std::ranges::size(seq) - offset}; - for (auto chr : get<1>(align)) - if (chr == gap{}) - ++off_end; - - // Might happen if get<1>(align) doesn't correspond to the reference. - assert(off_end >= std::ranges::size(get<1>(align))); - off_end -= std::ranges::size(get<1>(align)); - - write_range_or_asterisk(stream_it, detail::get_cigar_string(align, offset, off_end)); - } else { *stream_it = '*'; diff --git a/include/seqan3/io/sam_file/input.hpp b/include/seqan3/io/sam_file/input.hpp index b828a8bd641..66f63ce813c 100644 --- a/include/seqan3/io/sam_file/input.hpp +++ b/include/seqan3/io/sam_file/input.hpp @@ -21,7 +21,6 @@ #include #include -#include #include #include #include @@ -149,16 +148,6 @@ concept sam_file_input_traits = // field::evalue is fixed to double // field::bitscore is fixed to double // field::mate is fixed to std::tuple, ref_offset_type, int32_t> - - // field::alignment - // the alignment type cannot be configured. - // Type of tuple entry 1 (reference) is set to - // 1) a std::ranges::subrange over std::ranges::range_value_t if reference information was given - // or 2) a "dummy" sequence type: - // views::repeat_n(sequence_alphabet{}, size_t{}) | std::views::transform(detail::access_restrictor_fn{}) - // Type of tuple entry 2 (query) is set to - // 1) a std::ranges::subrange over std::ranges::range_value_t if reference information was given - // or 2) a "dummy" sequence type: }; //!\endcond @@ -222,7 +211,7 @@ struct sam_file_input_default_traits // sam_file_input // --------------------------------------------------------------------------------------------------------------------- -/*!\brief A class for reading alignment files, e.g. SAM, BAM, BLAST ... +/*!\brief A class for reading SAM files, both SAM and its binary representation BAM are supported. * \ingroup io_sam_file * \tparam traits_type An auxiliary type that defines certain member types and constants, must model * seqan3::sam_file_input_traits. @@ -243,7 +232,6 @@ template , field::offset, field::ref_id, field::ref_offset, - field::alignment, field::cigar, field::mapq, field::qual, @@ -332,24 +320,13 @@ class sam_file_input //!\brief The type of field::header_ptr (default: sam_file_header). using header_type = sam_file_header; -private: - //!\brief The type of the aligned query sequence (second type of the pair of alignment_type). - using alignment_query_type = std::conditional_t< - selected_field_ids::contains(field::seq), - gap_decorator() | views::slice(0, 0))>, - typename traits_type::template sequence_container>>; - public: - //!\brief The type of field::alignment (default: std::pair>, std::vector>>). - using alignment_type = std::tuple, alignment_query_type>; - //!\brief The previously defined types aggregated in a seqan3::type_list. using field_types = type_list, mapq_type, quality_type, @@ -360,20 +337,19 @@ class sam_file_input /*!\brief The subset of seqan3::field tags valid for this file; order corresponds to the types in \ref field_types. * - * The SAM file abstraction supports reading 12 different fields: + * The SAM file abstraction supports reading 11 different fields: * * 1. seqan3::field::seq * 2. seqan3::field::id * 3. seqan3::field::offset * 4. seqan3::field::ref_id * 5. seqan3::field::ref_offset - * 6. seqan3::field::alignment - * 7. seqan3::field::cigar - * 8. seqan3::field::mapq - * 9. seqan3::field::qual - * 10. seqan3::field::flag - * 11. seqan3::field::mate - * 12. seqan3::field::tags + * 6. seqan3::field::cigar + * 7. seqan3::field::mapq + * 8. seqan3::field::qual + * 9. seqan3::field::flag + * 10. seqan3::field::mate + * 11. seqan3::field::tags * * There exists one more field for SAM files, the seqan3::field::header_ptr, but this field is mostly used * internally. Please see the seqan3::sam_file_output::header member function for details on how to access @@ -384,7 +360,6 @@ class sam_file_input field::offset, field::ref_id, field::ref_offset, - field::alignment, field::cigar, field::mapq, field::qual, @@ -393,6 +368,10 @@ class sam_file_input field::tags, field::header_ptr>; + static_assert(!selected_field_ids::contains(field::alignment), + "The field::alignment is deprecated and only field::cigar is supported. Please see " + "seqan3::alignment_from_cigar on how to get an alignment from the cigar information."); + static_assert( []() constexpr { for (field f : selected_field_ids::as_array) @@ -400,7 +379,7 @@ class sam_file_input return false; return true; }(), - "You selected a field that is not valid for alignment files, please refer to the documentation " + "You selected a field that is not valid for SAM files, please refer to the documentation " "of sam_file_input::field_ids for the accepted values."); //!\brief The type of the record, a specialisation of seqan3::record; acts as a tuple of the selected field types. @@ -871,7 +850,6 @@ class sam_file_input detail::get_or_ignore(record_buffer), detail::get_or_ignore(record_buffer), detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer), detail::get_or_ignore(record_buffer), detail::get_or_ignore(record_buffer), detail::get_or_ignore(record_buffer), diff --git a/include/seqan3/io/sam_file/input_format_concept.hpp b/include/seqan3/io/sam_file/input_format_concept.hpp index e32407dfa81..f0cdcef0998 100644 --- a/include/seqan3/io/sam_file/input_format_concept.hpp +++ b/include/seqan3/io/sam_file/input_format_concept.hpp @@ -88,7 +88,6 @@ concept sam_file_input_format = requires (detail::sam_file_input_format_exposer< dna5_vector & ref_seq, std::optional & ref_id, std::optional & ref_offset, - std::pair>, std::vector>> & align, std::vector & cigar, sam_flag & flag, uint8_t & mapq, @@ -112,7 +111,6 @@ concept sam_file_input_format = requires (detail::sam_file_input_format_exposer< ref_seq, ref_id, ref_offset, - align, cigar, flag, mapq, @@ -141,7 +139,6 @@ concept sam_file_input_format = requires (detail::sam_file_input_format_exposer< std::ignore, std::ignore, std::ignore, - std::ignore, std::ignore) }; }; diff --git a/include/seqan3/io/sam_file/output.hpp b/include/seqan3/io/sam_file/output.hpp index 3fee46743dc..dc986e4e1bd 100644 --- a/include/seqan3/io/sam_file/output.hpp +++ b/include/seqan3/io/sam_file/output.hpp @@ -44,7 +44,7 @@ namespace seqan3 // sam_file_output // ---------------------------------------------------------------------------- -/*!\brief A class for writing alignment files, e.g. SAM, BAL, BLAST, ... +/*!\brief A class for writing SAM files, both SAM and its binary representation BAM are supported. * \ingroup io_sam_file * \tparam selected_field_ids A seqan3::fields type with the list and order of * fields IDs; only relevant if these can't be deduced. @@ -62,7 +62,6 @@ template (r, std::string_view{}), detail::get_or(r, std::ignore), detail::get_or(r, std::optional{}), - detail::get_or(r, default_align_t{}), detail::get_or(r, std::vector{}), detail::get_or(r, sam_flag::none), detail::get_or(r, 0u), @@ -443,7 +440,6 @@ class sam_file_output detail::get_or(t, std::string_view{}), detail::get_or(t, std::ignore), detail::get_or(t, std::optional{}), - detail::get_or(t, default_align_t{}), detail::get_or(t, std::vector{}), detail::get_or(t, sam_flag::none), detail::get_or(t, 0u), @@ -659,7 +655,7 @@ class sam_file_output template void write_record(record_header_ptr_t && record_header_ptr, pack_type &&... remainder) { - static_assert((sizeof...(pack_type) == 15), "Wrong parameter list passed to write_record."); + static_assert((sizeof...(pack_type) == 14), "Wrong parameter list passed to write_record."); assert(!format.valueless_by_exception()); diff --git a/include/seqan3/io/sam_file/output_format_concept.hpp b/include/seqan3/io/sam_file/output_format_concept.hpp index e8fa5db246d..91cc1101301 100644 --- a/include/seqan3/io/sam_file/output_format_concept.hpp +++ b/include/seqan3/io/sam_file/output_format_concept.hpp @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -86,7 +85,6 @@ concept sam_file_output_format = requires (detail::sam_file_output_format_expose dna5_vector & ref_seq, std::optional & ref_id, std::optional & ref_offset, - std::pair>, std::vector>> & align, std::vector & cigar, sam_flag & flag, uint8_t & mapq, @@ -107,7 +105,6 @@ concept sam_file_output_format = requires (detail::sam_file_output_format_expose ref_seq, ref_id, ref_offset, - align, cigar, flag, mapq, @@ -142,7 +139,6 @@ class sam_file_output_format ref_seq_type && ref_seq, ref_id_type && ref_id, ref_offset_type && ref_offset, - align_type && align, std::vector & cigar_vector, flag_type && flag, mapq_type && mapq, @@ -158,7 +154,6 @@ class sam_file_output_format * \tparam ref_seq_type Type of the seqan3 * \tparam ref_id_type Type of the seqan3 * \tparam ref_offset_type Type of the seqan3 - * \tparam align_type Type of the seqan3 * \tparam flag_type Type of the seqan3 * \tparam mapq_type Type of the seqan3 * \tparam qual_type Type of the seqan3 @@ -177,7 +172,6 @@ class sam_file_output_format * \param[in] ref_seq The data for seqan3::field::ref_offset, i.e. the reference sequence. * \param[in] ref_id The data for seqan3::field::ref_id, e.g. the reference id.. * \param[in] ref_offset The data for seqan3::field::ref_offset, i.e. the start position of the alignment in \p ref_seq. - * \param[in] align The data for seqan3::field::align, e.g. the alignment between query and ref. * \param[in] cigar_vector The data for seqan3::field::cigar, e.g. representing the alignment between query and ref. * \param[in] flag The data for seqan3::field::flag, e.g. the SAM mapping flag value. * \param[in] mapq The data for seqan3::field::mapq, e.g. the mapping quality value. diff --git a/include/seqan3/io/sam_file/record.hpp b/include/seqan3/io/sam_file/record.hpp index de642c61087..a293c3c40f0 100644 --- a/include/seqan3/io/sam_file/record.hpp +++ b/include/seqan3/io/sam_file/record.hpp @@ -152,29 +152,6 @@ class sam_record : public record return get_impl(field_constant{}, static_cast(*this)); } - /*!\brief The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment. - * \returns seqan3::sam_file_input::alignment_type per default - */ - decltype(auto) alignment() && - { - return get_impl(field_constant{}, static_cast(*this)); - } - //!\copydoc seqan3::sam_record::alignment - decltype(auto) alignment() const && - { - return get_impl(field_constant{}, static_cast(*this)); - } - //!\copydoc seqan3::sam_record::alignment - decltype(auto) alignment() & - { - return get_impl(field_constant{}, static_cast(*this)); - } - //!\copydoc seqan3::sam_record::alignment - decltype(auto) alignment() const & - { - return get_impl(field_constant{}, static_cast(*this)); - } - /*!\brief The identifier of the (reference) sequence that seqan3::sam_record::sequence was aligned to. * (SAM Column ID: RNAME) * \returns seqan3::sam_file_input::ref_id_type per default diff --git a/test/snippet/io/sam_file/sam_file_input_construction_without_automatic_type_deduction.cpp b/test/snippet/io/sam_file/sam_file_input_construction_without_automatic_type_deduction.cpp index c380da65f08..e9908f3ecd4 100644 --- a/test/snippet/io/sam_file/sam_file_input_construction_without_automatic_type_deduction.cpp +++ b/test/snippet/io/sam_file/sam_file_input_construction_without_automatic_type_deduction.cpp @@ -14,7 +14,6 @@ int main() seqan3::field::offset, seqan3::field::ref_id, seqan3::field::ref_offset, - seqan3::field::alignment, seqan3::field::cigar, seqan3::field::mapq, seqan3::field::qual, diff --git a/test/unit/io/sam_file/format_bam_test.cpp b/test/unit/io/sam_file/format_bam_test.cpp index c68f2ab932b..bd3002b6521 100644 --- a/test/unit/io/sam_file/format_bam_test.cpp +++ b/test/unit/io/sam_file/format_bam_test.cpp @@ -300,72 +300,48 @@ struct sam_file_read : public sam_file_data '\x02', '\x02', '\x03', '\x61', '\x61', '\x41', '\x63', '\x41', '\x53', '\x43', '\x02', '\x66', '\x66', '\x66', '\x66', '\x66', '\x46', '\x40', '\x7A', '\x7A', '\x5A', '\x73', '\x74', '\x72', '\x00', '\x0A'}; - std::string simple_three_reads_output{ - // no hard clipping in output - '\x42', '\x41', '\x4D', '\x01', '\x1C', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E', - '\x3A', '\x31', '\x2E', '\x36', '\x0A', '\x40', '\x53', '\x51', '\x09', '\x53', '\x4E', '\x3A', '\x72', '\x65', - '\x66', '\x09', '\x4C', '\x4E', '\x3A', '\x33', '\x34', '\x0A', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', - '\x00', '\x00', '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00', '\x48', '\x00', '\x00', '\x00', - '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x06', '\x3D', '\x49', '\x12', '\x05', '\x00', - '\x29', '\x00', '\x04', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', - '\x2C', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x31', '\x00', '\x14', '\x00', '\x00', '\x00', - '\x10', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', - '\x00', '\x00', '\x12', '\x48', '\x00', '\x02', '\x02', '\x03', '\x41', '\x53', '\x43', '\x02', '\x4E', '\x4D', - '\x43', '\x07', '\x52', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x01', '\x00', '\x00', '\x00', - '\x06', '\x3E', '\x49', '\x12', '\x04', '\x00', '\x2A', '\x00', '\x09', '\x00', '\x00', '\x00', '\x00', '\x00', - '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2C', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', - '\x32', '\x00', '\x70', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', - '\x14', '\x00', '\x00', '\x00', '\x14', '\x42', '\x84', '\xF1', '\x40', '\x00', '\x02', '\x02', '\x03', '\x05', - '\x06', '\x07', '\x08', '\x09', '\x78', '\x79', '\x42', '\x53', '\x03', '\x00', '\x00', '\x00', '\x03', '\x00', - '\x04', '\x00', '\x05', '\x00', '\x5A', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\x00', - '\x00', '\x00', '\x06', '\x3F', '\x49', '\x12', '\x0A', '\x00', '\x2B', '\x00', '\x08', '\x00', '\x00', '\x00', - '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2C', '\x01', '\x00', '\x00', '\x72', '\x65', - '\x61', '\x64', '\x33', '\x00', '\x14', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x16', '\x00', - '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', - '\x11', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00', - '\x00', '\x00', '\x44', '\x14', '\x81', '\x81', '\x00', '\x00', '\x09', '\x0A', '\x0B', '\x0C', '\x0D', '\x0E'}; - std::string verbose_output{ - '\x42', '\x41', '\x4D', '\x01', '\xA6', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E', - '\x3A', '\x31', '\x2E', '\x36', '\x09', '\x53', '\x4F', '\x3A', '\x75', '\x6E', '\x6B', '\x6E', '\x6F', '\x77', - '\x6E', '\x09', '\x47', '\x4F', '\x3A', '\x6E', '\x6F', '\x6E', '\x65', '\x0A', '\x40', '\x53', '\x51', '\x09', - '\x53', '\x4E', '\x3A', '\x72', '\x65', '\x66', '\x09', '\x4C', '\x4E', '\x3A', '\x33', '\x34', '\x09', '\x41', - '\x4E', '\x3A', '\x6F', '\x74', '\x68', '\x65', '\x72', '\x5F', '\x6E', '\x61', '\x6D', '\x65', '\x0A', '\x40', - '\x52', '\x47', '\x09', '\x49', '\x44', '\x3A', '\x67', '\x72', '\x6F', '\x75', '\x70', '\x31', '\x09', '\x44', - '\x53', '\x3A', '\x6D', '\x6F', '\x72', '\x65', '\x20', '\x69', '\x6E', '\x66', '\x6F', '\x0A', '\x40', '\x50', - '\x47', '\x09', '\x49', '\x44', '\x3A', '\x70', '\x72', '\x6F', '\x67', '\x31', '\x09', '\x50', '\x4E', '\x3A', - '\x63', '\x6F', '\x6F', '\x6C', '\x5F', '\x70', '\x72', '\x6F', '\x67', '\x72', '\x61', '\x6D', '\x09', '\x43', - '\x4C', '\x3A', '\x2E', '\x2F', '\x70', '\x72', '\x6F', '\x67', '\x31', '\x09', '\x50', '\x50', '\x3A', '\x61', - '\x09', '\x44', '\x53', '\x3A', '\x62', '\x09', '\x56', '\x4E', '\x3A', '\x63', '\x0A', '\x40', '\x43', '\x4F', - '\x09', '\x54', '\x68', '\x69', '\x73', '\x20', '\x69', '\x73', '\x20', '\x61', '\x20', '\x63', '\x6F', '\x6D', - '\x6D', '\x65', '\x6E', '\x74', '\x2E', '\x0A', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', '\x00', + '\x42', '\x41', '\x4d', '\x01', '\xa6', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4e', + '\x3a', '\x31', '\x2e', '\x36', '\x09', '\x53', '\x4f', '\x3a', '\x75', '\x6e', '\x6b', '\x6e', '\x6f', '\x77', + '\x6e', '\x09', '\x47', '\x4f', '\x3a', '\x6e', '\x6f', '\x6e', '\x65', '\x0a', '\x40', '\x53', '\x51', '\x09', + '\x53', '\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09', '\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x09', '\x41', + '\x4e', '\x3a', '\x6f', '\x74', '\x68', '\x65', '\x72', '\x5f', '\x6e', '\x61', '\x6d', '\x65', '\x0a', '\x40', + '\x52', '\x47', '\x09', '\x49', '\x44', '\x3a', '\x67', '\x72', '\x6f', '\x75', '\x70', '\x31', '\x09', '\x44', + '\x53', '\x3a', '\x6d', '\x6f', '\x72', '\x65', '\x20', '\x69', '\x6e', '\x66', '\x6f', '\x0a', '\x40', '\x50', + '\x47', '\x09', '\x49', '\x44', '\x3a', '\x70', '\x72', '\x6f', '\x67', '\x31', '\x09', '\x50', '\x4e', '\x3a', + '\x63', '\x6f', '\x6f', '\x6c', '\x5f', '\x70', '\x72', '\x6f', '\x67', '\x72', '\x61', '\x6d', '\x09', '\x43', + '\x4c', '\x3a', '\x2e', '\x2f', '\x70', '\x72', '\x6f', '\x67', '\x31', '\x09', '\x50', '\x50', '\x3a', '\x61', + '\x09', '\x44', '\x53', '\x3a', '\x62', '\x09', '\x56', '\x4e', '\x3a', '\x63', '\x0a', '\x40', '\x43', '\x4f', + '\x09', '\x54', '\x68', '\x69', '\x73', '\x20', '\x69', '\x73', '\x20', '\x61', '\x20', '\x63', '\x6f', '\x6d', + '\x6d', '\x65', '\x6e', '\x74', '\x2e', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', '\x00', '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00', '\x64', '\x00', '\x00', '\x00', '\x00', '\x00', - '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x06', '\x3D', '\x49', '\x12', '\x05', '\x00', '\x29', '\x00', - '\x04', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2C', '\x01', + '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x06', '\x3d', '\x49', '\x12', '\x05', '\x00', '\x29', '\x00', + '\x04', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x31', '\x00', '\x14', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00', - '\x12', '\x48', '\x00', '\x02', '\x02', '\x03', '\x41', '\x53', '\x43', '\x02', '\x43', '\x43', '\x53', '\x2C', - '\x01', '\x4E', '\x4D', '\x63', '\xF9', '\x61', '\x61', '\x41', '\x63', '\x63', '\x63', '\x73', '\xD4', '\xFE', - '\x66', '\x66', '\x66', '\x66', '\x66', '\x46', '\x40', '\x7A', '\x7A', '\x5A', '\x73', '\x74', '\x72', '\x00', - '\xA7', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x01', '\x00', '\x00', '\x00', '\x06', '\x3E', - '\x49', '\x12', '\x04', '\x00', '\x2A', '\x00', '\x09', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', - '\x09', '\x00', '\x00', '\x00', '\x2C', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x32', '\x00', - '\x70', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00', - '\x00', '\x00', '\x14', '\x42', '\x84', '\xF1', '\x40', '\x00', '\x02', '\x02', '\x03', '\x05', '\x06', '\x07', - '\x08', '\x09', '\x62', '\x43', '\x42', '\x43', '\x02', '\x00', '\x00', '\x00', '\x03', '\xC8', '\x62', '\x49', - '\x42', '\x49', '\x01', '\x00', '\x00', '\x00', '\x00', '\xD8', '\x94', '\x11', '\x62', '\x53', '\x42', '\x53', - '\x03', '\x00', '\x00', '\x00', '\x2C', '\x01', '\x28', '\x00', '\xF4', '\x01', '\x62', '\x63', '\x42', '\x63', - '\x01', '\x00', '\x00', '\x00', '\xFD', '\x62', '\x66', '\x42', '\x66', '\x03', '\x00', '\x00', '\x00', '\x00', - '\x00', '\x60', '\x40', '\xCD', '\xCC', '\xCC', '\x3D', '\x33', '\x33', '\x2F', '\x42', '\x62', '\x69', '\x42', - '\x69', '\x03', '\x00', '\x00', '\x00', '\xFD', '\xFF', '\xFF', '\xFF', '\xC8', '\x00', '\x00', '\x00', '\x30', - '\xFE', '\xFE', '\xFF', '\x62', '\x73', '\x42', '\x73', '\x03', '\x00', '\x00', '\x00', '\xFD', '\xFF', '\xC8', - '\x00', '\xD4', '\xFE', '\x5A', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\x00', '\x00', - '\x00', '\x06', '\x3F', '\x49', '\x12', '\x0A', '\x00', '\x2B', '\x00', '\x08', '\x00', '\x00', '\x00', '\x00', - '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2C', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', - '\x64', '\x33', '\x00', '\x14', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x16', '\x00', '\x00', - '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', - '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00', '\x00', - '\x00', '\x44', '\x14', '\x81', '\x81', '\x00', '\x00', '\x09', '\x0A', '\x0B', '\x0C', '\x0D', '\x0E'}; + '\x12', '\x48', '\x00', '\x02', '\x02', '\x03', '\x41', '\x53', '\x43', '\x02', '\x43', '\x43', '\x53', '\x2c', + '\x01', '\x4e', '\x4d', '\x63', '\xf9', '\x61', '\x61', '\x41', '\x63', '\x63', '\x63', '\x73', '\xd4', '\xfe', + '\x66', '\x66', '\x66', '\x66', '\x66', '\x46', '\x40', '\x7a', '\x7a', '\x5a', '\x73', '\x74', '\x72', '\x00', + '\xaf', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x01', '\x00', '\x00', '\x00', '\x06', '\x3e', + '\x49', '\x12', '\x06', '\x00', '\x2a', '\x00', '\x09', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', + '\x09', '\x00', '\x00', '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x32', '\x00', + '\x15', '\x00', '\x00', '\x00', '\x70', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', + '\x00', '\x00', '\x14', '\x00', '\x00', '\x00', '\x25', '\x00', '\x00', '\x00', '\x14', '\x42', '\x84', '\xf1', + '\x40', '\x00', '\x02', '\x02', '\x03', '\x05', '\x06', '\x07', '\x08', '\x09', '\x62', '\x43', '\x42', '\x43', + '\x02', '\x00', '\x00', '\x00', '\x03', '\xc8', '\x62', '\x49', '\x42', '\x49', '\x01', '\x00', '\x00', '\x00', + '\x00', '\xd8', '\x94', '\x11', '\x62', '\x53', '\x42', '\x53', '\x03', '\x00', '\x00', '\x00', '\x2c', '\x01', + '\x28', '\x00', '\xf4', '\x01', '\x62', '\x63', '\x42', '\x63', '\x01', '\x00', '\x00', '\x00', '\xfd', '\x62', + '\x66', '\x42', '\x66', '\x03', '\x00', '\x00', '\x00', '\x00', '\x00', '\x60', '\x40', '\xcd', '\xcc', '\xcc', + '\x3d', '\x33', '\x33', '\x2f', '\x42', '\x62', '\x69', '\x42', '\x69', '\x03', '\x00', '\x00', '\x00', '\xfd', + '\xff', '\xff', '\xff', '\xc8', '\x00', '\x00', '\x00', '\x30', '\xfe', '\xfe', '\xff', '\x62', '\x73', '\x42', + '\x73', '\x03', '\x00', '\x00', '\x00', '\xfd', '\xff', '\xc8', '\x00', '\xd4', '\xfe', '\x5a', '\x00', '\x00', + '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\x00', '\x00', '\x00', '\x06', '\x3f', '\x49', '\x12', '\x0a', + '\x00', '\x2b', '\x00', '\x08', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', + '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x33', '\x00', '\x14', '\x00', '\x00', + '\x00', '\x10', '\x00', '\x00', '\x00', '\x16', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', + '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', + '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00', '\x00', '\x00', '\x44', '\x14', '\x81', '\x81', '\x00', + '\x00', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e'}; std::string special_output{ '\x42', '\x41', '\x4D', '\x01', '\x1C', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E', @@ -560,15 +536,14 @@ TEST_F(bam_format, too_long_cigar_string_read) seqan3::sam_file_input fin{stream, this->ref_ids, this->ref_sequences, seqan3::format_bam{}}; - EXPECT_RANGE_EQ(std::get<0>((*fin.begin()).alignment()), std::get<0>(this->alignments[0])); - EXPECT_RANGE_EQ(std::get<1>((*fin.begin()).alignment()), std::get<1>(this->alignments[0])); + EXPECT_RANGE_EQ((*fin.begin()).cigar_sequence(), this->cigars[0]); EXPECT_EQ((*fin.begin()).tags().size(), 0u); // redundant CG tag is removed } { // error: sam_tag_dictionary is not read std::istringstream stream{sam_file_with_too_long_cigar_string}; - seqan3::sam_file_input fin{stream, seqan3::format_bam{}, seqan3::fields{}}; + seqan3::sam_file_input fin{stream, seqan3::format_bam{}, seqan3::fields{}}; ASSERT_THROW(fin.begin(), seqan3::format_error); } @@ -577,7 +552,7 @@ TEST_F(bam_format, too_long_cigar_string_read) seqan3::sam_file_input fin{stream, seqan3::format_bam{}, - seqan3::fields{}}; + seqan3::fields{}}; ASSERT_THROW(fin.begin(), seqan3::format_error); } @@ -603,25 +578,20 @@ TEST_F(bam_format, too_long_cigar_string_read) TEST_F(bam_format, too_long_cigar_string_write) { - // create an alignment resulting more than 65535 cigar elements - // ------------------------------------------------------------------------- auto read = seqan3::views::repeat_n('T'_dna5, 70'000); auto ref = seqan3::views::repeat_n('A'_dna5, 2 * read.size() - 1); - auto gapped_ref = seqan3::gap_decorator{ref}; - - // a gap_decorator on a repeat_n view also works but is slow when inserting gaps. - std::vector> gapped_read; - gapped_read.reserve(2 * read.size()); - // create gap of length one every second character => T-T-T-T-T-T... - for (auto chr : read) - gapped_read.push_back(chr), gapped_read.push_back(seqan3::gap{}); - gapped_read.pop_back(); // remove last seqan3::gap + // create a cigar with more than 65535 cigar elements + std::vector too_long_cigar{}; + for (size_t i = 0; i < 69'999; ++i) + { + too_long_cigar.push_back({1, 'M'_cigar_operation}); + too_long_cigar.push_back({1, 'D'_cigar_operation}); + } + too_long_cigar.push_back({1, 'M'_cigar_operation}); - auto alignment = std::tie(gapped_ref, gapped_read); // Expected output. ATTENTION this could not be validated by samtools as it does not support too long cigar strings - // ------------------------------------------------------------------------- std::string expected = std::string /*the beginning*/ {'\x42', '\x41', '\x4D', '\x01', '\x20', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E', @@ -659,10 +629,10 @@ TEST_F(bam_format, too_long_cigar_string_write) seqan3::field::seq, seqan3::field::ref_id, seqan3::field::ref_offset, - seqan3::field::alignment, + seqan3::field::cigar, seqan3::field::mapq>{}}; - fout.emplace_back(&header, std::string{"long_read"}, read, 0, 0, alignment, 255); + fout.emplace_back(&header, std::string{"long_read"}, read, 0, 0, too_long_cigar, 255); } os.flush(); @@ -690,7 +660,7 @@ TEST_F(bam_format, issue2417) seqan3::sam_file_input fin{stream, seqan3::format_bam{}, - seqan3::fields{}}; + seqan3::fields{}}; std::vector> const empty_sequence{}; @@ -699,12 +669,11 @@ TEST_F(bam_format, issue2417) // In 2417, the sequence was not consumed. Thus, wrong bytes were read for the following records. // With the chosen `input` this also means that there will be more than 1 record in the alignment file. // Hence, we need the for loop even though there is only 1 record. - for (auto && [id, alignment] : fin) + for (auto && [id, cigar] : fin) { ++num_records; EXPECT_RANGE_EQ(id, std::string{"read1"}); - EXPECT_RANGE_EQ(std::get<0>(alignment), empty_sequence); - EXPECT_RANGE_EQ(std::get<1>(alignment), empty_sequence); + EXPECT_TRUE(cigar.empty()); } EXPECT_EQ(num_records, 1u); diff --git a/test/unit/io/sam_file/format_sam_test.cpp b/test/unit/io/sam_file/format_sam_test.cpp index c442296a372..e44378b6c1e 100644 --- a/test/unit/io/sam_file/format_sam_test.cpp +++ b/test/unit/io/sam_file/format_sam_test.cpp @@ -7,8 +7,6 @@ #include -#include - #include "sam_file_format_test_template.hpp" template <> @@ -78,14 +76,6 @@ read3 43 ref 3 63 1S1M1P1M1I1M1I1D1M1S ref 10 300 GGAGTATA !!*+,-./ // formatted output // ----------------------------------------------------------------------------------------------------------------- - std::string simple_three_reads_output{// compared to simple_three_reads_input this has no hard clipping - R"(@HD VN:1.6 -@SQ SN:ref LN:34 -read1 41 ref 1 61 1S1M1D1M1I ref 10 300 ACGT !##$ AS:i:2 NM:i:7 -read2 42 ref 2 62 7M1D1M1S ref 10 300 AGGCTGNAG !##$&'()* xy:B:S,3,4,5 -read3 43 ref 3 63 1S1M1P1M1I1M1I1D1M1S ref 10 300 GGAGTATA !!*+,-./ -)"}; - std::string verbose_output{ R"(@HD VN:1.6 SO:unknown GO:none @SQ SN:ref LN:34 AN:other_name @@ -93,7 +83,7 @@ read3 43 ref 3 63 1S1M1P1M1I1M1I1D1M1S ref 10 300 GGAGTATA !!*+,-./ @PG ID:prog1 PN:cool_program CL:./prog1 PP:a DS:b VN:c @CO This is a comment. read1 41 ref 1 61 1S1M1D1M1I ref 10 300 ACGT !##$ AS:i:2 CC:i:300 NM:i:-7 aa:A:c cc:i:-300 ff:f:3.1 zz:Z:str -read2 42 ref 2 62 7M1D1M1S ref 10 300 AGGCTGNAG !##$&'()* bC:B:C,3,200 bI:B:I,294967296 bS:B:S,300,40,500 bc:B:c,-3 bf:B:f,3.5,0.1,43.8 bi:B:i,-3,200,-66000 bs:B:s,-3,200,-300 +read2 42 ref 2 62 1H7M1D1M1S2H ref 10 300 AGGCTGNAG !##$&'()* bC:B:C,3,200 bI:B:I,294967296 bS:B:S,300,40,500 bc:B:c,-3 bf:B:f,3.5,0.1,43.8 bi:B:i,-3,200,-66000 bs:B:s,-3,200,-300 read3 43 ref 3 63 1S1M1P1M1I1M1I1D1M1S ref 10 300 GGAGTATA !!*+,-./ )"}; @@ -361,24 +351,16 @@ TEST_F(sam_format, short_cigar_string_with_softclipping) using seqan3::operator""_dna5; // The member function transfer_soft_clipping_to needs to work on 2 element cigar strings - { - std::istringstream istream("id 16 ref 0 255 10M5S * 0 0 AGAGGGGGATAACCA *\n"); - seqan3::sam_file_input fin{istream, - ref_ids, - ref_sequences, - seqan3::format_sam{}, - seqan3::fields{}}; - EXPECT_RANGE_EQ(std::get<1>(std::get<0>(*fin.begin())), "AGAGGGGGAT"_dna5); - } - + // TODO: This test should be moved to the function that constructs an alignment from the cigar string + // The field offset should be removed to. { std::istringstream istream("id 16 ref 0 255 5S10M * 0 0 AGAGGGGGATAACCA *\n"); seqan3::sam_file_input fin{istream, ref_ids, ref_sequences, seqan3::format_sam{}, - seqan3::fields{}}; - EXPECT_RANGE_EQ(std::get<1>(std::get<0>(*fin.begin())), "GGGATAACCA"_dna5); + seqan3::fields{}}; + EXPECT_EQ((*fin.begin()).sequence_position(), 5); } } diff --git a/test/unit/io/sam_file/sam_file_format_test_template.hpp b/test/unit/io/sam_file/sam_file_format_test_template.hpp index d442028d01c..f318125b1bd 100644 --- a/test/unit/io/sam_file/sam_file_format_test_template.hpp +++ b/test/unit/io/sam_file/sam_file_format_test_template.hpp @@ -96,29 +96,6 @@ struct sam_file_data : public ::testing::Test std::vector ref_offsets{0, 1, 2}; - std::vector>, std::vector>>> - alignments{ - {ref_seq_gapped1, std::vector>{'C'_dna5, seqan3::gap{}, 'G'_dna5, 'T'_dna5}}, - {ref_seq_gapped2, - std::vector>{'A'_dna5, - 'G'_dna5, - 'G'_dna5, - 'C'_dna5, - 'T'_dna5, - 'G'_dna5, - 'N'_dna5, - seqan3::gap{}, - 'A'_dna5}}, - {ref_seq_gapped3, - std::vector>{'G'_dna5, - seqan3::gap{}, - 'A'_dna5, - 'G'_dna5, - 'T'_dna5, - 'A'_dna5, - seqan3::gap{}, - 'T'_dna5}}}; - std::vector flags{seqan3::sam_flag{41u}, seqan3::sam_flag{42u}, seqan3::sam_flag{43u}}; std::vector mapqs{61u, 62u, 63u}; @@ -233,10 +210,9 @@ TYPED_TEST_P(sam_file_read, read_in_all_data) EXPECT_EQ(rec.id(), this->ids[i]); EXPECT_EQ(rec.base_qualities(), this->quals[i]); EXPECT_EQ(rec.sequence_position(), this->offsets[i]); + EXPECT_RANGE_EQ(rec.cigar_sequence(), this->cigars[i]); EXPECT_EQ(rec.reference_id(), 0); EXPECT_EQ(*rec.reference_position(), this->ref_offsets[i]); - EXPECT_RANGE_EQ(std::get<0>(rec.alignment()), std::get<0>(this->alignments[i])); - EXPECT_RANGE_EQ(std::get<1>(rec.alignment()), std::get<1>(this->alignments[i])); EXPECT_EQ(rec.flag(), this->flags[i]); EXPECT_EQ(rec.mapping_quality(), this->mapqs[i]); EXPECT_EQ(rec.mate_reference_id(), std::get<0>(this->mates[i])); @@ -256,10 +232,9 @@ TYPED_TEST_P(sam_file_read, read_in_all_but_empty_data) EXPECT_TRUE((*fin.begin()).id().empty()); EXPECT_TRUE((*fin.begin()).base_qualities().empty()); EXPECT_EQ((*fin.begin()).sequence_position(), 0); + EXPECT_TRUE((*fin.begin()).cigar_sequence().empty()); EXPECT_TRUE(!(*fin.begin()).reference_id().has_value()); EXPECT_TRUE(!(*fin.begin()).reference_position().has_value()); - EXPECT_TRUE(std::ranges::empty(std::get<0>((*fin.begin()).alignment()))); - EXPECT_TRUE(std::ranges::empty(std::get<1>((*fin.begin()).alignment()))); EXPECT_EQ((*fin.begin()).flag(), seqan3::sam_flag{0u}); EXPECT_EQ((*fin.begin()).mapping_quality(), 0u); EXPECT_TRUE(!(*fin.begin()).mate_reference_id().has_value()); @@ -278,62 +253,6 @@ TYPED_TEST_P(sam_file_read, read_in_almost_nothing) EXPECT_EQ(mapq, this->mapqs[i++]); } -TYPED_TEST_P(sam_file_read, read_in_alignment_only_with_ref) -{ - { - typename TestFixture::stream_type istream{this->simple_three_reads_input}; - seqan3::sam_file_input fin{istream, - this->ref_ids, - this->ref_sequences, - TypeParam{}, - seqan3::fields{}}; - - size_t i{0}; - for (auto & [alignment] : fin) - { - EXPECT_RANGE_EQ(std::get<0>(alignment), std::get<0>(this->alignments[i])); - EXPECT_RANGE_EQ(std::get<1>(alignment), std::get<1>(this->alignments[i])); - ++i; - } - } - - { // empty cigar - typename TestFixture::stream_type istream{this->empty_cigar}; - seqan3::sam_file_input fin{istream, - this->ref_ids, - this->ref_sequences, - TypeParam{}, - seqan3::fields{}}; - - EXPECT_TRUE(std::ranges::empty(std::get<0>((*fin.begin()).alignment()))); - EXPECT_TRUE(std::ranges::empty(std::get<1>((*fin.begin()).alignment()))); - } -} - -TYPED_TEST_P(sam_file_read, read_in_alignment_only_without_ref) -{ - { - typename TestFixture::stream_type istream{this->simple_three_reads_input}; - seqan3::sam_file_input fin{istream, TypeParam{}, seqan3::fields{}}; - - size_t i{0}; - for (auto & [alignment] : fin) - { - EXPECT_RANGE_EQ(std::get<1>(alignment), std::get<1>(this->alignments[i++])); - auto & ref_aln = std::get<0>(alignment); - EXPECT_THROW((ref_aln[0]), std::logic_error); // access on a dummy seq is not allowed - } - } - - { // empty cigar - typename TestFixture::stream_type istream{this->empty_cigar}; - seqan3::sam_file_input fin{istream, TypeParam{}, seqan3::fields{}}; - - EXPECT_TRUE(std::ranges::empty(std::get<0>((*fin.begin()).alignment()))); - EXPECT_TRUE(std::ranges::empty(std::get<1>((*fin.begin()).alignment()))); - } -} - TYPED_TEST_P(sam_file_read, read_mate_but_not_ref_id_with_ref) { { /*with reference information*/ @@ -411,17 +330,13 @@ TYPED_TEST_P(sam_file_read, issue2423) // sam_file_write // ---------------------------------------------------------------------------- -// Note that these differ from the sam_file_output default fields: -// 1. They don't contain field::bit_score and field::evalue since these belong to the BLAST format. -// 2. field::alignment and field::cigar are redundant. Since field::alignment is the more complex one it is chosen here. -// The behaviour if both are given is tested in a separate test. using sam_fields = seqan3::fieldsostream, TypeParam{}, sam_fields{}}; - using default_align_t = std::pair>, std::span>>; using default_mate_t = std::tuple, int32_t>; fout.emplace_back(&(this->header), @@ -455,7 +369,7 @@ TYPED_TEST_P(sam_file_write, write_empty_members) std::string_view{}, -1, 0, - default_align_t{}, + std::vector{}, 0, default_mate_t{}, std::string_view{}, @@ -480,7 +394,7 @@ TYPED_TEST_P(sam_file_write, default_options_all_members_specified) 0 /*ref_id*/, this->ref_offsets[i], this->mapqs[i], - this->alignments[i], + this->cigars[i], this->offsets[i], this->mates[i], this->seqs[i], @@ -490,7 +404,7 @@ TYPED_TEST_P(sam_file_write, default_options_all_members_specified) } this->ostream.flush(); - EXPECT_EQ(this->ostream.str(), this->simple_three_reads_output); + EXPECT_EQ(this->ostream.str(), this->simple_three_reads_input); } TYPED_TEST_P(sam_file_write, write_ref_id_with_different_types) @@ -506,7 +420,7 @@ TYPED_TEST_P(sam_file_write, write_ref_id_with_different_types) /*----------------------->*/ this->ref_id, this->ref_offsets[0], this->mapqs[0], - this->alignments[0], + this->cigars[0], this->offsets[0], this->mates[0], this->seqs[0], @@ -520,7 +434,7 @@ TYPED_TEST_P(sam_file_write, write_ref_id_with_different_types) /*----------------------->*/ std::string_view{this->ref_id}, this->ref_offsets[1], this->mapqs[1], - this->alignments[1], + this->cigars[1], this->offsets[1], this->mates[1], this->seqs[1], @@ -534,7 +448,7 @@ TYPED_TEST_P(sam_file_write, write_ref_id_with_different_types) /*----------------------->*/ this->ref_id | std::views::take(20), this->ref_offsets[2], this->mapqs[2], - this->alignments[2], + this->cigars[2], this->offsets[2], this->mates[2], this->seqs[2], @@ -544,7 +458,7 @@ TYPED_TEST_P(sam_file_write, write_ref_id_with_different_types) this->ostream.flush(); - EXPECT_EQ(this->ostream.str(), this->simple_three_reads_output); + EXPECT_EQ(this->ostream.str(), this->simple_three_reads_input); } TYPED_TEST_P(sam_file_write, with_header) @@ -569,7 +483,7 @@ TYPED_TEST_P(sam_file_write, with_header) 0 /*ref_id*/, this->ref_offsets[i], this->mapqs[i], - this->alignments[i], + this->cigars[i], this->offsets[i], this->mates[i], this->seqs[i], @@ -586,7 +500,7 @@ TYPED_TEST_P(sam_file_write, with_header) TYPED_TEST_P(sam_file_write, cigar_vector) { { - seqan3::sam_file_output fout{this->ostream, TypeParam{}}; // default fields contain CIGAR and alignment + seqan3::sam_file_output fout{this->ostream, TypeParam{}}; for (size_t i = 0ul; i < 3ul; ++i) { ASSERT_NO_THROW(fout.emplace_back(this->seqs[i], @@ -594,7 +508,6 @@ TYPED_TEST_P(sam_file_write, cigar_vector) this->offsets[i], 0 /*ref_id*/, this->ref_offsets[i], - this->alignments[i], this->cigars[i], this->mapqs[i], this->quals[i], @@ -622,7 +535,6 @@ TYPED_TEST_P(sam_file_write, cigar_vector) seqan3::field::ref_offset, seqan3::field::mapq, seqan3::field::cigar, - // cigar instead of alignment seqan3::field::offset, seqan3::field::mate, seqan3::field::seq, @@ -668,7 +580,7 @@ TYPED_TEST_P(sam_file_write, special_cases) rid, this->ref_offsets[0], this->mapqs[0], - this->alignments[0], + this->cigars[0], this->offsets[0], mate, this->seqs[0], @@ -694,7 +606,7 @@ TYPED_TEST_P(sam_file_write, special_cases) std::string(""), this->ref_offsets[0], this->mapqs[0], - this->alignments[0], + this->cigars[0], this->offsets[0], mate_str, this->seqs[0], @@ -717,7 +629,7 @@ TYPED_TEST_P(sam_file_write, format_errors) std::string("ref_id_that_does_not_exist"), this->ref_offsets[0], this->mapqs[0], - this->alignments[0], + this->cigars[0], this->offsets[0], this->mates[0], this->seqs[0], @@ -732,7 +644,7 @@ TYPED_TEST_P(sam_file_write, format_errors) this->ref_id, -3, this->mapqs[0], - this->alignments[0], + this->cigars[0], this->offsets[0], this->mates[0], this->seqs[0], @@ -747,8 +659,6 @@ REGISTER_TYPED_TEST_SUITE_P(sam_file_read, read_in_all_data, read_in_all_but_empty_data, read_in_almost_nothing, - read_in_alignment_only_with_ref, - read_in_alignment_only_without_ref, read_mate_but_not_ref_id_with_ref, read_mate_but_not_ref_id_without_ref, cigar_vector, diff --git a/test/unit/io/sam_file/sam_file_input_test.cpp b/test/unit/io/sam_file/sam_file_input_test.cpp index bf369b71dce..c1d97b3359a 100644 --- a/test/unit/io/sam_file/sam_file_input_test.cpp +++ b/test/unit/io/sam_file/sam_file_input_test.cpp @@ -144,7 +144,6 @@ TEST_F(sam_file_input_f, default_template_args_and_deduction_guides) seqan3::field::offset, seqan3::field::ref_id, seqan3::field::ref_offset, - seqan3::field::alignment, seqan3::field::cigar, seqan3::field::mapq, seqan3::field::qual, @@ -491,123 +490,14 @@ TEST_F(sam_file_input_f, read_empty_bz2_file) #endif // ---------------------------------------------------------------------------- -// SAM format specificities +// BAM format specificities // ---------------------------------------------------------------------------- -struct sam_file_input_sam_format_f : public sam_file_input_f +struct sam_file_input_bam_format_f : public sam_file_input_f { std::vector const ref_seqs = {"ACTGATCGAGAGGATCTAGAGGAGATCGTAGGAC"_dna4}; std::vector const ref_ids = {"ref"}; - std::vector> ref_seq_gapped1 = {'A'_dna4, 'C'_dna4, 'T'_dna4, 'G'_dna4}; - std::vector> ref_seq_gapped2 = - {'C'_dna4, 'T'_dna4, 'G'_dna4, 'A'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4, 'G'_dna4}; - std::vector> ref_seq_gapped3 = { - 'T'_dna4, - 'G'_dna4, - 'A'_dna4, - 'T'_dna4, - 'C'_dna4, - 'G'_dna4, - 'A'_dna4, - 'G'_dna4, - }; - - std::vector>, std::vector>>> - alignments_expected{ - {ref_seq_gapped1, std::vector>{'C'_dna5, seqan3::gap{}, 'G'_dna5, 'T'_dna5}}, - {ref_seq_gapped2, - std::vector>{'A'_dna5, - 'G'_dna5, - 'G'_dna5, - 'C'_dna5, - 'T'_dna5, - 'G'_dna5, - 'N'_dna5, - seqan3::gap{}, - 'A'_dna5}}, - {ref_seq_gapped3, - std::vector>{'G'_dna5, - seqan3::gap{}, - 'A'_dna5, - 'G'_dna5, - 'T'_dna5, - 'A'_dna5, - seqan3::gap{}, - 'T'_dna5}}}; -}; - -TEST_F(sam_file_input_sam_format_f, construct_by_filename_and_read_alignments) -{ - seqan3::test::tmp_directory tmp{}; - auto filename = tmp.path() / "sam_file_input_constructor.sam"; - - { - std::ofstream filecreator{filename, std::ios::out | std::ios::binary}; - filecreator << input; - } - - seqan3::sam_file_input fin{filename, ref_ids, ref_seqs, seqan3::fields{}}; - - EXPECT_EQ(fin.header().ref_ids(), ref_ids); - - size_t counter = 0; - for (auto & [alignment] : fin) - { - EXPECT_RANGE_EQ(std::get<0>(alignment), std::get<0>(alignments_expected[counter])); - EXPECT_RANGE_EQ(std::get<0>(alignment), std::get<0>(alignments_expected[counter])); - - counter++; - } - - EXPECT_EQ(counter, 3u); -} - -TEST_F(sam_file_input_sam_format_f, construct_from_stream_and_read_alignments) -{ - seqan3::sam_file_input fin{std::istringstream{input}, - ref_ids, - ref_seqs, - seqan3::format_sam{}, - seqan3::fields{}}; - - EXPECT_EQ(fin.header().ref_ids(), ref_ids); - - size_t counter = 0; - for (auto & [alignment] : fin) - { - EXPECT_RANGE_EQ(std::get<0>(alignment), std::get<0>(alignments_expected[counter])); - EXPECT_RANGE_EQ(std::get<0>(alignment), std::get<0>(alignments_expected[counter])); - - counter++; - } - - EXPECT_EQ(counter, 3u); -} - -TEST_F(sam_file_input_sam_format_f, construct_from_stream_and_read_alignment_with_dummy) -{ - seqan3::sam_file_input fin{std::istringstream{input}, - seqan3::format_sam{}, - seqan3::fields{}}; - - size_t counter = 0; - for (auto & [alignment] : fin) - { - EXPECT_EQ(std::get<1>(alignment), std::get<1>(alignments_expected[counter])); - - counter++; - } - - EXPECT_EQ(counter, 3u); -} - -// ---------------------------------------------------------------------------- -// BAM format specificities -// ---------------------------------------------------------------------------- - -struct sam_file_input_bam_format_f : public sam_file_input_sam_format_f -{ std::string binary_input{ // corresponds to 'input' from sam_file_input_f fixture '\x1F', '\x8B', '\x08', '\x04', '\x00', '\x00', '\x00', '\x00', '\x00', '\xFF', '\x06', '\x00', '\x42', '\x43', @@ -650,21 +540,18 @@ TEST_F(sam_file_input_bam_format_f, construct_by_filename) filename, ref_ids, ref_seqs, - seqan3::fields{}}; + seqan3::fields{}}; EXPECT_EQ(fin.header().ref_ids(), ref_ids); EXPECT_EQ(fin.header().comments[0], std::string{"This is a comment."}); size_t counter = 0; - for (auto & [id, seq, qual, alignment] : fin) + for (auto & [id, seq, qual] : fin) { EXPECT_EQ(id, id_comp[counter]); EXPECT_EQ(seq, seq_comp[counter]); EXPECT_EQ(qual, qual_comp[counter]); - EXPECT_RANGE_EQ(std::get<0>(alignment), std::get<0>(alignments_expected[counter])); - EXPECT_RANGE_EQ(std::get<0>(alignment), std::get<0>(alignments_expected[counter])); - counter++; } @@ -680,21 +567,18 @@ TEST_F(sam_file_input_bam_format_f, construct_by_stream) ref_ids, ref_seqs, seqan3::format_bam{}, - seqan3::fields{}}; + seqan3::fields{}}; EXPECT_EQ(fin.header().ref_ids(), ref_ids); EXPECT_EQ(fin.header().comments[0], std::string{"This is a comment."}); size_t counter = 0; - for (auto & [id, seq, qual, alignment] : fin) + for (auto & [id, seq, qual] : fin) { EXPECT_EQ(id, id_comp[counter]); EXPECT_EQ(seq, seq_comp[counter]); EXPECT_EQ(qual, qual_comp[counter]); - EXPECT_RANGE_EQ(std::get<0>(alignment), std::get<0>(alignments_expected[counter])); - EXPECT_RANGE_EQ(std::get<0>(alignment), std::get<0>(alignments_expected[counter])); - counter++; } diff --git a/test/unit/io/sam_file/sam_file_output_test.cpp b/test/unit/io/sam_file/sam_file_output_test.cpp index 6e25c848401..5cc87b533a6 100644 --- a/test/unit/io/sam_file/sam_file_output_test.cpp +++ b/test/unit/io/sam_file/sam_file_output_test.cpp @@ -105,7 +105,6 @@ TEST(general, default_template_args_and_deduction_guides) seqan3::field::offset, seqan3::field::ref_id, seqan3::field::ref_offset, - seqan3::field::alignment, seqan3::field::cigar, seqan3::field::mapq, seqan3::field::qual, @@ -142,11 +141,11 @@ TEST(general, default_template_args_and_deduction_guides) seqan3::test::tmp_directory tmp{}; auto filename = tmp.path() / "sam_file_output_constructor.sam"; - seqan3::sam_file_output fout{filename, seqan3::fields{}}; + seqan3::sam_file_output fout{filename, seqan3::fields{}}; using t = decltype(fout); EXPECT_TRUE( - (std::is_same_v>)); // changed + (std::is_same_v>)); // changed EXPECT_TRUE((std::is_same_v)); EXPECT_TRUE((std::is_same_v)); } diff --git a/test/unit/io/sam_file/sam_file_record_test.cpp b/test/unit/io/sam_file/sam_file_record_test.cpp index a56dea58e07..0910639f292 100644 --- a/test/unit/io/sam_file/sam_file_record_test.cpp +++ b/test/unit/io/sam_file/sam_file_record_test.cpp @@ -37,15 +37,12 @@ using seqan3::test::operator""_cigar; struct sam_record : public ::testing::Test { - using alignment_t = std::pair>, std::vector>>; - using mate_t = std::tuple, std::optional, int32_t>; using types = seqan3::type_list, // seqan3::field::qual, int32_t, // seqan3::field::offset, - alignment_t, // seqan3::field::alignment, std::optional, // seqan3::field::ref_id, std::optional, // seqan3::field::ref_offset, // seqan3::field::header_ptr, @@ -60,7 +57,6 @@ struct sam_record : public ::testing::Test seqan3::field::seq, seqan3::field::qual, seqan3::field::offset, - seqan3::field::alignment, seqan3::field::ref_id, seqan3::field::ref_offset, seqan3::field::header_ptr, @@ -78,7 +74,6 @@ struct sam_record : public ::testing::Test /*.sequence =*/"ACGT"_dna5, /*.base_qualities =*/"!##$"_phred42, /*.sequence_position =*/1, - /*.alignment =*/alignment_t{}, /*.reference_id =*/0, // "ref" /*.reference_position =*/0, /*.header_ptr =*/nullptr, @@ -101,7 +96,6 @@ TEST_F(sam_record, definition_tuple_traits) seqan3::dna5_vector, std::vector, int32_t, - alignment_t, std::optional, std::optional, seqan3::sam_file_header> *, @@ -116,17 +110,16 @@ TEST_F(sam_record, definition_tuple_traits) EXPECT_SAME_TYPE(seqan3::dna5_vector, (std::tuple_element_t<1, record_type>)); EXPECT_SAME_TYPE(std::vector, (std::tuple_element_t<2, record_type>)); EXPECT_SAME_TYPE(int32_t, (std::tuple_element_t<3, record_type>)); - EXPECT_SAME_TYPE(alignment_t, (std::tuple_element_t<4, record_type>)); + EXPECT_SAME_TYPE(std::optional, (std::tuple_element_t<4, record_type>)); EXPECT_SAME_TYPE(std::optional, (std::tuple_element_t<5, record_type>)); - EXPECT_SAME_TYPE(std::optional, (std::tuple_element_t<6, record_type>)); - EXPECT_SAME_TYPE(seqan3::sam_file_header> *, (std::tuple_element_t<7, record_type>)); - EXPECT_SAME_TYPE(seqan3::sam_flag, (std::tuple_element_t<8, record_type>)); - EXPECT_SAME_TYPE(mate_t, (std::tuple_element_t<9, record_type>)); - EXPECT_SAME_TYPE(uint8_t, (std::tuple_element_t<10, record_type>)); - EXPECT_SAME_TYPE(std::vector, (std::tuple_element_t<11, record_type>)); - EXPECT_SAME_TYPE(seqan3::sam_tag_dictionary, (std::tuple_element_t<12, record_type>)); + EXPECT_SAME_TYPE(seqan3::sam_file_header> *, (std::tuple_element_t<6, record_type>)); + EXPECT_SAME_TYPE(seqan3::sam_flag, (std::tuple_element_t<7, record_type>)); + EXPECT_SAME_TYPE(mate_t, (std::tuple_element_t<8, record_type>)); + EXPECT_SAME_TYPE(uint8_t, (std::tuple_element_t<9, record_type>)); + EXPECT_SAME_TYPE(std::vector, (std::tuple_element_t<10, record_type>)); + EXPECT_SAME_TYPE(seqan3::sam_tag_dictionary, (std::tuple_element_t<11, record_type>)); - EXPECT_EQ(std::tuple_size_v, 13ul); + EXPECT_EQ(std::tuple_size_v, 12ul); EXPECT_TRUE(seqan3::tuple_like); } @@ -144,15 +137,14 @@ TEST_F(sam_record, get_by_index) EXPECT_RANGE_EQ(std::get<1>(r), "ACGT"_dna5); EXPECT_RANGE_EQ(std::get<2>(r), "!##$"_phred42); EXPECT_DOUBLE_EQ(std::get<3>(r), 1); - EXPECT_EQ(std::get<4>(r), alignment_t{}); - EXPECT_EQ(std::get<5>(r), 0); // "ref" - EXPECT_EQ(std::get<6>(r), 0); - EXPECT_EQ(std::get<7>(r), nullptr); - EXPECT_EQ(std::get<8>(r), seqan3::sam_flag{41u}); - EXPECT_EQ(std::get<9>(r), (mate_t{0, 9, 300})); - EXPECT_EQ(std::get<10>(r), 61u); - EXPECT_RANGE_EQ(std::get<11>(r), "1S1M1D1M1I"_cigar); - EXPECT_EQ(std::get<12>(r), seqan3::sam_tag_dictionary{}); + EXPECT_EQ(std::get<4>(r), 0); // "ref" + EXPECT_EQ(std::get<5>(r), 0); + EXPECT_EQ(std::get<6>(r), nullptr); + EXPECT_EQ(std::get<7>(r), seqan3::sam_flag{41u}); + EXPECT_EQ(std::get<8>(r), (mate_t{0, 9, 300})); + EXPECT_EQ(std::get<9>(r), 61u); + EXPECT_RANGE_EQ(std::get<10>(r), "1S1M1D1M1I"_cigar); + EXPECT_EQ(std::get<11>(r), seqan3::sam_tag_dictionary{}); } TEST_F(sam_record, get_by_type) @@ -163,7 +155,6 @@ TEST_F(sam_record, get_by_type) EXPECT_RANGE_EQ(std::get(r), "ACGT"_dna5); EXPECT_RANGE_EQ(std::get>(r), "!##$"_phred42); EXPECT_DOUBLE_EQ(std::get(r), 1); - EXPECT_EQ(std::get(r), alignment_t{}); // EXPECT_EQ(std::get>(r), 0); // "ref" // ambiguous // EXPECT_EQ(std::get>(r), 0); // ambiguous EXPECT_EQ(std::get> *>(r), nullptr); @@ -182,7 +173,6 @@ TEST_F(sam_record, get_by_member) EXPECT_RANGE_EQ(r.sequence(), "ACGT"_dna5); EXPECT_RANGE_EQ(r.base_qualities(), "!##$"_phred42); EXPECT_DOUBLE_EQ(r.sequence_position(), 1); - EXPECT_EQ(r.alignment(), alignment_t{}); EXPECT_EQ(r.reference_id(), 0); // "ref" EXPECT_EQ(r.reference_position(), 0); EXPECT_EQ(r.header_ptr(), nullptr); @@ -203,7 +193,6 @@ TEST_F(sam_record, member_types) EXPECT_SAME_TYPE(seqan3::dna5_vector &, decltype(r.sequence())); EXPECT_SAME_TYPE(std::vector &, decltype(r.base_qualities())); EXPECT_SAME_TYPE(int32_t &, decltype(r.sequence_position())); - EXPECT_SAME_TYPE(alignment_t &, decltype(r.alignment())); EXPECT_SAME_TYPE(std::optional &, decltype(r.reference_id())); EXPECT_SAME_TYPE(std::optional &, decltype(r.reference_position())); EXPECT_SAME_TYPE(seqan3::sam_file_header> *&, decltype(r.header_ptr())); @@ -219,7 +208,6 @@ TEST_F(sam_record, member_types) EXPECT_SAME_TYPE(seqan3::dna5_vector const &, decltype(std::as_const(r.sequence()))); EXPECT_SAME_TYPE(std::vector const &, decltype(std::as_const(r.base_qualities()))); EXPECT_SAME_TYPE(int32_t const &, decltype(std::as_const(r.sequence_position()))); - EXPECT_SAME_TYPE(alignment_t const &, decltype(std::as_const(r.alignment()))); EXPECT_SAME_TYPE(std::optional const &, decltype(std::as_const(r.reference_id()))); EXPECT_SAME_TYPE(std::optional const &, decltype(std::as_const(r.reference_position()))); EXPECT_SAME_TYPE(seqan3::sam_file_header> * const &, @@ -236,7 +224,6 @@ TEST_F(sam_record, member_types) EXPECT_SAME_TYPE(seqan3::dna5_vector &&, decltype(std::move(r.sequence()))); EXPECT_SAME_TYPE(std::vector &&, decltype(std::move(r.base_qualities()))); EXPECT_SAME_TYPE(int32_t &&, decltype(std::move(r.sequence_position()))); - EXPECT_SAME_TYPE(alignment_t &&, decltype(std::move(r.alignment()))); EXPECT_SAME_TYPE(std::optional &&, decltype(std::move(r.reference_id()))); EXPECT_SAME_TYPE(std::optional &&, decltype(std::move(r.reference_position()))); EXPECT_SAME_TYPE(seqan3::sam_file_header> * &&, decltype(std::move(r.header_ptr()))); @@ -252,7 +239,6 @@ TEST_F(sam_record, member_types) EXPECT_SAME_TYPE(seqan3::dna5_vector const &&, decltype(std::move(std::as_const(r.sequence())))); EXPECT_SAME_TYPE(std::vector const &&, decltype(std::move(std::as_const(r.base_qualities())))); EXPECT_SAME_TYPE(int32_t const &&, decltype(std::move(std::as_const(r.sequence_position())))); - EXPECT_SAME_TYPE(alignment_t const &&, decltype(std::move(std::as_const(r.alignment())))); EXPECT_SAME_TYPE(std::optional const &&, decltype(std::move(std::as_const(r.reference_id())))); EXPECT_SAME_TYPE(std::optional const &&, decltype(std::move(std::as_const(r.reference_position())))); EXPECT_SAME_TYPE(seqan3::sam_file_header> * const &&, diff --git a/test/unit/io/sam_file/sam_file_seek_test.cpp b/test/unit/io/sam_file/sam_file_seek_test.cpp index dd7e4dbcfa3..4c684735a6d 100644 --- a/test/unit/io/sam_file/sam_file_seek_test.cpp +++ b/test/unit/io/sam_file/sam_file_seek_test.cpp @@ -39,9 +39,6 @@ struct sam_file_seek_test : public ::testing::TestWithParam(record.alignment()), std::get<0>(expected_record.alignment())); - EXPECT_RANGE_EQ(std::get<1>(record.alignment()), std::get<1>(expected_record.alignment())); EXPECT_RANGE_EQ(record.cigar_sequence(), expected_record.cigar_sequence()); EXPECT_EQ(record.flag(), expected_record.flag()); EXPECT_EQ(record.mapping_quality(), expected_record.mapping_quality());