diff --git a/include/seqan3/alphabet/detail/convert.hpp b/include/seqan3/alphabet/detail/convert.hpp index 8150f189fb..d1d301186c 100755 --- a/include/seqan3/alphabet/detail/convert.hpp +++ b/include/seqan3/alphabet/detail/convert.hpp @@ -39,7 +39,8 @@ constexpr std::array> convert_through_char_representa [] () constexpr { std::array> ret{}; - for (typename in_t::rank_type i = 0; i < alphabet_size; ++i) + // for (decltype(alphabet_size) i = 0; ...) causes indefinite compilation :( + for (auto i = decltype(alphabet_size){0}; i < alphabet_size; ++i) assign_char_to(to_char(assign_rank_to(i, in_t{})), ret[i]); return ret; }() diff --git a/include/seqan3/io/alignment_file/format_bam.hpp b/include/seqan3/io/alignment_file/format_bam.hpp index 8fb4a0d253..582aad1eab 100644 --- a/include/seqan3/io/alignment_file/format_bam.hpp +++ b/include/seqan3/io/alignment_file/format_bam.hpp @@ -836,7 +836,7 @@ class alignment_file_output_format : alignment_file_output_format(std::ranges::distance(id) + 1), + /* l_read_name */ std::max(std::min(std::ranges::distance(id) + 1, 255), 2), /* mapq */ mapq, /* bin */ reg2bin(ref_offset.value_or(-1), std::ranges::distance(get<1>(align))), /* n_cigar_op */ static_cast(cigar.size()), @@ -866,7 +866,25 @@ class alignment_file_output_format : alignment_file_output_format && + std::ranges::SizedRange && + ForwardingRange) + { + id_it = header.ref_dict.find(std::span{std::ranges::data(id_source), + std::ranges::size(id_source)}); + } + else + { + using header_ref_id_type = std::remove_reference_t; + + static_assert(ImplicitlyConvertibleTo, + "The ref_id type is not convertible to the reference id information stored in the " + "reference dictionary of the header object."); + + id_it = header.ref_dict.find(id_source); + } if (id_it == header.ref_dict.end()) { @@ -874,6 +892,7 @@ class alignment_file_output_format : alignment_file_output_formatsecond; } } @@ -896,7 +915,10 @@ class alignment_file_output_format : alignment_file_output_format(&core), sizeof(core), stream_it); // write core - std::ranges::copy_n(std::ranges::begin(id), std::ranges::distance(id), stream_it); // write read id + if (std::ranges::distance(id) == 0) // empty id is represented as * for backward compatibility + stream_it = '*'; + else + std::ranges::copy_n(std::ranges::begin(id), core.l_read_name - 1, stream_it); // write read id stream_it = '\0'; // write cigar diff --git a/include/seqan3/io/alignment_file/format_sam.hpp b/include/seqan3/io/alignment_file/format_sam.hpp index e198f079d8..9ce4af6b5b 100644 --- a/include/seqan3/io/alignment_file/format_sam.hpp +++ b/include/seqan3/io/alignment_file/format_sam.hpp @@ -838,7 +838,7 @@ class alignment_file_input_format if (is_char<'S'>(*std::ranges::begin(stream_view))) // SQ (sequence dictionary) tag { ref_info_present_in_header = true; - std::string id; + value_type_t id; std::tuple info{}; parse_tag_value(id); // parse required SN (sequence name) tag @@ -988,7 +988,9 @@ class alignment_file_output_format typename align_type, typename qual_type, typename mate_type, - typename tag_dict_type> + typename tag_dict_type, + typename e_value_type, + typename bit_score_type> void write(stream_type & stream, alignment_file_output_options const & options, header_type && header, @@ -1004,8 +1006,8 @@ class alignment_file_output_format uint8_t mapq, mate_type && mate, tag_dict_type && tag_dict, - double SEQAN3_DOXYGEN_ONLY(e_value), - double SEQAN3_DOXYGEN_ONLY(bit_score)) + e_value_type && SEQAN3_DOXYGEN_ONLY(e_value), + bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score)) { /* Note the following general things: * @@ -1033,11 +1035,6 @@ class alignment_file_output_format "The id object must be a std::ranges::ForwardRange over " "letters that model seqan3::Alphabet."); - static_assert((std::ranges::ForwardRange && - Alphabet>), - "The ref_seq object must be a std::ranges::ForwardRange " - "over letters that model seqan3::Alphabet."); - if constexpr (!detail::decays_to_ignore_v) { static_assert((std::ranges::ForwardRange || @@ -1100,15 +1097,31 @@ class alignment_file_output_format !std::Integral> && !detail::is_type_specialisation_of_v, std::optional>) { - static_assert(ImplicitlyConvertibleTo, - "The ref_id type is not convertible to the reference id information stored in the " - "reference dictionary of the header object."); if (options.sam_require_header && !std::ranges::empty(ref_id)) { - if ((header.ref_dict).count(ref_id) == 0) // no reference id matched - throw format_error{std::string("The ref_id '") + std::string(ref_id) + - "' was not in the list of references"}; + auto id_it = header.ref_dict.end(); + + if constexpr (std::ranges::ContiguousRange && + std::ranges::SizedRange && + ForwardingRange) + { + id_it = header.ref_dict.find(std::span{std::ranges::data(ref_id), std::ranges::size(ref_id)}); + } + else + { + using header_ref_id_type = std::remove_reference_t; + + static_assert(ImplicitlyConvertibleTo, + "The ref_id type is not convertible to the reference id information stored in the " + "reference dictionary of the header object."); + + id_it = header.ref_dict.find(ref_id); + } + + if (id_it == header.ref_dict.end()) // no reference id matched + throw format_error{detail::to_string("The ref_id '", ref_id, "' was not in the list of references:", + header.ref_ids())}; } } diff --git a/include/seqan3/io/alignment_file/header.hpp b/include/seqan3/io/alignment_file/header.hpp index 5acc64e974..34364c6ece 100644 --- a/include/seqan3/io/alignment_file/header.hpp +++ b/include/seqan3/io/alignment_file/header.hpp @@ -16,8 +16,11 @@ #include #include +#include +#include #include #include +#include #include namespace seqan3 @@ -88,7 +91,9 @@ class alignment_file_header //!\brief Stream deleter with default behaviour (ownership assumed). static void ref_ids_deleter_default(ref_ids_type * ptr) { delete ptr; } //!\brief The key's type of ref_dict. - using key_type = std::ranges::all_view>; + using key_type = std::conditional_t>, + std::span const>, + all_view>>; //!\brief The pointer to reference ids information (non-owning if reference information is given). ref_ids_ptr_t ref_ids_ptr{new ref_ids_type{}, ref_ids_deleter_default}; diff --git a/include/seqan3/io/alignment_file/input.hpp b/include/seqan3/io/alignment_file/input.hpp index 0362af21ac..b1b23f9155 100644 --- a/include/seqan3/io/alignment_file/input.hpp +++ b/include/seqan3/io/alignment_file/input.hpp @@ -78,11 +78,8 @@ namespace seqan3 * \brief Type template of the seqan3::field::SEQ, a container template over `sequence_alphabet`; * must model seqan3::SequenceContainer. */ -/*!\typedef using id_alphabet - * \brief Alphabet of the characters for the seqan3::field::ID; must model seqan3::Alphabet. - */ /*!\typedef using id_container - * \brief Type template of the seqan3::field::ID, a container template over `id_alphabet`; + * \brief Type template of the seqan3::field::ID, a container template over `char`; * must model seqan3::SequenceContainer. */ /*!\typedef using quality_alphabet @@ -122,8 +119,7 @@ SEQAN3_CONCEPT AlignmentFileInputTraits = requires (t v) requires SequenceContainer>; // field::ID - requires WritableAlphabet; - requires SequenceContainer>; + requires SequenceContainer>; // field::QUAL requires WritableQualityAlphabet; @@ -200,9 +196,6 @@ struct alignment_file_input_default_traits template using sequence_container = std::vector<_sequence_alphabet>; - //!\brief The alphabet for an identifier string is char. - using id_alphabet = char; - //!\brief The string type for an identifier is std::basic_string. template using id_container = std::basic_string<_id_alphabet>; @@ -417,8 +410,7 @@ class alignment_file_input using sequence_type = typename traits_type::template sequence_container< typename traits_type::sequence_alphabet>; //!\brief The type of field::ID (default std::string by default). - using id_type = typename traits_type::template id_container< - typename traits_type::id_alphabet>; + using id_type = typename traits_type::template id_container; //!\brief The type of field::OFFSET is fixed to int32_t. using offset_type = int32_t; /*!\brief The type of field::REF_SEQ (default depends on construction). @@ -913,16 +905,27 @@ class alignment_file_input template void set_references(typename traits_type::ref_ids & ref_ids, ref_sequences_t && ref_sequences) { - assert(std::ranges::size(ref_ids) == std::ranges::size(ref_sequences)); + assert(std::ranges::distance(ref_ids) == std::ranges::distance(ref_sequences)); header_ptr = std::unique_ptr{std::make_unique(ref_ids)}; reference_sequences_ptr = &ref_sequences; // initialise reference map and ref_dict if ref_ids are non-empty - for (size_t idx = 0; idx < ref_ids.size(); ++idx) + for (int32_t idx = 0; idx < std::ranges::distance(ref_ids); ++idx) { - header_ptr->ref_id_info.emplace_back(std::ranges::size(ref_sequences[idx]), ""); - header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx; + header_ptr->ref_id_info.emplace_back(std::ranges::distance(ref_sequences[idx]), ""); + + if constexpr (std::ranges::ContiguousRange> && + std::ranges::SizedRange> && + ForwardingRange>) + { + auto && id = header_ptr->ref_ids()[idx]; + header_ptr->ref_dict[std::span{std::ranges::data(id), std::ranges::size(id)}] = idx; + } + else + { + header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx; + } } } //!\} diff --git a/include/seqan3/io/alignment_file/output.hpp b/include/seqan3/io/alignment_file/output.hpp index e04d497835..8c08a824fc 100644 --- a/include/seqan3/io/alignment_file/output.hpp +++ b/include/seqan3/io/alignment_file/output.hpp @@ -389,16 +389,7 @@ class alignment_file_output alignment_file_output{filename, selected_field_ids{}} { - assert(std::ranges::size(ref_ids) == std::ranges::size(ref_lengths)); - - header_ptr = std::make_unique>(std::forward(ref_ids)); - - // fill ref_dict - for (size_t idx = 0; idx < std::ranges::size(ref_ids); ++idx) - { - header_ptr->ref_id_info.push_back({ref_lengths[idx], ""}); - header_ptr->ref_dict[(header_ptr->ref_ids()[idx])] = idx; - } + initialise_header_information(ref_ids, ref_lengths); } /*!\brief Construct from an existing stream and with specified format. @@ -436,16 +427,7 @@ class alignment_file_output selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) : alignment_file_output{std::forward(stream), file_format{}, selected_field_ids{}} { - assert(std::ranges::size(ref_ids) == std::ranges::size(ref_lengths)); - - header_ptr = std::make_unique>(std::forward(ref_ids)); - - // fill ref_dict - for (uint32_t idx = 0; idx < std::ranges::size(ref_ids); ++idx) - { - header_ptr->ref_id_info.emplace_back(ref_lengths[idx], ""); - header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx; - } + initialise_header_information(ref_ids, ref_lengths); } //!\} @@ -763,6 +745,32 @@ class alignment_file_output //!\brief The file header object (will be set on construction). std::unique_ptr header_ptr; + //!\brief Fill the header reference dictionary, with the given info. + template + void initialise_header_information(ref_ids_type_ && ref_ids, ref_lengths_type && ref_lengths) + { + assert(std::ranges::size(ref_ids) == std::ranges::size(ref_lengths)); + + header_ptr = std::make_unique>(std::forward(ref_ids)); + + for (int32_t idx = 0; idx < std::ranges::distance(ref_ids); ++idx) + { + header_ptr->ref_id_info.emplace_back(ref_lengths[idx], ""); + + if constexpr (std::ranges::ContiguousRange> && + std::ranges::SizedRange> && + ForwardingRange>) + { + auto && id = header_ptr->ref_ids()[idx]; + header_ptr->ref_dict[std::span{std::ranges::data(id), std::ranges::size(id)}] = idx; + } + else + { + header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx; + } + } + } + //!\brief Write record to format. template void write_record(record_header_ptr_t && record_header_ptr, pack_type && ...remainder) diff --git a/test/unit/io/alignment_file/alignment_file_format_test_template.hpp b/test/unit/io/alignment_file/alignment_file_format_test_template.hpp index 457d49201e..a8dc07fc45 100644 --- a/test/unit/io/alignment_file/alignment_file_format_test_template.hpp +++ b/test/unit/io/alignment_file/alignment_file_format_test_template.hpp @@ -461,6 +461,29 @@ TYPED_TEST_P(alignment_file_write, output_concept) EXPECT_TRUE((AlignmentFileOutputFormat)); } +TYPED_TEST_P(alignment_file_write, write_empty_members) +{ + detail::alignment_file_output_format format; + + std::ostringstream ostream; + { + alignment_file_header header{std::vector{this->ref_id}}; + header.ref_id_info.push_back({this->ref_seq.size(), ""}); + header.ref_dict[this->ref_id] = 0; + + using default_align_t = std::pair>, std::span>>; + using default_mate_t = std::tuple, int32_t>; + + ASSERT_NO_THROW(format.write(ostream, output_options, header, std::string_view{}, std::string_view{}, + std::string_view{}, 0, std::string_view{}, std::string_view{}, + std::optional{std::nullopt}, default_align_t{}, 0, 0, + default_mate_t{}, sam_tag_dictionary{}, 0, 0)); + } + + ostream.flush(); + EXPECT_EQ(ostream.str(), this->empty_input); +} + TYPED_TEST_P(alignment_file_write, default_options_all_members_specified) { detail::alignment_file_output_format format; @@ -485,6 +508,47 @@ TYPED_TEST_P(alignment_file_write, default_options_all_members_specified) EXPECT_EQ(ostream.str(), this->simple_three_reads_output); } +TYPED_TEST_P(alignment_file_write, write_ref_id_with_different_types) +{ + detail::alignment_file_output_format format; + + std::ostringstream ostream; + + alignment_file_header header{std::vector{this->ref_id}}; + header.ref_id_info.push_back({this->ref_seq.size(), ""}); + header.ref_dict[this->ref_id] = 0; + + this->tag_dicts[0]["NM"_tag] = 7; + this->tag_dicts[0]["AS"_tag] = 2; + this->tag_dicts[1]["xy"_tag] = std::vector{3,4,5}; + + // header ref_id_type is std::string + + // std::string + ASSERT_NO_THROW(format.write(ostream, output_options, header, this->seqs[0], this->quals[0], this->ids[0], + this->offsets[0], std::string{}, + /*----------------------->*/ this->ref_id, + this->ref_offsets[0], this->alignments[0], + this->flags[0], this->mapqs[0], this->mates[0], this->tag_dicts[0], 0, 0)); + // std::string_view + ASSERT_NO_THROW(format.write(ostream, output_options, header, this->seqs[1], this->quals[1], this->ids[1], + this->offsets[1], std::string{}, + /*----------------------->*/ std::string_view{this->ref_id}, + this->ref_offsets[1], this->alignments[1], + this->flags[1], this->mapqs[1], this->mates[1], this->tag_dicts[1], 0, 0)); + + // view on string + ASSERT_NO_THROW(format.write(ostream, output_options, header, this->seqs[2], this->quals[2], this->ids[2], + this->offsets[2], std::string{}, + /*----------------------->*/ this->ref_id | view::take(20), + this->ref_offsets[2], this->alignments[2], + this->flags[2], this->mapqs[2], this->mates[2], this->tag_dicts[2], 0, 0)); + + ostream.flush(); + + EXPECT_EQ(ostream.str(), this->simple_three_reads_output); +} + TYPED_TEST_P(alignment_file_write, with_header) { detail::alignment_file_output_format format; @@ -595,8 +659,10 @@ REGISTER_TYPED_TEST_CASE_P(alignment_file_read, format_error_ref_id_not_in_reference_information); REGISTER_TYPED_TEST_CASE_P(alignment_file_write, + write_empty_members, output_concept, default_options_all_members_specified, + write_ref_id_with_different_types, with_header, special_cases, format_errors); diff --git a/test/unit/io/alignment_file/format_bam_test.cpp b/test/unit/io/alignment_file/format_bam_test.cpp index cb33f3e6ab..5c67c27c5d 100644 --- a/test/unit/io/alignment_file/format_bam_test.cpp +++ b/test/unit/io/alignment_file/format_bam_test.cpp @@ -146,7 +146,7 @@ struct alignment_file_read : public alignment_file_data '\x00', '\x04', '\x00', '\x00', '\x00', '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00', '\x22', '\x00', '\x00', '\x00', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\x02', '\x00', '\x48', '\x12', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\xFF', '\xFF', - '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\x00', '\x00', '\x00', '\x00', '\x2A', '\x00', '\x0A' + '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\x00', '\x00', '\x00', '\x00', '\x2A', '\x00' }; std::string empty_cigar{ diff --git a/test/unit/io/alignment_file/format_sam_test.cpp b/test/unit/io/alignment_file/format_sam_test.cpp index 901079c9f9..d111b2e51d 100644 --- a/test/unit/io/alignment_file/format_sam_test.cpp +++ b/test/unit/io/alignment_file/format_sam_test.cpp @@ -54,7 +54,7 @@ read3 43 ref 3 63 1S1M1D1M1I1M1I1D1M1S = 10 300 GGAGTATA !!*+,-./ "\tbf:B:f,3.5,0.1,43.8\n" "read3\t43\tref\t3\t63\t1S1M1D1M1I1M1I1D1M1S\tref\t10\t300\tGGAGTATA\t!!*+,-./\n"}; - std::string empty_input{"*\t0\t*\t0\t0\t*\t*\t0\t0\t*\t*\n"}; + std::string empty_input{"@HD\tVN:1.6\n@SQ\tSN:ref\tLN:34\n*\t0\t*\t0\t0\t*\t*\t0\t0\t*\t*\n"}; std::string empty_cigar{"read1\t41\tref\t1\t61\t*\tref\t10\t300\tACGT\t!##$\n"};