From 89fd17bc2402eb634ccf29b1beb00b5b89042742 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Mon, 3 Jun 2024 15:57:57 +0200 Subject: [PATCH 1/2] [MISC] Revert tag handling --- doc/cookbook/index.md | 1 - .../io/sam_file/detail/format_sam_base.hpp | 12 +- include/seqan3/io/sam_file/format_bam.hpp | 41 ++++--- include/seqan3/io/sam_file/format_sam.hpp | 41 ++++--- include/seqan3/io/sam_file/input_options.hpp | 16 +-- .../io/sam_file/sam_file_input_options.cpp | 114 ------------------ .../io/sam_file/sam_file_input_options.err | 8 -- .../sam_file_input_options.err.license | 3 - .../io/sam_file/sam_file_input_options.out | 10 -- .../sam_file_input_options.out.license | 3 - test/unit/io/sam_file/format_sam_test.cpp | 14 +-- .../sam_file_format_test_template.hpp | 65 +--------- 12 files changed, 50 insertions(+), 278 deletions(-) delete mode 100644 test/snippet/io/sam_file/sam_file_input_options.cpp delete mode 100644 test/snippet/io/sam_file/sam_file_input_options.err delete mode 100644 test/snippet/io/sam_file/sam_file_input_options.err.license delete mode 100644 test/snippet/io/sam_file/sam_file_input_options.out delete mode 100644 test/snippet/io/sam_file/sam_file_input_options.out.license diff --git a/doc/cookbook/index.md b/doc/cookbook/index.md index c5d533e68d..6122ea2a79 100644 --- a/doc/cookbook/index.md +++ b/doc/cookbook/index.md @@ -439,7 +439,6 @@ Search for keywords with `Strg + F`. \include test/snippet/io/sam_file/sam_file_input_front.cpp \include test/snippet/io/sam_file/sam_file_input_get_header.cpp \include test/snippet/io/sam_file/sam_file_input_my_traits.cpp -\include test/snippet/io/sam_file/sam_file_input_options.cpp \include test/snippet/io/sam_file/sam_file_input_reading_custom_fields.cpp \include test/snippet/io/sam_file/sam_file_input_reading_filter.cpp \include test/snippet/io/sam_file/sam_file_input_reading_move_record.cpp diff --git a/include/seqan3/io/sam_file/detail/format_sam_base.hpp b/include/seqan3/io/sam_file/detail/format_sam_base.hpp index 10f3b209c8..e84a85fdc6 100644 --- a/include/seqan3/io/sam_file/detail/format_sam_base.hpp +++ b/include/seqan3/io/sam_file/detail/format_sam_base.hpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -83,11 +82,10 @@ class format_sam_base template void read_arithmetic_field(std::string_view const & str, arithmetic_target_type & arithmetic_target); - template + template void read_header(stream_view_type && stream_view, sam_file_header & hdr, - ref_seqs_type & /*ref_id_to_pos_map*/, - sam_file_input_options const & options); + ref_seqs_type & /*ref_id_to_pos_map*/); template void write_header(stream_t & stream, sam_file_output_options const & options, header_type & header); @@ -260,7 +258,6 @@ inline void format_sam_base::read_arithmetic_field(std::string_view const & str, * \tparam stream_view_type The type of the stream as a view. * \param[in, out] stream_view The stream view to iterate over. * \param[in, out] hdr The header (as a pointer) to store the parsed values. - * \param[in] options The options to alter the parsing process. * * \throws seqan3::format_error if any unexpected character or format is encountered. * @@ -275,11 +272,10 @@ inline void format_sam_base::read_arithmetic_field(std::string_view const & str, * If any unknown tag was encountered, a warning will be emitted to std::cerr. This can be configured with * seqan3::sam_file_input_options::stream_warnings_to. */ -template +template inline void format_sam_base::read_header(stream_view_type && stream_view, sam_file_header & hdr, - ref_seqs_type & /*ref_id_to_pos_map*/, - sam_file_input_options const & options) + ref_seqs_type & /*ref_id_to_pos_map*/) { auto it = std::ranges::begin(stream_view); auto end = std::ranges::end(stream_view); diff --git a/include/seqan3/io/sam_file/format_bam.hpp b/include/seqan3/io/sam_file/format_bam.hpp index f4bc7b058a..4bfbeb2f7e 100644 --- a/include/seqan3/io/sam_file/format_bam.hpp +++ b/include/seqan3/io/sam_file/format_bam.hpp @@ -84,7 +84,7 @@ class format_bam : private detail::format_sam_base typename e_value_type, typename bit_score_type> void read_alignment_record(stream_type & stream, - sam_file_input_options const & options, + sam_file_input_options const & SEQAN3_DOXYGEN_ONLY(options), ref_seqs_type & ref_seqs, sam_file_header & header, stream_pos_type & position_buffer, @@ -257,24 +257,25 @@ template -inline void format_bam::read_alignment_record(stream_type & stream, - sam_file_input_options const & options, - ref_seqs_type & ref_seqs, - sam_file_header & header, - stream_pos_type & position_buffer, - seq_type & seq, - qual_type & qual, - id_type & id, - ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq), - ref_id_type & ref_id, - ref_offset_type & ref_offset, - cigar_type & cigar_vector, - flag_type & flag, - mapq_type & mapq, - mate_type & mate, - tag_dict_type & tag_dict, - e_value_type & SEQAN3_DOXYGEN_ONLY(e_value), - bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score)) +inline void +format_bam::read_alignment_record(stream_type & stream, + sam_file_input_options const & SEQAN3_DOXYGEN_ONLY(options), + ref_seqs_type & ref_seqs, + sam_file_header & header, + stream_pos_type & position_buffer, + seq_type & seq, + qual_type & qual, + id_type & id, + ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq), + ref_id_type & ref_id, + ref_offset_type & ref_offset, + cigar_type & cigar_vector, + flag_type & flag, + mapq_type & mapq, + mate_type & mate, + tag_dict_type & tag_dict, + e_value_type & SEQAN3_DOXYGEN_ONLY(e_value), + bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score)) { static_assert(detail::decays_to_ignore_v || detail::is_type_specialisation_of_v, @@ -304,7 +305,7 @@ inline void format_bam::read_alignment_record(stream_type & stream, read_integral_byte_field(stream_view, l_text); if (l_text > 0) // header text is present - read_header(stream_view | detail::take_exactly_or_throw(l_text), header, ref_seqs, options); + read_header(stream_view | detail::take_exactly_or_throw(l_text), header, ref_seqs); read_integral_byte_field(stream_view, n_ref); diff --git a/include/seqan3/io/sam_file/format_sam.hpp b/include/seqan3/io/sam_file/format_sam.hpp index 081a732c2c..511462f5e5 100644 --- a/include/seqan3/io/sam_file/format_sam.hpp +++ b/include/seqan3/io/sam_file/format_sam.hpp @@ -165,7 +165,7 @@ class format_sam : protected detail::format_sam_base typename e_value_type, typename bit_score_type> void read_alignment_record(stream_type & stream, - sam_file_input_options const & options, + sam_file_input_options const & SEQAN3_DOXYGEN_ONLY(options), ref_seqs_type & ref_seqs, sam_file_header & header, stream_pos_type & position_buffer, @@ -355,24 +355,25 @@ template -inline void format_sam::read_alignment_record(stream_type & stream, - sam_file_input_options const & options, - ref_seqs_type & ref_seqs, - sam_file_header & header, - stream_pos_type & position_buffer, - seq_type & seq, - qual_type & qual, - id_type & id, - ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq), - ref_id_type & ref_id, - ref_offset_type & ref_offset, - cigar_type & cigar_vector, - flag_type & flag, - mapq_type & mapq, - mate_type & mate, - tag_dict_type & tag_dict, - e_value_type & SEQAN3_DOXYGEN_ONLY(e_value), - bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score)) +inline void +format_sam::read_alignment_record(stream_type & stream, + sam_file_input_options const & SEQAN3_DOXYGEN_ONLY(options), + ref_seqs_type & ref_seqs, + sam_file_header & header, + stream_pos_type & position_buffer, + seq_type & seq, + qual_type & qual, + id_type & id, + ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq), + ref_id_type & ref_id, + ref_offset_type & ref_offset, + cigar_type & cigar_vector, + flag_type & flag, + mapq_type & mapq, + mate_type & mate, + tag_dict_type & tag_dict, + e_value_type & SEQAN3_DOXYGEN_ONLY(e_value), + bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score)) { static_assert(detail::decays_to_ignore_v || detail::is_type_specialisation_of_v, @@ -389,7 +390,7 @@ inline void format_sam::read_alignment_record(stream_type & stream, // ------------------------------------------------------------------------------------------------------------- if (is_char<'@'>(*stream_it)) // we always read the header if present { - read_header(stream_view, header, ref_seqs, options); + read_header(stream_view, header, ref_seqs); if (std::ranges::begin(stream_view) == std::ranges::end(stream_view)) // file has no records return; diff --git a/include/seqan3/io/sam_file/input_options.hpp b/include/seqan3/io/sam_file/input_options.hpp index 59a6aba985..698654c870 100644 --- a/include/seqan3/io/sam_file/input_options.hpp +++ b/include/seqan3/io/sam_file/input_options.hpp @@ -9,8 +9,6 @@ #pragma once -#include - #include namespace seqan3 @@ -23,18 +21,6 @@ namespace seqan3 */ template struct sam_file_input_options -{ - /*!\brief The stream to write warnings to. Defaults to std::cerr. - * \details - * ### Example - * \include test/snippet/io/sam_file/sam_file_input_options.cpp - * Output to std::cerr: - * \include test/snippet/io/sam_file/sam_file_input_options.err - * Output to std::cout: - * \include test/snippet/io/sam_file/sam_file_input_options.out - * \experimentalapi{Experimental since version 3.4.} - */ - std::ostream * stream_warnings_to{std::addressof(std::cerr)}; -}; +{}; } // namespace seqan3 diff --git a/test/snippet/io/sam_file/sam_file_input_options.cpp b/test/snippet/io/sam_file/sam_file_input_options.cpp deleted file mode 100644 index e17404ecbf..0000000000 --- a/test/snippet/io/sam_file/sam_file_input_options.cpp +++ /dev/null @@ -1,114 +0,0 @@ -// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin -// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik -// SPDX-License-Identifier: CC0-1.0 - -#include - -#include - -// A helper struct to create a temporary file and remove it when it goes out of scope. -struct temporary_file -{ - std::filesystem::path const path{std::filesystem::temp_directory_path() / "warnings.txt"}; - - temporary_file() - { - std::ofstream file{path}; // Create file - } - temporary_file(temporary_file const &) = delete; - temporary_file & operator=(temporary_file const &) = delete; - temporary_file(temporary_file &&) = delete; - temporary_file & operator=(temporary_file &&) = delete; - ~temporary_file() - { - std::filesystem::remove(path); - } - - std::string read_content() const - { - std::ifstream file{path}; - return std::string{std::istreambuf_iterator{file}, std::istreambuf_iterator{}}; - } -}; - -static constexpr auto sam_file_raw = R"(@HD VN:1.6 pb:5.0.0 ot:ter -@SQ SN:ref LN:34 -)"; - -static auto get_sam_file_input() -{ - return seqan3::sam_file_input{std::istringstream{sam_file_raw}, seqan3::format_sam{}}; -} - -void defaults_to_cerr() -{ - auto fin = get_sam_file_input(); - auto it = fin.begin(); -} - -void redirect_to_cout() -{ - auto fin = get_sam_file_input(); - fin.options.stream_warnings_to = std::addressof(std::cout); // Equivalent to `= &std::cout;` - auto it = fin.begin(); -} - -void redirect_to_file() -{ - temporary_file tmp_file{}; - auto fin = get_sam_file_input(); - - { // Inner scope to close file before reading - std::ofstream warning_file{tmp_file.path}; - fin.options.stream_warnings_to = std::addressof(warning_file); // Equivalent to `= &warning_file;` - auto it = fin.begin(); - } - - std::cout << "File content:\n" << tmp_file.read_content(); -} - -void silence_warnings() -{ - auto fin = get_sam_file_input(); - fin.options.stream_warnings_to = nullptr; - auto it = fin.begin(); -} - -void filter() -{ - auto fin = get_sam_file_input(); - std::stringstream stream{}; - fin.options.stream_warnings_to = std::addressof(stream); // Equivalent to `= &stream;` - auto it = fin.begin(); - - for (std::string line{}; std::getline(stream, line);) - { - // If "pb" is not found in the warning, print it to cerr. - if (line.find("pb") == std::string::npos) // C++23: `!line.contains("pb")` - std::cerr << line << '\n'; - } -} - -void print_section(std::string_view const section) -{ - std::cout << "### " << section << " ###\n"; - std::cerr << "### " << section << " ###\n"; -} - -int main() -{ - print_section("defaults_to_cerr"); - defaults_to_cerr(); - - print_section("redirect_to_cout"); - redirect_to_cout(); - - print_section("redirect_to_file"); - redirect_to_file(); - - print_section("silence_warnings"); - silence_warnings(); - - print_section("filter"); - filter(); -} diff --git a/test/snippet/io/sam_file/sam_file_input_options.err b/test/snippet/io/sam_file/sam_file_input_options.err deleted file mode 100644 index 08a5fea99b..0000000000 --- a/test/snippet/io/sam_file/sam_file_input_options.err +++ /dev/null @@ -1,8 +0,0 @@ -### defaults_to_cerr ### -Unsupported tag found in SAM header @HD: "pb:5.0.0" -Unsupported tag found in SAM header @HD: "ot:ter" -### redirect_to_cout ### -### redirect_to_file ### -### silence_warnings ### -### filter ### -Unsupported tag found in SAM header @HD: "ot:ter" diff --git a/test/snippet/io/sam_file/sam_file_input_options.err.license b/test/snippet/io/sam_file/sam_file_input_options.err.license deleted file mode 100644 index b8b3e60969..0000000000 --- a/test/snippet/io/sam_file/sam_file_input_options.err.license +++ /dev/null @@ -1,3 +0,0 @@ -SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin -SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik -SPDX-License-Identifier: CC0-1.0 diff --git a/test/snippet/io/sam_file/sam_file_input_options.out b/test/snippet/io/sam_file/sam_file_input_options.out deleted file mode 100644 index 809310587e..0000000000 --- a/test/snippet/io/sam_file/sam_file_input_options.out +++ /dev/null @@ -1,10 +0,0 @@ -### defaults_to_cerr ### -### redirect_to_cout ### -Unsupported tag found in SAM header @HD: "pb:5.0.0" -Unsupported tag found in SAM header @HD: "ot:ter" -### redirect_to_file ### -File content: -Unsupported tag found in SAM header @HD: "pb:5.0.0" -Unsupported tag found in SAM header @HD: "ot:ter" -### silence_warnings ### -### filter ### diff --git a/test/snippet/io/sam_file/sam_file_input_options.out.license b/test/snippet/io/sam_file/sam_file_input_options.out.license deleted file mode 100644 index b8b3e60969..0000000000 --- a/test/snippet/io/sam_file/sam_file_input_options.out.license +++ /dev/null @@ -1,3 +0,0 @@ -SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin -SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik -SPDX-License-Identifier: CC0-1.0 diff --git a/test/unit/io/sam_file/format_sam_test.cpp b/test/unit/io/sam_file/format_sam_test.cpp index 8b724974c9..8d2f519570 100644 --- a/test/unit/io/sam_file/format_sam_test.cpp +++ b/test/unit/io/sam_file/format_sam_test.cpp @@ -173,29 +173,19 @@ TEST_F(sam_format, header_errors) seqan3::sam_file_input fin{istream, seqan3::format_sam{}}; EXPECT_NO_THROW(fin.begin()); } - { // user defined tags should not trigger errors, but print warnings to cerr + { // user defined tags should not trigger errors std::string header_str{ "@HD\tVN:1.6\tVB:user_tag\tSB:user_tag\tGB:user_tag\tpb:user_tag\n" "@SQ\tSN:ref2\tLN:243199373\tSB:user_tag\tLB:user_tag\tpb:user_tag\n" "@RG\tID:U0a_A2_L1\tIB:user_tag\tpb:user_tag\n" "@PG\tID:qc\tIB:user_tag\tPB:user_tag\tCB:user_tag\tDB:user_tag\tVB:user_tag\tpb:user_tag\n"}; - std::string expected_cerr{"Unsupported tag found in SAM header @HD: \"VB:user_tag\"\n" - "Unsupported tag found in SAM header @HD: \"SB:user_tag\"\n" - "Unsupported tag found in SAM header @HD: \"GB:user_tag\"\n" - "Unsupported tag found in SAM header @HD: \"pb:user_tag\"\n" - "Unsupported tag found in SAM header @PG: \"IB:user_tag\"\n" - "Unsupported tag found in SAM header @PG: \"PB:user_tag\"\n" - "Unsupported tag found in SAM header @PG: \"CB:user_tag\"\n" - "Unsupported tag found in SAM header @PG: \"DB:user_tag\"\n" - "Unsupported tag found in SAM header @PG: \"VB:user_tag\"\n" - "Unsupported tag found in SAM header @PG: \"pb:user_tag\"\n"}; std::istringstream istream(header_str); seqan3::sam_file_input fin{istream, seqan3::format_sam{}}; testing::internal::CaptureStderr(); EXPECT_NO_THROW(fin.begin()); - EXPECT_EQ(testing::internal::GetCapturedStderr(), expected_cerr); + EXPECT_EQ(testing::internal::GetCapturedStderr(), ""); } { // missing VN tag in @HD std::string header_str{"@HD\n"}; diff --git a/test/unit/io/sam_file/sam_file_format_test_template.hpp b/test/unit/io/sam_file/sam_file_format_test_template.hpp index b160972fb4..2ef33776ee 100644 --- a/test/unit/io/sam_file/sam_file_format_test_template.hpp +++ b/test/unit/io/sam_file/sam_file_format_test_template.hpp @@ -19,7 +19,6 @@ #include #include #include -#include using seqan3::operator""_cigar_operation; using seqan3::operator""_dna5; @@ -359,69 +358,7 @@ TYPED_TEST_P(sam_file_read, issue2423) } TYPED_TEST_P(sam_file_read, unknown_header_tag) -{ - constexpr std::string_view expected_warning = "Unsupported tag found in SAM header @HD: \"pb:5.0.0\"\n" - "Unsupported tag found in SAM header @HD: \"otter\"\n" - "Unsupported tag found in SAM header @PG: \"pb:5.0.0\"\n" - "Unsupported tag found in SAM header @PG: \"otter\"\n"; - // Default: Warnings to cerr - { - typename TestFixture::stream_type istream{this->unknown_tag_header}; - seqan3::sam_file_input fin{istream, TypeParam{}}; - testing::internal::CaptureStdout(); - testing::internal::CaptureStderr(); - EXPECT_NO_THROW(fin.begin()); - EXPECT_EQ(testing::internal::GetCapturedStdout(), ""); - EXPECT_EQ(testing::internal::GetCapturedStderr(), expected_warning); - } - // Redirect to cout - { - typename TestFixture::stream_type istream{this->unknown_tag_header}; - seqan3::sam_file_input fin{istream, TypeParam{}}; - fin.options.stream_warnings_to = std::addressof(std::cout); - testing::internal::CaptureStdout(); - testing::internal::CaptureStderr(); - EXPECT_NO_THROW(fin.begin()); - EXPECT_EQ(testing::internal::GetCapturedStdout(), expected_warning); - EXPECT_EQ(testing::internal::GetCapturedStderr(), ""); - } - // Redirect to file - { - seqan3::test::tmp_directory tmp{}; - auto filename = tmp.path() / "warnings.txt"; - - // Scope for ofstream-RAII - { - std::ofstream warning_file{filename}; - ASSERT_TRUE(warning_file.good()); - - typename TestFixture::stream_type istream{this->unknown_tag_header}; - seqan3::sam_file_input fin{istream, TypeParam{}}; - fin.options.stream_warnings_to = std::addressof(warning_file); - testing::internal::CaptureStdout(); - testing::internal::CaptureStderr(); - EXPECT_NO_THROW(fin.begin()); - EXPECT_EQ(testing::internal::GetCapturedStdout(), ""); - EXPECT_EQ(testing::internal::GetCapturedStderr(), ""); - } - - std::ifstream warning_file{filename}; - ASSERT_TRUE(warning_file.good()); - std::string content{std::istreambuf_iterator(warning_file), std::istreambuf_iterator()}; - EXPECT_EQ(content, expected_warning); - } - // Silence - { - typename TestFixture::stream_type istream{this->unknown_tag_header}; - seqan3::sam_file_input fin{istream, TypeParam{}}; - fin.options.stream_warnings_to = nullptr; - testing::internal::CaptureStdout(); - testing::internal::CaptureStderr(); - EXPECT_NO_THROW(fin.begin()); - EXPECT_EQ(testing::internal::GetCapturedStdout(), ""); - EXPECT_EQ(testing::internal::GetCapturedStderr(), ""); - } -} +{} // ---------------------------------------------------------------------------- // sam_file_write From d0eabcf8adbe5a5674bee1211cb0da501229c884 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Mon, 3 Jun 2024 15:58:33 +0200 Subject: [PATCH 2/2] [FIX] Accept user-defined tags --- .../io/sam_file/detail/format_sam_base.hpp | 33 +++-- include/seqan3/io/sam_file/header.hpp | 3 + include/seqan3/io/sam_file/input_options.hpp | 2 + test/unit/io/sam_file/format_bam_test.cpp | 114 ++++++++++-------- test/unit/io/sam_file/format_sam_test.cpp | 11 +- .../sam_file_format_test_template.hpp | 18 ++- 6 files changed, 103 insertions(+), 78 deletions(-) diff --git a/include/seqan3/io/sam_file/detail/format_sam_base.hpp b/include/seqan3/io/sam_file/detail/format_sam_base.hpp index e84a85fdc6..c3e8553364 100644 --- a/include/seqan3/io/sam_file/detail/format_sam_base.hpp +++ b/include/seqan3/io/sam_file/detail/format_sam_base.hpp @@ -269,8 +269,11 @@ inline void format_sam_base::read_arithmetic_field(std::string_view const & str, * The function throws a seqan3::format_error if the format is not in a correct state (e.g. required fields are not * given), but throwing might occur downstream of the actual error. * - * If any unknown tag was encountered, a warning will be emitted to std::cerr. This can be configured with - * seqan3::sam_file_input_options::stream_warnings_to. + * Any user-defined tags are not checked for correctness ([TAG]:[VALUE]) and are stored as strings: + * * HD: seqan3::sam_file_header::user_tags + * * SQ: seqan3::sam_file_header::ref_id_info + * * RG: seqan3::sam_file_header::read_groups + * * PG: seqan3::sam_file_header::program_infos / seqan3::sam_file_program_info_t::user_tags */ template inline void format_sam_base::read_header(stream_view_type && stream_view, @@ -337,20 +340,6 @@ inline void format_sam_base::read_header(stream_view_type && stream_view, read_forward_range_field(string_buffer, value); }; - auto consume_unsupported_tag_and_print_warning = - [&](char const * const header_tag, std::array const raw_tag) - { - // Not using `copy_next_tag_value_into_buffer` because we do not care whether the tag is valid. - // E.g., `pb5.0.0` instead of `pb:5.0.0`, would break the parsing if we used `copy_next_tag_value_into_buffer`. - take_until_predicate(is_char<'\t'> || is_char<'\n'>); - - if (options.stream_warnings_to == nullptr) - return; - - *options.stream_warnings_to << "Unsupported tag found in SAM header @" << header_tag << ": \"" << raw_tag[0] - << raw_tag[1] << string_buffer << "\"\n"; - }; - while (it != end && is_char<'@'>(*it)) { ++it; // skip @ @@ -387,9 +376,9 @@ inline void format_sam_base::read_header(stream_view_type && stream_view, header_entry = std::addressof(hdr.grouping); break; } - default: // unsupported header tag + default: // unknown/user tag { - consume_unsupported_tag_and_print_warning("HD", raw_tag); + parse_and_append_unhandled_tag_to_string(hdr.user_tags, raw_tag); } } @@ -561,7 +550,7 @@ inline void format_sam_base::read_header(stream_view_type && stream_view, } default: // unsupported header tag { - consume_unsupported_tag_and_print_warning("PG", raw_tag); + parse_and_append_unhandled_tag_to_string(tmp.user_tags, raw_tag); } } @@ -665,6 +654,9 @@ format_sam_base::write_header(stream_t & stream, sam_file_output_options const & if (!header.grouping.empty()) stream << "\tGO:" << header.grouping; + if (!header.user_tags.empty()) + stream << '\t' << header.user_tags; + detail::write_eol(stream_it, options.add_carriage_return); // (@SQ) Write Reference Sequence Dictionary lines [required]. @@ -715,6 +707,9 @@ format_sam_base::write_header(stream_t & stream, sam_file_output_options const & if (!program.version.empty()) stream << "\tVN:" << program.version; + if (!program.user_tags.empty()) + stream << '\t' << program.user_tags; + detail::write_eol(stream_it, options.add_carriage_return); } diff --git a/include/seqan3/io/sam_file/header.hpp b/include/seqan3/io/sam_file/header.hpp index 5947af304d..f52f7aa0cf 100644 --- a/include/seqan3/io/sam_file/header.hpp +++ b/include/seqan3/io/sam_file/header.hpp @@ -34,6 +34,7 @@ struct sam_file_program_info_t std::string previous; //!< The id of the previous program if program calls were chained. std::string description; //!< A description of the program and/or program call. std::string version; //!< The program/tool version. + std::string user_tags; //!< Additional user-defined tags. }; /*!\brief Stores the header information of SAM/BAM files. @@ -213,6 +214,8 @@ class sam_file_header * * **SM:** Sample. Use pool name where a pool is being sequenced. */ std::vector> read_groups; + + std::string user_tags; //!< Additional user-defined tags. }; } // namespace seqan3 diff --git a/include/seqan3/io/sam_file/input_options.hpp b/include/seqan3/io/sam_file/input_options.hpp index 698654c870..6f2185b312 100644 --- a/include/seqan3/io/sam_file/input_options.hpp +++ b/include/seqan3/io/sam_file/input_options.hpp @@ -17,6 +17,8 @@ namespace seqan3 /*!\brief The options type defines various option members that influence the behaviour of all or some formats. * \ingroup io_sam_file * + * \note As of now, there are no specific options for the SAM format. This class may be used in the future for possible + * SAM parsing extensions. * \remark For a complete overview, take a look at \ref io_sam_file */ template diff --git a/test/unit/io/sam_file/format_bam_test.cpp b/test/unit/io/sam_file/format_bam_test.cpp index 82f3c2a154..e3d436efe3 100644 --- a/test/unit/io/sam_file/format_bam_test.cpp +++ b/test/unit/io/sam_file/format_bam_test.cpp @@ -38,14 +38,24 @@ struct sam_file_read : public sam_file_data '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'}; std::string unknown_tag_header{ - '\x42', '\x41', '\x4d', '\x01', '\x4b', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', - '\x4e', '\x3a', '\x31', '\x2e', '\x36', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', - '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53', - '\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09', '\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x0a', '\x40', - '\x50', '\x47', '\x09', '\x49', '\x44', '\x3a', '\x6e', '\x6f', '\x76', '\x6f', '\x61', '\x6c', '\x69', - '\x67', '\x6e', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f', - '\x74', '\x74', '\x65', '\x72', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', '\x00', - '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'}; + '\x42', '\x41', '\x4d', '\x01', '\xe3', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4e', + '\x3a', '\x31', '\x2e', '\x36', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', '\x09', + '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53', '\x4e', '\x3a', '\x72', + '\x65', '\x66', '\x09', '\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', + '\x30', '\x2e', '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x52', '\x47', '\x09', + '\x49', '\x44', '\x3a', '\x52', '\x31', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', + '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x50', '\x47', '\x09', '\x49', '\x44', '\x3a', + '\x6e', '\x6f', '\x76', '\x6f', '\x61', '\x6c', '\x69', '\x67', '\x6e', '\x09', '\x70', '\x62', '\x3a', '\x35', + '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x50', '\x47', + '\x09', '\x49', '\x44', '\x3a', '\x73', '\x61', '\x6d', '\x74', '\x6f', '\x6f', '\x6c', '\x73', '\x09', '\x50', + '\x4e', '\x3a', '\x73', '\x61', '\x6d', '\x74', '\x6f', '\x6f', '\x6c', '\x73', '\x09', '\x50', '\x50', '\x3a', + '\x6e', '\x6f', '\x76', '\x6f', '\x61', '\x6c', '\x69', '\x67', '\x6e', '\x09', '\x56', '\x4e', '\x3a', '\x31', + '\x2e', '\x31', '\x33', '\x09', '\x43', '\x4c', '\x3a', '\x73', '\x61', '\x6d', '\x74', '\x6f', '\x6f', '\x6c', + '\x73', '\x20', '\x76', '\x69', '\x65', '\x77', '\x20', '\x2d', '\x48', '\x20', '\x2d', '\x62', '\x20', '\x2f', + '\x68', '\x6f', '\x6d', '\x65', '\x2f', '\x69', '\x6e', '\x66', '\x72', '\x69', '\x2f', '\x64', '\x65', '\x76', + '\x65', '\x6c', '\x6f', '\x70', '\x2f', '\x73', '\x65', '\x71', '\x61', '\x6e', '\x33', '\x2f', '\x74', '\x65', + '\x73', '\x74', '\x2e', '\x73', '\x61', '\x6d', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', + '\x00', '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'}; std::string big_header_input{ '\x42', '\x41', '\x4D', '\x01', '\xB7', '\x01', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E', @@ -316,47 +326,51 @@ struct sam_file_read : public sam_file_data '\x66', '\x66', '\x66', '\x46', '\x40', '\x7A', '\x7A', '\x5A', '\x73', '\x74', '\x72', '\x00', '\x0A'}; std::string verbose_output{ - '\x42', '\x41', '\x4d', '\x01', '\xa6', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4e', + '\x42', '\x41', '\x4d', '\x01', '\xe2', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4e', '\x3a', '\x31', '\x2e', '\x36', '\x09', '\x53', '\x4f', '\x3a', '\x75', '\x6e', '\x6b', '\x6e', '\x6f', '\x77', - '\x6e', '\x09', '\x47', '\x4f', '\x3a', '\x6e', '\x6f', '\x6e', '\x65', '\x0a', '\x40', '\x53', '\x51', '\x09', - '\x53', '\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09', '\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x09', '\x41', - '\x4e', '\x3a', '\x6f', '\x74', '\x68', '\x65', '\x72', '\x5f', '\x6e', '\x61', '\x6d', '\x65', '\x0a', '\x40', - '\x52', '\x47', '\x09', '\x49', '\x44', '\x3a', '\x67', '\x72', '\x6f', '\x75', '\x70', '\x31', '\x09', '\x44', - '\x53', '\x3a', '\x6d', '\x6f', '\x72', '\x65', '\x20', '\x69', '\x6e', '\x66', '\x6f', '\x0a', '\x40', '\x50', - '\x47', '\x09', '\x49', '\x44', '\x3a', '\x70', '\x72', '\x6f', '\x67', '\x31', '\x09', '\x50', '\x4e', '\x3a', - '\x63', '\x6f', '\x6f', '\x6c', '\x5f', '\x70', '\x72', '\x6f', '\x67', '\x72', '\x61', '\x6d', '\x09', '\x43', - '\x4c', '\x3a', '\x2e', '\x2f', '\x70', '\x72', '\x6f', '\x67', '\x31', '\x09', '\x50', '\x50', '\x3a', '\x61', - '\x09', '\x44', '\x53', '\x3a', '\x62', '\x09', '\x56', '\x4e', '\x3a', '\x63', '\x0a', '\x40', '\x43', '\x4f', - '\x09', '\x54', '\x68', '\x69', '\x73', '\x20', '\x69', '\x73', '\x20', '\x61', '\x20', '\x63', '\x6f', '\x6d', - '\x6d', '\x65', '\x6e', '\x74', '\x2e', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', '\x00', - '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00', '\x64', '\x00', '\x00', '\x00', '\x00', '\x00', - '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x06', '\x3d', '\x49', '\x12', '\x05', '\x00', '\x29', '\x00', - '\x04', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2c', '\x01', - '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x31', '\x00', '\x14', '\x00', '\x00', '\x00', '\x10', '\x00', - '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00', - '\x12', '\x48', '\x00', '\x02', '\x02', '\x03', '\x41', '\x53', '\x43', '\x02', '\x43', '\x43', '\x53', '\x2c', - '\x01', '\x4e', '\x4d', '\x63', '\xf9', '\x61', '\x61', '\x41', '\x63', '\x63', '\x63', '\x73', '\xd4', '\xfe', - '\x66', '\x66', '\x66', '\x66', '\x66', '\x46', '\x40', '\x7a', '\x7a', '\x5a', '\x73', '\x74', '\x72', '\x00', - '\xaf', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x01', '\x00', '\x00', '\x00', '\x06', '\x3e', - '\x49', '\x12', '\x06', '\x00', '\x2a', '\x00', '\x09', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', - '\x09', '\x00', '\x00', '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x32', '\x00', - '\x15', '\x00', '\x00', '\x00', '\x70', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', - '\x00', '\x00', '\x14', '\x00', '\x00', '\x00', '\x25', '\x00', '\x00', '\x00', '\x14', '\x42', '\x84', '\xf1', - '\x40', '\x00', '\x02', '\x02', '\x03', '\x05', '\x06', '\x07', '\x08', '\x09', '\x62', '\x43', '\x42', '\x43', - '\x02', '\x00', '\x00', '\x00', '\x03', '\xc8', '\x62', '\x49', '\x42', '\x49', '\x01', '\x00', '\x00', '\x00', - '\x00', '\xd8', '\x94', '\x11', '\x62', '\x53', '\x42', '\x53', '\x03', '\x00', '\x00', '\x00', '\x2c', '\x01', - '\x28', '\x00', '\xf4', '\x01', '\x62', '\x63', '\x42', '\x63', '\x01', '\x00', '\x00', '\x00', '\xfd', '\x62', - '\x66', '\x42', '\x66', '\x03', '\x00', '\x00', '\x00', '\x00', '\x00', '\x60', '\x40', '\xcd', '\xcc', '\xcc', - '\x3d', '\x33', '\x33', '\x2f', '\x42', '\x62', '\x69', '\x42', '\x69', '\x03', '\x00', '\x00', '\x00', '\xfd', - '\xff', '\xff', '\xff', '\xc8', '\x00', '\x00', '\x00', '\x30', '\xfe', '\xfe', '\xff', '\x62', '\x73', '\x42', - '\x73', '\x03', '\x00', '\x00', '\x00', '\xfd', '\xff', '\xc8', '\x00', '\xd4', '\xfe', '\x5a', '\x00', '\x00', - '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\x00', '\x00', '\x00', '\x06', '\x3f', '\x49', '\x12', '\x0a', - '\x00', '\x2b', '\x00', '\x08', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', - '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x33', '\x00', '\x14', '\x00', '\x00', - '\x00', '\x10', '\x00', '\x00', '\x00', '\x16', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', - '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', - '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00', '\x00', '\x00', '\x44', '\x14', '\x81', '\x81', '\x00', - '\x00', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e'}; + '\x6e', '\x09', '\x47', '\x4f', '\x3a', '\x6e', '\x6f', '\x6e', '\x65', '\x09', '\x70', '\x62', '\x3a', '\x35', + '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x53', '\x51', + '\x09', '\x53', '\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09', '\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x09', + '\x41', '\x4e', '\x3a', '\x6f', '\x74', '\x68', '\x65', '\x72', '\x5f', '\x6e', '\x61', '\x6d', '\x65', '\x09', + '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', + '\x0a', '\x40', '\x52', '\x47', '\x09', '\x49', '\x44', '\x3a', '\x67', '\x72', '\x6f', '\x75', '\x70', '\x31', + '\x09', '\x44', '\x53', '\x3a', '\x6d', '\x6f', '\x72', '\x65', '\x20', '\x69', '\x6e', '\x66', '\x6f', '\x09', + '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', + '\x0a', '\x40', '\x50', '\x47', '\x09', '\x49', '\x44', '\x3a', '\x70', '\x72', '\x6f', '\x67', '\x31', '\x09', + '\x50', '\x4e', '\x3a', '\x63', '\x6f', '\x6f', '\x6c', '\x5f', '\x70', '\x72', '\x6f', '\x67', '\x72', '\x61', + '\x6d', '\x09', '\x43', '\x4c', '\x3a', '\x2e', '\x2f', '\x70', '\x72', '\x6f', '\x67', '\x31', '\x09', '\x50', + '\x50', '\x3a', '\x61', '\x09', '\x44', '\x53', '\x3a', '\x62', '\x09', '\x56', '\x4e', '\x3a', '\x63', '\x09', + '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', + '\x0a', '\x40', '\x43', '\x4f', '\x09', '\x54', '\x68', '\x69', '\x73', '\x20', '\x69', '\x73', '\x20', '\x61', + '\x20', '\x63', '\x6f', '\x6d', '\x6d', '\x65', '\x6e', '\x74', '\x2e', '\x0a', '\x01', '\x00', '\x00', '\x00', + '\x04', '\x00', '\x00', '\x00', '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00', '\x64', '\x00', + '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x06', '\x3d', '\x49', '\x12', + '\x05', '\x00', '\x29', '\x00', '\x04', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', + '\x00', '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x31', '\x00', '\x14', '\x00', + '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', + '\x11', '\x00', '\x00', '\x00', '\x12', '\x48', '\x00', '\x02', '\x02', '\x03', '\x41', '\x53', '\x43', '\x02', + '\x43', '\x43', '\x53', '\x2c', '\x01', '\x4e', '\x4d', '\x63', '\xf9', '\x61', '\x61', '\x41', '\x63', '\x63', + '\x63', '\x73', '\xd4', '\xfe', '\x66', '\x66', '\x66', '\x66', '\x66', '\x46', '\x40', '\x7a', '\x7a', '\x5a', + '\x73', '\x74', '\x72', '\x00', '\xaf', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x01', '\x00', + '\x00', '\x00', '\x06', '\x3e', '\x49', '\x12', '\x06', '\x00', '\x2a', '\x00', '\x09', '\x00', '\x00', '\x00', + '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', + '\x61', '\x64', '\x32', '\x00', '\x15', '\x00', '\x00', '\x00', '\x70', '\x00', '\x00', '\x00', '\x12', '\x00', + '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00', '\x00', '\x00', '\x25', '\x00', '\x00', '\x00', + '\x14', '\x42', '\x84', '\xf1', '\x40', '\x00', '\x02', '\x02', '\x03', '\x05', '\x06', '\x07', '\x08', '\x09', + '\x62', '\x43', '\x42', '\x43', '\x02', '\x00', '\x00', '\x00', '\x03', '\xc8', '\x62', '\x49', '\x42', '\x49', + '\x01', '\x00', '\x00', '\x00', '\x00', '\xd8', '\x94', '\x11', '\x62', '\x53', '\x42', '\x53', '\x03', '\x00', + '\x00', '\x00', '\x2c', '\x01', '\x28', '\x00', '\xf4', '\x01', '\x62', '\x63', '\x42', '\x63', '\x01', '\x00', + '\x00', '\x00', '\xfd', '\x62', '\x66', '\x42', '\x66', '\x03', '\x00', '\x00', '\x00', '\x00', '\x00', '\x60', + '\x40', '\xcd', '\xcc', '\xcc', '\x3d', '\x33', '\x33', '\x2f', '\x42', '\x62', '\x69', '\x42', '\x69', '\x03', + '\x00', '\x00', '\x00', '\xfd', '\xff', '\xff', '\xff', '\xc8', '\x00', '\x00', '\x00', '\x30', '\xfe', '\xfe', + '\xff', '\x62', '\x73', '\x42', '\x73', '\x03', '\x00', '\x00', '\x00', '\xfd', '\xff', '\xc8', '\x00', '\xd4', + '\xfe', '\x5a', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\x00', '\x00', '\x00', '\x06', + '\x3f', '\x49', '\x12', '\x0a', '\x00', '\x2b', '\x00', '\x08', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', + '\x00', '\x09', '\x00', '\x00', '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x33', + '\x00', '\x14', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x16', '\x00', '\x00', '\x00', '\x10', + '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', + '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00', '\x00', '\x00', '\x44', + '\x14', '\x81', '\x81', '\x00', '\x00', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e'}; std::string special_output{ '\x42', '\x41', '\x4D', '\x01', '\x1C', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E', @@ -593,12 +607,12 @@ TEST_F(bam_format, too_long_cigar_string_read) TEST_F(bam_format, too_long_cigar_string_write) { - auto read = seqan3::views::repeat_n('T'_dna5, 70'000); + auto read = seqan3::views::repeat_n('T'_dna5, 70000); auto ref = seqan3::views::repeat_n('A'_dna5, 2 * read.size() - 1); // create a cigar with more than 65535 cigar elements std::vector too_long_cigar{}; - for (size_t i = 0; i < 69'999; ++i) + for (size_t i = 0; i < 69999; ++i) { too_long_cigar.push_back({1, 'M'_cigar_operation}); too_long_cigar.push_back({1, 'D'_cigar_operation}); diff --git a/test/unit/io/sam_file/format_sam_test.cpp b/test/unit/io/sam_file/format_sam_test.cpp index 8d2f519570..f69cca440e 100644 --- a/test/unit/io/sam_file/format_sam_test.cpp +++ b/test/unit/io/sam_file/format_sam_test.cpp @@ -24,7 +24,8 @@ struct sam_file_read : public sam_file_data // However, encountering such a tag should not break the parsing. std::string unknown_tag_header{ R"(@HD VN:1.6 pb:5.0.0 otter -@SQ SN:ref LN:34 +@SQ SN:ref LN:34 pb:5.0.0 otter +@RG ID:R1 pb:5.0.0 otter @PG ID:novoalign pb:5.0.0 otter )"}; @@ -87,10 +88,10 @@ read3 43 ref 3 63 1S1M1P1M1I1M1I1D1M1S ref 10 300 GGAGTATA !!*+,-./ // ----------------------------------------------------------------------------------------------------------------- std::string verbose_output{ - R"(@HD VN:1.6 SO:unknown GO:none -@SQ SN:ref LN:34 AN:other_name -@RG ID:group1 DS:more info -@PG ID:prog1 PN:cool_program CL:./prog1 PP:a DS:b VN:c + R"(@HD VN:1.6 SO:unknown GO:none pb:5.0.0 otter +@SQ SN:ref LN:34 AN:other_name pb:5.0.0 otter +@RG ID:group1 DS:more info pb:5.0.0 otter +@PG ID:prog1 PN:cool_program CL:./prog1 PP:a DS:b VN:c pb:5.0.0 otter @CO This is a comment. read1 41 ref 1 61 1S1M1D1M1I ref 10 300 ACGT !##$ AS:i:2 CC:i:300 NM:i:-7 aa:A:c cc:i:-300 ff:f:3.1 zz:Z:str read2 42 ref 2 62 1H7M1D1M1S2H ref 10 300 AGGCTGNAG !##$&'()* bC:B:C,3,200 bI:B:I,294967296 bS:B:S,300,40,500 bc:B:c,-3 bf:B:f,3.5,0.1,43.8 bi:B:i,-3,200,-66000 bs:B:s,-3,200,-300 diff --git a/test/unit/io/sam_file/sam_file_format_test_template.hpp b/test/unit/io/sam_file/sam_file_format_test_template.hpp index 2ef33776ee..6f9b3bc01a 100644 --- a/test/unit/io/sam_file/sam_file_format_test_template.hpp +++ b/test/unit/io/sam_file/sam_file_format_test_template.hpp @@ -358,7 +358,16 @@ TYPED_TEST_P(sam_file_read, issue2423) } TYPED_TEST_P(sam_file_read, unknown_header_tag) -{} +{ + typename TestFixture::stream_type istream{this->unknown_tag_header}; + seqan3::sam_file_input fin{istream, TypeParam{}}; + ASSERT_NO_THROW(fin.begin()); + + EXPECT_EQ(fin.header().user_tags, "pb:5.0.0\totter"); // HD + EXPECT_EQ(std::get<1>(fin.header().ref_id_info.front()), "pb:5.0.0\totter"); // SQ + EXPECT_EQ(std::get<1>(fin.header().read_groups.front()), "pb:5.0.0\totter"); // RG + EXPECT_EQ(fin.header().program_infos.front().user_tags, "pb:5.0.0\totter"); // PG +} // ---------------------------------------------------------------------------- // sam_file_write @@ -510,11 +519,12 @@ TYPED_TEST_P(sam_file_write, with_header) seqan3::sam_file_header header{std::vector{this->ref_id}}; header.sorting = "unknown"; header.grouping = "none"; - header.ref_id_info.push_back({this->ref_seq.size(), "AN:other_name"}); + header.ref_id_info.push_back({this->ref_seq.size(), "AN:other_name\tpb:5.0.0\totter"}); header.ref_dict[this->ref_id] = 0; - header.program_infos.push_back({"prog1", "cool_program", "./prog1", "a", "b", "c"}); - header.read_groups.emplace_back("group1", "DS:more info"); + header.program_infos.push_back({"prog1", "cool_program", "./prog1", "a", "b", "c", "pb:5.0.0\totter"}); + header.read_groups.emplace_back("group1", "DS:more info\tpb:5.0.0\totter"); header.comments.push_back("This is a comment."); + header.user_tags = "pb:5.0.0\totter"; { seqan3::sam_file_output fout{this->ostream, TypeParam{}, sam_fields{}};