From 32112bd38b85c37a8997c16063b02a699c1b1e12 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Mon, 3 Jun 2024 15:58:33 +0200 Subject: [PATCH] [FIX] Accept user-defined tags --- .../io/sam_file/detail/format_sam_base.hpp | 33 +++--- include/seqan3/io/sam_file/header.hpp | 3 + include/seqan3/io/sam_file/input_options.hpp | 2 + test/unit/io/sam_file/format_bam_test.cpp | 110 ++++++++++-------- test/unit/io/sam_file/format_sam_test.cpp | 11 +- .../sam_file_format_test_template.hpp | 18 ++- 6 files changed, 101 insertions(+), 76 deletions(-) diff --git a/include/seqan3/io/sam_file/detail/format_sam_base.hpp b/include/seqan3/io/sam_file/detail/format_sam_base.hpp index e84a85fdc6..c3e8553364 100644 --- a/include/seqan3/io/sam_file/detail/format_sam_base.hpp +++ b/include/seqan3/io/sam_file/detail/format_sam_base.hpp @@ -269,8 +269,11 @@ inline void format_sam_base::read_arithmetic_field(std::string_view const & str, * The function throws a seqan3::format_error if the format is not in a correct state (e.g. required fields are not * given), but throwing might occur downstream of the actual error. * - * If any unknown tag was encountered, a warning will be emitted to std::cerr. This can be configured with - * seqan3::sam_file_input_options::stream_warnings_to. + * Any user-defined tags are not checked for correctness ([TAG]:[VALUE]) and are stored as strings: + * * HD: seqan3::sam_file_header::user_tags + * * SQ: seqan3::sam_file_header::ref_id_info + * * RG: seqan3::sam_file_header::read_groups + * * PG: seqan3::sam_file_header::program_infos / seqan3::sam_file_program_info_t::user_tags */ template inline void format_sam_base::read_header(stream_view_type && stream_view, @@ -337,20 +340,6 @@ inline void format_sam_base::read_header(stream_view_type && stream_view, read_forward_range_field(string_buffer, value); }; - auto consume_unsupported_tag_and_print_warning = - [&](char const * const header_tag, std::array const raw_tag) - { - // Not using `copy_next_tag_value_into_buffer` because we do not care whether the tag is valid. - // E.g., `pb5.0.0` instead of `pb:5.0.0`, would break the parsing if we used `copy_next_tag_value_into_buffer`. - take_until_predicate(is_char<'\t'> || is_char<'\n'>); - - if (options.stream_warnings_to == nullptr) - return; - - *options.stream_warnings_to << "Unsupported tag found in SAM header @" << header_tag << ": \"" << raw_tag[0] - << raw_tag[1] << string_buffer << "\"\n"; - }; - while (it != end && is_char<'@'>(*it)) { ++it; // skip @ @@ -387,9 +376,9 @@ inline void format_sam_base::read_header(stream_view_type && stream_view, header_entry = std::addressof(hdr.grouping); break; } - default: // unsupported header tag + default: // unknown/user tag { - consume_unsupported_tag_and_print_warning("HD", raw_tag); + parse_and_append_unhandled_tag_to_string(hdr.user_tags, raw_tag); } } @@ -561,7 +550,7 @@ inline void format_sam_base::read_header(stream_view_type && stream_view, } default: // unsupported header tag { - consume_unsupported_tag_and_print_warning("PG", raw_tag); + parse_and_append_unhandled_tag_to_string(tmp.user_tags, raw_tag); } } @@ -665,6 +654,9 @@ format_sam_base::write_header(stream_t & stream, sam_file_output_options const & if (!header.grouping.empty()) stream << "\tGO:" << header.grouping; + if (!header.user_tags.empty()) + stream << '\t' << header.user_tags; + detail::write_eol(stream_it, options.add_carriage_return); // (@SQ) Write Reference Sequence Dictionary lines [required]. @@ -715,6 +707,9 @@ format_sam_base::write_header(stream_t & stream, sam_file_output_options const & if (!program.version.empty()) stream << "\tVN:" << program.version; + if (!program.user_tags.empty()) + stream << '\t' << program.user_tags; + detail::write_eol(stream_it, options.add_carriage_return); } diff --git a/include/seqan3/io/sam_file/header.hpp b/include/seqan3/io/sam_file/header.hpp index 5947af304d..f52f7aa0cf 100644 --- a/include/seqan3/io/sam_file/header.hpp +++ b/include/seqan3/io/sam_file/header.hpp @@ -34,6 +34,7 @@ struct sam_file_program_info_t std::string previous; //!< The id of the previous program if program calls were chained. std::string description; //!< A description of the program and/or program call. std::string version; //!< The program/tool version. + std::string user_tags; //!< Additional user-defined tags. }; /*!\brief Stores the header information of SAM/BAM files. @@ -213,6 +214,8 @@ class sam_file_header * * **SM:** Sample. Use pool name where a pool is being sequenced. */ std::vector> read_groups; + + std::string user_tags; //!< Additional user-defined tags. }; } // namespace seqan3 diff --git a/include/seqan3/io/sam_file/input_options.hpp b/include/seqan3/io/sam_file/input_options.hpp index 698654c870..6f2185b312 100644 --- a/include/seqan3/io/sam_file/input_options.hpp +++ b/include/seqan3/io/sam_file/input_options.hpp @@ -17,6 +17,8 @@ namespace seqan3 /*!\brief The options type defines various option members that influence the behaviour of all or some formats. * \ingroup io_sam_file * + * \note As of now, there are no specific options for the SAM format. This class may be used in the future for possible + * SAM parsing extensions. * \remark For a complete overview, take a look at \ref io_sam_file */ template diff --git a/test/unit/io/sam_file/format_bam_test.cpp b/test/unit/io/sam_file/format_bam_test.cpp index 82f3c2a154..33668367b3 100644 --- a/test/unit/io/sam_file/format_bam_test.cpp +++ b/test/unit/io/sam_file/format_bam_test.cpp @@ -38,14 +38,24 @@ struct sam_file_read : public sam_file_data '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'}; std::string unknown_tag_header{ - '\x42', '\x41', '\x4d', '\x01', '\x4b', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', - '\x4e', '\x3a', '\x31', '\x2e', '\x36', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', - '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53', - '\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09', '\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x0a', '\x40', - '\x50', '\x47', '\x09', '\x49', '\x44', '\x3a', '\x6e', '\x6f', '\x76', '\x6f', '\x61', '\x6c', '\x69', - '\x67', '\x6e', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f', - '\x74', '\x74', '\x65', '\x72', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', '\x00', - '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'}; + '\x42', '\x41', '\x4d', '\x01', '\xe3', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4e', + '\x3a', '\x31', '\x2e', '\x36', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', '\x09', + '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53', '\x4e', '\x3a', '\x72', + '\x65', '\x66', '\x09', '\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', + '\x30', '\x2e', '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x52', '\x47', '\x09', + '\x49', '\x44', '\x3a', '\x52', '\x31', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', + '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x50', '\x47', '\x09', '\x49', '\x44', '\x3a', + '\x6e', '\x6f', '\x76', '\x6f', '\x61', '\x6c', '\x69', '\x67', '\x6e', '\x09', '\x70', '\x62', '\x3a', '\x35', + '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x50', '\x47', + '\x09', '\x49', '\x44', '\x3a', '\x73', '\x61', '\x6d', '\x74', '\x6f', '\x6f', '\x6c', '\x73', '\x09', '\x50', + '\x4e', '\x3a', '\x73', '\x61', '\x6d', '\x74', '\x6f', '\x6f', '\x6c', '\x73', '\x09', '\x50', '\x50', '\x3a', + '\x6e', '\x6f', '\x76', '\x6f', '\x61', '\x6c', '\x69', '\x67', '\x6e', '\x09', '\x56', '\x4e', '\x3a', '\x31', + '\x2e', '\x31', '\x33', '\x09', '\x43', '\x4c', '\x3a', '\x73', '\x61', '\x6d', '\x74', '\x6f', '\x6f', '\x6c', + '\x73', '\x20', '\x76', '\x69', '\x65', '\x77', '\x20', '\x2d', '\x48', '\x20', '\x2d', '\x62', '\x20', '\x2f', + '\x68', '\x6f', '\x6d', '\x65', '\x2f', '\x69', '\x6e', '\x66', '\x72', '\x69', '\x2f', '\x64', '\x65', '\x76', + '\x65', '\x6c', '\x6f', '\x70', '\x2f', '\x73', '\x65', '\x71', '\x61', '\x6e', '\x33', '\x2f', '\x74', '\x65', + '\x73', '\x74', '\x2e', '\x73', '\x61', '\x6d', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', + '\x00', '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'}; std::string big_header_input{ '\x42', '\x41', '\x4D', '\x01', '\xB7', '\x01', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E', @@ -316,47 +326,51 @@ struct sam_file_read : public sam_file_data '\x66', '\x66', '\x66', '\x46', '\x40', '\x7A', '\x7A', '\x5A', '\x73', '\x74', '\x72', '\x00', '\x0A'}; std::string verbose_output{ - '\x42', '\x41', '\x4d', '\x01', '\xa6', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4e', + '\x42', '\x41', '\x4d', '\x01', '\xe2', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4e', '\x3a', '\x31', '\x2e', '\x36', '\x09', '\x53', '\x4f', '\x3a', '\x75', '\x6e', '\x6b', '\x6e', '\x6f', '\x77', - '\x6e', '\x09', '\x47', '\x4f', '\x3a', '\x6e', '\x6f', '\x6e', '\x65', '\x0a', '\x40', '\x53', '\x51', '\x09', - '\x53', '\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09', '\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x09', '\x41', - '\x4e', '\x3a', '\x6f', '\x74', '\x68', '\x65', '\x72', '\x5f', '\x6e', '\x61', '\x6d', '\x65', '\x0a', '\x40', - '\x52', '\x47', '\x09', '\x49', '\x44', '\x3a', '\x67', '\x72', '\x6f', '\x75', '\x70', '\x31', '\x09', '\x44', - '\x53', '\x3a', '\x6d', '\x6f', '\x72', '\x65', '\x20', '\x69', '\x6e', '\x66', '\x6f', '\x0a', '\x40', '\x50', - '\x47', '\x09', '\x49', '\x44', '\x3a', '\x70', '\x72', '\x6f', '\x67', '\x31', '\x09', '\x50', '\x4e', '\x3a', - '\x63', '\x6f', '\x6f', '\x6c', '\x5f', '\x70', '\x72', '\x6f', '\x67', '\x72', '\x61', '\x6d', '\x09', '\x43', - '\x4c', '\x3a', '\x2e', '\x2f', '\x70', '\x72', '\x6f', '\x67', '\x31', '\x09', '\x50', '\x50', '\x3a', '\x61', - '\x09', '\x44', '\x53', '\x3a', '\x62', '\x09', '\x56', '\x4e', '\x3a', '\x63', '\x0a', '\x40', '\x43', '\x4f', - '\x09', '\x54', '\x68', '\x69', '\x73', '\x20', '\x69', '\x73', '\x20', '\x61', '\x20', '\x63', '\x6f', '\x6d', - '\x6d', '\x65', '\x6e', '\x74', '\x2e', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', '\x00', - '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00', '\x64', '\x00', '\x00', '\x00', '\x00', '\x00', - '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x06', '\x3d', '\x49', '\x12', '\x05', '\x00', '\x29', '\x00', - '\x04', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2c', '\x01', - '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x31', '\x00', '\x14', '\x00', '\x00', '\x00', '\x10', '\x00', - '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00', - '\x12', '\x48', '\x00', '\x02', '\x02', '\x03', '\x41', '\x53', '\x43', '\x02', '\x43', '\x43', '\x53', '\x2c', - '\x01', '\x4e', '\x4d', '\x63', '\xf9', '\x61', '\x61', '\x41', '\x63', '\x63', '\x63', '\x73', '\xd4', '\xfe', - '\x66', '\x66', '\x66', '\x66', '\x66', '\x46', '\x40', '\x7a', '\x7a', '\x5a', '\x73', '\x74', '\x72', '\x00', - '\xaf', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x01', '\x00', '\x00', '\x00', '\x06', '\x3e', - '\x49', '\x12', '\x06', '\x00', '\x2a', '\x00', '\x09', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', - '\x09', '\x00', '\x00', '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x32', '\x00', - '\x15', '\x00', '\x00', '\x00', '\x70', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', - '\x00', '\x00', '\x14', '\x00', '\x00', '\x00', '\x25', '\x00', '\x00', '\x00', '\x14', '\x42', '\x84', '\xf1', - '\x40', '\x00', '\x02', '\x02', '\x03', '\x05', '\x06', '\x07', '\x08', '\x09', '\x62', '\x43', '\x42', '\x43', - '\x02', '\x00', '\x00', '\x00', '\x03', '\xc8', '\x62', '\x49', '\x42', '\x49', '\x01', '\x00', '\x00', '\x00', - '\x00', '\xd8', '\x94', '\x11', '\x62', '\x53', '\x42', '\x53', '\x03', '\x00', '\x00', '\x00', '\x2c', '\x01', - '\x28', '\x00', '\xf4', '\x01', '\x62', '\x63', '\x42', '\x63', '\x01', '\x00', '\x00', '\x00', '\xfd', '\x62', - '\x66', '\x42', '\x66', '\x03', '\x00', '\x00', '\x00', '\x00', '\x00', '\x60', '\x40', '\xcd', '\xcc', '\xcc', - '\x3d', '\x33', '\x33', '\x2f', '\x42', '\x62', '\x69', '\x42', '\x69', '\x03', '\x00', '\x00', '\x00', '\xfd', - '\xff', '\xff', '\xff', '\xc8', '\x00', '\x00', '\x00', '\x30', '\xfe', '\xfe', '\xff', '\x62', '\x73', '\x42', - '\x73', '\x03', '\x00', '\x00', '\x00', '\xfd', '\xff', '\xc8', '\x00', '\xd4', '\xfe', '\x5a', '\x00', '\x00', - '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\x00', '\x00', '\x00', '\x06', '\x3f', '\x49', '\x12', '\x0a', - '\x00', '\x2b', '\x00', '\x08', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', - '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x33', '\x00', '\x14', '\x00', '\x00', - '\x00', '\x10', '\x00', '\x00', '\x00', '\x16', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', - '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', - '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00', '\x00', '\x00', '\x44', '\x14', '\x81', '\x81', '\x00', - '\x00', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e'}; + '\x6e', '\x09', '\x47', '\x4f', '\x3a', '\x6e', '\x6f', '\x6e', '\x65', '\x09', '\x70', '\x62', '\x3a', '\x35', + '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x53', '\x51', + '\x09', '\x53', '\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09', '\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x09', + '\x41', '\x4e', '\x3a', '\x6f', '\x74', '\x68', '\x65', '\x72', '\x5f', '\x6e', '\x61', '\x6d', '\x65', '\x09', + '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', + '\x0a', '\x40', '\x52', '\x47', '\x09', '\x49', '\x44', '\x3a', '\x67', '\x72', '\x6f', '\x75', '\x70', '\x31', + '\x09', '\x44', '\x53', '\x3a', '\x6d', '\x6f', '\x72', '\x65', '\x20', '\x69', '\x6e', '\x66', '\x6f', '\x09', + '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', + '\x0a', '\x40', '\x50', '\x47', '\x09', '\x49', '\x44', '\x3a', '\x70', '\x72', '\x6f', '\x67', '\x31', '\x09', + '\x50', '\x4e', '\x3a', '\x63', '\x6f', '\x6f', '\x6c', '\x5f', '\x70', '\x72', '\x6f', '\x67', '\x72', '\x61', + '\x6d', '\x09', '\x43', '\x4c', '\x3a', '\x2e', '\x2f', '\x70', '\x72', '\x6f', '\x67', '\x31', '\x09', '\x50', + '\x50', '\x3a', '\x61', '\x09', '\x44', '\x53', '\x3a', '\x62', '\x09', '\x56', '\x4e', '\x3a', '\x63', '\x09', + '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', + '\x0a', '\x40', '\x43', '\x4f', '\x09', '\x54', '\x68', '\x69', '\x73', '\x20', '\x69', '\x73', '\x20', '\x61', + '\x20', '\x63', '\x6f', '\x6d', '\x6d', '\x65', '\x6e', '\x74', '\x2e', '\x0a', '\x01', '\x00', '\x00', '\x00', + '\x04', '\x00', '\x00', '\x00', '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00', '\x64', '\x00', + '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x06', '\x3d', '\x49', '\x12', + '\x05', '\x00', '\x29', '\x00', '\x04', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', + '\x00', '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x31', '\x00', '\x14', '\x00', + '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', + '\x11', '\x00', '\x00', '\x00', '\x12', '\x48', '\x00', '\x02', '\x02', '\x03', '\x41', '\x53', '\x43', '\x02', + '\x43', '\x43', '\x53', '\x2c', '\x01', '\x4e', '\x4d', '\x63', '\xf9', '\x61', '\x61', '\x41', '\x63', '\x63', + '\x63', '\x73', '\xd4', '\xfe', '\x66', '\x66', '\x66', '\x66', '\x66', '\x46', '\x40', '\x7a', '\x7a', '\x5a', + '\x73', '\x74', '\x72', '\x00', '\xaf', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x01', '\x00', + '\x00', '\x00', '\x06', '\x3e', '\x49', '\x12', '\x06', '\x00', '\x2a', '\x00', '\x09', '\x00', '\x00', '\x00', + '\x00', '\x00', '\x00', '\x00', '\x09', '\x00', '\x00', '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', + '\x61', '\x64', '\x32', '\x00', '\x15', '\x00', '\x00', '\x00', '\x70', '\x00', '\x00', '\x00', '\x12', '\x00', + '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00', '\x00', '\x00', '\x25', '\x00', '\x00', '\x00', + '\x14', '\x42', '\x84', '\xf1', '\x40', '\x00', '\x02', '\x02', '\x03', '\x05', '\x06', '\x07', '\x08', '\x09', + '\x62', '\x43', '\x42', '\x43', '\x02', '\x00', '\x00', '\x00', '\x03', '\xc8', '\x62', '\x49', '\x42', '\x49', + '\x01', '\x00', '\x00', '\x00', '\x00', '\xd8', '\x94', '\x11', '\x62', '\x53', '\x42', '\x53', '\x03', '\x00', + '\x00', '\x00', '\x2c', '\x01', '\x28', '\x00', '\xf4', '\x01', '\x62', '\x63', '\x42', '\x63', '\x01', '\x00', + '\x00', '\x00', '\xfd', '\x62', '\x66', '\x42', '\x66', '\x03', '\x00', '\x00', '\x00', '\x00', '\x00', '\x60', + '\x40', '\xcd', '\xcc', '\xcc', '\x3d', '\x33', '\x33', '\x2f', '\x42', '\x62', '\x69', '\x42', '\x69', '\x03', + '\x00', '\x00', '\x00', '\xfd', '\xff', '\xff', '\xff', '\xc8', '\x00', '\x00', '\x00', '\x30', '\xfe', '\xfe', + '\xff', '\x62', '\x73', '\x42', '\x73', '\x03', '\x00', '\x00', '\x00', '\xfd', '\xff', '\xc8', '\x00', '\xd4', + '\xfe', '\x5a', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\x00', '\x00', '\x00', '\x06', + '\x3f', '\x49', '\x12', '\x0a', '\x00', '\x2b', '\x00', '\x08', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', + '\x00', '\x09', '\x00', '\x00', '\x00', '\x2c', '\x01', '\x00', '\x00', '\x72', '\x65', '\x61', '\x64', '\x33', + '\x00', '\x14', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x16', '\x00', '\x00', '\x00', '\x10', + '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x11', '\x00', '\x00', + '\x00', '\x12', '\x00', '\x00', '\x00', '\x10', '\x00', '\x00', '\x00', '\x14', '\x00', '\x00', '\x00', '\x44', + '\x14', '\x81', '\x81', '\x00', '\x00', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e'}; std::string special_output{ '\x42', '\x41', '\x4D', '\x01', '\x1C', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E', diff --git a/test/unit/io/sam_file/format_sam_test.cpp b/test/unit/io/sam_file/format_sam_test.cpp index 8d2f519570..f69cca440e 100644 --- a/test/unit/io/sam_file/format_sam_test.cpp +++ b/test/unit/io/sam_file/format_sam_test.cpp @@ -24,7 +24,8 @@ struct sam_file_read : public sam_file_data // However, encountering such a tag should not break the parsing. std::string unknown_tag_header{ R"(@HD VN:1.6 pb:5.0.0 otter -@SQ SN:ref LN:34 +@SQ SN:ref LN:34 pb:5.0.0 otter +@RG ID:R1 pb:5.0.0 otter @PG ID:novoalign pb:5.0.0 otter )"}; @@ -87,10 +88,10 @@ read3 43 ref 3 63 1S1M1P1M1I1M1I1D1M1S ref 10 300 GGAGTATA !!*+,-./ // ----------------------------------------------------------------------------------------------------------------- std::string verbose_output{ - R"(@HD VN:1.6 SO:unknown GO:none -@SQ SN:ref LN:34 AN:other_name -@RG ID:group1 DS:more info -@PG ID:prog1 PN:cool_program CL:./prog1 PP:a DS:b VN:c + R"(@HD VN:1.6 SO:unknown GO:none pb:5.0.0 otter +@SQ SN:ref LN:34 AN:other_name pb:5.0.0 otter +@RG ID:group1 DS:more info pb:5.0.0 otter +@PG ID:prog1 PN:cool_program CL:./prog1 PP:a DS:b VN:c pb:5.0.0 otter @CO This is a comment. read1 41 ref 1 61 1S1M1D1M1I ref 10 300 ACGT !##$ AS:i:2 CC:i:300 NM:i:-7 aa:A:c cc:i:-300 ff:f:3.1 zz:Z:str read2 42 ref 2 62 1H7M1D1M1S2H ref 10 300 AGGCTGNAG !##$&'()* bC:B:C,3,200 bI:B:I,294967296 bS:B:S,300,40,500 bc:B:c,-3 bf:B:f,3.5,0.1,43.8 bi:B:i,-3,200,-66000 bs:B:s,-3,200,-300 diff --git a/test/unit/io/sam_file/sam_file_format_test_template.hpp b/test/unit/io/sam_file/sam_file_format_test_template.hpp index 2ef33776ee..6f9b3bc01a 100644 --- a/test/unit/io/sam_file/sam_file_format_test_template.hpp +++ b/test/unit/io/sam_file/sam_file_format_test_template.hpp @@ -358,7 +358,16 @@ TYPED_TEST_P(sam_file_read, issue2423) } TYPED_TEST_P(sam_file_read, unknown_header_tag) -{} +{ + typename TestFixture::stream_type istream{this->unknown_tag_header}; + seqan3::sam_file_input fin{istream, TypeParam{}}; + ASSERT_NO_THROW(fin.begin()); + + EXPECT_EQ(fin.header().user_tags, "pb:5.0.0\totter"); // HD + EXPECT_EQ(std::get<1>(fin.header().ref_id_info.front()), "pb:5.0.0\totter"); // SQ + EXPECT_EQ(std::get<1>(fin.header().read_groups.front()), "pb:5.0.0\totter"); // RG + EXPECT_EQ(fin.header().program_infos.front().user_tags, "pb:5.0.0\totter"); // PG +} // ---------------------------------------------------------------------------- // sam_file_write @@ -510,11 +519,12 @@ TYPED_TEST_P(sam_file_write, with_header) seqan3::sam_file_header header{std::vector{this->ref_id}}; header.sorting = "unknown"; header.grouping = "none"; - header.ref_id_info.push_back({this->ref_seq.size(), "AN:other_name"}); + header.ref_id_info.push_back({this->ref_seq.size(), "AN:other_name\tpb:5.0.0\totter"}); header.ref_dict[this->ref_id] = 0; - header.program_infos.push_back({"prog1", "cool_program", "./prog1", "a", "b", "c"}); - header.read_groups.emplace_back("group1", "DS:more info"); + header.program_infos.push_back({"prog1", "cool_program", "./prog1", "a", "b", "c", "pb:5.0.0\totter"}); + header.read_groups.emplace_back("group1", "DS:more info\tpb:5.0.0\totter"); header.comments.push_back("This is a comment."); + header.user_tags = "pb:5.0.0\totter"; { seqan3::sam_file_output fout{this->ostream, TypeParam{}, sam_fields{}};