diff --git a/CHANGELOG.md b/CHANGELOG.md index a106c974ca..09bed7be24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,8 +59,8 @@ If possible, provide tooling that performs the changes, e.g. a shell-script. to add convenient functions that compute information based on the record itself and to provide better documentation. ([\#2340](https://github.com/seqan/seqan3/pull/2340), [\#2380](https://github.com/seqan/seqan3/pull/2380), [\#2389](https://github.com/seqan/seqan3/pull/2389)) -* Deprecated `seqan3::field::seq_qual`. Use `seqan3::fiel::seq` and `seqan3::field::qual` instead. - ([\#2379](https://github.com/seqan/seqan3/pull/2379)). Check out +* Deprecated `seqan3::field::seq_qual`. Use `seqan3::field::seq` and `seqan3::field::qual` instead. + ([\#2379](https://github.com/seqan/seqan3/pull/2379)). Check out [SeqAn3 Cookbook - Write Record](https://docs.seqan.de/seqan/3.0.3/cookbook.html) for usage. @@ -112,8 +112,8 @@ If possible, provide tooling that performs the changes, e.g. a shell-script. ## API changes -Most of our API or header file changes will trigger a deprecation warning to let you know if something changed and, if -applicable, when it will be removed. We recommend upgrading version-by-version to check whether you need to change code. +Most of our API or header file changes will trigger a deprecation warning to let you know if something changed and, if +applicable, when it will be removed. We recommend upgrading version-by-version to check whether you need to change code. You can either directly check the reported code or verify with our documentation how the new API should be used. For a complete list of behavioural changes in our public and internal API, you can consult our API stability @@ -128,6 +128,8 @@ regression test suite and patches at https://github.com/seqan/seqan3/tree/master ([\#2388](https://github.com/seqan/seqan3/pull/2388)). 
* The literal 'M'_cigar_op was renamed to 'M'_cigar_operation ([\#2388](https://github.com/seqan/seqan3/pull/2388)). +* Renamed `seqan3::sam_dna16` to `seqan3::dna16sam` + ([\#2521](https://github.com/seqan/seqan3/pull/2521)). #### Argument Parser diff --git a/include/seqan3/alphabet/nucleotide/dna16sam.hpp b/include/seqan3/alphabet/nucleotide/dna16sam.hpp new file mode 100644 index 0000000000..6ed8337ae9 --- /dev/null +++ b/include/seqan3/alphabet/nucleotide/dna16sam.hpp @@ -0,0 +1,219 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +/*!\file + * \author Svenja Mehringer + * \brief Provides seqan3::dna16sam. + */ + +#pragma once + +#include + +namespace seqan3 +{ + +/*!\brief A 16 letter DNA alphabet, containing all IUPAC symbols minus the gap and plus an equality sign ('='). + * \ingroup nucleotide + * \implements seqan3::writable_alphabet + * \implements seqan3::nucleotide_alphabet + * \implements seqan3::trivially_copyable + * \implements seqan3::standard_layout + * \if DEV \implements seqan3::detail::writable_constexpr_alphabet \endif + * + * \details + * + * The seqan3::dna16sam alphabet is the nucleotide alphabet used inside the SAM, BAM and CRAM formats. + * It has all the letters of the seqan3::dna15 alphabet and the extra alphabet character '=' which denotes a + * nucleotide character identical to the reference. + * Without the context of this reference sequence, no assumptions can be made about the actual value of '=' letter. 
+ * + * Note that you can assign 'U' as a character to dna16sam and it will silently + * be converted to 'T'. + * Lower case letters are accepted when assigning from char (just like seqan3::dna15) and unknown characters are + * silently converted to 'N'. + * + * The complement is the same as for seqan3::dna15, with the addition that the complement of '=' is unknown and + * therefore set to 'N'. + * + * \include test/snippet/alphabet/nucleotide/dna16sam.cpp + * + * \stableapi{Since version 3.1.} + */ +class dna16sam : public nucleotide_base +{ +private: + //!\brief The base class. + using base_t = nucleotide_base; + + //!\brief Befriend seqan3::nucleotide_base. + friend base_t; + //!\cond \brief Befriend seqan3::alphabet_base. + friend base_t::base_t; + //!\endcond + +public: + /*!\name Constructors, destructor and assignment + * \{ + */ + constexpr dna16sam() noexcept = default; //!< Defaulted. + constexpr dna16sam(dna16sam const &) noexcept = default; //!< Defaulted. + constexpr dna16sam(dna16sam &&) noexcept = default; //!< Defaulted. + constexpr dna16sam & operator=(dna16sam const &) noexcept = default; //!< Defaulted. + constexpr dna16sam & operator=(dna16sam &&) noexcept = default; //!< Defaulted. + ~dna16sam() noexcept = default; //!< Defaulted. 
+ + using base_t::base_t; + //!\} + +private: + //!\copydoc seqan3::dna4::rank_to_char_table + static constexpr char_type rank_to_char_table[alphabet_size] + { + '=', + 'A', + 'C', + 'M', + 'G', + 'R', + 'S', + 'V', + 'T', + 'W', + 'Y', + 'H', + 'K', + 'D', + 'B', + 'N' + }; + + //!\copydoc seqan3::dna4::char_to_rank_table + static constexpr std::array char_to_rank_table + { + [] () constexpr + { + std::array ret{}; + + // initialize with UNKNOWN (std::array::fill unfortunately not constexpr) + for (auto & c : ret) + c = 15; // rank of 'N' + + // reverse mapping for characters and their lowercase + for (size_t rnk = 0u; rnk < alphabet_size; ++rnk) + { + ret[rank_to_char_table[rnk]] = rnk; + ret[to_lower(rank_to_char_table[rnk])] = rnk; + } + + // set U equal to T + ret['U'] = ret['T']; ret['u'] = ret['t']; + + return ret; + }() + }; + + //!\copydoc seqan3::dna4::complement_table + static const std::array complement_table; + + /*!\copydoc seqan3::dna4::rank_to_char + * + * The representation is the same as in the SAM specifications (which is NOT in alphabetical order). + */ + static constexpr char_type rank_to_char(rank_type const rank) + { + return rank_to_char_table[rank]; + } + + //!\copydoc seqan3::dna4::char_to_rank + static constexpr rank_type char_to_rank(char_type const chr) + { + using index_t = std::make_unsigned_t; + return char_to_rank_table[static_cast(chr)]; + } +}; + +// ------------------------------------------------------------------ +// containers +// ------------------------------------------------------------------ + +/*!\brief Alias for an std::vector of seqan3::dna16sam. + * \relates dna16sam + * \details + * \stableapi{Since version 3.1.} + */ +using dna16sam_vector = std::vector; + +// ------------------------------------------------------------------ +// literals +// ------------------------------------------------------------------ + +/*!\name Literals + * \{ + */ + +/*!\brief The seqan3::dna16sam char literal. 
+ * \relates seqan3::dna16sam + * \returns seqan3::dna16sam + * \param[in] c The character to assign from. + * \details + * \stableapi{Since version 3.1.} + */ +constexpr dna16sam operator""_dna16sam(char const c) noexcept +{ + return dna16sam{}.assign_char(c); +} + +/*!\brief The seqan3::dna16sam string literal. + * \relates seqan3::dna16sam + * \returns seqan3::dna16sam_vector + * \param[in] s The string literal to assign from. + * \param[in] n The length of the string literal s. + * + * You can use this string literal to easily assign to seqan3::dna16sam_vector: + * + * \include test/snippet/alphabet/nucleotide/dna16sam_literal.cpp + * + * \stableapi{Since version 3.1.} + */ +inline dna16sam_vector operator""_dna16sam(char const * s, size_t n) +{ + dna16sam_vector r; + r.resize(n); + + for (size_t i = 0; i < n; ++i) + r[i].assign_char(s[i]); + + return r; +} +//!\} + +// ------------------------------------------------------------------ +// complement deferred definition +// ------------------------------------------------------------------ + +constexpr std::array dna16sam::complement_table +{ + 'N'_dna16sam, // complement of '='_dna16sam + 'T'_dna16sam, // complement of 'A'_dna16sam + 'G'_dna16sam, // complement of 'C'_dna16sam + 'K'_dna16sam, // complement of 'M'_dna16sam + 'C'_dna16sam, // complement of 'G'_dna16sam + 'Y'_dna16sam, // complement of 'R'_dna16sam + 'S'_dna16sam, // complement of 'S'_dna16sam + 'B'_dna16sam, // complement of 'V'_dna16sam + 'A'_dna16sam, // complement of 'T'_dna16sam + 'W'_dna16sam, // complement of 'W'_dna16sam + 'R'_dna16sam, // complement of 'Y'_dna16sam + 'D'_dna16sam, // complement of 'H'_dna16sam + 'M'_dna16sam, // complement of 'K'_dna16sam + 'H'_dna16sam, // complement of 'D'_dna16sam + 'V'_dna16sam, // complement of 'B'_dna16sam + 'N'_dna16sam // complement of 'N'_dna16sam +}; + +} // namespace seqan3 diff --git a/include/seqan3/alphabet/nucleotide/sam_dna16.hpp b/include/seqan3/alphabet/nucleotide/sam_dna16.hpp index 
4dc23363ee..2e0aea9d89 100644 --- a/include/seqan3/alphabet/nucleotide/sam_dna16.hpp +++ b/include/seqan3/alphabet/nucleotide/sam_dna16.hpp @@ -6,205 +6,20 @@ // ----------------------------------------------------------------------------------------------------- /*!\file + * \brief [DEPRECATED] Provides seqan3::dna16sam. * \author Svenja Mehringer - * \brief Provides seqan3::sam_dna16. + * \deprecated This header will be removed in 3.1.0; Please \#include seqan3/alphabet/nucleotide/dna16sam.hpp instead. */ #pragma once -#include +#include <seqan3/alphabet/nucleotide/dna16sam.hpp> namespace seqan3 { - -/*!\brief A 16 letter DNA alphabet, containing all IUPAC symbols minus the gap and plus an equality sign ('='). - * \ingroup nucleotide - * \implements seqan3::writable_alphabet - * \implements seqan3::nucleotide_alphabet - * \implements seqan3::trivially_copyable - * \implements seqan3::standard_layout - * \if DEV \implements seqan3::detail::writable_constexpr_alphabet \endif - * - * \details - * - * The seqan3::sam_dna16 alphabet is the nucleotide alphabet used inside the SAM, BAM and CRAM formats. - * It has all the letters of the seqan3::dna15 alphabet and the extra alphabet character '=' which denotes a - * nucleotide character identical to the reference. - * Without the context of this reference sequence, no assumptions can be made about the actual value of '=' letter. - * - * Note that you can assign 'U' as a character to sam_dna16 and it will silently - * be converted to 'T'. - * Lower case letters are accepted when assigning from char (just like seqan3::dna15) and unknown characters are - * silently converted to 'N'. - * - * The complement is the same as for seqan3::dna15, with the addition that the complement of '=' is unknown and - * therefore set to 'N'. - * - * \include test/snippet/alphabet/nucleotide/sam_dna16.cpp - */ -class sam_dna16 : public nucleotide_base -{ -private: - //!\brief The base class. - using base_t = nucleotide_base; - - //!\brief Befriend seqan3::nucleotide_base. 
- friend base_t; - //!\cond \brief Befriend seqan3::alphabet_base. - friend base_t::base_t; - //!\endcond - -public: - /*!\name Constructors, destructor and assignment - * \{ - */ - constexpr sam_dna16() noexcept = default; //!< Defaulted. - constexpr sam_dna16(sam_dna16 const &) noexcept = default; //!< Defaulted. - constexpr sam_dna16(sam_dna16 &&) noexcept = default; //!< Defaulted. - constexpr sam_dna16 & operator=(sam_dna16 const &) noexcept = default; //!< Defaulted. - constexpr sam_dna16 & operator=(sam_dna16 &&) noexcept = default; //!< Defaulted. - ~sam_dna16() noexcept = default; //!< Defaulted. - - using base_t::base_t; - //!\} - -private: - //!\copydoc seqan3::dna4::rank_to_char_table - static constexpr char_type rank_to_char_table[alphabet_size] - { - '=', - 'A', - 'C', - 'M', - 'G', - 'R', - 'S', - 'V', - 'T', - 'W', - 'Y', - 'H', - 'K', - 'D', - 'B', - 'N' - }; - - //!\copydoc seqan3::dna4::char_to_rank_table - static constexpr std::array char_to_rank_table - { - [] () constexpr - { - std::array ret{}; - - // initialize with UNKNOWN (std::array::fill unfortunately not constexpr) - for (auto & c : ret) - c = 15; // rank of 'N' - - // reverse mapping for characters and their lowercase - for (size_t rnk = 0u; rnk < alphabet_size; ++rnk) - { - ret[rank_to_char_table[rnk]] = rnk; - ret[to_lower(rank_to_char_table[rnk])] = rnk; - } - - // set U equal to T - ret['U'] = ret['T']; ret['u'] = ret['t']; - - return ret; - }() - }; - - //!\copydoc seqan3::dna4::complement_table - static const std::array complement_table; - - /*!\copydoc seqan3::dna4::rank_to_char - * - * The representation is the same as in the SAM specifications (which is NOT in alphabetical order). 
- */ - static constexpr char_type rank_to_char(rank_type const rank) - { - return rank_to_char_table[rank]; - } - - //!\copydoc seqan3::dna4::char_to_rank - static constexpr rank_type char_to_rank(char_type const chr) - { - using index_t = std::make_unsigned_t; - return char_to_rank_table[static_cast(chr)]; - } -}; - -// ------------------------------------------------------------------ -// containers -// ------------------------------------------------------------------ - -//!\brief Alias for an std::vector of seqan3::sam_dna16. -//!\relates sam_dna16 -using sam_dna16_vector = std::vector; - -// ------------------------------------------------------------------ -// literals -// ------------------------------------------------------------------ - -/*!\name Literals - * \{ - */ - -/*!\brief The seqan3::sam_dna16 char literal. - * \relates seqan3::sam_dna16 - * \returns seqan3::sam_dna16 - * \param[in] c The character to assign from. - */ -constexpr sam_dna16 operator""_sam_dna16(char const c) noexcept -{ - return sam_dna16{}.assign_char(c); -} - -/*!\brief The seqan3::sam_dna16 string literal. - * \relates seqan3::sam_dna16 - * \returns seqan3::sam_dna16_vector - * \param[in] s The string literal to assign from. - * \param[in] n The length of the string literal s. 
- * - * You can use this string literal to easily assign to seqan3::sam_dna16_vector: - * - * \include test/snippet/alphabet/nucleotide/sam_dna16_literal.cpp - */ -inline sam_dna16_vector operator""_sam_dna16(char const * s, size_t n) -{ - sam_dna16_vector r; - r.resize(n); - - for (size_t i = 0; i < n; ++i) - r[i].assign_char(s[i]); - - return r; -} -//!\} - -// ------------------------------------------------------------------ -// complement deferred definition -// ------------------------------------------------------------------ - -constexpr std::array sam_dna16::complement_table -{ - 'N'_sam_dna16, // complement of '='_sam_dna16 - 'T'_sam_dna16, // complement of 'A'_sam_dna16 - 'G'_sam_dna16, // complement of 'C'_sam_dna16 - 'K'_sam_dna16, // complement of 'M'_sam_dna16 - 'C'_sam_dna16, // complement of 'G'_sam_dna16 - 'Y'_sam_dna16, // complement of 'R'_sam_dna16 - 'S'_sam_dna16, // complement of 'S'_sam_dna16 - 'B'_sam_dna16, // complement of 'V'_sam_dna16 - 'A'_sam_dna16, // complement of 'T'_sam_dna16 - 'W'_sam_dna16, // complement of 'W'_sam_dna16 - 'R'_sam_dna16, // complement of 'Y'_sam_dna16 - 'D'_sam_dna16, // complement of 'H'_sam_dna16 - 'M'_sam_dna16, // complement of 'K'_sam_dna16 - 'H'_sam_dna16, // complement of 'D'_sam_dna16 - 'V'_sam_dna16, // complement of 'B'_sam_dna16 - 'N'_sam_dna16 // complement of 'N'_sam_dna16 -}; - +//!\deprecated Please use seqan3::dna16sam instead. 
+using sam_dna16 SEQAN3_DEPRECATED_310 = seqan3::dna16sam; } // namespace seqan3 + +SEQAN3_DEPRECATED_HEADER( + "This header is deprecated and will be removed in SeqAn-3.1.0; Please #include <seqan3/alphabet/nucleotide/dna16sam.hpp> instead.") diff --git a/include/seqan3/io/sam_file/format_bam.hpp b/include/seqan3/io/sam_file/format_bam.hpp index 99f5217d2e..75720b1379 100644 --- a/include/seqan3/io/sam_file/format_bam.hpp +++ b/include/seqan3/io/sam_file/format_bam.hpp @@ -20,7 +20,7 @@ #include #include -#include <seqan3/alphabet/nucleotide/sam_dna16.hpp> +#include <seqan3/alphabet/nucleotide/dna16sam.hpp> #include #include #include @@ -437,10 +437,10 @@ inline void format_bam::read_alignment_record(stream_type & stream, { auto seq_stream = stream_view | views::take_exactly_or_throw(core.l_seq / 2) // one too short if uneven - | std::views::transform([] (char c) -> std::pair + | std::views::transform([] (char c) -> std::pair { - return {sam_dna16{}.assign_rank(std::min(15, static_cast(c) >> 4)), - sam_dna16{}.assign_rank(std::min(15, static_cast(c) & 0x0f))}; + return {dna16sam{}.assign_rank(std::min(15, static_cast(c) >> 4)), + dna16sam{}.assign_rank(std::min(15, static_cast(c) & 0x0f))}; }); if constexpr (detail::decays_to_ignore_v) @@ -462,7 +462,7 @@ inline void format_bam::read_alignment_record(stream_type & stream, { assert(core.l_seq == (seq_length + offset_tmp + soft_clipping_end)); // sanity check using alph_t = std::ranges::range_value_t(align))>; - constexpr auto from_dna16 = detail::convert_through_char_representation; + constexpr auto from_dna16 = detail::convert_through_char_representation; get<1>(align).reserve(seq_length); @@ -506,7 +506,7 @@ inline void format_bam::read_alignment_record(stream_type & stream, else { using alph_t = std::ranges::range_value_t; - constexpr auto from_dna16 = detail::convert_through_char_representation; + constexpr auto from_dna16 = detail::convert_through_char_representation; for (auto [d1, d2] : seq_stream) { @@ -516,7 +516,7 @@ inline void format_bam::read_alignment_record(stream_type & stream, if (core.l_seq & 1) { - sam_dna16 d = 
sam_dna16{}.assign_rank(std::min(15, static_cast(*std::ranges::begin(stream_view)) >> 4)); + dna16sam d = dna16sam{}.assign_rank(std::min(15, static_cast(*std::ranges::begin(stream_view)) >> 4)); seq.push_back(from_dna16[to_rank(d)]); std::ranges::next(std::ranges::begin(stream_view)); } @@ -882,9 +882,9 @@ inline void format_bam::write_alignment_record([[maybe_unused]] stream_type & s std::ranges::copy_n(reinterpret_cast(&cigar_count), 4, stream_it); } - // write seq (bit-compressed: sam_dna16 characters go into one byte) + // write seq (bit-compressed: dna16sam characters go into one byte) using alph_t = std::ranges::range_value_t; - constexpr auto to_dna16 = detail::convert_through_char_representation; + constexpr auto to_dna16 = detail::convert_through_char_representation; auto sit = std::ranges::begin(seq); for (int32_t sidx = 0; sidx < ((core.l_seq & 1) ? core.l_seq - 1 : core.l_seq); ++sidx, ++sit) diff --git a/test/snippet/alphabet/nucleotide/sam_dna16.cpp b/test/snippet/alphabet/nucleotide/dna16sam.cpp similarity index 62% rename from test/snippet/alphabet/nucleotide/sam_dna16.cpp rename to test/snippet/alphabet/nucleotide/dna16sam.cpp index 369b728689..a8c2e05b18 100644 --- a/test/snippet/alphabet/nucleotide/sam_dna16.cpp +++ b/test/snippet/alphabet/nucleotide/dna16sam.cpp @@ -1,11 +1,11 @@ -#include +#include #include int main() { - using seqan3::operator""_sam_dna16; + using seqan3::operator""_dna16sam; - seqan3::sam_dna16 my_letter{'A'_sam_dna16}; + seqan3::dna16sam my_letter{'A'_dna16sam}; my_letter.assign_char('='); diff --git a/test/snippet/alphabet/nucleotide/dna16sam_literal.cpp b/test/snippet/alphabet/nucleotide/dna16sam_literal.cpp new file mode 100644 index 0000000000..8ade683d06 --- /dev/null +++ b/test/snippet/alphabet/nucleotide/dna16sam_literal.cpp @@ -0,0 +1,13 @@ +#include +#include + +int main() +{ + using seqan3::operator""_dna16sam; + + seqan3::dna16sam_vector foo{"ACgtTA"_dna16sam}; + seqan3::dna16sam_vector bar = "ACG==A"_dna16sam; + 
auto bax = "A=GTT!"_dna16sam; + + seqan3::debug_stream << foo << "\n" << bar << "\n" << bax << "\n"; +} diff --git a/test/snippet/alphabet/nucleotide/sam_dna16_literal.cpp b/test/snippet/alphabet/nucleotide/sam_dna16_literal.cpp deleted file mode 100644 index e0fe619966..0000000000 --- a/test/snippet/alphabet/nucleotide/sam_dna16_literal.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include - -int main() -{ - using seqan3::operator""_sam_dna16; - - seqan3::sam_dna16_vector foo{"ACgtTA"_sam_dna16}; - seqan3::sam_dna16_vector bar = "ACG==A"_sam_dna16; - auto bax = "A=GTT!"_sam_dna16; - - seqan3::debug_stream << foo << "\n" << bar << "\n" << bax << "\n"; -} diff --git a/test/unit/alphabet/nucleotide/CMakeLists.txt b/test/unit/alphabet/nucleotide/CMakeLists.txt index d3403366c2..f5ba8b7590 100644 --- a/test/unit/alphabet/nucleotide/CMakeLists.txt +++ b/test/unit/alphabet/nucleotide/CMakeLists.txt @@ -1,9 +1,9 @@ seqan3_test(dna4_test.cpp) seqan3_test(dna5_test.cpp) seqan3_test(dna15_test.cpp) +seqan3_test(dna16sam_test.cpp) seqan3_test(dna3bs_test.cpp) seqan3_test(rna4_test.cpp) seqan3_test(rna5_test.cpp) seqan3_test(rna15_test.cpp) seqan3_test(nucleotide_conversion_integration_test.cpp) -seqan3_test(sam_dna16_test.cpp) diff --git a/test/unit/alphabet/nucleotide/dna16sam_test.cpp b/test/unit/alphabet/nucleotide/dna16sam_test.cpp new file mode 100644 index 0000000000..7d5d2b89a6 --- /dev/null +++ b/test/unit/alphabet/nucleotide/dna16sam_test.cpp @@ -0,0 +1,127 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// 
----------------------------------------------------------------------------------------------------- + +#include +#include +#include + +#include "../alphabet_constexpr_test_template.hpp" +#include "../alphabet_test_template.hpp" +#include "../semi_alphabet_constexpr_test_template.hpp" +#include "../semi_alphabet_test_template.hpp" + +using seqan3::operator""_dna16sam; + +// ------------------------------------------------------------------ +// dna16sam alphabet +// ------------------------------------------------------------------ + +INSTANTIATE_TYPED_TEST_SUITE_P(dna16sam, alphabet, seqan3::dna16sam, ); +INSTANTIATE_TYPED_TEST_SUITE_P(dna16sam, semi_alphabet_test, seqan3::dna16sam, ); +INSTANTIATE_TYPED_TEST_SUITE_P(dna16sam, alphabet_constexpr, seqan3::dna16sam, ); +INSTANTIATE_TYPED_TEST_SUITE_P(dna16sam, semi_alphabet_constexpr, seqan3::dna16sam, ); + +// nucleotide test: (because the complement is not bijective for dna16sam we need to test it manually) +TEST(dna16sam, nucleotide) +{ + EXPECT_TRUE(seqan3::nucleotide_alphabet); + EXPECT_TRUE(seqan3::nucleotide_alphabet); + + EXPECT_EQ(seqan3::complement('='_dna16sam), 'N'_dna16sam); + EXPECT_EQ(seqan3::complement('A'_dna16sam), 'T'_dna16sam); + EXPECT_EQ(seqan3::complement('C'_dna16sam), 'G'_dna16sam); + EXPECT_EQ(seqan3::complement('M'_dna16sam), 'K'_dna16sam); + EXPECT_EQ(seqan3::complement('G'_dna16sam), 'C'_dna16sam); + EXPECT_EQ(seqan3::complement('R'_dna16sam), 'Y'_dna16sam); + EXPECT_EQ(seqan3::complement('S'_dna16sam), 'S'_dna16sam); + EXPECT_EQ(seqan3::complement('V'_dna16sam), 'B'_dna16sam); + EXPECT_EQ(seqan3::complement('T'_dna16sam), 'A'_dna16sam); + EXPECT_EQ(seqan3::complement('W'_dna16sam), 'W'_dna16sam); + EXPECT_EQ(seqan3::complement('Y'_dna16sam), 'R'_dna16sam); + EXPECT_EQ(seqan3::complement('H'_dna16sam), 'D'_dna16sam); + EXPECT_EQ(seqan3::complement('K'_dna16sam), 'M'_dna16sam); + EXPECT_EQ(seqan3::complement('D'_dna16sam), 'H'_dna16sam); + EXPECT_EQ(seqan3::complement('B'_dna16sam), 
'V'_dna16sam); + EXPECT_EQ(seqan3::complement('N'_dna16sam), 'N'_dna16sam); +} + +TEST(dna16sam, to_char_assign_char) +{ + using rank_t = seqan3::alphabet_rank_t; + for (rank_t rank = 0; rank < seqan3::alphabet_size; ++rank) + { + char chr = seqan3::to_char(seqan3::assign_rank_to(rank, seqan3::dna16sam{})); + EXPECT_EQ(seqan3::to_char(seqan3::dna16sam{}.assign_char(chr)), chr); + } + + EXPECT_EQ(seqan3::to_char(seqan3::dna16sam{}.assign_char('a')), 'A'); + EXPECT_EQ(seqan3::to_char(seqan3::dna16sam{}.assign_char('c')), 'C'); + EXPECT_EQ(seqan3::to_char(seqan3::dna16sam{}.assign_char('g')), 'G'); + EXPECT_EQ(seqan3::to_char(seqan3::dna16sam{}.assign_char('t')), 'T'); + + EXPECT_EQ(seqan3::to_char(seqan3::dna16sam{}.assign_char('U')), 'T'); + EXPECT_EQ(seqan3::to_char(seqan3::dna16sam{}.assign_char('!')), 'N'); +} + +TEST(dna16sam, char_literal) +{ + EXPECT_EQ(seqan3::to_char('A'_dna16sam), 'A'); + EXPECT_EQ(seqan3::to_char('C'_dna16sam), 'C'); + EXPECT_EQ(seqan3::to_char('G'_dna16sam), 'G'); + + EXPECT_EQ(seqan3::to_char('U'_dna16sam), 'T'); + EXPECT_EQ(seqan3::to_char('T'_dna16sam), 'T'); + + EXPECT_EQ(seqan3::to_char('R'_dna16sam), 'R'); + EXPECT_EQ(seqan3::to_char('Y'_dna16sam), 'Y'); + EXPECT_EQ(seqan3::to_char('S'_dna16sam), 'S'); + EXPECT_EQ(seqan3::to_char('W'_dna16sam), 'W'); + EXPECT_EQ(seqan3::to_char('K'_dna16sam), 'K'); + EXPECT_EQ(seqan3::to_char('M'_dna16sam), 'M'); + EXPECT_EQ(seqan3::to_char('B'_dna16sam), 'B'); + EXPECT_EQ(seqan3::to_char('D'_dna16sam), 'D'); + EXPECT_EQ(seqan3::to_char('H'_dna16sam), 'H'); + EXPECT_EQ(seqan3::to_char('V'_dna16sam), 'V'); + + EXPECT_EQ(seqan3::to_char('='_dna16sam), '='); + + EXPECT_EQ(seqan3::to_char('N'_dna16sam), 'N'); + EXPECT_EQ(seqan3::to_char('!'_dna16sam), 'N'); +} + +TEST(dna16sam, string_literal) +{ + seqan3::dna16sam_vector v; + v.resize(5, 'A'_dna16sam); + EXPECT_EQ(v, "AAAAA"_dna16sam); + + std::vector w{'A'_dna16sam, + '='_dna16sam, + 'G'_dna16sam, + 'T'_dna16sam, + 'U'_dna16sam, + 'N'_dna16sam}; + 
EXPECT_EQ(w, "A=GTTN"_dna16sam); +} + +TEST(dna16sam, char_is_valid) +{ + constexpr auto validator = seqan3::is_char<'A'> || seqan3::is_char<'C'> || seqan3::is_char<'G'> || + seqan3::is_char<'T'> || seqan3::is_char<'U'> || seqan3::is_char<'a'> || + seqan3::is_char<'c'> || seqan3::is_char<'g'> || seqan3::is_char<'t'> || + seqan3::is_char<'u'> || seqan3::is_char<'N'> || seqan3::is_char<'n'> || + seqan3::is_char<'R'> || seqan3::is_char<'Y'> || seqan3::is_char<'S'> || + seqan3::is_char<'W'> || seqan3::is_char<'K'> || seqan3::is_char<'M'> || + seqan3::is_char<'B'> || seqan3::is_char<'D'> || seqan3::is_char<'H'> || + seqan3::is_char<'V'> || seqan3::is_char<'r'> || seqan3::is_char<'y'> || + seqan3::is_char<'s'> || seqan3::is_char<'w'> || seqan3::is_char<'k'> || + seqan3::is_char<'m'> || seqan3::is_char<'b'> || seqan3::is_char<'d'> || + seqan3::is_char<'h'> || seqan3::is_char<'v'> || seqan3::is_char<'='>; + + for (char c : std::views::iota(std::numeric_limits::min(), std::numeric_limits::max())) + EXPECT_EQ(seqan3::dna16sam::char_is_valid(c), validator(c)); +} diff --git a/test/unit/alphabet/nucleotide/sam_dna16_test.cpp b/test/unit/alphabet/nucleotide/sam_dna16_test.cpp deleted file mode 100644 index 242a48a9c2..0000000000 --- a/test/unit/alphabet/nucleotide/sam_dna16_test.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// ----------------------------------------------------------------------------------------------------- -// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin -// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik -// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License -// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md -// ----------------------------------------------------------------------------------------------------- - -#include -#include -#include - -#include "../alphabet_constexpr_test_template.hpp" -#include 
"../alphabet_test_template.hpp" -#include "../semi_alphabet_constexpr_test_template.hpp" -#include "../semi_alphabet_test_template.hpp" - -using seqan3::operator""_sam_dna16; - -// ------------------------------------------------------------------ -// sam_dna16 alphabet -// ------------------------------------------------------------------ - -INSTANTIATE_TYPED_TEST_SUITE_P(sam_dna16, alphabet, seqan3::sam_dna16, ); -INSTANTIATE_TYPED_TEST_SUITE_P(sam_dna16, semi_alphabet_test, seqan3::sam_dna16, ); -INSTANTIATE_TYPED_TEST_SUITE_P(sam_dna16, alphabet_constexpr, seqan3::sam_dna16, ); -INSTANTIATE_TYPED_TEST_SUITE_P(sam_dna16, semi_alphabet_constexpr, seqan3::sam_dna16, ); - -// nucleotide test: (because the complement is not bijective for sam_dna16 we need to test it manually) -TEST(sam_dna16, nucleotide) -{ - EXPECT_TRUE(seqan3::nucleotide_alphabet); - EXPECT_TRUE(seqan3::nucleotide_alphabet); - - EXPECT_EQ(seqan3::complement('='_sam_dna16), 'N'_sam_dna16); - EXPECT_EQ(seqan3::complement('A'_sam_dna16), 'T'_sam_dna16); - EXPECT_EQ(seqan3::complement('C'_sam_dna16), 'G'_sam_dna16); - EXPECT_EQ(seqan3::complement('M'_sam_dna16), 'K'_sam_dna16); - EXPECT_EQ(seqan3::complement('G'_sam_dna16), 'C'_sam_dna16); - EXPECT_EQ(seqan3::complement('R'_sam_dna16), 'Y'_sam_dna16); - EXPECT_EQ(seqan3::complement('S'_sam_dna16), 'S'_sam_dna16); - EXPECT_EQ(seqan3::complement('V'_sam_dna16), 'B'_sam_dna16); - EXPECT_EQ(seqan3::complement('T'_sam_dna16), 'A'_sam_dna16); - EXPECT_EQ(seqan3::complement('W'_sam_dna16), 'W'_sam_dna16); - EXPECT_EQ(seqan3::complement('Y'_sam_dna16), 'R'_sam_dna16); - EXPECT_EQ(seqan3::complement('H'_sam_dna16), 'D'_sam_dna16); - EXPECT_EQ(seqan3::complement('K'_sam_dna16), 'M'_sam_dna16); - EXPECT_EQ(seqan3::complement('D'_sam_dna16), 'H'_sam_dna16); - EXPECT_EQ(seqan3::complement('B'_sam_dna16), 'V'_sam_dna16); - EXPECT_EQ(seqan3::complement('N'_sam_dna16), 'N'_sam_dna16); -} - -TEST(sam_dna16, to_char_assign_char) -{ - using rank_t = 
seqan3::alphabet_rank_t; - for (rank_t rank = 0; rank < seqan3::alphabet_size; ++rank) - { - char chr = seqan3::to_char(seqan3::assign_rank_to(rank, seqan3::sam_dna16{})); - EXPECT_EQ(seqan3::to_char(seqan3::sam_dna16{}.assign_char(chr)), chr); - } - - EXPECT_EQ(seqan3::to_char(seqan3::sam_dna16{}.assign_char('a')), 'A'); - EXPECT_EQ(seqan3::to_char(seqan3::sam_dna16{}.assign_char('c')), 'C'); - EXPECT_EQ(seqan3::to_char(seqan3::sam_dna16{}.assign_char('g')), 'G'); - EXPECT_EQ(seqan3::to_char(seqan3::sam_dna16{}.assign_char('t')), 'T'); - - EXPECT_EQ(seqan3::to_char(seqan3::sam_dna16{}.assign_char('U')), 'T'); - EXPECT_EQ(seqan3::to_char(seqan3::sam_dna16{}.assign_char('!')), 'N'); -} - -TEST(sam_dna16, char_literal) -{ - EXPECT_EQ(seqan3::to_char('A'_sam_dna16), 'A'); - EXPECT_EQ(seqan3::to_char('C'_sam_dna16), 'C'); - EXPECT_EQ(seqan3::to_char('G'_sam_dna16), 'G'); - - EXPECT_EQ(seqan3::to_char('U'_sam_dna16), 'T'); - EXPECT_EQ(seqan3::to_char('T'_sam_dna16), 'T'); - - EXPECT_EQ(seqan3::to_char('R'_sam_dna16), 'R'); - EXPECT_EQ(seqan3::to_char('Y'_sam_dna16), 'Y'); - EXPECT_EQ(seqan3::to_char('S'_sam_dna16), 'S'); - EXPECT_EQ(seqan3::to_char('W'_sam_dna16), 'W'); - EXPECT_EQ(seqan3::to_char('K'_sam_dna16), 'K'); - EXPECT_EQ(seqan3::to_char('M'_sam_dna16), 'M'); - EXPECT_EQ(seqan3::to_char('B'_sam_dna16), 'B'); - EXPECT_EQ(seqan3::to_char('D'_sam_dna16), 'D'); - EXPECT_EQ(seqan3::to_char('H'_sam_dna16), 'H'); - EXPECT_EQ(seqan3::to_char('V'_sam_dna16), 'V'); - - EXPECT_EQ(seqan3::to_char('='_sam_dna16), '='); - - EXPECT_EQ(seqan3::to_char('N'_sam_dna16), 'N'); - EXPECT_EQ(seqan3::to_char('!'_sam_dna16), 'N'); -} - -TEST(sam_dna16, string_literal) -{ - seqan3::sam_dna16_vector v; - v.resize(5, 'A'_sam_dna16); - EXPECT_EQ(v, "AAAAA"_sam_dna16); - - std::vector w{'A'_sam_dna16, - '='_sam_dna16, - 'G'_sam_dna16, - 'T'_sam_dna16, - 'U'_sam_dna16, - 'N'_sam_dna16}; - EXPECT_EQ(w, "A=GTTN"_sam_dna16); -} - -TEST(sam_dna16, char_is_valid) -{ - constexpr auto validator = 
seqan3::is_char<'A'> || seqan3::is_char<'C'> || seqan3::is_char<'G'> || - seqan3::is_char<'T'> || seqan3::is_char<'U'> || seqan3::is_char<'a'> || - seqan3::is_char<'c'> || seqan3::is_char<'g'> || seqan3::is_char<'t'> || - seqan3::is_char<'u'> || seqan3::is_char<'N'> || seqan3::is_char<'n'> || - seqan3::is_char<'R'> || seqan3::is_char<'Y'> || seqan3::is_char<'S'> || - seqan3::is_char<'W'> || seqan3::is_char<'K'> || seqan3::is_char<'M'> || - seqan3::is_char<'B'> || seqan3::is_char<'D'> || seqan3::is_char<'H'> || - seqan3::is_char<'V'> || seqan3::is_char<'r'> || seqan3::is_char<'y'> || - seqan3::is_char<'s'> || seqan3::is_char<'w'> || seqan3::is_char<'k'> || - seqan3::is_char<'m'> || seqan3::is_char<'b'> || seqan3::is_char<'d'> || - seqan3::is_char<'h'> || seqan3::is_char<'v'> || seqan3::is_char<'='>; - - for (char c : std::views::iota(std::numeric_limits::min(), std::numeric_limits::max())) - EXPECT_EQ(seqan3::sam_dna16::char_is_valid(c), validator(c)); -}