Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nucleotide_base complement table static member functions #2584

Merged
merged 5 commits into from
May 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,19 @@ regression test suite and patches at https://github.com/seqan/seqan3/tree/master

#### Alphabet

* We relaxed some requirements of `seqan3::alphabet_base<alphabet_t>`
([\#2427](https://github.com/seqan/seqan3/pull/2427)):
* Old requirements: `alphabet_t::rank_to_char` and `alphabet_t::char_to_rank` must be lookup tables.
* New requirements: `alphabet_t::rank_to_char` and `alphabet_t::char_to_rank` must be static member functions.

This allows for more flexible rank <-> char conversion implementations. Lookup tables are still possible within those
static member functions. However, alphabets that do not need a lookup table can now use simpler and/or more efficient
implementations. For example, `seqan3::gap` always returns rank `0` or char `-`, and for `seqan3::phred42` the rank
and char representations are merely offset by a fixed value.
* We relaxed a requirement of `seqan3::nucleotide_base<alphabet_t>`
([\#2584](https://github.com/seqan/seqan3/pull/2584)):
* Old requirement: `alphabet_t::complement_table` must be a lookup table.
* New requirement: `alphabet_t::rank_complement` must be a static member function.
* Removed seqan3::char_is_valid_for requirement from seqan3::writable_alphabet and
seqan3::detail::writable_constexpr_alphabet
([\#2337](https://github.com/seqan/seqan3/pull/2337)).
Expand Down
29 changes: 17 additions & 12 deletions doc/cookbook/custom_dna4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ class my_dna4 : public seqan3::nucleotide_base<my_dna4, 4/*alphabet size is 4*/>
return char_to_rank_table[static_cast<index_t>(chr)];
}

// Returns the complement by rank. This is where complement is handled and with this, my_dna4 models
// seqan3::nucleotide_alphabet.
static constexpr rank_type rank_complement(rank_type const rank)
{
return rank_complement_table[rank];
}

private:
// === lookup-table implementation detail ===

Expand All @@ -46,8 +53,14 @@ class my_dna4 : public seqan3::nucleotide_base<my_dna4, 4/*alphabet size is 4*/>
}()
};

// The forward declaration of the complement table. With this, my_dna4 models seqan3::nucleotide_alphabet.
static const std::array<my_dna4, alphabet_size> complement_table;
// The rank complement table.
static constexpr rank_type rank_complement_table[alphabet_size]
{
3, // T is complement of 'A'_dna4
2, // G is complement of 'C'_dna4
1, // C is complement of 'G'_dna4
0 // A is complement of 'T'_dna4
};

friend nucleotide_base<my_dna4, 4>; // Grant seqan3::nucleotide_base access to private/protected members.
friend nucleotide_base<my_dna4, 4>::base_t; // Grant seqan3::alphabet_base access to private/protected members.
Expand All @@ -59,20 +72,12 @@ constexpr my_dna4 operator""_my_dna4(char const c) noexcept
return my_dna4{}.assign_char(c);
}

// The definition of the complement table. With this, my_dna4 models seqan3::nucleotide_alphabet.
constexpr std::array<my_dna4, my_dna4::alphabet_size> my_dna4::complement_table
{
'T'_my_dna4, // the complement of 'A'_my_dna4
'G'_my_dna4, // the complement of 'C'_my_dna4
'C'_my_dna4, // the complement of 'G'_my_dna4
'A'_my_dna4 // the complement of 'T'_my_dna4
};

// Demonstrates my_dna4: assignment from a character (with implicit conversion), printing, and complementing.
int main()
{
    my_dna4 my_letter{'C'_my_dna4};

    my_letter.assign_char('S'); // Characters other than A,C,G,T are implicitly converted to `A`.

    // Print the letter exactly once, newline-terminated, so the output matches the annotated expectations below.
    seqan3::debug_stream << my_letter << "\n"; // "A";
    seqan3::debug_stream << seqan3::complement(my_letter) << "\n"; // "T";
}
52 changes: 26 additions & 26 deletions include/seqan3/alphabet/nucleotide/dna15.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,32 @@ class dna15 : public nucleotide_base<dna15, 15>
}()
};

//!\copydoc seqan3::dna4::rank_complement_table
static constexpr rank_type rank_complement_table[alphabet_size]
{
11, // T is complement of 'A'_dna15
12, // V is complement of 'B'_dna15
4, // G is complement of 'C'_dna15
5, // H is complement of 'D'_dna15
2, // C is complement of 'G'_dna15
3, // D is complement of 'H'_dna15
7, // M is complement of 'K'_dna15
6, // K is complement of 'M'_dna15
8, // N is complement of 'N'_dna15
14, // Y is complement of 'R'_dna15
10, // S is complement of 'S'_dna15
0, // A is complement of 'T'_dna15
1, // B is complement of 'V'_dna15
13, // W is complement of 'W'_dna15
9 // R is complement of 'Y'_dna15
};

//!\copydoc seqan3::dna4::rank_complement
static constexpr rank_type rank_complement(rank_type const rank)
{
return rank_complement_table[rank];
}

//!\copydoc seqan3::dna4::rank_to_char
static constexpr char_type rank_to_char(rank_type const rank)
{
Expand All @@ -143,9 +169,6 @@ class dna15 : public nucleotide_base<dna15, 15>
using index_t = std::make_unsigned_t<char_type>;
return char_to_rank_table[static_cast<index_t>(chr)];
}

//!\copydoc seqan3::dna4::complement_table
static const std::array<dna15, alphabet_size> complement_table;
};

// ------------------------------------------------------------------
Expand Down Expand Up @@ -206,27 +229,4 @@ inline dna15_vector operator""_dna15(char const * s, std::size_t n)

} // inline namespace literals

// ------------------------------------------------------------------
// dna15 (deferred definition)
// ------------------------------------------------------------------

constexpr std::array<dna15, dna15::alphabet_size> dna15::complement_table
{
'T'_dna15, // complement of 'A'_dna15
'V'_dna15, // complement of 'B'_dna15
'G'_dna15, // complement of 'C'_dna15
'H'_dna15, // complement of 'D'_dna15
'C'_dna15, // complement of 'G'_dna15
'D'_dna15, // complement of 'H'_dna15
'M'_dna15, // complement of 'K'_dna15
'K'_dna15, // complement of 'M'_dna15
'N'_dna15, // complement of 'N'_dna15
'Y'_dna15, // complement of 'R'_dna15
'S'_dna15, // complement of 'S'_dna15
'A'_dna15, // complement of 'T'_dna15
'B'_dna15, // complement of 'V'_dna15
'W'_dna15, // complement of 'W'_dna15
'R'_dna15 // complement of 'Y'_dna15
};

} // namespace seqan3
52 changes: 26 additions & 26 deletions include/seqan3/alphabet/nucleotide/dna16sam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,32 @@ class dna16sam : public nucleotide_base<dna16sam, 16>
}()
};

//!\copydoc seqan3::dna4::complement_table
static const std::array<dna16sam, alphabet_size> complement_table;
//!\copydoc seqan3::dna4::rank_complement_table
static constexpr rank_type rank_complement_table[alphabet_size]
{
15, // N is complement of '='_dna16sam 0
8, // T is complement of 'A'_dna16sam 1
4, // G is complement of 'C'_dna16sam 2
12, // K is complement of 'M'_dna16sam 3
2, // C is complement of 'G'_dna16sam 4
10, // Y is complement of 'R'_dna16sam 5
6, // S is complement of 'S'_dna16sam 6
14, // B is complement of 'V'_dna16sam 7
1, // A is complement of 'T'_dna16sam 8
9, // W is complement of 'W'_dna16sam 9
5, // R is complement of 'Y'_dna16sam 10
13, // D is complement of 'H'_dna16sam 11
3, // M is complement of 'K'_dna16sam 12
11, // H is complement of 'D'_dna16sam 13
7, // V is complement of 'B'_dna16sam 14
15 // N is complement of 'N'_dna16sam 15
};

//!\copydoc seqan3::dna4::rank_complement
static constexpr rank_type rank_complement(rank_type const rank)
{
return rank_complement_table[rank];
}

/*!\copydoc seqan3::dna4::rank_to_char
*
Expand Down Expand Up @@ -216,28 +240,4 @@ SEQAN3_DEPRECATED_310 inline dna16sam_vector operator""_sam_dna16(char const * s

} // inline namespace literals

// ------------------------------------------------------------------
// complement deferred definition
// ------------------------------------------------------------------

constexpr std::array<dna16sam, dna16sam::alphabet_size> dna16sam::complement_table
{
'N'_dna16sam, // complement of '='_dna16sam
'T'_dna16sam, // complement of 'A'_dna16sam
'G'_dna16sam, // complement of 'C'_dna16sam
'K'_dna16sam, // complement of 'M'_dna16sam
'C'_dna16sam, // complement of 'G'_dna16sam
'Y'_dna16sam, // complement of 'R'_dna16sam
'S'_dna16sam, // complement of 'S'_dna16sam
'B'_dna16sam, // complement of 'V'_dna16sam
'A'_dna16sam, // complement of 'T'_dna16sam
'W'_dna16sam, // complement of 'W'_dna16sam
'R'_dna16sam, // complement of 'Y'_dna16sam
'D'_dna16sam, // complement of 'H'_dna16sam
'M'_dna16sam, // complement of 'K'_dna16sam
'H'_dna16sam, // complement of 'D'_dna16sam
'V'_dna16sam, // complement of 'B'_dna16sam
'N'_dna16sam // complement of 'N'_dna16sam
};

} // namespace seqan3
26 changes: 13 additions & 13 deletions include/seqan3/alphabet/nucleotide/dna3bs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,19 @@ class dna3bs : public nucleotide_base<dna3bs, 3>
}()
};

//!\copydoc seqan3::dna4::complement_table
static const std::array<dna3bs, alphabet_size> complement_table;
//!\copydoc seqan3::dna4::rank_complement_table
static constexpr rank_type rank_complement_table[alphabet_size]
{
2, // T is complement of 'A'_dna3bs
2, // T is complement of 'G'_dna3bs
0 // A is complement of 'T'_dna3bs
};

//!\copydoc seqan3::dna4::rank_complement
static constexpr rank_type rank_complement(rank_type const rank)
{
return rank_complement_table[rank];
}

//!\copydoc seqan3::dna4::rank_to_char
static constexpr char_type rank_to_char(rank_type const rank)
Expand Down Expand Up @@ -201,15 +212,4 @@ inline dna3bs_vector operator""_dna3bs(char const * s, std::size_t n)

} // inline namespace literals

// ------------------------------------------------------------------
// dna3bs (deferred definition)
// ------------------------------------------------------------------

constexpr std::array<dna3bs, dna3bs::alphabet_size> dna3bs::complement_table
{
'T'_dna3bs, // complement of 'A'_dna3bs
'T'_dna3bs, // complement of 'G'_dna3bs
'A'_dna3bs // complement of 'T'_dna3bs
};

} // namespace seqan3
31 changes: 17 additions & 14 deletions include/seqan3/alphabet/nucleotide/dna4.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,23 @@ class dna4 : public nucleotide_base<dna4, 4>
}()
};

//!\brief The complement table.
static const std::array<dna4, alphabet_size> complement_table;
//!\brief The rank complement table.
static constexpr rank_type rank_complement_table[alphabet_size]
{
3, // T is complement of 'A'_dna4
2, // G is complement of 'C'_dna4
1, // C is complement of 'G'_dna4
0 // A is complement of 'T'_dna4
};

/*!\brief Returns the complement by rank.
* \param rank The rank whose complement is requested; it is used as the index into rank_complement_table.
* \returns The entry of rank_complement_table stored at position `rank`.
* \details
* This function is required by seqan3::nucleotide_base.
*/
static constexpr rank_type rank_complement(rank_type const rank)
{
return rank_complement_table[rank];
}

/*!\brief Returns the character representation of rank.
* \details
Expand Down Expand Up @@ -234,16 +249,4 @@ inline dna4_vector operator""_dna4(char const * s, std::size_t n)

} // inline namespace literals

// ------------------------------------------------------------------
// dna4 (deferred definition)
// ------------------------------------------------------------------

constexpr std::array<dna4, dna4::alphabet_size> dna4::complement_table
{
'T'_dna4, // complement of 'A'_dna4
'G'_dna4, // complement of 'C'_dna4
'C'_dna4, // complement of 'G'_dna4
'A'_dna4 // complement of 'T'_dna4
};

} // namespace seqan3
30 changes: 15 additions & 15 deletions include/seqan3/alphabet/nucleotide/dna5.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,21 @@ class dna5 : public nucleotide_base<dna5, 5>
}()
};

//!\copydoc seqan3::dna4::complement_table
static const std::array<dna5, alphabet_size> complement_table;
//!\copydoc seqan3::dna4::rank_complement_table
static constexpr rank_type rank_complement_table[alphabet_size]
{
4, // T is complement of 'A'_dna5
2, // G is complement of 'C'_dna5
1, // C is complement of 'G'_dna5
3, // N is complement of 'N'_dna5
0 // A is complement of 'T'_dna5
};

//!\copydoc seqan3::dna4::rank_complement
static constexpr rank_type rank_complement(rank_type const rank)
{
return rank_complement_table[rank];
}

//!\copydoc seqan3::dna4::rank_to_char
static constexpr char_type rank_to_char(rank_type const rank)
Expand Down Expand Up @@ -197,17 +210,4 @@ inline dna5_vector operator""_dna5(char const * s, std::size_t n)

} // inline namespace literals

// ------------------------------------------------------------------
// dna5 (deferred definition)
// ------------------------------------------------------------------

constexpr std::array<dna5, dna5::alphabet_size> dna5::complement_table
{
'T'_dna5, // complement of 'A'_dna5
'G'_dna5, // complement of 'C'_dna5
'C'_dna5, // complement of 'G'_dna5
'N'_dna5, // complement of 'N'_dna5
'A'_dna5 // complement of 'T'_dna5
};

} // namespace seqan3
38 changes: 37 additions & 1 deletion include/seqan3/alphabet/nucleotide/nucleotide_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@
#include <seqan3/alphabet/nucleotide/concept.hpp>
#include <seqan3/utility/char_operations/transform.hpp>

#ifdef SEQAN3_DEPRECATED_310
namespace seqan3::detail
{
//!\cond
// Helper concept to detect (and thereby deprecate) the old `complement_table` lookup tables:
// it is satisfied if `alphabet_t::complement_table[0]` is a valid expression.
template <typename alphabet_t>
SEQAN3_CONCEPT has_complement_table = requires()
{
{ alphabet_t::complement_table[0] };
};
//!\endcond
} // namespace seqan3::detail
#endif // SEQAN3_DEPRECATED_310

namespace seqan3
{

Expand Down Expand Up @@ -114,8 +128,30 @@ class nucleotide_base : public alphabet_base<derived_type, size, char>
*/
constexpr derived_type complement() const noexcept
{
    // The complement is built from its rank. `rank_complement` is a static member function of the derived
    // alphabet, so it is called on the type itself instead of on a throw-away temporary object.
#ifdef SEQAN3_DEPRECATED_310
    // Transitional path: a derived alphabet that still provides the old `complement_table` lookup table keeps
    // working, but is routed through a helper that is marked as deprecated.
    if constexpr (detail::has_complement_table<derived_type>)
        return complement_table_deprecated(to_rank());
    else
        return derived_type{}.assign_rank(derived_type::rank_complement(to_rank()));
#else // ^^^ before 3.1.0 release / after 3.1.0 release vvv
    return derived_type{}.assign_rank(derived_type::rank_complement(to_rank()));
#endif // SEQAN3_DEPRECATED_310
}

#ifdef SEQAN3_DEPRECATED_310
private:

/*!\brief Legacy complement lookup via derived_type::complement_table.
* \param rank The rank used as the index into derived_type::complement_table.
* \returns The entry of derived_type::complement_table stored at position `rank`.
* \details Before SeqAn 3.0.3, we defined derived_type::complement_table as a lookup table. We relaxed this to be a
* function to give the implementer more freedom.
* \deprecated Replace the lookup table derived_type::complement_table by a static member function named
* derived_type::rank_complement.
*/
SEQAN3_DEPRECATED_310 static constexpr derived_type complement_table_deprecated(rank_type const rank) noexcept
{
return derived_type::complement_table[rank];
}

public:
#endif // SEQAN3_DEPRECATED_310
//!\}

/*!\brief Validate whether a character value has a one-to-one mapping to an alphabet value.
Expand Down
Loading