Skip to content

Commit

Permalink
Merge pull request #2584 from marehr/complement_table
Browse files Browse the repository at this point in the history
nucleotide_base complement table static member functions
  • Loading branch information
marehr authored May 4, 2021
2 parents 9f36049 + 26888c0 commit b562ea6
Show file tree
Hide file tree
Showing 11 changed files with 179 additions and 161 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,19 @@ regression test suite and patches at https://github.com/seqan/seqan3/tree/master

#### Alphabet

* We relaxed some requirements of `seqan3::alphabet_base<alphabet_t>`
([\#2427](https://github.com/seqan/seqan3/pull/2427)):
* Old requirements: `alphabet_t::rank_to_char` and `alphabet_t::char_to_rank` must be lookup tables.
* New requirements: `alphabet_t::rank_to_char` and `alphabet_t::char_to_rank` must be static member functions.

This allows for more flexible rank <-> char conversion implementations. Lookup tables are still possible within those
static member functions. However, alphabets that do not need a lookup table can now use easier and/or more efficient
implementations. For example, `seqan3::gap` always returns rank `0` or char `-`, or `seqan3::phred42` where the rank
and char representations are offset by a fixed value.
* We relaxed a requirement of `seqan3::nucleotide_base<alphabet_t>`
([\#2584](https://github.com/seqan/seqan3/pull/2584)):
* Old requirement: `alphabet_t::complement_table` must be a lookup table.
* New requirement: `alphabet_t::rank_complement` must be a static member function.
* Removed the `seqan3::char_is_valid_for` requirement from `seqan3::writable_alphabet` and
`seqan3::detail::writable_constexpr_alphabet`
([\#2337](https://github.com/seqan/seqan3/pull/2337)).
Expand Down
29 changes: 17 additions & 12 deletions doc/cookbook/custom_dna4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ class my_dna4 : public seqan3::nucleotide_base<my_dna4, 4/*alphabet size is 4*/>
return char_to_rank_table[static_cast<index_t>(chr)];
}

// Returns the rank of the complement of the letter with the given rank, looked up in rank_complement_table.
// This is where complement is handled: seqan3::nucleotide_base requires this static member function, and with
// this, my_dna4 models seqan3::nucleotide_alphabet.
static constexpr rank_type rank_complement(rank_type const rank)
{
// Plain table lookup; `rank` is used as the index without a bounds check.
return rank_complement_table[rank];
}

private:
// === lookup-table implementation detail ===

Expand All @@ -46,8 +53,14 @@ class my_dna4 : public seqan3::nucleotide_base<my_dna4, 4/*alphabet size is 4*/>
}()
};

// The forward declaration of the complement table. With this, my_dna4 models seqan3::nucleotide_alphabet.
static const std::array<my_dna4, alphabet_size> complement_table;
// The rank complement table. The entry at index `i` is the rank of the complement of the letter with rank `i`;
// it is read by rank_complement().
static constexpr rank_type rank_complement_table[alphabet_size]
{
3, // T is complement of 'A'_my_dna4
2, // G is complement of 'C'_my_dna4
1, // C is complement of 'G'_my_dna4
0 // A is complement of 'T'_my_dna4
};

friend nucleotide_base<my_dna4, 4>; // Grant seqan3::nucleotide_base access to private/protected members.
friend nucleotide_base<my_dna4, 4>::base_t; // Grant seqan3::alphabet_base access to private/protected members.
Expand All @@ -59,20 +72,12 @@ constexpr my_dna4 operator""_my_dna4(char const c) noexcept
return my_dna4{}.assign_char(c);
}

// The definition of the complement table. With this, my_dna4 models seqan3::nucleotide_alphabet.
constexpr std::array<my_dna4, my_dna4::alphabet_size> my_dna4::complement_table
{
'T'_my_dna4, // the complement of 'A'_my_dna4
'G'_my_dna4, // the complement of 'C'_my_dna4
'C'_my_dna4, // the complement of 'G'_my_dna4
'A'_my_dna4 // the complement of 'T'_my_dna4
};

// Small demonstration of my_dna4: assigning a character, printing the letter and printing its complement.
int main()
{
my_dna4 my_letter{'C'_my_dna4};

my_letter.assign_char('S'); // Characters other than A,C,G,T are implicitly converted to `A`.

seqan3::debug_stream << my_letter; // prints "A"
seqan3::debug_stream << my_letter << "\n"; // prints "A"
seqan3::debug_stream << seqan3::complement(my_letter) << "\n"; // prints "T", the complement of `A`
}
52 changes: 26 additions & 26 deletions include/seqan3/alphabet/nucleotide/dna15.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,32 @@ class dna15 : public nucleotide_base<dna15, 15>
}()
};

//!\copydoc seqan3::dna4::rank_complement_table
// Indexed by rank; the entry at index `i` is the rank of the complement of the letter with rank `i`.
// The per-entry comments list the letters in rank order (A, B, C, D, G, H, K, M, N, R, S, T, V, W, Y).
static constexpr rank_type rank_complement_table[alphabet_size]
{
11, // T is complement of 'A'_dna15
12, // V is complement of 'B'_dna15
4, // G is complement of 'C'_dna15
5, // H is complement of 'D'_dna15
2, // C is complement of 'G'_dna15
3, // D is complement of 'H'_dna15
7, // M is complement of 'K'_dna15
6, // K is complement of 'M'_dna15
8, // N is complement of 'N'_dna15
14, // Y is complement of 'R'_dna15
10, // S is complement of 'S'_dna15
0, // A is complement of 'T'_dna15
1, // B is complement of 'V'_dna15
13, // W is complement of 'W'_dna15
9 // R is complement of 'Y'_dna15
};

//!\copydoc seqan3::dna4::rank_complement
static constexpr rank_type rank_complement(rank_type const rank)
{
// Plain table lookup; `rank` is used as the index without a bounds check.
return rank_complement_table[rank];
}

//!\copydoc seqan3::dna4::rank_to_char
static constexpr char_type rank_to_char(rank_type const rank)
{
Expand All @@ -143,9 +169,6 @@ class dna15 : public nucleotide_base<dna15, 15>
using index_t = std::make_unsigned_t<char_type>;
return char_to_rank_table[static_cast<index_t>(chr)];
}

//!\copydoc seqan3::dna4::complement_table
static const std::array<dna15, alphabet_size> complement_table;
};

// ------------------------------------------------------------------
Expand Down Expand Up @@ -206,27 +229,4 @@ inline dna15_vector operator""_dna15(char const * s, std::size_t n)

} // inline namespace literals

// ------------------------------------------------------------------
// dna15 (deferred definition)
// ------------------------------------------------------------------

constexpr std::array<dna15, dna15::alphabet_size> dna15::complement_table
{
'T'_dna15, // complement of 'A'_dna15
'V'_dna15, // complement of 'B'_dna15
'G'_dna15, // complement of 'C'_dna15
'H'_dna15, // complement of 'D'_dna15
'C'_dna15, // complement of 'G'_dna15
'D'_dna15, // complement of 'H'_dna15
'M'_dna15, // complement of 'K'_dna15
'K'_dna15, // complement of 'M'_dna15
'N'_dna15, // complement of 'N'_dna15
'Y'_dna15, // complement of 'R'_dna15
'S'_dna15, // complement of 'S'_dna15
'A'_dna15, // complement of 'T'_dna15
'B'_dna15, // complement of 'V'_dna15
'W'_dna15, // complement of 'W'_dna15
'R'_dna15 // complement of 'Y'_dna15
};

} // namespace seqan3
52 changes: 26 additions & 26 deletions include/seqan3/alphabet/nucleotide/dna16sam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,32 @@ class dna16sam : public nucleotide_base<dna16sam, 16>
}()
};

//!\copydoc seqan3::dna4::complement_table
static const std::array<dna16sam, alphabet_size> complement_table;
//!\copydoc seqan3::dna4::rank_complement_table
// Indexed by rank; the entry at index `i` is the rank of the complement of the letter with rank `i`.
// The number in parentheses at the end of each comment is the index (rank) of that entry.
static constexpr rank_type rank_complement_table[alphabet_size]
{
15, // N is complement of '='_dna16sam (rank 0)
8, // T is complement of 'A'_dna16sam (rank 1)
4, // G is complement of 'C'_dna16sam (rank 2)
12, // K is complement of 'M'_dna16sam (rank 3)
2, // C is complement of 'G'_dna16sam (rank 4)
10, // Y is complement of 'R'_dna16sam (rank 5)
6, // S is complement of 'S'_dna16sam (rank 6)
14, // B is complement of 'V'_dna16sam (rank 7)
1, // A is complement of 'T'_dna16sam (rank 8)
9, // W is complement of 'W'_dna16sam (rank 9)
5, // R is complement of 'Y'_dna16sam (rank 10)
13, // D is complement of 'H'_dna16sam (rank 11)
3, // M is complement of 'K'_dna16sam (rank 12)
11, // H is complement of 'D'_dna16sam (rank 13)
7, // V is complement of 'B'_dna16sam (rank 14)
15 // N is complement of 'N'_dna16sam (rank 15)
};

//!\copydoc seqan3::dna4::rank_complement
static constexpr rank_type rank_complement(rank_type const rank)
{
// Plain table lookup; `rank` is used as the index without a bounds check.
return rank_complement_table[rank];
}

/*!\copydoc seqan3::dna4::rank_to_char
*
Expand Down Expand Up @@ -216,28 +240,4 @@ SEQAN3_DEPRECATED_310 inline dna16sam_vector operator""_sam_dna16(char const * s

} // inline namespace literals

// ------------------------------------------------------------------
// complement deferred definition
// ------------------------------------------------------------------

constexpr std::array<dna16sam, dna16sam::alphabet_size> dna16sam::complement_table
{
'N'_dna16sam, // complement of '='_dna16sam
'T'_dna16sam, // complement of 'A'_dna16sam
'G'_dna16sam, // complement of 'C'_dna16sam
'K'_dna16sam, // complement of 'M'_dna16sam
'C'_dna16sam, // complement of 'G'_dna16sam
'Y'_dna16sam, // complement of 'R'_dna16sam
'S'_dna16sam, // complement of 'S'_dna16sam
'B'_dna16sam, // complement of 'V'_dna16sam
'A'_dna16sam, // complement of 'T'_dna16sam
'W'_dna16sam, // complement of 'W'_dna16sam
'R'_dna16sam, // complement of 'Y'_dna16sam
'D'_dna16sam, // complement of 'H'_dna16sam
'M'_dna16sam, // complement of 'K'_dna16sam
'H'_dna16sam, // complement of 'D'_dna16sam
'V'_dna16sam, // complement of 'B'_dna16sam
'N'_dna16sam // complement of 'N'_dna16sam
};

} // namespace seqan3
26 changes: 13 additions & 13 deletions include/seqan3/alphabet/nucleotide/dna3bs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,19 @@ class dna3bs : public nucleotide_base<dna3bs, 3>
}()
};

//!\copydoc seqan3::dna4::complement_table
static const std::array<dna3bs, alphabet_size> complement_table;
//!\copydoc seqan3::dna4::rank_complement_table
// Indexed by rank; the entry at index `i` is the rank of the complement of the letter with rank `i`.
// Note that both 'A' and 'G' map to rank 2 ('T'), so complementing twice does not always return the original
// letter (the complement of 'G' is 'T', whose complement is 'A').
static constexpr rank_type rank_complement_table[alphabet_size]
{
2, // T is complement of 'A'_dna3bs
2, // T is complement of 'G'_dna3bs
0 // A is complement of 'T'_dna3bs
};

//!\copydoc seqan3::dna4::rank_complement
static constexpr rank_type rank_complement(rank_type const rank)
{
// Plain table lookup; `rank` is used as the index without a bounds check.
return rank_complement_table[rank];
}

//!\copydoc seqan3::dna4::rank_to_char
static constexpr char_type rank_to_char(rank_type const rank)
Expand Down Expand Up @@ -201,15 +212,4 @@ inline dna3bs_vector operator""_dna3bs(char const * s, std::size_t n)

} // inline namespace literals

// ------------------------------------------------------------------
// dna3bs (deferred definition)
// ------------------------------------------------------------------

constexpr std::array<dna3bs, dna3bs::alphabet_size> dna3bs::complement_table
{
'T'_dna3bs, // complement of 'A'_dna3bs
'T'_dna3bs, // complement of 'G'_dna3bs
'A'_dna3bs // complement of 'T'_dna3bs
};

} // namespace seqan3
31 changes: 17 additions & 14 deletions include/seqan3/alphabet/nucleotide/dna4.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,23 @@ class dna4 : public nucleotide_base<dna4, 4>
}()
};

//!\brief The complement table.
static const std::array<dna4, alphabet_size> complement_table;
//!\brief The rank complement table.
//!\details The entry at index `i` is the rank of the complement of the letter whose rank is `i`.
static constexpr rank_type rank_complement_table[alphabet_size]
{
3, // T is complement of 'A'_dna4
2, // G is complement of 'C'_dna4
1, // C is complement of 'G'_dna4
0 // A is complement of 'T'_dna4
};

/*!\brief Returns the complement by rank.
* \param rank The rank of the letter to complement. It is used directly as an index into the rank complement
*             table (no bounds check), so it is expected to be smaller than the alphabet size.
* \returns The rank of the complementary letter.
* \details
* This function is required by seqan3::nucleotide_base.
*/
static constexpr rank_type rank_complement(rank_type const rank)
{
return rank_complement_table[rank];
}

/*!\brief Returns the character representation of rank.
* \details
Expand Down Expand Up @@ -234,16 +249,4 @@ inline dna4_vector operator""_dna4(char const * s, std::size_t n)

} // inline namespace literals

// ------------------------------------------------------------------
// dna4 (deferred definition)
// ------------------------------------------------------------------

constexpr std::array<dna4, dna4::alphabet_size> dna4::complement_table
{
'T'_dna4, // complement of 'A'_dna4
'G'_dna4, // complement of 'C'_dna4
'C'_dna4, // complement of 'G'_dna4
'A'_dna4 // complement of 'T'_dna4
};

} // namespace seqan3
30 changes: 15 additions & 15 deletions include/seqan3/alphabet/nucleotide/dna5.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,21 @@ class dna5 : public nucleotide_base<dna5, 5>
}()
};

//!\copydoc seqan3::dna4::complement_table
static const std::array<dna5, alphabet_size> complement_table;
//!\copydoc seqan3::dna4::rank_complement_table
// Indexed by rank; the entry at index `i` is the rank of the complement of the letter with rank `i`.
// The per-entry comments list the letters in rank order (A, C, G, N, T); 'N' is its own complement.
static constexpr rank_type rank_complement_table[alphabet_size]
{
4, // T is complement of 'A'_dna5
2, // G is complement of 'C'_dna5
1, // C is complement of 'G'_dna5
3, // N is complement of 'N'_dna5
0 // A is complement of 'T'_dna5
};

//!\copydoc seqan3::dna4::rank_complement
static constexpr rank_type rank_complement(rank_type const rank)
{
// Plain table lookup; `rank` is used as the index without a bounds check.
return rank_complement_table[rank];
}

//!\copydoc seqan3::dna4::rank_to_char
static constexpr char_type rank_to_char(rank_type const rank)
Expand Down Expand Up @@ -197,17 +210,4 @@ inline dna5_vector operator""_dna5(char const * s, std::size_t n)

} // inline namespace literals

// ------------------------------------------------------------------
// dna5 (deferred definition)
// ------------------------------------------------------------------

constexpr std::array<dna5, dna5::alphabet_size> dna5::complement_table
{
'T'_dna5, // complement of 'A'_dna5
'G'_dna5, // complement of 'C'_dna5
'C'_dna5, // complement of 'G'_dna5
'N'_dna5, // complement of 'N'_dna5
'A'_dna5 // complement of 'T'_dna5
};

} // namespace seqan3
38 changes: 37 additions & 1 deletion include/seqan3/alphabet/nucleotide/nucleotide_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@
#include <seqan3/alphabet/nucleotide/concept.hpp>
#include <seqan3/utility/char_operations/transform.hpp>

#ifdef SEQAN3_DEPRECATED_310
namespace seqan3::detail
{
//!\cond
// Helper concept to deprecate the old complement_table lookup tables: it is satisfied if the expression
// `alphabet_t::complement_table[0]` is valid, i.e. the alphabet still provides an indexable complement_table.
template <typename alphabet_t>
SEQAN3_CONCEPT has_complement_table = requires()
{
{ alphabet_t::complement_table[0] };
};
//!\endcond
} // namespace seqan3::detail
#endif // SEQAN3_DEPRECATED_310

namespace seqan3
{

Expand Down Expand Up @@ -114,8 +128,30 @@ class nucleotide_base : public alphabet_base<derived_type, size, char>
*/
constexpr derived_type complement() const noexcept
{
return derived_type::complement_table[to_rank()];
#ifdef SEQAN3_DEPRECATED_310
// Deprecation period: alphabets that still provide the legacy complement_table lookup table are served from it
// via complement_table_deprecated(), which is marked SEQAN3_DEPRECATED_310.
if constexpr (detail::has_complement_table<derived_type>)
return complement_table_deprecated(to_rank());
else
// Deliberately placed in the `else` branch (instead of after the `if constexpr`): this way the statement is
// discarded for alphabets that only provide complement_table, so they do not need a rank_complement function.
return derived_type{}.assign_rank(derived_type{}.rank_complement(to_rank()));
#else // ^^^ before 3.1.0 release / after 3.1.0 release vvv
// Only the new interface remains: map the own rank through derived_type's rank_complement and build the
// resulting letter with assign_rank.
return derived_type{}.assign_rank(derived_type{}.rank_complement(to_rank()));
#endif // SEQAN3_DEPRECATED_310
}

#ifdef SEQAN3_DEPRECATED_310
private:

/*!\brief Looks up the complement of a rank in the legacy derived_type::complement_table.
* \param rank The rank used as index into derived_type::complement_table.
* \returns The entry of derived_type::complement_table at index `rank`.
* \details Before SeqAn 3.0.3, we defined derived_type::complement_table as a lookup table. We relaxed this to be a
* function to give the implementer more freedom.
* \deprecated Replace the lookup table derived_type::complement_table by a static member function named
* derived_type::rank_complement.
*/
SEQAN3_DEPRECATED_310 static constexpr derived_type complement_table_deprecated(rank_type const rank) noexcept
{
return derived_type::complement_table[rank];
}

public:
#endif // SEQAN3_DEPRECATED_310
//!\}

/*!\brief Validate whether a character value has a one-to-one mapping to an alphabet value.
Expand Down
Loading

1 comment on commit b562ea6

@vercel
Copy link

@vercel vercel bot commented on b562ea6 May 4, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.