Skip to content

Commit

Permalink
[MISC] Add typed test to kmerhash
Browse files Browse the repository at this point in the history
  • Loading branch information
MitraDarja committed May 4, 2020
1 parent f5278f7 commit 786c90d
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 125 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ Note that 3.1.0 will be the first API stable release and interfaces in this rele
([\#1410](https://github.com/seqan/seqan3/pull/1410)).
* Renamed `seqan3::views::all` to `seqan3::views::type_reduce`
([\#1410](https://github.com/seqan/seqan3/pull/1410)).
* Added size() function to `seqan3::views::kmer_hash`
([\#1722](https://github.com/seqan/seqan3/pull/1722)).

#### Search

Expand Down
12 changes: 12 additions & 0 deletions include/seqan3/range/views/kmer_hash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,18 @@ class kmer_hash_view : public std::ranges::view_interface<kmer_hash_view<urng_t>
return end();
}
//!\}

/*!\brief Returns the size of the range, if the underlying range is a std::ranges::sized_range.
* \returns Size of range.
*/
auto size() const
requires std::ranges::sized_range<urng_t>
{
auto range_size{std::ranges::size(urange)};
if (range_size >= shape_.size())
return range_size - shape_.size() + 1;
return decltype(range_size){0};
}
};

/*!\brief Iterator for calculating hash values via a given seqan3::shape.
Expand Down
208 changes: 83 additions & 125 deletions test/unit/range/views/view_kmer_hash_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,158 +22,103 @@
using seqan3::operator""_dna4;
using seqan3::operator""_dna5;
using seqan3::operator""_shape;
using result_t = std::vector<size_t>;

class kmer_hash_test : public ::testing::Test
{
protected:
using result_t = std::vector<size_t>;

static constexpr auto ungapped_view = seqan3::views::kmer_hash(seqan3::ungapped{3});
static constexpr auto gapped_view = seqan3::views::kmer_hash(0b101_shape);

std::vector<seqan3::dna4> text1{"AAAAA"_dna4};
std::vector<seqan3::dna4> const ctext1{"AAAAA"_dna4};
result_t ungapped1{0,0,0};
result_t gapped1{0,0,0};

std::vector<seqan3::dna4> text2{"ACGTAGC"_dna4};
std::vector<seqan3::dna4> const ctext2{"ACGTAGC"_dna4};
result_t ungapped2{6,27,44,50,9};
result_t gapped2{2, 7, 8, 14, 1};

std::vector<seqan3::dna4> text3{"AC"_dna4};
std::vector<seqan3::dna4> const ctext3{"AC"_dna4};
result_t ungapped3{};
result_t gapped3{ungapped3};

seqan3::bitcompressed_vector<seqan3::dna4> text4{"ACGTAGC"_dna4};
seqan3::bitcompressed_vector<seqan3::dna4> const ctext4{"ACGTAGC"_dna4};
result_t ungapped4{ungapped2};
result_t gapped4{gapped2};

std::list<seqan3::dna4> text5{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};
std::list<seqan3::dna4> const ctext5{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};
result_t ungapped5{ungapped2};
result_t gapped5{gapped2};
static constexpr auto ungapped_view = seqan3::views::kmer_hash(seqan3::ungapped{3});
static constexpr auto gapped_view = seqan3::views::kmer_hash(0b101_shape);
static constexpr auto prefix_until_first_thymine = seqan3::views::take_until([] (seqan3::dna4 x)
{ return x == 'T'_dna4; });

std::forward_list<seqan3::dna4> text6{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};
std::forward_list<seqan3::dna4> const ctext6{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};
result_t ungapped6{ungapped2};
result_t gapped6{gapped2};
template <typename T>
class kmer_hash_test: public ::testing::Test {};

std::vector<seqan3::dna4> text7{"ACG"_dna4};
std::vector<seqan3::dna4> const ctext7{"ACG"_dna4};
result_t ungapped7{6};
result_t gapped7{2};
};
using underlying_range_types = ::testing::Types<std::vector<seqan3::dna4>,
std::vector<seqan3::dna4> const,
seqan3::bitcompressed_vector<seqan3::dna4>,
seqan3::bitcompressed_vector<seqan3::dna4> const,
std::list<seqan3::dna4>,
std::list<seqan3::dna4> const,
std::forward_list<seqan3::dna4>,
std::forward_list<seqan3::dna4> const>;

TYPED_TEST_SUITE(kmer_hash_test, underlying_range_types, );

TEST_F(kmer_hash_test, concepts)
TYPED_TEST(kmer_hash_test, ungapped_combined_with_container)
{
auto v1 = text1 | ungapped_view;
EXPECT_TRUE(std::ranges::input_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::bidirectional_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::random_access_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::view<decltype(v1)>);
EXPECT_TRUE(std::ranges::sized_range<decltype(v1)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v1)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v1)>);
EXPECT_FALSE((std::ranges::output_range<decltype(v1), size_t>));

auto v2 = text5 | ungapped_view;
EXPECT_TRUE(std::ranges::input_range<decltype(v2)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v2)>);
EXPECT_TRUE(std::ranges::bidirectional_range<decltype(v2)>);
EXPECT_FALSE(std::ranges::random_access_range<decltype(v2)>);
EXPECT_TRUE(std::ranges::view<decltype(v2)>);
EXPECT_FALSE(std::ranges::sized_range<decltype(v2)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v2)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v2)>);
EXPECT_FALSE((std::ranges::output_range<decltype(v2), size_t>));

auto v3 = text6 | ungapped_view;
EXPECT_TRUE(std::ranges::input_range<decltype(v3)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v3)>);
EXPECT_FALSE(std::ranges::bidirectional_range<decltype(v3)>);
EXPECT_FALSE(std::ranges::random_access_range<decltype(v3)>);
EXPECT_TRUE(std::ranges::view<decltype(v3)>);
EXPECT_FALSE(std::ranges::sized_range<decltype(v3)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v3)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v3)>);
EXPECT_FALSE((std::ranges::output_range<decltype(v3), size_t>));
}
// Input texts paired with the hash values expected from `ungapped_view`.
TypeParam text1{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; // ACGTAGC
result_t ungapped1{6, 27, 44, 50, 9};
TypeParam text2{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4}; // AAAAA
result_t ungapped2{0,0,0};
TypeParam text3{'A'_dna4, 'C'_dna4}; // AC
result_t ungapped3{}; // no hash value is expected for this text
TypeParam text4{'A'_dna4, 'C'_dna4, 'G'_dna4}; // ACG
result_t ungapped4{6};

TEST_F(kmer_hash_test, ungapped)
{
EXPECT_EQ(ungapped1, text1 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped2, text2 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped3, text3 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped4, text4 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped5, text5 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped6, text6 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped7, text7 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped4, text1 | prefix_until_first_thymine | ungapped_view | seqan3::views::to<result_t>);
}

TEST_F(kmer_hash_test, gapped)
TYPED_TEST(kmer_hash_test, gapped_combined_with_container)
{
// Input texts paired with the hash values expected from `gapped_view`.
TypeParam text1{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; // ACGTAGC
result_t gapped1{2, 7, 8, 14, 1};
TypeParam text2{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4}; // AAAAA
result_t gapped2{0,0,0};
TypeParam text3{'A'_dna4, 'C'_dna4}; // AC
result_t gapped3{}; // no hash value is expected for this text
TypeParam text4{'A'_dna4, 'C'_dna4, 'G'_dna4}; // ACG
result_t gapped4{2};
EXPECT_EQ(gapped1, text1 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped2, text2 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped3, text3 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped4, text4 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped5, text5 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped6, text6 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped7, text7 | gapped_view | seqan3::views::to<result_t>);
}

TEST_F(kmer_hash_test, const_ungapped)
{
EXPECT_EQ(ungapped1, ctext1 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped2, ctext2 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped3, ctext3 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped4, ctext4 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped5, ctext5 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped6, ctext6 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped7, ctext7 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped4, text1 | prefix_until_first_thymine| gapped_view | seqan3::views::to<result_t>);
}

TEST_F(kmer_hash_test, const_gapped)
TYPED_TEST(kmer_hash_test, ungapped_concepts)
{
EXPECT_EQ(gapped1, ctext1 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped2, ctext2 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped3, ctext3 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped4, ctext4 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped5, ctext5 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped6, ctext6 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped7, ctext7 | gapped_view | seqan3::views::to<result_t>);
TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4}; // ACGT
auto v1 = text | ungapped_view;
EXPECT_TRUE(std::ranges::input_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v1)>);
EXPECT_EQ(std::ranges::bidirectional_range<decltype(text)>, std::ranges::bidirectional_range<decltype(v1)>);
EXPECT_EQ(std::ranges::random_access_range<decltype(text)>, std::ranges::random_access_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::view<decltype(v1)>);
EXPECT_EQ(std::ranges::sized_range<decltype(text)>, std::ranges::sized_range<decltype(v1)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v1)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v1)>);
EXPECT_FALSE((std::ranges::output_range<decltype(v1), size_t>));
}

TEST_F(kmer_hash_test, combinability)
TYPED_TEST(kmer_hash_test, gapped_concepts)
{
auto stop_at_t = seqan3::views::take_until([] (seqan3::dna4 const x) { return x == 'T'_dna4; });
EXPECT_EQ(result_t{6}, text2 | stop_at_t | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(result_t{6}, text5 | stop_at_t | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(result_t{6}, text6 | stop_at_t | ungapped_view | seqan3::views::to<result_t>);

EXPECT_EQ(ungapped2 | std::views::reverse | seqan3::views::to<result_t>,
text2 | ungapped_view | std::views::reverse | seqan3::views::to<result_t>);

EXPECT_EQ(gapped2 | std::views::reverse | seqan3::views::to<result_t>,
text2 | gapped_view | std::views::reverse | seqan3::views::to<result_t>);

EXPECT_EQ(ungapped5 | std::views::reverse | seqan3::views::to<result_t>,
text5 | ungapped_view | std::views::reverse | seqan3::views::to<result_t>);

EXPECT_EQ(gapped5 | std::views::reverse | seqan3::views::to<result_t>,
text5 | gapped_view | std::views::reverse | seqan3::views::to<result_t>);
TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4}; // ACGT
auto v1 = text | gapped_view;
EXPECT_TRUE(std::ranges::input_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v1)>);
EXPECT_EQ(std::ranges::bidirectional_range<decltype(text)>, std::ranges::bidirectional_range<decltype(v1)>);
EXPECT_EQ(std::ranges::random_access_range<decltype(text)>, std::ranges::random_access_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::view<decltype(v1)>);
EXPECT_EQ(std::ranges::sized_range<decltype(text)>, std::ranges::sized_range<decltype(v1)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v1)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v1)>);
EXPECT_FALSE((std::ranges::output_range<decltype(v1), size_t>));
}

TEST_F(kmer_hash_test, invalid_sizes)
TYPED_TEST(kmer_hash_test, invalid_sizes)
{
TypeParam text1{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4};
EXPECT_NO_THROW(text1 | seqan3::views::kmer_hash(seqan3::ungapped{32}));
EXPECT_THROW(text1 | seqan3::views::kmer_hash(seqan3::ungapped{33}), std::invalid_argument);
EXPECT_NO_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{32}));
EXPECT_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{33}), std::invalid_argument);
if constexpr (std::ranges::bidirectional_range<TypeParam>) // excludes forward_list
{
EXPECT_NO_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{32}));
EXPECT_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{33}),
std::invalid_argument);
}

EXPECT_NO_THROW(text1 | seqan3::views::kmer_hash(0xFFFFFFFE001_shape)); // size=44, count=32
EXPECT_THROW(text1 | seqan3::views::kmer_hash(0xFFFFFFFFE009_shape), std::invalid_argument); // size=44, count=33
Expand All @@ -184,15 +129,15 @@ TEST_F(kmer_hash_test, invalid_sizes)
}

// https://github.com/seqan/seqan3/issues/1614
TEST_F(kmer_hash_test, issue1614)
TEST(kmer_hash_test, issue1614)
{
    // Regression test: every 25-mer of this homopolymer must hash to the same value.
    std::vector<seqan3::dna5> sequence{"TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"_dna5};

    auto const computed_hashes = sequence
                               | seqan3::views::kmer_hash(seqan3::ungapped{25})
                               | seqan3::views::to<std::vector<size_t>>;

    // 26 copies of the single expected hash value.
    auto const expected_hashes = seqan3::views::repeat_n(298023223876953124, 26)
                               | seqan3::views::to<std::vector<size_t>>;

    EXPECT_EQ(computed_hashes, expected_hashes);
}

// https://github.com/seqan/seqan3/issues/1643
TEST_F(kmer_hash_test, issue1643)
TEST(kmer_hash_test, issue1643)
{
std::vector<seqan3::dna4> text_23_elements{"ACGATCGATCGTAGCTACTGAGC"_dna4};

Expand All @@ -206,3 +151,16 @@ TEST_F(kmer_hash_test, issue1643)
auto k_mer_size_25_view = text_23_elements | seqan3::views::kmer_hash(seqan3::ungapped{25u});
EXPECT_TRUE(k_mer_size_25_view.empty());
}

// https://github.com/seqan/seqan3/issues/1719
TEST(kmer_hash_test, issue1719)
{
    // Both texts below must yield a kmer_hash view that reports a size of zero.
    uint64_t const expected = 0;

    // Case 1: empty text.
    std::vector<seqan3::dna5> empty_text{""_dna5};
    auto empty_text_hashes = empty_text | seqan3::views::kmer_hash(seqan3::ungapped{25});
    EXPECT_EQ(expected, empty_text_hashes.size());

    // Case 2: non-empty text hashed with the same ungapped{25} shape.
    std::vector<seqan3::dna5> short_text{"ACGATCGATCGTAGCTACTGAGC"_dna5};
    auto short_text_hashes = short_text | seqan3::views::kmer_hash(seqan3::ungapped{25});
    EXPECT_EQ(expected, short_text_hashes.size());
}

0 comments on commit 786c90d

Please sign in to comment.