Skip to content

Commit

Permalink
Merge pull request #1722 from MitraDarja/kmer_hash_typed_test
Browse files Browse the repository at this point in the history
Add typed test to kmer_hash
  • Loading branch information
smehringer authored May 5, 2020
2 parents 2d689d4 + 8cd8edd commit e9db871
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 138 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ Note that 3.1.0 will be the first API stable release and interfaces in this rele
`seqan3::bi_fm_index_cursor::extend_left()` functions handle c-style strings without including the null character
([\#1588](https://github.com/seqan/seqan3/pull/1588)).

#### Range

* Added a `size()` member function to `seqan3::views::kmer_hash`, available if the underlying range is sized
([\#1722](https://github.com/seqan/seqan3/pull/1722)).

# 3.0.1

Note that 3.1.0 will be the first API stable release and interfaces in this release might still change.
Expand Down
12 changes: 12 additions & 0 deletions include/seqan3/range/views/kmer_hash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,18 @@ class kmer_hash_view : public std::ranges::view_interface<kmer_hash_view<urng_t>
return end();
}
//!\}

/*!\brief Returns the size of the range, if the underlying range is a std::ranges::sized_range.
* \returns Size of range.
*/
auto size() const
//!\cond
requires std::ranges::sized_range<urng_t>
//!\endcond
{
using size_type = decltype(std::ranges::size(urange));
return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
}
};

/*!\brief Iterator for calculating hash values via a given seqan3::shape.
Expand Down
256 changes: 118 additions & 138 deletions test/unit/range/views/view_kmer_hash_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,165 +15,127 @@
#include <seqan3/range/views/kmer_hash.hpp>
#include <seqan3/range/views/repeat_n.hpp>
#include <seqan3/range/views/take_until.hpp>
#include <seqan3/range/views/to.hpp>
#include <seqan3/test/expect_range_eq.hpp>

#include <gtest/gtest.h>

using seqan3::operator""_dna4;
using seqan3::operator""_dna5;
using seqan3::operator""_shape;
using result_t = std::vector<size_t>;

class kmer_hash_test : public ::testing::Test
{
protected:
using result_t = std::vector<size_t>;

static constexpr auto ungapped_view = seqan3::views::kmer_hash(seqan3::ungapped{3});
static constexpr auto gapped_view = seqan3::views::kmer_hash(0b101_shape);

std::vector<seqan3::dna4> text1{"AAAAA"_dna4};
std::vector<seqan3::dna4> const ctext1{"AAAAA"_dna4};
result_t ungapped1{0,0,0};
result_t gapped1{0,0,0};

std::vector<seqan3::dna4> text2{"ACGTAGC"_dna4};
std::vector<seqan3::dna4> const ctext2{"ACGTAGC"_dna4};
result_t ungapped2{6,27,44,50,9};
result_t gapped2{2, 7, 8, 14, 1};

std::vector<seqan3::dna4> text3{"AC"_dna4};
std::vector<seqan3::dna4> const ctext3{"AC"_dna4};
result_t ungapped3{};
result_t gapped3{ungapped3};

seqan3::bitcompressed_vector<seqan3::dna4> text4{"ACGTAGC"_dna4};
seqan3::bitcompressed_vector<seqan3::dna4> const ctext4{"ACGTAGC"_dna4};
result_t ungapped4{ungapped2};
result_t gapped4{gapped2};

std::list<seqan3::dna4> text5{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};
std::list<seqan3::dna4> const ctext5{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};
result_t ungapped5{ungapped2};
result_t gapped5{gapped2};

std::forward_list<seqan3::dna4> text6{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};
std::forward_list<seqan3::dna4> const ctext6{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};
result_t ungapped6{ungapped2};
result_t gapped6{gapped2};

std::vector<seqan3::dna4> text7{"ACG"_dna4};
std::vector<seqan3::dna4> const ctext7{"ACG"_dna4};
result_t ungapped7{6};
result_t gapped7{2};
};


TEST_F(kmer_hash_test, concepts)
{
auto v1 = text1 | ungapped_view;
EXPECT_TRUE(std::ranges::input_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::bidirectional_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::random_access_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::view<decltype(v1)>);
EXPECT_TRUE(std::ranges::sized_range<decltype(v1)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v1)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v1)>);
EXPECT_FALSE((std::ranges::output_range<decltype(v1), size_t>));
static constexpr auto ungapped_view = seqan3::views::kmer_hash(seqan3::ungapped{3});
static constexpr auto gapped_view = seqan3::views::kmer_hash(0b101_shape);
static constexpr auto prefix_until_first_thymine = seqan3::views::take_until([] (seqan3::dna4 x)
{ return x == 'T'_dna4; });

auto v2 = text5 | ungapped_view;
EXPECT_TRUE(std::ranges::input_range<decltype(v2)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v2)>);
EXPECT_TRUE(std::ranges::bidirectional_range<decltype(v2)>);
EXPECT_FALSE(std::ranges::random_access_range<decltype(v2)>);
EXPECT_TRUE(std::ranges::view<decltype(v2)>);
EXPECT_FALSE(std::ranges::sized_range<decltype(v2)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v2)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v2)>);
EXPECT_FALSE((std::ranges::output_range<decltype(v2), size_t>));

auto v3 = text6 | ungapped_view;
EXPECT_TRUE(std::ranges::input_range<decltype(v3)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v3)>);
EXPECT_FALSE(std::ranges::bidirectional_range<decltype(v3)>);
EXPECT_FALSE(std::ranges::random_access_range<decltype(v3)>);
EXPECT_TRUE(std::ranges::view<decltype(v3)>);
EXPECT_FALSE(std::ranges::sized_range<decltype(v3)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v3)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v3)>);
EXPECT_FALSE((std::ranges::output_range<decltype(v3), size_t>));
}
template <typename T>
class kmer_hash_ungapped_test: public ::testing::Test {};

TEST_F(kmer_hash_test, ungapped)
{
EXPECT_EQ(ungapped1, text1 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped2, text2 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped3, text3 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped4, text4 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped5, text5 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped6, text6 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped7, text7 | ungapped_view | seqan3::views::to<result_t>);
}
template <typename T>
class kmer_hash_gapped_test: public ::testing::Test {};

using underlying_range_types = ::testing::Types<std::vector<seqan3::dna4>,
std::vector<seqan3::dna4> const,
seqan3::bitcompressed_vector<seqan3::dna4>,
seqan3::bitcompressed_vector<seqan3::dna4> const,
std::list<seqan3::dna4>,
std::list<seqan3::dna4> const,
std::forward_list<seqan3::dna4>,
std::forward_list<seqan3::dna4> const>;

TEST_F(kmer_hash_test, gapped)
TYPED_TEST_SUITE(kmer_hash_ungapped_test, underlying_range_types, );
TYPED_TEST_SUITE(kmer_hash_gapped_test, underlying_range_types, );

// Checks the hash values produced by `ungapped_view` (an ungapped shape of size 3) over small texts,
// once for every container type listed in `underlying_range_types`.
TYPED_TEST(kmer_hash_ungapped_test, combined_with_container)
{
// NOTE(review): the seven EXPECT_EQ lines below use names (gapped1..gapped7, text1..text7) that are not
// declared at this point of the typed test — presumably leftovers of the former fixture-based TEST_F
// version; confirm and remove.
EXPECT_EQ(gapped1, text1 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped2, text2 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped3, text3 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped4, text4 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped5, text5 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped6, text6 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped7, text7 | gapped_view | seqan3::views::to<result_t>);
{
// Seven characters give five overlapping 3-mers.
TypeParam text1{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; // ACGTAGC
result_t ungapped1{6, 27, 44, 50, 9};
EXPECT_RANGE_EQ(ungapped1, text1 | ungapped_view);
// The view is also combined with take_until: only a single hash value is expected for the prefix.
EXPECT_RANGE_EQ(result_t{6}, text1 | prefix_until_first_thymine | ungapped_view);
}
{
TypeParam text2{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4}; // AAAAA
result_t ungapped2{0, 0, 0};
EXPECT_RANGE_EQ(ungapped2, text2 | ungapped_view);
}
{
// Text shorter than the shape: an empty result is expected.
TypeParam text3{'A'_dna4, 'C'_dna4}; // AC
EXPECT_RANGE_EQ(result_t{}, text3 | ungapped_view);
}
{
// Text exactly as long as the shape: a single hash value is expected.
TypeParam text4{'A'_dna4, 'C'_dna4, 'G'_dna4}; // ACG
EXPECT_RANGE_EQ(result_t{6}, text4 | ungapped_view);
}
}

TEST_F(kmer_hash_test, const_ungapped)
// Checks the hash values produced by `gapped_view` (shape 0b101) over small texts,
// once for every container type listed in `underlying_range_types`.
TYPED_TEST(kmer_hash_gapped_test, combined_with_container)
{
// NOTE(review): the seven EXPECT_EQ lines below use names (ungapped1..ungapped7, ctext1..ctext7) that are
// not declared in this typed test — presumably leftovers of the former fixture-based TEST_F version;
// confirm and remove.
EXPECT_EQ(ungapped1, ctext1 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped2, ctext2 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped3, ctext3 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped4, ctext4 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped5, ctext5 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped6, ctext6 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped7, ctext7 | ungapped_view | seqan3::views::to<result_t>);
{
// Seven characters give five hash values for the shape of size 3.
TypeParam text1{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; // ACGTAGC
result_t gapped1{2, 7, 8, 14, 1};
EXPECT_RANGE_EQ(gapped1, text1 | gapped_view);
// The view is also combined with take_until: only a single hash value is expected for the prefix.
EXPECT_RANGE_EQ(result_t{2}, text1 | prefix_until_first_thymine| gapped_view);
}
{
TypeParam text2{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4}; // AAAAA
result_t gapped2{0, 0, 0};
EXPECT_RANGE_EQ(gapped2, text2 | gapped_view);
}
{
// Text shorter than the shape: an empty result is expected.
TypeParam text3{'A'_dna4, 'C'_dna4}; // AC
EXPECT_RANGE_EQ(result_t{}, text3 | gapped_view);
}
{
// Text exactly as long as the shape: a single hash value is expected.
TypeParam text4{'A'_dna4, 'C'_dna4, 'G'_dna4}; // ACG
EXPECT_RANGE_EQ(result_t{2}, text4 | gapped_view);
}
}

TEST_F(kmer_hash_test, const_gapped)
// Checks which range concepts the ungapped k-mer hash view models, for every container type in
// `underlying_range_types`.
TYPED_TEST(kmer_hash_ungapped_test, concepts)
{
// NOTE(review): the seven EXPECT_EQ lines below use names (gapped1..gapped7, ctext1..ctext7) that are not
// declared in this typed test — presumably leftovers of the former fixture-based TEST_F version;
// confirm and remove.
EXPECT_EQ(gapped1, ctext1 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped2, ctext2 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped3, ctext3 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped4, ctext4 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped5, ctext5 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped6, ctext6 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped7, ctext7 | gapped_view | seqan3::views::to<result_t>);
TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4}; // ACGT
auto v1 = text | ungapped_view;
// The view is always at least a forward range ...
EXPECT_TRUE(std::ranges::input_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v1)>);
// ... and is bidirectional / random access exactly when the underlying container is.
EXPECT_EQ(std::ranges::bidirectional_range<decltype(text)>, std::ranges::bidirectional_range<decltype(v1)>);
EXPECT_EQ(std::ranges::random_access_range<decltype(text)>, std::ranges::random_access_range<decltype(v1)>);
EXPECT_FALSE(std::ranges::contiguous_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::view<decltype(v1)>);
// The view is sized exactly when the underlying container is.
EXPECT_EQ(std::ranges::sized_range<decltype(text)>, std::ranges::sized_range<decltype(v1)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v1)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v1)>);
// Extra parentheses keep the comma of the template argument list out of the macro argument list.
EXPECT_FALSE((std::ranges::output_range<decltype(v1), size_t>));
}

TEST_F(kmer_hash_test, combinability)
// Checks which range concepts the gapped k-mer hash view models, for every container type in
// `underlying_range_types`.
TYPED_TEST(kmer_hash_gapped_test, concepts)
{
// NOTE(review): the statements from here down to the declaration of `text` use names (text2, text5, text6,
// ungapped2, gapped2, ungapped5, gapped5) that are not declared in this typed test — presumably leftovers
// of the former fixture-based `combinability` TEST_F; confirm and remove.
auto stop_at_t = seqan3::views::take_until([] (seqan3::dna4 const x) { return x == 'T'_dna4; });
EXPECT_EQ(result_t{6}, text2 | stop_at_t | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(result_t{6}, text5 | stop_at_t | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(result_t{6}, text6 | stop_at_t | ungapped_view | seqan3::views::to<result_t>);

EXPECT_EQ(ungapped2 | std::views::reverse | seqan3::views::to<result_t>,
text2 | ungapped_view | std::views::reverse | seqan3::views::to<result_t>);

EXPECT_EQ(gapped2 | std::views::reverse | seqan3::views::to<result_t>,
text2 | gapped_view | std::views::reverse | seqan3::views::to<result_t>);

EXPECT_EQ(ungapped5 | std::views::reverse | seqan3::views::to<result_t>,
text5 | ungapped_view | std::views::reverse | seqan3::views::to<result_t>);

EXPECT_EQ(gapped5 | std::views::reverse | seqan3::views::to<result_t>,
text5 | gapped_view | std::views::reverse | seqan3::views::to<result_t>);
TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4}; // ACGT
auto v1 = text | gapped_view;
// The view is always at least a forward range ...
EXPECT_TRUE(std::ranges::input_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v1)>);
// ... and is bidirectional / random access exactly when the underlying container is.
EXPECT_EQ(std::ranges::bidirectional_range<decltype(text)>, std::ranges::bidirectional_range<decltype(v1)>);
EXPECT_EQ(std::ranges::random_access_range<decltype(text)>, std::ranges::random_access_range<decltype(v1)>);
EXPECT_FALSE(std::ranges::contiguous_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::view<decltype(v1)>);
// The view is sized exactly when the underlying container is.
EXPECT_EQ(std::ranges::sized_range<decltype(text)>, std::ranges::sized_range<decltype(v1)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v1)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v1)>);
// Extra parentheses keep the comma of the template argument list out of the macro argument list.
EXPECT_FALSE((std::ranges::output_range<decltype(v1), size_t>));
}

TEST_F(kmer_hash_test, invalid_sizes)
TYPED_TEST(kmer_hash_ungapped_test, invalid_sizes)
{
TypeParam text1{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4};
EXPECT_NO_THROW(text1 | seqan3::views::kmer_hash(seqan3::ungapped{32}));
EXPECT_THROW(text1 | seqan3::views::kmer_hash(seqan3::ungapped{33}), std::invalid_argument);
EXPECT_NO_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{32}));
EXPECT_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{33}), std::invalid_argument);
if constexpr (std::ranges::bidirectional_range<TypeParam>) // excludes forward_list
{
EXPECT_NO_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{32}));
EXPECT_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{33}),
std::invalid_argument);
}

EXPECT_NO_THROW(text1 | seqan3::views::kmer_hash(0xFFFFFFFE001_shape)); // size=44, count=32
EXPECT_THROW(text1 | seqan3::views::kmer_hash(0xFFFFFFFFE009_shape), std::invalid_argument); // size=44, count=33
Expand All @@ -184,15 +146,15 @@ TEST_F(kmer_hash_test, invalid_sizes)
}

// https://github.com/seqan/seqan3/issues/1614
TEST_F(kmer_hash_test, issue1614)
// Regression test: hashing a dna5 text consisting only of 'T' with an ungapped shape of size 25 is expected
// to yield 26 identical hash values.
TEST(kmer_hash_ungapped_test, issue1614)
{
std::vector<seqan3::dna5> sequence{"TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"_dna5};
// NOTE(review): this EXPECT_EQ and the EXPECT_RANGE_EQ below compare the same two ranges — one of them is
// presumably a leftover of the previous version; confirm and remove.
EXPECT_EQ(sequence | seqan3::views::kmer_hash(seqan3::ungapped{25}) | seqan3::views::to<std::vector<size_t>>,
seqan3::views::repeat_n(298023223876953124, 26) | seqan3::views::to<std::vector<size_t>>);
// 298023223876953124 equals 5^25 - 1, i.e. 25 base-5 digits that are all 4 — presumably 4 is the rank of
// 'T' in dna5; confirm.
EXPECT_RANGE_EQ(sequence | seqan3::views::kmer_hash(seqan3::ungapped{25}),
seqan3::views::repeat_n(298023223876953124, 26));
}

// https://github.com/seqan/seqan3/issues/1643
TEST_F(kmer_hash_test, issue1643)
TEST(kmer_hash_ungapped_test, issue1643)
{
std::vector<seqan3::dna4> text_23_elements{"ACGATCGATCGTAGCTACTGAGC"_dna4};

Expand All @@ -206,3 +168,21 @@ TEST_F(kmer_hash_test, issue1643)
auto k_mer_size_25_view = text_23_elements | seqan3::views::kmer_hash(seqan3::ungapped{25u});
EXPECT_TRUE(k_mer_size_25_view.empty());
}

// https://github.com/seqan/seqan3/issues/1719
TYPED_TEST(kmer_hash_ungapped_test, issue1719)
{
    // size() is only callable on the view if the underlying container type is itself a sized range;
    // for all other type parameters this test has nothing to check.
    if constexpr (std::ranges::sized_range<TypeParam>)
    {
        auto const hash_8mers = seqan3::views::kmer_hash(seqan3::ungapped{8});
        auto const hash_4mers = seqan3::views::kmer_hash(seqan3::ungapped{4});

        // No elements at all: the view must report a size of zero.
        TypeParam empty_text{};
        auto empty_text_view = empty_text | hash_8mers;
        EXPECT_EQ(0u, empty_text_view.size());

        // Seven elements are one short of a single 8-mer: still zero ...
        TypeParam seven_bases{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};
        auto too_short_view = seven_bases | hash_8mers;
        EXPECT_EQ(0u, too_short_view.size());

        // ... whereas the same seven elements hold 7 - 4 + 1 = 4 overlapping 4-mers.
        auto four_mer_view = seven_bases | hash_4mers;
        EXPECT_EQ(4u, four_mer_view.size());
    }
}

0 comments on commit e9db871

Please sign in to comment.