Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add typed test to kmer_hash #1722

Merged
merged 2 commits into from
May 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ Note that 3.1.0 will be the first API stable release and interfaces in this rele
`seqan3::bi_fm_index_cursor::extend_left()` functions handle c-style strings without including the null character
([\#1588](https://github.com/seqan/seqan3/pull/1588)).

#### Range

* Added a `size()` member function to `seqan3::views::kmer_hash`
([\#1722](https://github.com/seqan/seqan3/pull/1722)).

# 3.0.1

Note that 3.1.0 will be the first API stable release and interfaces in this release might still change.
Expand Down
12 changes: 12 additions & 0 deletions include/seqan3/range/views/kmer_hash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,18 @@ class kmer_hash_view : public std::ranges::view_interface<kmer_hash_view<urng_t>
return end();
}
//!\}

/*!\brief Returns the number of elements in this view; only available if the underlying range is a
*        std::ranges::sized_range.
* \returns `std::ranges::size(urange) + 1 - shape_.size()`, or 0 if the underlying range holds fewer elements
*          than `shape_.size()`.
*/
auto size() const
//!\cond
requires std::ranges::sized_range<urng_t>
smehringer marked this conversation as resolved.
Show resolved Hide resolved
//!\endcond
{
// Use the size type of the underlying range for the comparison below.
using size_type = decltype(std::ranges::size(urange));
// Clamping with std::max keeps the subtraction from dropping below zero when the underlying range is
// shorter than the shape; in that case the result is exactly 0.
return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
}
};

/*!\brief Iterator for calculating hash values via a given seqan3::shape.
Expand Down
256 changes: 118 additions & 138 deletions test/unit/range/views/view_kmer_hash_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,165 +15,127 @@
#include <seqan3/range/views/kmer_hash.hpp>
#include <seqan3/range/views/repeat_n.hpp>
#include <seqan3/range/views/take_until.hpp>
#include <seqan3/range/views/to.hpp>
#include <seqan3/test/expect_range_eq.hpp>

#include <gtest/gtest.h>

using seqan3::operator""_dna4;
using seqan3::operator""_dna5;
using seqan3::operator""_shape;
using result_t = std::vector<size_t>;

class kmer_hash_test : public ::testing::Test
{
protected:
using result_t = std::vector<size_t>;

static constexpr auto ungapped_view = seqan3::views::kmer_hash(seqan3::ungapped{3});
static constexpr auto gapped_view = seqan3::views::kmer_hash(0b101_shape);

std::vector<seqan3::dna4> text1{"AAAAA"_dna4};
std::vector<seqan3::dna4> const ctext1{"AAAAA"_dna4};
result_t ungapped1{0,0,0};
result_t gapped1{0,0,0};

std::vector<seqan3::dna4> text2{"ACGTAGC"_dna4};
std::vector<seqan3::dna4> const ctext2{"ACGTAGC"_dna4};
result_t ungapped2{6,27,44,50,9};
result_t gapped2{2, 7, 8, 14, 1};

std::vector<seqan3::dna4> text3{"AC"_dna4};
std::vector<seqan3::dna4> const ctext3{"AC"_dna4};
result_t ungapped3{};
result_t gapped3{ungapped3};

seqan3::bitcompressed_vector<seqan3::dna4> text4{"ACGTAGC"_dna4};
seqan3::bitcompressed_vector<seqan3::dna4> const ctext4{"ACGTAGC"_dna4};
result_t ungapped4{ungapped2};
result_t gapped4{gapped2};

std::list<seqan3::dna4> text5{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};
std::list<seqan3::dna4> const ctext5{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};
result_t ungapped5{ungapped2};
result_t gapped5{gapped2};

std::forward_list<seqan3::dna4> text6{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};
std::forward_list<seqan3::dna4> const ctext6{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};
result_t ungapped6{ungapped2};
result_t gapped6{gapped2};

std::vector<seqan3::dna4> text7{"ACG"_dna4};
std::vector<seqan3::dna4> const ctext7{"ACG"_dna4};
result_t ungapped7{6};
result_t gapped7{2};
};


TEST_F(kmer_hash_test, concepts)
{
auto v1 = text1 | ungapped_view;
EXPECT_TRUE(std::ranges::input_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::bidirectional_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::random_access_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::view<decltype(v1)>);
EXPECT_TRUE(std::ranges::sized_range<decltype(v1)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v1)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v1)>);
EXPECT_FALSE((std::ranges::output_range<decltype(v1), size_t>));
static constexpr auto ungapped_view = seqan3::views::kmer_hash(seqan3::ungapped{3});
static constexpr auto gapped_view = seqan3::views::kmer_hash(0b101_shape);
static constexpr auto prefix_until_first_thymine = seqan3::views::take_until([] (seqan3::dna4 x)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe put that into the typed test class `kmer_hash_properties_test`.

Copy link
Member

@marehr marehr May 2, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You wrote me that you couldn't make this work. Since these are view adaptors, it is fine for me to put them here.

I'm wondering why TestFixture::ungapped_view wasn't working.

{ return x == 'T'_dna4; });

auto v2 = text5 | ungapped_view;
EXPECT_TRUE(std::ranges::input_range<decltype(v2)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v2)>);
EXPECT_TRUE(std::ranges::bidirectional_range<decltype(v2)>);
EXPECT_FALSE(std::ranges::random_access_range<decltype(v2)>);
EXPECT_TRUE(std::ranges::view<decltype(v2)>);
EXPECT_FALSE(std::ranges::sized_range<decltype(v2)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v2)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v2)>);
EXPECT_FALSE((std::ranges::output_range<decltype(v2), size_t>));

auto v3 = text6 | ungapped_view;
EXPECT_TRUE(std::ranges::input_range<decltype(v3)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v3)>);
EXPECT_FALSE(std::ranges::bidirectional_range<decltype(v3)>);
EXPECT_FALSE(std::ranges::random_access_range<decltype(v3)>);
EXPECT_TRUE(std::ranges::view<decltype(v3)>);
EXPECT_FALSE(std::ranges::sized_range<decltype(v3)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v3)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v3)>);
EXPECT_FALSE((std::ranges::output_range<decltype(v3), size_t>));
}
template <typename T>
class kmer_hash_ungapped_test: public ::testing::Test {};

TEST_F(kmer_hash_test, ungapped)
{
EXPECT_EQ(ungapped1, text1 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped2, text2 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped3, text3 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped4, text4 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped5, text5 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped6, text6 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped7, text7 | ungapped_view | seqan3::views::to<result_t>);
}
template <typename T>
class kmer_hash_gapped_test: public ::testing::Test {};

using underlying_range_types = ::testing::Types<std::vector<seqan3::dna4>,
std::vector<seqan3::dna4> const,
seqan3::bitcompressed_vector<seqan3::dna4>,
seqan3::bitcompressed_vector<seqan3::dna4> const,
std::list<seqan3::dna4>,
std::list<seqan3::dna4> const,
std::forward_list<seqan3::dna4>,
std::forward_list<seqan3::dna4> const>;

TEST_F(kmer_hash_test, gapped)
TYPED_TEST_SUITE(kmer_hash_ungapped_test, underlying_range_types, );
TYPED_TEST_SUITE(kmer_hash_gapped_test, underlying_range_types, );

// Compares the output of the kmer_hash view against hard-coded expected hash values for several short texts.
// Being a typed test, it is run once for every container type listed in underlying_range_types.
TYPED_TEST(kmer_hash_ungapped_test, combined_with_container)
{
EXPECT_EQ(gapped1, text1 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped2, text2 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped3, text3 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped4, text4 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped5, text5 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped6, text6 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped7, text7 | gapped_view | seqan3::views::to<result_t>);
{
TypeParam text1{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; // ACGTAGC
result_t ungapped1{6, 27, 44, 50, 9};
EXPECT_RANGE_EQ(ungapped1, text1 | ungapped_view);
// Only "ACG" precedes the first 'T', so a single hash value is expected.
EXPECT_RANGE_EQ(result_t{6}, text1 | prefix_until_first_thymine | ungapped_view);
}
{
TypeParam text2{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4}; // AAAAA
result_t ungapped2{0, 0, 0};
EXPECT_RANGE_EQ(ungapped2, text2 | ungapped_view);
}
{
// Two characters are fewer than the three the shape needs, so no hash value is expected.
TypeParam text3{'A'_dna4, 'C'_dna4}; // AC
EXPECT_RANGE_EQ(result_t{}, text3 | ungapped_view);
}
{
TypeParam text4{'A'_dna4, 'C'_dna4, 'G'_dna4}; // ACG
EXPECT_RANGE_EQ(result_t{6}, text4 | ungapped_view);
}
}

TEST_F(kmer_hash_test, const_ungapped)
// Compares the output of the kmer_hash view against hard-coded expected hash values for several short texts.
// Being a typed test, it is run once for every container type listed in underlying_range_types.
TYPED_TEST(kmer_hash_gapped_test, combined_with_container)
{
EXPECT_EQ(ungapped1, ctext1 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped2, ctext2 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped3, ctext3 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped4, ctext4 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped5, ctext5 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped6, ctext6 | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(ungapped7, ctext7 | ungapped_view | seqan3::views::to<result_t>);
{
TypeParam text1{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; // ACGTAGC
result_t gapped1{2, 7, 8, 14, 1};
EXPECT_RANGE_EQ(gapped1, text1 | gapped_view);
// Only "ACG" precedes the first 'T', so a single hash value is expected.
EXPECT_RANGE_EQ(result_t{2}, text1 | prefix_until_first_thymine| gapped_view);
}
{
TypeParam text2{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4}; // AAAAA
result_t gapped2{0, 0, 0};
EXPECT_RANGE_EQ(gapped2, text2 | gapped_view);
}
{
// Two characters are fewer than the three positions the 0b101 shape spans, so no hash value is expected.
TypeParam text3{'A'_dna4, 'C'_dna4}; // AC
EXPECT_RANGE_EQ(result_t{}, text3 | gapped_view);
}
{
TypeParam text4{'A'_dna4, 'C'_dna4, 'G'_dna4}; // ACG
EXPECT_RANGE_EQ(result_t{2}, text4 | gapped_view);
}
}

TEST_F(kmer_hash_test, const_gapped)
TYPED_TEST(kmer_hash_ungapped_test, concepts)
{
EXPECT_EQ(gapped1, ctext1 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped2, ctext2 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped3, ctext3 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped4, ctext4 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped5, ctext5 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped6, ctext6 | gapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(gapped7, ctext7 | gapped_view | seqan3::views::to<result_t>);
TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4}; // ACGT
auto v1 = text | ungapped_view;
EXPECT_TRUE(std::ranges::input_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v1)>);
EXPECT_EQ(std::ranges::bidirectional_range<decltype(text)>, std::ranges::bidirectional_range<decltype(v1)>);
EXPECT_EQ(std::ranges::random_access_range<decltype(text)>, std::ranges::random_access_range<decltype(v1)>);
EXPECT_FALSE(std::ranges::contiguous_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::view<decltype(v1)>);
marehr marked this conversation as resolved.
Show resolved Hide resolved
EXPECT_EQ(std::ranges::sized_range<decltype(text)>, std::ranges::sized_range<decltype(v1)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v1)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v1)>);
EXPECT_FALSE((std::ranges::output_range<decltype(v1), size_t>));
}

TEST_F(kmer_hash_test, combinability)
TYPED_TEST(kmer_hash_gapped_test, concepts)
{
auto stop_at_t = seqan3::views::take_until([] (seqan3::dna4 const x) { return x == 'T'_dna4; });
EXPECT_EQ(result_t{6}, text2 | stop_at_t | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(result_t{6}, text5 | stop_at_t | ungapped_view | seqan3::views::to<result_t>);
EXPECT_EQ(result_t{6}, text6 | stop_at_t | ungapped_view | seqan3::views::to<result_t>);

EXPECT_EQ(ungapped2 | std::views::reverse | seqan3::views::to<result_t>,
text2 | ungapped_view | std::views::reverse | seqan3::views::to<result_t>);

EXPECT_EQ(gapped2 | std::views::reverse | seqan3::views::to<result_t>,
text2 | gapped_view | std::views::reverse | seqan3::views::to<result_t>);

EXPECT_EQ(ungapped5 | std::views::reverse | seqan3::views::to<result_t>,
text5 | ungapped_view | std::views::reverse | seqan3::views::to<result_t>);

EXPECT_EQ(gapped5 | std::views::reverse | seqan3::views::to<result_t>,
text5 | gapped_view | std::views::reverse | seqan3::views::to<result_t>);
TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4}; // ACGT
auto v1 = text | gapped_view;
EXPECT_TRUE(std::ranges::input_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::forward_range<decltype(v1)>);
EXPECT_EQ(std::ranges::bidirectional_range<decltype(text)>, std::ranges::bidirectional_range<decltype(v1)>);
EXPECT_EQ(std::ranges::random_access_range<decltype(text)>, std::ranges::random_access_range<decltype(v1)>);
EXPECT_FALSE(std::ranges::contiguous_range<decltype(v1)>);
EXPECT_TRUE(std::ranges::view<decltype(v1)>);
EXPECT_EQ(std::ranges::sized_range<decltype(text)>, std::ranges::sized_range<decltype(v1)>);
EXPECT_FALSE(std::ranges::common_range<decltype(v1)>);
EXPECT_TRUE(seqan3::const_iterable_range<decltype(v1)>);
EXPECT_FALSE((std::ranges::output_range<decltype(v1), size_t>));
}

TEST_F(kmer_hash_test, invalid_sizes)
TYPED_TEST(kmer_hash_ungapped_test, invalid_sizes)
{
TypeParam text1{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4};
EXPECT_NO_THROW(text1 | seqan3::views::kmer_hash(seqan3::ungapped{32}));
EXPECT_THROW(text1 | seqan3::views::kmer_hash(seqan3::ungapped{33}), std::invalid_argument);
EXPECT_NO_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{32}));
EXPECT_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{33}), std::invalid_argument);
if constexpr (std::ranges::bidirectional_range<TypeParam>) // excludes forward_list
{
EXPECT_NO_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{32}));
EXPECT_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{33}),
std::invalid_argument);
}

EXPECT_NO_THROW(text1 | seqan3::views::kmer_hash(0xFFFFFFFE001_shape)); // size=44, count=32
EXPECT_THROW(text1 | seqan3::views::kmer_hash(0xFFFFFFFFE009_shape), std::invalid_argument); // size=44, count=33
Expand All @@ -184,15 +146,15 @@ TEST_F(kmer_hash_test, invalid_sizes)
}

// https://github.com/seqan/seqan3/issues/1614
TEST_F(kmer_hash_test, issue1614)
TEST(kmer_hash_ungapped_test, issue1614)
{
std::vector<seqan3::dna5> sequence{"TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"_dna5};
EXPECT_EQ(sequence | seqan3::views::kmer_hash(seqan3::ungapped{25}) | seqan3::views::to<std::vector<size_t>>,
seqan3::views::repeat_n(298023223876953124, 26) | seqan3::views::to<std::vector<size_t>>);
EXPECT_RANGE_EQ(sequence | seqan3::views::kmer_hash(seqan3::ungapped{25}),
seqan3::views::repeat_n(298023223876953124, 26));
}

// https://github.com/seqan/seqan3/issues/1643
TEST_F(kmer_hash_test, issue1643)
TEST(kmer_hash_ungapped_test, issue1643)
{
std::vector<seqan3::dna4> text_23_elements{"ACGATCGATCGTAGCTACTGAGC"_dna4};

Expand All @@ -206,3 +168,21 @@ TEST_F(kmer_hash_test, issue1643)
auto k_mer_size_25_view = text_23_elements | seqan3::views::kmer_hash(seqan3::ungapped{25u});
EXPECT_TRUE(k_mer_size_25_view.empty());
}

// https://github.com/seqan/seqan3/issues/1719
// Checks size() of the kmer_hash view for an empty text, a text shorter than the shape and a text longer than
// the shape.
TYPED_TEST(kmer_hash_ungapped_test, issue1719)
{
    // size() is only called for container types that model sized_range; for all other types the body is
    // discarded at compile time.
    if constexpr (std::ranges::sized_range<TypeParam>)
    {
        auto const hash_8mers = seqan3::views::kmer_hash(seqan3::ungapped{8});
        auto const hash_4mers = seqan3::views::kmer_hash(seqan3::ungapped{4});

        // Empty text: no 8-mer fits.
        TypeParam empty_text{};
        auto hashes_of_empty_text = empty_text | hash_8mers;
        EXPECT_EQ(0u, hashes_of_empty_text.size());

        TypeParam seven_bases{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4};

        // Seven bases are still too few for a single 8-mer.
        auto too_long_shape_hashes = seven_bases | hash_8mers;
        EXPECT_EQ(0u, too_long_shape_hashes.size());

        // Seven bases contain 7 - 4 + 1 = 4 overlapping 4-mers.
        auto four_mer_hashes = seven_bases | hash_4mers;
        EXPECT_EQ(4u, four_mer_hashes.size());
    }
}