From 786c90d3cf6a2e13655cd97488728d1428d1c723 Mon Sep 17 00:00:00 2001 From: Mitra Darja Darvish Date: Tue, 7 Apr 2020 12:37:14 +0200 Subject: [PATCH] [MISC] Add typed test to kmerhash --- CHANGELOG.md | 2 + include/seqan3/range/views/kmer_hash.hpp | 12 + test/unit/range/views/view_kmer_hash_test.cpp | 208 +++++++----------- 3 files changed, 97 insertions(+), 125 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 415d1a8e5c6..fdd0581a0df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -175,6 +175,8 @@ Note that 3.1.0 will be the first API stable release and interfaces in this rele ([\#1410](https://github.com/seqan/seqan3/pull/1410)). * Renamed `seqan3::views::all` to `seqan3::views::type_reduce` ([\#1410](https://github.com/seqan/seqan3/pull/1410)). +* Added `size()` function to `seqan3::views::kmer_hash` + ([\#1722](https://github.com/seqan/seqan3/pull/1722)). #### Search diff --git a/include/seqan3/range/views/kmer_hash.hpp b/include/seqan3/range/views/kmer_hash.hpp index 5b4a334e135..2d7f189e0e7 100644 --- a/include/seqan3/range/views/kmer_hash.hpp +++ b/include/seqan3/range/views/kmer_hash.hpp @@ -174,6 +174,18 @@ class kmer_hash_view : public std::ranges::view_interface return end(); } //!\} + + /*!\brief Returns the size of the range, if the underlying range is a std::ranges::sized_range. + * \returns The number of hash values in the view; 0 if the underlying range is shorter than the shape. + */ + auto size() const + requires std::ranges::sized_range + { + auto range_size{std::ranges::size(urange)}; + if (range_size >= shape_.size()) + return range_size - shape_.size() + 1; + return decltype(range_size){0}; + } }; /*!\brief Iterator for calculating hash values via a given seqan3::shape. 
diff --git a/test/unit/range/views/view_kmer_hash_test.cpp b/test/unit/range/views/view_kmer_hash_test.cpp index a3e950ac6fa..bd3545aba56 100644 --- a/test/unit/range/views/view_kmer_hash_test.cpp +++ b/test/unit/range/views/view_kmer_hash_test.cpp @@ -22,158 +22,103 @@ using seqan3::operator""_dna4; using seqan3::operator""_dna5; using seqan3::operator""_shape; +using result_t = std::vector; -class kmer_hash_test : public ::testing::Test -{ -protected: - using result_t = std::vector; - - static constexpr auto ungapped_view = seqan3::views::kmer_hash(seqan3::ungapped{3}); - static constexpr auto gapped_view = seqan3::views::kmer_hash(0b101_shape); - - std::vector text1{"AAAAA"_dna4}; - std::vector const ctext1{"AAAAA"_dna4}; - result_t ungapped1{0,0,0}; - result_t gapped1{0,0,0}; - - std::vector text2{"ACGTAGC"_dna4}; - std::vector const ctext2{"ACGTAGC"_dna4}; - result_t ungapped2{6,27,44,50,9}; - result_t gapped2{2, 7, 8, 14, 1}; - - std::vector text3{"AC"_dna4}; - std::vector const ctext3{"AC"_dna4}; - result_t ungapped3{}; - result_t gapped3{ungapped3}; - - seqan3::bitcompressed_vector text4{"ACGTAGC"_dna4}; - seqan3::bitcompressed_vector const ctext4{"ACGTAGC"_dna4}; - result_t ungapped4{ungapped2}; - result_t gapped4{gapped2}; - - std::list text5{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; - std::list const ctext5{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; - result_t ungapped5{ungapped2}; - result_t gapped5{gapped2}; +static constexpr auto ungapped_view = seqan3::views::kmer_hash(seqan3::ungapped{3}); +static constexpr auto gapped_view = seqan3::views::kmer_hash(0b101_shape); +static constexpr auto prefix_until_first_thymine = seqan3::views::take_until([] (seqan3::dna4 x) + { return x == 'T'_dna4; }); - std::forward_list text6{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; - std::forward_list const ctext6{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; - 
result_t ungapped6{ungapped2}; - result_t gapped6{gapped2}; +template +class kmer_hash_test: public ::testing::Test {}; - std::vector text7{"ACG"_dna4}; - std::vector const ctext7{"ACG"_dna4}; - result_t ungapped7{6}; - result_t gapped7{2}; -}; +using underlying_range_types = ::testing::Types, + std::vector const, + seqan3::bitcompressed_vector, + seqan3::bitcompressed_vector const, + std::list, + std::list const, + std::forward_list, + std::forward_list const>; +TYPED_TEST_SUITE(kmer_hash_test, underlying_range_types, ); -TEST_F(kmer_hash_test, concepts) +TYPED_TEST(kmer_hash_test, ungapped_combined_with_container) { - auto v1 = text1 | ungapped_view; - EXPECT_TRUE(std::ranges::input_range); - EXPECT_TRUE(std::ranges::forward_range); - EXPECT_TRUE(std::ranges::bidirectional_range); - EXPECT_TRUE(std::ranges::random_access_range); - EXPECT_TRUE(std::ranges::view); - EXPECT_TRUE(std::ranges::sized_range); - EXPECT_FALSE(std::ranges::common_range); - EXPECT_TRUE(seqan3::const_iterable_range); - EXPECT_FALSE((std::ranges::output_range)); - - auto v2 = text5 | ungapped_view; - EXPECT_TRUE(std::ranges::input_range); - EXPECT_TRUE(std::ranges::forward_range); - EXPECT_TRUE(std::ranges::bidirectional_range); - EXPECT_FALSE(std::ranges::random_access_range); - EXPECT_TRUE(std::ranges::view); - EXPECT_FALSE(std::ranges::sized_range); - EXPECT_FALSE(std::ranges::common_range); - EXPECT_TRUE(seqan3::const_iterable_range); - EXPECT_FALSE((std::ranges::output_range)); - - auto v3 = text6 | ungapped_view; - EXPECT_TRUE(std::ranges::input_range); - EXPECT_TRUE(std::ranges::forward_range); - EXPECT_FALSE(std::ranges::bidirectional_range); - EXPECT_FALSE(std::ranges::random_access_range); - EXPECT_TRUE(std::ranges::view); - EXPECT_FALSE(std::ranges::sized_range); - EXPECT_FALSE(std::ranges::common_range); - EXPECT_TRUE(seqan3::const_iterable_range); - EXPECT_FALSE((std::ranges::output_range)); -} + TypeParam text1{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 
'C'_dna4}; // ACGTAGC + result_t ungapped1{6, 27, 44, 50, 9}; + TypeParam text2{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4}; // AAAAA + result_t ungapped2{0,0,0}; + TypeParam text3{'A'_dna4, 'C'_dna4}; // AC + result_t ungapped3{}; + TypeParam text4{'A'_dna4, 'C'_dna4, 'G'_dna4}; // ACG + result_t ungapped4{6}; -TEST_F(kmer_hash_test, ungapped) -{ EXPECT_EQ(ungapped1, text1 | ungapped_view | seqan3::views::to); EXPECT_EQ(ungapped2, text2 | ungapped_view | seqan3::views::to); EXPECT_EQ(ungapped3, text3 | ungapped_view | seqan3::views::to); EXPECT_EQ(ungapped4, text4 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped5, text5 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped6, text6 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped7, text7 | ungapped_view | seqan3::views::to); + EXPECT_EQ(ungapped4, text1 | prefix_until_first_thymine | ungapped_view | seqan3::views::to); } -TEST_F(kmer_hash_test, gapped) +TYPED_TEST(kmer_hash_test, gapped_combined_with_container) { + TypeParam text1{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; // ACGTAGC + result_t gapped1{2, 7, 8, 14, 1}; + TypeParam text2{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4}; // AAAAA + result_t gapped2{0,0,0}; + TypeParam text3{'A'_dna4, 'C'_dna4}; // AC + result_t gapped3{}; + TypeParam text4{'A'_dna4, 'C'_dna4, 'G'_dna4}; // ACG + result_t gapped4{2}; EXPECT_EQ(gapped1, text1 | gapped_view | seqan3::views::to); EXPECT_EQ(gapped2, text2 | gapped_view | seqan3::views::to); EXPECT_EQ(gapped3, text3 | gapped_view | seqan3::views::to); EXPECT_EQ(gapped4, text4 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped5, text5 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped6, text6 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped7, text7 | gapped_view | seqan3::views::to); -} - -TEST_F(kmer_hash_test, const_ungapped) -{ - EXPECT_EQ(ungapped1, ctext1 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped2, ctext2 | ungapped_view | 
seqan3::views::to); - EXPECT_EQ(ungapped3, ctext3 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped4, ctext4 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped5, ctext5 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped6, ctext6 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped7, ctext7 | ungapped_view | seqan3::views::to); + EXPECT_EQ(gapped4, text1 | prefix_until_first_thymine| gapped_view | seqan3::views::to); } -TEST_F(kmer_hash_test, const_gapped) +TYPED_TEST(kmer_hash_test, ungapped_concepts) { - EXPECT_EQ(gapped1, ctext1 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped2, ctext2 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped3, ctext3 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped4, ctext4 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped5, ctext5 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped6, ctext6 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped7, ctext7 | gapped_view | seqan3::views::to); + TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4}; // ACGT + auto v1 = text | ungapped_view; + EXPECT_TRUE(std::ranges::input_range); + EXPECT_TRUE(std::ranges::forward_range); + EXPECT_EQ(std::ranges::bidirectional_range, std::ranges::bidirectional_range); + EXPECT_EQ(std::ranges::random_access_range, std::ranges::random_access_range); + EXPECT_TRUE(std::ranges::view); + EXPECT_EQ(std::ranges::sized_range, std::ranges::sized_range); + EXPECT_FALSE(std::ranges::common_range); + EXPECT_TRUE(seqan3::const_iterable_range); + EXPECT_FALSE((std::ranges::output_range)); } -TEST_F(kmer_hash_test, combinability) +TYPED_TEST(kmer_hash_test, gapped_concepts) { - auto stop_at_t = seqan3::views::take_until([] (seqan3::dna4 const x) { return x == 'T'_dna4; }); - EXPECT_EQ(result_t{6}, text2 | stop_at_t | ungapped_view | seqan3::views::to); - EXPECT_EQ(result_t{6}, text5 | stop_at_t | ungapped_view | seqan3::views::to); - EXPECT_EQ(result_t{6}, text6 | stop_at_t | ungapped_view | seqan3::views::to); - - 
EXPECT_EQ(ungapped2 | std::views::reverse | seqan3::views::to, - text2 | ungapped_view | std::views::reverse | seqan3::views::to); - - EXPECT_EQ(gapped2 | std::views::reverse | seqan3::views::to, - text2 | gapped_view | std::views::reverse | seqan3::views::to); - - EXPECT_EQ(ungapped5 | std::views::reverse | seqan3::views::to, - text5 | ungapped_view | std::views::reverse | seqan3::views::to); - - EXPECT_EQ(gapped5 | std::views::reverse | seqan3::views::to, - text5 | gapped_view | std::views::reverse | seqan3::views::to); + TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4}; // ACGT + auto v1 = text | gapped_view; + EXPECT_TRUE(std::ranges::input_range); + EXPECT_TRUE(std::ranges::forward_range); + EXPECT_EQ(std::ranges::bidirectional_range, std::ranges::bidirectional_range); + EXPECT_EQ(std::ranges::random_access_range, std::ranges::random_access_range); + EXPECT_TRUE(std::ranges::view); + EXPECT_EQ(std::ranges::sized_range, std::ranges::sized_range); + EXPECT_FALSE(std::ranges::common_range); + EXPECT_TRUE(seqan3::const_iterable_range); + EXPECT_FALSE((std::ranges::output_range)); } -TEST_F(kmer_hash_test, invalid_sizes) +TYPED_TEST(kmer_hash_test, invalid_sizes) { + TypeParam text1{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4}; EXPECT_NO_THROW(text1 | seqan3::views::kmer_hash(seqan3::ungapped{32})); EXPECT_THROW(text1 | seqan3::views::kmer_hash(seqan3::ungapped{33}), std::invalid_argument); - EXPECT_NO_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{32})); - EXPECT_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{33}), std::invalid_argument); + if constexpr (std::ranges::bidirectional_range) // excludes forward_list + { + EXPECT_NO_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{32})); + EXPECT_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{33}), + std::invalid_argument); + } EXPECT_NO_THROW(text1 | 
seqan3::views::kmer_hash(0xFFFFFFFE001_shape)); // size=44, count=32 EXPECT_THROW(text1 | seqan3::views::kmer_hash(0xFFFFFFFFE009_shape), std::invalid_argument); // size=44, count=33 @@ -184,7 +129,7 @@ TEST_F(kmer_hash_test, invalid_sizes) } // https://github.com/seqan/seqan3/issues/1614 -TEST_F(kmer_hash_test, issue1614) +TEST(kmer_hash_test, issue1614) { std::vector sequence{"TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"_dna5}; EXPECT_EQ(sequence | seqan3::views::kmer_hash(seqan3::ungapped{25}) | seqan3::views::to>, @@ -192,7 +137,7 @@ TEST_F(kmer_hash_test, issue1614) } // https://github.com/seqan/seqan3/issues/1643 -TEST_F(kmer_hash_test, issue1643) +TEST(kmer_hash_test, issue1643) { std::vector text_23_elements{"ACGATCGATCGTAGCTACTGAGC"_dna4}; @@ -206,3 +151,16 @@ TEST_F(kmer_hash_test, issue1643) auto k_mer_size_25_view = text_23_elements | seqan3::views::kmer_hash(seqan3::ungapped{25u}); EXPECT_TRUE(k_mer_size_25_view.empty()); } + +// https://github.com/seqan/seqan3/issues/1719 +TEST(kmer_hash_test, issue1719) +{ + uint64_t const expected = 0; + std::vector sequence{""_dna5}; + auto v = sequence | seqan3::views::kmer_hash(seqan3::ungapped{25}); + EXPECT_EQ(expected, v.size()); + + std::vector sequence2{"ACGATCGATCGTAGCTACTGAGC"_dna5}; + auto v2 = sequence2 | seqan3::views::kmer_hash(seqan3::ungapped{25}); + EXPECT_EQ(expected, v2.size()); +}