diff --git a/CHANGELOG.md b/CHANGELOG.md index 415d1a8e5c..adccecd57d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -89,6 +89,11 @@ Note that 3.1.0 will be the first API stable release and interfaces in this rele `seqan3::bi_fm_index_cursor::extend_left()` functions handle c-style strings without including the null character ([\#1588](https://github.com/seqan/seqan3/pull/1588)). +#### Range + +* Added `size()` function to `seqan3::views::kmer_hash` + ([\#1722](https://github.com/seqan/seqan3/pull/1722)). + # 3.0.1 Note that 3.1.0 will be the first API stable release and interfaces in this release might still change. diff --git a/include/seqan3/range/views/kmer_hash.hpp b/include/seqan3/range/views/kmer_hash.hpp index 5b4a334e13..36e5656d89 100644 --- a/include/seqan3/range/views/kmer_hash.hpp +++ b/include/seqan3/range/views/kmer_hash.hpp @@ -174,6 +174,18 @@ class kmer_hash_view : public std::ranges::view_interface return end(); } //!\} + + /*!\brief Returns the size of the range, if the underlying range is a std::ranges::sized_range. + * \returns Number of hash values the view yields, i.e. `max(size(urange) + 1, shape.size()) - shape.size()`; this is 0 whenever the underlying range is shorter than the shape. + */ + auto size() const + //!\cond + requires std::ranges::sized_range + //!\endcond + { + using size_type = decltype(std::ranges::size(urange)); + return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size(); + } }; /*!\brief Iterator for calculating hash values via a given seqan3::shape. 
diff --git a/test/unit/range/views/view_kmer_hash_test.cpp b/test/unit/range/views/view_kmer_hash_test.cpp index a3e950ac6f..3ab2d376db 100644 --- a/test/unit/range/views/view_kmer_hash_test.cpp +++ b/test/unit/range/views/view_kmer_hash_test.cpp @@ -15,165 +15,127 @@ #include #include #include -#include +#include #include using seqan3::operator""_dna4; using seqan3::operator""_dna5; using seqan3::operator""_shape; +using result_t = std::vector; -class kmer_hash_test : public ::testing::Test -{ -protected: - using result_t = std::vector; - - static constexpr auto ungapped_view = seqan3::views::kmer_hash(seqan3::ungapped{3}); - static constexpr auto gapped_view = seqan3::views::kmer_hash(0b101_shape); - - std::vector text1{"AAAAA"_dna4}; - std::vector const ctext1{"AAAAA"_dna4}; - result_t ungapped1{0,0,0}; - result_t gapped1{0,0,0}; - - std::vector text2{"ACGTAGC"_dna4}; - std::vector const ctext2{"ACGTAGC"_dna4}; - result_t ungapped2{6,27,44,50,9}; - result_t gapped2{2, 7, 8, 14, 1}; - - std::vector text3{"AC"_dna4}; - std::vector const ctext3{"AC"_dna4}; - result_t ungapped3{}; - result_t gapped3{ungapped3}; - - seqan3::bitcompressed_vector text4{"ACGTAGC"_dna4}; - seqan3::bitcompressed_vector const ctext4{"ACGTAGC"_dna4}; - result_t ungapped4{ungapped2}; - result_t gapped4{gapped2}; - - std::list text5{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; - std::list const ctext5{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; - result_t ungapped5{ungapped2}; - result_t gapped5{gapped2}; - - std::forward_list text6{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; - std::forward_list const ctext6{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; - result_t ungapped6{ungapped2}; - result_t gapped6{gapped2}; - - std::vector text7{"ACG"_dna4}; - std::vector const ctext7{"ACG"_dna4}; - result_t ungapped7{6}; - result_t gapped7{2}; -}; - - -TEST_F(kmer_hash_test, concepts) -{ - auto v1 
= text1 | ungapped_view; - EXPECT_TRUE(std::ranges::input_range); - EXPECT_TRUE(std::ranges::forward_range); - EXPECT_TRUE(std::ranges::bidirectional_range); - EXPECT_TRUE(std::ranges::random_access_range); - EXPECT_TRUE(std::ranges::view); - EXPECT_TRUE(std::ranges::sized_range); - EXPECT_FALSE(std::ranges::common_range); - EXPECT_TRUE(seqan3::const_iterable_range); - EXPECT_FALSE((std::ranges::output_range)); +static constexpr auto ungapped_view = seqan3::views::kmer_hash(seqan3::ungapped{3}); +static constexpr auto gapped_view = seqan3::views::kmer_hash(0b101_shape); +static constexpr auto prefix_until_first_thymine = seqan3::views::take_until([] (seqan3::dna4 x) + { return x == 'T'_dna4; }); - auto v2 = text5 | ungapped_view; - EXPECT_TRUE(std::ranges::input_range); - EXPECT_TRUE(std::ranges::forward_range); - EXPECT_TRUE(std::ranges::bidirectional_range); - EXPECT_FALSE(std::ranges::random_access_range); - EXPECT_TRUE(std::ranges::view); - EXPECT_FALSE(std::ranges::sized_range); - EXPECT_FALSE(std::ranges::common_range); - EXPECT_TRUE(seqan3::const_iterable_range); - EXPECT_FALSE((std::ranges::output_range)); - - auto v3 = text6 | ungapped_view; - EXPECT_TRUE(std::ranges::input_range); - EXPECT_TRUE(std::ranges::forward_range); - EXPECT_FALSE(std::ranges::bidirectional_range); - EXPECT_FALSE(std::ranges::random_access_range); - EXPECT_TRUE(std::ranges::view); - EXPECT_FALSE(std::ranges::sized_range); - EXPECT_FALSE(std::ranges::common_range); - EXPECT_TRUE(seqan3::const_iterable_range); - EXPECT_FALSE((std::ranges::output_range)); -} +template +class kmer_hash_ungapped_test: public ::testing::Test {}; -TEST_F(kmer_hash_test, ungapped) -{ - EXPECT_EQ(ungapped1, text1 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped2, text2 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped3, text3 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped4, text4 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped5, text5 | ungapped_view | 
seqan3::views::to); - EXPECT_EQ(ungapped6, text6 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped7, text7 | ungapped_view | seqan3::views::to); -} +template +class kmer_hash_ungapped_test: public ::testing::Test {}; +template +class kmer_hash_gapped_test: public ::testing::Test {}; + +using underlying_range_types = ::testing::Types, + std::vector const, + seqan3::bitcompressed_vector, + seqan3::bitcompressed_vector const, + std::list, + std::list const, + std::forward_list, + std::forward_list const>; -TEST_F(kmer_hash_test, gapped) +TYPED_TEST_SUITE(kmer_hash_ungapped_test, underlying_range_types, ); +TYPED_TEST_SUITE(kmer_hash_gapped_test, underlying_range_types, ); + +TYPED_TEST(kmer_hash_ungapped_test, combined_with_container) { - EXPECT_EQ(gapped1, text1 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped2, text2 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped3, text3 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped4, text4 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped5, text5 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped6, text6 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped7, text7 | gapped_view | seqan3::views::to); + { + TypeParam text1{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; // ACGTAGC + result_t ungapped1{6, 27, 44, 50, 9}; + EXPECT_RANGE_EQ(ungapped1, text1 | ungapped_view); + EXPECT_RANGE_EQ(result_t{6}, text1 | prefix_until_first_thymine | ungapped_view); + } + { + TypeParam text2{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4}; // AAAAA + result_t ungapped2{0, 0, 0}; + EXPECT_RANGE_EQ(ungapped2, text2 | ungapped_view); + } + { + TypeParam text3{'A'_dna4, 'C'_dna4}; // AC + EXPECT_RANGE_EQ(result_t{}, text3 | ungapped_view); + } + { + TypeParam text4{'A'_dna4, 'C'_dna4, 'G'_dna4}; // ACG + EXPECT_RANGE_EQ(result_t{6}, text4 | ungapped_view); + } } -TEST_F(kmer_hash_test, const_ungapped) +TYPED_TEST(kmer_hash_gapped_test, combined_with_container) { - EXPECT_EQ(ungapped1, ctext1 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped2, 
ctext2 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped3, ctext3 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped4, ctext4 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped5, ctext5 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped6, ctext6 | ungapped_view | seqan3::views::to); - EXPECT_EQ(ungapped7, ctext7 | ungapped_view | seqan3::views::to); + { + TypeParam text1{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; // ACGTAGC + result_t gapped1{2, 7, 8, 14, 1}; + EXPECT_RANGE_EQ(gapped1, text1 | gapped_view); + EXPECT_RANGE_EQ(result_t{2}, text1 | prefix_until_first_thymine| gapped_view); + } + { + TypeParam text2{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4}; // AAAAA + result_t gapped2{0, 0, 0}; + EXPECT_RANGE_EQ(gapped2, text2 | gapped_view); + } + { + TypeParam text3{'A'_dna4, 'C'_dna4}; // AC + EXPECT_RANGE_EQ(result_t{}, text3 | gapped_view); + } + { + TypeParam text4{'A'_dna4, 'C'_dna4, 'G'_dna4}; // ACG + EXPECT_RANGE_EQ(result_t{2}, text4 | gapped_view); + } } -TEST_F(kmer_hash_test, const_gapped) +TYPED_TEST(kmer_hash_ungapped_test, concepts) { - EXPECT_EQ(gapped1, ctext1 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped2, ctext2 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped3, ctext3 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped4, ctext4 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped5, ctext5 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped6, ctext6 | gapped_view | seqan3::views::to); - EXPECT_EQ(gapped7, ctext7 | gapped_view | seqan3::views::to); + TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4}; // ACGT + auto v1 = text | ungapped_view; + EXPECT_TRUE(std::ranges::input_range); + EXPECT_TRUE(std::ranges::forward_range); + EXPECT_EQ(std::ranges::bidirectional_range, std::ranges::bidirectional_range); + EXPECT_EQ(std::ranges::random_access_range, std::ranges::random_access_range); + EXPECT_FALSE(std::ranges::contiguous_range); + 
EXPECT_TRUE(std::ranges::view); + EXPECT_EQ(std::ranges::sized_range, std::ranges::sized_range); + EXPECT_FALSE(std::ranges::common_range); + EXPECT_TRUE(seqan3::const_iterable_range); + EXPECT_FALSE((std::ranges::output_range)); } -TEST_F(kmer_hash_test, combinability) +TYPED_TEST(kmer_hash_gapped_test, concepts) { - auto stop_at_t = seqan3::views::take_until([] (seqan3::dna4 const x) { return x == 'T'_dna4; }); - EXPECT_EQ(result_t{6}, text2 | stop_at_t | ungapped_view | seqan3::views::to); - EXPECT_EQ(result_t{6}, text5 | stop_at_t | ungapped_view | seqan3::views::to); - EXPECT_EQ(result_t{6}, text6 | stop_at_t | ungapped_view | seqan3::views::to); - - EXPECT_EQ(ungapped2 | std::views::reverse | seqan3::views::to, - text2 | ungapped_view | std::views::reverse | seqan3::views::to); - - EXPECT_EQ(gapped2 | std::views::reverse | seqan3::views::to, - text2 | gapped_view | std::views::reverse | seqan3::views::to); - - EXPECT_EQ(ungapped5 | std::views::reverse | seqan3::views::to, - text5 | ungapped_view | std::views::reverse | seqan3::views::to); - - EXPECT_EQ(gapped5 | std::views::reverse | seqan3::views::to, - text5 | gapped_view | std::views::reverse | seqan3::views::to); + TypeParam text{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4}; // ACGT + auto v1 = text | gapped_view; + EXPECT_TRUE(std::ranges::input_range); + EXPECT_TRUE(std::ranges::forward_range); + EXPECT_EQ(std::ranges::bidirectional_range, std::ranges::bidirectional_range); + EXPECT_EQ(std::ranges::random_access_range, std::ranges::random_access_range); + EXPECT_FALSE(std::ranges::contiguous_range); + EXPECT_TRUE(std::ranges::view); + EXPECT_EQ(std::ranges::sized_range, std::ranges::sized_range); + EXPECT_FALSE(std::ranges::common_range); + EXPECT_TRUE(seqan3::const_iterable_range); + EXPECT_FALSE((std::ranges::output_range)); } -TEST_F(kmer_hash_test, invalid_sizes) +TYPED_TEST(kmer_hash_ungapped_test, invalid_sizes) { + TypeParam text1{'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4, 'A'_dna4}; 
EXPECT_NO_THROW(text1 | seqan3::views::kmer_hash(seqan3::ungapped{32})); EXPECT_THROW(text1 | seqan3::views::kmer_hash(seqan3::ungapped{33}), std::invalid_argument); - EXPECT_NO_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{32})); - EXPECT_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{33}), std::invalid_argument); + if constexpr (std::ranges::bidirectional_range) // excludes forward_list + { + EXPECT_NO_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{32})); + EXPECT_THROW(text1 | std::views::reverse | seqan3::views::kmer_hash(seqan3::ungapped{33}), + std::invalid_argument); + } EXPECT_NO_THROW(text1 | seqan3::views::kmer_hash(0xFFFFFFFE001_shape)); // size=44, count=32 EXPECT_THROW(text1 | seqan3::views::kmer_hash(0xFFFFFFFFE009_shape), std::invalid_argument); // size=44, count=33 @@ -184,15 +146,15 @@ TEST_F(kmer_hash_test, invalid_sizes) } // https://github.com/seqan/seqan3/issues/1614 -TEST_F(kmer_hash_test, issue1614) +TEST(kmer_hash_ungapped_test, issue1614) { std::vector sequence{"TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"_dna5}; - EXPECT_EQ(sequence | seqan3::views::kmer_hash(seqan3::ungapped{25}) | seqan3::views::to>, - seqan3::views::repeat_n(298023223876953124, 26) | seqan3::views::to>); + EXPECT_RANGE_EQ(sequence | seqan3::views::kmer_hash(seqan3::ungapped{25}), + seqan3::views::repeat_n(298023223876953124, 26)); } // https://github.com/seqan/seqan3/issues/1643 -TEST_F(kmer_hash_test, issue1643) +TEST(kmer_hash_ungapped_test, issue1643) { std::vector text_23_elements{"ACGATCGATCGTAGCTACTGAGC"_dna4}; @@ -206,3 +168,21 @@ TEST_F(kmer_hash_test, issue1643) auto k_mer_size_25_view = text_23_elements | seqan3::views::kmer_hash(seqan3::ungapped{25u}); EXPECT_TRUE(k_mer_size_25_view.empty()); } + +// https://github.com/seqan/seqan3/issues/1719 +TYPED_TEST(kmer_hash_ungapped_test, issue1719) +{ + if constexpr (std::ranges::sized_range) + { + TypeParam 
sequence{}; + auto v = sequence | seqan3::views::kmer_hash(seqan3::ungapped{8}); + EXPECT_EQ(0u, v.size()); + + TypeParam sequence2{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4, 'C'_dna4}; + auto v2 = sequence2 | seqan3::views::kmer_hash(seqan3::ungapped{8}); + EXPECT_EQ(0u, v2.size()); + + auto v3 = sequence2 | seqan3::views::kmer_hash(seqan3::ungapped{4}); + EXPECT_EQ(4u, v3.size()); + } +}