From e4f57d50d37785a15a3978d4a649dee226087499 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Tue, 23 Mar 2021 11:56:30 +0100 Subject: [PATCH 1/3] [FIX] contrib/span not working for (const) vectors --- include/seqan3/contrib/span/span | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/include/seqan3/contrib/span/span b/include/seqan3/contrib/span/span index 788022980b..cfd720f4a5 100644 --- a/include/seqan3/contrib/span/span +++ b/include/seqan3/contrib/span/span @@ -13,11 +13,9 @@ #include // for array #include // for ptrdiff_t -#include // for iterators -#include // for remove_cv, etc - #include // for iterator concepts #include // for range concepts +#include // for remove_cvref_t, etc namespace std { @@ -236,13 +234,13 @@ public: constexpr span& operator=(const span&) noexcept = default; template - requires is_convertible_v>(*)[], element_type(*)[]> + requires is_convertible_v>(*)[], element_type(*)[]> constexpr span(iterator_t f, index_type count) noexcept : data_{addressof(*f)}, size_{count} {} template requires sized_sentinel_for && - is_convertible_v>(*)[], element_type(*)[]> && + is_convertible_v>(*)[], element_type(*)[]> && (!is_convertible_v) constexpr span(iterator f, sentinel_t l) noexcept : data_{addressof(*f)}, size_{static_cast(distance(f, l))} {} @@ -257,11 +255,11 @@ public: inline constexpr span(const array& arr) noexcept : data_{arr.data()}, size_{span_sz} {} template - requires (!same_as>, span>) && // guard for recursive instantiation in constructible_from - (!is_span>::value) && - (!is_std_array>::value) && - (!is_array_v>) && - is_convertible_v>>(*)[], + requires (!same_as, span>) && // guard for recursive instantiation in constructible_from + (!is_span>::value) && + (!is_std_array>::value) && + (!is_array_v>) && + is_convertible_v>(*)[], element_type(*)[]> && ranges::contiguous_range && ranges::sized_range && @@ -430,7 +428,7 @@ template span(const array&) -> span; template - span(range_t &&) -> span>; + span(range_t &&) -> span>>; } // namespace std //!\endcond From f0f1306c7923d2b60d0415a8ec1d673c513841cf Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Tue, 23 Mar 2021 11:57:54 +0100 Subject: [PATCH 2/3] [FIX] conserve const when validating bgzf header --- include/seqan3/io/detail/magic_header.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/seqan3/io/detail/magic_header.hpp b/include/seqan3/io/detail/magic_header.hpp index a09f860eb4..85a7a39b9e 100644 --- a/include/seqan3/io/detail/magic_header.hpp +++ b/include/seqan3/io/detail/magic_header.hpp @@ -99,14 +99,14 @@ struct bgzf_compression static_assert(seqan3::detail::weakly_equality_comparable_with, "The given char type of the span must be comparable with char."); - return (header[0] == magic_header[0] && // GZ_ID1 - header[1] == magic_header[1] && // GZ_ID2 - header[2] == magic_header[2] && // GZ_CM - (header[3] & magic_header[3]) != 0 && // FLG_FEXTRA - to_little_endian(*reinterpret_cast(&header[10])) == magic_header[10] && // BGZF_ID1 - header[12] == magic_header[12] && // BGZF_ID2 - header[13] == magic_header[13] && // BGZF_SLEN - to_little_endian(*reinterpret_cast(&header[14])) == magic_header[14]); // BGZF_XLEN + return (header[0] == magic_header[0] && // GZ_ID1 + header[1] == magic_header[1] && // GZ_ID2 + header[2] == magic_header[2] && // GZ_CM + (header[3] & magic_header[3]) != 0 && // FLG_FEXTRA + to_little_endian(*reinterpret_cast(&header[10])) == magic_header[10] && // BGZF_ID1 + header[12] == magic_header[12] && // BGZF_ID2 + header[13] == magic_header[13] && // BGZF_SLEN + to_little_endian(*reinterpret_cast(&header[14])) == magic_header[14]); // BGZF_XLEN } }; From 217cdb14e29b96489bf969a19d488aabc186d952 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Tue, 23 Mar 2021 11:59:03 +0100 Subject: [PATCH 3/3] [FIX] compression streams writing wrong format --- include/seqan3/io/detail/misc_input.hpp | 2 +- include/seqan3/io/detail/misc_output.hpp | 33 +++++---- test/unit/io/detail/CMakeLists.txt | 1 + test/unit/io/detail/misc_output_test.cpp | 68 +++++++++++++++++++ test/unit/io/detail/misc_test.cpp | 1 - .../unit/io/sam_file/sam_file_output_test.cpp | 2 +- .../sequence_file_output_test.cpp | 2 +- .../structure_file_output_test.cpp | 2 +- 8 files changed, 94 insertions(+), 17 deletions(-) create mode 100644 test/unit/io/detail/misc_output_test.cpp diff --git a/include/seqan3/io/detail/misc_input.hpp b/include/seqan3/io/detail/misc_input.hpp index c300eaa18c..7c161ff5af 100644 --- a/include/seqan3/io/detail/misc_input.hpp +++ b/include/seqan3/io/detail/misc_input.hpp @@ -21,7 +21,6 @@ #include #include - #ifdef SEQAN3_HAS_BZIP2 #include #endif @@ -31,6 +30,7 @@ #include #endif #include +#include #include namespace seqan3::detail diff --git a/include/seqan3/io/detail/misc_output.hpp b/include/seqan3/io/detail/misc_output.hpp index 568b0c59f6..f712b511fd 100644 --- a/include/seqan3/io/detail/misc_output.hpp +++ b/include/seqan3/io/detail/misc_output.hpp @@ -12,11 +12,11 @@ #pragma once +#include #include #include #include -#include #ifdef SEQAN3_HAS_BZIP2 #include #endif @@ -24,7 +24,8 @@ #include #include #endif -#include +#include +#include namespace seqan3::detail { @@ -46,26 +47,34 @@ inline auto make_secondary_ostream(std::basic_ostream & primary_stream, std::string extension = filename.extension().string(); - if ((extension == ".gz") || (extension == ".bgzf") || (extension == ".bam")) + if (extension == ".gz") { - #ifdef SEQAN3_HAS_ZLIB +#ifdef SEQAN3_HAS_ZLIB + filename.replace_extension(""); + return {new contrib::basic_gz_ostream{primary_stream}, stream_deleter_default}; +#else + throw file_open_error{"Trying to write a gzipped file, but no ZLIB available."}; +#endif + } + else if ((extension == ".bgzf") || (extension == ".bam")) + { +#ifdef SEQAN3_HAS_ZLIB if (extension != ".bam") // remove extension except for bam filename.replace_extension(""); - return {new contrib::basic_bgzf_ostream{primary_stream}, - stream_deleter_default}; - #else - throw file_open_error{"Trying to write a gzipped file, but no ZLIB available."}; - #endif + return {new contrib::basic_bgzf_ostream{primary_stream}, stream_deleter_default}; +#else + throw file_open_error{"Trying to write a bgzf'ed file, but no ZLIB available."}; +#endif } else if (extension == ".bz2") { - #ifdef SEQAN3_HAS_BZIP2 +#ifdef SEQAN3_HAS_BZIP2 filename.replace_extension(""); return {new contrib::basic_bz2_ostream{primary_stream}, stream_deleter_default}; - #else +#else throw file_open_error{"Trying to write a bzipped file, but no libbz2 available."}; - #endif +#endif } else if (extension == ".zst") { diff --git a/test/unit/io/detail/CMakeLists.txt b/test/unit/io/detail/CMakeLists.txt index 32763995c2..a6ce1f3d49 100644 --- a/test/unit/io/detail/CMakeLists.txt +++ b/test/unit/io/detail/CMakeLists.txt @@ -1,6 +1,7 @@ seqan3_test(detail_record_test.cpp) seqan3_test(in_file_iterator_test.cpp) seqan3_test(misc_test.cpp) +seqan3_test(misc_output_test.cpp) seqan3_test(out_file_iterator_test.cpp) seqan3_test(ignore_output_iterator_test.cpp) seqan3_test(record_like_test.cpp) diff --git a/test/unit/io/detail/misc_output_test.cpp b/test/unit/io/detail/misc_output_test.cpp new file mode 100644 index 0000000000..2094ed199f --- /dev/null +++ b/test/unit/io/detail/misc_output_test.cpp @@ -0,0 +1,68 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +#include + +#include +#include + +#include + +#include +#include +#include + +inline seqan3::test::tmp_filename tmp_compressed_file(std::string const & file_extension) +{ + std::string const tmp_file_name = "io_misc_output_test.txt." + file_extension; + seqan3::test::tmp_filename tmp_file{tmp_file_name.c_str()}; + + // We need a copy of the path because `make_secondary_ostream` will strip the compression extension. + auto file_path = tmp_file.get_path(); + std::ofstream filestream{file_path}; + auto stream_ptr = seqan3::detail::make_secondary_ostream(filestream, file_path); + *stream_ptr << std::string(8, 'a') << '\n'; + + return tmp_file; +} + +inline std::vector read_file_content(std::filesystem::path const & path) +{ + std::ifstream filestream{path}; + using char_t = decltype(filestream)::char_type; + return {std::istreambuf_iterator{filestream}, std::istreambuf_iterator{}}; +} + +#ifdef SEQAN3_HAS_ZLIB +TEST(misc_output, issue2455_gz) +{ + seqan3::test::tmp_filename const compressed_file = tmp_compressed_file("gz"); + std::vector const file_content = read_file_content(compressed_file.get_path()); + + EXPECT_TRUE(seqan3::detail::starts_with(file_content, seqan3::detail::gz_compression::magic_header)); + // gz should not have a valid bgzf header (the gz header is a prefix of the bgzf header) + EXPECT_FALSE(seqan3::detail::bgzf_compression::validate_header(std::span{file_content})); +} + +TEST(misc_output, issue2455_bgzf) +{ + seqan3::test::tmp_filename const compressed_file = tmp_compressed_file("bgzf"); + std::vector const file_content = read_file_content(compressed_file.get_path()); + + EXPECT_TRUE(seqan3::detail::bgzf_compression::validate_header(std::span{file_content})); +} +#endif + +#ifdef SEQAN3_HAS_BZIP2 +TEST(misc_output, issue2455_bz) +{ + seqan3::test::tmp_filename const compressed_file = tmp_compressed_file("bz2"); + std::vector const file_content = read_file_content(compressed_file.get_path()); + + EXPECT_TRUE(seqan3::detail::starts_with(file_content, seqan3::detail::bz2_compression::magic_header)); +} +#endif diff --git a/test/unit/io/detail/misc_test.cpp b/test/unit/io/detail/misc_test.cpp index 50d41cc917..2ca06958da 100644 --- a/test/unit/io/detail/misc_test.cpp +++ b/test/unit/io/detail/misc_test.cpp @@ -12,7 +12,6 @@ #include #include -#include #include struct dummy_file diff --git a/test/unit/io/sam_file/sam_file_output_test.cpp b/test/unit/io/sam_file/sam_file_output_test.cpp index 575bae1226..bc31df5629 100644 --- a/test/unit/io/sam_file/sam_file_output_test.cpp +++ b/test/unit/io/sam_file/sam_file_output_test.cpp @@ -650,7 +650,7 @@ TEST(compression, by_filename_gz) std::string buffer = compression_by_filename_impl(filename); buffer[9] = '\x00'; // zero out OS byte. - EXPECT_EQ(buffer, expected_bgzf); + EXPECT_EQ(buffer, expected_gz); } TEST(compression, by_stream_gz) diff --git a/test/unit/io/sequence_file/sequence_file_output_test.cpp b/test/unit/io/sequence_file/sequence_file_output_test.cpp index 0367538f12..d5cb90fd58 100644 --- a/test/unit/io/sequence_file/sequence_file_output_test.cpp +++ b/test/unit/io/sequence_file/sequence_file_output_test.cpp @@ -528,7 +528,7 @@ TEST(compression, by_filename_gz) std::string buffer = compression_by_filename_impl(filename); buffer[9] = '\x00'; // zero out OS byte - EXPECT_EQ(buffer, expected_bgzf); + EXPECT_EQ(buffer, expected_gz); } TEST(compression, by_stream_gz) diff --git a/test/unit/io/structure_file/structure_file_output_test.cpp b/test/unit/io/structure_file/structure_file_output_test.cpp index 2a710a2045..40d58b9b1c 100644 --- a/test/unit/io/structure_file/structure_file_output_test.cpp +++ b/test/unit/io/structure_file/structure_file_output_test.cpp @@ -481,7 +481,7 @@ TEST_F(structure_file_output_compression, by_filename_gz) seqan3::test::tmp_filename filename{"structure_file_output_test.dbn.gz"}; std::string buffer = compression_by_filename_impl(filename); buffer[9] = '\x00'; // zero out OS byte - EXPECT_EQ(buffer, expected_bgzf); + EXPECT_EQ(buffer, expected_gz); } TEST_F(structure_file_output_compression, by_stream_gz)