From 50d1e4841779568161193818a97b534641746776 Mon Sep 17 00:00:00 2001 From: smehringer Date: Thu, 4 Jul 2019 12:21:41 +0200 Subject: [PATCH] [MISC] Restructure file formats to now hold a stream handle. --- .../seqan3/io/alignment_file/format_bam.hpp | 118 +++++--- .../seqan3/io/alignment_file/format_sam.hpp | 252 +++++++++-------- include/seqan3/io/alignment_file/input.hpp | 92 +++---- .../alignment_file/input_format_concept.hpp | 16 +- include/seqan3/io/alignment_file/output.hpp | 39 ++- .../alignment_file/output_format_concept.hpp | 12 +- include/seqan3/io/detail/misc.hpp | 41 ++- include/seqan3/io/detail/misc_input.hpp | 7 +- include/seqan3/io/detail/misc_output.hpp | 5 +- .../seqan3/io/sequence_file/format_embl.hpp | 69 +++-- .../seqan3/io/sequence_file/format_fasta.hpp | 102 +++---- .../seqan3/io/sequence_file/format_fastq.hpp | 79 ++++-- .../io/sequence_file/format_genbank.hpp | 67 +++-- .../seqan3/io/sequence_file/format_sam.hpp | 57 ++-- include/seqan3/io/sequence_file/input.hpp | 71 +++-- .../io/sequence_file/input_format_concept.hpp | 14 +- include/seqan3/io/sequence_file/output.hpp | 65 ++--- .../sequence_file/output_format_concept.hpp | 14 +- .../io/structure_file/format_vienna.hpp | 76 +++-- include/seqan3/io/structure_file/input.hpp | 96 +++---- .../structure_file/input_format_concept.hpp | 25 +- include/seqan3/io/structure_file/output.hpp | 44 ++- .../structure_file/output_format_concept.hpp | 25 +- .../performance/io/format_fasta_benchmark.cpp | 12 +- .../alignment_file_format_test_template.hpp | 260 +++++++++--------- .../alignment_file_output_test.cpp | 17 +- .../io/alignment_file/format_bam_test.cpp | 130 +++++---- .../io/alignment_file/format_sam_test.cpp | 187 +++++++------ .../sequence_file_format_embl_test.cpp | 43 +-- .../sequence_file_format_fasta_test.cpp | 37 +-- .../sequence_file_format_fastq_test.cpp | 46 ++-- .../sequence_file_format_genbank_test.cpp | 40 +-- .../sequence_file_format_sam_test.cpp | 54 ++-- .../io/structure_file/format_vienna_test.cpp | 106 ++++--- 34 files changed, 1254 insertions(+), 1064 deletions(-) diff --git a/include/seqan3/io/alignment_file/format_bam.hpp b/include/seqan3/io/alignment_file/format_bam.hpp index 582aad1eab..8bea83f13b 100644 --- a/include/seqan3/io/alignment_file/format_bam.hpp +++ b/include/seqan3/io/alignment_file/format_bam.hpp @@ -18,6 +18,8 @@ #include #include +#include +#include #include #include #include @@ -37,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -87,11 +90,18 @@ struct alignment_record_core int32_t tlen; //!< The template length of the read and its mate. }; -//!\brief The seqan3::alignment_file_input_format specialisation that handles formatted BAM input. -//!\ingroup alignment_file -template <> -class alignment_file_input_format : alignment_file_input_format +/*!\brief The seqan3::alignment_file_input_format specialisation that handles formatted BAM input. + * \ingroup alignment_file + * \tparam stream_char_type The underlying character of the stream (usually `char`). + */ +template +class alignment_file_input_format : + alignment_file_input_format { +private: + //!\brief The SAM format type that this class inherits from. + using sam_fmt = alignment_file_input_format; + public: //!\brief Exposes the format tag that this class is specialised with using format_tag = format_bam; @@ -99,19 +109,22 @@ class alignment_file_input_format : alignment_file_input_format
& stream) : + stream_ptr{new contrib::basic_bgzf_istream{stream}} + {} //!\} //!\copydoc AlignmentFileInputFormat::read - template : alignment_file_input_format - void read(stream_type & stream, - alignment_file_input_options const & SEQAN3_DOXYGEN_ONLY(options), + bool read(alignment_file_input_options const & SEQAN3_DOXYGEN_ONLY(options), ref_seqs_type & ref_seqs, alignment_file_header & header, seq_type & seq, @@ -157,9 +169,10 @@ class alignment_file_input_format : alignment_file_input_format || std::Same, "The type of field::FLAG must be uint8_t."); - using stream_buf_t = std::istreambuf_iterator; - auto stream_view = std::ranges::subrange - {stream_buf_t{stream}, stream_buf_t{}}; + auto stream_view = view::istreambuf(*stream_ptr); + + if (std::ranges::begin(stream_view) == std::ranges::end(stream_view)) // no records follow + return true; // these variables need to be stored to compute the ALIGNMENT [[maybe_unused]] int32_t offset_tmp{}; @@ -179,10 +192,10 @@ class alignment_file_input_format : alignment_file_input_format 0) // header text is present - read_header(stream_view | view::take_exactly_or_throw(tmp32) - | view::take_until_and_consume(is_char<'\0'>), - header, - ref_seqs); + sam_fmt::read_header(stream_view | view::take_exactly_or_throw(tmp32) + | view::take_until_and_consume(is_char<'\0'>), + header, + ref_seqs); int32_t n_ref; read_field(stream_view, n_ref); @@ -220,8 +233,8 @@ class alignment_file_input_format : alignment_file_input_format : alignment_file_input_format> stream_ptr{nullptr}; //!\brief A variable that tracks whether the content of header has been read or not. bool header_was_read{false}; @@ -433,7 +450,7 @@ class alignment_file_input_format : alignment_file_input_format::read_field; + using sam_fmt::read_field; /*!\brief Reads a arithmetic field from binary stream by directly reinterpreting the bits. * \tparam stream_view_type The type of the stream as a view. @@ -459,7 +476,7 @@ class alignment_file_input_format : alignment_file_input_format(&target)); } - //!\copydoc seqan3::detail::alignment_file_input_format::read_sam_dict_vector + //!\copydoc seqan3::detail::alignment_file_input_format::read_sam_dict_vector template void read_sam_dict_vector(seqan3::detail::sam_tag_variant & variant, stream_view_type && stream_view, @@ -643,11 +660,18 @@ class alignment_file_input_format : alignment_file_input_format -class alignment_file_output_format : alignment_file_output_format +/*!\brief The seqan3::alignment_file_output_format specialisation that can write formatted BAM. + * \ingroup alignment_file + * \tparam stream_char_type The underlying character of the stream (usually `char`). + */ +template +class alignment_file_output_format : + alignment_file_output_format { +private: + //!\brief The SAM format type that this class inherits from. + using sam_fmt = alignment_file_output_format; + public: //!\brief Exposes the format tag that this class is specialised with using format_tag = format_bam; @@ -655,19 +679,23 @@ class alignment_file_output_format : alignment_file_output_format & stream) : + stream_ptr{new contrib::basic_bgzf_ostream{stream}} + {} //!\} //!\copydoc AlignmentFileOutputFormat::write - template : alignment_file_output_format - void write([[maybe_unused]] stream_type & stream, - [[maybe_unused]] alignment_file_output_options const & options, + void write([[maybe_unused]] alignment_file_output_options const & options, [[maybe_unused]] header_type && header, [[maybe_unused]] seq_type && seq, [[maybe_unused]] qual_type && qual, @@ -771,20 +798,21 @@ class alignment_file_output_format : alignment_file_output_format= -1 but is: ", ref_offset)}; - seqan3::ostreambuf_iterator stream_it{stream}; + seqan3::ostreambuf_iterator stream_it{*stream_ptr}; // --------------------------------------------------------------------- // Writing the Header on first call // --------------------------------------------------------------------- - if (!written_header) + if (!sam_fmt::written_header) { - stream << "BAM\1"; + std::ranges::copy(std::string_view{"BAM\1"}, stream_it); std::ostringstream os; - write_header(os, options, header); // write header to temporary stream to query the size. + sam_fmt sf{os}; + sf.write_header(options, header); // write header to temporary stream to query the size. int32_t l_text{static_cast(os.str().size())}; std::ranges::copy_n(reinterpret_cast(&l_text), 4, stream_it); // write read id - stream << os.str(); + std::ranges::copy(os.str(), stream_it); int32_t n_ref{static_cast(header.ref_ids().size())}; std::ranges::copy_n(reinterpret_cast(&n_ref), 4, stream_it); // write read id @@ -800,7 +828,7 @@ class alignment_file_output_format : alignment_file_output_format(&get<0>(header.ref_id_info[ridx])), 4, stream_it); } - written_header = true; + sam_fmt::written_header = true; } // --------------------------------------------------------------------- @@ -959,10 +987,14 @@ class alignment_file_output_format : alignment_file_output_format) } +private: + //!\brief A pointer to the bgzf uncompressed stream. + std::unique_ptr> stream_ptr{nullptr}; + //!\brief Converts a cigar op character to the rank according to the official BAM specifications. static constexpr std::array char_to_sam_rank { diff --git a/include/seqan3/io/alignment_file/format_sam.hpp b/include/seqan3/io/alignment_file/format_sam.hpp index 9ce4af6b5b..d147c6e7eb 100644 --- a/include/seqan3/io/alignment_file/format_sam.hpp +++ b/include/seqan3/io/alignment_file/format_sam.hpp @@ -140,10 +140,12 @@ struct format_sam namespace seqan3::detail { -//!\brief The seqan3::alignment_file_input_format specialisation that handles formatted SAM input. -//!\ingroup alignment_file -template <> -class alignment_file_input_format +/*!\brief The seqan3::alignment_file_input_format specialisation that handles formatted SAM input. + * \ingroup alignment_file + * \tparam stream_char_type The underlying character of the stream (usually `char`). + */ +template +class alignment_file_input_format { public: //!\brief Exposes the format tag that this class is specialised with @@ -152,19 +154,22 @@ class alignment_file_input_format /*!\name Constructors, destructor and assignment * \{ */ - alignment_file_input_format() noexcept = default; //!< Defaulted. + alignment_file_input_format() = default; //!< Defaulted. //!\brief Copy construction is explicitly deleted, because you can't have multiple access to the same file. alignment_file_input_format(alignment_file_input_format const &) = delete; //!\brief Copy assignment is explicitly deleted, because you can't have multiple access to the same file. alignment_file_input_format & operator=(alignment_file_input_format const &) = delete; - alignment_file_input_format(alignment_file_input_format &&) noexcept = default; //!< Defaulted. - alignment_file_input_format & operator=(alignment_file_input_format &&) noexcept = default; //!< Defaulted. - ~alignment_file_input_format() noexcept = default; //!< Defaulted. + alignment_file_input_format(alignment_file_input_format &&) = default; //!< Defaulted. + alignment_file_input_format & operator=(alignment_file_input_format &&) = default; //!< Defaulted. + ~alignment_file_input_format() = default; //!< Defaulted. + + alignment_file_input_format(std::basic_istream & stream) : + stream_view{view::istreambuf(stream)} + {} //!\} //!\copydoc AlignmentFileInputFormat::read - template typename tag_dict_type, typename e_value_type, typename bit_score_type> - void read(stream_type & stream, - alignment_file_input_options const & SEQAN3_DOXYGEN_ONLY(options), + bool read(alignment_file_input_options const & SEQAN3_DOXYGEN_ONLY(options), ref_seqs_type & ref_seqs, alignment_file_header & header, seq_type & seq, @@ -204,7 +208,9 @@ class alignment_file_input_format detail::is_type_specialisation_of_v, "The ref_offset must be a specialisation of std::optional."); - auto stream_view = view::istreambuf(stream); + if (std::ranges::begin(stream_view) == std::ranges::end(stream_view)) // file has no records + return true; + auto field_view = stream_view | view::take_until_or_throw_and_consume(is_char<'\t'>); // these variables need to be stored to compute the ALIGNMENT @@ -222,7 +228,7 @@ class alignment_file_input_format read_header(stream_view, header, ref_seqs); if (std::ranges::begin(stream_view) == std::ranges::end(stream_view)) // file has no records - return; + return true; } // Fields 1-5: ID FLAG REF_ID REF_OFFSET MAPQ @@ -415,10 +421,15 @@ class alignment_file_input_format construct_alignment(align, cigar, ref_idx, ref_seqs, ref_offset_tmp, ref_length); } + + return false; } protected: //!\privatesection + //!\brief A view over the file stream. + decltype(view::istreambuf(std::declval &>())) stream_view{}; + //!\brief A buffer used when parsing arithmetic values with std::from_chars. std::array buffer{}; // Doubles can be up to 316 characters @@ -956,10 +967,12 @@ class alignment_file_input_format } }; -//!\brief The seqan3::alignment_file_output_format specialisation that can write formatted SAM. -//!\ingroup alignment_file -template <> -class alignment_file_output_format +/*!\brief The seqan3::alignment_file_output_format specialisation that can write formatted SAM. + * \ingroup alignment_file + * \tparam stream_char_type The underlying character of the stream (usually `char`). + */ +template +class alignment_file_output_format { public: //!\brief Exposes the format tag that this class is specialised with @@ -976,11 +989,15 @@ class alignment_file_output_format alignment_file_output_format(alignment_file_output_format &&) noexcept = default; //!< Defaulted. alignment_file_output_format & operator=(alignment_file_output_format &&) noexcept = default; //!< Defaulted. ~alignment_file_output_format() noexcept = default; //!< Defaulted. + + //!\brief Construct from an output stream to write to. + alignment_file_output_format(std::basic_ostream & stream) : + stream_it{stream} + {} //!\} //!\copydoc AlignmentFileOutputFormat::write - template typename tag_dict_type, typename e_value_type, typename bit_score_type> - void write(stream_type & stream, - alignment_file_output_options const & options, + void write(alignment_file_output_options const & options, header_type && header, seq_type && seq, qual_type && qual, @@ -1016,7 +1032,7 @@ class alignment_file_output_format * - Arithmetic values default to 0 while all others default to '*' * * - Because of the former, arithmetic values can be directly streamed - * into 'stream' as operator<< is defined for all arithmetic types + * into '*stream_ptr' as operator<< is defined for all arithmetic types * and the default value (0) is also the SAM default. * * - All other non-arithmetic values need to be checked for emptiness @@ -1135,7 +1151,7 @@ class alignment_file_output_format { if (options.sam_require_header && !written_header) { - write_header(stream, options, header); + write_header(options, header); written_header = true; } } @@ -1143,44 +1159,45 @@ class alignment_file_output_format // --------------------------------------------------------------------- // Writing the Record // --------------------------------------------------------------------- - seqan3::ostreambuf_iterator stream_it{stream}; char const separator{'\t'}; - write_range(stream_it, std::forward(id)); - - stream << separator; + write_range(std::forward(id)); + stream_it = separator; - stream << flag << separator; + write_field(flag); + stream_it = separator; if constexpr (!detail::decays_to_ignore_v) { if constexpr (std::Integral>) { - write_range(stream_it, (header.ref_ids())[ref_id]); + write_range((header.ref_ids())[ref_id]); } else if constexpr (detail::is_type_specialisation_of_v, std::optional>) { if (ref_id.has_value()) - write_range(stream_it, (header.ref_ids())[ref_id.value()]); + write_range((header.ref_ids())[ref_id.value()]); else - stream << '*'; + stream_it = '*'; } else { - write_range(stream_it, std::forward(ref_id)); + write_range(std::forward(ref_id)); } } else { - stream << '*'; + stream_it = '*'; } - stream << separator; + stream_it = separator; // SAM is 1 based, 0 indicates unmapped read if optional is not set - stream << (ref_offset.value_or(-1) + 1) << separator; + write_field((ref_offset.value_or(-1) + 1)); + stream_it = separator; - stream << static_cast(mapq) << separator; + write_field(mapq); + stream_it = separator; if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align))) { @@ -1194,77 +1211,77 @@ class alignment_file_output_format ++off_end; off_end -= std::ranges::size(get<1>(align)); - write_range(stream_it, detail::get_cigar_string(std::forward(align), offset, off_end)); + write_range(detail::get_cigar_string(std::forward(align), offset, off_end)); } else { - stream << '*'; + stream_it = '*'; } - stream << separator; + stream_it = separator; if constexpr (std::Integral(mate))>>) { - write_range(stream_it, (header.ref_ids())[get<0>(mate)]); + write_range((header.ref_ids())[get<0>(mate)]); } else if constexpr (detail::is_type_specialisation_of_v(mate))>, std::optional>) { if (get<0>(mate).has_value()) // value_or(0) instead of value() (which is equivalent here) as a // workaround for a ubsan false-positive in GCC8: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90058 - write_range(stream_it, header.ref_ids()[get<0>(mate).value_or(0)]); + write_range(header.ref_ids()[get<0>(mate).value_or(0)]); else - stream << '*'; + stream_it = '*'; } else { - write_range(stream_it, get<0>(mate)); + write_range(get<0>(mate)); } - stream << separator; + stream_it = separator; if constexpr (detail::is_type_specialisation_of_v(mate))>, std::optional>) { // SAM is 1 based, 0 indicates unmapped read if optional is not set - stream << (get<1>(mate).value_or(-1) + 1) << separator; + write_field((get<1>(mate).value_or(-1) + 1)); } else { - stream << get<1>(mate) << separator; + write_field(get<1>(mate)); } + stream_it = separator; - stream << get<2>(mate) << separator; + write_field(get<2>(mate)); + stream_it = separator; - write_range(stream_it, std::forward(seq)); + write_range(std::forward(seq)); - stream << separator; + stream_it = separator; - write_range(stream_it, std::forward(qual)); + write_range(std::forward(qual)); - write_tag_fields(stream, std::forward(tag_dict), separator); + write_tag_fields(std::forward(tag_dict), separator); detail::write_eol(stream_it, options.add_carriage_return); } protected: //!\privatesection + //!\brief An ostreambuf iterator to the output stream to write to. + seqan3::ostreambuf_iterator stream_it{}; //!\brief The format version string. static constexpr char format_version[4] = "1.6"; + //!\brief A buffer used when parsing arithmetic values with std::from_chars. + std::array buffer{}; // Doubles can be up to 316 characters //!\brief A variable that tracks whether the content of header has been written or not. bool written_header{false}; /*!\brief Writes a field value to the stream. - * \tparam stream_it_t The stream iterator type. * \tparam field_type The type of the field value. Must model std::ranges::ForwardRange. - * - * \param[in,out] stream_it The stream iterator to print to. * \param[in] field_value The value to print. */ - template - //!\cond - requires std::ranges::ForwardRange - //!\endcond - void write_range(stream_it_t & stream_it, field_type && field_value) + template + void write_range(field_type && field_value) { if (std::ranges::empty(field_value)) stream_it = '*'; @@ -1273,49 +1290,44 @@ class alignment_file_output_format } /*!\brief Writes a field value to the stream. - * \tparam stream_it_t The stream iterator type. - * - * \param[in,out] stream_it The stream iterator to print to. * \param[in] field_value The value to print. */ - template - void write_range(stream_it_t & stream_it, char const * const field_value) + void write_range(char const * const field_value) { - write_range(stream_it, std::string_view{field_value}); + write_range(std::string_view{field_value}); } /*!\brief Writes a field value to the stream. - * \tparam stream_t The stream type. - * \param[in,out] stream The stream to print to. * \param[in] field_value The value to print. */ - template - void write_field(stream_t & stream, field_type field_value) + template + void write_field(field_type field_value) { - // TODO: replace this with to_chars for efficiency - if constexpr (std::Same || std::Same) - stream << static_cast(field_value); - else - stream << field_value; + auto res = std::to_chars(buffer.data(), buffer.data() + buffer.size(), field_value); + std::ranges::copy(buffer.data(), res.ptr, stream_it); } /*!\brief Writes the optional fields of the seqan3::sam_tag_dictionary. - * \tparam stream_t The stream type. - * - * \param[in,out] stream The stream to print to. * \param[in] tag_dict The tag dictionary to print. * \param[in] separator The field separator to append. */ - template - void write_tag_fields(stream_t & stream, sam_tag_dictionary const & tag_dict, char const separator) + void write_tag_fields(sam_tag_dictionary const & tag_dict, char const separator) { - auto stream_variant_fn = [this, &stream] (auto && arg) // helper to print an std::variant + auto stream_variant_fn = [this] (auto && arg) // helper to print an std::variant { using T = remove_cvref_t; - if constexpr (!Container || std::Same) + if constexpr (std::Same) + { + stream_it = arg; + } + else if constexpr (std::Same) + { + std::ranges::copy(arg, stream_it); + } + else if constexpr (!Container) { - stream << arg; + write_field(arg); } else { @@ -1323,35 +1335,36 @@ class alignment_file_output_format { for (auto it = arg.begin(); it != (arg.end() - 1); ++it) { - write_field(stream, *it); - stream << ','; + write_field(*it); + stream_it = ','; } - write_field(stream, *(arg.end() - 1)); // write last value without trailing ',' + write_field(*(arg.end() - 1)); // write last value without trailing ',' } } }; for (auto & [tag, variant] : tag_dict) { - stream << separator; + stream_it = separator; - char char0 = tag / 256; - char char1 = tag % 256; - - stream << char0 << char1 << ':' << detail::sam_tag_type_char[variant.index()] << ':'; + stream_it = tag / 256; + stream_it = tag % 256; + stream_it = ':'; + stream_it = detail::sam_tag_type_char[variant.index()]; + stream_it = ':'; if (detail::sam_tag_type_char_extra[variant.index()] != '\0') - stream << detail::sam_tag_type_char_extra[variant.index()] << ','; + { + stream_it = detail::sam_tag_type_char_extra[variant.index()]; + stream_it = ','; + } std::visit(stream_variant_fn, variant); } } /*!\brief Writes the SAM header. - * \tparam stream_t The stream type. - * - * \param[in,out] stream The stream to print to. * \param[in] options The options to alter printing. * \param[in] header The header to print. * @@ -1364,10 +1377,8 @@ class alignment_file_output_format * according to the rules of the official * [SAM format specifications](https://samtools.github.io/hts-specs/SAMv1.pdf). */ - template - void write_header(stream_t & stream, - alignment_file_output_options const & options, - alignment_file_header & header) + template + void write_header(alignment_file_output_options const & options, alignment_file_header & header) { // ----------------------------------------------------------------- // Check Header @@ -1404,34 +1415,32 @@ class alignment_file_output_format // ----------------------------------------------------------------- // Write Header // ----------------------------------------------------------------- - seqan3::ostreambuf_iterator stream_it{stream}; - // (@HD) Write header line [required]. - stream << "@HD\tVN:"; - stream << format_sam::format_version; + std::ranges::copy(std::string_view{"@HD\tVN:"}, stream_it); + std::ranges::copy(std::string_view{format_sam::format_version}, stream_it); if (!header.sorting.empty()) - stream << "\tSO:" << header.sorting; + std::ranges::copy("\tSO:" + header.sorting, stream_it); if (!header.subsorting.empty()) - stream << "\tSS:" << header.subsorting; + std::ranges::copy("\tSS:" + header.subsorting, stream_it); if (!header.grouping.empty()) - stream << "\tGO:" << header.grouping; + std::ranges::copy("\tGO:" + header.grouping, stream_it); detail::write_eol(stream_it, options.add_carriage_return); // (@SQ) Write Reference Sequence Dictionary lines [required]. for (auto const & [ref_name, ref_info] : std::view::zip(header.ref_ids(), header.ref_id_info)) { - stream << "@SQ\tSN:"; - + std::ranges::copy(std::string_view{"@SQ\tSN:"}, stream_it); std::ranges::copy(ref_name, stream_it); - stream << "\tLN:" << get<0>(ref_info); + std::ranges::copy(std::string_view{"\tLN:"}, stream_it); + write_field(get<0>(ref_info)); if (!get<1>(ref_info).empty()) - stream << "\t" << get<1>(ref_info); + std::ranges::copy("\t" + get<1>(ref_info), stream_it); detail::write_eol(stream_it, options.add_carriage_return); } @@ -1439,11 +1448,11 @@ class alignment_file_output_format // Write read group (@RG) lines if specified. for (auto const & read_group : header.read_groups) { - stream << "@RG" - << "\tID:" << get<0>(read_group); + std::ranges::copy(std::string_view{"@RG\tID:"}, stream_it); + std::ranges::copy(get<0>(read_group), stream_it); if (!get<1>(read_group).empty()) - stream << "\t" << get<1>(read_group); + std::ranges::copy("\t" + get<1>(read_group), stream_it); detail::write_eol(stream_it, options.add_carriage_return); } @@ -1451,23 +1460,23 @@ class alignment_file_output_format // Write program (@PG) lines if specified. for (auto const & program : header.program_infos) { - stream << "@PG" - << "\tID:" << program.id; + std::ranges::copy(std::string_view{"@PG\tID:"}, stream_it); + std::ranges::copy(program.id, stream_it); if (!program.name.empty()) - stream << "\tPN:" << program.name; + std::ranges::copy("\tPN:" + program.name, stream_it); if (!program.command_line_call.empty()) - stream << "\tCL:" << program.command_line_call; + std::ranges::copy("\tCL:" + program.command_line_call, stream_it); if (!program.previous.empty()) - stream << "\tPP:" << program.previous; + std::ranges::copy("\tPP:" + program.previous, stream_it); if (!program.description.empty()) - stream << "\tDS:" << program.description; + std::ranges::copy("\tDS:" + program.description, stream_it); if (!program.version.empty()) - stream << "\tVN:" << program.version; + std::ranges::copy("\tVN:" + program.version, stream_it); detail::write_eol(stream_it, options.add_carriage_return); } @@ -1475,7 +1484,8 @@ class alignment_file_output_format // Write comment (@CO) lines if specified. for (auto const & comment : header.comments) { - stream << "@CO\t" << comment; + std::ranges::copy(std::string_view{"@CO\t"}, stream_it); + std::ranges::copy(comment, stream_it); detail::write_eol(stream_it, options.add_carriage_return); } } diff --git a/include/seqan3/io/alignment_file/input.hpp b/include/seqan3/io/alignment_file/input.hpp index b1b23f9155..1eef4fc804 100644 --- a/include/seqan3/io/alignment_file/input.hpp +++ b/include/seqan3/io/alignment_file/input.hpp @@ -578,7 +578,8 @@ class alignment_file_input */ alignment_file_input(std::filesystem::path filename, selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) : - primary_stream{new std::ifstream{filename, std::ios_base::in | std::ios::binary}, stream_deleter_default} + primary_stream{new std::ifstream{filename, std::ios_base::in | std::ios::binary}, + detail::istream_deleter_default} { init(filename); } @@ -606,7 +607,7 @@ class alignment_file_input alignment_file_input(stream_t & stream, file_format const & SEQAN3_DOXYGEN_ONLY(format_tag), selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) : - primary_stream{&stream, stream_deleter_noop} + primary_stream{&stream, detail::istream_deleter_noop} { init(file_format{}); } @@ -616,7 +617,7 @@ class alignment_file_input alignment_file_input(stream_t && stream, file_format const & SEQAN3_DOXYGEN_ONLY(format_tag), selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) : - primary_stream{new stream_t{std::move(stream)}, stream_deleter_default} + primary_stream{new stream_t{std::move(stream)}, detail::istream_deleter_default} { init(file_format{}); } @@ -648,7 +649,8 @@ class alignment_file_input typename traits_type::ref_ids & ref_ids, typename traits_type::ref_sequences & ref_sequences, selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) : - primary_stream{new std::ifstream{filename, std::ios_base::in | std::ios::binary}, stream_deleter_default} + primary_stream{new std::ifstream{filename, std::ios_base::in | std::ios::binary}, + detail::istream_deleter_default} { // initialize reference information set_references(ref_ids, ref_sequences); @@ -687,7 +689,7 @@ class alignment_file_input typename traits_type::ref_sequences & ref_sequences, file_format const & SEQAN3_DOXYGEN_ONLY(format_tag), selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) : - primary_stream{&stream, stream_deleter_noop} + primary_stream{&stream, detail::istream_deleter_noop} { // initialize reference information set_references(ref_ids, ref_sequences); @@ -702,7 +704,7 @@ class alignment_file_input typename traits_type::ref_sequences & ref_sequences, file_format const & SEQAN3_DOXYGEN_ONLY(format_tag), selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) : - primary_stream{new stream_t{std::move(stream)}, stream_deleter_default} + primary_stream{new stream_t{std::move(stream)}, detail::istream_deleter_default} { // initialize reference information set_references(ref_ids, ref_sequences); @@ -830,7 +832,12 @@ class alignment_file_input throw file_open_error{"Could not open file " + filename.string() + " for reading."}; secondary_stream = detail::make_secondary_istream(*primary_stream, filename); - detail::set_format(format, filename); + + if (std::istreambuf_iterator{*secondary_stream} == + std::istreambuf_iterator{}) + at_end = true; + + detail::set_format(format, *secondary_stream, filename); // buffer first record read_next_record(); @@ -843,8 +850,16 @@ class alignment_file_input static_assert(meta::in::value, "You selected a format that is not in the valid_formats of this file."); - format = detail::alignment_file_input_format{}; - secondary_stream = detail::make_secondary_istream(*primary_stream); + // forward dummyy filename to ensure format specific compression handling + std::filesystem::path dummy_filename{"dummy." + format_type::file_extensions[0]}; + + secondary_stream = detail::make_secondary_istream(*primary_stream, dummy_filename); + + if (std::istreambuf_iterator{*secondary_stream} == + std::istreambuf_iterator{}) + at_end = true; + + format = detail::alignment_file_input_format{*secondary_stream}; // buffer first record read_next_record(); @@ -863,24 +878,18 @@ class alignment_file_input /*!\name Stream / file access * \{ */ - //!\brief The type of the internal stream pointers. Allows dynamically setting ownership management. - using stream_ptr_t = std::unique_ptr, - std::function*)>>; - //!\brief Stream deleter that does nothing (no ownership assumed). - static void stream_deleter_noop(std::basic_istream *) {} - //!\brief Stream deleter with default behaviour (ownership assumed). - static void stream_deleter_default(std::basic_istream * ptr) { delete ptr; } - //!\brief The primary stream is the user provided stream or the file stream if constructed from filename. - stream_ptr_t primary_stream{nullptr, stream_deleter_noop}; + detail::istream_ptr_type primary_stream{nullptr, detail::istream_deleter_noop}; //!\brief The secondary stream is a compression layer on the primary or just points to the primary (no compression). - stream_ptr_t secondary_stream{nullptr, stream_deleter_noop}; + detail::istream_ptr_type secondary_stream{nullptr, detail::istream_deleter_noop}; //!\brief File is one position behind the last record. bool at_end{false}; //!\brief Type of the format, an std::variant over the `valid_formats`. - using format_type = typename detail::variant_from_tags::type; + using format_type = typename detail::variant_from_tags::type; //!\brief The actual std::variant holding a pointer to the detected/selected format. format_type format; @@ -937,36 +946,27 @@ class alignment_file_input record_buffer.clear(); detail::get_or_ignore(record_buffer) = header_ptr.get(); - // at end if we could not read further - if (std::istreambuf_iterator{*secondary_stream} == - std::istreambuf_iterator{}) - { - at_end = true; - return; - } - auto call_read_func = [this] (auto & ref_seq_info) { std::visit([&] (auto & f) { - f.read(*secondary_stream, - options, - ref_seq_info, - *header_ptr, - detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer), - detail::get_or_ignore(record_buffer)); + at_end = f.read(options, + ref_seq_info, + *header_ptr, + detail::get_or_ignore(record_buffer), + detail::get_or_ignore(record_buffer), + detail::get_or_ignore(record_buffer), + detail::get_or_ignore(record_buffer), + detail::get_or_ignore(record_buffer), + detail::get_or_ignore(record_buffer), + detail::get_or_ignore(record_buffer), + detail::get_or_ignore(record_buffer), + detail::get_or_ignore(record_buffer), + detail::get_or_ignore(record_buffer), + detail::get_or_ignore(record_buffer), + detail::get_or_ignore(record_buffer), + detail::get_or_ignore(record_buffer), + detail::get_or_ignore(record_buffer)); }, format); }; diff --git a/include/seqan3/io/alignment_file/input_format_concept.hpp b/include/seqan3/io/alignment_file/input_format_concept.hpp index b1d95a8eb9..6da39015a8 100644 --- a/include/seqan3/io/alignment_file/input_format_concept.hpp +++ b/include/seqan3/io/alignment_file/input_format_concept.hpp @@ -12,7 +12,6 @@ #pragma once -#include #include #include @@ -30,7 +29,7 @@ namespace seqan3::detail { //!\brief The alignment file input format base class. -template +template class alignment_file_input_format {}; @@ -53,7 +52,6 @@ namespace seqan3 template SEQAN3_CONCEPT AlignmentFileInputFormat = requires (detail::alignment_file_input_format & v, - std::ifstream & stream, alignment_file_input_options & options, std::vector & ref_sequences, alignment_file_header<> & header, @@ -75,8 +73,7 @@ SEQAN3_CONCEPT AlignmentFileInputFormat = t::file_extensions; // std::Same>; - { v.read(stream, - options, + { v.read(options, ref_sequences, header, seq, @@ -94,8 +91,7 @@ SEQAN3_CONCEPT AlignmentFileInputFormat = e_value, bit_score)}; - { v.read(stream, - options, + { v.read(options, std::ignore, header, std::ignore, @@ -121,14 +117,13 @@ SEQAN3_CONCEPT AlignmentFileInputFormat = * \{ */ -/*!\fn void read(stream_type & stream, alignment_file_input_options const & options, +/*!\fn void read(alignment_file_input_options const & options, * ref_seqs_type & ref_seqs, header_type & header, * seq_type & seq, qual_type & qual, id_type & id, offset_type & offset, ref_seq_type & ref_seq, * ref_id_type & ref_id, ref_offset_type & ref_offset, align_type & align, flag_type & flag, * mapq_type & mapq, mate_type & mate, tag_dict_type & tag_dict, e_value_type & e_value, * bit_score_type & bit_score) * \brief Read from the specified stream and back-insert into the given field buffers. - * \tparam stream_type The input stream type; Must be derived from std::ostream. * \tparam ref_seqs_type e.g. std::deque or decltype(std::ignore). * \tparam seq_type Type of the seqan3::field::SEQ input (see seqan3::AlignmentFileInputTraits). * \tparam qual_type Type of the seqan3::field::QUAL input (see seqan3::AlignmentFileInputTraits). @@ -145,7 +140,6 @@ SEQAN3_CONCEPT AlignmentFileInputFormat = * \tparam e_value_type Type of the seqan3::field::EVALUE input (see seqan3::AlignmentFileInputTraits). * \tparam bit_score_type Type of the seqan3::field::BIT_SCORE input (see seqan3::AlignmentFileInputTraits). * - * \param[in,out] stream The input stream to read from. * \param[in] options File specific options passed to the format. * \param[out] ref_seqs The reference sequences to the corresponding alignments. * \param[out] header A pointer to the seqan3::alignment_file_header object. @@ -169,7 +163,7 @@ SEQAN3_CONCEPT AlignmentFileInputFormat = * ### Additional requirements * * * The function must also accept std::ignore as parameter for any of the fields, - * except stream, options and header. [This is enforced by the concept checker!] + * except options and header. [This is enforced by the concept checker!] * * In this case the data read for that field shall be discarded by the format. */ /*!\var static inline std::vector seqan3::AlignmentFileInputFormat::file_extensions diff --git a/include/seqan3/io/alignment_file/output.hpp b/include/seqan3/io/alignment_file/output.hpp index 8c08a824fc..ae40fff8a8 100644 --- a/include/seqan3/io/alignment_file/output.hpp +++ b/include/seqan3/io/alignment_file/output.hpp @@ -294,7 +294,8 @@ class alignment_file_output */ alignment_file_output(std::filesystem::path filename, selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) : - primary_stream{new std::ofstream{filename, std::ios_base::out | std::ios::binary}, stream_deleter_default} + primary_stream{new std::ofstream{filename, std::ios_base::out | std::ios::binary}, + detail::ostream_deleter_default} { // open stream if (!primary_stream->good()) @@ -304,7 +305,7 @@ class alignment_file_output secondary_stream = detail::make_secondary_ostream(*primary_stream, filename); // initialise format handler or throw if format is not found - detail::set_format(format, filename); + detail::set_format(format, *secondary_stream, filename); } /*!\brief Construct from an existing stream and with specified format. @@ -327,9 +328,9 @@ class alignment_file_output alignment_file_output(stream_type & stream, file_format const & SEQAN3_DOXYGEN_ONLY(format_tag), selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) : - primary_stream{&stream, stream_deleter_noop}, - secondary_stream{&stream, stream_deleter_noop}, - format{detail::alignment_file_output_format{}} + primary_stream{&stream, detail::ostream_deleter_noop}, + secondary_stream{&stream, detail::ostream_deleter_noop}, + format{detail::alignment_file_output_format{*secondary_stream}} { static_assert(meta::in::value, "You selected a format that is not in the valid_formats of this file."); @@ -340,9 +341,9 @@ class alignment_file_output alignment_file_output(stream_type && stream, file_format const & SEQAN3_DOXYGEN_ONLY(format_tag), selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) : - primary_stream{new stream_type{std::move(stream)}, stream_deleter_default}, - secondary_stream{&*primary_stream, stream_deleter_noop}, - format{detail::alignment_file_output_format{}} + primary_stream{new stream_type{std::move(stream)}, detail::ostream_deleter_default}, + secondary_stream{&*primary_stream, detail::ostream_deleter_noop}, + format{detail::alignment_file_output_format{*secondary_stream}} { static_assert(meta::in::value, "You selected a format that is not in the valid_formats of this file."); @@ -717,21 +718,15 @@ class alignment_file_output /*!\name Stream / file access * \{ */ - //!\brief The type of the internal stream pointers. Allows dynamically setting ownership management. - using stream_ptr_t = std::unique_ptr, - std::function*)>>; - //!\brief Stream deleter that does nothing (no ownership assumed). - static void stream_deleter_noop(std::basic_ostream *) {} - //!\brief Stream deleter with default behaviour (ownership assumed). - static void stream_deleter_default(std::basic_ostream * ptr) { delete ptr; } - //!\brief The primary stream is the user provided stream or the file stream if constructed from filename. - stream_ptr_t primary_stream{nullptr, stream_deleter_noop}; + detail::ostream_ptr_type primary_stream{nullptr, detail::ostream_deleter_noop}; //!\brief The secondary stream is a compression layer on the primary or just points to the primary (no compression). - stream_ptr_t secondary_stream{nullptr, stream_deleter_noop}; + detail::ostream_ptr_type secondary_stream{nullptr, detail::ostream_deleter_noop}; //!\brief Type of the format, an std::variant over the `valid_formats`. - using format_type = typename detail::variant_from_tags::type; + using format_type = typename detail::variant_from_tags::type; //!\brief The actual std::variant holding a pointer to the detected/selected format. format_type format; @@ -783,11 +778,11 @@ class alignment_file_output { // use header from record if explicitly given, e.g. file_output = file_input if constexpr (!std::Same) - f.write(*secondary_stream, options, *record_header_ptr, std::forward(remainder)...); + f.write(options, *record_header_ptr, std::forward(remainder)...); else if constexpr (std::Same) - f.write(*secondary_stream, options, std::ignore, std::forward(remainder)...); + f.write(options, std::ignore, std::forward(remainder)...); else - f.write(*secondary_stream, options, *header_ptr, std::forward(remainder)...); + f.write(options, *header_ptr, std::forward(remainder)...); }, format); } diff --git a/include/seqan3/io/alignment_file/output_format_concept.hpp b/include/seqan3/io/alignment_file/output_format_concept.hpp index 01dab7a96a..e44e4941fc 100644 --- a/include/seqan3/io/alignment_file/output_format_concept.hpp +++ b/include/seqan3/io/alignment_file/output_format_concept.hpp @@ -12,7 +12,6 @@ #pragma once -#include #include #include @@ -29,7 +28,7 @@ namespace seqan3::detail { //!\brief The alignment file output format base class. -template +template class alignment_file_output_format {}; @@ -53,7 +52,6 @@ namespace seqan3 template SEQAN3_CONCEPT AlignmentFileOutputFormat = requires (detail::alignment_file_output_format & v, - std::ofstream & stream, alignment_file_output_options & options, alignment_file_header<> & header, dna5_vector & seq, @@ -73,8 +71,7 @@ SEQAN3_CONCEPT AlignmentFileOutputFormat = { t::file_extensions; - { v.write(stream, - options, + { v.write(options, header, seq, qual, @@ -100,8 +97,7 @@ SEQAN3_CONCEPT AlignmentFileOutputFormat = * \{ */ -/*!\fn void write(stream_type & stream, - alignment_file_output_options const & options, +/*!\fn void write(alignment_file_output_options const & options, alignment_file_header<> & header, seq_type && seq, qual_type && qual, @@ -118,7 +114,6 @@ SEQAN3_CONCEPT AlignmentFileOutputFormat = e_value_type && e_value, bit_score_type && bit_score) * \brief Write the given fields to the specified stream. - * \tparam stream_type Output stream, must model seqan3::OStream with `char`. * \tparam seq_type Type of the seqan3 * \tparam id_type Type of the seqan3 * \tparam offset_type Type of the seqan3 @@ -134,7 +129,6 @@ SEQAN3_CONCEPT AlignmentFileOutputFormat = * \tparam e_value_type Type of the seqan3 * \tparam bit_score_type Type of the seqan3 * - * \param[in,out] stream The output stream to write into. * \param[in] options File specific options passed to the format. * \param[in] header A pointer to the header object of the file. * \param[in] seq The data for seqan3::field::SEQ, i.e. the query sequence. diff --git a/include/seqan3/io/detail/misc.hpp b/include/seqan3/io/detail/misc.hpp index 88d59876d9..59e827307c 100644 --- a/include/seqan3/io/detail/misc.hpp +++ b/include/seqan3/io/detail/misc.hpp @@ -25,18 +25,44 @@ namespace seqan3::detail { +//!\brief Stream deleter that does nothing (no ownership assumed). +template +static void istream_deleter_noop(std::basic_istream *) {} + +//!\brief Stream deleter that does nothing (no ownership assumed). +template +static void ostream_deleter_noop(std::basic_ostream *) {} + +//!\brief Stream deleter with default behaviour (ownership assumed). +template +static void istream_deleter_default(std::basic_istream * ptr) { delete ptr; } + +//!\brief Stream deleter with default behaviour (ownership assumed). +template +static void ostream_deleter_default(std::basic_ostream * ptr) { delete ptr; } + +//!\brief The type of the internal stream pointers. Allows dynamically setting ownership management. +template +using istream_ptr_type = std::unique_ptr, + std::function*)>>; + +//!\brief The type of the internal stream pointers. Allows dynamically setting ownership management. +template +using ostream_ptr_type = std::unique_ptr, + std::function*)>>; + //!\brief Base class to deduce the std::variant type from format tags. //!\ingroup io -template typename output_t> +template typename output_t, typename stream_char_type> struct variant_from_tags; //!\brief Transfers a list of format tags (`...ts`) onto a std::variant by specialising output_t with each. //!\ingroup io -template