Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] 3 minor changes in alignment IO #1110

Merged
merged 6 commits into from
Jul 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion include/seqan3/alphabet/detail/convert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ constexpr std::array<out_t, alphabet_size<in_t>> convert_through_char_representa
[] () constexpr
{
std::array<out_t, alphabet_size<in_t>> ret{};
for (typename in_t::rank_type i = 0; i < alphabet_size<in_t>; ++i)
// for (decltype(alphabet_size<in_t>) i = 0; ...) causes indefinite compilation :(
for (auto i = decltype(alphabet_size<in_t>){0}; i < alphabet_size<in_t>; ++i)
assign_char_to(to_char(assign_rank_to(i, in_t{})), ret[i]);
return ret;
}()
Expand Down
28 changes: 25 additions & 3 deletions include/seqan3/io/alignment_file/format_bam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -836,7 +836,7 @@ class alignment_file_output_format<format_bam> : alignment_file_output_format<fo
/* block_size */ 0, // will be initialised right after
/* refID */ -1, // will be initialised right after
/* pos */ ref_offset.value_or(-1),
/* l_read_name */ static_cast<uint8_t>(std::ranges::distance(id) + 1),
/* l_read_name */ std::max<uint8_t>(std::min<size_t>(std::ranges::distance(id) + 1, 255), 2),
/* mapq */ mapq,
/* bin */ reg2bin(ref_offset.value_or(-1), std::ranges::distance(get<1>(align))),
/* n_cigar_op */ static_cast<uint16_t>(cigar.size()),
Expand Down Expand Up @@ -866,14 +866,33 @@ class alignment_file_output_format<format_bam> : alignment_file_output_format<fo
{
if (!std::ranges::empty(id_source)) // otherwise default will remain (-1)
{
auto id_it = header.ref_dict.find(id_source);
auto id_it = header.ref_dict.end();

if constexpr (std::ranges::ContiguousRange<decltype(id_source)> &&
std::ranges::SizedRange<decltype(id_source)> &&
ForwardingRange<decltype(id_source)>)
{
id_it = header.ref_dict.find(std::span{std::ranges::data(id_source),
std::ranges::size(id_source)});
}
else
{
using header_ref_id_type = std::remove_reference_t<decltype(header.ref_ids()[0])>;

static_assert(ImplicitlyConvertibleTo<decltype(id_source), header_ref_id_type>,
"The ref_id type is not convertible to the reference id information stored in the "
"reference dictionary of the header object.");
smehringer marked this conversation as resolved.
Show resolved Hide resolved

id_it = header.ref_dict.find(id_source);
}

if (id_it == header.ref_dict.end())
{
throw format_error{detail::to_string("Unknown reference name '", id_source, "' could "
"not be found in BAM header ref_dict: ",
header.ref_dict, ".")};
}

id_target = id_it->second;
}
}
Expand All @@ -896,7 +915,10 @@ class alignment_file_output_format<format_bam> : alignment_file_output_format<fo

std::ranges::copy_n(reinterpret_cast<char *>(&core), sizeof(core), stream_it); // write core

std::ranges::copy_n(std::ranges::begin(id), std::ranges::distance(id), stream_it); // write read id
if (std::ranges::distance(id) == 0) // empty id is represented as * for backward compatibility
stream_it = '*';
else
std::ranges::copy_n(std::ranges::begin(id), core.l_read_name - 1, stream_it); // write read id
stream_it = '\0';

// write cigar
Expand Down
43 changes: 28 additions & 15 deletions include/seqan3/io/alignment_file/format_sam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,7 @@ class alignment_file_input_format<format_sam>
if (is_char<'S'>(*std::ranges::begin(stream_view))) // SQ (sequence dictionary) tag
{
ref_info_present_in_header = true;
std::string id;
value_type_t<decltype(hdr.ref_ids())> id;
std::tuple<int32_t, std::string> info{};

parse_tag_value(id); // parse required SN (sequence name) tag
Expand Down Expand Up @@ -988,7 +988,9 @@ class alignment_file_output_format<format_sam>
typename align_type,
typename qual_type,
typename mate_type,
typename tag_dict_type>
typename tag_dict_type,
typename e_value_type,
typename bit_score_type>
void write(stream_type & stream,
alignment_file_output_options const & options,
header_type && header,
Expand All @@ -1004,8 +1006,8 @@ class alignment_file_output_format<format_sam>
uint8_t mapq,
mate_type && mate,
tag_dict_type && tag_dict,
double SEQAN3_DOXYGEN_ONLY(e_value),
double SEQAN3_DOXYGEN_ONLY(bit_score))
e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score))
{
/* Note the following general things:
*
Expand Down Expand Up @@ -1033,11 +1035,6 @@ class alignment_file_output_format<format_sam>
"The id object must be a std::ranges::ForwardRange over "
"letters that model seqan3::Alphabet.");

static_assert((std::ranges::ForwardRange<ref_seq_type> &&
eseiler marked this conversation as resolved.
Show resolved Hide resolved
Alphabet<reference_t<ref_seq_type>>),
"The ref_seq object must be a std::ranges::ForwardRange "
"over letters that model seqan3::Alphabet.");

if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
{
static_assert((std::ranges::ForwardRange<ref_id_type> ||
Expand Down Expand Up @@ -1100,15 +1097,31 @@ class alignment_file_output_format<format_sam>
!std::Integral<std::remove_reference_t<ref_id_type>> &&
!detail::is_type_specialisation_of_v<std::remove_reference_t<ref_id_type>, std::optional>)
{
static_assert(ImplicitlyConvertibleTo<ref_id_type &, typename decltype(header.ref_dict)::key_type>,
"The ref_id type is not convertible to the reference id information stored in the "
"reference dictionary of the header object.");

if (options.sam_require_header && !std::ranges::empty(ref_id))
{
if ((header.ref_dict).count(ref_id) == 0) // no reference id matched
throw format_error{std::string("The ref_id '") + std::string(ref_id) +
"' was not in the list of references"};
auto id_it = header.ref_dict.end();

if constexpr (std::ranges::ContiguousRange<decltype(ref_id)> &&
std::ranges::SizedRange<decltype(ref_id)> &&
ForwardingRange<decltype(ref_id)>)
{
id_it = header.ref_dict.find(std::span{std::ranges::data(ref_id), std::ranges::size(ref_id)});
}
else
{
using header_ref_id_type = std::remove_reference_t<decltype(header.ref_ids()[0])>;

static_assert(ImplicitlyConvertibleTo<ref_id_type, header_ref_id_type>,
"The ref_id type is not convertible to the reference id information stored in the "
"reference dictionary of the header object.");

id_it = header.ref_dict.find(ref_id);
}

if (id_it == header.ref_dict.end()) // no reference id matched
throw format_error{detail::to_string("The ref_id '", ref_id, "' was not in the list of references:",
header.ref_ids())};
}
}

Expand Down
7 changes: 6 additions & 1 deletion include/seqan3/io/alignment_file/header.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
#include <unordered_map>
#include <vector>

#include <seqan3/alphabet/concept.hpp>
#include <seqan3/alphabet/detail/hash.hpp>
#include <seqan3/core/type_traits/pre.hpp>
#include <seqan3/io/alignment_file/detail.hpp>
#include <seqan3/range/view/view_all.hpp>
#include <seqan3/std/ranges>

namespace seqan3
Expand Down Expand Up @@ -88,7 +91,9 @@ class alignment_file_header
//!\brief Stream deleter with default behaviour (ownership assumed).
static void ref_ids_deleter_default(ref_ids_type * ptr) { delete ptr; }
//!\brief The key's type of ref_dict.
using key_type = std::ranges::all_view<reference_t<ref_ids_type>>;
using key_type = std::conditional_t<std::ranges::ContiguousRange<reference_t<ref_ids_type>>,
std::span<innermost_value_type_t<ref_ids_type> const>,
all_view<reference_t<ref_ids_type>>>;
//!\brief The pointer to reference ids information (non-owning if reference information is given).
ref_ids_ptr_t ref_ids_ptr{new ref_ids_type{}, ref_ids_deleter_default};

Expand Down
33 changes: 18 additions & 15 deletions include/seqan3/io/alignment_file/input.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,8 @@ namespace seqan3
* \brief Type template of the seqan3::field::SEQ, a container template over `sequence_alphabet`;
* must model seqan3::SequenceContainer.
*/
/*!\typedef using id_alphabet
* \brief Alphabet of the characters for the seqan3::field::ID; must model seqan3::Alphabet.
*/
/*!\typedef using id_container
* \brief Type template of the seqan3::field::ID, a container template over `id_alphabet`;
* \brief Type template of the seqan3::field::ID, a container template over `char`;
* must model seqan3::SequenceContainer.
*/
/*!\typedef using quality_alphabet
Expand Down Expand Up @@ -122,8 +119,7 @@ SEQAN3_CONCEPT AlignmentFileInputTraits = requires (t v)
requires SequenceContainer<typename t::template sequence_container<typename t::sequence_alphabet>>;

// field::ID
requires WritableAlphabet<typename t::id_alphabet>;
requires SequenceContainer<typename t::template id_container<typename t::id_alphabet>>;
requires SequenceContainer<typename t::template id_container<char>>;
h-2 marked this conversation as resolved.
Show resolved Hide resolved

// field::QUAL
requires WritableQualityAlphabet<typename t::quality_alphabet>;
Expand Down Expand Up @@ -200,9 +196,6 @@ struct alignment_file_input_default_traits
template <typename _sequence_alphabet>
using sequence_container = std::vector<_sequence_alphabet>;

//!\brief The alphabet for an identifier string is char.
using id_alphabet = char;

//!\brief The string type for an identifier is std::basic_string.
template <typename _id_alphabet>
using id_container = std::basic_string<_id_alphabet>;
Expand Down Expand Up @@ -417,8 +410,7 @@ class alignment_file_input
using sequence_type = typename traits_type::template sequence_container<
typename traits_type::sequence_alphabet>;
//!\brief The type of field::ID (default std::string by default).
using id_type = typename traits_type::template id_container<
typename traits_type::id_alphabet>;
using id_type = typename traits_type::template id_container<char>;
//!\brief The type of field::OFFSET is fixed to int32_t.
using offset_type = int32_t;
/*!\brief The type of field::REF_SEQ (default depends on construction).
Expand Down Expand Up @@ -913,16 +905,27 @@ class alignment_file_input
template <std::ranges::ForwardRange ref_sequences_t>
void set_references(typename traits_type::ref_ids & ref_ids, ref_sequences_t && ref_sequences)
{
assert(std::ranges::size(ref_ids) == std::ranges::size(ref_sequences));
assert(std::ranges::distance(ref_ids) == std::ranges::distance(ref_sequences));

header_ptr = std::unique_ptr<header_type>{std::make_unique<header_type>(ref_ids)};
reference_sequences_ptr = &ref_sequences;

// initialise reference map and ref_dict if ref_ids are non-empty
for (size_t idx = 0; idx < ref_ids.size(); ++idx)
for (int32_t idx = 0; idx < std::ranges::distance(ref_ids); ++idx)
{
header_ptr->ref_id_info.emplace_back(std::ranges::size(ref_sequences[idx]), "");
header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
header_ptr->ref_id_info.emplace_back(std::ranges::distance(ref_sequences[idx]), "");

if constexpr (std::ranges::ContiguousRange<reference_t<typename traits_type::ref_ids>> &&
std::ranges::SizedRange<reference_t<typename traits_type::ref_ids>> &&
ForwardingRange<reference_t<typename traits_type::ref_ids>>)
{
auto && id = header_ptr->ref_ids()[idx];
header_ptr->ref_dict[std::span{std::ranges::data(id), std::ranges::size(id)}] = idx;
}
else
{
header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
}
}
}
//!\}
Expand Down
48 changes: 28 additions & 20 deletions include/seqan3/io/alignment_file/output.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,16 +389,7 @@ class alignment_file_output
alignment_file_output{filename, selected_field_ids{}}

{
assert(std::ranges::size(ref_ids) == std::ranges::size(ref_lengths));

header_ptr = std::make_unique<alignment_file_header<ref_ids_type>>(std::forward<ref_ids_type_>(ref_ids));

// fill ref_dict
for (size_t idx = 0; idx < std::ranges::size(ref_ids); ++idx)
{
header_ptr->ref_id_info.push_back({ref_lengths[idx], ""});
header_ptr->ref_dict[(header_ptr->ref_ids()[idx])] = idx;
}
initialise_header_information(ref_ids, ref_lengths);
}

/*!\brief Construct from an existing stream and with specified format.
Expand Down Expand Up @@ -436,16 +427,7 @@ class alignment_file_output
selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
alignment_file_output{std::forward<stream_type>(stream), file_format{}, selected_field_ids{}}
{
assert(std::ranges::size(ref_ids) == std::ranges::size(ref_lengths));

header_ptr = std::make_unique<alignment_file_header<ref_ids_type>>(std::forward<ref_ids_type_>(ref_ids));

// fill ref_dict
for (uint32_t idx = 0; idx < std::ranges::size(ref_ids); ++idx)
{
header_ptr->ref_id_info.emplace_back(ref_lengths[idx], "");
header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
}
initialise_header_information(ref_ids, ref_lengths);
}
//!\}

Expand Down Expand Up @@ -763,6 +745,32 @@ class alignment_file_output
//!\brief The file header object (will be set on construction).
std::unique_ptr<header_type> header_ptr;

//!\brief Fill the header reference dictionary, with the given info.
template <typename ref_ids_type_, typename ref_lengths_type>
void initialise_header_information(ref_ids_type_ && ref_ids, ref_lengths_type && ref_lengths)
{
assert(std::ranges::size(ref_ids) == std::ranges::size(ref_lengths));

header_ptr = std::make_unique<alignment_file_header<ref_ids_type>>(std::forward<ref_ids_type_>(ref_ids));

for (int32_t idx = 0; idx < std::ranges::distance(ref_ids); ++idx)
{
header_ptr->ref_id_info.emplace_back(ref_lengths[idx], "");

if constexpr (std::ranges::ContiguousRange<reference_t<ref_ids_type_>> &&
std::ranges::SizedRange<reference_t<ref_ids_type_>> &&
ForwardingRange<reference_t<ref_ids_type_>>)
{
auto && id = header_ptr->ref_ids()[idx];
header_ptr->ref_dict[std::span{std::ranges::data(id), std::ranges::size(id)}] = idx;
}
else
{
header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
}
}
}

//!\brief Write record to format.
template <typename record_header_ptr_t, typename ...pack_type>
void write_record(record_header_ptr_t && record_header_ptr, pack_type && ...remainder)
Expand Down
Loading