Skip to content

Commit

Permalink
nucmer: query sequences optional if --save switch is passed.
Browse files Browse the repository at this point in the history
  • Loading branch information
Guillaume Marcais committed Jun 2, 2016
1 parent 6c44bc4 commit 2e04345
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 27 deletions.
18 changes: 9 additions & 9 deletions include/mummer/nucmer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,14 +275,12 @@ class FileAligner {
template<typename AlignmentOut>
void align_file(const char* query_path, AlignmentOut alignments, unsigned int threads = std::thread::hardware_concurrency()) const;

typedef jellyfish::stream_manager<const char**> stream_manager;
typedef jellyfish::whole_sequence_parser<stream_manager> sequence_parser;
template<typename AlignmentOut>
static void trampoline_align_file(const FileAligner* self, sequence_parser* parser, AlignmentOut alignments) {
template<typename Parser, typename AlignmentOut>
static void trampoline_align_file(const FileAligner* self, Parser* parser, AlignmentOut alignments) {
self->thread_align_file(*parser, alignments);
}
template<typename AlignmentOut>
void thread_align_file(sequence_parser& parser, AlignmentOut alignments) const;
template<typename Parser, typename AlignmentOut>
void thread_align_file(Parser& parser, AlignmentOut alignments) const;

template<typename AlignmentOut>
void align_long_sequences(const FastaRecordSeq& query, AlignmentOut alignments) const;
Expand Down Expand Up @@ -364,6 +362,8 @@ class FileAligner {

template<typename AlignmentOut>
void FileAligner::align_file(const char* query_path, AlignmentOut alignments, unsigned int nb_threads) const {
typedef jellyfish::stream_manager<const char**> stream_manager;
typedef jellyfish::whole_sequence_parser<stream_manager> sequence_parser;
stream_manager streams(&query_path, &query_path + 1);
sequence_parser parser(4 * nb_threads, 10, 1, streams);

Expand All @@ -375,8 +375,8 @@ void FileAligner::align_file(const char* query_path, AlignmentOut alignments, un
th.join();
}

template<typename AlignmentOut>
void FileAligner::thread_align_file(sequence_parser& parser, AlignmentOut alignments) const {
template<typename Parser, typename AlignmentOut>
void FileAligner::thread_align_file(Parser& parser, AlignmentOut alignments) const {
typedef postnuc::Synteny<FastaRecordPtr> synteny_type;
std::vector<mgaps::Match_t> fwd_matches(1), bwd_matches(1);
std::vector<synteny_type> syntenys;
Expand Down Expand Up @@ -423,7 +423,7 @@ void FileAligner::thread_align_file(sequence_parser& parser, AlignmentOut alignm
};

while(true) {
sequence_parser::job j(parser);
typename Parser::job j(parser);
if(j.is_empty()) break;

for(size_t i = 0; i < j->nb_filled; ++i) {
Expand Down
3 changes: 2 additions & 1 deletion src/umd/nucmer_cmdline.yaggo
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,5 @@ arg("ref") {
c_string; typestr "path" }
arg("qry") {
description "Query sequence file"
c_string; typestr "path" }
c_string; typestr "path"
multiple; }
33 changes: 19 additions & 14 deletions src/umd/nucmer_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ struct getrealpath {
operator const char*() const { return res ? res : path; }
};

typedef jellyfish::stream_manager<const char**> stream_manager;
typedef std::vector<const char*>::const_iterator path_iterator;
typedef jellyfish::stream_manager<path_iterator> stream_manager;
typedef jellyfish::whole_sequence_parser<stream_manager> sequence_parser;

void query_thread(mummer::nucmer::FileAligner* aligner, sequence_parser* parser,
Expand Down Expand Up @@ -92,17 +93,22 @@ int main(int argc, char *argv[]) {
: (args.sam_short_given ? args.sam_short_arg
: (args.sam_long_given ? args.sam_long_arg
: args.prefix_arg + ".delta"));
std::ofstream os(output_file);
if(!os.good())
nucmer_cmdline::error() << "Failed to open output file '" << output_file << '\'';

getrealpath real_ref(args.ref_arg), real_qry(args.qry_arg);
if(args.sam_short_given || args.sam_long_given) {
os << "@HD VN1.0 SO:unsorted\n"
<< "@PG ID:nucmer PN:nucmer VN:4.0 CL:\"" << cmdline << "\"\n";
} else {
os << real_ref << ' ' << real_qry << '\n'
<< "NUCMER\n";
std::ofstream os;
if(!args.qry_arg.empty()) {
if(args.qry_arg.size() != 1 && !(args.sam_short_given || args.sam_long_given))
nucmer_cmdline::error() << "Multiple query file is only supported with the SAM output format";
os.open(output_file);
if(!os.good())
nucmer_cmdline::error() << "Failed to open output file '" << output_file << '\'';

getrealpath real_ref(args.ref_arg), real_qry(args.qry_arg[0]);
if(args.sam_short_given || args.sam_long_given) {
os << "@HD VN1.0 SO:unsorted\n"
<< "@PG ID:nucmer PN:nucmer VN:4.0 CL:\"" << cmdline << "\"\n";
} else {
os << real_ref << ' ' << real_qry << '\n'
<< "NUCMER\n";
}
}
thread_pipe::ostream_buffered output(os);

Expand All @@ -127,14 +133,13 @@ int main(int argc, char *argv[]) {
if(args.save_given && !aligner->sa().save(args.save_arg))
nucmer_cmdline::error() << "Can't save the suffix array to '" << args.save_arg << "'";

stream_manager streams(&args.qry_arg, &args.qry_arg + 1);
stream_manager streams(args.qry_arg.cbegin(), args.qry_arg.cend());
const unsigned int nb_threads = args.threads_given ? args.threads_arg : std::thread::hardware_concurrency();
#ifdef _OPENMP
if(args.threads_given) omp_set_num_threads(nb_threads);
#endif // _OPENMP

if(!args.genome_flag) {
stream_manager streams(&args.qry_arg, &args.qry_arg + 1);
sequence_parser parser(4 * nb_threads, 10, args.max_chunk_arg, 1, streams);

#ifdef _OPENMP
Expand Down
9 changes: 6 additions & 3 deletions tests/save_load.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
nucmer --save ${N}_sa --delta ${N}_1.delta $D/small_reads_1.fa $D/small_reads_0.fa
nucmer --load ${N}_sa --delta ${N}_2.delta $D/small_reads_1.fa $D/small_reads_0.fa
diff <(ufasta sort -H ${N}_1.delta) <(ufasta sort -H ${N}_2.delta) > $N.diff
nucmer --save ${N}_sa0 $D/small_reads_1.fa
nucmer --save ${N}_sa1 --delta ${N}_1.delta $D/small_reads_1.fa $D/small_reads_0.fa
nucmer --load ${N}_sa1 --delta ${N}_2.delta $D/small_reads_1.fa $D/small_reads_0.fa
nucmer --load ${N}_sa0 --delta ${N}_3.delta $D/small_reads_1.fa $D/small_reads_0.fa
diff <(ufasta sort -H ${N}_1.delta) <(ufasta sort -H ${N}_2.delta) > ${N}_2.diff
diff <(ufasta sort -H ${N}_1.delta) <(ufasta sort -H ${N}_3.delta) > ${N}_3.diff

0 comments on commit 2e04345

Please sign in to comment.