diff --git a/CMakeLists.txt b/CMakeLists.txt
index 093d437..52fb339 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,10 +4,10 @@ enable_testing()
project (Sailfish)
-set(CPACK_PACKAGE_VERSION "0.6.0")
+set(CPACK_PACKAGE_VERSION "0.6.1")
set(CPACK_PACKAGE_VERSION_MAJOR "0")
set(CPACK_PACKAGE_VERSION_MINOR "6")
-set(CPACK_PACKAGE_VERSION_PATCH "0")
+set(CPACK_PACKAGE_VERSION_PATCH "1")
set(CPACK_GENERATOR "TGZ")
set(CPACK_SOURCE_GENERATOR "TGZ")
set(CPACK_PACKAGE_VENDOR "Carnegie Mellon University")
@@ -323,7 +323,7 @@ ExternalProject_Add(libshark
URL http://www.cs.cmu.edu/~robp/files/Shark.tar.gz
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/Shark
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
- CMAKE_ARGS -DOPT_DYNAMIC_LIBRARY=TRUE -DBoost_NO_SYSTEM_PATHS=TRUE -DBOOST_INCLUDEDIR=${Boost_INCLUDE_DIRS}
+ CMAKE_ARGS -DOPT_DYNAMIC_LIBRARY=FALSE -DBoost_NO_SYSTEM_PATHS=TRUE -DBOOST_INCLUDEDIR=${Boost_INCLUDE_DIRS}
-DBOOST_LIBRARYDIR=${Boost_LIBRARY_DIRS} -DOPT_MAKE_TESTS=OFF -DCMAKE_INSTALL_PREFIX=${SHARK_INSTALL_DIR}
BUILD_COMMAND sh -c "make"
INSTALL_COMMAND sh -c "make install"
diff --git a/include/BinaryLiteral.hpp b/include/BinaryLiteral.hpp
index c07d1e0..f111634 100644
--- a/include/BinaryLiteral.hpp
+++ b/include/BinaryLiteral.hpp
@@ -1,3 +1,25 @@
+/**
+>HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+
+class ReadProducer {
+};
+
+template <>
+class ReadProducer {
+public:
+ ReadProducer(jellyfish::parse_read& parser) : s_(new ReadSeq), stream_(parser.new_thread()) {}
+ ~ReadProducer() { delete s_; }
+
+ bool nextRead(ReadSeq*& s) {
+ if ((read_ = stream_.next_read())) {
+ // we iterate over the entire read
+ const char* start = read_->seq_s;
+ const char* const end = read_->seq_e;
+ uint32_t readLen = std::distance(start, end);
+
+ s_->seq = const_cast(read_->seq_s);
+ s_->len = readLen;
+ s_->name = const_cast(read_->header);
+ s_->nlen = read_->hlen;
+ s = s_;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ void finishedWithRead(ReadSeq*& s) { s = nullptr; }
+
+private:
+ ReadSeq* s_;
+ jellyfish::parse_read::read_t* read_;//{parser.new_thread()};
+ jellyfish::parse_read::thread stream_;//{parser.new_thread()};
+};
+
+template <>
+class ReadProducer {
+public:
+ ReadProducer(StreamingReadParser& parser) : parser_(parser) {}
+
+ bool nextRead(ReadSeq*& s) {
+ if (parser_.nextRead(s)) {
+ return true;
+ } else {
+ s = nullptr;
+ return false;
+ }
+ }
+
+ void finishedWithRead(ReadSeq*& s) { parser_.finishedWithRead(s); }
+
+private:
+ StreamingReadParser& parser_;
+};
+
+#endif // __READPRODUCER_HPP__
diff --git a/include/SailfishConfig.hpp b/include/SailfishConfig.hpp
index 16aba1d..24ee037 100644
--- a/include/SailfishConfig.hpp
+++ b/include/SailfishConfig.hpp
@@ -28,8 +28,8 @@
namespace Sailfish {
constexpr char majorVersion[] = "0";
constexpr char minorVersion[] = "6";
- constexpr char patchVersion[] = "0";
- constexpr char version[] = "0.6.0";
+ constexpr char patchVersion[] = "1";
+ constexpr char version[] = "0.6.1";
}
#endif // SAILFISH_CONFIG_HPP
diff --git a/include/StreamingSequenceParser.hpp b/include/StreamingSequenceParser.hpp
index 69d28d4..0959208 100644
--- a/include/StreamingSequenceParser.hpp
+++ b/include/StreamingSequenceParser.hpp
@@ -1,3 +1,25 @@
+/**
+>HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+
#include
#include
diff --git a/src/BuildLUT.cpp b/src/BuildLUT.cpp
index d58f251..d8272b7 100644
--- a/src/BuildLUT.cpp
+++ b/src/BuildLUT.cpp
@@ -62,6 +62,8 @@
#include "CountDBNew.hpp"
#include "ezETAProgressBar.hpp"
#include "PartitionRefiner.hpp"
+#include "StreamingSequenceParser.hpp"
+#include "ReadProducer.hpp"
using TranscriptID = uint32_t;
using KmerID = uint64_t;
@@ -95,6 +97,7 @@ int buildLUTs(
using tbb::blocked_range;
+ /*
char** fnames = new char*[transcriptFiles.size()];
size_t z{0};
size_t numFnames{0};
@@ -105,9 +108,13 @@ int buildLUTs(
fnames[numFnames] = const_cast(s.c_str());
++numFnames;
}
-
// Create a jellyfish parser
jellyfish::parse_read parser( fnames, fnames+numFnames, 1000);
+ */
+
+ vector paths{transcriptFiles[0]};
+ StreamingReadParser parser(paths);
+ parser.start();
vector threads;
vector transcriptsForKmer;
@@ -119,17 +126,17 @@ int buildLUTs(
auto merLen = transcriptHash.kmerLength();
bool done {false};
atomic numRes {0};
- atomic nworking{numThreads-1};
+ atomic nworking{(numThreads > 1) ? (numThreads - 1) : 1};
// Start the thread that will print the progress bar
std::cerr << "Number of kmers : " << transcriptHash.size() << "\n";
- std::cerr << "Parsing transcripts an building k-mer equivalence classes\n";
+ std::cerr << "Parsing transcripts and building k-mer equivalence classes\n";
threads.push_back( std::thread( [&numRes, &nworking, numTranscripts] () {
size_t lastCount = numRes;
ez::ezETAProgressBar show_progress(numTranscripts);
show_progress.start();
- while ( nworking > 0 ) {
+ while ( numRes < numTranscripts ) {
auto diff = numRes - lastCount;
if ( diff > 0 ) {
show_progress += static_cast(diff);
@@ -147,38 +154,42 @@ int buildLUTs(
// Start the desired number of threads to parse the transcripts
// and build our data structure.
- for (size_t i = 0; i < numThreads - 1; ++i) {
+ size_t numWorkers = (numThreads > 1) ? numThreads -1 : 1;
+ for (size_t i = 0; i < numWorkers; ++i) {
threads.push_back( std::thread(
[&numRes, &tgmap, &parser, &transcriptHash, &nworking, &transcripts,
&transcriptIndex, &transcriptsForKmer, &refiner, &refinerMutex, merLen]() -> void {
// Each thread gets it's own stream
- jellyfish::parse_read::thread stream = parser.new_thread();
- jellyfish::parse_read::read_t* read;
+ //jellyfish::parse_read::thread stream = parser.new_thread();
+ //jellyfish::parse_read::read_t* read;
+ ReadProducer producer(parser);
+ ReadSeq* s;
+
auto INVALID = transcriptHash.INVALID;
bool useCanonical{transcriptIndex.canonical()};
// while there are transcripts left to process
- while ( (read = stream.next_read()) ) {
-
+ while (producer.nextRead(s)) {
// The transcript name
- std::string fullHeader(read->header, read->hlen);
+ std::string fullHeader(s->name, s->nlen);
std::string header = fullHeader.substr(0, fullHeader.find(' '));
// The transcript sequence; strip the newlines from the
// sequence and put it into a string (newSeq)
- std::string seq(read->seq_s, std::distance(read->seq_s, read->seq_e) - 1);
- auto newEnd = std::remove( seq.begin(), seq.end(), '\n' );
- auto readLen = std::distance( seq.begin(), newEnd );
- std::string newSeq(seq.begin(), seq.begin() + readLen);
+ //std::string seq(s->seq_s, std::distance(read->seq_s, read->seq_e) - 1);
+ //auto newEnd = std::remove( seq.begin(), seq.end(), '\n' );
+ //auto readLen = std::distance( seq.begin(), newEnd );
+ auto readLen = s->len;
+ std::string newSeq(s->seq, s->len);//seq.begin(), seq.begin() + readLen);
// Lookup the ID of this transcript in our transcript -> gene map
auto transcriptIndex = tgmap.findTranscriptID(header);
bool valid = (transcriptIndex != tgmap.INVALID);
auto geneIndex = tgmap.gene(transcriptIndex);
- if ( not valid ) { continue; }
+ if ( not valid ) { ++numRes; continue; }
++numRes;
size_t numKmers {(readLen >= merLen) ? static_cast(readLen) - merLen + 1 : 0};
@@ -216,6 +227,7 @@ int buildLUTs(
refinerMutex.unlock();
transcripts[transcriptIndex] = tinfo;
+ producer.finishedWithRead(s);
}
--nworking;
diff --git a/src/ComputeBiasFeatures.cpp b/src/ComputeBiasFeatures.cpp
index a30d67b..433c655 100644
--- a/src/ComputeBiasFeatures.cpp
+++ b/src/ComputeBiasFeatures.cpp
@@ -1,3 +1,4 @@
+
/**
>HEADER
Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
@@ -43,6 +44,8 @@
#include
#include "CommonTypes.hpp"
+#include "ReadProducer.hpp"
+#include "StreamingSequenceParser.hpp"
// holding 2-mers as a uint64_t is a waste of space,
// but using Jellyfish makes life so much easier, so
@@ -51,91 +54,29 @@ using Kmer = uint64_t;
using Sailfish::TranscriptFeatures;
namespace bfs = boost::filesystem;
-
-int computeBiasFeatures(
- std::vector& transcriptFiles,
- bfs::path outFilePath,
- size_t numThreads) {
-
- using std::string;
-
- std::vector alphabet{{"AA", "AC", "AG", "AT",
- "CA", "CC", "CG", "CT",
- "GA", "GC", "GG", "GT",
- "TA", "TC", "TG", "TT"}};
-
- std::vector diNucleotides(16);
- for (auto i : boost::irange(size_t{0}, diNucleotides.size())) {
- diNucleotides[i] = jellyfish::parse_dna::mer_string_to_binary(alphabet[i].c_str(), 2);
- }
-
-
- std::vector readFiles = transcriptFiles;
- for( auto rf : readFiles ) {
- std::cerr << "readFile: " << rf << ", ";
- }
- std::cerr << "\n";
-
- char** fnames = new char*[readFiles.size()];
- size_t z{0};
- size_t numFnames{0};
- for ( auto& s : readFiles ){
- // Ugly, yes? But this is not as ugly as the alternatives.
- // The char*'s contained in fname are owned by the readFiles
- // vector and need not be manually freed.
- fnames[numFnames] = const_cast(s.c_str());
- ++numFnames;
- }
-
- // Create a jellyfish parser
- jellyfish::parse_read parser( fnames, fnames+numFnames, 5000);
-
-
- size_t merLen = 2;
- Kmer lshift(2 * (merLen - 1));
- Kmer masq((1UL << (2 * merLen)) - 1);
- std::atomic readNum{0};
-
-
- size_t numActors = numThreads;
- size_t numComplete = 0;
- std::vector threads;
- auto tstart = std::chrono::steady_clock::now();
-
- tbb::concurrent_bounded_queue featQueue;
-
- std::ofstream ofile(outFilePath.string());
-
- auto outputThread = std::thread(
- [&ofile, &numComplete, &featQueue, numActors]() -> void {
- TranscriptFeatures tf{};
- while( numComplete < numActors or !featQueue.empty() ) {
- while(featQueue.try_pop(tf)) {
- ofile << tf.name << '\t';
- ofile << tf.length << '\t';
- ofile << tf.gcContent << '\t';
- for (auto i : boost::irange(size_t{0}, tf.diNucleotides.size())) {
- ofile << tf.diNucleotides[i];
- char end = (i == tf.diNucleotides.size() - 1) ? '\n' : '\t';
- ofile << end;
- }
- }
- boost::this_thread::sleep_for(boost::chrono::milliseconds(100));
-
- }
- ofile.close();
- }
- );
-
- for (auto i : boost::irange(size_t{0}, numActors)) {
- threads.push_back(std::thread(
+template
+bool computeBiasFeaturesHelper(ParserT& parser,
+ tbb::concurrent_bounded_queue& featQueue,
+ size_t& numComplete, size_t numThreads) {
+ size_t merLen = 2;
+ Kmer lshift(2 * (merLen - 1));
+ Kmer masq((1UL << (2 * merLen)) - 1);
+ std::atomic readNum{0};
+
+ size_t numActors = numThreads;
+ std::vector threads;
+ auto tstart = std::chrono::steady_clock::now();
+
+ for (auto i : boost::irange(size_t{0}, numActors)) {
+ threads.push_back(std::thread(
[&featQueue, &numComplete, &parser, &readNum, &tstart, lshift, masq, merLen, numActors]() -> void {
- jellyfish::parse_read::read_t* read;
- jellyfish::parse_read::thread stream = parser.new_thread();
+ ReadProducer producer(parser);
+
+ ReadSeq* s;
size_t cmlen, kmer, numKmers;
- while ( (read = stream.next_read()) ) {
- ++readNum; //++locallyProcessedReads;
+ while (producer.nextRead(s)) {
+ ++readNum;
if (readNum % 1000 == 0) {
auto tend = std::chrono::steady_clock::now();
auto sec = std::chrono::duration_cast(tend-tstart);
@@ -144,20 +85,23 @@ int computeBiasFeatures(
std::cerr << "processed " << readNum << " transcripts (" << rate << ") transcripts/s\r\r";
}
-
// we iterate over the entire read
- const char *start = read->seq_s;
- const char * const end = read->seq_e;
+ const char* start = s->seq;
+ uint32_t readLen = s->len;
+ const char* const end = s->seq + readLen;
- TranscriptFeatures tfeat{};
+ TranscriptFeatures tfeat{};
// reset all of the counts
numKmers = 0;
cmlen = kmer = 0;
- uint32_t readLen = std::distance(start, end);
+ // the maximum number of kmers we'd have to store
+ uint32_t maxNumKmers = (readLen >= merLen) ? readLen - merLen + 1 : 0;
+ if (maxNumKmers == 0) { featQueue.push(tfeat); continue; }
+
// The transcript name
- std::string fullHeader(read->header, read->hlen);
+ std::string fullHeader(s->name, s->nlen);
tfeat.name = fullHeader.substr(0, fullHeader.find(' '));
tfeat.length = readLen;
auto nfact = 1.0 / readLen;
@@ -186,9 +130,9 @@ int computeBiasFeatures(
// form the new kmer
kmer = ((kmer << 2) & masq) | c;
if (++cmlen >= merLen) {
- tfeat.diNucleotides[kmer]++;
- if (base == 'G' or base == 'C') { tfeat.gcContent += nfact; }
- }
+ tfeat.diNucleotides[kmer]++;
+ if (base == 'G' or base == 'C') { tfeat.gcContent += nfact; }
+ }
} // end switch
@@ -198,15 +142,88 @@ int computeBiasFeatures(
if (lastBase == 'G' or lastBase == 'C') { tfeat.gcContent += nfact; }
featQueue.push(tfeat);
+ producer.finishedWithRead(s);
+
} // end reads
} // end lambda
));
- } // actor loop
+ } // actor loop
- for (auto& t : threads) { t.join(); ++numComplete; }
- std::cerr << "\n";
- outputThread.join();
+ for (auto& t : threads) { t.join(); ++numComplete; }
+ return true;
+}
+
+int computeBiasFeatures(
+ std::vector& transcriptFiles,
+ bfs::path outFilePath,
+ bool useStreamingParser,
+ size_t numThreads) {
+ using std::string;
+ using std::vector;
+ using std::cerr;
+
+ size_t numActors = numThreads;
+ size_t numComplete = 0;
+ tbb::concurrent_bounded_queue featQueue;
+
+ std::ofstream ofile(outFilePath.string());
+
+ auto outputThread = std::thread(
+ [&ofile, &numComplete, &featQueue, numActors]() -> void {
+ TranscriptFeatures tf{};
+ while( numComplete < numActors or !featQueue.empty() ) {
+ while(featQueue.try_pop(tf)) {
+ ofile << tf.name << '\t';
+ ofile << tf.length << '\t';
+ ofile << tf.gcContent << '\t';
+ for (auto i : boost::irange(size_t{0}, tf.diNucleotides.size())) {
+ ofile << tf.diNucleotides[i];
+ char end = (i == tf.diNucleotides.size() - 1) ? '\n' : '\t';
+ ofile << end;
+ }
+ }
+ boost::this_thread::sleep_for(boost::chrono::milliseconds(100));
+
+ }
+ ofile.close();
+ });
+
+ bool tryJellyfish = !useStreamingParser;
+
+ std::vector readFiles = transcriptFiles;
+ for( auto rf : readFiles ) {
+ std::cerr << "readFile: " << rf << ", ";
+ }
+ std::cerr << "\n";
+
+ for (auto& readFile : readFiles) {
+ std::cerr << "file " << readFile << ": \n";
+
+ namespace bfs = boost::filesystem;
+ bfs::path filePath(readFile);
+
+ // If this is a regular file, then use the Jellyfish parser
+ if (tryJellyfish and bfs::is_regular_file(filePath)) {
+
+ char** fnames = new char*[1];// fnames[1];
+ fnames[0] = const_cast(readFile.c_str());
+
+ jellyfish::parse_read parser(fnames, fnames+1, 5000);
+
+ computeBiasFeaturesHelper(
+ parser, featQueue, numComplete, numActors);
+
+ } else { // If this is a named pipe, then use the kseq-based parser
+ vector paths{readFile};
+ StreamingReadParser parser(paths);
+ parser.start();
+ computeBiasFeaturesHelper(
+ parser, featQueue, numComplete, numActors);
+ }
+ }
+ std::cerr << "\n";
+ outputThread.join();
}
diff --git a/src/HeptamerIndex.cpp b/src/HeptamerIndex.cpp
index 0b7b6f5..ae48e29 100644
--- a/src/HeptamerIndex.cpp
+++ b/src/HeptamerIndex.cpp
@@ -1,9 +1,30 @@
+/**
+>HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+
//#include "BinaryLiteral.hpp"
-HeptamerIndex::HeptamerIndex() :
+HeptamerIndex::HeptamerIndex() :
heptamers_(std::vector(HeptamerIndex::PossibleHeptamers)) {}
@@ -14,7 +35,7 @@ std::size_t HeptamerIndex::index(uint64_t heptamer) {
idx += mult_[1] * ((heptamer & 0x0000000C) >> 2);
// base 3
idx += mult_[2] * ((heptamer & 0x00000030) >> 4);
- // base 4
+ // base 4
idx += mult_[3] * ((heptamer & 0x000000C0) >> 6);
// base 5
idx += mult_[4] * ((heptamer & 0x00000300) >> 8);
@@ -36,5 +57,5 @@ std::size_t HeptamerIndex::index(uint64_t heptamer) {
HeptamerIndex::incHeptamer(uint64_t heptamer) {
auto idx = index_(heptamer);
-
-}
\ No newline at end of file
+
+}
diff --git a/src/IndexedCounter.cpp b/src/IndexedCounter.cpp
index 1a31963..28d850a 100644
--- a/src/IndexedCounter.cpp
+++ b/src/IndexedCounter.cpp
@@ -49,6 +49,8 @@
#include "tbb/parallel_for.h"
#include "tbb/task_scheduler_init.h"
+#include "ReadProducer.hpp"
+
#include "jellyfish/parse_dna.hpp"
#include "jellyfish/mapped_file.hpp"
#include "jellyfish/parse_read.hpp"
@@ -66,66 +68,6 @@
enum class MerDirection : std::int8_t { FORWARD = 1, REVERSE = 2, BOTH = 3 };
-template
-class ReadProducer {
-};
-
-template <>
-class ReadProducer {
-public:
- ReadProducer(jellyfish::parse_read& parser) : s_(new ReadSeq), stream_(parser.new_thread()) {}
- ~ReadProducer() { delete s_; }
-
- bool nextRead(ReadSeq*& s) {
- if ((read_ = stream_.next_read())) {
- // we iterate over the entire read
- const char* start = read_->seq_s;
- const char* const end = read_->seq_e;
- uint32_t readLen = std::distance(start, end);
-
- s_->seq = const_cast(read_->seq_s);
- s_->len = readLen;
- s = s_;
- return true;
- } else {
- return false;
- }
- }
-
- void finishedWithRead(ReadSeq*& s) { s = nullptr; }
-
-private:
- ReadSeq* s_;
- jellyfish::parse_read::read_t* read_;//{parser.new_thread()};
- jellyfish::parse_read::thread stream_;//{parser.new_thread()};
-};
-
-
-
-template <>
-class ReadProducer {
-public:
- ReadProducer(StreamingReadParser& parser) : parser_(parser) {}
-
- bool nextRead(ReadSeq*& s) {
- if (parser_.nextRead(s)) {
- return true;
- } else {
- s = nullptr;
- return false;
- }
- }
-
- void finishedWithRead(ReadSeq*& s) { parser_.finishedWithRead(s); }
-
-private:
- StreamingReadParser& parser_;
-};
-
-
-
-
-
template
bool countKmers(ParserT& parser, PerfectHashIndex& phi, CountDBNew& rhash, size_t merLen,
bool discardPolyA, MerDirection direction, std::atomic& numReadsProcessed,
diff --git a/src/LookUpTableUtils.cpp b/src/LookUpTableUtils.cpp
index c7fe065..72ac80c 100644
--- a/src/LookUpTableUtils.cpp
+++ b/src/LookUpTableUtils.cpp
@@ -1,3 +1,25 @@
+/**
+>HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+
#include
#include
diff --git a/src/PCA.cpp b/src/PCA.cpp
index 0a821b9..b5cc1b8 100644
--- a/src/PCA.cpp
+++ b/src/PCA.cpp
@@ -1,3 +1,25 @@
+/**
+>HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+
#ifdef DEBUG
@@ -9,11 +31,11 @@
using namespace std;
using namespace Eigen;
-int Pca::Calculate(vector &x,
- const unsigned int &nrows,
- const unsigned int &ncols,
- const bool is_corr,
- const bool is_center,
+int Pca::Calculate(vector &x,
+ const unsigned int &nrows,
+ const unsigned int &ncols,
+ const bool is_corr,
+ const bool is_center,
const bool is_scale) {
_ncols = ncols;
_nrows = nrows;
@@ -43,7 +65,7 @@ int Pca::Calculate(vector &x,
for (unsigned int i = 0; i < _ncols; ++i) {
VectorXf curr_col = VectorXf::Constant(_nrows, mean_vector(i)); // mean(x) for column x
curr_col = _xXf.col(i) - curr_col; // x - mean(x)
- curr_col = curr_col.array().square(); // (x-mean(x))^2
+ curr_col = curr_col.array().square(); // (x-mean(x))^2
sd_vector(i) = sqrt((curr_col.sum())/denom);
if (0 == sd_vector(i)) {
zero_sd_num++;
@@ -112,13 +134,13 @@ int Pca::Calculate(vector &x,
}
#ifdef DEBUG
cout << "\n\nStandard deviations for PCs:\n";
- copy(_sd.begin(), _sd.end(),std::ostream_iterator(std::cout," "));
+ copy(_sd.begin(), _sd.end(),std::ostream_iterator(std::cout," "));
cout << "\n\nKaiser criterion: PC #" << _kaiser << endl;
#endif
tmp_vec.resize(0);
// PC's cumulative proportion
_thresh95 = 1;
- _cum_prop.push_back(_prop_of_var[0]);
+ _cum_prop.push_back(_prop_of_var[0]);
for (unsigned int i = 1; i < _prop_of_var.size(); ++i) {
_cum_prop.push_back(_cum_prop[i-1]+_prop_of_var[i]);
if (_cum_prop[i] < 0.95) {
@@ -127,7 +149,7 @@ int Pca::Calculate(vector &x,
}
#ifdef DEBUG
cout << "\nCumulative proportion:\n";
- copy(_cum_prop.begin(), _cum_prop.end(),std::ostream_iterator(std::cout," "));
+ copy(_cum_prop.begin(), _cum_prop.end(),std::ostream_iterator(std::cout," "));
cout << "\n\nThresh95 criterion: PC #" << _thresh95 << endl;
#endif
// Scores
@@ -144,8 +166,8 @@ int Pca::Calculate(vector &x,
eigen_scores.resize(0, 0);
#ifdef DEBUG
cout << "\n\nScores in vector:\n";
- copy(_scores.begin(), _scores.end(),std::ostream_iterator(std::cout," "));
- cout << "\n";
+ copy(_scores.begin(), _scores.end(),std::ostream_iterator(std::cout," "));
+ cout << "\n";
#endif
} else { // COR OR COV MATRICES ARE HERE
_method = "cor";
@@ -167,14 +189,14 @@ int Pca::Calculate(vector &x,
#ifdef DEBUG
cout << endl << eigen_eigenvalues.transpose() << endl;
#endif
- MatrixXf eigen_eigenvectors = edc.eigenvectors().real();
+ MatrixXf eigen_eigenvectors = edc.eigenvectors().real();
#ifdef DEBUG
cout << endl << eigen_eigenvectors << endl;
#endif
// The eigenvalues and eigenvectors are not sorted in any particular order.
// So, we should sort them
typedef pair eigen_pair;
- vector ep;
+ vector ep;
for (unsigned int i = 0 ; i < _ncols; ++i) {
ep.push_back(make_pair(eigen_eigenvalues(i), i));
}
@@ -191,11 +213,11 @@ int Pca::Calculate(vector &x,
#ifdef DEBUG
cout << endl << eigen_eigenvalues_sorted.transpose() << endl;
cout << endl << eigen_eigenvectors_sorted << endl;
- #endif
+ #endif
// We don't need not sorted arrays anymore
eigen_eigenvalues.resize(0);
eigen_eigenvectors.resize(0, 0);
-
+
_sd.clear(); _prop_of_var.clear(); _kaiser = 0;
float tmp_sum = eigen_eigenvalues_sorted.sum();
for (unsigned int i = 0; i < _ncols; ++i) {
@@ -207,9 +229,9 @@ int Pca::Calculate(vector &x,
}
#ifdef DEBUG
cout << "\nStandard deviations for PCs:\n";
- copy(_sd.begin(), _sd.end(), std::ostream_iterator(std::cout," "));
+ copy(_sd.begin(), _sd.end(), std::ostream_iterator(std::cout," "));
cout << "\nProportion of variance:\n";
- copy(_prop_of_var.begin(), _prop_of_var.end(), std::ostream_iterator(std::cout," "));
+ copy(_prop_of_var.begin(), _prop_of_var.end(), std::ostream_iterator(std::cout," "));
cout << "\nKaiser criterion: PC #" << _kaiser << endl;
#endif
// PC's cumulative proportion
@@ -220,10 +242,10 @@ int Pca::Calculate(vector &x,
if (_cum_prop[i] < 0.95) {
_thresh95 = i+1;
}
- }
+ }
#ifdef DEBUG
cout << "\n\nCumulative proportions:\n";
- copy(_cum_prop.begin(), _cum_prop.end(), std::ostream_iterator(std::cout," "));
+ copy(_cum_prop.begin(), _cum_prop.end(), std::ostream_iterator(std::cout," "));
cout << "\n\n95% threshold: PC #" << _thresh95 << endl;
#endif
// Scores for PCA with correlation matrix
@@ -246,14 +268,14 @@ int Pca::Calculate(vector &x,
eigen_scores.resize(0, 0);
#ifdef DEBUG
cout << "\n\nScores in vector:\n";
- copy(_scores.begin(), _scores.end(), std::ostream_iterator(std::cout," "));
- cout << "\n";
+ copy(_scores.begin(), _scores.end(), std::ostream_iterator(std::cout," "));
+ cout << "\n";
#endif
}
return 0;
}
-std::vector Pca::sd(void) { return _sd; };
+std::vector Pca::sd(void) { return _sd; };
std::vector Pca::prop_of_var(void) {return _prop_of_var; };
std::vector Pca::cum_prop(void) { return _cum_prop; };
std::vector Pca::scores(void) { return _scores; };
@@ -270,7 +292,7 @@ Pca::Pca(void) {
_ncols = 0;
// Variables will be scaled by default
_is_center = true;
- _is_scale = true;
+ _is_scale = true;
// By default will be used singular value decomposition
_method = "svd";
_is_corr = false;
@@ -278,7 +300,7 @@ Pca::Pca(void) {
_kaiser = 0;
_thresh95 = 1;
}
-Pca::~Pca(void) {
+Pca::~Pca(void) {
_xXf.resize(0, 0);
_x.clear();
}
diff --git a/src/PCAUtils.cpp b/src/PCAUtils.cpp
index 445be43..ce4d5fd 100644
--- a/src/PCAUtils.cpp
+++ b/src/PCAUtils.cpp
@@ -1,3 +1,25 @@
+/**
+>HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+
#include
diff --git a/src/PartitionRefiner.cpp b/src/PartitionRefiner.cpp
index 5c4cab0..fa4dc27 100644
--- a/src/PartitionRefiner.cpp
+++ b/src/PartitionRefiner.cpp
@@ -1,3 +1,25 @@
+/**
+>HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+
#include
diff --git a/src/PerfectHashIndexer.cpp b/src/PerfectHashIndexer.cpp
index c8cda91..34d0862 100644
--- a/src/PerfectHashIndexer.cpp
+++ b/src/PerfectHashIndexer.cpp
@@ -234,6 +234,7 @@ void buildLUTs(
int computeBiasFeatures(
std::vector& transcriptFiles,
boost::filesystem::path outFilePath,
+ bool useStreamingParser,
size_t numThreads);
int mainIndex( int argc, char *argv[] ) {
@@ -241,6 +242,7 @@ int mainIndex( int argc, char *argv[] ) {
namespace po = boost::program_options;
uint32_t maxThreads = std::thread::hardware_concurrency();
+ bool useStreamingParser = true;
po::options_description generic("Command Line Options");
generic.add_options()
@@ -265,7 +267,7 @@ int mainIndex( int argc, char *argv[] ) {
;
po::variables_map vm;
-
+
try {
po::store(po::command_line_parser(argc, argv).options(generic).run(), vm);
@@ -321,7 +323,7 @@ the Jellyfish database [thash] of the transcripts.
// First, compute the transcript features in case the user
// ever wants to bias-correct his / her results
bfs::path transcriptBiasFile(outputPath); transcriptBiasFile /= "bias_feats.txt";
- computeBiasFeatures(transcriptFiles, transcriptBiasFile, numThreads);
+ computeBiasFeatures(transcriptFiles, transcriptBiasFile, useStreamingParser, numThreads);
bfs::path jfHashFile(outputPath); jfHashFile /= "jf.counts_0";
@@ -381,6 +383,7 @@ the Jellyfish database [thash] of the transcripts.
std::cerr << "building transcript to gene map using transcript fasta file [" <<
transcriptFiles[0] << "] . . .\n";
tgmap = sailfish::utils::transcriptToGeneMapFromFasta(transcriptFiles[0]);
+ std::cerr << "there are " << tgmap.numTranscripts() << " transcripts . . . ";
std::cerr << "done\n";
}
diff --git a/src/PerformBiasCorrection.cpp b/src/PerformBiasCorrection.cpp
index f17d378..58481aa 100644
--- a/src/PerformBiasCorrection.cpp
+++ b/src/PerformBiasCorrection.cpp
@@ -1,3 +1,25 @@
+/**
+>HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+
#include
#include
diff --git a/src/PerformBiasCorrection_old.cpp b/src/PerformBiasCorrection_old.cpp
index 22d2dac..cf2117b 100644
--- a/src/PerformBiasCorrection_old.cpp
+++ b/src/PerformBiasCorrection_old.cpp
@@ -1,3 +1,25 @@
+/**
+>HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+
#include
#include
@@ -134,7 +156,7 @@ int main(int argc, char* argv[]) {
auto& tname = features[i].name;
auto rpkm = sfres.expressions[tname].rpkm;
shark::RealVector v(1);
-
+
if ( rpkm >= 0.001 ) {
retainedRows.emplace_back(i);
retainedNames.push_back(tname);
@@ -147,7 +169,7 @@ int main(int argc, char* argv[]) {
minLRPKM = std::min(minLRPKM, v(0));
maxLRPKM = std::max(maxLRPKM, v(0));
avgLRPKM += v(0);
- RPKMLabels.emplace_back(v);
+ RPKMLabels.emplace_back(v);
}
}
avgLRPKM /= retainedRows.size();
@@ -203,7 +225,7 @@ int main(int argc, char* argv[]) {
v(0) = std::log(static_cast(features[r].length));
train.X[c][0] = std::log(static_cast(features[r].length));
- train2.X[c][0] = std::log(static_cast(features[r].length));
+ train2.X[c][0] = std::log(static_cast(features[r].length));
for (auto j : boost::irange(size_t{1}, dimCutoff+1)) {
train.X[c][j] = encodedXsub.element(c)(j-1);
train2.X[c][j] = encodedXsub.element(c)(j-1);
@@ -270,8 +292,8 @@ int main(int argc, char* argv[]) {
));*/
/*
- std::cerr << "there are " << numRetainedSamples << " samples\n";
- std::cerr << "there are " << train.n_features << " features\n";
+ std::cerr << "there are " << numRetainedSamples << " samples\n";
+ std::cerr << "there are " << train.n_features << " features\n";
reg->build(train.X, train.y, numRetainedSamples);
REAL* pred = new REAL[numRetainedSamples];
@@ -283,19 +305,19 @@ int main(int argc, char* argv[]) {
std::cerr << "Train RMSE=" << trn_rmse << ", Correlation Coefficient=" << trn_r2 << "\n";
*/
-
+
shark::Data input = shark::createDataFromRange(X);
shark::Data regLabels = shark::createDataFromRange(labels);
shark::LabeledData regressionData(input, regLabels);
-
+
shark::RFTrainer trainer;
shark::RFClassifier model;
-
+
trainer.setNodeSize(20);
trainer.setNTrees(500);
trainer.setOOBratio(0.001);
-
+
//std::cerr << "number of classes = " << shark::numberOfClasses(regressionSubset) << "\n";
//std::cerr << "number of classes = " << shark::numberOfClasses(regressionSubset) << "\n";
trainer.train(model, regressionData);
@@ -316,10 +338,10 @@ int main(int argc, char* argv[]) {
/*
auto mm = std::minmax_element(train.y, train.y + numRetainedSamples);
double minLRPKM = std::get<0>(mm);
- double maxLRPKM = std::get<1>(mm);
+ double maxLRPKM = std::get<1>(mm);
*/
- for (auto i : boost::irange(size_t{0}, size_t{numRetainedSamples})) {
+ for (auto i : boost::irange(size_t{0}, size_t{numRetainedSamples})) {
pred[i] = RPKMLabels[i](0) - pred[i];
}
@@ -333,9 +355,9 @@ int main(int argc, char* argv[]) {
std::cerr << "min, max pred : " << minPred << ", " << maxPred << "\n";
std::cerr << "SCALE: " << scale << "\n";
-
+
minPred = std::numeric_limits::max();
- for (auto i : boost::irange(size_t{0}, size_t{numRetainedSamples})) {
+ for (auto i : boost::irange(size_t{0}, size_t{numRetainedSamples})) {
pred[i] *= scale;
minPred = std::min(minPred, pred[i]);
}
@@ -348,7 +370,7 @@ int main(int argc, char* argv[]) {
minPred = std::min(minPred, pred[i]);
maxPred = std::max(maxPred, pred[i]);
}
-
+
/*
trn_rmse=rmse(pred, train.y, numRetainedSamples);
trn_r2=R2(pred, train.y, numRetainedSamples);
@@ -365,8 +387,8 @@ int main(int argc, char* argv[]) {
double rpkm = 0.0;
auto& name = features[i].name;
- auto& r = sfres.expressions[name];
-
+ auto& r = sfres.expressions[name];
+
if (i == retainedRows[retainedCnt]) {
//rpkm = std::exp(pred[retainedCnt]);
rpkm = std::exp(pred[retainedCnt]);
@@ -381,7 +403,7 @@ int main(int argc, char* argv[]) {
}
rpkm = std::exp(RPKMLabels[retainedCnt](0));// pred[retainedCnt]);
-
+
++retainedCnt;
} else {
rpkm = sfres.expressions[features[i].name].rpkm;
@@ -395,4 +417,4 @@ int main(int argc, char* argv[]) {
//for ( auto i : retainedRows ) { std::cerr << i << " "; }
ofile.close();
-}
\ No newline at end of file
+}
diff --git a/src/SailfishUtils.cpp b/src/SailfishUtils.cpp
index b2aaf83..b9136b8 100644
--- a/src/SailfishUtils.cpp
+++ b/src/SailfishUtils.cpp
@@ -35,6 +35,8 @@
#include "TranscriptGeneMap.hpp"
#include "GenomicFeature.hpp"
#include "SailfishUtils.hpp"
+#include "ReadProducer.hpp"
+#include "StreamingSequenceParser.hpp"
namespace sailfish {
namespace utils {
@@ -167,23 +169,24 @@ TranscriptGeneMap readTranscriptToGeneMap( std::ifstream &ifile ) {
TranscriptGeneMap transcriptToGeneMapFromFasta( const std::string& transcriptsFile ) {
+ using std::vector;
NameVector transcriptNames;
NameVector geneNames {"gene"};
- char* fnames[1] = { const_cast(transcriptsFile.c_str()) };
- // Create a jellyfish parser
- jellyfish::parse_read parser( fnames, fnames+1, 1000);
+ vector paths{transcriptsFile};
+ StreamingReadParser parser(paths);
+ parser.start();
- // Each thread gets it's own stream
- jellyfish::parse_read::thread stream = parser.new_thread();
- jellyfish::parse_read::read_t* read;
+ ReadProducer producer(parser);
+ ReadSeq* s;
// while there are transcripts left to process
- while ( (read = stream.next_read()) ) {
+ while (producer.nextRead(s)) {
// The transcript name
- std::string fullHeader(read->header, read->hlen);
+ std::string fullHeader(s->name, s->nlen);
std::string header = fullHeader.substr(0, fullHeader.find(' '));
transcriptNames.emplace_back(header);
+ producer.finishedWithRead(s);
}
// Sort the transcript names
diff --git a/src/StreamingSequenceParser.cpp b/src/StreamingSequenceParser.cpp
index 76e00d0..4435d8e 100644
--- a/src/StreamingSequenceParser.cpp
+++ b/src/StreamingSequenceParser.cpp
@@ -1,3 +1,25 @@
+/**
+>HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+
@@ -79,11 +101,23 @@ bool StreamingReadParser::start() {
int ksv = kseq_read(seq);
while (ksv >= 0) {
this->seqContainerQueue_.pop(s);
+
+ // Possibly allocate more space for the sequence
if (seq->seq.l > s->len) {
s->seq = static_cast(realloc(s->seq, seq->seq.l));
}
+ // Copy the sequence length and sequence over to the ReadSeq struct
s->len = seq->seq.l;
memcpy(s->seq, seq->seq.s, s->len);
+
+ // Possibly allocate more space for the name
+ if (seq->name.l > s->nlen) {
+ s->name = static_cast(realloc(s->name, seq->name.l));
+ }
+ // Copy the name length and name over to the ReadSeq struct
+ s->nlen = seq->name.l;
+ memcpy(s->name, seq->name.s, s->nlen);
+
this->readQueue_.push(s);
ksv = kseq_read(seq);
}
@@ -91,7 +125,6 @@ bool StreamingReadParser::start() {
// destroy the parser and close the file
kseq_destroy(seq);
close(pfd.fd);
-
}
@@ -105,10 +138,10 @@ bool StreamingReadParser::start() {
}
bool StreamingReadParser::nextRead(ReadSeq*& seq) {
- while(parsing_) {
+ while(parsing_ or !readQueue_.empty()) {
if (readQueue_.try_pop(seq)) { return true; }
}
return false;
- }
+}
void StreamingReadParser::finishedWithRead(ReadSeq*& s) { seqContainerQueue_.push(s); }
diff --git a/src/TestHeptamerIndex.cpp b/src/TestHeptamerIndex.cpp
index 014ce3d..196f823 100644
--- a/src/TestHeptamerIndex.cpp
+++ b/src/TestHeptamerIndex.cpp
@@ -1,3 +1,25 @@
+/**
+>HEADER
+ Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
+
+ This file is part of Sailfish.
+
+ Sailfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Sailfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Sailfish. If not, see .
+
#include
@@ -21,7 +43,7 @@ int main(int argc, char* argv[]) {
jellyfish::parse_read::thread stream{parser.new_thread()};
HeptamerIndex hi;
- size_t merLen = 7;
+ size_t merLen = 7;
uint64_t lshift{2 * (merLen - 1)};
uint64_t masq{(1UL << (2 * merLen)) - 1};
@@ -48,7 +70,7 @@ int main(int argc, char* argv[]) {
cmlen = kmer = rkmer = 0;
break;
- default:
+ default:
kmer = ((kmer << 2) & masq) | c;
rkmer = (rkmer >> 2) | ((0x3 - c) << lshift);
// count if the kmer is valid in the forward and