From d5281c664d33ca8b936093ec45622f84862a59e2 Mon Sep 17 00:00:00 2001 From: Kent Riemondy Date: Sat, 20 Aug 2022 10:18:41 -0600 Subject: [PATCH 1/6] updated interval tree: - added friend declaration for findClosest into external header - moved findClosest definition out of external header - uses std::move to build tree - commit id from ekg/intervaltree f0c404 --- inst/include/IntervalTree.h | 450 ++++++++++++++++++-------------- inst/include/intervalTree_ext.h | 36 +++ inst/include/intervals.h | 4 +- inst/include/valr.h | 1 + revdep/.gitignore | 9 - revdep/README.md | 30 --- revdep/cran.md | 7 - revdep/failures.md | 1 - revdep/problems.md | 1 - src/Makevars | 1 - src/closest.cpp | 86 +++++- src/coverage.cpp | 4 +- src/intersect.cpp | 5 +- src/shuffle.cpp | 2 +- src/subtract.cpp | 4 +- 15 files changed, 381 insertions(+), 260 deletions(-) create mode 100644 inst/include/intervalTree_ext.h delete mode 100644 revdep/.gitignore delete mode 100644 revdep/README.md delete mode 100644 revdep/cran.md delete mode 100644 revdep/failures.md delete mode 100644 revdep/problems.md diff --git a/inst/include/IntervalTree.h b/inst/include/IntervalTree.h index fefb149d..d96edeb7 100644 --- a/inst/include/IntervalTree.h +++ b/inst/include/IntervalTree.h @@ -19,307 +19,363 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/* Notes for valr: + https://github.com/ekg/intervaltree/blob/master/IntervalTree.h + commit id f0c4046514f41072be20da07b69d8a92220c9294 + Added a friend function declaration for findClosest() to enable + access to private class members +*/ + #ifndef valr__INTERVAL_TREE_H #define valr__INTERVAL_TREE_H +#ifndef __INTERVAL_TREE_H +#define __INTERVAL_TREE_H + #include #include #include #include +#include +#include -template +#ifdef USE_INTERVAL_TREE_NAMESPACE +namespace interval_tree { +#endif +template class Interval { public: - K start; - K stop; - T value; - Interval(K s, K e, const T& v) - : start(s) - , stop(e) + Scalar start; + Scalar stop; + Value value; + Interval(const Scalar& s, const Scalar& e, const Value& v) + : start(std::min(s, e)) + , stop(std::max(s, e)) , value(v) - { } + {} }; -template -K intervalStart(const Interval& i) { +template +Value intervalStart(const Interval& i) { return i.start; } -template -K intervalStop(const Interval& i) { +template +Value intervalStop(const Interval& i) { return i.stop; } -template -std::ostream& operator<<(std::ostream& out, Interval& i) { +template +std::ostream& operator<<(std::ostream& out, const Interval& i) { out << "Interval(" << i.start << ", " << i.stop << "): " << i.value; return out; } - -template -K intervalOverlap(const Interval& a, const Interval& b) { - return std::min(a.stop, b.stop) - std::max(a.start, b.start) ; -} - -template -class IntervalStartSorter { -public: - bool operator()(const Interval& a, const Interval& b) { - return a.start < b.start; - } -}; - -template -class IntervalSorterDesc { +template +class IntervalTree { public: - bool operator()(const Interval& a, const Interval& b) { - return a.start > b.start; - } -}; + typedef Interval interval; + typedef std::vector interval_vector; -template -class IntervalTree { -public: - typedef Interval interval; - typedef std::vector intervalVector; - typedef IntervalTree intervalTree; + struct IntervalStartCmp { + bool operator()(const interval& a, const interval& b) { + return a.start < b.start; + } + }; - intervalVector intervals; - std::unique_ptr left; - std::unique_ptr right; - K center; + struct IntervalStopCmp { + bool operator()(const interval& a, const interval& b) { + return a.stop < b.stop; + } + }; - IntervalTree(void) + IntervalTree() : left(nullptr) , right(nullptr) , center(0) - { } + {} -private: - std::unique_ptr copyTree(const intervalTree& orig) { - return std::unique_ptr(new intervalTree(orig)); - } + ~IntervalTree() = default; -public: + std::unique_ptr clone() const { + return std::unique_ptr(new IntervalTree(*this)); + } - IntervalTree(const intervalTree& other) + IntervalTree(const IntervalTree& other) : intervals(other.intervals), - left(other.left ? copyTree(*other.left) : nullptr), - right(other.right ? copyTree(*other.right) : nullptr), + left(other.left ? other.left->clone() : nullptr), + right(other.right ? other.right->clone() : nullptr), center(other.center) - { - } + {} -public: + IntervalTree& operator=(IntervalTree&&) = default; + IntervalTree(IntervalTree&&) = default; - IntervalTree& operator=(const intervalTree& other) { - center = other.center ; + IntervalTree& operator=(const IntervalTree& other) { + center = other.center; intervals = other.intervals; - left = other.left ? copyTree(*other.left) : nullptr; - right = other.right ? copyTree(*other.right) : nullptr; + left = other.left ? other.left->clone() : nullptr; + right = other.right ? other.right->clone() : nullptr; return *this; } - // Note: changes the order of ivals - IntervalTree( - intervalVector& ivals, + IntervalTree( + interval_vector&& ivals, std::size_t depth = 16, std::size_t minbucket = 64, - K leftextent = 0, - K rightextent = 0, - std::size_t maxbucket = 512 - ) + std::size_t maxbucket = 512, + Scalar leftextent = 0, + Scalar rightextent = 0) : left(nullptr) , right(nullptr) { - --depth; - IntervalStartSorter intervalStartSorter; + const auto minmaxStop = std::minmax_element(ivals.begin(), ivals.end(), + IntervalStopCmp()); + const auto minmaxStart = std::minmax_element(ivals.begin(), ivals.end(), + IntervalStartCmp()); + if (!ivals.empty()) { + center = (minmaxStart.first->start + minmaxStop.second->stop) / 2; + } + if (leftextent == 0 && rightextent == 0) { + // sort intervals by start + std::sort(ivals.begin(), ivals.end(), IntervalStartCmp()); + } else { + assert(std::is_sorted(ivals.begin(), ivals.end(), IntervalStartCmp())); + } if (depth == 0 || (ivals.size() < minbucket && ivals.size() < maxbucket)) { - std::sort(ivals.begin(), ivals.end(), intervalStartSorter); - intervals = ivals; - center = ivals.at(ivals.size() / 2).start; + std::sort(ivals.begin(), ivals.end(), IntervalStartCmp()); + intervals = std::move(ivals); + assert(is_valid().first); + return; } else { - if (leftextent == 0 && rightextent == 0) { - // sort intervals by start - std::sort(ivals.begin(), ivals.end(), intervalStartSorter); - } - - K leftp = 0; - K rightp = 0; - K centerp = 0; + Scalar leftp = 0; + Scalar rightp = 0; if (leftextent || rightextent) { leftp = leftextent; rightp = rightextent; } else { leftp = ivals.front().start; - std::vector stops; - stops.resize(ivals.size()); - transform(ivals.begin(), ivals.end(), stops.begin(), intervalStop); - rightp = *max_element(stops.begin(), stops.end()); + rightp = std::max_element(ivals.begin(), ivals.end(), + IntervalStopCmp())->stop; } - //centerp = ( leftp + rightp ) / 2; - centerp = ivals.at(ivals.size() / 2).start; - center = centerp; - - intervalVector lefts; - intervalVector rights; + interval_vector lefts; + interval_vector rights; - for (typename intervalVector::const_iterator i = ivals.begin(); i != ivals.end(); ++i) { + for (typename interval_vector::const_iterator i = ivals.begin(); + i != ivals.end(); ++i) { const interval& interval = *i; - if (centerp > interval.stop) { + if (interval.stop < center) { lefts.push_back(interval); - } else if (interval.start > centerp) { + } else if (interval.start > center) { rights.push_back(interval); } else { + assert(interval.start <= center); + assert(center <= interval.stop); intervals.push_back(interval); } } if (!lefts.empty()) { - left = std::unique_ptr(new intervalTree(lefts, depth, minbucket, leftp, centerp)); + left.reset(new IntervalTree(std::move(lefts), + depth, minbucket, maxbucket, + leftp, center)); } if (!rights.empty()) { - right = std::unique_ptr(new intervalTree(rights, depth, minbucket, centerp, rightp)); + right.reset(new IntervalTree(std::move(rights), + depth, minbucket, maxbucket, + center, rightp)); } } + assert(is_valid().first); } - intervalVector findOverlapping(K start, K stop) const { - intervalVector ov; - this->findOverlapping(start, stop, ov); - return ov; - } - - void findOverlapping(K start, K stop, intervalVector& overlapping) const { - if (!intervals.empty() && !(stop < intervals.front().start)) { - for (typename intervalVector::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { - const interval& interval = *i; - if (interval.stop >= start && interval.start <= stop) { - overlapping.push_back(interval); - } + // Call f on all intervals near the range [start, stop]: + template + void visit_near(const Scalar& start, const Scalar& stop, UnaryFunction f) const { + if (!intervals.empty() && ! (stop < intervals.front().start)) { + for (auto & i : intervals) { + f(i); } } - if (left && start <= center) { - left->findOverlapping(start, stop, overlapping); + left->visit_near(start, stop, f); } - if (right && stop >= center) { - right->findOverlapping(start, stop, overlapping); + right->visit_near(start, stop, f); } + } + // Call f on all intervals crossing pos + template + void visit_overlapping(const Scalar& pos, UnaryFunction f) const { + visit_overlapping(pos, pos, f); } - intervalVector findContained(K start, K stop) const { - intervalVector contained; - this->findContained(start, stop, contained); - return contained; + // Call f on all intervals overlapping [start, stop] + template + void visit_overlapping(const Scalar& start, const Scalar& stop, UnaryFunction f) const { + auto filterF = [&](const interval& interval) { + if (interval.stop >= start && interval.start <= stop) { + // Only apply f if overlapping + f(interval); + } + }; + visit_near(start, stop, filterF); } - void findContained(K start, K stop, intervalVector& contained) const { - if (!intervals.empty() && !(stop < intervals.front().start)) { - for (typename intervalVector::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { - const interval& interval = *i; - if (interval.start >= start && interval.stop <= stop) { - contained.push_back(interval); - } + // Call f on all intervals contained within [start, stop] + template + void visit_contained(const Scalar& start, const Scalar& stop, UnaryFunction f) const { + auto filterF = [&](const interval& interval) { + if (start <= interval.start && interval.stop <= stop) { + f(interval); } - } + }; + visit_near(start, stop, filterF); + } - if (left && start <= center) { - left->findContained(start, stop, contained); - } + interval_vector findOverlapping(const Scalar& start, const Scalar& stop) const { + interval_vector result; + visit_overlapping(start, stop, + [&](const interval& interval) { + result.emplace_back(interval); + }); + return result; + } - if (right && stop >= center) { - right->findContained(start, stop, contained); - } + interval_vector findContained(const Scalar& start, const Scalar& stop) const { + interval_vector result; + visit_contained(start, stop, + [&](const interval& interval) { + result.push_back(interval); + }); + return result; + } + bool empty() const { + if (left && !left->empty()) { + return false; + } + if (!intervals.empty()) { + return false; + } + if (right && !right->empty()) { + return false; + } + return true; } - intervalVector findClosest(K start, K stop) const { - intervalVector closest ; - std::pair min_dist; - this->findClosest(start, stop, closest, min_dist) ; - return closest; + template + void visit_all(UnaryFunction f) const { + if (left) { + left->visit_all(f); + } + std::for_each(intervals.begin(), intervals.end(), f); + if (right) { + right->visit_all(f); + } } - void findClosest(K start, K stop, intervalVector& closest, - std::pair& min_dist) const { + std::pair extentBruitForce() const { + struct Extent { + std::pair x = {std::numeric_limits::max(), + std::numeric_limits::min() }; + void operator()(const interval & interval) { + x.first = std::min(x.first, interval.start); + x.second = std::max(x.second, interval.stop); + } + }; + Extent extent; - if (!intervals.empty() && !(stop < intervals.front().start)) { - for (typename intervalVector::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { - const interval& interval = *i; - if (interval.stop > start && interval.start < stop) { - // adjacent intervals are considered non-overlappping - closest.push_back(interval); - } else if (stop <= interval.start) { - // find distance on left - int ivl_dist_l = interval.start - stop ; - // if smaller than best min dist found update pair with dist and intervals - if (ivl_dist_l < min_dist.first) { - min_dist.first = ivl_dist_l ; - min_dist.second.clear() ; - min_dist.second.push_back(interval) ; - } else if (ivl_dist_l == min_dist.first) { - // if same dist append intervals - min_dist.second.push_back(interval) ; - } - } else if (start >= interval.stop) { - // find distance on right - int ivl_dist_r = start - interval.stop ; - // if smaller than best min dist found update pair with dist and intervals - if (ivl_dist_r < min_dist.first) { - min_dist.first = ivl_dist_r ; - min_dist.second.clear() ; - min_dist.second.push_back(interval) ; - } else if (ivl_dist_r == min_dist.first) { - // if same dist append interval - min_dist.second.push_back(interval) ; - } - } + visit_all([&](const interval & interval) { extent(interval); }); + return extent.x; + } + + // Check all constraints. + // If first is false, second is invalid. + std::pair> is_valid() const { + const auto minmaxStop = std::minmax_element(intervals.begin(), intervals.end(), + IntervalStopCmp()); + const auto minmaxStart = std::minmax_element(intervals.begin(), intervals.end(), + IntervalStartCmp()); + + std::pair> result = {true, { std::numeric_limits::max(), + std::numeric_limits::min() }}; + if (!intervals.empty()) { + result.second.first = std::min(result.second.first, minmaxStart.first->start); + result.second.second = std::min(result.second.second, minmaxStop.second->stop); + } + if (left) { + auto valid = left->is_valid(); + result.first &= valid.first; + result.second.first = std::min(result.second.first, valid.second.first); + result.second.second = std::min(result.second.second, valid.second.second); + if (!result.first) { return result; } + if (valid.second.second >= center) { + result.first = false; + return result; } - } else if (!intervals.empty() && (stop <= intervals.front().start)) { - for (typename intervalVector::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { - const interval& interval = *i; - if (interval.start > intervals.front().start) { - continue ; - } else { - // find distance on left - int ivl_dist_l = interval.start - stop ; - // if smaller than best min dist found update pair with dist and intervals - if (ivl_dist_l <= min_dist.first) { - min_dist.first = ivl_dist_l; - min_dist.second.clear() ; - min_dist.second.push_back(interval) ; - } else if (ivl_dist_l == min_dist.first) { - // if same dist append intervals - min_dist.second.push_back(interval) ; - } - } + } + if (right) { + auto valid = right->is_valid(); + result.first &= valid.first; + result.second.first = std::min(result.second.first, valid.second.first); + result.second.second = std::min(result.second.second, valid.second.second); + if (!result.first) { return result; } + if (valid.second.first <= center) { + result.first = false; + return result; } } + if (!std::is_sorted(intervals.begin(), intervals.end(), IntervalStartCmp())) { + result.first = false; + } + return result; + } + friend std::ostream& operator<<(std::ostream& os, const IntervalTree& itree) { + return writeOut(os, itree); + } - if (left && start <= center) { - left->findClosest(start, stop, closest, min_dist); + friend std::ostream& writeOut(std::ostream& os, const IntervalTree& itree, + std::size_t depth = 0) { + auto pad = [&]() { for (std::size_t i = 0; i != depth; ++i) { os << ' '; } }; + pad(); os << "center: " << itree.center << '\n'; + for (const interval & inter : itree.intervals) { + pad(); os << inter << '\n'; } - - if (right && stop >= center) { - right->findClosest(start, stop, closest, min_dist); + if (itree.left) { + pad(); os << "left:\n"; + writeOut(os, *itree.left, depth + 1); + } else { + pad(); os << "left: nullptr\n"; } - - // Finally report all of the non-overlapping closest intervals, only if at a left_node - if (!(right && left)) { - closest.insert(closest.end(), min_dist.second.begin(), min_dist.second.end()) ; + if (itree.right) { + pad(); os << "right:\n"; + writeOut(os, *itree.right, depth + 1); + } else { + pad(); os << "right: nullptr\n"; } + return os; } - ~IntervalTree(void) = default; + friend void findClosest(const IntervalTree& tree, int start, int stop, + interval_vector& closest, + std::pair& min_dist); +private: + interval_vector intervals; + std::unique_ptr left; + std::unique_ptr right; + Scalar center; }; +#ifdef USE_INTERVAL_TREE_NAMESPACE +} +#endif + +#endif #endif diff --git a/inst/include/intervalTree_ext.h b/inst/include/intervalTree_ext.h new file mode 100644 index 00000000..9720373f --- /dev/null +++ b/inst/include/intervalTree_ext.h @@ -0,0 +1,36 @@ +// intervalTree_ext.h +// +// Copyright (C) 2022 Jay Hesselberth and Kent Riemondy +// +// This file is part of valr. +// +// This software may be modified and distributed under the terms +// of the MIT license. See the LICENSE file for details. + +#ifndef valr__intervalTree_ext_H +#define valr__intervalTree_ext_H + +#include "valr.h" + + +template +K intervalOverlap(const Interval& a, const Interval& b) { + return std::min(a.stop, b.stop) - std::max(a.start, b.start) ; +} + +template +class IntervalStartSorter { +public: + bool operator()(const Interval& a, const Interval& b) { + return a.start < b.start; + } +}; + +template +class IntervalSorterDesc { +public: + bool operator()(const Interval& a, const Interval& b) { + return a.start > b.start; + } +}; +#endif diff --git a/inst/include/intervals.h b/inst/include/intervals.h index 2efbf223..bcaea0b3 100644 --- a/inst/include/intervals.h +++ b/inst/include/intervals.h @@ -13,9 +13,9 @@ #include "valr.h" // main interval types used in valr -typedef Interval ivl_t ; +typedef Interval ivl_t ; typedef std::vector ivl_vector_t ; -typedef IntervalTree ivl_tree_t ; +typedef IntervalTree ivl_tree_t ; inline ivl_vector_t makeIntervalVector(const DataFrame& df, const IntegerVector& si, diff --git a/inst/include/valr.h b/inst/include/valr.h index 9397cf07..b1647e72 100644 --- a/inst/include/valr.h +++ b/inst/include/valr.h @@ -19,6 +19,7 @@ using namespace Rcpp ; #include "grouped_dataframe.h" #include "IntervalTree.h" #include "intervals.h" +#include "intervalTree_ext.h" #include "group_apply.h" #include "genome.h" #include "random.h" diff --git a/revdep/.gitignore b/revdep/.gitignore deleted file mode 100644 index d3ac4e16..00000000 --- a/revdep/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -checks -library -checks.noindex -library.noindex -data.sqlite -*.html -download -lib -cloud.noindex diff --git a/revdep/README.md b/revdep/README.md deleted file mode 100644 index 781d95d5..00000000 --- a/revdep/README.md +++ /dev/null @@ -1,30 +0,0 @@ -# Platform - -|field |value | -|:--------|:-----------------------------------------| -|version |R version 4.2.0 (2022-04-22) | -|os |macOS Monterey 12.2.1 | -|system |x86_64, darwin17.0 | -|ui |RStudio | -|language |(EN) | -|collate |en_US.UTF-8 | -|ctype |en_US.UTF-8 | -|tz |America/Denver | -|date |2022-08-18 | -|rstudio |2022.07.1+554 Spotted Wakerobin (desktop) | -|pandoc |2.17.1.1 @ /opt/homebrew/bin/pandoc | - -# Dependencies - -|package |old |new |Δ | -|:-----------------|:-----|:----------|:--| -|valr |0.6.4 |0.6.4.9000 |* | -|broom |NA |1.0.0 |* | -|farver |NA |2.1.1 |* | -|generics |NA |0.1.3 |* | -|GenomeInfoDb |NA |1.32.3 |* | -|GenomicAlignments |NA |1.32.1 |* | -|RCurl |NA |1.98-1.8 |* | - -# Revdeps - diff --git a/revdep/cran.md b/revdep/cran.md deleted file mode 100644 index 62d1ce49..00000000 --- a/revdep/cran.md +++ /dev/null @@ -1,7 +0,0 @@ -## revdepcheck results - -We checked 1 reverse dependencies (0 from CRAN + 1 from Bioconductor), comparing R CMD check results across CRAN and dev versions of this package. - - * We saw 0 new problems - * We failed to check 0 packages - diff --git a/revdep/failures.md b/revdep/failures.md deleted file mode 100644 index 9a207363..00000000 --- a/revdep/failures.md +++ /dev/null @@ -1 +0,0 @@ -*Wow, no problems at all. :)* \ No newline at end of file diff --git a/revdep/problems.md b/revdep/problems.md deleted file mode 100644 index 9a207363..00000000 --- a/revdep/problems.md +++ /dev/null @@ -1 +0,0 @@ -*Wow, no problems at all. :)* \ No newline at end of file diff --git a/src/Makevars b/src/Makevars index 90274cc2..e5688685 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,3 +1,2 @@ CXX_STD = CXX11 PKG_CPPFLAGS = -I../inst/include - diff --git a/src/closest.cpp b/src/closest.cpp index 96d3c044..c999d2bf 100644 --- a/src/closest.cpp +++ b/src/closest.cpp @@ -9,22 +9,98 @@ #include "valr.h" -void closest_grouped(ivl_vector_t& vx, ivl_vector_t& vy, +void findClosest(const IntervalTree& tree, int start, int stop, + ivl_vector_t& closest, + std::pair& min_dist) { + + ivl_vector_t intervals = tree.intervals; + typedef ivl_t interval; + if (!intervals.empty() && !(stop < intervals.front().start)) { + for (typename ivl_vector_t::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { + const interval& interval = *i; + if (interval.stop > start && interval.start < stop) { + // adjacent intervals are considered non-overlappping + closest.push_back(interval); + } else if (stop <= interval.start) { + // find distance on left + int ivl_dist_l = interval.start - stop ; + // if smaller than best min dist found update pair with dist and intervals + if (ivl_dist_l < min_dist.first) { + min_dist.first = ivl_dist_l ; + min_dist.second.clear() ; + min_dist.second.push_back(interval) ; + } else if (ivl_dist_l == min_dist.first) { + // if same dist append intervals + min_dist.second.push_back(interval) ; + } + } else if (start >= interval.stop) { + // find distance on right + int ivl_dist_r = start - interval.stop ; + // if smaller than best min dist found update pair with dist and intervals + if (ivl_dist_r < min_dist.first) { + min_dist.first = ivl_dist_r ; + min_dist.second.clear() ; + min_dist.second.push_back(interval) ; + } else if (ivl_dist_r == min_dist.first) { + // if same dist append interval + min_dist.second.push_back(interval) ; + } + } + } + } else if (!intervals.empty() && (stop <= intervals.front().start)) { + for (typename ivl_vector_t::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { + const interval& interval = *i; + if (interval.start > intervals.front().start) { + continue ; + } else { + // find distance on left + int ivl_dist_l = interval.start - stop ; + // if smaller than best min dist found update pair with dist and intervals + if (ivl_dist_l <= min_dist.first) { + min_dist.first = ivl_dist_l; + min_dist.second.clear() ; + min_dist.second.push_back(interval) ; + } else if (ivl_dist_l == min_dist.first) { + // if same dist append intervals + min_dist.second.push_back(interval) ; + } + } + } + } + + + if (tree.left && start <= tree.center) { + findClosest(*tree.left, start, stop, closest, min_dist); + } + + if (tree.right && stop >= tree.center) { + findClosest(*tree.right, start, stop,closest, min_dist); + } + + // Finally report all of the non-overlapping closest intervals, only if at a left_node + if (!(tree.right && tree.left)) { + closest.insert(closest.end(), min_dist.second.begin(), min_dist.second.end()) ; + } +} + +void closest_grouped(const ivl_vector_t& vx, ivl_vector_t& vy, std::vector& indices_x, std::vector& indices_y, std::vector& overlap_sizes, std::vector& distance_sizes) { - ivl_tree_t tree_y(vy) ; - std::pair min_dist; - // initiatialize maximum left and right distances to minimize for closest + + // initialize maximum left and right distances to minimize for closest int max_end = std::max(vx.back().stop, vy.back().stop) ; + // vy should not be referenced after std::move + ivl_tree_t tree_y(std::move(vy)) ; + for (auto const& vx_it : vx) { ivl_vector_t closest ; ivl_vector_t closest_ivls ; min_dist = std::make_pair(max_end, closest_ivls) ; - tree_y.findClosest(vx_it.start, vx_it.stop, closest, min_dist) ; + findClosest(tree_y, vx_it.start, vx_it.stop, closest, min_dist) ; for (auto const& ov_it : closest) { diff --git a/src/coverage.cpp b/src/coverage.cpp index fc32dae5..e89f0c7c 100644 --- a/src/coverage.cpp +++ b/src/coverage.cpp @@ -14,7 +14,7 @@ void coverage_group(ivl_vector_t vx, ivl_vector_t vy, std::vector& x_ivl_lengths, std::vector& fractions_covered, std::vector& indices_x) { - ivl_tree_t tree_y(vy) ; + ivl_tree_t tree_y(std::move(vy)) ; ivl_vector_t overlaps ; IntervalSorterDesc intervalSorterDesc; @@ -22,7 +22,7 @@ void coverage_group(ivl_vector_t vx, ivl_vector_t vy, indices_x.push_back(it.value); - tree_y.findOverlapping(it.start, it.stop, overlaps) ; + overlaps = tree_y.findOverlapping(it.start, it.stop) ; // compute number of overlaps int overlap_count = overlaps.size(); diff --git a/src/intersect.cpp b/src/intersect.cpp index 69377e2a..d5e1bfe0 100644 --- a/src/intersect.cpp +++ b/src/intersect.cpp @@ -51,12 +51,13 @@ void intersect_group(ivl_vector_t vx, ivl_vector_t vy, std::vector& indices_x, std::vector& indices_y, std::vector& overlap_sizes, bool invert = false) { - ivl_tree_t tree_y(vy) ; + // do not call vy after std::move + ivl_tree_t tree_y(std::move(vy)) ; ivl_vector_t overlaps ; for (auto it : vx) { - tree_y.findOverlapping(it.start, it.stop, overlaps) ; + overlaps = tree_y.findOverlapping(it.start, it.stop) ; if (overlaps.empty() && invert) { indices_x.push_back(it.value) ; diff --git a/src/shuffle.cpp b/src/shuffle.cpp index ae47a357..1539d2c2 100644 --- a/src/shuffle.cpp +++ b/src/shuffle.cpp @@ -24,7 +24,7 @@ chrom_tree_t makeIntervalTrees(DataFrame incl, interval_map_t interval_map) { std::string chrom = kv.first ; ivl_vector_t iv = kv.second ; - chrom_trees[chrom] = ivl_tree_t(iv) ; + chrom_trees[chrom] = ivl_tree_t(std::move(iv)) ; } return chrom_trees ; diff --git a/src/subtract.cpp b/src/subtract.cpp index 094ac122..146b80e0 100644 --- a/src/subtract.cpp +++ b/src/subtract.cpp @@ -13,7 +13,7 @@ void subtract_group(ivl_vector_t vx, ivl_vector_t vy, std::vector& indices_out, std::vector& starts_out, std::vector& ends_out) { - ivl_tree_t tree_y(vy) ; + ivl_tree_t tree_y(std::move(vy)) ; ivl_vector_t overlaps ; IntervalStartSorter ivl_sorter ; @@ -22,7 +22,7 @@ void subtract_group(ivl_vector_t vx, ivl_vector_t vy, auto x_start = it.start; auto x_stop = it.stop; - tree_y.findOverlapping(it.start, it.stop, overlaps) ; + overlaps = tree_y.findOverlapping(it.start, it.stop) ; // compute number of overlaps int overlap_count = overlaps.size(); From 9a2e59fdf0821dc0edd43af86565bf39025d24a8 Mon Sep 17 00:00:00 2001 From: Kent Riemondy Date: Sat, 20 Aug 2022 14:37:03 -0600 Subject: [PATCH 2/6] add news and update copyright for modified files --- NEWS.md | 2 ++ inst/include/intervalTree_ext.h | 4 ++-- inst/include/intervals.h | 2 +- inst/include/valr.h | 4 ++-- src/closest.cpp | 2 +- src/coverage.cpp | 2 +- src/intersect.cpp | 2 +- src/subtract.cpp | 2 +- 8 files changed, 11 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4b1d4217..02044b70 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # valr (development version) +* Updated intervalTree header to commit f0c4046 + # valr 0.6.5 * Handle `max_dist` for first intervals in `bed_cluster()` (#388) diff --git a/inst/include/intervalTree_ext.h b/inst/include/intervalTree_ext.h index 9720373f..60b4f2de 100644 --- a/inst/include/intervalTree_ext.h +++ b/inst/include/intervalTree_ext.h @@ -1,4 +1,4 @@ -// intervalTree_ext.h +// IntervalTree_ext.h // // Copyright (C) 2022 Jay Hesselberth and Kent Riemondy // @@ -12,7 +12,6 @@ #include "valr.h" - template K intervalOverlap(const Interval& a, const Interval& b) { return std::min(a.stop, b.stop) - std::max(a.start, b.start) ; @@ -33,4 +32,5 @@ class IntervalSorterDesc { return a.start > b.start; } }; + #endif diff --git a/inst/include/intervals.h b/inst/include/intervals.h index bcaea0b3..85d66642 100644 --- a/inst/include/intervals.h +++ b/inst/include/intervals.h @@ -1,6 +1,6 @@ // intervals.h // -// Copyright (C) 2016 - 2018 Jay Hesselberth and Kent Riemondy +// Copyright (C) 2016 - 2022 Jay Hesselberth and Kent Riemondy // // This file is part of valr. // diff --git a/inst/include/valr.h b/inst/include/valr.h index b1647e72..ab30ebf3 100644 --- a/inst/include/valr.h +++ b/inst/include/valr.h @@ -1,6 +1,6 @@ // valr.h // -// Copyright (C) 2016 - 2018 Jay Hesselberth and Kent Riemondy +// Copyright (C) 2016 - 2022 Jay Hesselberth and Kent Riemondy // // This file is part of valr. // @@ -19,7 +19,7 @@ using namespace Rcpp ; #include "grouped_dataframe.h" #include "IntervalTree.h" #include "intervals.h" -#include "intervalTree_ext.h" +#include "IntervalTree_ext.h" #include "group_apply.h" #include "genome.h" #include "random.h" diff --git a/src/closest.cpp b/src/closest.cpp index c999d2bf..5b0a52b4 100644 --- a/src/closest.cpp +++ b/src/closest.cpp @@ -1,6 +1,6 @@ // closest.cpp // -// Copyright (C) 2016 - 2018 Jay Hesselberth and Kent Riemondy +// Copyright (C) 2016 - 2022 Jay Hesselberth and Kent Riemondy // // This file is part of valr. // diff --git a/src/coverage.cpp b/src/coverage.cpp index e89f0c7c..16c49a79 100644 --- a/src/coverage.cpp +++ b/src/coverage.cpp @@ -1,6 +1,6 @@ // coverage.cpp // -// Copyright (C) 2016 - 2018 Jay Hesselberth and Kent Riemondy +// Copyright (C) 2016 - 2022 Jay Hesselberth and Kent Riemondy // // This file is part of valr. // diff --git a/src/intersect.cpp b/src/intersect.cpp index d5e1bfe0..6c080ce7 100644 --- a/src/intersect.cpp +++ b/src/intersect.cpp @@ -1,6 +1,6 @@ // intersect.cpp // -// Copyright (C) 2016 - 2018 Jay Hesselberth and Kent Riemondy +// Copyright (C) 2016 - 2022 Jay Hesselberth and Kent Riemondy // // This file is part of valr. // diff --git a/src/subtract.cpp b/src/subtract.cpp index 146b80e0..4899cd0b 100644 --- a/src/subtract.cpp +++ b/src/subtract.cpp @@ -1,6 +1,6 @@ // subtract.cpp // -// Copyright (C) 2016 - 2018 Jay Hesselberth and Kent Riemondy +// Copyright (C) 2016 - 2022 Jay Hesselberth and Kent Riemondy // // This file is part of valr. // From 83a1f5e38cc439f147629bc55e01d2fbe6bd3207 Mon Sep 17 00:00:00 2001 From: Kent Riemondy Date: Sat, 20 Aug 2022 14:58:40 -0600 Subject: [PATCH 3/6] rename file in git --- inst/include/{intervalTree_ext.h => IntervalTree_ext.h} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename inst/include/{intervalTree_ext.h => IntervalTree_ext.h} (100%) diff --git a/inst/include/intervalTree_ext.h b/inst/include/IntervalTree_ext.h similarity index 100% rename from inst/include/intervalTree_ext.h rename to inst/include/IntervalTree_ext.h From 805a1d39da20a9e15efeac15b54752932594a2bf Mon Sep 17 00:00:00 2001 From: Kent Riemondy Date: Tue, 23 Aug 2022 12:35:48 -0600 Subject: [PATCH 4/6] use template function definition and call via wrapper --- inst/include/IntervalTree.h | 8 +- inst/include/IntervalTree_ext.h | 75 ++++++++++++++++ src/closest.cpp | 149 +++++++++++++++++--------------- 3 files changed, 158 insertions(+), 74 deletions(-) diff --git a/inst/include/IntervalTree.h b/inst/include/IntervalTree.h index d96edeb7..bb09ab72 100644 --- a/inst/include/IntervalTree.h +++ b/inst/include/IntervalTree.h @@ -362,10 +362,10 @@ class IntervalTree { } return os; } - - friend void findClosest(const IntervalTree& tree, int start, int stop, - interval_vector& closest, - std::pair& min_dist); + template + friend void findClosestIvls(const IntervalTree& tree, int start, int stop, + interval_vector& closest, + std::pair& min_dist); private: interval_vector intervals; std::unique_ptr left; diff --git a/inst/include/IntervalTree_ext.h b/inst/include/IntervalTree_ext.h index 60b4f2de..e67c9dc5 100644 --- a/inst/include/IntervalTree_ext.h +++ b/inst/include/IntervalTree_ext.h @@ -33,4 +33,79 @@ class IntervalSorterDesc { } }; +template +void findClosestIvls(const IntervalTree& tree, int start, int stop, + ivl_vector_t& closest, + std::pair& min_dist) { + + ivl_vector_t intervals = tree.intervals; + typedef ivl_t interval; + if (!intervals.empty() && !(stop < intervals.front().start)) { + for (typename ivl_vector_t::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { + const interval& interval = *i; + if (interval.stop > start && interval.start < stop) { + // adjacent intervals are considered non-overlappping + closest.push_back(interval); + } else if (stop <= interval.start) { + // find distance on left + int ivl_dist_l = interval.start - stop ; + // if smaller than best min dist found update pair with dist and intervals + if (ivl_dist_l < min_dist.first) { + min_dist.first = ivl_dist_l ; + min_dist.second.clear() ; + min_dist.second.push_back(interval) ; + } else if (ivl_dist_l == min_dist.first) { + // if same dist append intervals + min_dist.second.push_back(interval) ; + } + } else if (start >= interval.stop) { + // find distance on right + int ivl_dist_r = start - interval.stop ; + // if smaller than best min dist found update pair with dist and intervals + if (ivl_dist_r < min_dist.first) { + min_dist.first = ivl_dist_r ; + min_dist.second.clear() ; + min_dist.second.push_back(interval) ; + } else if (ivl_dist_r == min_dist.first) { + // if same dist append interval + min_dist.second.push_back(interval) ; + } + } + } + } else if (!intervals.empty() && (stop <= intervals.front().start)) { + for (typename ivl_vector_t::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { + const interval& interval = *i; + if (interval.start > intervals.front().start) { + continue ; + } else { + // find distance on left + int ivl_dist_l = interval.start - stop ; + // if smaller than best min dist found update pair with dist and intervals + if (ivl_dist_l <= min_dist.first) { + min_dist.first = ivl_dist_l; + min_dist.second.clear() ; + min_dist.second.push_back(interval) ; + } else if (ivl_dist_l == min_dist.first) { + // if same dist append intervals + min_dist.second.push_back(interval) ; + } + } + } + } + + + if (tree.left && start <= tree.center) { + findClosestIvls(*tree.left, start, stop, closest, min_dist); + } + + if (tree.right && stop >= tree.center) { + findClosestIvls(*tree.right, start, stop,closest, min_dist); + } + + // Finally report all of the non-overlapping closest intervals, only if at a left_node + if (!(tree.right && tree.left)) { + closest.insert(closest.end(), min_dist.second.begin(), min_dist.second.end()) ; + } +} + #endif diff --git a/src/closest.cpp b/src/closest.cpp index 5b0a52b4..4c9ceb7b 100644 --- a/src/closest.cpp +++ b/src/closest.cpp @@ -9,79 +9,88 @@ #include "valr.h" -void findClosest(const IntervalTree& tree, int start, int stop, - ivl_vector_t& closest, - std::pair& min_dist) { - - ivl_vector_t intervals = tree.intervals; - typedef ivl_t interval; - if (!intervals.empty() && !(stop < intervals.front().start)) { - for (typename ivl_vector_t::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { - const interval& interval = *i; - if (interval.stop > start && interval.start < stop) { - // adjacent intervals are considered non-overlappping - closest.push_back(interval); - } else if (stop <= interval.start) { - // find distance on left - int ivl_dist_l = interval.start - stop ; - // if smaller than best min dist found update pair with dist and intervals - if (ivl_dist_l < min_dist.first) { - min_dist.first = ivl_dist_l ; - min_dist.second.clear() ; - min_dist.second.push_back(interval) ; - } else if (ivl_dist_l == min_dist.first) { - // if same dist append intervals - min_dist.second.push_back(interval) ; - } - } else if (start >= interval.stop) { - // find distance on right - int ivl_dist_r = start - interval.stop ; - // if smaller than best min dist found update pair with dist and intervals - if (ivl_dist_r < min_dist.first) { - min_dist.first = ivl_dist_r ; - min_dist.second.clear() ; - min_dist.second.push_back(interval) ; - } else if (ivl_dist_r == min_dist.first) { - // if same dist append interval - min_dist.second.push_back(interval) ; - } - } - } - } else if (!intervals.empty() && (stop <= intervals.front().start)) { - for (typename ivl_vector_t::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { - const interval& interval = *i; - if (interval.start > intervals.front().start) { - continue ; - } else { - // find distance on left - int ivl_dist_l = interval.start - stop ; - // if smaller than best min dist found update pair with dist and intervals - if (ivl_dist_l <= min_dist.first) { - min_dist.first = ivl_dist_l; - min_dist.second.clear() ; - min_dist.second.push_back(interval) ; - } else if (ivl_dist_l == min_dist.first) { - // if same dist append intervals - min_dist.second.push_back(interval) ; - } - } - } - } - - if (tree.left && start <= tree.center) { - findClosest(*tree.left, start, stop, closest, min_dist); - } +void findClosest(const IntervalTree& tree, int start, int stop, + ivl_vector_t& closest, + std::pair& min_dist) { + findClosestIvls(tree, start, stop, closest, min_dist); +} - if (tree.right && stop >= tree.center) { - findClosest(*tree.right, start, stop,closest, min_dist); - } - // Finally report all of the non-overlapping closest intervals, only if at a left_node - if (!(tree.right && tree.left)) { - closest.insert(closest.end(), min_dist.second.begin(), min_dist.second.end()) ; - } -} +// template +// void findClosest(const IntervalTree& tree, int start, int stop, +// ivl_vector_t& closest, +// std::pair& min_dist) { +// +// ivl_vector_t intervals = tree.intervals; +// typedef ivl_t interval; +// if (!intervals.empty() && !(stop < intervals.front().start)) { +// for (typename ivl_vector_t::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { +// const interval& interval = *i; +// if (interval.stop > start && interval.start < stop) { +// // adjacent intervals are considered non-overlappping +// closest.push_back(interval); +// } else if (stop <= interval.start) { +// // find distance on left +// int ivl_dist_l = interval.start - stop ; +// // if smaller than best min dist found update pair with dist and intervals +// if (ivl_dist_l < min_dist.first) { +// min_dist.first = ivl_dist_l ; +// min_dist.second.clear() ; +// min_dist.second.push_back(interval) ; +// } else if (ivl_dist_l == min_dist.first) { +// // if same dist append intervals +// min_dist.second.push_back(interval) ; +// } +// } else if (start >= interval.stop) { +// // find distance on right +// int ivl_dist_r = start - interval.stop ; +// // if smaller than best min dist found update pair with dist and intervals +// if (ivl_dist_r < min_dist.first) { +// min_dist.first = ivl_dist_r ; +// min_dist.second.clear() ; +// min_dist.second.push_back(interval) ; +// } else if (ivl_dist_r == min_dist.first) { +// // if same dist append interval +// min_dist.second.push_back(interval) ; +// } +// } +// } +// } else if (!intervals.empty() && (stop <= intervals.front().start)) { +// for (typename ivl_vector_t::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { +// const interval& interval = *i; +// if (interval.start > intervals.front().start) { +// continue ; +// } else { +// // find distance on left +// int ivl_dist_l = interval.start - stop ; +// // if smaller than best min dist found update pair with dist and intervals +// if (ivl_dist_l <= min_dist.first) { +// min_dist.first = ivl_dist_l; +// min_dist.second.clear() ; +// min_dist.second.push_back(interval) ; +// } else if (ivl_dist_l == min_dist.first) { +// // if same dist append intervals +// min_dist.second.push_back(interval) ; +// } +// } +// } +// } +// +// +// if (tree.left && start <= tree.center) { +// findClosest(*tree.left, start, stop, closest, min_dist); +// } +// +// if (tree.right && stop >= tree.center) { +// findClosest(*tree.right, start, stop,closest, min_dist); +// } +// +// // Finally report all of the non-overlapping closest intervals, only if at a left_node +// if (!(tree.right && tree.left)) { +// closest.insert(closest.end(), min_dist.second.begin(), min_dist.second.end()) ; +// } +// } void closest_grouped(const ivl_vector_t& vx, ivl_vector_t& vy, std::vector& indices_x, std::vector& indices_y, From 3a6c04defa0a0db2d308167904e8b3274d2de18e Mon Sep 17 00:00:00 2001 From: Kent Riemondy Date: Wed, 24 Aug 2022 08:32:48 -0600 Subject: [PATCH 5/6] remove SEXP type from dataframe builder, can crash R during gc() call. Also remove add_vec, instead add to df builder class manually, avoids needing templates for each Rcpp vector class --- inst/include/DataFrameBuilder.h | 34 +++++-------- src/closest.cpp | 84 +++------------------------------ src/coverage.cpp | 15 ++++-- src/dist.cpp | 3 +- src/intersect.cpp | 3 +- src/merge.cpp | 7 ++- 6 files changed, 39 insertions(+), 107 deletions(-) diff --git a/inst/include/DataFrameBuilder.h b/inst/include/DataFrameBuilder.h index a4ed3265..98b61324 100644 --- a/inst/include/DataFrameBuilder.h +++ b/inst/include/DataFrameBuilder.h @@ -1,6 +1,6 @@ // DataFrameBuilder.h // -// Copyright (C) 2016 - 2018 Jay Hesselberth and Kent Riemondy +// Copyright (C) 2016 - 2022 Jay Hesselberth and Kent Riemondy // // This file is part of valr. // @@ -15,26 +15,18 @@ class DataFrameBuilder { public: std::vector names ; - std::vector data ; // set to SEXP to handle any R type vector + List data ; DataFrameBuilder() {} ; // output List with: List out = DataFrameBuilder inline operator List() const { - List out = wrap(data) ; - return out ; + return data ; } inline size_t size() const { return data.size() ; } - // add vector to DataFrameBuilder - // non-SEXP objects should be passed with Rcpp::wrap(obj) - inline void add_vec(std::string name, SEXP x) { - names.push_back(name) ; - data.push_back(x) ; - } - // add dataframe to DataFrameBuilder with suffix inline void add_df(const DataFrame& df, std::string suffix = "", @@ -50,8 +42,8 @@ class DataFrameBuilder { } else if (drop_chrom) { continue ; } - - this->add_vec(name, df[i]) ; + this->names.push_back(name) ; + this->data.push_back(df[i]) ; } } @@ -69,25 +61,25 @@ class DataFrameBuilder { if (name == "chrom" && drop_chrom) { continue ; } - this->add_vec(name, df[i]) ; + this->names.push_back(name) ; + this->data.push_back(df[i]) ; } } // apply common attributes to output dataframe // by default groups are stripped and tbl_df is returned inline List format_df(int nrow) { - List res = *this ; auto names = this->names ; - set_rownames(res, nrow) ; - res.attr("names") = names ; + set_rownames(this->data, nrow) ; + this->data.attr("names") = names ; - if (Rf_inherits(res, "grouped_df")) { - ValrGroupedDataFrame::strip_groups(res) ; + if (Rf_inherits(this->data, "grouped_df")) { + ValrGroupedDataFrame::strip_groups(this->data) ; } - res.attr("class") = Rcpp::CharacterVector::create("tbl_df", "tbl", "data.frame") ; - return res ; + this->data.attr("class") = Rcpp::CharacterVector::create("tbl_df", "tbl", "data.frame") ; + return this->data ; } }; diff --git a/src/closest.cpp b/src/closest.cpp index 4c9ceb7b..e9224af1 100644 --- a/src/closest.cpp +++ b/src/closest.cpp @@ -16,82 +16,6 @@ void findClosest(const IntervalTree& tree, int start, int stop, findClosestIvls(tree, start, stop, closest, min_dist); } - -// template -// void findClosest(const IntervalTree& tree, int start, int stop, -// ivl_vector_t& closest, -// std::pair& min_dist) { -// -// ivl_vector_t intervals = tree.intervals; -// typedef ivl_t interval; -// if (!intervals.empty() && !(stop < intervals.front().start)) { -// for (typename ivl_vector_t::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { -// const interval& interval = *i; -// if (interval.stop > start && interval.start < stop) { -// // adjacent intervals are considered non-overlappping -// closest.push_back(interval); -// } else if (stop <= interval.start) { -// // find distance on left -// int ivl_dist_l = interval.start - stop ; -// // if smaller than best min dist found update pair with dist and intervals -// if (ivl_dist_l < min_dist.first) { -// min_dist.first = ivl_dist_l ; -// min_dist.second.clear() ; -// min_dist.second.push_back(interval) ; -// } else if (ivl_dist_l == min_dist.first) { -// // if same dist append intervals -// min_dist.second.push_back(interval) ; -// } -// } else if (start >= interval.stop) { -// // find distance on right -// int ivl_dist_r = start - interval.stop ; -// // if smaller than best min dist found update pair with dist and intervals -// if (ivl_dist_r < min_dist.first) { -// min_dist.first = ivl_dist_r ; -// min_dist.second.clear() ; -// min_dist.second.push_back(interval) ; -// } else if (ivl_dist_r == min_dist.first) { -// // if same dist append interval -// min_dist.second.push_back(interval) ; -// } -// } -// } -// } else if (!intervals.empty() && (stop <= intervals.front().start)) { -// for (typename ivl_vector_t::const_iterator i = intervals.begin(); i != intervals.end(); ++i) { -// const interval& interval = *i; -// if (interval.start > intervals.front().start) { -// continue ; -// } else { -// // find distance on left -// int ivl_dist_l = interval.start - stop ; -// // if smaller than best min dist found update pair with dist and intervals -// if (ivl_dist_l <= min_dist.first) { -// min_dist.first = ivl_dist_l; -// min_dist.second.clear() ; -// min_dist.second.push_back(interval) ; -// } else if (ivl_dist_l == min_dist.first) { -// // if same dist append intervals -// min_dist.second.push_back(interval) ; -// } -// } -// } -// } -// -// -// if (tree.left && start <= tree.center) { -// findClosest(*tree.left, start, stop, closest, min_dist); -// } -// -// if (tree.right && stop >= tree.center) { -// findClosest(*tree.right, start, stop,closest, min_dist); -// } -// -// // Finally report all of the non-overlapping closest intervals, only if at a left_node -// if (!(tree.right && tree.left)) { -// closest.insert(closest.end(), min_dist.second.begin(), min_dist.second.end()) ; -// } -// } - void closest_grouped(const ivl_vector_t& vx, ivl_vector_t& vy, std::vector& indices_x, std::vector& indices_y, std::vector& overlap_sizes, std::vector& distance_sizes) { @@ -169,8 +93,12 @@ DataFrame closest_impl(ValrGroupedDataFrame x, ValrGroupedDataFrame y, out.add_df(subset_y, suffix_y, true) ; // overlaps and distances - out.add_vec(".overlap", wrap(overlap_sizes)) ; - out.add_vec(".dist", wrap(distance_sizes)) ; + out.names.push_back(".overlap") ; + out.data.push_back(overlap_sizes) ; + + out.names.push_back(".dist") ; + out.data.push_back(distance_sizes) ; + auto nrows = subset_x.nrows() ; auto res = out.format_df(nrows) ; diff --git a/src/coverage.cpp b/src/coverage.cpp index 16c49a79..be81d604 100644 --- a/src/coverage.cpp +++ b/src/coverage.cpp @@ -160,10 +160,17 @@ DataFrame coverage_impl(ValrGroupedDataFrame x, ValrGroupedDataFrame y, out.add_df(subset_x, false) ; // additional columns - out.add_vec(".ints", wrap(overlap_counts)) ; - out.add_vec(".cov", wrap(ivls_bases_covered)) ; - out.add_vec(".len", wrap(x_ivl_lengths)) ; - out.add_vec(".frac", wrap(fractions_covered)) ; + out.names.push_back(".ints") ; + out.data.push_back(overlap_counts) ; + + out.names.push_back(".cov") ; + out.data.push_back(ivls_bases_covered) ; + + out.names.push_back(".len") ; + out.data.push_back(x_ivl_lengths) ; + + out.names.push_back(".frac") ; + out.data.push_back(fractions_covered) ; auto nrows = subset_x.nrows() ; auto res = out.format_df(nrows) ; diff --git a/src/dist.cpp b/src/dist.cpp index 76b879ea..394f8dc0 100644 --- a/src/dist.cpp +++ b/src/dist.cpp @@ -115,7 +115,8 @@ DataFrame dist_impl(ValrGroupedDataFrame x, ValrGroupedDataFrame y, // distances std::string distname = distcalc == "absdist" ? ".absdist" : ".reldist" ; - out.add_vec(distname, wrap(distances)) ; + out.names.push_back(distname); + out.data.push_back(distances); auto nrows = subset_x.nrows() ; auto res = out.format_df(nrows) ; diff --git a/src/intersect.cpp b/src/intersect.cpp index 6c080ce7..00c9a390 100644 --- a/src/intersect.cpp +++ b/src/intersect.cpp @@ -120,7 +120,8 @@ DataFrame intersect_impl(ValrGroupedDataFrame x, ValrGroupedDataFrame y, out.add_df(subset_y, suffix_y, true) ; // overlaps - out.add_vec(".overlap", wrap(overlap_sizes)) ; + out.names.push_back(".overlap"); + out.data.push_back(overlap_sizes); auto nrows = subset_x.nrows() ; auto res = out.format_df(nrows) ; diff --git a/src/merge.cpp b/src/merge.cpp index 6a1d0d92..1a2c27fa 100644 --- a/src/merge.cpp +++ b/src/merge.cpp @@ -133,8 +133,11 @@ DataFrame clusterMergedIntervals(const ValrGroupedDataFrame& gdf, out.add_df(df, false) ; // ids and overlaps - out.add_vec(".id_merge", wrap(ids)) ; - out.add_vec(".overlap_merge", wrap(overlaps)) ; + out.names.push_back(".id_merge"); + out.data.push_back(ids); + + out.names.push_back(".overlap_merge"); + out.data.push_back(overlaps); auto nrows = df.nrows() ; auto res = out.format_df(nrows) ; From a5f80ca2a5f50289e93f9df2ca6882abfee41b6d Mon Sep 17 00:00:00 2001 From: Kent Riemondy Date: Wed, 5 Oct 2022 11:34:11 -0600 Subject: [PATCH 6/6] move into copyright statement --- inst/include/IntervalTree_ext.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/inst/include/IntervalTree_ext.h b/inst/include/IntervalTree_ext.h index 49b9b06d..691b0ed1 100644 --- a/inst/include/IntervalTree_ext.h +++ b/inst/include/IntervalTree_ext.h @@ -6,18 +6,18 @@ // // This software may be modified and distributed under the terms // of the MIT license. See the LICENSE file for details. +// +// intervalOverlap and IntervalStartSorter +// from https://github.com/ekg/intervaltree +// circa commit 8fc4be9 Dec 13, 2015 +// see inst/include/intervalTree.h for copyright +// #ifndef valr__intervalTree_ext_H #define valr__intervalTree_ext_H #include "valr.h" -/* intervalOverlap and IntervalStartSorter - * from https://github.com/ekg/intervaltree - * circa commit 8fc4be9 Dec 13, 2015 - * see inst/include/intervalTree.h for copyright - */ - template K intervalOverlap(const Interval& a, const Interval& b) { return std::min(a.stop, b.stop) - std::max(a.start, b.start) ; @@ -31,8 +31,6 @@ class IntervalStartSorter { } }; -/* valr code below */ - template class IntervalSorterDesc { public: