Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfixes & improve RTree delete performance #397

Merged
merged 2 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions spatial/include/spatial/core/geometry/bbox.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ struct Box {
Box(const V &min_p, const V &max_p) : min(min_p), max(max_p) {
}

bool IsUnbounded() const {
return *this == Box();
}

// Only does a 2D intersection check
bool Intersects(const Box &other) const {
return !(min.x > other.max.x || max.x < other.min.x || min.y > other.max.y || max.y < other.min.y);
Expand Down
3 changes: 3 additions & 0 deletions spatial/include/spatial/core/index/rtree/rtree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ struct RTree {
RTreePointer MakePage(RTreeNodeType type) const;
static RTreePointer MakeRowId(row_t row_id);

string ToString() const;
void Print() const;

private:
void Free(RTreePointer &pointer);

Expand Down
44 changes: 22 additions & 22 deletions spatial/src/spatial/core/geometry/geometry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,14 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo
MakeMutable(geom, alloc);
}

auto used_to_have_z = geom.properties.HasZ();
auto used_to_have_m = geom.properties.HasM();
auto old_vertex_size = geom.properties.VertexSize();
const auto used_to_have_z = geom.properties.HasZ();
const auto used_to_have_m = geom.properties.HasM();
const auto old_vertex_size = geom.properties.VertexSize();

geom.properties.SetZ(has_z);
geom.properties.SetM(has_m);

auto new_vertex_size = geom.properties.VertexSize();
const auto new_vertex_size = geom.properties.VertexSize();
// Case 1: The new vertex size is larger than the old vertex size
if (new_vertex_size > old_vertex_size) {
geom.data_ptr = alloc.ReallocateAligned(geom.data_ptr, geom.data_count * old_vertex_size,
Expand All @@ -97,24 +97,24 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo
// 1. We go from XYM to XYZM
// This is special, because we need to slide the M value to the end of each vertex
for (int64_t i = geom.data_count - 1; i >= 0; i--) {
auto old_offset = i * old_vertex_size;
auto new_offset = i * new_vertex_size;
auto old_m_offset = old_offset + sizeof(double) * 2;
auto new_z_offset = new_offset + sizeof(double) * 2;
auto new_m_offset = new_offset + sizeof(double) * 3;
const auto old_offset = i * old_vertex_size;
const auto new_offset = i * new_vertex_size;
const auto old_m_offset = old_offset + sizeof(double) * 2;
const auto new_z_offset = new_offset + sizeof(double) * 2;
const auto new_m_offset = new_offset + sizeof(double) * 3;
// Move the M value
memcpy(geom.data_ptr + new_m_offset, geom.data_ptr + old_m_offset, sizeof(double));
// Set the new Z value
memcpy(geom.data_ptr + new_z_offset, &default_z, sizeof(double));
// Move the X and Y values
memcpy(geom.data_ptr + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2);
memmove(geom.data_ptr + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2);
}
} else if (!used_to_have_z && has_z && !used_to_have_m && has_m) {
// 2. We go from XY to XYZM
// This is special, because we need to add both the default Z and M values to the end of each vertex
for (int64_t i = geom.data_count - 1; i >= 0; i--) {
auto old_offset = i * old_vertex_size;
auto new_offset = i * new_vertex_size;
const auto old_offset = i * old_vertex_size;
const auto new_offset = i * new_vertex_size;
memcpy(geom.data_ptr + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2);
memcpy(geom.data_ptr + new_offset + sizeof(double) * 2, &default_z, sizeof(double));
memcpy(geom.data_ptr + new_offset + sizeof(double) * 3, &default_m, sizeof(double));
Expand All @@ -125,10 +125,10 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo
// 4. We go from XY to XYM
// 5. We go from XYZ to XYZM
// These are all really the same, we just add the default to the end
auto default_value = has_m ? default_m : default_z;
const auto default_value = has_m ? default_m : default_z;
for (int64_t i = geom.data_count - 1; i >= 0; i--) {
auto old_offset = i * old_vertex_size;
auto new_offset = i * new_vertex_size;
const auto old_offset = i * old_vertex_size;
const auto new_offset = i * new_vertex_size;
memmove(geom.data_ptr + new_offset, geom.data_ptr + old_offset, old_vertex_size);
memcpy(geom.data_ptr + new_offset + old_vertex_size, &default_value, sizeof(double));
}
Expand All @@ -138,9 +138,9 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo
else if (new_vertex_size == old_vertex_size) {
// This only happens when we go from XYZ -> XYM or XYM -> XYZ
// In this case we just need to set the default on the third dimension
auto default_value = has_m ? default_m : default_z;
const auto default_value = has_m ? default_m : default_z;
for (uint32_t i = 0; i < geom.data_count; i++) {
auto offset = i * new_vertex_size + sizeof(double) * 2;
const auto offset = i * new_vertex_size + sizeof(double) * 2;
memcpy(geom.data_ptr + offset, &default_value, sizeof(double));
}
}
Expand All @@ -153,17 +153,17 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo
// Special case: If we go from XYZM to XYM, we need to slide the M value to the end of each vertex
if (used_to_have_z && used_to_have_m && !has_z && has_m) {
for (uint32_t i = 0; i < geom.data_count; i++) {
auto old_offset = i * old_vertex_size;
auto new_offset = i * new_vertex_size;
const auto old_offset = i * old_vertex_size;
const auto new_offset = i * new_vertex_size;
memcpy(new_data + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2);
auto m_offset = old_offset + sizeof(double) * 3;
const auto m_offset = old_offset + sizeof(double) * 3;
memcpy(new_data + new_offset + sizeof(double) * 2, geom.data_ptr + m_offset, sizeof(double));
}
} else {
// Otherwise, we just copy the data over
for (uint32_t i = 0; i < geom.data_count; i++) {
auto old_offset = i * old_vertex_size;
auto new_offset = i * new_vertex_size;
const auto old_offset = i * old_vertex_size;
const auto new_offset = i * new_vertex_size;
memcpy(new_data + new_offset, geom.data_ptr + old_offset, new_vertex_size);
}
}
Expand Down
90 changes: 84 additions & 6 deletions spatial/src/spatial/core/index/rtree/rtree.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "spatial/core/index/rtree/rtree.hpp"
#include "duckdb/common/printer.hpp"

namespace spatial {

Expand Down Expand Up @@ -415,6 +416,9 @@ void RTree::RootInsert(RTreeEntry &root_entry, const RTreeEntry &new_entry) {
// Delete
//------------------------------------------------------------------------------
DeleteResult RTree::NodeDelete(RTreeEntry &entry, const RTreeEntry &target, vector<RTreeEntry> &orphans) {
if(!entry.bounds.Intersects(target.bounds)) {
return {false, false, false};
}
const auto is_leaf = entry.pointer.IsLeafPage();
return is_leaf ? LeafDelete(entry, target, orphans) : BranchDelete(entry, target, orphans);
}
Expand Down Expand Up @@ -461,8 +465,8 @@ DeleteResult RTree::BranchDelete(RTreeEntry &entry, const RTreeEntry &target, ve
entry.bounds = node.GetBounds();
shrunk = entry.bounds != old_bounds;

// Assert that the bounds shrunk
D_ASSERT(entry.bounds.Area() <= old_bounds.Area());
// If the min capacity is zero, the bounds can grow when a node becomes empty
D_ASSERT(entry.bounds.IsUnbounded() || entry.bounds.Area() <= old_bounds.Area());
}
return {true, shrunk, false};
}
Expand All @@ -475,7 +479,8 @@ DeleteResult RTree::BranchDelete(RTreeEntry &entry, const RTreeEntry &target, ve
entry.bounds = node.GetBounds();
shrunk = entry.bounds != old_bounds;

D_ASSERT(entry.bounds.Area() <= old_bounds.Area());
// If the min capacity is zero, the bounds can grow when a node becomes empty
D_ASSERT(entry.bounds.IsUnbounded() || (entry.bounds.Area() <= old_bounds.Area()));
}

return {true, shrunk, false};
Expand All @@ -488,15 +493,21 @@ DeleteResult RTree::LeafDelete(RTreeEntry &entry, const RTreeEntry &target, vect

// Do a binary search with std::lower_bound to find the matching rowid
// This is faster than a linear search
auto it = std::lower_bound(node.begin(), node.end(), target.pointer.GetRowId(),
const auto it = std::lower_bound(node.begin(), node.end(), target.pointer.GetRowId(),
[](const RTreeEntry &item, const row_t &row) { return item.pointer.GetRowId() < row; });
if (it == node.end()) {
// Not found in this leaf
return {false, false, false};
}

auto &child = *it;
auto child_idx = it - node.begin();
const auto &child = *it;

// Ok, did the binary search actually find the rowid?
if(child.pointer.GetRowId() != target.pointer.GetRowId()) {
return {false, false, false};
}

const auto child_idx = it - node.begin();

D_ASSERT(child.pointer.IsRowId());

Expand Down Expand Up @@ -581,6 +592,73 @@ void RTree::RootDelete(RTreeEntry &root, const RTreeEntry &target) {
}
}

// Print as ascii tree
string RTree::ToString() const {
string result;

struct PrintState {
RTreePointer pointer;
idx_t entry_idx;
explicit PrintState(const RTreePointer &pointer_p) : pointer(pointer_p), entry_idx(0) {
}
};

vector<PrintState> stack;
idx_t level = 0;

stack.emplace_back(root.pointer);

while(!stack.empty()) {
auto &frame = stack.back();
const auto &node = Ref(frame.pointer);
const auto count = node.GetCount();

if(frame.pointer.IsLeafPage()) {
while(frame.entry_idx < count) {
auto &entry = node[frame.entry_idx];
// TODO: Print entry
for(idx_t i = 0; i < level; i++) {
result += " ";
}

result += "Leaf: " + std::to_string(entry.pointer.GetRowId()) + "\n";

frame.entry_idx++;
}
stack.pop_back();
level--;
}
else {
D_ASSERT(frame.pointer.IsBranchPage());
if(frame.entry_idx < count) {
auto &entry = node[frame.entry_idx];

// TODO: Print entry
for(idx_t i = 0; i < level; i++) {
result += " ";
}

result += "Branch: " + std::to_string(frame.entry_idx) + "\n";

frame.entry_idx++;
level++;
stack.emplace_back(entry.pointer);
} else {
stack.pop_back();
level--;
}
}
}


return result;
}

void RTree::Print() const {
Printer::Print(ToString());
}


} // namespace core

} // namespace spatial
2 changes: 1 addition & 1 deletion spatial/src/spatial/core/io/shapefile/read_shapefile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ static void ConvertStringAttributeLoop(Vector &result, int record_start, idx_t c
auto string_bytes = DBFReadStringAttribute(dbf_handle, record_idx, field_idx);
string_t result_str;
if (attribute_encoding == AttributeEncoding::LATIN1) {
conversion_buffer.reserve(strlen(string_bytes) * 2 + 1); // worst case (all non-ascii chars)
conversion_buffer.resize(strlen(string_bytes) * 2 + 1); // worst case (all non-ascii chars)
auto out_len =
EncodingUtil::LatinToUTF8Buffer(const_data_ptr_cast(string_bytes), conversion_buffer.data());
result_str = StringVector::AddString(result, const_char_ptr_cast(conversion_buffer.data()), out_len);
Expand Down
Loading