From 6963fe325b4fd7ba5a849247a5f154c265b5293c Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Wed, 28 Feb 2024 16:25:30 +0100 Subject: [PATCH 1/6] Fix an issue where we weren't computing the amount of committed rows correctly in ::Checkpoint causing an internal invariant to be invalidated and an internal exception to be thrown --- .../duckdb/storage/table/chunk_info.hpp | 9 ++- .../storage/table/row_version_manager.hpp | 2 +- src/storage/table/chunk_info.cpp | 18 +++-- src/storage/table/row_group.cpp | 6 +- src/storage/table/row_version_manager.cpp | 5 +- ...repeated_deletes_and_checkpoints.test_slow | 65 +++++++++++++++++++ 6 files changed, 93 insertions(+), 12 deletions(-) create mode 100644 test/sql/storage/vacuum/repeated_deletes_and_checkpoints.test_slow diff --git a/src/include/duckdb/storage/table/chunk_info.hpp b/src/include/duckdb/storage/table/chunk_info.hpp index 14ed981e3707..853a1887148e 100644 --- a/src/include/duckdb/storage/table/chunk_info.hpp +++ b/src/include/duckdb/storage/table/chunk_info.hpp @@ -44,7 +44,8 @@ class ChunkInfo { //! Returns whether or not a single row in the ChunkInfo should be used or not for the given transaction virtual bool Fetch(TransactionData transaction, row_t row) = 0; virtual void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) = 0; - virtual idx_t GetCommittedDeletedCount(idx_t max_count) = 0; + virtual idx_t GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, + idx_t max_count) = 0; virtual bool HasDeletes() const = 0; @@ -85,7 +86,8 @@ class ChunkConstantInfo : public ChunkInfo { SelectionVector &sel_vector, idx_t max_count) override; bool Fetch(TransactionData transaction, row_t row) override; void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override; - idx_t GetCommittedDeletedCount(idx_t max_count) override; + idx_t GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, + idx_t max_count) override; bool HasDeletes() const override; @@ -122,7 +124,8 @@ class ChunkVectorInfo : public ChunkInfo { SelectionVector &sel_vector, idx_t max_count) override; bool Fetch(TransactionData transaction, row_t row) override; void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override; - idx_t GetCommittedDeletedCount(idx_t max_count) override; + idx_t GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, + idx_t max_count) override; void Append(idx_t start, idx_t end, transaction_t commit_id); diff --git a/src/include/duckdb/storage/table/row_version_manager.hpp b/src/include/duckdb/storage/table/row_version_manager.hpp index 0763513b767d..12757a059297 100644 --- a/src/include/duckdb/storage/table/row_version_manager.hpp +++ b/src/include/duckdb/storage/table/row_version_manager.hpp @@ -26,7 +26,7 @@ class RowVersionManager { return start; } void SetStart(idx_t start); - idx_t GetCommittedDeletedCount(idx_t count); + idx_t GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, idx_t count); idx_t GetSelVector(TransactionData transaction, idx_t vector_idx, SelectionVector &sel_vector, idx_t max_count); idx_t GetCommittedSelVector(transaction_t start_time, transaction_t transaction_id, idx_t vector_idx, diff --git a/src/storage/table/chunk_info.cpp b/src/storage/table/chunk_info.cpp index d220e0809022..464e2eb4d2e6 100644 --- a/src/storage/table/chunk_info.cpp +++ b/src/storage/table/chunk_info.cpp @@ -90,8 +90,12 @@ bool ChunkConstantInfo::HasDeletes() const { return is_deleted; } -idx_t ChunkConstantInfo::GetCommittedDeletedCount(idx_t max_count) { - return delete_id < TRANSACTION_ID_START ? max_count : 0; +idx_t ChunkConstantInfo::GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, + idx_t max_count) { + if (CommittedVersionOperator::UseDeletedVersion(min_start_id, min_transaction_id, delete_id)) { + return max_count; + } + return 0; } void ChunkConstantInfo::Write(WriteStream &writer) const { @@ -166,7 +170,10 @@ idx_t ChunkVectorInfo::GetSelVector(transaction_t start_time, transaction_t tran idx_t ChunkVectorInfo::GetCommittedSelVector(transaction_t min_start_id, transaction_t min_transaction_id, SelectionVector &sel_vector, idx_t max_count) { - return TemplatedGetSelVector(min_start_id, min_transaction_id, sel_vector, max_count); + auto count = + TemplatedGetSelVector(min_start_id, min_transaction_id, sel_vector, max_count); + D_ASSERT(count == (max_count - GetCommittedDeletedCount(min_start_id, min_transaction_id, max_count))); + return count; } idx_t ChunkVectorInfo::GetSelVector(TransactionData transaction, SelectionVector &sel_vector, idx_t max_count) { @@ -229,13 +236,14 @@ bool ChunkVectorInfo::HasDeletes() const { return any_deleted; } -idx_t ChunkVectorInfo::GetCommittedDeletedCount(idx_t max_count) { +idx_t ChunkVectorInfo::GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, + idx_t max_count) { if (!any_deleted) { return 0; } idx_t delete_count = 0; for (idx_t i = 0; i < max_count; i++) { - if (deleted[i] < TRANSACTION_ID_START) { + if (!CommittedVersionOperator::UseDeletedVersion(min_start_id, min_transaction_id, deleted[i])) { delete_count++; } } diff --git a/src/storage/table/row_group.cpp b/src/storage/table/row_group.cpp index 77d7c20f7c42..9231a51c878b 100644 --- a/src/storage/table/row_group.cpp +++ b/src/storage/table/row_group.cpp @@ -813,7 +813,11 @@ idx_t RowGroup::GetCommittedRowCount() { if (!vinfo) { return count; } - return count - vinfo->GetCommittedDeletedCount(count); + auto &transaction_manager = DuckTransactionManager::Get(GetCollection().GetAttached()); + + auto lowest_active_start = transaction_manager.LowestActiveStart(); + auto lowest_active_id = transaction_manager.LowestActiveId(); + return count - vinfo->GetCommittedDeletedCount(lowest_active_start, lowest_active_id, count); } bool RowGroup::HasUnloadedDeletes() const { diff --git a/src/storage/table/row_version_manager.cpp b/src/storage/table/row_version_manager.cpp index e1ffecb4b8ff..5c35d03885aa 100644 --- a/src/storage/table/row_version_manager.cpp +++ b/src/storage/table/row_version_manager.cpp @@ -22,7 +22,8 @@ void RowVersionManager::SetStart(idx_t new_start) { } } -idx_t RowVersionManager::GetCommittedDeletedCount(idx_t count) { +idx_t RowVersionManager::GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, + idx_t count) { lock_guard l(version_lock); idx_t deleted_count = 0; for (idx_t r = 0, i = 0; r < count; r += STANDARD_VECTOR_SIZE, i++) { @@ -33,7 +34,7 @@ idx_t RowVersionManager::GetCommittedDeletedCount(idx_t count) { if (max_count == 0) { break; } - deleted_count += vector_info[i]->GetCommittedDeletedCount(max_count); + deleted_count += vector_info[i]->GetCommittedDeletedCount(min_start_id, min_transaction_id, max_count); } return deleted_count; } diff --git a/test/sql/storage/vacuum/repeated_deletes_and_checkpoints.test_slow b/test/sql/storage/vacuum/repeated_deletes_and_checkpoints.test_slow new file mode 100644 index 000000000000..e603c0d86c7f --- /dev/null +++ b/test/sql/storage/vacuum/repeated_deletes_and_checkpoints.test_slow @@ -0,0 +1,65 @@ +# name: test/sql/storage/vacuum/repeated_deletes_and_checkpoints.test_slow +# description: Test running repeated deletes and checkpoints +# group: [vacuum] + +load __TEST_DIR__/repeated_deletes_and_checkpoints.db + +statement ok +CREATE TABLE test (pk INT); + +statement ok +INSERT INTO test SELECT * FROM generate_series(0, 1000000); + +statement ok +CHECKPOINT; + +restart + +query I +DELETE FROM test WHERE pk > 738645 AND pk < 978908; +---- +240262 + +query I +SELECT COUNT(*) FROM test; +---- +759739 + +restart + +query I +DELETE FROM test WHERE pk > 282475 AND pk < 522738; +---- +240262 + +query I +SELECT COUNT(*) FROM test; +---- +519477 + +restart + +query I +INSERT INTO test SELECT * FROM generate_series(1201414, 1201514); +---- +101 + +query I +SELECT COUNT(*) FROM test; +---- +519577 + +restart + +query I +SELECT COUNT(*) FROM test; +---- +519577 + +statement ok +CHECKPOINT; + +query I +SELECT COUNT(*) FROM test; +---- +519577 From 6f60893ec6466301dbcf401a0de224f6e7a60949 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Wed, 28 Feb 2024 17:58:49 +0100 Subject: [PATCH 2/6] Correctly return number of deletes rows, and convert this test to sqllogictest --- src/storage/table/chunk_info.cpp | 4 +- .../appending_table_exceeding_limit.test_slow | 53 +++++++++++++++++++ test/sql/storage/test_buffer_manager.cpp | 52 ------------------ 3 files changed, 55 insertions(+), 54 deletions(-) create mode 100644 test/sql/storage/buffer_manager/appending_table_exceeding_limit.test_slow diff --git a/src/storage/table/chunk_info.cpp b/src/storage/table/chunk_info.cpp index 464e2eb4d2e6..7e87a52ff85f 100644 --- a/src/storage/table/chunk_info.cpp +++ b/src/storage/table/chunk_info.cpp @@ -93,9 +93,9 @@ bool ChunkConstantInfo::HasDeletes() const { idx_t ChunkConstantInfo::GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, idx_t max_count) { if (CommittedVersionOperator::UseDeletedVersion(min_start_id, min_transaction_id, delete_id)) { - return max_count; + return 0; } - return 0; + return max_count; } void ChunkConstantInfo::Write(WriteStream &writer) const { diff --git a/test/sql/storage/buffer_manager/appending_table_exceeding_limit.test_slow b/test/sql/storage/buffer_manager/appending_table_exceeding_limit.test_slow new file mode 100644 index 000000000000..b551d1d70109 --- /dev/null +++ b/test/sql/storage/buffer_manager/appending_table_exceeding_limit.test_slow @@ -0,0 +1,53 @@ +# name: test/sql/storage/buffer_manager/appending_table_exceeding_limit.test_slow +# description: Test appending and checkpointing a table that exceeds buffer manager size +# group: [buffer_manager] + +# load the DB from disk +load __TEST_DIR__/test_table_exceeding_limit.db + +statement ok +SET force_compression='uncompressed' + +statement ok +SET memory_limit = '10MB' + +statement ok +SET threads=1 + +statement ok +CREATE TABLE test (a INTEGER, b INTEGER); + +statement ok +INSERT INTO test VALUES (1, 10), (2, 20), (3, 30), (NULL, NULL) + +loop i 0 23 + +statement ok +INSERT INTO test SELECT * FROM test + +endloop + +query IIII +SELECT COUNT(*), COUNT(a), SUM(a), SUM(b) FROM test +---- +33554432 25165824 50331648 503316480 + +loop i 0 2 + +restart + +statement ok +SET force_compression='uncompressed' + +statement ok +SET memory_limit = '10MB' + +statement ok +SET threads=1 + +query IIII +SELECT COUNT(*), COUNT(a), SUM(a), SUM(b) FROM test +---- +33554432 25165824 50331648 503316480 + +endloop diff --git a/test/sql/storage/test_buffer_manager.cpp b/test/sql/storage/test_buffer_manager.cpp index 4809638c8d03..e730fe892b92 100644 --- a/test/sql/storage/test_buffer_manager.cpp +++ b/test/sql/storage/test_buffer_manager.cpp @@ -72,58 +72,6 @@ TEST_CASE("Test storing a big string that exceeds buffer manager size", "[storag DeleteDatabase(storage_database); } -TEST_CASE("Test appending and checkpointing a table that exceeds buffer manager size", "[storage][.]") { - duckdb::unique_ptr result; - auto storage_database = TestCreatePath("storage_test"); - auto config = GetTestConfig(); - - // maximum memory is 10MB - config->options.force_compression = CompressionType::COMPRESSION_UNCOMPRESSED; - config->options.maximum_memory = 10000000; - config->options.maximum_threads = 1; - - // create a table of size 10 times the buffer pool size - uint64_t size = 0, size_a, sum_a, sum_b; - uint64_t table_size = 100000000 / sizeof(int32_t); - // make sure the database does not exist - DeleteDatabase(storage_database); - { - // create a database and insert the big string - DuckDB db(storage_database, config.get()); - Connection con(db); - REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b INTEGER);")); - REQUIRE_NO_FAIL(con.Query("INSERT INTO test VALUES (1, 10), (2, 20), (3, 30), (NULL, NULL)")); - size_a = 3; - sum_a = 1 + 2 + 3; - sum_b = 10 + 20 + 30; - for (size = 4; size < table_size; size *= 2) { - REQUIRE_NO_FAIL(con.Query("INSERT INTO test SELECT * FROM test")); - size_a *= 2; - sum_a *= 2; - sum_b *= 2; - } - - // check the aggregate statistics of the table - result = con.Query("SELECT COUNT(*), COUNT(a), SUM(a), SUM(b) FROM test"); - REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(size)})); - REQUIRE(CHECK_COLUMN(result, 1, {Value::BIGINT(size_a)})); - REQUIRE(CHECK_COLUMN(result, 2, {Value::BIGINT(sum_a)})); - REQUIRE(CHECK_COLUMN(result, 3, {Value::BIGINT(sum_b)})); - } - for (idx_t i = 0; i < 2; i++) { - // reload the table and checkpoint, still with a 10MB limit - DuckDB db(storage_database, config.get()); - Connection con(db); - - result = con.Query("SELECT COUNT(*), COUNT(a), SUM(a), SUM(b) FROM test"); - REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(size)})); - REQUIRE(CHECK_COLUMN(result, 1, {Value::BIGINT(size_a)})); - REQUIRE(CHECK_COLUMN(result, 2, {Value::BIGINT(sum_a)})); - REQUIRE(CHECK_COLUMN(result, 3, {Value::BIGINT(sum_b)})); - } - DeleteDatabase(storage_database); -} - TEST_CASE("Modifying the buffer manager limit at runtime for an in-memory database", "[storage][.]") { duckdb::unique_ptr result; From 40f9af39302ee667cde8f65c033071ca0e847de6 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Wed, 28 Feb 2024 22:15:28 +0100 Subject: [PATCH 3/6] Avoid skipping the write of a row group during delete vacuum when we have empty interleaved row groups --- .../duckdb/storage/table/chunk_info.hpp | 9 ++---- .../duckdb/storage/table/column_segment.hpp | 4 +-- src/storage/table/chunk_info.cpp | 18 ++++------- src/storage/table/row_group.cpp | 6 +--- src/storage/table/row_group_collection.cpp | 5 ++-- src/storage/table/row_version_manager.cpp | 5 ++-- ...repeated_deletes_and_checkpoints.test_slow | 30 +++++++++---------- 7 files changed, 31 insertions(+), 46 deletions(-) diff --git a/src/include/duckdb/storage/table/chunk_info.hpp b/src/include/duckdb/storage/table/chunk_info.hpp index 853a1887148e..14ed981e3707 100644 --- a/src/include/duckdb/storage/table/chunk_info.hpp +++ b/src/include/duckdb/storage/table/chunk_info.hpp @@ -44,8 +44,7 @@ class ChunkInfo { //! Returns whether or not a single row in the ChunkInfo should be used or not for the given transaction virtual bool Fetch(TransactionData transaction, row_t row) = 0; virtual void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) = 0; - virtual idx_t GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, - idx_t max_count) = 0; + virtual idx_t GetCommittedDeletedCount(idx_t max_count) = 0; virtual bool HasDeletes() const = 0; @@ -86,8 +85,7 @@ class ChunkConstantInfo : public ChunkInfo { SelectionVector &sel_vector, idx_t max_count) override; bool Fetch(TransactionData transaction, row_t row) override; void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override; - idx_t GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, - idx_t max_count) override; + idx_t GetCommittedDeletedCount(idx_t max_count) override; bool HasDeletes() const override; @@ -124,8 +122,7 @@ class ChunkVectorInfo : public ChunkInfo { SelectionVector &sel_vector, idx_t max_count) override; bool Fetch(TransactionData transaction, row_t row) override; void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override; - idx_t GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, - idx_t max_count) override; + idx_t GetCommittedDeletedCount(idx_t max_count) override; void Append(idx_t start, idx_t end, transaction_t commit_id); diff --git a/src/include/duckdb/storage/table/column_segment.hpp b/src/include/duckdb/storage/table/column_segment.hpp index 6ae03d4f3aa9..fa11971603a2 100644 --- a/src/include/duckdb/storage/table/column_segment.hpp +++ b/src/include/duckdb/storage/table/column_segment.hpp @@ -68,8 +68,8 @@ class ColumnSegment : public SegmentBase { //! Fetch a value of the specific row id and append it to the result void FetchRow(ColumnFetchState &state, row_t row_id, Vector &result, idx_t result_idx); - static idx_t FilterSelection(SelectionVector &sel, Vector &result, const TableFilter &filter, - idx_t &approved_tuple_count, ValidityMask &mask); + static idx_t FilterSelection(SelectionVector &sel, Vector &vector, UnifiedVectorFormat &vdata, + const TableFilter &filter, idx_t scan_count, idx_t &approved_tuple_count); //! Skip a scan forward to the row_index specified in the scan state void Skip(ColumnScanState &state); diff --git a/src/storage/table/chunk_info.cpp b/src/storage/table/chunk_info.cpp index 7e87a52ff85f..d220e0809022 100644 --- a/src/storage/table/chunk_info.cpp +++ b/src/storage/table/chunk_info.cpp @@ -90,12 +90,8 @@ bool ChunkConstantInfo::HasDeletes() const { return is_deleted; } -idx_t ChunkConstantInfo::GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, - idx_t max_count) { - if (CommittedVersionOperator::UseDeletedVersion(min_start_id, min_transaction_id, delete_id)) { - return 0; - } - return max_count; +idx_t ChunkConstantInfo::GetCommittedDeletedCount(idx_t max_count) { + return delete_id < TRANSACTION_ID_START ? max_count : 0; } void ChunkConstantInfo::Write(WriteStream &writer) const { @@ -170,10 +166,7 @@ idx_t ChunkVectorInfo::GetSelVector(transaction_t start_time, transaction_t tran idx_t ChunkVectorInfo::GetCommittedSelVector(transaction_t min_start_id, transaction_t min_transaction_id, SelectionVector &sel_vector, idx_t max_count) { - auto count = - TemplatedGetSelVector(min_start_id, min_transaction_id, sel_vector, max_count); - D_ASSERT(count == (max_count - GetCommittedDeletedCount(min_start_id, min_transaction_id, max_count))); - return count; + return TemplatedGetSelVector(min_start_id, min_transaction_id, sel_vector, max_count); } idx_t ChunkVectorInfo::GetSelVector(TransactionData transaction, SelectionVector &sel_vector, idx_t max_count) { @@ -236,14 +229,13 @@ bool ChunkVectorInfo::HasDeletes() const { return any_deleted; } -idx_t ChunkVectorInfo::GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, - idx_t max_count) { +idx_t ChunkVectorInfo::GetCommittedDeletedCount(idx_t max_count) { if (!any_deleted) { return 0; } idx_t delete_count = 0; for (idx_t i = 0; i < max_count; i++) { - if (!CommittedVersionOperator::UseDeletedVersion(min_start_id, min_transaction_id, deleted[i])) { + if (deleted[i] < TRANSACTION_ID_START) { delete_count++; } } diff --git a/src/storage/table/row_group.cpp b/src/storage/table/row_group.cpp index 9231a51c878b..77d7c20f7c42 100644 --- a/src/storage/table/row_group.cpp +++ b/src/storage/table/row_group.cpp @@ -813,11 +813,7 @@ idx_t RowGroup::GetCommittedRowCount() { if (!vinfo) { return count; } - auto &transaction_manager = DuckTransactionManager::Get(GetCollection().GetAttached()); - - auto lowest_active_start = transaction_manager.LowestActiveStart(); - auto lowest_active_id = transaction_manager.LowestActiveId(); - return count - vinfo->GetCommittedDeletedCount(lowest_active_start, lowest_active_id, count); + return count - vinfo->GetCommittedDeletedCount(count); } bool RowGroup::HasUnloadedDeletes() const { diff --git a/src/storage/table/row_group_collection.cpp b/src/storage/table/row_group_collection.cpp index cae3c373b119..d2a16934980a 100644 --- a/src/storage/table/row_group_collection.cpp +++ b/src/storage/table/row_group_collection.cpp @@ -778,11 +778,12 @@ class VacuumTask : public BaseCheckpointTask { scan_state.Initialize(column_ids); scan_state.table_state.Initialize(types); scan_state.table_state.max_row = idx_t(-1); - idx_t next_idx = segment_idx + merge_count; - for (idx_t c_idx = segment_idx; c_idx < next_idx; c_idx++) { + idx_t merged_row_groups = 0; + for (idx_t c_idx = segment_idx; merged_row_groups < merge_count && c_idx < vacuum_state.row_group_counts.size(); c_idx++) { if (vacuum_state.row_group_counts[c_idx] == 0) { continue; } + auto ¤t_row_group = *checkpoint_state.segments[c_idx].node; current_row_group.InitializeScan(scan_state.table_state); diff --git a/src/storage/table/row_version_manager.cpp b/src/storage/table/row_version_manager.cpp index 5c35d03885aa..e1ffecb4b8ff 100644 --- a/src/storage/table/row_version_manager.cpp +++ b/src/storage/table/row_version_manager.cpp @@ -22,8 +22,7 @@ void RowVersionManager::SetStart(idx_t new_start) { } } -idx_t RowVersionManager::GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, - idx_t count) { +idx_t RowVersionManager::GetCommittedDeletedCount(idx_t count) { lock_guard l(version_lock); idx_t deleted_count = 0; for (idx_t r = 0, i = 0; r < count; r += STANDARD_VECTOR_SIZE, i++) { @@ -34,7 +33,7 @@ idx_t RowVersionManager::GetCommittedDeletedCount(transaction_t min_start_id, tr if (max_count == 0) { break; } - deleted_count += vector_info[i]->GetCommittedDeletedCount(min_start_id, min_transaction_id, max_count); + deleted_count += vector_info[i]->GetCommittedDeletedCount(max_count); } return deleted_count; } diff --git a/test/sql/storage/vacuum/repeated_deletes_and_checkpoints.test_slow b/test/sql/storage/vacuum/repeated_deletes_and_checkpoints.test_slow index e603c0d86c7f..a4baa528ef1f 100644 --- a/test/sql/storage/vacuum/repeated_deletes_and_checkpoints.test_slow +++ b/test/sql/storage/vacuum/repeated_deletes_and_checkpoints.test_slow @@ -20,10 +20,10 @@ DELETE FROM test WHERE pk > 738645 AND pk < 978908; ---- 240262 -query I -SELECT COUNT(*) FROM test; +query II +SELECT COUNT(*), SUM(pk) FROM test; ---- -759739 +759739 293669140557 restart @@ -32,10 +32,10 @@ DELETE FROM test WHERE pk > 282475 AND pk < 522738; ---- 240262 -query I -SELECT COUNT(*) FROM test; +query II +SELECT COUNT(*), SUM(pk) FROM test; ---- -519477 +519477 196938097654 restart @@ -44,22 +44,22 @@ INSERT INTO test SELECT * FROM generate_series(1201414, 1201514); ---- 101 -query I -SELECT COUNT(*) FROM test; +query II +SELECT COUNT(*), SUM(pk) FROM test; ---- -519577 +519578 197059445518 restart -query I -SELECT COUNT(*) FROM test; +query II +SELECT COUNT(*), SUM(pk) FROM test; ---- -519577 +519578 197059445518 statement ok CHECKPOINT; -query I -SELECT COUNT(*) FROM test; +query II +SELECT COUNT(*), SUM(pk) FROM test; ---- -519577 +519578 197059445518 From 726ca21afe455157165e0aae7f152380aec39532 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Wed, 28 Feb 2024 22:16:47 +0100 Subject: [PATCH 4/6] Add missing file --- src/include/duckdb/storage/table/row_version_manager.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/duckdb/storage/table/row_version_manager.hpp b/src/include/duckdb/storage/table/row_version_manager.hpp index 12757a059297..0763513b767d 100644 --- a/src/include/duckdb/storage/table/row_version_manager.hpp +++ b/src/include/duckdb/storage/table/row_version_manager.hpp @@ -26,7 +26,7 @@ class RowVersionManager { return start; } void SetStart(idx_t start); - idx_t GetCommittedDeletedCount(transaction_t min_start_id, transaction_t min_transaction_id, idx_t count); + idx_t GetCommittedDeletedCount(idx_t count); idx_t GetSelVector(TransactionData transaction, idx_t vector_idx, SelectionVector &sel_vector, idx_t max_count); idx_t GetCommittedSelVector(transaction_t start_time, transaction_t transaction_id, idx_t vector_idx, From 5eae178d16f3dfc3dbed52bb6e917a90575a4b44 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Wed, 28 Feb 2024 22:17:24 +0100 Subject: [PATCH 5/6] Format fix --- src/storage/table/row_group_collection.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/storage/table/row_group_collection.cpp b/src/storage/table/row_group_collection.cpp index d2a16934980a..5c93a1bd2207 100644 --- a/src/storage/table/row_group_collection.cpp +++ b/src/storage/table/row_group_collection.cpp @@ -779,7 +779,8 @@ class VacuumTask : public BaseCheckpointTask { scan_state.table_state.Initialize(types); scan_state.table_state.max_row = idx_t(-1); idx_t merged_row_groups = 0; - for (idx_t c_idx = segment_idx; merged_row_groups < merge_count && c_idx < vacuum_state.row_group_counts.size(); c_idx++) { + for (idx_t c_idx = segment_idx; merged_row_groups < merge_count && c_idx < vacuum_state.row_group_counts.size(); + c_idx++) { if (vacuum_state.row_group_counts[c_idx] == 0) { continue; } From faef0f8e0db0ebdf75990f050181ecdd0554dcfe Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Wed, 28 Feb 2024 22:18:46 +0100 Subject: [PATCH 6/6] Actual actual fix --- src/storage/table/row_group_collection.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/storage/table/row_group_collection.cpp b/src/storage/table/row_group_collection.cpp index 5c93a1bd2207..91c843ed72c4 100644 --- a/src/storage/table/row_group_collection.cpp +++ b/src/storage/table/row_group_collection.cpp @@ -778,12 +778,13 @@ class VacuumTask : public BaseCheckpointTask { scan_state.Initialize(column_ids); scan_state.table_state.Initialize(types); scan_state.table_state.max_row = idx_t(-1); - idx_t merged_row_groups = 0; - for (idx_t c_idx = segment_idx; merged_row_groups < merge_count && c_idx < vacuum_state.row_group_counts.size(); - c_idx++) { + idx_t merged_groups = 0; + idx_t total_row_groups = vacuum_state.row_group_counts.size(); + for (idx_t c_idx = segment_idx; merged_groups < merge_count && c_idx < total_row_groups; c_idx++) { if (vacuum_state.row_group_counts[c_idx] == 0) { continue; } + merged_groups++; auto ¤t_row_group = *checkpoint_state.segments[c_idx].node;