Merge pull request duckdb#10897 from Mytherin/checkpointfatal
Storage: Fix an internal exception that could be triggered when deleting many rows and checkpointing repeatedly
Mytherin authored Feb 29, 2024
2 parents 2aadb0b + faef0f8 commit b0f4d75
Showing 4 changed files with 123 additions and 54 deletions.
7 changes: 5 additions & 2 deletions src/storage/table/row_group_collection.cpp
@@ -778,11 +778,14 @@ class VacuumTask : public BaseCheckpointTask {
 	scan_state.Initialize(column_ids);
 	scan_state.table_state.Initialize(types);
 	scan_state.table_state.max_row = idx_t(-1);
-	idx_t next_idx = segment_idx + merge_count;
-	for (idx_t c_idx = segment_idx; c_idx < next_idx; c_idx++) {
+	idx_t merged_groups = 0;
+	idx_t total_row_groups = vacuum_state.row_group_counts.size();
+	for (idx_t c_idx = segment_idx; merged_groups < merge_count && c_idx < total_row_groups; c_idx++) {
 		if (vacuum_state.row_group_counts[c_idx] == 0) {
 			continue;
 		}
+		merged_groups++;
+
 		auto &current_row_group = *checkpoint_state.segments[c_idx].node;
 
 		current_row_group.InitializeScan(scan_state.table_state);
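The shape of the fix, as far as the diff shows: the old loop visited a fixed index window of merge_count row groups starting at segment_idx, but skipped fully-deleted groups (row_group_counts[c_idx] == 0) without counting them, so after heavy deletes the vacuum task could process fewer populated groups than the checkpoint had scheduled. The new loop counts only non-empty groups toward merge_count and keeps scanning, bounded by the total number of row groups rather than a fixed window. A minimal standalone sketch of the corrected pattern — hypothetical names and types, not DuckDB's internal API:

#include <cstddef>
#include <vector>

// Collect up to merge_count *non-empty* groups starting at segment_idx.
// Empty groups are skipped but do not consume the merge budget; bounding
// the scan by the total group count (instead of segment_idx + merge_count)
// keeps it from stopping short of, or running past, the group list.
std::vector<std::size_t> CollectGroupsToMerge(const std::vector<std::size_t> &row_group_counts,
                                              std::size_t segment_idx, std::size_t merge_count) {
	std::vector<std::size_t> merged;
	const std::size_t total_row_groups = row_group_counts.size();
	for (std::size_t idx = segment_idx; merged.size() < merge_count && idx < total_row_groups; idx++) {
		if (row_group_counts[idx] == 0) {
			continue; // fully-deleted row group: skip it, but keep scanning past it
		}
		merged.push_back(idx); // only populated groups consume the merge budget
	}
	return merged;
}

With the old fixed window, a run of emptied groups inside the window meant some scheduled populated groups were never reached; extending the scan past them is what the added merged_groups counter makes possible.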
53 changes: 53 additions & 0 deletions test/sql/storage/buffer_manager/appending_table_exceeding_limit.test_slow
@@ -0,0 +1,53 @@
# name: test/sql/storage/buffer_manager/appending_table_exceeding_limit.test_slow
# description: Test appending and checkpointing a table that exceeds buffer manager size
# group: [buffer_manager]

# load the DB from disk
load __TEST_DIR__/test_table_exceeding_limit.db

statement ok
SET force_compression='uncompressed'

statement ok
SET memory_limit = '10MB'

statement ok
SET threads=1

statement ok
CREATE TABLE test (a INTEGER, b INTEGER);

statement ok
INSERT INTO test VALUES (1, 10), (2, 20), (3, 30), (NULL, NULL)

loop i 0 23

statement ok
INSERT INTO test SELECT * FROM test

endloop

query IIII
SELECT COUNT(*), COUNT(a), SUM(a), SUM(b) FROM test
----
33554432 25165824 50331648 503316480

loop i 0 2

restart

statement ok
SET force_compression='uncompressed'

statement ok
SET memory_limit = '10MB'

statement ok
SET threads=1

query IIII
SELECT COUNT(*), COUNT(a), SUM(a), SUM(b) FROM test
----
33554432 25165824 50331648 503316480

endloop
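For reference, the expected values follow directly from the doubling loop: the table starts with 4 rows (3 non-NULL), and "loop i 0 23" runs 23 doublings, so COUNT(*) = 4 * 2^23 = 33554432, COUNT(a) = 3 * 2^23 = 25165824, SUM(a) = (1 + 2 + 3) * 2^23 = 50331648, and SUM(b) = (10 + 20 + 30) * 2^23 = 503316480. That is 256 MiB of raw integer data (two 4-byte columns), checkpointed under a 10MB memory limit.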
52 changes: 0 additions & 52 deletions test/sql/storage/test_buffer_manager.cpp
@@ -72,58 +72,6 @@ TEST_CASE("Test storing a big string that exceeds buffer manager size", "[storage][.]") {
DeleteDatabase(storage_database);
}

TEST_CASE("Test appending and checkpointing a table that exceeds buffer manager size", "[storage][.]") {
duckdb::unique_ptr<MaterializedQueryResult> result;
auto storage_database = TestCreatePath("storage_test");
auto config = GetTestConfig();

// maximum memory is 10MB
config->options.force_compression = CompressionType::COMPRESSION_UNCOMPRESSED;
config->options.maximum_memory = 10000000;
config->options.maximum_threads = 1;

// create a table of size 10 times the buffer pool size
uint64_t size = 0, size_a, sum_a, sum_b;
uint64_t table_size = 100000000 / sizeof(int32_t);
// make sure the database does not exist
DeleteDatabase(storage_database);
{
// create a database and insert the big string
DuckDB db(storage_database, config.get());
Connection con(db);
REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b INTEGER);"));
REQUIRE_NO_FAIL(con.Query("INSERT INTO test VALUES (1, 10), (2, 20), (3, 30), (NULL, NULL)"));
size_a = 3;
sum_a = 1 + 2 + 3;
sum_b = 10 + 20 + 30;
for (size = 4; size < table_size; size *= 2) {
REQUIRE_NO_FAIL(con.Query("INSERT INTO test SELECT * FROM test"));
size_a *= 2;
sum_a *= 2;
sum_b *= 2;
}

// check the aggregate statistics of the table
result = con.Query("SELECT COUNT(*), COUNT(a), SUM(a), SUM(b) FROM test");
REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(size)}));
REQUIRE(CHECK_COLUMN(result, 1, {Value::BIGINT(size_a)}));
REQUIRE(CHECK_COLUMN(result, 2, {Value::BIGINT(sum_a)}));
REQUIRE(CHECK_COLUMN(result, 3, {Value::BIGINT(sum_b)}));
}
for (idx_t i = 0; i < 2; i++) {
// reload the table and checkpoint, still with a 10MB limit
DuckDB db(storage_database, config.get());
Connection con(db);

result = con.Query("SELECT COUNT(*), COUNT(a), SUM(a), SUM(b) FROM test");
REQUIRE(CHECK_COLUMN(result, 0, {Value::BIGINT(size)}));
REQUIRE(CHECK_COLUMN(result, 1, {Value::BIGINT(size_a)}));
REQUIRE(CHECK_COLUMN(result, 2, {Value::BIGINT(sum_a)}));
REQUIRE(CHECK_COLUMN(result, 3, {Value::BIGINT(sum_b)}));
}
DeleteDatabase(storage_database);
}

TEST_CASE("Modifying the buffer manager limit at runtime for an in-memory database", "[storage][.]") {
duckdb::unique_ptr<MaterializedQueryResult> result;

65 changes: 65 additions & 0 deletions test/sql/storage/vacuum/repeated_deletes_and_checkpoints.test_slow
@@ -0,0 +1,65 @@
# name: test/sql/storage/vacuum/repeated_deletes_and_checkpoints.test_slow
# description: Test running repeated deletes and checkpoints
# group: [vacuum]

load __TEST_DIR__/repeated_deletes_and_checkpoints.db

statement ok
CREATE TABLE test (pk INT);

statement ok
INSERT INTO test SELECT * FROM generate_series(0, 1000000);

statement ok
CHECKPOINT;

restart

query I
DELETE FROM test WHERE pk > 738645 AND pk < 978908;
----
240262

query II
SELECT COUNT(*), SUM(pk) FROM test;
----
759739 293669140557

restart

query I
DELETE FROM test WHERE pk > 282475 AND pk < 522738;
----
240262

query II
SELECT COUNT(*), SUM(pk) FROM test;
----
519477 196938097654

restart

query I
INSERT INTO test SELECT * FROM generate_series(1201414, 1201514);
----
101

query II
SELECT COUNT(*), SUM(pk) FROM test;
----
519578 197059445518

restart

query II
SELECT COUNT(*), SUM(pk) FROM test;
----
519578 197059445518

statement ok
CHECKPOINT;

query II
SELECT COUNT(*), SUM(pk) FROM test;
----
519578 197059445518
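For reference, the expected values are arithmetic-series bookkeeping: generate_series(0, 1000000) inserts 1000001 rows summing to 1000000 * 1000001 / 2 = 500000500000. Each DELETE removes 240262 rows (978907 - 738646 + 1, and likewise for the second range); subtracting the corresponding range sums leaves 293669140557 and then 196938097654. The final INSERT adds 101 rows summing to (1201414 + 1201514) * 101 / 2 = 121347864, giving 519578 rows totalling 197059445518 — a result that must survive each restart (which reloads the database from disk) as well as the explicit CHECKPOINT.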
