Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Don't mark the region tracker page as allocated
Browse files Browse the repository at this point in the history
When writing the allocator state table, it's better not to include the
region tracker page as one of the allocated pages. Instead, we should
allocate the region tracker page during quick-repair after loading the
rest of the allocator state, exactly like we do for full repair.

This avoids an unlikely corruption that could happen for databases
larger than 4 TB, if we crash on shutdown after writing out a new
region tracker page number but before clearing the recovery_required
bit. It also means that in the future, when we drop support for the old
allocator state format, we'll be able to get rid of the region tracker
page entirely instead of having to keep around some of the allocation
code for compatibility.
mconst committed Nov 19, 2024
1 parent e4f0ce6 commit ffb8f6d
Showing 1 changed file with 46 additions and 37 deletions.
83 changes: 46 additions & 37 deletions src/tree_store/page_store/page_manager.rs
Original file line number Diff line number Diff line change
@@ -361,26 +361,7 @@ impl TransactionalMemory {
}

pub(crate) fn end_repair(&self) -> Result<()> {
let mut state = self.state.lock().unwrap();
let tracker_len = state.allocators.region_tracker.to_vec().len();
let tracker_page = state.header.region_tracker();

let allocator = state.get_region_mut(tracker_page.region);
// Allocate a new tracker page, if the old one was overwritten or is too small
if allocator.is_allocated(tracker_page.page_index, tracker_page.page_order)
|| tracker_page.page_size_bytes(self.page_size) < tracker_len as u64
{
drop(state);
let new_tracker_page = self.allocate(tracker_len)?.get_page_number();

let mut state = self.state.lock().unwrap();
state.header.set_region_tracker(new_tracker_page);
self.write_header(&state.header)?;
self.storage.flush(false)?;
} else {
allocator.record_alloc(tracker_page.page_index, tracker_page.page_order);
drop(state);
}
self.allocate_region_tracker_page()?;

let mut state = self.state.lock().unwrap();
let tracker_page = state.header.region_tracker();
@@ -436,10 +417,17 @@ impl TransactionalMemory {
num_regions: u32,
) -> Result<bool> {
// Has the number of regions changed since reserve_allocator_state() was called?
if num_regions != self.state.lock().unwrap().header.layout().num_regions() {
let state = self.state.lock().unwrap();
if num_regions != state.header.layout().num_regions() {
return Ok(false);
}

// Temporarily free the region tracker page, because we don't want to include it in our
// recorded allocations
let tracker_page = state.header.region_tracker();
drop(state);
self.free(tracker_page);

for i in 0..num_regions {
let region_bytes =
&self.state.lock().unwrap().allocators.region_allocators[i as usize].to_vec();
@@ -458,6 +446,9 @@ impl TransactionalMemory {
&region_tracker_bytes.as_ref(),
)?;

// Restore the region tracker page
self.mark_page_allocated(tracker_page);

Ok(true)
}

@@ -509,10 +500,8 @@ impl TransactionalMemory {
state.allocators.resize_to(layout);
drop(state);

// Allocate a larger region tracker page if necessary. This also happens automatically on
// shutdown, but we do it here because we want our allocator state to exactly match the
// result of running a full repair
self.ensure_region_tracker_page()?;
// Allocate a page for the region tracker
self.allocate_region_tracker_page()?;

self.state.lock().unwrap().header.recovery_required = false;
self.needs_recovery.store(false, Ordering::Release);
@@ -527,27 +516,47 @@ impl TransactionalMemory {
allocator.is_allocated(page.page_index, page.page_order)
}

// Make sure we have a large enough region-tracker page. This uses allocate_non_transactional(),
// so it should only be called from outside a transaction
fn ensure_region_tracker_page(&self) -> Result {
let state = self.state.lock().unwrap();
// Allocate a page for the region tracker. If possible, this will pick the same page that
// was used last time; otherwise it'll pick a new page and update the database header to
// match. This uses allocate_non_transactional(), so it should only be called from outside
// a transaction
fn allocate_region_tracker_page(&self) -> Result {
let mut state = self.state.lock().unwrap();
let tracker_len = state.allocators.region_tracker.to_vec().len();
let mut tracker_page = state.header.region_tracker();
drop(state);
let tracker_page = state.header.region_tracker();

if tracker_page.page_size_bytes(self.page_size) < (tracker_len as u64) {
// Allocate a larger tracker page
self.free(tracker_page);
tracker_page = self
let allocator = state.get_region_mut(tracker_page.region);
// Pick a new tracker page, if the old one was overwritten or is too small
if allocator.is_allocated(tracker_page.page_index, tracker_page.page_order)
|| tracker_page.page_size_bytes(self.page_size) < tracker_len as u64
{
drop(state);

let new_tracker_page = self
.allocate_non_transactional(tracker_len)?
.get_page_number();

let mut state = self.state.lock().unwrap();
state.header.set_region_tracker(tracker_page);
state.header.set_region_tracker(new_tracker_page);
self.write_header(&state.header)?;
self.storage.flush(false)?;
} else {
// The old page is available, so just mark it as allocated
allocator.record_alloc(tracker_page.page_index, tracker_page.page_order);
drop(state);
}

Ok(())
}

// Grow the region tracker page if necessary. This uses allocate_non_transactional(), so it
// should only be called from outside a transaction
fn reallocate_region_tracker_page(&self) -> Result {
let tracker_page = self.state.lock().unwrap().header.region_tracker();
self.free(tracker_page);
self.allocate_region_tracker_page()
}

// Relocates the region tracker to a lower page, if possible
// Returns true if the page was moved
pub(crate) fn relocate_region_tracker(&self) -> Result<bool> {
@@ -1166,7 +1175,7 @@ impl Drop for TransactionalMemory {
}

// Allocate a larger region-tracker page if necessary
if self.ensure_region_tracker_page().is_err() {
if self.reallocate_region_tracker_page().is_err() {
#[cfg(feature = "logging")]
warn!("Failure while flushing allocator state. Repair required at restart.");
return;

0 comments on commit ffb8f6d

Please sign in to comment.