diff --git a/Cargo.lock b/Cargo.lock index e2bb4476..47e65990 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -641,13 +641,10 @@ dependencies = [ [[package]] name = "imago" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301cd8e9a7de4545ed7387d5d563f4cc5adbdc44d8e658ffbb548bccb5bfe194" +version = "0.1.3" dependencies = [ "async-trait", "bincode", - "futures", "libc", "miniz_oxide", "rustc_version", @@ -655,7 +652,6 @@ dependencies = [ "tokio", "tracing", "vm-memory", - "windows-sys 0.59.0", ] [[package]] diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index f703da64..7e516346 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -34,7 +34,7 @@ utils = { path = "../utils" } polly = { path = "../polly" } rutabaga_gfx = { path = "../rutabaga_gfx", features = ["virgl_renderer", "virgl_renderer_next"], optional = true } -imago = { version = "0.1.2", features = ["sync-wrappers", "vm-memory"] } +imago = { path = "../imago", features = ["sync-wrappers", "vm-memory"] } [target.'cfg(target_os = "macos")'.dependencies] hvf = { path = "../hvf" } diff --git a/src/imago/.cargo_vcs_info.json b/src/imago/.cargo_vcs_info.json new file mode 100644 index 00000000..4ff846d6 --- /dev/null +++ b/src/imago/.cargo_vcs_info.json @@ -0,0 +1,6 @@ +{ + "git": { + "sha1": "6d4fbca7dd85c4d740261c91f0350d3403cc6ee5" + }, + "path_in_vcs": "" +} \ No newline at end of file diff --git a/src/imago/.gitignore b/src/imago/.gitignore new file mode 100644 index 00000000..ea8c4bf7 --- /dev/null +++ b/src/imago/.gitignore @@ -0,0 +1 @@ +/target diff --git a/src/imago/Cargo.toml b/src/imago/Cargo.toml new file mode 100644 index 00000000..75b041d8 --- /dev/null +++ b/src/imago/Cargo.toml @@ -0,0 +1,71 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +name = "imago" +version = "0.1.3" +build = "build.rs" +autolib = false +autobins = false +autoexamples = false +autotests = false +autobenches = false +description = "A library for accessing virtual machine disk images." 
+readme = "README.md" +license = "MIT" +repository = "https://gitlab.com/hreitz/imago" + +[package.metadata.docs.rs] +all-features = true + +[features] +default = [] +sync-wrappers = [] +vm-memory = ["dep:vm-memory"] + +[lib] +name = "imago" +path = "src/lib.rs" + +[dependencies.async-trait] +version = "0.1" + +[dependencies.bincode] +version = "1.3" + +[dependencies.miniz_oxide] +version = "0.8" +features = ["std"] + +[dependencies.serde] +version = "1.0" +features = ["derive"] + +[dependencies.tokio] +version = "1" +features = [ + "rt", + "sync", +] + +[dependencies.tracing] +version = "0.1" + +[dependencies.vm-memory] +version = "0.16" +optional = true + +[build-dependencies.rustc_version] +version = "0.4.0" + +[target."cfg(unix)".dependencies.libc] +version = "0.2" diff --git a/src/imago/LICENSE b/src/imago/LICENSE new file mode 100644 index 00000000..c8f51b0b --- /dev/null +++ b/src/imago/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2024 imago contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/src/imago/README.md b/src/imago/README.md new file mode 100644 index 00000000..2849f443 --- /dev/null +++ b/src/imago/README.md @@ -0,0 +1,76 @@ +# Imago + +Provides access to VM image formats. 
+
+Simple example (requires the `sync-wrappers` feature):
+```rust
+use imago::file::File;
+use imago::qcow2::Qcow2;
+use imago::SyncFormatAccess;
+use std::fs::OpenOptions;
+
+// Produce read-only qcow2 instance using purely `File` for storage
+let mut qcow2 = Qcow2::<File>::open_path_sync("image.qcow2", false)?;
+qcow2.open_implicit_dependencies_sync()?;
+
+let qcow2 = SyncFormatAccess::new(qcow2)?;
+
+let mut buf = vec![0u8; 512];
+qcow2.read(&mut buf, 0)?;
+```
+
+Another example, using the native async interface instead of sync wrapper functions, explicitly
+overriding the implicit references contained in qcow2 files, and showcasing the use of different
+types of storage (specifically normal files and null storage):
+```rust
+use imago::file::File;
+use imago::null::Null;
+use imago::qcow2::Qcow2;
+use imago::raw::Raw;
+use imago::{DynStorage, FormatAccess, Storage, StorageOpenOptions};
+use std::sync::Arc;
+
+let qcow2_file_opts = StorageOpenOptions::new()
+    .write(true)
+    .filename(String::from("image.qcow2"));
+let qcow2_file = File::open(qcow2_file_opts).await?;
+
+// Produce qcow2 instance with arbitrary (and potentially mixed) storage instances
+let mut qcow2 =
+    Qcow2::<Box<dyn DynStorage>, Arc<FormatAccess<Box<dyn DynStorage>>>>::open_image(Box::new(qcow2_file), true)
+        .await?;
+
+let backing_storage: Box<dyn DynStorage> = Box::new(Null::new(0));
+let backing = Raw::open_image(backing_storage, false).await?;
+let backing = Arc::new(FormatAccess::new(backing));
+qcow2.set_backing(Some(Arc::clone(&backing)));
+
+// Open potentially remaining dependencies (like an external data file)
+qcow2.open_implicit_dependencies().await?;
+
+let qcow2 = FormatAccess::new(qcow2);
+
+let mut buf = vec![0u8; 512];
+qcow2.read(&mut buf, 0).await?;
+
+qcow2.flush().await?;
+```
+
+# Flushing
+
+Given that `AsyncDrop` is not stable yet (and probably will not be stable for a long time),
+callers must ensure that images are properly flushed before dropping them, i.e. call
+`.flush().await` on any image that is not read-only.
+
+(The synchronous wrapper `SyncFormatAccess` does perform a synchronous flush in its `Drop`
+implementation.)
+
+# Features
+
+- `sync-wrappers`: Provide synchronous wrappers for the native `async` interface.  Note that
+  these build a `tokio` runtime in which they run the `async` functions, so using the `async`
+  interface is definitely preferred.
+
+- `vm-memory`: Provide conversion functions `IoVector::from_volatile_slice` and
+  `IoVectorMut::from_volatile_slice` to convert the vm-memory crate’s `[VolatileSlice]` arrays into
+  imago’s native I/O vectors.
diff --git a/src/imago/build.rs b/src/imago/build.rs
new file mode 100644
index 00000000..667d1dc9
--- /dev/null
+++ b/src/imago/build.rs
@@ -0,0 +1,9 @@
+use rustc_version::{version_meta, Channel};
+
+fn main() {
+    println!("cargo:rustc-check-cfg=cfg(nightly)");
+
+    if version_meta().unwrap().channel == Channel::Nightly {
+        println!("cargo:rustc-cfg=nightly");
+    }
+}
diff --git a/src/imago/rustfmt.toml b/src/imago/rustfmt.toml
new file mode 100644
index 00000000..48b16b35
--- /dev/null
+++ b/src/imago/rustfmt.toml
@@ -0,0 +1,3 @@
+edition = "2021"
+format_code_in_doc_comments = true
+imports_granularity = "Module"
diff --git a/src/imago/src/annotated.rs b/src/imago/src/annotated.rs
new file mode 100644
index 00000000..f9d016ff
--- /dev/null
+++ b/src/imago/src/annotated.rs
@@ -0,0 +1,172 @@
+//! Annotating wrapper around storage objects.
+//!
+//! Wraps other storage objects, adding an arbitrary tag to them.
+//!
+//!
This may be useful when using the “mapping” interface, to identify the storage objects returned +//! in raw mappings. +//! +//! Example: +//! ``` +//! # use imago::{FormatAccess, Mapping}; +//! # use imago::annotated::Annotated; +//! # use imago::null::Null; +//! # use imago::raw::Raw; +//! # tokio::runtime::Builder::new_current_thread() +//! # .build() +//! # .unwrap() +//! # .block_on(async move { +//! # +//! const TEST_TAG: u32 = 42; +//! +//! let disk_size = 16 << 30; +//! let test_offset = 1 << 30; +//! +//! let inner_storage = Null::new(disk_size); +//! let annotated_storage = Annotated::new(inner_storage, TEST_TAG); +//! let image = Raw::open_image(annotated_storage, false).await?; +//! let image = FormatAccess::new(image); +//! +//! let mapping = image.get_mapping(test_offset, 1).await?.0; +//! let Mapping::Raw { +//! storage, +//! offset, +//! writable, +//! } = mapping +//! else { +//! panic!("Raw mapping expected"); +//! }; +//! assert_eq!(*storage.tag(), TEST_TAG); +//! assert_eq!(offset, test_offset); +//! # +//! # Ok::<(), std::io::Error>(()) +//! # }).unwrap() +//! ``` + +use crate::io_buffers::{IoVector, IoVectorMut}; +use crate::storage::drivers::CommonStorageHelper; +use crate::{Storage, StorageOpenOptions}; +use std::fmt::{self, Debug, Display, Formatter}; +use std::io; +use std::ops::{Deref, DerefMut}; +use std::path::{Path, PathBuf}; + +/// Annotating wrapper around storage objects. +/// +/// Wraps other storage objects, adding an arbitrary tag to them. +// TODO: Remove the `Default` requirement. We want to implement `Storage::open()` if `Default` is +// implemented, though, but return an error if it is not. Doing that probably requires +// specialization, though. +#[derive(Debug)] +pub struct Annotated { + /// Wrapped storage object. + inner: S, + + /// Tag. + tag: Tag, +} + +impl Annotated { + /// Wrap `storage`, adding the tag `tag`. + pub fn new(storage: S, tag: T) -> Self { + Annotated { + inner: storage, + tag, + } + } + + /// Get the tag. + pub fn tag(&self) -> &T { + &self.tag + } + + /// Allow modifying or changing the tag. 
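+    ///
+    /// A minimal usage sketch (for illustration only; assumes `Null` storage and
+    /// a plain `u32` tag, neither of which is required by this method):
+    ///
+    /// ```no_run
+    /// # use imago::annotated::Annotated;
+    /// # use imago::null::Null;
+    /// let mut annotated = Annotated::new(Null::new(512), 1u32);
+    /// *annotated.tag_mut() = 2;
+    /// assert_eq!(*annotated.tag(), 2);
+    /// ```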
+ pub fn tag_mut(&mut self) -> &mut T { + &mut self.tag + } +} + +impl From for Annotated { + fn from(storage: S) -> Self { + Self::new(storage, T::default()) + } +} + +impl Storage for Annotated { + async fn open(opts: StorageOpenOptions) -> io::Result { + Ok(S::open(opts).await?.into()) + } + + #[cfg(feature = "sync-wrappers")] + fn open_sync(opts: StorageOpenOptions) -> io::Result { + Ok(S::open_sync(opts)?.into()) + } + + fn mem_align(&self) -> usize { + self.inner.mem_align() + } + + fn req_align(&self) -> usize { + self.inner.req_align() + } + + fn size(&self) -> io::Result { + self.inner.size() + } + + fn resolve_relative_path>(&self, relative: P) -> io::Result { + self.inner.resolve_relative_path(relative) + } + + async unsafe fn pure_readv(&self, bufv: IoVectorMut<'_>, offset: u64) -> io::Result<()> { + // Caller guarantees safety + unsafe { self.inner.pure_readv(bufv, offset) }.await + } + + async unsafe fn pure_writev(&self, bufv: IoVector<'_>, offset: u64) -> io::Result<()> { + // Caller guarantees safety + unsafe { self.inner.pure_writev(bufv, offset) }.await + } + + async unsafe fn pure_write_zeroes(&self, offset: u64, length: u64) -> io::Result<()> { + // Caller guarantees safety + unsafe { self.inner.pure_write_zeroes(offset, length) }.await + } + + async unsafe fn pure_discard(&self, offset: u64, length: u64) -> io::Result<()> { + // Caller guarantees safety + unsafe { self.inner.pure_discard(offset, length) }.await + } + + async fn flush(&self) -> io::Result<()> { + self.inner.flush().await + } + + async fn sync(&self) -> io::Result<()> { + self.inner.sync().await + } + + fn get_storage_helper(&self) -> &CommonStorageHelper { + // Share storage helper from inner (to e.g. get same request serialization) + self.inner.get_storage_helper() + } +} + +impl Deref for Annotated { + type Target = S; + + fn deref(&self) -> &S { + &self.inner + } +} + +impl DerefMut for Annotated { + fn deref_mut(&mut self) -> &mut S { + &mut self.inner + } +} + +impl Display for Annotated { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "annotated({})[{}]", self.tag, self.inner) + } +} diff --git a/src/imago/src/async_lru_cache.rs b/src/imago/src/async_lru_cache.rs new file mode 100644 index 00000000..72c66438 --- /dev/null +++ b/src/imago/src/async_lru_cache.rs @@ -0,0 +1,429 @@ +//! Provides a least-recently-used cache with async access. +//! +//! To operate, this cache is bound to an I/O back-end object that provides the loading and +//! flushing of cache entries. +//! +//! Also supports inter-cache dependency, e.g. for when the qcow2 L2 table cache needs to be +//! flushed before the refblock cache, because some clusters were freed (so the L2 references need +//! to be cleared before the clusters are deallocated). + +#![allow(dead_code)] + +use crate::vector_select::FutureVector; +use async_trait::async_trait; +use std::collections::HashMap; +use std::fmt::Debug; +use std::hash::Hash; +use std::io; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use tokio::sync::{Mutex, MutexGuard, RwLock, RwLockWriteGuard}; +use tracing::{error, span, trace, Level}; + +/// Cache entry structure, wrapping the cached object. +pub(crate) struct AsyncLruCacheEntry { + /// Cached object. + /// + /// Always set during operation, only cleared when trying to unwrap the `Arc` on eviction. + value: Option>, + + /// When this entry was last accessed. + last_used: AtomicUsize, +} + +/// Least-recently-used cache with async access. 
+struct AsyncLruCacheInner< + Key: Clone + Copy + Debug + PartialEq + Eq + Hash + Send + Sync, + Value: Send + Sync, + IoBackend: AsyncLruCacheBackend, +> { + /// I/O back-end that performs loading and flushing of cache entries. + backend: IoBackend, + + /// Cache entries. + map: RwLock>>, + + /// Flush dependencies (flush these first). + flush_before: Mutex>>, + + /// Monotonically increasing counter to generate “timestamps”. + lru_timer: AtomicUsize, + + /// Upper limit of how many entries to cache. + limit: usize, +} + +/// Least-recently-used cache with async access. +/// +/// Keeps the least recently used entries up to a limited count. Accessing and flushing is +/// async-aware. +/// +/// `K` is the key used to uniquely identify cache entries, `V` is the cached data. +pub(crate) struct AsyncLruCache< + K: Clone + Copy + Debug + PartialEq + Eq + Hash + Send + Sync, + V: Send + Sync, + B: AsyncLruCacheBackend, +>(Arc>); + +/// Internal trait used to implement inter-cache flush dependencies. +#[async_trait(?Send)] +trait FlushableCache: Send + Sync { + /// Flush the cache. + async fn flush(&self) -> io::Result<()>; + + /// Check of circular dependencies. + /// + /// Return `true` if (and only if) `other` is already a transitive dependency of `self`. + async fn check_circular(&self, other: &Arc) -> bool; +} + +/// Provides loading and flushing for cache entries. +pub(crate) trait AsyncLruCacheBackend: Send + Sync { + /// Key type. + type Key: Clone + Copy + Debug + PartialEq + Eq + Hash + Send + Sync; + /// Value (object) type. + type Value: Send + Sync; + + /// Load the given object. + #[allow(async_fn_in_trait)] // No need for Send + async fn load(&self, key: Self::Key) -> io::Result; + + /// Flush the given object. + /// + /// The implementation should itself check whether the object is dirty; `flush()` is called for + /// all evicted cache entries, regardless of whether they actually are dirty or not. + #[allow(async_fn_in_trait)] // No need for Send + async fn flush(&self, key: Self::Key, value: Arc) -> io::Result<()>; +} + +impl< + K: Clone + Copy + Debug + PartialEq + Eq + Hash + Send + Sync, + V: Send + Sync, + B: AsyncLruCacheBackend, + > AsyncLruCache +{ + /// Create a new cache. + /// + /// `size` is the maximum number of entries to keep in the cache. + pub fn new(backend: B, size: usize) -> Self { + AsyncLruCache(Arc::new(AsyncLruCacheInner { + backend, + map: Default::default(), + flush_before: Default::default(), + lru_timer: AtomicUsize::new(0), + limit: size, + })) + } + + /// Retrieve an entry from the cache. + /// + /// If there is no entry yet, run `read()` to generate it. If then there are more entries in + /// the cache than its limit, flush out the oldest entry via `flush()`. + pub async fn get_or_insert(&self, key: K) -> io::Result> { + self.0.get_or_insert(key).await + } + + /// Force-insert the given object into the cache. + /// + /// If there is an existing object under that key, it is flushed first. + pub async fn insert(&self, key: K, value: Arc) -> io::Result<()> { + self.0.insert(key, value).await + } + + /// Flush all cache entries. + /// + /// Those entries are not evicted, but remain in the cache. + pub async fn flush(&self) -> io::Result<()> { + self.0.flush().await + } +} + +impl< + K: Clone + Copy + Debug + PartialEq + Eq + Hash + Send + Sync + 'static, + V: Send + Sync + 'static, + B: AsyncLruCacheBackend + 'static, + > AsyncLruCache +{ + /// Set up a flush dependency. 
+ /// + /// Ensure that before anything in this cache is flushed, `flush_before` is flushed first. + pub async fn depend_on< + K2: Clone + Copy + Debug + PartialEq + Eq + Hash + Send + Sync + 'static, + V2: Send + Sync + 'static, + B2: AsyncLruCacheBackend + 'static, + >( + &self, + other: &AsyncLruCache, + ) -> io::Result<()> { + let _span = span!( + Level::TRACE, + "AsyncLruCache::depend_on", + self = Arc::as_ptr(&self.0) as usize, + other = Arc::as_ptr(&other.0) as usize + ) + .entered(); + + let cloned: Arc> = Arc::clone(&other.0); + let cloned: Arc = cloned; + + loop { + { + let mut locked = self.0.flush_before.lock().await; + // Shouldn’t be long, so linear search seems fine + if locked.iter().any(|x| Arc::ptr_eq(x, &cloned)) { + break; + } + + let self_arc: Arc> = Arc::clone(&self.0); + let self_arc: Arc = self_arc; + if !other.0.check_circular(&self_arc).await { + trace!("No circular dependency, entering new dependency"); + locked.push(cloned); + break; + } + } + + trace!("Circular dependency detected, flushing other cache first"); + + other.0.flush().await?; + } + + Ok(()) + } +} + +impl< + K: Clone + Copy + Debug + PartialEq + Eq + Hash + Send + Sync, + V: Send + Sync, + B: AsyncLruCacheBackend, + > AsyncLruCacheInner +{ + /// Flush all dependencies. + /// + /// Flush all caches that must be flushed before this one. Remove all successfully flushed + /// caches from our dependency list. + /// + /// Call with a guard that should be dropped only after this cache is flushed, so that no new + /// dependencies can enter while we are still flushing this cache. + async fn flush_dependencies( + flush_before: &mut MutexGuard<'_, Vec>>, + ) -> io::Result<()> { + let _span = span!(Level::TRACE, "AsyncLruCache::flush_dependencies").entered(); + + while let Some(dep) = flush_before.pop() { + trace!("Flushing dependency {:?}", Arc::as_ptr(&dep) as *const _); + if let Err(err) = dep.flush().await { + flush_before.push(dep); + return Err(err); + } + } + Ok(()) + } + + /// Ensure there is at least one free entry in the cache. + /// + /// Do this by evicting (flushing) existing entries, if necessary. + async fn ensure_free_entry( + &self, + map: &mut RwLockWriteGuard<'_, HashMap>>, + ) -> io::Result<()> { + let _span = span!( + Level::TRACE, + "AsyncLruCache::ensure_free_entry", + self = &self as *const _ as usize + ) + .entered(); + + while map.len() >= self.limit { + trace!("{} / {} used", map.len(), self.limit); + + let now = self.lru_timer.load(Ordering::Relaxed); + let (evicted_object, key, last_used) = loop { + let oldest = map.iter().fold((0, None), |oldest, (key, entry)| { + // Cannot drop entries that are in use + if Arc::strong_count(entry.value()) > 1 { + return oldest; + } + + let age = now.wrapping_sub(entry.last_used.load(Ordering::Relaxed)); + if age >= oldest.0 { + (age, Some(*key)) + } else { + oldest + } + }); + + let Some(oldest_key) = oldest.1 else { + error!("Cannot evict entry from cache; everything is in use"); + return Err(io::Error::other( + "Cannot evict entry from cache; everything is in use", + )); + }; + + trace!( + "Removing entry with key {:?}, aged {}", + oldest_key, + oldest.0 + ); + + let mut oldest_entry = map.remove(&oldest_key).unwrap(); + match Arc::try_unwrap(oldest_entry.value.take().unwrap()) { + Ok(object) => { + break ( + object, + oldest_key, + oldest_entry.last_used.load(Ordering::Relaxed), + ) + } + Err(arc) => { + trace!("Entry is still in use, retrying"); + + // Found a race, retry. 
+ // (`Arc::strong_count()` should return `1` in the next iteration, + // filtering this entry out.) + oldest_entry.value = Some(arc); + } + } + }; + + let mut dep_guard = self.flush_before.lock().await; + Self::flush_dependencies(&mut dep_guard).await?; + let obj = Arc::new(evicted_object); + trace!("Flushing {key:?}"); + if let Err(err) = self.backend.flush(key, Arc::clone(&obj)).await { + map.insert( + key, + AsyncLruCacheEntry { + value: Some(obj), + last_used: last_used.into(), + }, + ); + return Err(err); + } + let _ = Arc::into_inner(obj).expect("flush() must not clone the object"); + } + + Ok(()) + } + + /// Retrieve an entry from the cache. + /// + /// If there is no entry yet, run `read()` to generate it. If then there are more entries in + /// the cache than its limit, flush out the oldest entry via `flush()`. + async fn get_or_insert(&self, key: K) -> io::Result> { + { + let map = self.map.read().await; + if let Some(entry) = map.get(&key) { + entry.last_used.store( + self.lru_timer.fetch_add(1, Ordering::Relaxed), + Ordering::Relaxed, + ); + return Ok(Arc::clone(entry.value())); + } + } + + let mut map = self.map.write().await; + if let Some(entry) = map.get(&key) { + entry.last_used.store( + self.lru_timer.fetch_add(1, Ordering::Relaxed), + Ordering::Relaxed, + ); + return Ok(Arc::clone(entry.value())); + } + + self.ensure_free_entry(&mut map).await?; + + let object = Arc::new(self.backend.load(key).await?); + + let new_entry = AsyncLruCacheEntry { + value: Some(Arc::clone(&object)), + last_used: AtomicUsize::new(self.lru_timer.fetch_add(1, Ordering::Relaxed)), + }; + map.insert(key, new_entry); + + Ok(object) + } + + /// Force-insert the given object into the cache. + /// + /// If there is an existing object under that key, it is flushed first. + async fn insert(&self, key: K, value: Arc) -> io::Result<()> { + let mut map = self.map.write().await; + if let Some(entry) = map.get_mut(&key) { + entry.last_used.store( + self.lru_timer.fetch_add(1, Ordering::Relaxed), + Ordering::Relaxed, + ); + let mut dep_guard = self.flush_before.lock().await; + Self::flush_dependencies(&mut dep_guard).await?; + self.backend.flush(key, Arc::clone(entry.value())).await?; + entry.value = Some(value); + } else { + self.ensure_free_entry(&mut map).await?; + + let new_entry = AsyncLruCacheEntry { + value: Some(value), + last_used: AtomicUsize::new(self.lru_timer.fetch_add(1, Ordering::Relaxed)), + }; + map.insert(key, new_entry); + } + + Ok(()) + } + + /// Flush all cache entries. + /// + /// Those entries are not evicted, but remain in the cache. + async fn flush(&self) -> io::Result<()> { + let _span = span!( + Level::TRACE, + "AsyncLruCache::flush", + self = &self as *const _ as usize + ) + .entered(); + + let mut futs = FutureVector::new(); + + let mut dep_guard = self.flush_before.lock().await; + Self::flush_dependencies(&mut dep_guard).await?; + + let map = self.map.read().await; + for (key, entry) in map.iter() { + let key = *key; + let object = Arc::clone(entry.value()); + trace!("Flushing {key:?}"); + futs.push(Box::pin(self.backend.flush(key, object))); + } + + futs.discarding_join().await + } +} + +impl AsyncLruCacheEntry { + /// Return the cached object. 
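+    ///
+    /// The value is only ever `None` transiently during eviction (see
+    /// `ensure_free_entry()`), after the entry has already been removed from the
+    /// map, so the `unwrap()` here is expected not to fail.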
+ fn value(&self) -> &Arc { + self.value.as_ref().unwrap() + } +} + +#[async_trait(?Send)] +impl< + K: Clone + Copy + Debug + PartialEq + Eq + Hash + Send + Sync, + V: Send + Sync, + B: AsyncLruCacheBackend, + > FlushableCache for AsyncLruCacheInner +{ + async fn flush(&self) -> io::Result<()> { + AsyncLruCacheInner::::flush(self).await + } + + async fn check_circular(&self, other: &Arc) -> bool { + let deps = self.flush_before.lock().await; + for dep in deps.iter() { + if Arc::ptr_eq(dep, other) { + return true; + } + } + false + } +} diff --git a/src/imago/src/file.rs b/src/imago/src/file.rs new file mode 100644 index 00000000..75a859e4 --- /dev/null +++ b/src/imago/src/file.rs @@ -0,0 +1,456 @@ +//! Use a plain as storage. + +use crate::io_buffers::{IoVector, IoVectorMut}; +use crate::storage::drivers::CommonStorageHelper; +use crate::{Storage, StorageOpenOptions}; +use std::fmt::{self, Display, Formatter}; +use std::fs; +use std::io::{self, Seek, SeekFrom, Write}; +#[cfg(any(target_os = "linux", target_os = "macos"))] +use std::os::fd::AsRawFd; +#[cfg(all(unix, not(target_os = "macos")))] +use std::os::unix::fs::OpenOptionsExt; +#[cfg(windows)] +use std::os::windows::fs::{FileExt, OpenOptionsExt}; +#[cfg(windows)] +use std::os::windows::io::AsRawHandle; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::RwLock; +#[cfg(windows)] +use windows_sys::Win32::System::Ioctl::{FILE_ZERO_DATA_INFORMATION, FSCTL_SET_ZERO_DATA}; +#[cfg(windows)] +use windows_sys::Win32::System::IO::DeviceIoControl; + +/// Use a plain file as storage objects. +#[derive(Debug)] +pub struct File { + /// The file. + file: RwLock, + + /// Whether we are using direct I/O. + direct_io: bool, + + /// For debug purposes, and to resolve relative filenames. + filename: Option, + + /// Cached file length. + /// + /// Third parties changing the length concurrently is pretty certain to break things anyway. + size: AtomicU64, + + /// Storage helper. + common_storage_helper: CommonStorageHelper, +} + +impl TryFrom for File { + type Error = io::Error; + + /// Use the given existing `std::fs::File`. + /// + /// Convert the given existing `std::fs::File` object into an imago storage object. + /// + /// When using this, the resulting object will not know its own filename. That makes it + /// impossible to auto-resolve relative paths to it, e.g. qcow2 backing file names. + fn try_from(mut file: fs::File) -> io::Result { + let size = file.seek(SeekFrom::End(0))?; + + Ok(File { + file: RwLock::new(file), + // TODO: Find out, or better yet, drop `direct_io` and just probe the alignment. 
+ direct_io: false, + filename: None, + size: AtomicU64::new(size), + common_storage_helper: Default::default(), + }) + } +} + +impl Storage for File { + async fn open(opts: StorageOpenOptions) -> io::Result { + Self::do_open_sync(opts) + } + + #[cfg(feature = "sync-wrappers")] + fn open_sync(opts: StorageOpenOptions) -> io::Result { + Self::do_open_sync(opts) + } + + fn mem_align(&self) -> usize { + // TODO: Probe + if self.direct_io { + 4096 + } else { + 1 + } + } + + fn req_align(&self) -> usize { + // TODO: Probe + if self.direct_io { + 4096 + } else { + 1 + } + } + + fn size(&self) -> io::Result { + Ok(self.size.load(Ordering::Relaxed)) + } + + fn resolve_relative_path>(&self, relative: P) -> io::Result { + let relative = relative.as_ref(); + + if relative.is_absolute() { + return Ok(relative.to_path_buf()); + } + + let filename = self + .filename + .as_ref() + .ok_or_else(|| io::Error::other("No filename set for base image"))?; + + let dirname = filename + .parent() + .ok_or_else(|| io::Error::other("Invalid base image filename set"))?; + + Ok(dirname.join(relative)) + } + + #[cfg(unix)] + async unsafe fn pure_readv( + &self, + mut bufv: IoVectorMut<'_>, + mut offset: u64, + ) -> io::Result<()> { + while !bufv.is_empty() { + let iovec = unsafe { bufv.as_iovec() }; + let result = unsafe { + libc::preadv( + self.file.read().unwrap().as_raw_fd(), + iovec.as_ptr(), + iovec.len() as libc::c_int, + offset + .try_into() + .map_err(|_| io::Error::other("Read offset overflow"))?, + ) + }; + + let len = if result < 0 { + let err = io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::EINTR) { + continue; + } + return Err(err); + } else { + result as u64 + }; + + if len == 0 { + // End of file + bufv.fill(0); + break; + } + + bufv = bufv.split_tail_at(len); + offset = offset + .checked_add(len) + .ok_or_else(|| io::Error::other("Read offset overflow"))?; + } + + Ok(()) + } + + #[cfg(windows)] + async unsafe fn pure_readv(&self, bufv: IoVectorMut<'_>, mut offset: u64) -> io::Result<()> { + for mut buffer in bufv.into_inner() { + let mut buffer: &mut [u8] = &mut buffer; + while !buffer.is_empty() { + let len = if offset >= self.size.load(Ordering::Relaxed) { + buffer.fill(0); + buffer.len() + } else { + self.file.write().unwrap().seek_read(buffer, offset)? + }; + offset = offset + .checked_add(len as u64) + .ok_or_else(|| io::Error::other("Read offset overflow"))?; + buffer = buffer.split_at_mut(len).1; + } + } + Ok(()) + } + + #[cfg(unix)] + async unsafe fn pure_writev(&self, mut bufv: IoVector<'_>, mut offset: u64) -> io::Result<()> { + while !bufv.is_empty() { + let iovec = unsafe { bufv.as_iovec() }; + let result = unsafe { + libc::pwritev( + self.file.read().unwrap().as_raw_fd(), + iovec.as_ptr(), + iovec.len() as libc::c_int, + offset + .try_into() + .map_err(|_| io::Error::other("Write offset overflow"))?, + ) + }; + + let len = if result < 0 { + let err = io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::EINTR) { + continue; + } + return Err(err); + } else { + result as u64 + }; + + if result == 0 { + // Should not happen, i.e. 
is an error + return Err(io::ErrorKind::WriteZero.into()); + } + + bufv = bufv.split_tail_at(len); + offset = offset + .checked_add(len) + .ok_or_else(|| io::Error::other("Write offset overflow"))?; + self.size.fetch_max(offset, Ordering::Relaxed); + } + + Ok(()) + } + + #[cfg(windows)] + async unsafe fn pure_writev(&self, bufv: IoVector<'_>, mut offset: u64) -> io::Result<()> { + for buffer in bufv.into_inner() { + let mut buffer: &[u8] = &buffer; + while !buffer.is_empty() { + let len = self.file.write().unwrap().seek_write(buffer, offset)?; + offset = offset + .checked_add(len as u64) + .ok_or_else(|| io::Error::other("Write offset overflow"))?; + self.size.fetch_max(offset, Ordering::Relaxed); + buffer = buffer.split_at(len).1; + } + } + Ok(()) + } + + #[cfg(any(target_os = "linux", windows, target_os = "macos"))] + async unsafe fn pure_write_zeroes(&self, offset: u64, length: u64) -> io::Result<()> { + // All of our discard methods also ensure the range reads back as zeroes + unsafe { self.pure_discard(offset, length) }.await + } + + // Beware when adding new discard methods: This is called by `pure_write_zeroes()`, so the + // current expectation is that discarded ranges will read back as zeroes. If the new method + // does not guarantee that, you will need to modify `pure_write_zeroes()`. + #[cfg(target_os = "linux")] + async unsafe fn pure_discard(&self, offset: u64, length: u64) -> io::Result<()> { + if self.try_discard_by_truncate(offset, length)? { + return Ok(()); + } + + // If offset or length are too big, just skip discarding. + let Ok(offset) = libc::off_t::try_from(offset) else { + return Ok(()); + }; + let Ok(length) = libc::off_t::try_from(length) else { + return Ok(()); + }; + + let file = self.file.read().unwrap(); + // Safe: File descriptor is valid, and the rest are simple integer parameters. + let ret = unsafe { + libc::fallocate( + file.as_raw_fd(), + libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE, + offset, + length, + ) + }; + if ret < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + // Beware when adding new discard methods: This is called by `pure_write_zeroes()`, so the + // current expectation is that discarded ranges will read back as zeroes. If the new method + // does not guarantee that, you will need to modify `pure_write_zeroes()`. + #[cfg(windows)] + async unsafe fn pure_discard(&self, offset: u64, length: u64) -> io::Result<()> { + if self.try_discard_by_truncate(offset, length)? { + return Ok(()); + } + + // If offset or length are too big, just skip discarding. + let Ok(offset) = i64::try_from(offset) else { + return Ok(()); + }; + let Ok(length) = i64::try_from(length) else { + return Ok(()); + }; + + let end = offset.saturating_add(length).saturating_add(1); + let params = FILE_ZERO_DATA_INFORMATION { + FileOffset: offset, + BeyondFinalZero: end, + }; + let mut _returned = 0; + let file = self.file.read().unwrap(); + // Safe: File handle is valid, mandatory pointers (input, returned length) are passed and + // valid, the parameter type matches the call, and the input size matches the object + // passed. 
+ let ret = unsafe { + DeviceIoControl( + file.as_raw_handle(), + FSCTL_SET_ZERO_DATA, + (¶ms as *const FILE_ZERO_DATA_INFORMATION).cast::(), + size_of_val(¶ms) as u32, + std::ptr::null_mut(), + 0, + &mut _returned, + std::ptr::null_mut(), + ) + }; + if ret == 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + // Beware when adding new discard methods: This is called by `pure_write_zeroes()`, so the + // current expectation is that discarded ranges will read back as zeroes. If the new method + // does not guarantee that, you will need to modify `pure_write_zeroes()`. + #[cfg(target_os = "macos")] + async unsafe fn pure_discard(&self, offset: u64, length: u64) -> io::Result<()> { + if self.try_discard_by_truncate(offset, length)? { + return Ok(()); + } + + // If offset or length are too big, just skip discarding. + let Ok(offset) = libc::off_t::try_from(offset) else { + return Ok(()); + }; + let Ok(length) = libc::off_t::try_from(length) else { + return Ok(()); + }; + + let params = libc::fpunchhole_t { + fp_flags: 0, + reserved: 0, + fp_offset: offset, + fp_length: length, + }; + let file = self.file.read().unwrap(); + // Safe: FD is valid, passed pointer is valid and its type matches the call. + let ret = unsafe { libc::fcntl(file.as_raw_fd(), libc::F_PUNCHHOLE, ¶ms) }; + if ret < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + async fn flush(&self) -> io::Result<()> { + self.file.write().unwrap().flush() + } + + async fn sync(&self) -> io::Result<()> { + self.file.write().unwrap().sync_all() + } + + fn get_storage_helper(&self) -> &CommonStorageHelper { + &self.common_storage_helper + } +} + +impl File { + /// Implementation for [`File::open()`] and [`File::open_sync()`]. + fn do_open_sync(opts: StorageOpenOptions) -> io::Result { + let Some(filename) = opts.filename else { + return Err(io::Error::other("Filename required")); + }; + + let mut file_opts = fs::OpenOptions::new(); + file_opts.read(true).write(opts.writable); + #[cfg(not(target_os = "macos"))] + if opts.direct { + file_opts.custom_flags( + #[cfg(unix)] + libc::O_DIRECT, + #[cfg(windows)] + windows_sys::Win32::Storage::FileSystem::FILE_FLAG_NO_BUFFERING, + ); + } + + let filename_owned = filename.to_owned(); + let mut file = file_opts.open(filename)?; + + let size = file.seek(SeekFrom::End(0))?; + + #[cfg(target_os = "macos")] + if opts.direct { + // Safe: We check the return value. + let ret = unsafe { libc::fcntl(file.as_raw_fd(), libc::F_NOCACHE, 1) }; + if ret < 0 { + let err = io::Error::last_os_error(); + return Err(io::Error::new( + err.kind(), + format!("Failed to disable host cache: {err}"), + )); + } + } + + Ok(File { + file: RwLock::new(file), + direct_io: opts.direct, + filename: Some(filename_owned), + size: AtomicU64::new(size), + common_storage_helper: Default::default(), + }) + } + + /// Attempt to discard range by truncating the file. + /// + /// If the given range is at the end of the file, discard it by simply truncating the file. + /// Return `true` on success. + /// + /// If the range is not at the end of the file, i.e. another method of discarding is needed, + /// return `false`. 
+ fn try_discard_by_truncate(&self, offset: u64, length: u64) -> io::Result { + // Prevent modifications to the file length + #[allow(clippy::readonly_write_lock)] + let file = self.file.write().unwrap(); + + let size = self.size.load(Ordering::Relaxed); + if offset >= size { + // Nothing to do + return Ok(true); + } + + // If `offset + length` overflows, we can just assume it ends at `size`. (Anything past + // `size is irrelevant anyway.) + let end = offset.checked_add(length).unwrap_or(size); + if end < size { + return Ok(false); + } + + file.set_len(offset)?; + Ok(true) + } +} + +impl Display for File { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + if let Some(filename) = self.filename.as_ref() { + write!(f, "file:{filename:?}") + } else { + write!(f, "file:") + } + } +} diff --git a/src/imago/src/format/access.rs b/src/imago/src/format/access.rs new file mode 100644 index 00000000..5d16ab36 --- /dev/null +++ b/src/imago/src/format/access.rs @@ -0,0 +1,438 @@ +//! Actual public image access functionality. +//! +//! Provides access to different image formats via `FormatAccess` objects. + +use super::drivers::{self, FormatDriverInstance}; +use crate::io_buffers::{IoVector, IoVectorMut}; +use crate::vector_select::FutureVector; +use crate::{Storage, StorageExt}; +use std::fmt::{self, Display, Formatter}; +use std::{cmp, io, ptr}; + +/// Provides access to a disk image. +#[derive(Debug)] +pub struct FormatAccess { + /// Image format driver. + inner: Box>, + + /// Whether this image may be modified. + writable: bool, + + /// How many asynchronous requests to perform per read request in parallel. + read_parallelization: usize, + + /// How many asynchronous requests to perform per write request in parallel. + write_parallelization: usize, +} + +/// Fully recursive mapping information. +/// +/// Mapping information that resolves down to the storage object layer (except for special data). +#[derive(Debug)] +pub enum Mapping<'a, S: Storage> { + /// Raw data. + Raw { + /// Storage object where this data is stored. + storage: &'a S, + + /// Offset in `storage` where this data is stored. + offset: u64, + + /// Whether this mapping may be written to. + /// + /// If `true`, you can directly write to `offset` on `storage` to change the disk image’s + /// data accordingly. + /// + /// If `false`, the disk image format does not allow writing to `offset` on `storage`; a + /// new mapping must be allocated first. + writable: bool, + }, + + /// Range is to be read as zeroes. + Zero, + + /// End of file reached. + /// + /// The accompanying length is always 0. + Eof, + + /// Data is encoded in some manner, e.g. compressed or encrypted. + /// + /// Such data cannot be accessed directly, but must be interpreted by the image format driver. + Special { + /// Format layer where this special data was encountered. + layer: &'a FormatAccess, + + /// Original (“guest”) offset on `layer` to pass to `readv_special()`. + offset: u64, + }, +} + +// When adding new public methods, don’t forget to add them to sync_wrappers, too. +impl FormatAccess { + /// Wrap a format driver instance in `FormatAccess`. + /// + /// `FormatAccess` provides I/O access to disk images, based on the functionality offered by + /// the individual format drivers via `FormatDriverInstance`. + pub fn new + 'static>(inner: D) -> Self { + let writable = inner.writable(); + FormatAccess { + inner: Box::new(inner), + read_parallelization: 1, + write_parallelization: 1, + writable, + } + } + + /// Return the disk size in bytes. 
+ pub fn size(&self) -> u64 { + self.inner.size() + } + + /// Set the number of simultaneous async requests per read. + /// + /// When issuing read requests, issue this many async requests in parallel (still in a single + /// thread). The default count is `1`, i.e. no parallel requests. + pub fn set_async_read_parallelization(&mut self, count: usize) { + self.read_parallelization = count; + } + + /// Set the number of simultaneous async requests per write. + /// + /// When issuing write requests, issue this many async requests in parallel (still in a single + /// thread). The default count is `1`, i.e. no parallel requests. + pub fn set_async_write_parallelization(&mut self, count: usize) { + self.write_parallelization = count; + } + + /// Return all storage dependencies of this image. + /// + /// Includes recursive dependencies, i.e. those from other image dependencies like backing + /// images. + pub(crate) fn collect_storage_dependencies(&self) -> Vec<&S> { + self.inner.collect_storage_dependencies() + } + + /// Minimal I/O alignment, for both length and offset. + /// + /// All requests to this image should be aligned to this value, both in length and offset. + /// + /// Requests that do not match this alignment will be realigned internally, which requires + /// creating bounce buffers and read-modify-write cycles for write requests, which is costly, + /// so should be avoided. + pub fn req_align(&self) -> usize { + self.inner + .collect_storage_dependencies() + .into_iter() + .fold(1, |max, s| cmp::max(max, s.req_align())) + } + + /// Minimal memory buffer alignment, for both address and length. + /// + /// All buffers used in requests to this image should be aligned to this value, both their + /// address and length. + /// + /// Request buffers that do not match this alignment will be realigned internally, which + /// requires creating bounce buffers, which is costly, so should be avoided. + pub fn mem_align(&self) -> usize { + self.inner + .collect_storage_dependencies() + .into_iter() + .fold(1, |max, s| cmp::max(max, s.mem_align())) + } + + /// Read the data from the given mapping. + async fn read_chunk( + &self, + mut bufv: IoVectorMut<'_>, + mapping: Mapping<'_, S>, + ) -> io::Result<()> { + match mapping { + Mapping::Raw { + storage, + offset, + writable: _, + } => storage.readv(bufv, offset).await, + + Mapping::Zero | Mapping::Eof => { + bufv.fill(0); + Ok(()) + } + + // FIXME: TOCTTOU problem. Not sure how to fully fix it, if possible at all. + // (Concurrent writes can change the mapping, but the driver will have to reload the + // mapping because it cannot pass it in `NonRecursiveMapping::Special`. It may then + // find that this is no longer a “special” range. Even passing the low-level mapping + // information in `Mapping::Special` wouldn’t fully fix it, though: If concurrent + // writes change the low-level cluster type, and the driver then tries to e.g. + // decompress the data that was there, that may well fail.) + Mapping::Special { layer, offset } => layer.inner.readv_special(bufv, offset).await, + } + } + + /// Return the mapping at `offset`. + /// + /// Find what `offset` is mapped to, return that mapping information, and the length of that + /// continuous mapping (from `offset`). 
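+    ///
+    /// An illustrative sketch (mirrors the doc example in the `annotated` module;
+    /// assumes a `Null`-backed raw image purely for demonstration):
+    ///
+    /// ```no_run
+    /// # use imago::FormatAccess;
+    /// # use imago::null::Null;
+    /// # use imago::raw::Raw;
+    /// # tokio::runtime::Builder::new_current_thread()
+    /// #     .build()
+    /// #     .unwrap()
+    /// #     .block_on(async move {
+    /// let image = FormatAccess::new(Raw::open_image(Null::new(16 << 30), false).await?);
+    /// let (mapping, length) = image.get_mapping(0, 512).await?;
+    /// assert!(length > 0 && !mapping.is_eof());
+    /// # Ok::<(), std::io::Error>(())
+    /// # }).unwrap()
+    /// ```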
+ pub async fn get_mapping( + &self, + mut offset: u64, + mut max_length: u64, + ) -> io::Result<(Mapping<'_, S>, u64)> { + let mut format_layer = self; + let mut writable_gate = true; + + loop { + let (mapping, length) = format_layer.inner.get_mapping(offset, max_length).await?; + let length = std::cmp::min(length, max_length); + + match mapping { + drivers::Mapping::Raw { + storage, + offset, + writable, + } => { + return Ok(( + Mapping::Raw { + storage, + offset, + writable: writable && writable_gate, + }, + length, + )) + } + + drivers::Mapping::Indirect { + layer: recurse_layer, + offset: recurse_offset, + writable: recurse_writable, + } => { + format_layer = recurse_layer; + offset = recurse_offset; + writable_gate = recurse_writable; + max_length = length; + } + + drivers::Mapping::Zero => return Ok((Mapping::Zero, length)), + + drivers::Mapping::Eof => { + // Return EOF only on top layer, zero otherwise + return if ptr::eq(format_layer, self) { + Ok((Mapping::Eof, 0)) + } else { + Ok((Mapping::Zero, max_length)) + }; + } + + drivers::Mapping::Special { offset } => { + return Ok(( + Mapping::Special { + layer: format_layer, + offset, + }, + length, + )); + } + } + } + } + + /// Create a raw data mapping at `offset`. + /// + /// Ensure that `offset` is directly mapped to some storage object, up to a length of `length`. + /// Return the storage object, the corresponding offset there, and the continuous length that + /// we were able to map (less than or equal to `length`). + /// + /// If `overwrite` is true, the contents in the range are supposed to be overwritten and may be + /// discarded. Otherwise, they are kept. + pub async fn ensure_data_mapping( + &self, + offset: u64, + length: u64, + overwrite: bool, + ) -> io::Result<(&S, u64, u64)> { + let (storage, mapped_offset, mapped_length) = self + .inner + .ensure_data_mapping(offset, length, overwrite) + .await?; + let mapped_length = cmp::min(length, mapped_length); + assert!(mapped_length > 0); + Ok((storage, mapped_offset, mapped_length)) + } + + /// Read data at `offset` into `bufv`. + /// + /// Reads until `bufv` is filled completely, i.e. will not do short reads. When reaching the + /// end of file, the rest of `bufv` is filled with 0. + pub async fn readv(&self, mut bufv: IoVectorMut<'_>, mut offset: u64) -> io::Result<()> { + let mut workers = (self.read_parallelization > 1).then(FutureVector::new); + + while !bufv.is_empty() { + let (mapping, chunk_length) = self.get_mapping(offset, bufv.len()).await?; + if chunk_length == 0 { + assert!(mapping.is_eof()); + bufv.fill(0); + break; + } + + if let Some(workers) = workers.as_mut() { + while workers.len() >= self.read_parallelization { + workers.select().await?; + } + } + + let (chunk, remainder) = bufv.split_at(chunk_length); + bufv = remainder; + offset += chunk_length; + + if let Some(workers) = workers.as_mut() { + workers.push(Box::pin(self.read_chunk(chunk, mapping))); + } else { + self.read_chunk(chunk, mapping).await?; + } + } + + if let Some(mut workers) = workers { + workers.discarding_join().await?; + } + + Ok(()) + } + + /// Read data at `offset` into `buf`. + /// + /// Reads until `buf` is filled completely, i.e. will not do short reads. When reaching the + /// end of file, the rest of `buf` is filled with 0. + pub async fn read(&self, buf: impl Into>, offset: u64) -> io::Result<()> { + self.readv(buf.into(), offset).await + } + + /// Write data from `bufv` to `offset`. + /// + /// Writes all data from `bufv` (or returns an error), i.e. will not do short writes. 
Reaching + /// the end of file before the end of the buffer results in an error. + pub async fn writev(&self, mut bufv: IoVector<'_>, mut offset: u64) -> io::Result<()> { + if !self.writable { + return Err(io::Error::other("Image is read-only")); + } + + // Limit to disk size + let disk_size = self.inner.size(); + if offset >= disk_size { + return Ok(()); + } + if bufv.len() > disk_size - offset { + bufv = bufv.split_at(disk_size - offset).0; + } + + let mut workers = (self.write_parallelization > 1).then(FutureVector::new); + + while !bufv.is_empty() { + let (storage, st_offset, st_length) = + self.ensure_data_mapping(offset, bufv.len(), true).await?; + + if let Some(workers) = workers.as_mut() { + while workers.len() >= self.write_parallelization { + workers.select().await?; + } + } + + let (chunk, remainder) = bufv.split_at(st_length); + bufv = remainder; + offset += st_length; + + if let Some(workers) = workers.as_mut() { + workers.push(Box::pin(storage.writev(chunk, st_offset))); + } else { + storage.writev(chunk, st_offset).await?; + } + } + + if let Some(mut workers) = workers { + workers.discarding_join().await?; + } + + Ok(()) + } + + /// Write data from `buf` to `offset`. + /// + /// Writes all data from `bufv` (or returns an error), i.e. will not do short writes. Reaching + /// the end of file before the end of the buffer results in an error. + pub async fn write(&self, buf: impl Into>, offset: u64) -> io::Result<()> { + self.writev(buf.into(), offset).await + } + + /// Flush internal buffers. Always call this before drop! + /// + /// Does not necessarily sync those buffers to disk. When using `flush()`, consider whether + /// you want to call `sync()` afterwards. + /// + /// Because of the current lack of stable `async_drop`, you must manually call this before + /// dropping a `FormatAccess` instance! (Not necessarily for read-only images, though.) + #[allow(async_fn_in_trait)] // No need for Send + pub async fn flush(&self) -> io::Result<()> { + self.inner.flush().await + } + + /// Sync data already written to the storage hardware. + /// + /// This does not necessarily include flushing internal buffers, i.e. `flush`. When using + /// `sync()`, consider whether you want to call `flush()` before it. + #[allow(async_fn_in_trait)] // No need for Send + pub async fn sync(&self) -> io::Result<()> { + self.inner.sync().await + } +} + +impl Mapping<'_, S> { + /// Return `true` if and only if this mapping signifies the end of file. 
+ pub fn is_eof(&self) -> bool { + matches!(self, Mapping::Eof) + } +} + +impl Display for FormatAccess { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + self.inner.fmt(f) + } +} + +impl Display for Mapping<'_, S> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Mapping::Raw { + storage, + offset, + writable, + } => { + let writable = if *writable { "rw" } else { "ro" }; + write!(f, "{storage}:0x{offset:x}/{writable}") + } + + Mapping::Zero => write!(f, ""), + + Mapping::Eof => write!(f, ""), + + Mapping::Special { layer, offset } => { + write!(f, "") + } + } + } +} + +/* +#[cfg(feature = "async-drop")] +impl std::future::AsyncDrop for FormatAccess { + type Dropper<'a> = std::pin::Pin + 'a>> where S: 'a; + + fn async_drop(self: std::pin::Pin<&mut Self>) -> Self::Dropper<'_> { + Box::pin(async move { + if let Err(err) = self.flush().await { + let inner = &self.inner; + tracing::error!("Failed to flush {inner}: {err}"); + } + }) + } +} +*/ diff --git a/src/imago/src/format/drivers.rs b/src/imago/src/format/drivers.rs new file mode 100644 index 00000000..2aa27994 --- /dev/null +++ b/src/imago/src/format/drivers.rs @@ -0,0 +1,141 @@ +//! Internal image format driver interface. +//! +//! Provides the internal interface for image format drivers to provide their services, on which +//! the publically visible interface [`FormatAccess`] is built. + +use crate::io_buffers::IoVectorMut; +use crate::{FormatAccess, Storage}; +use async_trait::async_trait; +use std::fmt::{Debug, Display}; +use std::io; + +/// Implementation of a disk image format. +#[async_trait(?Send)] +pub trait FormatDriverInstance: Debug + Display + Send + Sync { + /// Type of storage used. + type Storage: Storage; + + /// Size of the disk represented by this image. + fn size(&self) -> u64; + + /// Recursively collect all storage objects associated with this image. + /// + /// “Recursive” means to recurse to other images like e.g. a backing file. + fn collect_storage_dependencies(&self) -> Vec<&Self::Storage>; + + /// Return whether this image may be modified. + /// + /// This state must not change via interior mutability, i.e. as long as this FDI is wrapped in + /// a `FormatAccess`, its writability must remain constant. + fn writable(&self) -> bool; + + /// Return the mapping at `offset`. + /// + /// Find what `offset` is mapped to, return that mapping information, and the length of that + /// continuous mapping (from `offset`). + /// + /// To determine that continuous mapping length, drivers should not perform additional I/O + /// beyond what is necessary to get mapping information for `offset` itself. + /// + /// `max_length` is a hint how long of a range is required at all, but the returned length may + /// exceed that value if that simplifies the implementation. + /// + /// The returned length must only be 0 if `Mapping::Eof` is returned. + async fn get_mapping<'a>( + &'a self, + offset: u64, + max_length: u64, + ) -> io::Result<(Mapping<'a, Self::Storage>, u64)>; + + /// Ensure that `offset` is directly mapped to some storage object, up to a length of `length`. + /// + /// Return the storage object, the corresponding offset there, and the continuous length that + /// the driver was able to map (less than or equal to `length`). + /// + /// If the returned length is less than `length`, drivers can expect subsequent calls to + /// allocate the rest of the original range. Therefore, if a driver knows in advance that it + /// is impossible to fully map the given range (e.g. 
because it lies partially or fully beyond + /// the end of the disk), it should return an error immediately. + /// + /// If `overwrite` is true, the contents in the range are supposed to be overwritten and may be + /// discarded. Otherwise, they must be kept. + async fn ensure_data_mapping<'a>( + &'a self, + offset: u64, + length: u64, + overwrite: bool, + ) -> io::Result<(&'a Self::Storage, u64, u64)>; + + /// Read data from a `Mapping::Special` area. + async fn readv_special(&self, _bufv: IoVectorMut<'_>, _offset: u64) -> io::Result<()> { + Err(io::ErrorKind::Unsupported.into()) + } + + /// Flush internal buffers. + /// + /// Does not need to ensure those buffers are synced to disk (hardware). + async fn flush(&self) -> io::Result<()>; + + /// Sync data already written to the storage hardware. + /// + /// Does not need to ensure internal buffers are written, i.e. should generally just be passed + /// through to `Storage::sync()` for all underlying storage objects. + async fn sync(&self) -> io::Result<()>; +} + +/// Non-recursive mapping information. +/// +/// Mapping information as returned by `FormatDriverInstance::get_mapping()`, only looking at that +/// format layer’s information. +#[derive(Debug)] +pub enum Mapping<'a, S: Storage> { + /// Raw data. + Raw { + /// Storage object where this data is stored. + storage: &'a S, + + /// Offset in `storage` where this data is stored. + offset: u64, + + /// Whether this mapping may be written to. + /// + /// If `true`, you can directly write to `offset` on `storage` to change the disk image’s + /// data accordingly. + /// + /// If `false`, the disk image format does not allow writing to `offset` on `storage`; a + /// new mapping must be allocated first. + writable: bool, + }, + + /// Data lives in a different disk image (e.g. a backing file). + Indirect { + /// Format instance where this data can be obtained. + layer: &'a FormatAccess, + + /// Offset in `layer` where this data can be obtained. + offset: u64, + + /// Whether this mapping may be written to. + /// + /// If `true`, you can directly write to `offset` on `layer` to change the disk image’s + /// data accordingly. + /// + /// If `false`, the disk image format does not allow writing to `offset` on `layer`; a new + /// mapping must be allocated first. + writable: bool, + }, + + /// Range is to be read as zeroes. + Zero, + + /// End of file reached. + Eof, + + /// Data is encoded in some manner, e.g. compressed or encrypted. + /// + /// Such data cannot be accessed directly, but must be interpreted by the image format driver. + Special { + /// Original (“guest”) offset to pass to `FormatDriverInstance::readv_special()`. + offset: u64, + }, +} diff --git a/src/imago/src/format/mod.rs b/src/imago/src/format/mod.rs new file mode 100644 index 00000000..a863ca19 --- /dev/null +++ b/src/imago/src/format/mod.rs @@ -0,0 +1,9 @@ +//! Core functionality. +//! +//! Provides access to different image formats via `FormatAccess` objects. + +pub mod access; +pub mod drivers; +#[cfg(feature = "sync-wrappers")] +pub mod sync_wrappers; +pub mod wrapped; diff --git a/src/imago/src/format/sync_wrappers.rs b/src/imago/src/format/sync_wrappers.rs new file mode 100644 index 00000000..2f5f4e9b --- /dev/null +++ b/src/imago/src/format/sync_wrappers.rs @@ -0,0 +1,186 @@ +//! Synchronous wrapper around [`FormatAccess`]. 
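+//!
+//! A minimal usage sketch (requires the `sync-wrappers` feature; mirrors the
+//! first README example and assumes an existing `image.qcow2` file):
+//!
+//! ```no_run
+//! # use imago::file::File;
+//! # use imago::qcow2::Qcow2;
+//! # use imago::SyncFormatAccess;
+//! # fn main() -> std::io::Result<()> {
+//! let mut qcow2 = Qcow2::<File>::open_path_sync("image.qcow2", false)?;
+//! qcow2.open_implicit_dependencies_sync()?;
+//! let qcow2 = SyncFormatAccess::new(qcow2)?;
+//!
+//! let mut buf = vec![0u8; 512];
+//! qcow2.read(&mut buf, 0)?;
+//! # Ok(())
+//! # }
+//! ```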
+ +use super::drivers::FormatDriverInstance; +use crate::io_buffers::{IoVector, IoVectorMut}; +use crate::{FormatAccess, Mapping, Storage}; +use std::io; + +/// Synchronous wrapper around [`FormatAccess`]. +/// +/// Creates and keeps a tokio runtime in which to run I/O. +pub struct SyncFormatAccess { + /// Wrapped asynchronous [`FormatAccess`]. + inner: FormatAccess, + + /// Tokio runtime in which I/O is run. + runtime: tokio::runtime::Runtime, +} + +impl SyncFormatAccess { + /// Like [`FormatAccess::new()`], but create a synchronous wrapper. + pub fn new + 'static>(inner: D) -> io::Result { + FormatAccess::new(inner).try_into() + } + + /// Get a reference to the contained async [`FormatAccess`] object. + pub fn inner(&self) -> &FormatAccess { + &self.inner + } + + /// Return the disk size in bytes. + pub fn size(&self) -> u64 { + self.inner.size() + } + + /// Set the number of simultaneous async requests per read. + /// + /// When issuing read requests, issue this many async requests in parallel (still in a single + /// thread). The default count is `1`, i.e. no parallel requests. + /// + /// Note that inside of this synchronous wrapper, we still run async functions, so this setting + /// is valid even for [`SyncFormatAccess`]. + pub fn set_async_read_parallelization(&mut self, count: usize) { + self.inner.set_async_read_parallelization(count) + } + + /// Set the number of simultaneous async requests per write. + /// + /// When issuing write requests, issue this many async requests in parallel (still in a single + /// thread). The default count is `1`, i.e. no parallel requests. + /// + /// Note that inside of this synchronous wrapper, we still run async functions, so this setting + /// is valid even for [`SyncFormatAccess`]. + pub fn set_async_write_parallelization(&mut self, count: usize) { + self.inner.set_async_write_parallelization(count) + } + + /// Minimal I/O alignment, for both length and offset. + /// + /// All requests to this image should be aligned to this value, both in length and offset. + /// + /// Requests that do not match this alignment will be realigned internally, which requires + /// creating bounce buffers and read-modify-write cycles for write requests, which is costly, + /// so should be avoided. + pub fn req_align(&self) -> usize { + self.inner.req_align() + } + + /// Minimal memory buffer alignment, for both address and length. + /// + /// All buffers used in requests to this image should be aligned to this value, both their + /// address and length. + /// + /// Request buffers that do not match this alignment will be realigned internally, which + /// requires creating bounce buffers, which is costly, so should be avoided. + pub fn mem_align(&self) -> usize { + self.inner.mem_align() + } + + /// Return the mapping at `offset`. + /// + /// Find what `offset` is mapped to, return that mapping information, and the length of that + /// continuous mapping (from `offset`). + pub fn get_mapping_sync( + &self, + offset: u64, + max_length: u64, + ) -> io::Result<(Mapping<'_, S>, u64)> { + self.runtime + .block_on(self.inner.get_mapping(offset, max_length)) + } + + /// Create a raw data mapping at `offset`. + /// + /// Ensure that `offset` is directly mapped to some storage object, up to a length of `length`. + /// Return the storage object, the corresponding offset there, and the continuous length that + /// we were able to map (less than or equal to `length`). 
+ /// + /// If `overwrite` is true, the contents in the range are supposed to be overwritten and may be + /// discarded. Otherwise, they are kept. + pub fn ensure_data_mapping( + &self, + offset: u64, + length: u64, + overwrite: bool, + ) -> io::Result<(&S, u64, u64)> { + self.runtime + .block_on(self.inner.ensure_data_mapping(offset, length, overwrite)) + } + + /// Read data at `offset` into `bufv`. + /// + /// Reads until `bufv` is filled completely, i.e. will not do short reads. When reaching the + /// end of file, the rest of `bufv` is filled with 0. + pub fn readv(&self, bufv: IoVectorMut<'_>, offset: u64) -> io::Result<()> { + self.runtime.block_on(self.inner.readv(bufv, offset)) + } + + /// Read data at `offset` into `buf`. + /// + /// Reads until `buf` is filled completely, i.e. will not do short reads. When reaching the + /// end of file, the rest of `buf` is filled with 0. + pub fn read<'a>(&'a self, buf: impl Into>, offset: u64) -> io::Result<()> { + self.readv(buf.into(), offset) + } + + /// Write data from `bufv` to `offset`. + /// + /// Writes all data from `bufv` (or returns an error), i.e. will not do short writes. Reaching + /// the end of file before the end of the buffer results in an error. + pub fn writev(&self, bufv: IoVector<'_>, offset: u64) -> io::Result<()> { + self.runtime.block_on(self.inner.writev(bufv, offset)) + } + + /// Write data from `buf` to `offset`. + /// + /// Writes all data from `bufv` (or returns an error), i.e. will not do short writes. Reaching + /// the end of file before the end of the buffer results in an error. + pub fn write<'a>(&'a self, buf: impl Into>, offset: u64) -> io::Result<()> { + self.writev(buf.into(), offset) + } + + /// Flush internal buffers. + /// + /// Does not necessarily sync those buffers to disk. When using `flush()`, consider whether + /// you want to call `sync()` afterwards. + pub fn flush(&self) -> io::Result<()> { + self.runtime.block_on(self.inner.flush()) + } + + /// Sync data already written to the storage hardware. + /// + /// This does not necessarily include flushing internal buffers, i.e. `flush`. When using + /// `sync()`, consider whether you want to call `flush()` before it. + pub fn sync(&self) -> io::Result<()> { + self.runtime.block_on(self.inner.sync()) + } +} + +impl TryFrom> for SyncFormatAccess { + type Error = io::Error; + + fn try_from(async_access: FormatAccess) -> io::Result { + let runtime = tokio::runtime::Builder::new_current_thread() + .build() + .map_err(|err| { + io::Error::other(format!( + "Failed to create a tokio runtime for synchronous image access: {err}" + )) + })?; + + Ok(SyncFormatAccess { + inner: async_access, + runtime, + }) + } +} + +// #[cfg(not(feature = "async-drop"))] +impl Drop for SyncFormatAccess { + fn drop(&mut self) { + if let Err(err) = self.flush() { + let inner = &self.inner; + tracing::error!("Failed to flush {inner}: {err}"); + } + } +} diff --git a/src/imago/src/format/wrapped.rs b/src/imago/src/format/wrapped.rs new file mode 100644 index 00000000..6031c1c0 --- /dev/null +++ b/src/imago/src/format/wrapped.rs @@ -0,0 +1,59 @@ +//! Allows using [`FormatAccess`] in containers. +//! +//! Users may want to wrap [`FormatAccess`] objects e.g. in `Arc` and then assign them as +//! dependencies to other objects (e.g. as a backing image). The [`WrappedFormat`] trait provided +//! here allows images to use other images (`FormatAccess` objects) regardless of whether they are +//! wrapped in such containers or not. 
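+//!
+//! A small illustrative sketch (assumptions: the trait takes the storage type as a generic
+//! parameter, and `FormatAccess::size()` is available as used elsewhere in this crate):
+//! ```no_run
+//! use imago::format::wrapped::WrappedFormat;
+//! use imago::{FormatAccess, Storage};
+//!
+//! // Works for a bare `FormatAccess` as well as for wrapped forms such as
+//! // `Arc<FormatAccess<_>>`, because all of them implement `WrappedFormat`.
+//! fn disk_size<S: Storage, W: WrappedFormat<S>>(image: &W) -> u64 {
+//!     image.unwrap().size()
+//! }
+//! ```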
+ +use crate::{FormatAccess, Storage}; +use std::fmt::{Debug, Display}; +use std::ops::Deref; +use std::sync::Arc; +use tokio::sync::{OwnedRwLockReadGuard, RwLock}; + +/// Represents [`FormatAccess`] wrapped in e.g. `Arc`, `Box`, or nothing at all. +/// +/// This struct is necessary so that we can reference format instances regardless of whether the +/// user decides to wrap them or not. +pub trait WrappedFormat: Debug + Display + Send + Sync { + /// Construct this `WrappedFormat`. + fn wrap(inner: FormatAccess) -> Self; + + /// Access the inner format instance. + fn unwrap(&self) -> &FormatAccess; +} + +impl< + S: Storage, + D: Deref> + Debug + Display + From> + Send + Sync, + > WrappedFormat for D +{ + fn wrap(inner: FormatAccess) -> Self { + Self::from(inner) + } + + fn unwrap(&self) -> &FormatAccess { + self.deref() + } +} + +impl WrappedFormat for FormatAccess { + fn wrap(inner: FormatAccess) -> Self { + inner + } + + fn unwrap(&self) -> &FormatAccess { + self + } +} + +impl WrappedFormat for OwnedRwLockReadGuard> { + fn wrap(inner: FormatAccess) -> Self { + // Ugly, but works. + Arc::new(RwLock::new(inner)).try_read_owned().unwrap() + } + + fn unwrap(&self) -> &FormatAccess { + self.deref() + } +} diff --git a/src/imago/src/io_buffers.rs b/src/imago/src/io_buffers.rs new file mode 100644 index 00000000..ff8fb7e6 --- /dev/null +++ b/src/imago/src/io_buffers.rs @@ -0,0 +1,1118 @@ +//! Types for I/O buffers. +//! +//! This module provides: +//! - buffer types that can be allocated with arbitrary alignment, +//! - references to buffers that more or less ensure the content is read only once (because it can +//! change for buffers owned by VM guests), +//! - buffer vector types. + +use crate::macros::passthrough_trait_fn; +#[cfg(feature = "vm-memory")] +use crate::misc_helpers::ImagoAsRef; +use std::alloc::{self, GlobalAlloc}; +use std::fmt::{self, Debug, Formatter}; +use std::io::{IoSlice, IoSliceMut}; +use std::marker::PhantomData; +#[cfg(unix)] +use std::mem; +use std::mem::{size_of, size_of_val}; +use std::ops::Range; +use std::{cmp, io, ptr, slice}; + +/// Owned memory buffer. +pub struct IoBuffer { + /// Raw pointer to the start of the buffer. + pointer: *mut u8, + + /// Size in bytes. + size: usize, + + /// Allocation layout. `None` only for null buffers. + layout: Option, +} + +/// Reference to any immutable memory buffer. +pub struct IoBufferRef<'a> { + /// Raw pointer to the start of the buffer. + pointer: *const u8, + + /// Size in bytes. + size: usize, + + /// Lifetime marker. + _lifetime: PhantomData<&'a [u8]>, +} + +/// Reference to any mutable memory buffer. +pub struct IoBufferMut<'a> { + /// Raw pointer to the start of the buffer. + pointer: *mut u8, + + /// Size in bytes. + size: usize, + + /// Lifetime marker. + _lifetime: PhantomData<&'a mut [u8]>, +} + +// Blocked because of the pointer, but we want this to be usable across threads +unsafe impl Send for IoBuffer {} +unsafe impl Sync for IoBuffer {} +unsafe impl Send for IoBufferRef<'_> {} +unsafe impl Sync for IoBufferRef<'_> {} +unsafe impl Send for IoBufferMut<'_> {} +unsafe impl Sync for IoBufferMut<'_> {} + +impl IoBuffer { + /// Create a new owned buffer, containing uninitialized data. + /// + /// Do note that the returned buffer contains uninitialized data, which however is perfectly + /// fine for an I/O buffer. 
+ pub fn new(size: usize, alignment: usize) -> io::Result { + let layout = alloc::Layout::from_size_align(size, alignment).map_err(io::Error::other)?; + Self::new_with_layout(layout) + } + + /// Create a new owned buffer, containing uninitialized data, with the given `layout`. + pub fn new_with_layout(layout: alloc::Layout) -> io::Result { + if layout.size() == 0 { + return Ok(IoBuffer { + pointer: ptr::null_mut(), + size: 0, + layout: None, + }); + } + + // We guarantee the size not to be 0 and do not care about the memory being uninitialized, + // so this is safe + let pointer = unsafe { alloc::System.alloc(layout) }; + + if pointer.is_null() { + return Err(io::Error::new( + io::ErrorKind::OutOfMemory, + format!( + "Failed to allocate memory (size={}, alignment={})", + layout.size(), + layout.align(), + ), + )); + } + + Ok(IoBuffer { + pointer, + size: layout.size(), + layout: Some(layout), + }) + } + + /// Length in bytes. + pub fn len(&self) -> usize { + self.size + } + + /// Whether this is a null buffer (length is 0). + pub fn is_empty(&self) -> bool { + self.size == 0 + } + + /// Generate an immutable reference. + pub fn as_ref(&self) -> IoBufferRef<'_> { + IoBufferRef { + pointer: self.pointer as *const u8, + size: self.size, + _lifetime: PhantomData, + } + } + + /// Generate an immutable reference to a sub-range. + pub fn as_ref_range(&self, range: Range) -> IoBufferRef<'_> { + IoBufferRef::from_slice(&self.as_ref().into_slice()[range]) + } + + /// Generate a mutable reference. + pub fn as_mut(&mut self) -> IoBufferMut<'_> { + IoBufferMut { + pointer: self.pointer, + size: self.size, + _lifetime: PhantomData, + } + } + + /// Generate a mutable reference to a sub-range. + pub fn as_mut_range(&mut self, range: Range) -> IoBufferMut<'_> { + (&mut self.as_mut().into_slice()[range]).into() + } +} + +impl Drop for IoBuffer { + /// Free this buffer. + fn drop(&mut self) { + if let Some(layout) = self.layout { + // Safe because we have allocated this buffer using `alloc::System` + unsafe { + alloc::System.dealloc(self.pointer, layout); + } + } + } +} + +/// Common functions for both `IoBufferRef` and `IoBufferMut`. +pub trait IoBufferRefTrait<'a>: Sized { + /// `&[T]` or `&mut [T]`. + type SliceType; + + /// `*const T` or `*mut T`. + type PointerType; + + /// Create a reference to a slice. + fn from_slice(slice: Self::SliceType) -> Self; + + /// Create an owned [`IoBuffer`] with the same data (copied). + fn try_into_owned(self, alignment: usize) -> io::Result; + + /// Size in bytes. + fn len(&self) -> usize; + + /// Whether the length is 0. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Return the pointer to the start of the buffer. + fn as_ptr(&self) -> Self::PointerType; + + /// Turn this reference into a slice. + /// + /// References to `IoBuffer`s must not be copied/cloned (so they can only be accessed once; + /// they are considered volatile due to potential VM guest accesses), so this consumes the + /// object. + fn into_slice(self) -> Self::SliceType { + // Alignment requirement is always met, resulting data is pure binary data + unsafe { self.into_typed_slice::() } + } + + /// Turn this reference into a slice with the given element type. + /// + /// # Safety + /// Caller must ensure that alignment and length requirements are met and that the resulting + /// data is valid. + unsafe fn into_typed_slice(self) -> Self::SliceType; + + /// Split the buffer at `mid`. + /// + /// Return `&self[..mid]` and `&self[mid..]`. 
+ /// + /// If `mid > self.len()`, return `&self[..]` and `[]`. + fn split_at(self, mid: usize) -> (Self, Self); + + /// Make this reference immutable. + fn into_ref(self) -> IoBufferRef<'a>; +} + +impl<'a> IoBufferRef<'a> { + /// Create a reference to a slice. + pub fn from_slice(slice: &'a [u8]) -> Self { + IoBufferRef { + pointer: slice.as_ptr(), + size: size_of_val(slice), + _lifetime: PhantomData, + } + } + + /// Create an owned [`IoBuffer`] with the same data (copied). + pub fn try_into_owned(self, alignment: usize) -> io::Result { + let mut new_buf = IoBuffer::new(self.len(), alignment)?; + new_buf + .as_mut() + .into_slice() + .copy_from_slice(self.into_slice()); + Ok(new_buf) + } + + /// Size in bytes. + pub fn len(&self) -> usize { + self.size + } + + /// Whether the length is 0. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Return the pointer to the start of the buffer. + pub fn as_ptr(&self) -> *const u8 { + self.pointer + } + + /// Turn this reference into a slice. + /// + /// References to `IoBuffer`s must not be copied/cloned (so they can only be accessed once; + /// they are considered volatile due to potential VM guest accesses), so this consumes the + /// object. + pub fn into_slice(self) -> &'a [u8] { + // Alignment requirement is always met, resulting data is pure binary data + unsafe { self.into_typed_slice::() } + } + + /// Turn this reference into a slice with the given element type. + /// + /// # Safety + /// Caller must ensure that alignment and length requirements are met and that the resulting + /// data is valid. + pub unsafe fn into_typed_slice(self) -> &'a [T] { + // Safety ensured by the caller; we ensure that nothing outside of this buffer will be part + // of the slice + unsafe { slice::from_raw_parts(self.as_ptr() as *const T, self.len() / size_of::()) } + } + + /// Split the buffer at `mid`. + /// + /// Return `&self[..mid]` and `&self[mid..]`. + /// + /// If `mid > self.len()`, return `&self[..]` and `[]`. + pub fn split_at(self, mid: usize) -> (IoBufferRef<'a>, IoBufferRef<'a>) { + let head_len = cmp::min(mid, self.size); + + ( + IoBufferRef { + pointer: self.pointer, + size: head_len, + _lifetime: PhantomData, + }, + IoBufferRef { + // Safe because we have limited this to `self.size` + pointer: unsafe { self.pointer.add(head_len) }, + size: self.size - head_len, + _lifetime: PhantomData, + }, + ) + } + + /// Make this reference immutable. + pub fn into_ref(self) -> IoBufferRef<'a> { + self + } +} + +impl<'a> IoBufferRefTrait<'a> for IoBufferRef<'a> { + type SliceType = &'a [T]; + type PointerType = *const T; + + passthrough_trait_fn! { fn from_slice(slice: Self::SliceType) -> Self; } + passthrough_trait_fn! { fn try_into_owned(self, alignment: usize) -> io::Result; } + passthrough_trait_fn! { fn len(&self) -> usize; } + passthrough_trait_fn! { fn as_ptr(&self) -> Self::PointerType; } + passthrough_trait_fn! { fn split_at(self, mid: usize) -> (Self, Self); } + passthrough_trait_fn! { fn into_ref(self) -> IoBufferRef<'a>; } + + unsafe fn into_typed_slice(self) -> Self::SliceType { + Self::into_typed_slice(self) + } +} + +impl<'a> From> for IoBufferRef<'a> { + fn from(slice: IoSlice<'a>) -> Self { + IoBufferRef { + pointer: slice.as_ptr(), + size: slice.len(), + _lifetime: PhantomData, + } + } +} + +impl<'a> From> for IoSlice<'a> { + fn from(buf: IoBufferRef<'a>) -> Self { + IoSlice::new(buf.into_slice()) + } +} + +impl<'a> IoBufferMut<'a> { + /// Create a reference to a slice. 
+ pub fn from_slice(slice: &'a mut [u8]) -> Self { + IoBufferMut { + pointer: slice.as_mut_ptr(), + size: size_of_val(slice), + _lifetime: PhantomData, + } + } + + /// Create an owned [`IoBuffer`] with the same data (copied). + pub fn try_into_owned(self, alignment: usize) -> io::Result { + let mut new_buf = IoBuffer::new(self.len(), alignment)?; + new_buf + .as_mut() + .into_slice() + .copy_from_slice(self.into_slice()); + Ok(new_buf) + } + + /// Size in bytes. + pub fn len(&self) -> usize { + self.size + } + + /// Whether the length is 0. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Return the pointer to the start of the buffer. + pub fn as_ptr(&self) -> *mut u8 { + self.pointer + } + + /// Turn this reference into a slice. + /// + /// References to `IoBuffer`s must not be copied/cloned (so they can only be accessed once; + /// they are considered volatile due to potential VM guest accesses), so this consumes the + /// object. + pub fn into_slice(self) -> &'a mut [u8] { + // Alignment requirement is always met, resulting data is pure binary data + unsafe { self.into_typed_slice::() } + } + + /// Turn this reference into a slice with the given element type. + /// + /// # Safety + /// Caller must ensure that alignment and length requirements are met and that the resulting + /// data is valid. + pub unsafe fn into_typed_slice(self) -> &'a mut [T] { + // Safety ensured by the caller; we ensure that nothing outside of this buffer will be part + // of the slice + unsafe { slice::from_raw_parts_mut(self.as_ptr() as *mut T, self.len() / size_of::()) } + } + + /// Split the buffer at `mid`. + /// + /// Return `&self[..mid]` and `&self[mid..]`. + /// + /// If `mid > self.len()`, return `&self[..]` and `[]`. + pub fn split_at(self, mid: usize) -> (IoBufferMut<'a>, IoBufferMut<'a>) { + let head_len = cmp::min(mid, self.size); + + ( + IoBufferMut { + pointer: self.pointer, + size: head_len, + _lifetime: PhantomData, + }, + IoBufferMut { + // Safe because we have limited this to `self.size` + pointer: unsafe { self.pointer.add(head_len) }, + size: self.size - head_len, + _lifetime: PhantomData, + }, + ) + } + + /// Make this reference immutable. + pub fn into_ref(self) -> IoBufferRef<'a> { + IoBufferRef { + pointer: self.pointer, + size: self.size, + _lifetime: PhantomData, + } + } +} + +impl<'a> IoBufferRefTrait<'a> for IoBufferMut<'a> { + type SliceType = &'a mut [T]; + type PointerType = *mut T; + + passthrough_trait_fn! { fn from_slice(slice: Self::SliceType) -> Self; } + passthrough_trait_fn! { fn try_into_owned(self, alignment: usize) -> io::Result; } + passthrough_trait_fn! { fn len(&self) -> usize; } + passthrough_trait_fn! { fn as_ptr(&self) -> Self::PointerType; } + passthrough_trait_fn! { fn split_at(self, mid: usize) -> (Self, Self); } + passthrough_trait_fn! 
{ fn into_ref(self) -> IoBufferRef<'a>; } + + unsafe fn into_typed_slice(self) -> Self::SliceType { + Self::into_typed_slice(self) + } +} + +impl<'a, T: Sized> From<&'a mut [T]> for IoBufferMut<'a> { + fn from(slice: &'a mut [T]) -> Self { + IoBufferMut { + pointer: slice.as_mut_ptr() as *mut u8, + size: size_of_val(slice), + _lifetime: PhantomData, + } + } +} + +impl<'a> From> for IoBufferMut<'a> { + fn from(mut slice: IoSliceMut<'a>) -> Self { + IoBufferMut { + pointer: slice.as_mut_ptr(), + size: slice.len(), + _lifetime: PhantomData, + } + } +} + +impl<'a> From> for IoSliceMut<'a> { + fn from(buf: IoBufferMut<'a>) -> Self { + IoSliceMut::new(buf.into_slice()) + } +} + +/// Common functions for both `IoVector` and `IoVectorMut`. +#[allow(dead_code)] +pub(crate) trait IoVectorTrait: Sized { + /// `&[u8]` or `&mut [u8]`. + type SliceType; + + /// `IoSlice` or `IoSliceMut`. + type BufferType; + + /// Create an empty vector. + fn new() -> Self; + + /// Create an empty vector, pre-allocating space for `cap` buffers. + /// + /// This does not allocate an memory buffer, only space in the buffer vector. + fn with_capacity(cap: usize) -> Self; + + /// Append a slice. + fn push(&mut self, slice: Self::SliceType); + + /// Append a slice. + fn push_ioslice(&mut self, ioslice: Self::BufferType); + + /// Insert a slice at the given `index` in the buffer vector. + fn insert(&mut self, index: usize, slice: Self::SliceType); + + /// Return the sum total length in bytes of all buffers in this vector. + fn len(&self) -> u64; + + /// Return the number of buffers in this vector. + fn buffer_count(&self) -> usize; + + /// Return `true` if and only if this vector’s length is zero. + /// + /// Synonymous with whether this vector’s buffer count is zero. + fn is_empty(&self) -> bool { + debug_assert!((self.len() == 0) == (self.buffer_count() == 0)); + self.len() == 0 + } + + /// Append all buffers from the given other vector to this vector. + fn append(&mut self, other: Self); + + /// Split the vector into two. + /// + /// The first returned vector contains the bytes in the `[..mid]` range, and the second one + /// covers the `[mid..]` range. + fn split_at(self, mid: u64) -> (Self, Self); + + /// Like [`IoVectorTrait::split_at()`], but discards the head, only returning the tail. + /// + /// More efficient than to use `self.split_at(mid).1` because the former requires creating a + /// new `Vec` object for the head, which this version skips. + fn split_tail_at(self, mid: u64) -> Self; + + /// Copy the data from `self` into `slice`. + /// + /// Both must have the same length. + fn copy_into_slice(&self, slice: &mut [u8]); + + /// Create a single owned [`IoBuffer`] with the same data (copied). + fn try_into_owned(self, alignment: usize) -> io::Result; + + /// Return a corresponding `&[libc::iovec]`. + /// + /// # Safety + /// `iovec` has no lifetime information. Callers must ensure no elements in the returned slice + /// are used beyond the lifetime `'_`. + #[cfg(unix)] + unsafe fn as_iovec<'a>(&'a self) -> &'a [libc::iovec] + where + Self: 'a; + + /// Check whether `self` is aligned. + /// + /// Each buffer must be aligned to `mem_alignment`, and each buffer’s length must be aligned to + /// both `mem_alignment` and `req_alignment` (the I/O request offset/size alignment). + fn is_aligned(&self, mem_alignment: usize, req_alignment: usize) -> bool; + + /// Return the internal vector of `IoSlice` objects. + fn into_inner(self) -> Vec; +} + +/// Implement most of both `IoVector` and `IoVectorMut`. +macro_rules! 
impl_io_vector { + ($type:tt, $inner_type:tt, $buffer_type:tt, $slice_type:ty, $slice_type_lifetime_b:ty) => { + /// Vector of memory buffers. + pub struct $type<'a> { + /// Buffer list. + vector: Vec<$inner_type<'a>>, + + /// Complete size in bytes. + total_size: u64, + } + + impl<'a> $type<'a> { + /// Create an empty vector. + pub fn new() -> Self { + Self::default() + } + + /// Create an empty vector, pre-allocating space for `cap` buffers. + /// + /// This does not allocate an memory buffer, only space in the buffer vector. + pub fn with_capacity(cap: usize) -> Self { + $type { + vector: Vec::with_capacity(cap), + total_size: 0, + } + } + + /// Append a slice. + pub fn push(&mut self, slice: $slice_type) { + debug_assert!(!slice.is_empty()); + self.total_size += slice.len() as u64; + self.vector.push($inner_type::new(slice)); + } + + /// Append a slice. + pub fn push_ioslice(&mut self, ioslice: $inner_type<'a>) { + debug_assert!(!ioslice.is_empty()); + self.total_size += ioslice.len() as u64; + self.vector.push(ioslice); + } + + /// Insert a slice at the given `index` in the buffer vector. + pub fn insert(&mut self, index: usize, slice: $slice_type) { + debug_assert!(!slice.is_empty()); + self.total_size += slice.len() as u64; + self.vector.insert(index, $inner_type::new(slice)); + } + + /// Return the sum total length in bytes of all buffers in this vector. + pub fn len(&self) -> u64 { + self.total_size + } + + /// Return the number of buffers in this vector. + pub fn buffer_count(&self) -> usize { + self.vector.len() + } + + /// Return `true` if and only if this vector’s length is zero. + /// + /// Synonymous with whether this vector’s buffer count is zero. + pub fn is_empty(&self) -> bool { + debug_assert!((self.len() == 0) == (self.buffer_count() == 0)); + self.len() == 0 + } + + /// Append all buffers from the given other vector to this vector. + pub fn append(&mut self, mut other: Self) { + self.total_size += other.total_size; + self.vector.append(&mut other.vector); + } + + /// Split the vector into two. + /// + /// The first returned vector contains the bytes in the `[..mid]` range, and the second + /// one covers the `[mid..]` range. + pub fn split_at(self, mid: u64) -> (Self, Self) { + let (head, tail) = self.do_split_at(mid, true); + (head.unwrap(), tail) + } + + /// Like [`Self::split_at()`], but discards the head, only returning the tail. + /// + /// More efficient than to use `self.split_at(mid).1` because the former requires + /// creating a new `Vec` object for the head, which this version skips. + pub fn split_tail_at(self, mid: u64) -> Self { + self.do_split_at(mid, false).1 + } + + /// Copy the data from `self` into `slice`. + /// + /// Both must have the same length. + pub fn copy_into_slice(&self, slice: &mut [u8]) { + if slice.len() as u64 != self.total_size { + panic!("IoVectorTrait::copy_into_slice() called on a slice of different length from the vector"); + } + + assert!(self.total_size <= usize::MAX as u64); + + let mut offset = 0usize; + for elem in self.vector.iter() { + let next_offset = offset + elem.len(); + slice[offset..next_offset].copy_from_slice(&elem[..]); + offset = next_offset; + } + } + + /// Create a single owned [`IoBuffer`] with the same data (copied). 
+ pub fn try_into_owned(self, alignment: usize) -> io::Result { + let size = self.total_size.try_into().map_err(|_| { + io::Error::other(format!("Buffer is too big ({})", self.total_size)) + })?; + let mut new_buf = IoBuffer::new(size, alignment)?; + self.copy_into_slice(new_buf.as_mut().into_slice()); + Ok(new_buf) + } + + /// Return a corresponding `&[libc::iovec]`. + /// + /// # Safety + /// `iovec` has no lifetime information. Callers must ensure no elements in the + /// returned slice are used beyond the lifetime `'_`. + #[cfg(unix)] + pub unsafe fn as_iovec<'b>(&'b self) -> &'b [libc::iovec] where Self: 'b { + // IoSlice and IoSliceMut are defined to have the same representation in memory as + // libc::iovec does + unsafe { + mem::transmute::<&'b [$inner_type<'b>], &'b [libc::iovec]>(&self.vector[..]) + } + } + + /// Check whether `self` is aligned. + /// + /// Each buffer must be aligned to `mem_alignment`, and each buffer’s length must be + /// aligned to both `mem_alignment` and `req_alignment` (the I/O request offset/size + /// alignment). + pub fn is_aligned(&self, mem_alignment: usize, req_alignment: usize) -> bool { + // Trivial case + if mem_alignment == 1 && req_alignment == 1 { + return true; + } + + debug_assert!(mem_alignment.is_power_of_two() && req_alignment.is_power_of_two()); + let base_align_mask = mem_alignment - 1; + let len_align_mask = base_align_mask | (req_alignment - 1); + + self.vector.iter().all(|buf| { + buf.as_ptr() as usize & base_align_mask == 0 && + buf.len() & len_align_mask == 0 + }) + } + + /// Return the internal vector of `IoSlice` objects. + pub fn into_inner(self) -> Vec<$inner_type<'a>> { + self.vector + } + + /// Same as [`Self::push()`], but takes ownership of `self`. + /// + /// By taking ownership of `self` and returning it, this method allows reducing the + /// lifetime of `self` to that of `slice`, if necessary. + pub fn with_pushed<'b>(self, slice: $slice_type_lifetime_b) -> $type<'b> + where + 'a: 'b, + { + let mut vec: $type<'b> = self; + vec.push(slice); + vec + } + + /// Same as [`Self::insert()`], but takes ownership of `self.` + /// + /// By taking ownership of `self` and returning it, this method allows reducing the + /// lifetime of `self` to that of `slice`, if necessary. + pub fn with_inserted<'b>(self, index: usize, slice: $slice_type_lifetime_b) -> $type<'b> + where + 'a: 'b, + { + let mut vec: $type<'b> = self; + vec.insert(index, slice); + vec + } + + /// Implementation for [`Self::split_at()`] and [`Self::split_tail_at()`]. + /// + /// If `keep_head` is true, both head and tail are returned ([`Self::split_at()`]). + /// Otherwise, the head is discarded ([`Self::split_tail_at()`]). 
+ fn do_split_at(mut self, mid: u64, keep_head: bool) -> (Option<$type<'a>>, $type<'a>) { + if mid >= self.total_size { + // Special case: Empty tail + return ( + keep_head.then_some(self), + $type { + vector: Vec::new(), + total_size: 0, + }, + ); + } + + let mut i = 0; // Current element index + let mut offset = 0u64; // Current element offset + let (vec_head, vec_tail) = loop { + if offset == mid { + // Clean split: `i` is fully behind `mid`, the rest is fully ahead + if keep_head { + let mut vec_head = self.vector; + let vec_tail = vec_head.split_off(i); + break (Some(vec_head), vec_tail); + } else { + break (None, self.vector.split_off(i)); + } + } + + let post_elm_offset = offset + self.vector[i].len() as u64; + + if post_elm_offset > mid { + // Not so clean split: The beginning of this element was before `mid`, the end is + // behind it, so we must split this element between head and tail + let mut vec_head = self.vector; + let mut tail_iter = vec_head.drain(i..); + + // This is the current element (at `i`), which must be present + let mid_elm = tail_iter.next().unwrap(); + let mid_elm: $buffer_type<'a> = mid_elm.into(); + + // Each element's length is of type usize, so this must fit into usize + let mid_elm_head_len: usize = (mid - offset).try_into().unwrap(); + let (mid_head, mid_tail) = mid_elm.split_at(mid_elm_head_len); + + let mut vec_tail: Vec<$inner_type<'a>> = vec![mid_tail.into()]; + vec_tail.extend(tail_iter); + + if keep_head { + vec_head.push(mid_head.into()); + break (Some(vec_head), vec_tail); + } else { + break (None, vec_tail); + } + } + + offset = post_elm_offset; + + i += 1; + // We know that `mid < self.total_size`, so we must encounter `mid before the end of + // the vector + assert!(i < self.vector.len()); + }; + + let head = keep_head.then(|| $type { + vector: vec_head.unwrap(), + total_size: mid, + }); + let tail = $type { + vector: vec_tail, + total_size: self.total_size - mid, + }; + + (head, tail) + } + } + + impl<'a> IoVectorTrait for $type<'a> { + type SliceType = $slice_type; + type BufferType = $inner_type<'a>; + + passthrough_trait_fn! { fn new() -> Self; } + passthrough_trait_fn! { fn with_capacity(cap: usize) -> Self; } + passthrough_trait_fn! { fn push(&mut self, slice: Self::SliceType); } + passthrough_trait_fn! { fn push_ioslice(&mut self, ioslice: Self::BufferType); } + passthrough_trait_fn! { fn insert(&mut self, index: usize, slice: Self::SliceType); } + passthrough_trait_fn! { fn len(&self) -> u64; } + passthrough_trait_fn! { fn buffer_count(&self) -> usize; } + passthrough_trait_fn! { fn append(&mut self, other: Self); } + passthrough_trait_fn! { fn split_at(self, mid: u64) -> (Self, Self); } + passthrough_trait_fn! { fn split_tail_at(self, mid: u64) -> Self; } + passthrough_trait_fn! { fn copy_into_slice(&self, slice: &mut [u8]); } + passthrough_trait_fn! { fn try_into_owned(self, alignment: usize) -> io::Result; } + passthrough_trait_fn! { fn is_aligned(&self, mem_alignment: usize, req_alignment: usize) -> bool; } + passthrough_trait_fn! 
{ fn into_inner(self) -> Vec; } + + #[cfg(unix)] + unsafe fn as_iovec<'b>(&'b self) -> &'b [libc::iovec] + where + Self: 'b + { + Self::as_iovec(self) + } + } + + impl<'a> From>> for $type<'a> { + fn from(vector: Vec<$inner_type<'a>>) -> Self { + let total_size = vector + .iter() + .map(|e| e.len()) + .fold(0u64, |sum, e| sum + e as u64); + + $type { vector, total_size } + } + } + + impl<'a> From<$buffer_type<'a>> for $type<'a> { + fn from(buffer: $buffer_type<'a>) -> Self { + let total_size = buffer.len() as u64; + if total_size > 0 { + $type { + vector: vec![buffer.into()], + total_size, + } + } else { + $type { + vector: Vec::new(), + total_size: 0, + } + } + } + } + + impl<'a> From<$slice_type> for $type<'a> { + fn from(slice: $slice_type) -> Self { + let total_size = slice.len() as u64; + if total_size > 0 { + $type { + vector: vec![$inner_type::new(slice)], + total_size, + } + } else { + $type { + vector: Vec::new(), + total_size: 0, + } + } + } + } + + impl<'a> Default for $type<'a> { + fn default() -> Self { + $type { + vector: Vec::new(), + total_size: 0, + } + } + } + + impl Debug for $type<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct(std::stringify!($type)) + .field("vector.len()", &self.vector.len()) + .field("total_size", &self.total_size) + .finish() + } + } + }; +} + +impl_io_vector!(IoVector, IoSlice, IoBufferRef, &'a [u8], &'b [u8]); +impl_io_vector!( + IoVectorMut, + IoSliceMut, + IoBufferMut, + &'a mut [u8], + &'b mut [u8] +); + +#[cfg(feature = "vm-memory")] +impl<'a> IoVector<'a> { + /// Converts a `VolatileSlice` array (from vm-memory) into an `IoVector`. + /// + /// In addition to a the vector, return a guard that ensures that the memory in `slices` is + /// indeed mapped while in use. This guard must not be dropped while this vector is in use! + pub fn from_volatile_slice< + B: vm_memory::bitmap::BitmapSlice, + I: IntoIterator< + Item: ImagoAsRef<'a, vm_memory::VolatileSlice<'a, B>>, + IntoIter: ExactSizeIterator, + >, + >( + slices: I, + ) -> ( + Self, + VolatileSliceGuard<'a, vm_memory::volatile_memory::PtrGuard, B>, + ) { + let ptr_guards = slices + .into_iter() + .map(|slice| slice.as_ref().ptr_guard()) + .collect::>(); + let buffers = ptr_guards + .iter() + .map(|pg| { + // Safe because this whole module basically exists to follow the same design concepts + // as `VolatileSlice`. + let slice = unsafe { std::slice::from_raw_parts(pg.as_ptr(), pg.len()) }; + IoSlice::new(slice) + }) + .collect::>(); + + let vector = IoVector::from(buffers); + let guard = VolatileSliceGuard { + _ptr_guards: ptr_guards, + // `IoVector` is immutable, so no need to dirty + dirty_on_drop: None, + }; + + (vector, guard) + } +} + +impl IoVectorMut<'_> { + /// Fill all buffers in the vector with the given byte pattern. + pub fn fill(&mut self, value: u8) { + for slice in self.vector.iter_mut() { + slice.fill(value); + } + } + + /// Copy data from `slice` into the buffers in this vector. + /// + /// The vector and the slice must have the same total length. 
+ pub fn copy_from_slice(&mut self, slice: &[u8]) { + if slice.len() as u64 != self.total_size { + panic!("IoVectorMut::copy_from_slice() called on a slice of different length from the vector"); + } + + assert!(self.total_size <= usize::MAX as u64); + + let mut offset = 0usize; + for elem in self.vector.iter_mut() { + let next_offset = offset + elem.len(); + elem.copy_from_slice(&slice[offset..next_offset]); + offset = next_offset; + } + } +} + +#[cfg(feature = "vm-memory")] +impl<'a> IoVectorMut<'a> { + /// Converts a `VolatileSlice` array (from vm-memory) into an `IoVectorMut`. + /// + /// In addition to a the vector, return a guard that ensures that the memory in `slices` is + /// indeed mapped while in use. This guard must not be dropped while this vector is in use! + pub fn from_volatile_slice< + B: vm_memory::bitmap::BitmapSlice, + I: IntoIterator< + Item: ImagoAsRef<'a, vm_memory::VolatileSlice<'a, B>>, + IntoIter: ExactSizeIterator, + >, + >( + slices: I, + ) -> ( + Self, + VolatileSliceGuard<'a, vm_memory::volatile_memory::PtrGuardMut, B>, + ) { + let slices = slices.into_iter(); + let slice_count = slices.len(); + let mut ptr_guards = Vec::with_capacity(slice_count); + let mut dirty_on_drop = Vec::with_capacity(slice_count); + + for slice in slices { + let slice = slice.as_ref(); + ptr_guards.push(slice.ptr_guard_mut()); + // `IoVector` is mutable, so we can assume it will all be written + dirty_on_drop.push((slice.bitmap(), slice.len())); + } + + let buffers = ptr_guards + .iter() + .map(|pg| { + // Safe because this whole module basically exists to follow the same design concepts + // as `VolatileSlice`. + let slice = unsafe { std::slice::from_raw_parts_mut(pg.as_ptr(), pg.len()) }; + IoSliceMut::new(slice) + }) + .collect::>(); + + let vector = IoVectorMut::from(buffers); + let guard = VolatileSliceGuard { + _ptr_guards: ptr_guards, + dirty_on_drop: Some(dirty_on_drop), + }; + + (vector, guard) + } +} + +impl<'a> From<&'a Vec> for IoVector<'a> { + fn from(vec: &'a Vec) -> Self { + vec.as_slice().into() + } +} + +impl<'a> From<&'a IoBuffer> for IoVector<'a> { + fn from(buf: &'a IoBuffer) -> Self { + buf.as_ref().into_slice().into() + } +} + +impl<'a> From<&'a mut Vec> for IoVectorMut<'a> { + fn from(vec: &'a mut Vec) -> Self { + vec.as_mut_slice().into() + } +} + +impl<'a> From<&'a mut IoBuffer> for IoVectorMut<'a> { + fn from(buf: &'a mut IoBuffer) -> Self { + buf.as_mut().into_slice().into() + } +} + +/// Ensures an I/O vector’s validity when created from `[VolatileSlice]`. +/// +/// `[VolatileSlice]` arrays may require being explicitly mapped before use (and unmapped after), +/// and this guard ensures that the memory is mapped until it is dropped. +/// +/// Further, for mutable vectors ([`IoVectorMut`]), it will also dirty the corresponding bitmap +/// slices when dropped, assuming the whole vector has been written. +#[cfg(feature = "vm-memory")] +pub struct VolatileSliceGuard<'a, PtrGuardType, BitmapType: vm_memory::bitmap::Bitmap> { + /// vm-memory’s pointer guards ensuring the memory remains mapped while used. + _ptr_guards: Vec, + + /// If given, mark the given dirty bitmap range as dirty when dropping this guard. + /// + /// `.1` is the length of the respective `VolatileSlice` (i.e. the length of the area to + /// dirty). 
+ dirty_on_drop: Option>, +} + +#[cfg(feature = "vm-memory")] +impl Drop for VolatileSliceGuard<'_, P, B> { + fn drop(&mut self) { + if let Some(dirty_on_drop) = self.dirty_on_drop.take() { + for (bitmap, len) in dirty_on_drop { + // Every bitmap is a window into the full bitmap for its specific `VolatileSlice`, + // so marking the whole thing is dirty is correct. + bitmap.mark_dirty(0, len); + } + } + } +} + +#[cfg(all(test, feature = "vm-memory"))] +mod vm_memory_test { + use crate::io_buffers::{IoVector, IoVectorMut}; + use vm_memory::bitmap::BitmapSlice; + use vm_memory::VolatileSlice; + + pub fn do_test_volatile_slice_owned(slices: &[VolatileSlice]) { + { + let _vec = IoVector::from_volatile_slice(slices); + } + { + let _vec = IoVectorMut::from_volatile_slice(slices); + } + } + + #[test] + fn test_volatile_slice_owned() { + let empty: Vec> = Vec::new(); + do_test_volatile_slice_owned(&empty); + } + + pub fn do_test_volatile_slice_ref(slices: &[&VolatileSlice]) { + { + let _vec = IoVector::from_volatile_slice(slices); + } + { + let _vec = IoVectorMut::from_volatile_slice(slices); + } + } + + #[test] + fn test_volatile_slice_ref() { + let empty: Vec<&vm_memory::VolatileSlice<()>> = Vec::new(); + do_test_volatile_slice_ref(&empty); + } +} diff --git a/src/imago/src/lib.rs b/src/imago/src/lib.rs new file mode 100644 index 00000000..bd6d9f11 --- /dev/null +++ b/src/imago/src/lib.rs @@ -0,0 +1,106 @@ +// #![feature(async_drop)] -- enable with async-drop +#![cfg_attr(all(doc, nightly), feature(doc_auto_cfg))] // expect nightly for doc +#![warn(missing_docs)] +#![warn(clippy::missing_docs_in_private_items)] + +//! Provides access to VM image formats. +//! +//! Simple example (requires the `sync-wrappers` feature): +//! ```no_run +//! # #[cfg(feature = "sync-wrappers")] +//! # || -> std::io::Result<()> { +//! use imago::file::File; +//! use imago::qcow2::Qcow2; +//! use imago::SyncFormatAccess; +//! use std::fs::OpenOptions; +//! +//! // Produce read-only qcow2 instance using purely `File` for storage +//! let mut qcow2 = Qcow2::::open_path_sync("image.qcow2", false)?; +//! qcow2.open_implicit_dependencies_sync()?; +//! +//! let qcow2 = SyncFormatAccess::new(qcow2)?; +//! +//! let mut buf = vec![0u8; 512]; +//! qcow2.read(&mut buf, 0)?; +//! # Ok::<(), std::io::Error>(()) +//! # }; +//! ``` +//! +//! Another example, using the native async interface instead of sync wrapper functions, explicitly +//! overriding the implicit references contained in qcow2 files, and showcasing using different +//! types of storage (specifically normal files and null storage): +//! ```no_run +//! # let _ = async { +//! use imago::file::File; +//! use imago::null::Null; +//! use imago::qcow2::Qcow2; +//! use imago::raw::Raw; +//! use imago::{DynStorage, FormatAccess, Storage, StorageOpenOptions}; +//! use std::sync::Arc; +//! +//! let qcow2_file_opts = StorageOpenOptions::new() +//! .write(true) +//! .filename(String::from("image.qcow2")); +//! let qcow2_file = File::open(qcow2_file_opts).await?; +//! +//! // Produce qcow2 instance with arbitrary (and potentially mixed) storage instances +//! let mut qcow2 = +//! Qcow2::, Arc>>::open_image(Box::new(qcow2_file), true) +//! .await?; +//! +//! let backing_storage: Box = Box::new(Null::new(0)); +//! let backing = Raw::open_image(backing_storage, false).await?; +//! let backing = Arc::new(FormatAccess::new(backing)); +//! qcow2.set_backing(Some(Arc::clone(&backing))); +//! +//! // Open potentially remaining dependencies (like an external data file) +//! 
qcow2.open_implicit_dependencies().await?; +//! +//! let qcow2 = FormatAccess::new(qcow2); +//! +//! let mut buf = vec![0u8; 512]; +//! qcow2.read(&mut buf, 0).await?; +//! +//! qcow2.flush().await?; +//! # Ok::<(), std::io::Error>(()) +//! # }; +//! ``` +//! +//! # Flushing +//! +//! Given that `AsyncDrop` is not stable yet (and probably will not be stable for a long time), +//! callers must ensure that images are properly flushed before dropping them, i.e. call +//! `.flush().await` on any image that is not read-only. +//! +//! (The synchronous wrapper [`SyncFormatAccess`] does perform a synchronous flush in its `Drop` +//! implementation.) +//! +//! # Features +//! +//! - `sync-wrappers`: Provide synchronous wrappers for the native `async` interface. Note that +//! these build a `tokio` runtime in which they run the `async` functions, so using the `async` +//! interface is definitely preferred. +//! +//! - `vm-memory`: Provide conversion functions +//! [`IoVector::from_volatile_slice`](io_buffers::IoVector::from_volatile_slice) and +//! [`IoVectorMut::from_volatile_slice`](io_buffers::IoVectorMut::from_volatile_slice) to convert +//! the vm-memory crate’s `[VolatileSlice]` arrays into imago’s native I/O vectors. + +pub mod annotated; +mod async_lru_cache; +pub mod file; +pub mod format; +pub mod io_buffers; +mod macros; +mod misc_helpers; +pub mod null; +pub mod qcow2; +pub mod raw; +pub mod storage; +mod vector_select; + +pub use format::access::*; +#[cfg(feature = "sync-wrappers")] +pub use format::sync_wrappers::*; +pub use storage::ext::StorageExt; +pub use storage::*; diff --git a/src/imago/src/macros.rs b/src/imago/src/macros.rs new file mode 100644 index 00000000..4c31c0b3 --- /dev/null +++ b/src/imago/src/macros.rs @@ -0,0 +1,85 @@ +//! Helper macros. + +/// Implements `TryFrom` for enums from their numerical representation. +macro_rules! numerical_enum { + ( + $(#[$attr:meta])* + $vis:vis enum $enum_name:ident as $repr:tt { + $( + $(#[$id_attr:meta])* + $identifier:ident = $value:expr, + )+ + } + ) => { + $(#[$attr])* + #[derive(Copy, Clone, Debug, Eq, PartialEq)] + #[repr($repr)] + $vis enum $enum_name { + $( + $(#[$id_attr])* + $identifier = $value, + )+ + } + + impl TryFrom<$repr> for $enum_name { + type Error = std::io::Error; + + fn try_from(val: $repr) -> std::io::Result { + match val { + $(x if x == $value => Ok($enum_name::$identifier),)* + _ => Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!( + "Invalid value for {}: {:x}", + stringify!($enum_name), + val, + ), + )), + } + } + } + } +} + +pub(crate) use numerical_enum; + +/// Implements a function as itself. +/// +/// For traits that generalize interfaces that duplicate what we have on the struct itself, too. +/// For example, we want to have `IoVectorTrait`, but not export it; requiring users to import that +/// trait just for `.len()` is silly. So `.len()` is implemented directly on both `IoVector` and +/// `IoVectorMut` -- still, we want to have a generic `IoVectorTrait::len()`, too. This is what +/// this macro implements. +macro_rules! passthrough_trait_fn { + { fn $name:ident($($param:ident: $type:ty),*) -> $ret:ty; } => { + fn $name($($param: $type),*) -> $ret { + Self::$name($($param),*) + } + }; + + { fn $name:ident(self$(, $param:ident: $type:ty)*) -> $ret:ty; } => { + passthrough_trait_fn! { fn $name(self: Self$(, $param: $type)*) -> $ret; } + }; + + { fn $name:ident(&self$(, $param:ident: $type:ty)*) -> $ret:ty; } => { + passthrough_trait_fn! 
{ fn $name(self: &Self$(, $param: $type)*) -> $ret; } + }; + + { fn $name:ident(&mut self$(, $param:ident: $type:ty)*) -> $ret:ty; } => { + passthrough_trait_fn! { fn $name(self: &mut Self$(, $param: $type)*) -> $ret; } + }; + + { fn $name:ident(self$(, $param:ident: $type:ty)*); } => { + passthrough_trait_fn! { fn $name(self$(, $param: $type)*) -> (); } + }; + + { fn $name:ident(&self$(, $param:ident: $type:ty)*); } => { + passthrough_trait_fn! { fn $name(&self$(, $param: $type)*) -> (); } + }; + + { fn $name:ident(&mut self$(, $param:ident: $type:ty)*); } => { + passthrough_trait_fn! { fn $name(&mut self$(, $param: $type)*) -> (); } + }; +} + +pub(crate) use passthrough_trait_fn; diff --git a/src/imago/src/misc_helpers.rs b/src/imago/src/misc_helpers.rs new file mode 100644 index 00000000..22fe3d93 --- /dev/null +++ b/src/imago/src/misc_helpers.rs @@ -0,0 +1,81 @@ +//! Miscellaneous helper functions. + +use std::io; +use std::ops::Range; + +/// Checks whether something overlaps with something else. +pub(crate) trait Overlaps { + /// Does this overlap with `other`? + fn overlaps(&self, other: &Self) -> bool; +} + +impl Overlaps for Range { + fn overlaps(&self, other: &Self) -> bool { + self.start < other.end && other.start < self.end + } +} + +/// Prepend `Error` messages by context. +/// +/// Trait for `Error` objects that allows prepending their error messages by something that gives +/// context. +pub(crate) trait ErrorContext { + /// Prepend the error by `context`. + fn context(self, context: C) -> Self; +} + +impl ErrorContext for io::Error { + fn context(self, context: C) -> Self { + io::Error::new(self.kind(), format!("{context}: {self}")) + } +} + +/// Give results context in case of error. +/// +/// Lifts the `ErrorContext` trait to `Result` types. +pub(crate) trait ResultErrorContext { + /// Give context if `self` is an error. + /// + /// If `self` is an error, prepend the given `context`. + fn err_context C>(self, context: F) -> Self; +} + +impl ResultErrorContext for Result { + fn err_context C>(self, context: F) -> Self { + self.map_err(|err| err.context(context())) + } +} + +/// Similar to `AsRef`, but for types where `AsRef` is not implemented. +/// +/// When we need `AsRef` for a type but it is not implemented in its origin crate, there is no way +/// but to provide a local trait that we can implement here. Because there are no negative trait +/// bounds, we cannot implement this for `AsRef` (to have a common trait). +/// +/// Also includes a lifetime so that it is possible to borrow things for longer. +pub trait ImagoAsRef<'a, T: ?Sized> { + /// Return a simple reference for `self`. + fn as_ref(&self) -> &'a T; +} + +impl<'a, T: ?Sized, U: ImagoAsRef<'a, T>> ImagoAsRef<'a, T> for &'a U { + fn as_ref(&self) -> &'a T { + >::as_ref(self) + } +} + +#[cfg(feature = "vm-memory")] +impl<'a, B: vm_memory::bitmap::BitmapSlice> ImagoAsRef<'a, vm_memory::VolatileSlice<'a, B>> + for &'a vm_memory::VolatileSlice<'a, B> +{ + fn as_ref(&self) -> &'a vm_memory::VolatileSlice<'a, B> { + self + } +} + +/// Generate an `io::Error` of kind `InvalidData`. +pub(crate) fn invalid_data>>( + error: E, +) -> io::Error { + io::Error::new(io::ErrorKind::InvalidData, error) +} diff --git a/src/imago/src/null.rs b/src/imago/src/null.rs new file mode 100644 index 00000000..d6fa379c --- /dev/null +++ b/src/imago/src/null.rs @@ -0,0 +1,82 @@ +//! Null storage. +//! +//! Discard all written data, and return zeroes when read. 
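+//!
+//! A hedged example: this passes the `Null` object directly to `Raw::open_image()` (the
+//! crate-level docs use a boxed storage there, so this direct form is an assumption), and it
+//! must run inside an async (tokio) context.
+//! ```no_run
+//! # let _ = async {
+//! use imago::null::Null;
+//! use imago::raw::Raw;
+//! use imago::FormatAccess;
+//!
+//! // 1 MiB of virtual storage: reads return zeroes, writes are discarded.
+//! let null = Null::new(1 << 20);
+//! let raw = Raw::open_image(null, false).await?;
+//! let image = FormatAccess::new(raw);
+//!
+//! let mut buf = vec![0u8; 512];
+//! image.read(&mut buf, 0).await?;
+//! assert!(buf.iter().all(|&b| b == 0));
+//! # Ok::<(), std::io::Error>(())
+//! # };
+//! ```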
+ +use crate::io_buffers::{IoVector, IoVectorMut}; +use crate::storage::drivers::CommonStorageHelper; +use crate::Storage; +use std::fmt::{self, Display, Formatter}; +use std::io; +use std::sync::atomic::{AtomicU64, Ordering}; + +/// Null storage object. +/// +/// Reading from this will always return zeroes, writing to it does nothing (except to potentially +/// grow its virtual “file length”). +#[derive(Debug)] +pub struct Null { + /// Virtual “file length”. + size: AtomicU64, + + /// Storage helper. + common_storage_helper: CommonStorageHelper, +} + +impl Null { + /// Create a new null storage object with the given initial virtual size. + pub fn new(size: u64) -> Self { + Null { + size: size.into(), + common_storage_helper: Default::default(), + } + } +} + +impl Storage for Null { + fn size(&self) -> io::Result { + Ok(self.size.load(Ordering::Relaxed)) + } + + async unsafe fn pure_readv(&self, mut bufv: IoVectorMut<'_>, _offset: u64) -> io::Result<()> { + bufv.fill(0); + Ok(()) + } + + async unsafe fn pure_writev(&self, bufv: IoVector<'_>, offset: u64) -> io::Result<()> { + let Some(end) = offset.checked_add(bufv.len()) else { + return Err(io::Error::other("Write too long")); + }; + + self.size.fetch_max(end, Ordering::Relaxed); + Ok(()) + } + + async unsafe fn pure_write_zeroes(&self, offset: u64, length: u64) -> io::Result<()> { + let Some(end) = offset.checked_add(length) else { + return Err(io::Error::other("Write too long")); + }; + + self.size.fetch_max(end, Ordering::Relaxed); + Ok(()) + } + + async fn flush(&self) -> io::Result<()> { + // Nothing to do, there are no buffers + Ok(()) + } + + async fn sync(&self) -> io::Result<()> { + // Nothing to do, there is no hardware + Ok(()) + } + + fn get_storage_helper(&self) -> &CommonStorageHelper { + &self.common_storage_helper + } +} + +impl Display for Null { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "null:[{}B]", self.size.load(Ordering::Relaxed)) + } +} diff --git a/src/imago/src/qcow2/allocation.rs b/src/imago/src/qcow2/allocation.rs new file mode 100644 index 00000000..6c08716b --- /dev/null +++ b/src/imago/src/qcow2/allocation.rs @@ -0,0 +1,534 @@ +//! Cluster allocation. +//! +//! Functionality for allocating single clusters and ranges of clusters, and general handling of +//! refcount structures. + +use super::cache::RefBlockCacheBackend; +use super::*; +use std::mem; +use tokio::sync::MutexGuard; +use tracing::{event, warn, Level}; + +/// Central facility for cluster allocation. +pub(super) struct Allocator { + /// Qcow2 metadata file. + file: Arc, + + /// Qcow2 refcount table. + reftable: RefTable, + + /// The first free cluster index in the qcow2 file, to speed up allocation. + first_free_cluster: HostCluster, + + /// Qcow2 image header. + header: Arc
, + + /// Refblock cache. + rb_cache: AsyncLruCache>, +} + +impl + 'static> Qcow2 { + /// Return the central allocator instance. + /// + /// Returns an error for read-only images. + async fn allocator(&self) -> io::Result>> { + Ok(self + .allocator + .as_ref() + .ok_or_else(|| io::Error::other("Image is read-only"))? + .lock() + .await) + } + + /// Allocate one metadata cluster. + /// + /// Metadata clusters are allocated exclusively in the metadata (image) file. + pub(super) async fn allocate_meta_cluster(&self) -> io::Result { + self.allocate_meta_clusters(ClusterCount(1)).await + } + + /// Allocate multiple continuous metadata clusters. + /// + /// Useful e.g. for the L1 table or refcount table. + pub(super) async fn allocate_meta_clusters( + &self, + count: ClusterCount, + ) -> io::Result { + self.allocator().await?.allocate_clusters(count, None).await + } + + /// Allocate one data clusters for the given guest cluster. + /// + /// Without an external data file, data clusters are allocated in the image file, just like + /// metadata clusters. + /// + /// With an external data file, data clusters aren’t really allocated, but just put there at + /// the same offset as their guest offset. Their refcount is not tracked by the qcow2 metadata + /// structures (which only cover the metadata (image) file). + pub(super) async fn allocate_data_cluster( + &self, + guest_cluster: GuestCluster, + ) -> io::Result { + if self.header.external_data_file() { + Ok(HostCluster(guest_cluster.0)) + } else { + let mut allocator = self.allocator().await?; + + // Allocate clusters before setting up L2 entries + self.l2_cache.depend_on(&allocator.rb_cache).await?; + + allocator.allocate_clusters(ClusterCount(1), None).await + } + } + + /// Allocate the data cluster with the given index. + /// + /// Without a `mandatory_host_cluster` given, this is the same as + /// [`Qcow2::allocate_data_cluster()`]. + /// + /// With a `mandatory_host_cluster` given, try to allocate that cluster. If that is not + /// possible because it is already allocated, return `Ok(None)`. + pub(super) async fn allocate_data_cluster_at( + &self, + guest_cluster: GuestCluster, + mandatory_host_cluster: Option, + ) -> io::Result> { + let Some(mandatory_host_cluster) = mandatory_host_cluster else { + return self.allocate_data_cluster(guest_cluster).await.map(Some); + }; + + if self.header.external_data_file() { + let cluster = HostCluster(guest_cluster.0); + Ok((cluster == mandatory_host_cluster).then_some(cluster)) + } else { + let mut allocator = self.allocator().await?; + + // Allocate clusters before setting up L2 entries + self.l2_cache.depend_on(&allocator.rb_cache).await?; + + let cluster = allocator + .allocate_cluster_at(mandatory_host_cluster) + .await? + .then_some(mandatory_host_cluster); + Ok(cluster) + } + } + + /// Free metadata clusters (i.e. decrement their refcount). + /// + /// Best-effort operation. On error, the given clusters may be leaked, but no errors are ever + /// returned (because there is no good way to handle such errors anyway). + pub(super) async fn free_meta_clusters(&self, cluster: HostCluster, count: ClusterCount) { + if let Ok(mut allocator) = self.allocator().await { + allocator.free_clusters(cluster, count).await + } + } + + /// Free data clusters (i.e. decrement their refcount). + /// + /// Best-effort operation. On error, the given clusters may be leaked, but no errors are ever + /// returned (because there is no good way to handle such errors anyway). 
+ pub(super) async fn free_data_clusters(&self, cluster: HostCluster, count: ClusterCount) { + if !self.header.external_data_file() { + if let Ok(mut allocator) = self.allocator().await { + // Clear L2 entries before deallocating clusters + if let Err(err) = allocator.rb_cache.depend_on(&self.l2_cache).await { + warn!("Leaking clusters; cannot set up cache inter-dependency with L2 cache: {err}"); + return; + } + + allocator.free_clusters(cluster, count).await; + } + } + } +} + +impl Allocator { + /// Create a new allocator for the given image file. + pub async fn new(image: Arc, header: Arc
) -> io::Result { + let cb = header.cluster_bits(); + let rt_offset = header.reftable_offset(); + let rt_cluster = rt_offset + .checked_cluster(cb) + .ok_or_else(|| invalid_data(format!("Unaligned refcount table: {rt_offset}")))?; + + let reftable = RefTable::load( + image.as_ref(), + &header, + rt_cluster, + header.reftable_entries(), + ) + .await?; + + let rb_cache_backend = RefBlockCacheBackend::new(Arc::clone(&image), Arc::clone(&header)); + let rb_cache = AsyncLruCache::new(rb_cache_backend, 32); + + Ok(Allocator { + file: image, + reftable, + first_free_cluster: HostCluster(0), + header, + rb_cache, + }) + } + + /// Flush the refcount block cache. + pub async fn flush_rb_cache(&self) -> io::Result<()> { + self.rb_cache.flush().await + } + + /// Allocate clusters in the image file. + /// + /// `end_cluster` should only be used when allocating refblocks. When reaching this cluster + /// index, abort trying to allocate. (This is used for allocating refblocks, to prevent + /// infinite recursion and speed things up.) + async fn allocate_clusters( + &mut self, + count: ClusterCount, + end_cluster: Option, + ) -> io::Result { + let mut index = self.first_free_cluster; + loop { + if end_cluster == Some(index) { + return Err(io::Error::other("Maximum cluster index reached")); + } + + let alloc_count = self.allocate_clusters_at(index, count).await?; + if alloc_count == count { + return Ok(index); + } + + index += alloc_count + ClusterCount(1); + if index.offset(self.header.cluster_bits()) > MAX_OFFSET { + return Err(io::Error::other("Cannot grow qcow2 file any further")); + } + } + } + + /// Allocate the given clusters in the image file. + /// + /// Allocate up to `count` unallocated clusters starting from `index`. When encountering an + /// already allocated cluster (or any other error), stop, and free the clusters that were just + /// newly allocated. + /// + /// Returns the number of clusters that could be allocated (starting from `index`), which may + /// be 0 if `index` has already been allocated. Note again that in case this is less than + /// `count`, those clusters will have been freed again already, so this is just a hint to + /// callers that the cluster at `index + count` is already allocated. + async fn allocate_clusters_at( + &mut self, + mut index: HostCluster, + mut count: ClusterCount, + ) -> io::Result { + let start_index = index; + + while count > ClusterCount(0) { + // Note that `ensure_rb()` in `allocate_cluster_at()` may allocate clusters (new + // refblocks), and also a new refcount table. This can interfere with us allocating a + // large continuous region like so (A is our allocation, R is a refblock, imagine a + // refblock covers four clusters): + // + // |AAAA| -- allocated four clusters need new refblock + // |AAAA|R | -- made refblock self-describing, but now allocation cannot go on + // + // This gets resolved by us retrying, and future refblocks using the region that has + // now become free but already has refblocks to cover it: + // + // | |RAAA| -- retry after refblock; need a new refblock again + // |R |RAAA|AAAA| -- the new refblock allocates itself in the region we abandoned + // + // However, eventually, the new refblocks will run into the new start of our allocation + // again: + // + // |RRRR|RAAA|AAAA|AAAA|AAAA|AAAA| -- need new refblock + // |RRRR|RAAA|AAAA|AAAA|AAAA|AAAA|R | -- allocation cannot go on, again + // |RRRR|R | | | | |RAAA| -- another attempt + // |RRRR|RRRR|R...| | | |RAAA|AAAA|AAAA|AAAA|AAAA|... 
+ // + // As you can see, the hole we leave behind gets larger each time. So eventually, this + // must converge. + // + // The same applies to the refcount table being allocated instead of just refblocks. + + let result = self.allocate_cluster_at(index).await; + if !matches!(result, Ok(true)) { + // Already allocated, or some real error occurred; free everything allocated so far + self.free_clusters(start_index, index - start_index).await; + return result.map(|_| index - start_index); + } + + count -= ClusterCount(1); + index += ClusterCount(1); + } + + Ok(index - start_index) + } + + /// Allocate the given cluster in the image file. + /// + /// Return `Ok(true)` if allocation was successful, or `Ok(false)` if the cluster was already + /// allocated before. + async fn allocate_cluster_at(&mut self, index: HostCluster) -> io::Result { + let rb_bits = self.header.rb_bits(); + let (rt_index, rb_index) = index.rt_rb_indices(rb_bits); + + let rb = self.ensure_rb(rt_index).await?; + let mut rb = rb.lock_write().await; + let can_allocate = rb.is_zero(rb_index); + if can_allocate { + rb.increment(rb_index)?; + } + + // We now know this is allocated + if index == self.first_free_cluster { + self.first_free_cluster = index + ClusterCount(1); + } + + Ok(can_allocate) + } + + /// Get the refblock referenced by the given reftable index, if any. + /// + /// If there is no refblock for the given reftable index, return `Ok(None)`. + async fn get_rb(&mut self, rt_index: usize) -> io::Result>> { + let rt_entry = self.reftable.get(rt_index); + if let Some(rb_offset) = rt_entry.refblock_offset() { + let cb = self.header.cluster_bits(); + let rb_cluster = rb_offset.checked_cluster(cb).ok_or_else(|| { + invalid_data(format!("Unaligned refcount block with index {rt_index}; refcount table entry: {rt_entry:?}")) + })?; + + self.rb_cache.get_or_insert(rb_cluster).await.map(Some) + } else { + Ok(None) + } + } + + /// Get a refblock for the given reftable index. + /// + /// If there already is a refblock at that index, return it. Otherwise, create one and hook it + /// up. + async fn ensure_rb(&mut self, rt_index: usize) -> io::Result> { + if let Some(rb) = self.get_rb(rt_index).await? { + return Ok(rb); + } + + if !self.reftable.in_bounds(rt_index) { + self.grow_reftable(rt_index).await?; + // `grow_reftable` will allocate new refblocks, so check the index again + if let Some(rb) = self.get_rb(rt_index).await? { + return Ok(rb); + } + } + + let mut new_rb = RefBlock::new_cleared(self.file.as_ref(), &self.header)?; + + // This is the first cluster covered by the new refblock + let rb_cluster = HostCluster::from_ref_indices(rt_index, 0, self.header.rb_bits()); + + // Try to allocate a cluster in the already existing refcount structures. + // By stopping looking for clusters at `rb_cluster`, we ensure that we will not land here + // in this exact function again, trying to allocate the very same refblock (it is possible + // we allocate one before the current one, though), and so prevent any possible infinite + // recursion. + // Recursion is possible, though, so the future must be boxed. + // false`), so must be boxed. 
+ if let Ok(new_rb_cluster) = + Box::pin(self.allocate_clusters(ClusterCount(1), Some(rb_cluster))).await + { + new_rb.set_cluster(new_rb_cluster); + } else { + // Place the refblock such that it covers itself + new_rb.set_cluster(rb_cluster); + new_rb.lock_write().await.increment(0)?; + } + new_rb.write(self.file.as_ref()).await?; + + self.reftable.enter_refblock(rt_index, &new_rb)?; + self.reftable + .write_entry(self.file.as_ref(), rt_index) + .await?; + + let new_rb = Arc::new(new_rb); + self.rb_cache + .insert(new_rb.get_cluster().unwrap(), Arc::clone(&new_rb)) + .await?; + Ok(new_rb) + } + + /// Create a new refcount table covering at least `at_least_index`. + /// + /// Create a new reftable of the required size, copy all existing refblock references into it, + /// ensure it is refcounted itself (also creating new refblocks if necessary), and have the + /// image header reference the new refcount table. + async fn grow_reftable(&mut self, at_least_index: usize) -> io::Result<()> { + let cb = self.header.cluster_bits(); + let rb_bits = self.header.rb_bits(); + let rb_entries = 1 << rb_bits; + + let mut new_rt = self.reftable.clone_and_grow(&self.header, at_least_index)?; + let rt_clusters = ClusterCount::from_byte_size(new_rt.byte_size() as u64, cb); + + // Find free range + let (mut rt_index, mut rb_index) = self.first_free_cluster.rt_rb_indices(rb_bits); + let mut free_cluster_index: Option = None; + let mut free_cluster_count = ClusterCount(0); + + // Number of clusters required to allocate both the new reftable and all new refblocks. + // Note that `clone_and_grow()` *guarantees* we can fit the final count in there. + let mut required_clusters = rt_clusters; + + while free_cluster_count < required_clusters { + // `clone_and_grow()` guarantees it can fit + assert!(new_rt.in_bounds(rt_index)); + + let rt_entry = new_rt.get(rt_index); + let Some(rb_offset) = rt_entry.refblock_offset() else { + let start_index = HostCluster::from_ref_indices(rt_index, 0, rb_bits); + free_cluster_index.get_or_insert(start_index); + free_cluster_count += ClusterCount(rb_entries as u64); + // Need to allocate this RB + required_clusters += ClusterCount(1); + continue; + }; + + let rb_cluster = rb_offset.checked_cluster(cb).ok_or_else(|| { + invalid_data(format!("Unaligned refcount block with index {rt_index}; refcount table entry: {rt_entry:?}")) + })?; + + let rb = self.rb_cache.get_or_insert(rb_cluster).await?; + for i in rb_index..rb_entries { + if rb.is_zero(i) { + let index = HostCluster::from_ref_indices(rt_index, i, rb_bits); + free_cluster_index.get_or_insert(index); + free_cluster_count += ClusterCount(1); + + if free_cluster_count >= required_clusters { + break; + } + } else if free_cluster_index.is_some() { + free_cluster_index.take(); + free_cluster_count = ClusterCount(0); + required_clusters = rt_clusters; // reset + } + } + + rb_index = 0; + rt_index += 1; + } + + let mut index = free_cluster_index.unwrap(); + let mut count = required_clusters; + + // Put refblocks first + let rt_index_start = index.rt_index(rb_bits); + let rt_index_end = (index + count).0.div_ceil(rb_entries as u64) as usize; + + let mut refblocks = Vec::>::new(); + for rt_i in rt_index_start..rt_index_end { + if let Some(rb_offset) = new_rt.get(rt_i).refblock_offset() { + // Checked in the loop above + let rb_cluster = rb_offset.checked_cluster(cb).unwrap(); + let rb = self.rb_cache.get_or_insert(rb_cluster).await?; + refblocks.push(rb); + continue; + } + + let mut rb = RefBlock::new_cleared(self.file.as_ref(), 
&self.header)?; + rb.set_cluster(index); + new_rt.enter_refblock(rt_i, &rb)?; + let rb = Arc::new(rb); + self.rb_cache.insert(index, Arc::clone(&rb)).await?; + refblocks.push(rb); + index += ClusterCount(1); + count -= ClusterCount(1); + } + + assert!(count >= rt_clusters); + new_rt.set_cluster(index); + + // Now set allocation information + let start_index = free_cluster_index.unwrap(); + let end_index = index + rt_clusters; + + for index in start_index.0..end_index.0 { + let index = HostCluster(index); + let (rt_i, rb_i) = index.rt_rb_indices(rb_bits); + + // `refblocks[0]` is for `rt_index_start` + let rb_vec_i = rt_i - rt_index_start; + // Incrementing from 0 to 1 must succeed + refblocks[rb_vec_i] + .lock_write() + .await + .increment(rb_i) + .unwrap(); + } + + // Any errors from here on may lead to leaked clusters if there are refblocks in + // `refblocks` that are already part of the old reftable. + // TODO: Try to clean that up, though it seems quite hard for little gain. + self.rb_cache.flush().await?; + new_rt.write(self.file.as_ref()).await?; + + self.header.set_reftable(&new_rt)?; + self.header + .write_reftable_pointer(self.file.as_ref()) + .await?; + + // Must set new reftable before calling `free_clusters()` + let mut old_reftable = mem::replace(&mut self.reftable, new_rt); + if let Some(old_rt_cluster) = old_reftable.get_cluster() { + let old_rt_size = old_reftable.cluster_count(); + old_reftable.unset_cluster(); + self.free_clusters(old_rt_cluster, old_rt_size).await; + } + + Ok(()) + } + + /// Free clusters (i.e. decrement their refcount). + /// + /// Best-effort operation. On error, the given clusters may be leaked, but no errors are ever + /// returned (because there is no good way to handle such errors anyway). + async fn free_clusters(&mut self, start: HostCluster, mut count: ClusterCount) { + if count.0 == 0 { + return; + } + + if start < self.first_free_cluster { + self.first_free_cluster = start; + } + + let rb_bits = self.header.rb_bits(); + let rb_entries = 1 << rb_bits; + let (mut rt_index, mut rb_index) = start.rt_rb_indices(rb_bits); + + while count > ClusterCount(0) { + let in_rb_count = cmp::min((rb_entries - rb_index) as u64, count.0) as usize; + + match self.get_rb(rt_index).await { + Ok(Some(rb)) => { + let mut rb = rb.lock_write().await; + for i in rb_index..(rb_index + in_rb_count) { + if let Err(err) = rb.decrement(i) { + event!(Level::WARN, "Failed to free cluster: {err}"); + } + } + } + + Ok(None) => { + event!( + Level::WARN, + "Failed to free {in_rb_count} clusters: Not allocated" + ) + } + Err(err) => event!(Level::WARN, "Failed to free {in_rb_count} clusters: {err}"), + } + + count -= ClusterCount(in_rb_count as u64); + rb_index = 0; + rt_index += 1; + } + } +} diff --git a/src/imago/src/qcow2/cache.rs b/src/imago/src/qcow2/cache.rs new file mode 100644 index 00000000..e61b757a --- /dev/null +++ b/src/imago/src/qcow2/cache.rs @@ -0,0 +1,84 @@ +//! Provides functionality for the L2 and refblock caches. + +use super::*; +use crate::async_lru_cache::AsyncLruCacheBackend; +use tracing::trace; + +/// I/O back-end for the L2 table cache. +pub(super) struct L2CacheBackend { + /// Qcow2 metadata file. + file: Arc, + + /// Qcow2 header. + header: Arc
+,
+}
+
+/// I/O back-end for the refblock cache.
+pub(super) struct RefBlockCacheBackend<S: Storage> {
+    /// Qcow2 metadata file.
+    file: Arc<S>,
+
+    /// Qcow2 header.
+    header: Arc<Header>,
+}
+
+impl<S: Storage> L2CacheBackend<S> {
+    /// Create a new `L2CacheBackend`.
+    ///
+    /// `file` is the qcow2 metadata (image) file.
+    pub fn new(file: Arc<S>, header: Arc<Header>) -> Self {
+        L2CacheBackend { file, header }
+    }
+}
+
+impl<S: Storage> AsyncLruCacheBackend for L2CacheBackend<S> {
+    type Key = HostCluster;
+    type Value = L2Table;
+
+    async fn load(&self, l2_cluster: HostCluster) -> io::Result<L2Table> {
+        trace!("Loading L2 table");
+
+        L2Table::load(
+            self.file.as_ref(),
+            &self.header,
+            l2_cluster,
+            self.header.l2_entries(),
+        )
+        .await
+    }
+
+    async fn flush(&self, l2_cluster: HostCluster, l2_table: Arc<L2Table>) -> io::Result<()> {
+        trace!("Flushing L2 table");
+        if l2_table.is_modified() {
+            assert!(l2_table.get_cluster().unwrap() == l2_cluster);
+            l2_table.write(self.file.as_ref()).await?;
+        }
+        Ok(())
+    }
+}
+
+impl<S: Storage> RefBlockCacheBackend<S> {
+    /// Create a new `RefBlockCacheBackend`.
+    ///
+    /// `file` is the qcow2 metadata (image) file.
+    pub fn new(file: Arc<S>, header: Arc<Header>
) -> Self { + RefBlockCacheBackend { file, header } + } +} + +impl AsyncLruCacheBackend for RefBlockCacheBackend { + type Key = HostCluster; + type Value = RefBlock; + + async fn load(&self, rb_cluster: HostCluster) -> io::Result { + RefBlock::load(self.file.as_ref(), &self.header, rb_cluster).await + } + + async fn flush(&self, rb_cluster: HostCluster, refblock: Arc) -> io::Result<()> { + if refblock.is_modified() { + assert!(refblock.get_cluster().unwrap() == rb_cluster); + refblock.write(self.file.as_ref()).await?; + } + Ok(()) + } +} diff --git a/src/imago/src/qcow2/compressed.rs b/src/imago/src/qcow2/compressed.rs new file mode 100644 index 00000000..654b9c38 --- /dev/null +++ b/src/imago/src/qcow2/compressed.rs @@ -0,0 +1,55 @@ +//! Support for compressed clusters. + +use super::*; +use crate::io_buffers::IoBuffer; +use miniz_oxide::inflate::core::{decompress as inflate, DecompressorOxide}; +use miniz_oxide::inflate::TINFLStatus; + +impl + 'static> Qcow2 { + /// Read one compressed cluster. + /// + /// Read the compressed data at `compressed_offset` of length `compressed_length` (which must + /// be the values from the L2 compressed cluster descriptor) into a bounce buffer, then + /// decompress it into `buf` (which must have a length of exactly one cluster). + pub(super) async fn read_compressed_cluster( + &self, + buf: &mut [u8], + compressed_offset: HostOffset, + compressed_length: u64, + ) -> io::Result<()> { + debug_assert!(buf.len() == self.header.cluster_size()); + + let storage = self.storage(); + + // Must fit (really shouldn’t be compressed if this exceeds the cluster size anyway) + let compressed_length = compressed_length.try_into().map_err(io::Error::other)?; + let mut compressed_buf = IoBuffer::new(compressed_length, storage.mem_align())?; + storage + .read(&mut compressed_buf, compressed_offset.0) + .await?; + + let mut dec_ox = DecompressorOxide::new(); + let (status, _read, written) = + inflate(&mut dec_ox, compressed_buf.as_ref().into_slice(), buf, 0, 0); + + // Because `compressed_length` will generally exceed the actual length, `HasMoreOutput` is + // expected and can be ignored + if status != TINFLStatus::Done && status != TINFLStatus::HasMoreOutput { + return Err(io::Error::other(format!( + "Failed to decompress cluster (host offset {}+{}): {:?}", + compressed_offset, compressed_length, status + ))); + } + if written < buf.len() { + return Err(io::Error::other(format!( + "Failed to decompress cluster (host offset {}+{}): Decompressed {} bytes, expected {}", + compressed_offset, + compressed_length, + written, + buf.len(), + ))); + } + + Ok(()) + } +} diff --git a/src/imago/src/qcow2/cow.rs b/src/imago/src/qcow2/cow.rs new file mode 100644 index 00000000..8c31b729 --- /dev/null +++ b/src/imago/src/qcow2/cow.rs @@ -0,0 +1,295 @@ +//! Copy-on-write operations. +//! +//! Implements copy-on-write when writing to clusters that are not simple allocated data clusters. + +use super::*; +use crate::io_buffers::IoBuffer; + +impl> Qcow2 { + /// Do copy-on-write for the given guest cluster, if necessary. + /// + /// If the given guest cluster is backed by an allocated copied data cluster, return that + /// cluster, so it can just be written into. + /// + /// Otherwise, allocate a new data cluster and copy the previously visible cluster contents + /// there: + /// - For non-copied data clusters, copy the cluster contents. + /// - For zero clusters, write zeroes. + /// - For unallocated clusters, copy data from the backing file (if any, zeroes otherwise). 
+ /// - For compressed clusters, decompress the data and write it into the new cluster. + /// + /// Return the new cluster, if any was allocated, or the old cluster in case it was already + /// safe to write to. I.e., the returned cluster is where data for `cluster` may be written + /// to. + /// + /// `cluster` is the guest cluster to COW. + /// + /// `mandatory_host_cluster` may specify the cluster that must be used for the new allocation, + /// or that an existing data cluster allocation must match. If it does not match, or that + /// cluster is already allocated and cannot be used, return `Ok(None)`. + /// + /// `partial_skip_cow` may give an in-cluster range that is supposed to be overwritten + /// immediately anyway, i.e. that need not be copied. + /// + /// `l2_table` is the L2 table for `offset`. + /// + /// If a previously existing allocation is replaced, the old one will be put into + /// `leaked_allocations`. The caller must free it. + pub(super) async fn cow_cluster( + &self, + cluster: GuestCluster, + mandatory_host_cluster: Option, + partial_skip_cow: Option>, + l2_table: &mut L2TableWriteGuard<'_>, + leaked_allocations: &mut Vec<(HostCluster, ClusterCount)>, + ) -> io::Result> { + // No need to do COW when writing the full cluster + let full_skip_cow = if let Some(skip) = partial_skip_cow.as_ref() { + skip.start == 0 && skip.end == self.header.cluster_size() + } else { + false + }; + + let existing_mapping = l2_table.get_mapping(cluster)?; + if let L2Mapping::DataFile { + host_cluster, + copied: true, + } = existing_mapping + { + if let Some(mandatory_host_cluster) = mandatory_host_cluster { + if host_cluster != mandatory_host_cluster { + return Ok(None); + } + } + return Ok(Some(host_cluster)); + }; + + self.need_writable()?; + + let new_cluster = if let L2Mapping::Zero { + host_cluster: Some(host_cluster), + copied: true, + } = existing_mapping + { + if let Some(mandatory_host_cluster) = mandatory_host_cluster { + if host_cluster == mandatory_host_cluster { + Some(host_cluster) + } else { + // Discard existing mapping + self.allocate_data_cluster_at(cluster, Some(mandatory_host_cluster)) + .await? + } + } else { + Some(host_cluster) + } + } else { + self.allocate_data_cluster_at(cluster, mandatory_host_cluster) + .await? + }; + let Some(new_cluster) = new_cluster else { + // Allocation at `mandatory_host_cluster` failed + return Ok(None); + }; + + if !full_skip_cow { + match existing_mapping { + L2Mapping::DataFile { + host_cluster: _, + copied: true, + } => unreachable!(), + + L2Mapping::DataFile { + host_cluster, + copied: false, + } => { + self.cow_copy_storage( + self.storage(), + host_cluster, + new_cluster, + partial_skip_cow, + ) + .await? + } + + L2Mapping::Backing { backing_offset } => { + if let Some(backing) = self.backing.as_ref() { + self.cow_copy_format(backing, backing_offset, new_cluster, partial_skip_cow) + .await? + } else { + self.cow_zero(new_cluster, partial_skip_cow).await? + } + } + + L2Mapping::Zero { + host_cluster: _, + copied: _, + } => self.cow_zero(new_cluster, partial_skip_cow).await?, + + L2Mapping::Compressed { + host_offset, + length, + } => { + self.cow_compressed(host_offset, length, new_cluster) + .await? + } + } + } + + let l2i = cluster.l2_index(self.header.cluster_bits()); + if let Some(leaked) = l2_table.map_cluster(l2i, new_cluster) { + leaked_allocations.push(leaked); + } + + Ok(Some(new_cluster)) + } + + /// Calculate what range of a cluster we need to COW. 
+ /// + /// Given potentially a range to skip, calculate what we should COW. The range will only be + /// taken into account if it is at one end of the cluster, to always yield a continuous range + /// to COW (one without a hole in the middle). + /// + /// The returned range is also aligned to `alignment` if possible. + fn get_cow_range( + &self, + partial_skip_cow: Option>, + alignment: usize, + ) -> Option> { + let mut copy_range = 0..self.header.cluster_size(); + if let Some(partial_skip_cow) = partial_skip_cow { + if partial_skip_cow.start == copy_range.start { + copy_range.start = partial_skip_cow.end; + } else if partial_skip_cow.end == copy_range.end { + copy_range.end = partial_skip_cow.start; + } + } + + if copy_range.is_empty() { + return None; + } + + let alignment = cmp::min(alignment, self.header.cluster_size()); + debug_assert!(alignment.is_power_of_two()); + let mask = alignment - 1; + + if copy_range.start & mask != 0 { + copy_range.start &= !mask; + } + if copy_range.end & mask != 0 { + copy_range.end = (copy_range.end & !mask) + alignment; + } + + Some(copy_range) + } + + /// Copy data from one data file cluster to another. + /// + /// Used for COW on non-copied data clusters. + async fn cow_copy_storage( + &self, + from: &S, + from_cluster: HostCluster, + to_cluster: HostCluster, + partial_skip_cow: Option>, + ) -> io::Result<()> { + let to = self.storage(); + + let align = cmp::max(from.req_align(), to.req_align()); + let Some(cow_range) = self.get_cow_range(partial_skip_cow, align) else { + return Ok(()); + }; + + let mut buf = IoBuffer::new(cow_range.end - cow_range.start, from.mem_align())?; + + let cb = self.header.cluster_bits(); + let from_offset = from_cluster.offset(cb); + let to_offset = to_cluster.offset(cb); + + from.read(&mut buf, from_offset.0 + cow_range.start as u64) + .await?; + + to.write(&buf, to_offset.0 + cow_range.start as u64).await?; + + Ok(()) + } + + /// Copy data from another image into our data file. + /// + /// Used for COW on clusters served by a backing image. + async fn cow_copy_format( + &self, + from: &F, + from_offset: u64, + to_cluster: HostCluster, + partial_skip_cow: Option>, + ) -> io::Result<()> { + let to = self.storage(); + let from = from.unwrap(); + + let align = cmp::max(from.req_align(), to.req_align()); + let Some(cow_range) = self.get_cow_range(partial_skip_cow, align) else { + return Ok(()); + }; + + let mut buf = IoBuffer::new(cow_range.end - cow_range.start, from.mem_align())?; + + let to_offset = to_cluster.offset(self.header.cluster_bits()); + + from.read(&mut buf, from_offset + cow_range.start as u64) + .await?; + + to.write(&buf, to_offset.0 + cow_range.start as u64).await?; + + Ok(()) + } + + /// Fill the given cluster with zeroes. + /// + /// Used for COW on zero clusters. + async fn cow_zero( + &self, + to_cluster: HostCluster, + partial_skip_cow: Option>, + ) -> io::Result<()> { + let to = self.storage(); + + let align = to.req_align(); + let Some(cow_range) = self.get_cow_range(partial_skip_cow, align) else { + return Ok(()); + }; + + let to_offset = to_cluster.offset(self.header.cluster_bits()); + to.write_zeroes( + to_offset.0 + cow_range.start as u64, + (cow_range.end - cow_range.start) as u64, + ) + .await?; + + Ok(()) + } + + /// Decompress a cluster into the target cluster. + /// + /// Used for COW on compressed clusters. 
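+    //
+    // Illustrative aside (not part of imago): the range widening performed by
+    // `get_cow_range()` above amounts to rounding the start down and the end
+    // up to the given power-of-two alignment, e.g.:
+    //
+    //     fn widen(mut r: std::ops::Range<usize>, alignment: usize) -> std::ops::Range<usize> {
+    //         let mask = alignment - 1;
+    //         r.start &= !mask;
+    //         if r.end & mask != 0 {
+    //             r.end = (r.end & !mask) + alignment;
+    //         }
+    //         r
+    //     }
+    //
+    //     assert_eq!(widen(700..1800, 512), 512..2048);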
+ async fn cow_compressed( + &self, + compressed_offset: HostOffset, + compressed_length: u64, + to_cluster: HostCluster, + ) -> io::Result<()> { + let to = self.storage(); + + let mut buf = IoBuffer::new(self.header.cluster_size(), to.mem_align())?; + self.read_compressed_cluster( + buf.as_mut().into_slice(), + compressed_offset, + compressed_length, + ) + .await?; + + let to_offset = to_cluster.offset(self.header.cluster_bits()); + to.write(&buf, to_offset.0).await?; + + Ok(()) + } +} diff --git a/src/imago/src/qcow2/io_func.rs b/src/imago/src/qcow2/io_func.rs new file mode 100644 index 00000000..d1076d98 --- /dev/null +++ b/src/imago/src/qcow2/io_func.rs @@ -0,0 +1,81 @@ +//! Special I/O functions. +//! +//! Most of I/O should be implemented in the generic +//! [`imago::format::access`](crate::format::access) module, but some I/O needs to be done directly +//! by image drivers (like handling compression). + +use super::*; +use crate::io_buffers::IoBuffer; + +impl> Qcow2 { + /// Read the special range at `offset`. + /// + /// Currently, the only special range we have are compressed clusters. + pub(super) async fn do_readv_special( + &self, + mut bufv: IoVectorMut<'_>, + mut offset: GuestOffset, + ) -> io::Result<()> { + let mut saved_l2_table: Option> = None; + let cb = self.header.cluster_bits(); + + // Do everything cluster by cluster. + while !bufv.is_empty() { + let l2_table = if let Some(saved) = saved_l2_table.as_ref() { + saved + } else { + let new_l2 = self + .get_l2(offset, false) + .await? + .ok_or(io::ErrorKind::Other)?; + saved_l2_table.get_or_insert(new_l2) + }; + + let chunk_length = offset.remaining_in_cluster(cb); + let (chunk, remainder) = bufv.split_at(chunk_length); + bufv = remainder; + + let mut bounce_buffer_and_chunk = None; + let need_bounce_buffer = chunk.buffer_count() != 1 + || offset.in_cluster_offset(cb) != 0 + || chunk.len() != self.header.cluster_size() as u64; + + let slice = if need_bounce_buffer { + let bounce_buffer = IoBuffer::new(self.header.cluster_size(), 1)?; + bounce_buffer_and_chunk = Some((bounce_buffer, chunk)); + bounce_buffer_and_chunk.as_mut().unwrap().0.as_mut() + } else { + chunk.into_inner().pop().unwrap().into() + }; + + let guest_cluster = offset.cluster(cb); + match l2_table.get_mapping(guest_cluster)? { + L2Mapping::Compressed { + host_offset, + length, + } => { + self.read_compressed_cluster(slice.into_slice(), host_offset, length) + .await?; + } + + _ => return Err(io::ErrorKind::Other.into()), + } + + if let Some((bounce_buffer, mut chunk)) = bounce_buffer_and_chunk { + let ofs = offset.in_cluster_offset(cb); + let end = ofs + chunk.len() as usize; + chunk.copy_from_slice(bounce_buffer.as_ref_range(ofs..end).into_slice()); + } + + let next_cluster = if let Some(next) = guest_cluster.next_in_l2(cb) { + next + } else { + saved_l2_table.take(); + guest_cluster.first_in_next_l2(cb) + }; + offset = next_cluster.offset(cb); + } + + Ok(()) + } +} diff --git a/src/imago/src/qcow2/mappings.rs b/src/imago/src/qcow2/mappings.rs new file mode 100644 index 00000000..80dcaf02 --- /dev/null +++ b/src/imago/src/qcow2/mappings.rs @@ -0,0 +1,346 @@ +//! Get and establish cluster mappings. + +use super::*; +use tokio::sync::RwLockWriteGuard; + +impl> Qcow2 { + /// Get the given range’s mapping information. + /// + /// Underlying implementation for [`Qcow2::get_mapping()`]. 
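+    ///
+    /// # Example
+    ///
+    /// Hypothetical sketch of how a caller might consume the result (not a
+    /// doctest; the image handle and the numbers are made up):
+    ///
+    /// ```ignore
+    /// let (mapping, len) = qcow2.do_get_mapping(GuestOffset(0), 65536).await?;
+    /// match mapping {
+    ///     Mapping::Raw { offset, writable, .. } => { /* `len` bytes at `offset` in the data file */ }
+    ///     Mapping::Indirect { layer, offset, .. } => { /* forward to the backing `layer` */ }
+    ///     Mapping::Zero => { /* the range reads as zeroes */ }
+    ///     Mapping::Special { .. } => { /* e.g. compressed; handled by `do_readv_special()` */ }
+    /// }
+    /// ```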
+ pub(super) async fn do_get_mapping( + &self, + offset: GuestOffset, + max_length: u64, + ) -> io::Result<(Mapping<'_, S>, u64)> { + let Some(l2_table) = self.get_l2(offset, false).await? else { + let cb = self.header.cluster_bits(); + let len = cmp::min(offset.remaining_in_l2_table(cb), max_length); + let mapping = if let Some(backing) = self.backing.as_ref() { + Mapping::Indirect { + layer: backing.unwrap(), + offset: offset.0, + writable: false, + } + } else { + Mapping::Zero + }; + return Ok((mapping, len)); + }; + + self.do_get_mapping_with_l2(offset, max_length, &l2_table) + .await + } + + /// Get the given range’s mapping information, when we already have the L2 table. + pub(super) async fn do_get_mapping_with_l2( + &self, + offset: GuestOffset, + max_length: u64, + l2_table: &L2Table, + ) -> io::Result<(Mapping<'_, S>, u64)> { + let cb = self.header.cluster_bits(); + + // Get mapping at `offset` + let mut current_guest_cluster = offset.cluster(cb); + let first_mapping = l2_table.get_mapping(current_guest_cluster)?; + let return_mapping = match first_mapping { + L2Mapping::DataFile { + host_cluster, + copied, + } => Mapping::Raw { + storage: self.storage(), + offset: host_cluster.relative_offset(offset, cb).0, + writable: copied, + }, + + L2Mapping::Backing { backing_offset } => { + if let Some(backing) = self.backing.as_ref() { + Mapping::Indirect { + layer: backing.unwrap(), + offset: backing_offset + offset.in_cluster_offset(cb) as u64, + writable: false, + } + } else { + Mapping::Zero + } + } + + L2Mapping::Zero { + host_cluster: _, + copied: _, + } => Mapping::Zero, + + L2Mapping::Compressed { + host_offset: _, + length: _, + } => Mapping::Special { offset: offset.0 }, + }; + + // Find out how long this consecutive mapping is, but only within the current L2 table + let mut consecutive_length = offset.remaining_in_cluster(cb); + let mut preceding_mapping = first_mapping; + while consecutive_length < max_length { + let Some(next) = current_guest_cluster.next_in_l2(cb) else { + break; + }; + current_guest_cluster = next; + + let mapping = l2_table.get_mapping(current_guest_cluster)?; + if !mapping.is_consecutive(&preceding_mapping, cb) { + break; + } + + preceding_mapping = mapping; + consecutive_length += self.header.cluster_size() as u64; + } + + consecutive_length = cmp::min(consecutive_length, max_length); + Ok((return_mapping, consecutive_length)) + } + + /// Make the given range be mapped by data clusters. + /// + /// Underlying implementation for [`Qcow2::ensure_data_mapping()`]. + pub(super) async fn do_ensure_data_mapping( + &self, + offset: GuestOffset, + length: u64, + overwrite: bool, + ) -> io::Result<(&S, u64, u64)> { + let l2_table = self.ensure_l2(offset).await?; + + // Fast path for if everything is already allocated, which should be the common case at + // runtime. + // It must really be everything, though; we know our caller will want to have everything + // allocated eventually, so if anything is missing, go down to the allocation path so we + // try to allocate clusters such that they are not fragmented (if possible) and we can + // return as big of a single mapping as possible. 
+ let existing = self + .do_get_mapping_with_l2(offset, length, &l2_table) + .await?; + if let Mapping::Raw { + storage, + offset, + writable: true, + } = existing.0 + { + if existing.1 >= length { + return Ok((storage, offset, existing.1)); + } + } + + let l2_table = l2_table.lock_write().await; + let mut leaked_allocations = Vec::<(HostCluster, ClusterCount)>::new(); + + let res = self + .ensure_data_mapping_no_cleanup( + offset, + length, + overwrite, + l2_table, + &mut leaked_allocations, + ) + .await; + + for alloc in leaked_allocations { + self.free_data_clusters(alloc.0, alloc.1).await; + } + let (host_offset, length) = res?; + + Ok((self.storage(), host_offset, length)) + } + + /// Get the L2 table referenced by the given L1 table index, if any. + /// + /// `writable` says whether the L2 table should be modifiable. + /// + /// If the L1 table index does not point to any L2 table, or the existing entry is not + /// modifiable but `writable` is true, return `Ok(None)`. + pub(super) async fn get_l2( + &self, + offset: GuestOffset, + writable: bool, + ) -> io::Result>> { + let cb = self.header.cluster_bits(); + + let l1_entry = self.l1_table.read().await.get(offset.l1_index(cb)); + if let Some(l2_offset) = l1_entry.l2_offset() { + if writable && !l1_entry.is_copied() { + return Ok(None); + } + let l2_cluster = l2_offset.checked_cluster(cb).ok_or_else(|| { + invalid_data(format!( + "Unaligned L2 table for {offset:?}; L1 entry: {l1_entry:?}" + )) + })?; + + self.l2_cache.get_or_insert(l2_cluster).await.map(Some) + } else { + Ok(None) + } + } + + /// Get a L2 table for the given L1 table index. + /// + /// If there already is an L2 table at that index, return it. Otherwise, create one and hook + /// it up. + pub(super) async fn ensure_l2(&self, offset: GuestOffset) -> io::Result> { + let cb = self.header.cluster_bits(); + + if let Some(l2) = self.get_l2(offset, true).await? { + return Ok(l2); + } + + self.need_writable()?; + + let mut l1_locked = self.l1_table.write().await; + let l1_index = offset.l1_index(cb); + if !l1_locked.in_bounds(l1_index) { + l1_locked = self.grow_l1_table(l1_locked, l1_index).await?; + } + + let l1_entry = l1_locked.get(l1_index); + let mut l2_table = if let Some(l2_offset) = l1_entry.l2_offset() { + let l2_cluster = l2_offset.checked_cluster(cb).ok_or_else(|| { + invalid_data(format!( + "Unaligned L2 table for {offset:?}; L1 entry: {l1_entry:?}" + )) + })?; + + let l2 = self.l2_cache.get_or_insert(l2_cluster).await?; + if l1_entry.is_copied() { + return Ok(l2); + } + + L2Table::clone(&l2) + } else { + L2Table::new_cleared(&self.header) + }; + + let l2_cluster = self.allocate_meta_cluster().await?; + l2_table.set_cluster(l2_cluster); + l2_table.write(self.metadata.as_ref()).await?; + + l1_locked.enter_l2_table(l1_index, &l2_table)?; + l1_locked + .write_entry(self.metadata.as_ref(), l1_index) + .await?; + + // Free old L2 table, if any + if let Some(l2_offset) = l1_entry.l2_offset() { + self.free_meta_clusters(l2_offset.cluster(cb), ClusterCount(1)) + .await; + } + + let l2_table = Arc::new(l2_table); + self.l2_cache + .insert(l2_cluster, Arc::clone(&l2_table)) + .await?; + Ok(l2_table) + } + + /// Create a new L1 table covering at least `at_least_index`. + /// + /// Create a new L1 table of the required size with all the entries of the previous L1 table. 
+ async fn grow_l1_table<'a>( + &self, + mut l1_locked: RwLockWriteGuard<'a, L1Table>, + at_least_index: usize, + ) -> io::Result> { + let mut new_l1 = l1_locked.clone_and_grow(at_least_index, &self.header)?; + + let l1_start = self.allocate_meta_clusters(new_l1.cluster_count()).await?; + + new_l1.set_cluster(l1_start); + new_l1.write(self.metadata.as_ref()).await?; + + self.header.set_l1_table(&new_l1)?; + self.header + .write_l1_table_pointer(self.metadata.as_ref()) + .await?; + + if let Some(old_l1_cluster) = l1_locked.get_cluster() { + let old_l1_size = l1_locked.cluster_count(); + l1_locked.unset_cluster(); + self.free_meta_clusters(old_l1_cluster, old_l1_size).await; + } + + *l1_locked = new_l1; + + Ok(l1_locked) + } + + /// Inner implementation for [`Qcow2::do_ensure_data_mapping()`]. + /// + /// Does not do any clean-up: The L2 table will probably be modified, but not written to disk. + /// Any existing allocations that have been removed from it (and are thus leaked) are entered + /// into `leaked_allocations`, but not freed. + /// + /// The caller must do both, ensuring it is done both in case of success and in case of error. + async fn ensure_data_mapping_no_cleanup( + &self, + offset: GuestOffset, + full_length: u64, + overwrite: bool, + mut l2_table: L2TableWriteGuard<'_>, + leaked_allocations: &mut Vec<(HostCluster, ClusterCount)>, + ) -> io::Result<(u64, u64)> { + let cb = self.header.cluster_bits(); + + let partial_skip_cow = overwrite.then(|| { + let start = offset.in_cluster_offset(cb); + let end = cmp::min(start as u64 + full_length, 1 << cb) as usize; + start..end + }); + + let mut current_guest_cluster = offset.cluster(cb); + + // Without a mandatory host offset, this should never return `Ok(None)` + let host_cluster = self + .cow_cluster( + current_guest_cluster, + None, + partial_skip_cow, + &mut l2_table, + leaked_allocations, + ) + .await? + .ok_or_else(|| io::Error::other("Internal allocation error"))?; + + let host_offset_start = host_cluster.relative_offset(offset, cb); + let mut allocated_length = offset.remaining_in_cluster(cb); + let mut current_host_cluster = host_cluster; + + while allocated_length < full_length { + let Some(next) = current_guest_cluster.next_in_l2(cb) else { + break; + }; + current_guest_cluster = next; + + let chunk_length = cmp::min(full_length - allocated_length, 1 << cb) as usize; + let partial_skip_cow = overwrite.then(|| 0..chunk_length); + + let next_host_cluster = current_host_cluster + ClusterCount(1); + let host_cluster = self + .cow_cluster( + current_guest_cluster, + Some(next_host_cluster), + partial_skip_cow, + &mut l2_table, + leaked_allocations, + ) + .await?; + + let Some(host_cluster) = host_cluster else { + // Cannot continue continuous mapping range + break; + }; + assert!(host_cluster == next_host_cluster); + current_host_cluster = host_cluster; + + allocated_length += chunk_length as u64; + } + + Ok((host_offset_start.0, allocated_length)) + } +} diff --git a/src/imago/src/qcow2/metadata.rs b/src/imago/src/qcow2/metadata.rs new file mode 100644 index 00000000..e32a2a40 --- /dev/null +++ b/src/imago/src/qcow2/metadata.rs @@ -0,0 +1,2545 @@ +//! Functionality for working with qcow2 metadata. 
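+//!
+//! As a small self-contained illustration (plain `std`, not imago API): the
+//! magic checked below is the big-endian encoding of `0x514649fb`, i.e. the
+//! ASCII bytes "QFI" followed by `0xfb`:
+//!
+//! ```
+//! let magic: u32 = 0x5146_49fb;
+//! assert_eq!(&magic.to_be_bytes()[..3], b"QFI");
+//! assert_eq!(magic.to_be_bytes()[3], 0xfb);
+//! ```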
+ +use super::types::*; +use crate::io_buffers::IoBuffer; +use crate::macros::numerical_enum; +use crate::misc_helpers::invalid_data; +use crate::{Storage, StorageExt}; +use bincode::Options; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::mem::size_of; +use std::num::TryFromIntError; +use std::sync::atomic::{AtomicBool, AtomicU16, AtomicU32, AtomicU64, AtomicU8, Ordering}; +use std::{cmp, io}; +use tokio::sync::{Mutex, MutexGuard}; +use tracing::error; + +/// Qcow header magic ("QFI\xfb"). +const MAGIC: u32 = 0x51_46_49_fb; + +/// Maximum file length. +const MAX_FILE_LENGTH: u64 = 0x0100_0000_0000_0000u64; + +/// Maximum permissible host offset. +pub(super) const MAX_OFFSET: HostOffset = HostOffset(MAX_FILE_LENGTH - 512); + +/// Minimum cluster size. +/// +/// Defined by the specification. +pub(super) const MIN_CLUSTER_SIZE: usize = 512; + +/// Maximum cluster size. +/// +/// This is QEMU’s limit, so we can apply it, too. +pub(super) const MAX_CLUSTER_SIZE: usize = 2 * 1024 * 1024; + +/// Minimum number of bits per refcount entry. +pub(super) const MIN_REFCOUNT_WIDTH: usize = 1; + +/// Maximum number of bits per refcount entry. +pub(super) const MAX_REFCOUNT_WIDTH: usize = 64; + +/// Qcow2 v2 header. +#[derive(Deserialize, Serialize)] +struct V2Header { + /// Qcow magic string ("QFI\xfb"). + magic: u32, + + /// Version number (valid values are 2 and 3). + version: u32, + + /// Offset into the image file at which the backing file name is stored (NB: The string is not + /// null terminated). 0 if the image doesn’t have a backing file. + /// + /// Note: backing files are incompatible with raw external data files (auto-clear feature bit + /// 1). + backing_file_offset: u64, + + /// Length of the backing file name in bytes. Must not be longer than 1023 bytes. Undefined + /// if the image doesn’t have a backing file. + backing_file_size: u32, + + /// Number of bits that are used for addressing an offset within a cluster (`1 << cluster_bits` + /// is the cluster size). Must not be less than 9 (i.e. 512 byte clusters). + /// + /// Note: qemu as of today has an implementation limit of 2 MB as the maximum cluster size and + /// won’t be able to open images with larger cluster sizes. + /// + /// Note: if the image has Extended L2 Entries then `cluster_bits` must be at least 14 (i.e. + /// 16384 byte clusters). + cluster_bits: u32, + + /// Virtual disk size in bytes. + /// + /// Note: qemu has an implementation limit of 32 MB as the maximum L1 table size. With a 2 MB + /// cluster size, it is unable to populate a virtual cluster beyond 2 EB (61 bits); with a 512 + /// byte cluster size, it is unable to populate a virtual size larger than 128 GB (37 bits). + /// Meanwhile, L1/L2 table layouts limit an image to no more than 64 PB (56 bits) of populated + /// clusters, and an image may hit other limits first (such as a file system’s maximum size). + size: u64, + + /// Encryption method: + /// + /// 0. no encryption + /// 1. AES encryption + /// 2. LUKS encryption + crypt_method: u32, + + /// Number of entries in the active L1 table. + l1_size: AtomicU32, + + /// Offset into the image file at which the active L1 table starts. Must be aligned to a + /// cluster boundary. + l1_table_offset: AtomicU64, + + /// Offset into the image file at which the refcount table starts. Must be aligned to a + /// cluster boundary. + refcount_table_offset: AtomicU64, + + /// Number of clusters that the refcount table occupies. 
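+    ///
+    /// Illustrative arithmetic (not imago API): with the defaults of 64 KiB
+    /// clusters and 16-bit refcounts, each 8-byte reftable entry references a
+    /// refblock covering 32768 clusters, so a single reftable cluster (8192
+    /// entries) covers 16 TiB of host file:
+    ///
+    /// ```
+    /// assert_eq!(8192u64 * 32768 * 65536, 16u64 << 40);
+    /// ```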
+ refcount_table_clusters: AtomicU32, + + /// Number of snapshots contained in the image. + nb_snapshots: u32, + + /// Offset into the image file at which the snapshot table starts. Must be aligned to a + /// cluster boundary. + snapshots_offset: u64, +} + +impl V2Header { + /// Raw v2 header length. + const RAW_SIZE: usize = 72; +} + +/// Qcow2 v3 header. +#[derive(Deserialize, Serialize)] +struct V3HeaderBase { + /// Bitmask of incompatible features. An implementation must fail to open an image if an + /// unknown bit is set. + /// + /// 0. Dirty bit. If this bit is set then refcounts may be inconsistent, make sure to scan + /// L1/L2 tables to repair refcounts before accessing the image. + /// 1. Corrupt bit. If this bit is set then any data structure may be corrupt and the image + /// must not be written to (unless for regaining consistency). + /// 2. External data file bit. If this bit is set, an external data file is used. Guest + /// clusters are then stored in the external data file. For such images, clusters in the + /// external data file are not refcounted. The offset field in the Standard Cluster + /// Descriptor must match the guest offset and neither compressed clusters nor internal + /// snapshots are supported. An External Data File Name header extension may be present if + /// this bit is set. + /// 3. Compression type bit. If this bit is set, a non-default compression is used for + /// compressed clusters. The compression_type field must be present and not zero. + /// 4. Extended L2 Entries. If this bit is set then L2 table entries use an extended format + /// that allows subcluster-based allocation. See the Extended L2 Entries section for more + /// details. + /// + /// Bits 5-63 are reserved (set to 0). + incompatible_features: u64, + + /// Bitmask of compatible features. An implementation can safely ignore any unknown bits that + /// are set. + /// + /// 0. Lazy refcounts bit. If this bit is set then lazy refcount updates can be used. This + /// means marking the image file dirty and postponing refcount metadata updates. + /// + /// Bits 1-63 are reserved (set to 0). + compatible_features: u64, + + /// Bitmask of auto-clear features. An implementation may only write to an image with unknown + /// auto-clear features if it clears the respective bits from this field first. + /// + /// 0. Bitmaps extension bit. This bit indicates consistency for the bitmaps extension data. + /// It is an error if this bit is set without the bitmaps extension present. If the bitmaps + /// extension is present but this bit is unset, the bitmaps extension data must be + /// considered inconsistent. + /// 1. Raw external data bit. If this bit is set, the external data file can be read as a + /// consistent standalone raw image without looking at the qcow2 metadata. Setting this bit + /// has a performance impact for some operations on the image (e.g. writing zeros requires + /// writing to the data file instead of only setting the zero flag in the L2 table entry) + /// and conflicts with backing files. This bit may only be set if the External Data File + /// bit (incompatible feature bit 1) is also set. + /// + /// Bits 2-63 are reserved (set to 0). + autoclear_features: u64, + + /// Describes the width of a reference count block entry (width in bits: `refcount_bits = 1 << + /// refcount_order`). For version 2 images, the order is always assumed to be 4 (i.e. + /// `refcount_bits = 16`). This value may not exceed 6 (i.e. `refcount_bits = 64`). 
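+    ///
+    /// Illustrative arithmetic (not imago API): with the default
+    /// `refcount_order = 4` and 64 KiB clusters, refcounts are 16 bits wide
+    /// and one refcount block holds 32768 entries:
+    ///
+    /// ```
+    /// let cluster_bits = 16u32;
+    /// let refcount_order = 4u32;
+    /// assert_eq!(1u32 << refcount_order, 16); // bits per refcount entry
+    /// assert_eq!(1u64 << (cluster_bits - (refcount_order - 3)), 32768); // entries per refblock
+    /// ```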
+ refcount_order: u32, + + /// Length of the header structure in bytes. For version 2 images, the length is always + /// assumed to be 72 bytes. For version 3 it’s at least 104 bytes and must be a multiple of 8. + header_length: u32, +} + +impl V3HeaderBase { + /// Raw v3 header length beyond the v2 header. + const RAW_SIZE: usize = 104 - V2Header::RAW_SIZE; +} + +impl Default for V3HeaderBase { + fn default() -> Self { + V3HeaderBase { + incompatible_features: 0, + compatible_features: 0, + autoclear_features: 0, + refcount_order: 4, + header_length: (V2Header::RAW_SIZE + V3HeaderBase::RAW_SIZE) as u32, + } + } +} + +numerical_enum! { + /// Incompatible feature bits. + pub(super) enum IncompatibleFeatures as u64 { + Dirty = 1 << 0, + Corrupt = 1 << 1, + ExternalDataFile = 1 << 2, + CompressionType = 1 << 3, + ExtendedL2Entries = 1 << 4, + } +} + +numerical_enum! { + /// Extension type IDs. + pub(super) enum HeaderExtensionType as u32 { + /// End of extension list. + End = 0, + + /// Backing file format string. + BackingFileFormat = 0xe2792aca, + + /// Map of feature bits to human-readable names. + FeatureNameTable = 0x6803f857, + + /// External data file filename string. + ExternalDataFileName = 0x44415441, + } +} + +/// Header for a header extension. +#[derive(Default, Deserialize, Serialize)] +struct HeaderExtensionHeader { + /// Type code of the header extension. + extension_type: u32, + + /// Data length. + length: u32, +} + +impl HeaderExtensionHeader { + /// Raw struct length. + const RAW_SIZE: usize = 8; +} + +numerical_enum! { + /// Feature type ID for the feature name table. + #[derive(Hash)] + pub(super) enum FeatureType as u8 { + Incompatible = 0, + Compatible = 1, + Autoclear = 2, + } +} + +/// Header extensions (high-level representation). +#[derive(Debug, Clone, Eq, PartialEq)] +pub(super) enum HeaderExtension { + /// Backing file format string. + BackingFileFormat(String), + + /// Map of feature bits to human-readable names. + FeatureNameTable(HashMap<(FeatureType, u8), String>), + + /// External data file filename string. + ExternalDataFileName(String), + + /// Unknown extension. + Unknown { + /// Type. + extension_type: u32, + /// Data (as read). + data: Vec, + }, +} + +/// Integrated header representation. +pub(super) struct Header { + /// v2 part of the header. + v2: V2Header, + + /// Base v3 part of the header. + v3: V3HeaderBase, + + /// Unrecognized header fields. + unknown_header_fields: Vec, + + /// Backing filename string. + backing_filename: Option, + + /// Extensions. + extensions: Vec, + + /// Whether an external data file is required. + external_data_file: bool, +} + +impl Header { + /// Load the qcow2 header from disk. + /// + /// If `writable` is false, do not perform any modifications (e.g. clearing auto-clear bits). + pub async fn load(image: &S, writable: bool) -> io::Result { + let bincode = bincode::DefaultOptions::new() + .with_fixint_encoding() + .with_big_endian(); + + let mut header_buf = vec![0u8; V2Header::RAW_SIZE]; + image.read(header_buf.as_mut_slice(), 0).await?; + + let header: V2Header = bincode.deserialize(&header_buf).map_err(invalid_data)?; + if header.magic != MAGIC { + return Err(invalid_data("Not a qcow2 file")); + } + + let v3header_base = if header.version == 2 { + V3HeaderBase::default() + } else if header.version == 3 { + let mut header_buf = vec![0u8; V3HeaderBase::RAW_SIZE]; + image + .read(header_buf.as_mut_slice(), V2Header::RAW_SIZE as u64) + .await?; + bincode.deserialize(&header_buf).map_err(invalid_data)? 
+ } else { + return Err(invalid_data(format!( + "qcow2 v{} is not supported", + header.version + ))); + }; + + let cluster_size = 1usize.checked_shl(header.cluster_bits).ok_or_else(|| { + invalid_data(format!("Invalid cluster size: 2^{}", header.cluster_bits)) + })?; + if !(MIN_CLUSTER_SIZE..=MAX_CLUSTER_SIZE).contains(&cluster_size) { + return Err(invalid_data(format!( + "Invalid cluster size: {}; must be between {} and {}", + cluster_size, MIN_CLUSTER_SIZE, MAX_CLUSTER_SIZE, + ))); + } + + let min_header_size = V2Header::RAW_SIZE + V3HeaderBase::RAW_SIZE; + if (v3header_base.header_length as usize) < min_header_size { + return Err(invalid_data(format!( + "qcow2 header too short: {} < {}", + v3header_base.header_length, min_header_size, + ))); + } else if (v3header_base.header_length as usize) > cluster_size { + return Err(invalid_data(format!( + "qcow2 header too big: {} > {}", + v3header_base.header_length, cluster_size, + ))); + } + + let unknown_header_fields = if header.version == 2 { + Vec::new() + } else { + let mut unknown_header_fields = + vec![0u8; v3header_base.header_length as usize - min_header_size]; + image + .read(&mut unknown_header_fields, min_header_size as u64) + .await?; + unknown_header_fields + }; + + let l1_offset = HostOffset(header.l1_table_offset.load(Ordering::Relaxed)); + l1_offset + .checked_cluster(header.cluster_bits) + .ok_or_else(|| invalid_data(format!("Unaligned L1 table: {l1_offset}")))?; + + let rt_offset = HostOffset(header.refcount_table_offset.load(Ordering::Relaxed)); + rt_offset + .checked_cluster(header.cluster_bits) + .ok_or_else(|| invalid_data(format!("Unaligned refcount table: {rt_offset}")))?; + + let rc_width = 1usize + .checked_shl(v3header_base.refcount_order) + .ok_or_else(|| { + invalid_data(format!( + "Invalid refcount width: 2^{}", + v3header_base.refcount_order + )) + })?; + if !(MIN_REFCOUNT_WIDTH..=MAX_REFCOUNT_WIDTH).contains(&rc_width) { + return Err(invalid_data(format!( + "Invalid refcount width: {}; must be between {} and {}", + rc_width, MIN_REFCOUNT_WIDTH, MAX_REFCOUNT_WIDTH, + ))); + } + + let backing_filename = if header.backing_file_offset != 0 { + let (offset, length) = (header.backing_file_offset, header.backing_file_size); + if length > 1023 { + return Err(invalid_data(format!( + "Backing file name is too long ({length}, must not exceed 1023)" + ))); + } + + let end = offset.checked_add(length as u64).ok_or(invalid_data( + "Backing file name offset is invalid (too high)", + ))?; + if end >= cluster_size as u64 { + return Err(invalid_data( + "Backing file name offset is invalid (beyond first cluster)", + )); + } + + let mut backing_buf = vec![0; length as usize]; + image.read(&mut backing_buf, offset).await?; + + Some( + String::from_utf8(backing_buf) + .map_err(|err| invalid_data(format!("Backing file name is invalid: {err}")))?, + ) + } else { + None + }; + + let extensions = if header.version == 2 { + Vec::new() + } else { + let mut ext_offset: u64 = v3header_base.header_length as u64; + let mut extensions = Vec::::new(); + loop { + if ext_offset + HeaderExtensionHeader::RAW_SIZE as u64 > cluster_size as u64 { + return Err(invalid_data("Header extensions exceed the first cluster")); + } + + let mut ext_hdr_buf = vec![0; HeaderExtensionHeader::RAW_SIZE]; + image.read(&mut ext_hdr_buf, ext_offset).await?; + + ext_offset += HeaderExtensionHeader::RAW_SIZE as u64; + + let ext_hdr: HeaderExtensionHeader = + bincode.deserialize(&ext_hdr_buf).map_err(invalid_data)?; + let ext_end = ext_offset + .checked_add(ext_hdr.length 
as u64) + .ok_or_else(|| invalid_data("Header size overflow"))?; + if ext_end > cluster_size as u64 { + return Err(invalid_data("Header extensions exceed the first cluster")); + } + + let mut ext_data = vec![0; ext_hdr.length as usize]; + image.read(&mut ext_data, ext_offset).await?; + + ext_offset += (ext_hdr.length as u64).next_multiple_of(8); + + let Some(extension) = + HeaderExtension::deserialize(ext_hdr.extension_type, ext_data)? + else { + break; + }; + + extensions.push(extension); + } + extensions + }; + + // Check for header extension conflicts + let backing_fmt = extensions + .iter() + .find(|ext| matches!(ext, HeaderExtension::BackingFileFormat(_))); + if let Some(backing_fmt) = backing_fmt { + let conflicting = extensions.iter().find(|ext| { + matches!(ext, HeaderExtension::BackingFileFormat(_)) && ext != &backing_fmt + }); + if let Some(conflicting) = conflicting { + return Err(io::Error::other(format!( + "Found conflicting backing file formats: {:?} != {:?}", + backing_fmt, conflicting + ))); + } + } + let ext_data_file = extensions + .iter() + .find(|ext| matches!(ext, HeaderExtension::ExternalDataFileName(_))); + if let Some(ext_data_file) = ext_data_file { + let conflicting = extensions.iter().find(|ext| { + matches!(ext, HeaderExtension::ExternalDataFileName(_)) && ext != &ext_data_file + }); + if let Some(conflicting) = conflicting { + return Err(io::Error::other(format!( + "Found conflicting external data file names: {:?} != {:?}", + ext_data_file, conflicting + ))); + } + } + + let mut incompatible_features = v3header_base.incompatible_features; + let autoclear_features = v3header_base.autoclear_features; + + let external_data_file = + incompatible_features & IncompatibleFeatures::ExternalDataFile as u64 != 0; + incompatible_features &= !(IncompatibleFeatures::ExternalDataFile as u64); + + let mut header = Header { + v2: header, + v3: v3header_base, + unknown_header_fields, + backing_filename, + extensions, + external_data_file, + }; + + // No need to clear autoclear features for read-only images + if autoclear_features != 0 && writable { + header.v3.autoclear_features = 0; + header.write(image).await?; + } + + if incompatible_features != 0 { + let feats = (0..64) + .filter(|bit| header.v3.incompatible_features & (1u64 << bit) != 0) + .map(|bit| { + if let Some(name) = header.feature_name(FeatureType::Incompatible, bit) { + format!("{bit} ({name})") + } else { + format!("{bit}") + } + }) + .collect::>(); + + return Err(invalid_data(format!( + "Unrecognized incompatible feature(s) {}", + feats.join(", ") + ))); + } + + Ok(header) + } + + /// Write the qcow2 header to disk. 
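+    ///
+    /// Illustrative arithmetic (not imago API): a v3 header without unknown
+    /// trailing fields serializes to 72 bytes (v2 part) plus 32 bytes (v3
+    /// base), which is already the 8-byte-aligned minimum of 104 bytes:
+    ///
+    /// ```
+    /// assert_eq!((72usize + 32).next_multiple_of(8), 104);
+    /// ```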
+ pub async fn write(&mut self, image: &S) -> io::Result<()> { + let bincode = bincode::DefaultOptions::new() + .with_fixint_encoding() + .with_big_endian(); + + let header_len = if self.v2.version > 2 { + let len = bincode.serialized_size(&self.v2).unwrap() as usize + + bincode.serialized_size(&self.v3).unwrap() as usize + + self.unknown_header_fields.len(); + let len = len.next_multiple_of(8); + self.v3.header_length = len as u32; + len + } else { + V2Header::RAW_SIZE + }; + + let mut header_exts = self.serialize_extensions()?; + + if let Some(backing) = self.backing_filename.as_ref() { + let offset = header_len + header_exts.len(); + let size = backing.len(); // length in bytes + let end = offset.checked_add(size).ok_or_else(|| { + io::Error::other("Header plus header extensions plus backing filename is too long") + })?; + if end > self.cluster_size() { + return Err(io::Error::other( + "Header plus header extensions plus backing filename is too long", + ))?; + } + self.v2.backing_file_offset = offset as u64; + self.v2.backing_file_size = size as u32; + } else { + self.v2.backing_file_offset = 0; + self.v2.backing_file_size = 0; + } + + let mut full_buf = bincode.serialize(&self.v2).map_err(invalid_data)?; + if self.v2.version > 2 { + full_buf.append(&mut bincode.serialize(&self.v3).map_err(invalid_data)?); + full_buf.extend_from_slice(&self.unknown_header_fields); + full_buf.resize(full_buf.len().next_multiple_of(8), 0); + } + + full_buf.append(&mut header_exts); + + if let Some(backing) = self.backing_filename.as_ref() { + full_buf.extend_from_slice(backing.as_bytes()); + } + + if full_buf.len() > self.cluster_size() { + return Err(io::Error::other(format!( + "Header is too big to write ({}, larger than a cluster ({}))", + full_buf.len(), + self.cluster_size(), + ))); + } + + image.write(&full_buf, 0).await + } + + /// Guest disk size. + pub fn size(&self) -> u64 { + self.v2.size + } + + /// log2 of the cluster size. + pub fn cluster_bits(&self) -> u32 { + self.v2.cluster_bits + } + + /// Cluster size in bytes. + pub fn cluster_size(&self) -> usize { + 1 << self.cluster_bits() + } + + /// Number of entries per L2 table. + pub fn l2_entries(&self) -> usize { + // 3 == log2(size_of::()) + 1 << (self.cluster_bits() - 3) + } + + /// log2 of the number of entries per refcount block. + pub fn rb_bits(&self) -> u32 { + // log2(cluster_size >> (refcount_order - 3)) + self.cluster_bits() - (self.refcount_order() - 3) + } + + /// Number of entries per refcount block. + pub fn rb_entries(&self) -> usize { + 1 << self.rb_bits() + } + + /// log2 of the refcount bits. + pub fn refcount_order(&self) -> u32 { + self.v3.refcount_order + } + + /// Offset of the L1 table. + pub fn l1_table_offset(&self) -> HostOffset { + HostOffset(self.v2.l1_table_offset.load(Ordering::Relaxed)) + } + + /// Number of entries in the L1 table. + pub fn l1_table_entries(&self) -> usize { + self.v2.l1_size.load(Ordering::Relaxed) as usize + } + + /// Enter a new L1 table in the image header. 
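+    ///
+    /// # Example
+    ///
+    /// Hypothetical sketch of the intended call sequence (not a doctest; the
+    /// variable names are made up, the flow mirrors `grow_l1_table()`):
+    ///
+    /// ```ignore
+    /// new_l1.set_cluster(l1_start);
+    /// new_l1.write(image).await?;
+    /// header.set_l1_table(&new_l1)?;
+    /// header.write_l1_table_pointer(image).await?;
+    /// ```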
+ pub fn set_l1_table(&self, l1_table: &L1Table) -> io::Result<()> { + let offset = l1_table.get_offset().ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidInput, + "New L1 table has no assigned offset", + ) + })?; + + let entries = l1_table.entries(); + let entries = entries + .try_into() + .map_err(|err| invalid_data(format!("Too many L1 entries ({entries}): {err}")))?; + + self.v2.l1_table_offset.store(offset.0, Ordering::Relaxed); + + self.v2.l1_size.store(entries, Ordering::Relaxed); + + Ok(()) + } + + /// Offset of the refcount table. + pub fn reftable_offset(&self) -> HostOffset { + HostOffset(self.v2.refcount_table_offset.load(Ordering::Relaxed)) + } + + /// Number of clusters occupied by the refcount table. + pub fn reftable_clusters(&self) -> ClusterCount { + ClusterCount(self.v2.refcount_table_clusters.load(Ordering::Relaxed) as u64) + } + + /// Number of entries in the refcount table. + pub fn reftable_entries(&self) -> usize { + // 3 == log2(size_of::()) + (self.reftable_clusters().byte_size(self.cluster_bits()) >> 3) as usize + } + + /// Enter a new refcount table in the image header. + pub fn set_reftable(&self, reftable: &RefTable) -> io::Result<()> { + let offset = reftable.get_offset().ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidInput, + "New refcount table has no assigned offset", + ) + })?; + + let clusters = reftable.cluster_count(); + let clusters = clusters.0.try_into().map_err(|err| { + invalid_data(format!("Too many reftable clusters ({clusters}): {err}")) + })?; + + self.v2 + .refcount_table_clusters + .store(clusters, Ordering::Relaxed); + + self.v2 + .refcount_table_offset + .store(offset.0, Ordering::Relaxed); + + Ok(()) + } + + /// Backing filename from the image header (if any). + pub fn backing_filename(&self) -> Option<&String> { + self.backing_filename.as_ref() + } + + /// Backing format string from the image header (if any). + pub fn backing_format(&self) -> Option<&String> { + self.extensions.iter().find_map(|e| match e { + HeaderExtension::BackingFileFormat(fmt) => Some(fmt), + _ => None, + }) + } + + /// Whether this image requires an external data file. + pub fn external_data_file(&self) -> bool { + self.external_data_file + } + + /// External data file filename from the image header (if any). + pub fn external_data_filename(&self) -> Option<&String> { + self.extensions.iter().find_map(|e| match e { + HeaderExtension::ExternalDataFileName(filename) => Some(filename), + _ => None, + }) + } + + /// Translate a feature bit to a human-readable name. + /// + /// Uses the feature name table from the image header, if present. + pub fn feature_name(&self, feat_type: FeatureType, bit: u32) -> Option<&String> { + for e in &self.extensions { + if let HeaderExtension::FeatureNameTable(names) = e { + if let Some(name) = names.get(&(feat_type, bit as u8)) { + return Some(name); + } + } + } + + None + } + + /// Serialize all header extensions. 
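+    ///
+    /// Illustrative sketch of the resulting on-disk layout (plain `std`, not
+    /// imago API): a backing-file-format extension carrying "qcow2" is a
+    /// 4-byte big-endian type ID, a 4-byte big-endian length, the data, and
+    /// zero padding up to the next multiple of 8:
+    ///
+    /// ```
+    /// let mut ext = Vec::new();
+    /// ext.extend_from_slice(&0xe279_2aca_u32.to_be_bytes()); // BackingFileFormat
+    /// ext.extend_from_slice(&5u32.to_be_bytes());            // data length
+    /// ext.extend_from_slice(b"qcow2");
+    /// while ext.len() % 8 != 0 {
+    ///     ext.push(0);
+    /// }
+    /// assert_eq!(ext.len(), 16);
+    /// ```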
+ fn serialize_extensions(&self) -> io::Result> { + let bincode = bincode::DefaultOptions::new() + .with_fixint_encoding() + .with_big_endian(); + + let mut result = Vec::new(); + for e in &self.extensions { + let mut data = e.serialize_data()?; + let ext_hdr = HeaderExtensionHeader { + extension_type: e.extension_type(), + length: data.len().try_into().map_err(|err| { + invalid_data(format!( + "Header extension too long ({}): {}", + data.len(), + err + )) + })?, + }; + result.append(&mut bincode.serialize(&ext_hdr).map_err(invalid_data)?); + result.append(&mut data); + result.resize(result.len().next_multiple_of(8), 0); + } + + let end_ext = HeaderExtensionHeader { + extension_type: HeaderExtensionType::End as u32, + length: 0, + }; + result.append(&mut bincode.serialize(&end_ext).map_err(invalid_data)?); + result.resize(result.len().next_multiple_of(8), 0); + + Ok(result) + } + + /// Helper for functions that just need to change little bits in the v2 header part. + async fn write_v2_header(&self, image: &S) -> io::Result<()> { + let bincode = bincode::DefaultOptions::new() + .with_fixint_encoding() + .with_big_endian(); + + let v2_header = bincode.serialize(&self.v2).map_err(invalid_data)?; + image.write(&v2_header, 0).await + } + + /// Write the refcount table pointer (offset and size) to disk. + pub async fn write_reftable_pointer(&self, image: &S) -> io::Result<()> { + // TODO: Just write the reftable offset and size + self.write_v2_header(image).await + } + + /// Write the L1 table pointer (offset and size) to disk. + pub async fn write_l1_table_pointer(&self, image: &S) -> io::Result<()> { + // TODO: Just write the L1 table offset and size + self.write_v2_header(image).await + } +} + +impl HeaderExtension { + /// Parse an extension from its type and data. Unrecognized types are stored as `Unknown` + /// extensions, encountering the end of extensions returns `Ok(None)`. + fn deserialize(ext_type: u32, data: Vec) -> io::Result> { + let ext = if let Ok(ext_type) = HeaderExtensionType::try_from(ext_type) { + match ext_type { + HeaderExtensionType::End => return Ok(None), + HeaderExtensionType::BackingFileFormat => { + let fmt = String::from_utf8(data).map_err(|err| { + invalid_data(format!("Invalid backing file format: {err}")) + })?; + HeaderExtension::BackingFileFormat(fmt) + } + HeaderExtensionType::FeatureNameTable => { + let mut feats = HashMap::new(); + for feat in data.chunks(48) { + let feat_type: FeatureType = match feat[0].try_into() { + Ok(ft) => ft, + Err(_) => continue, // skip unrecognized entries + }; + // Cannot use CStr to parse this, as it may not be NUL-terminated. + // Use this to remove everything from the first NUL byte. + let feat_name_bytes = feat[2..].split(|c| *c == 0).next().unwrap(); + // Then just use it as a UTF-8 string. + let feat_name = String::from_utf8_lossy(feat_name_bytes); + feats.insert((feat_type, feat[1]), feat_name.to_string()); + } + HeaderExtension::FeatureNameTable(feats) + } + HeaderExtensionType::ExternalDataFileName => { + let filename = String::from_utf8(data).map_err(|err| { + invalid_data(format!("Invalid external data file name: {err}")) + })?; + HeaderExtension::ExternalDataFileName(filename) + } + } + } else { + HeaderExtension::Unknown { + extension_type: ext_type, + data, + } + }; + + Ok(Some(ext)) + } + + /// Return the extension type ID. 
+ fn extension_type(&self) -> u32 { + match self { + HeaderExtension::BackingFileFormat(_) => HeaderExtensionType::BackingFileFormat as u32, + HeaderExtension::FeatureNameTable(_) => HeaderExtensionType::FeatureNameTable as u32, + HeaderExtension::ExternalDataFileName(_) => { + HeaderExtensionType::ExternalDataFileName as u32 + } + HeaderExtension::Unknown { + extension_type, + data: _, + } => *extension_type, + } + } + + /// Serialize this extension’s data (exclusing its header). + fn serialize_data(&self) -> io::Result> { + match self { + HeaderExtension::BackingFileFormat(fmt) => Ok(fmt.as_bytes().into()), + HeaderExtension::FeatureNameTable(map) => { + let mut result = Vec::new(); + for (bit, name) in map { + result.push(bit.0 as u8); + result.push(bit.1); + + let mut padded_name = vec![0; 46]; + let name_bytes = name.as_bytes(); + // Might truncate in the middle of a multibyte character, but getting that + // right is complicated and probably not worth it + let truncated_len = cmp::min(name_bytes.len(), 46); + padded_name[..truncated_len].copy_from_slice(&name_bytes[..truncated_len]); + result.extend_from_slice(&padded_name); + } + Ok(result) + } + HeaderExtension::ExternalDataFileName(filename) => Ok(filename.as_bytes().into()), + HeaderExtension::Unknown { + extension_type: _, + data, + } => Ok(data.clone()), + } + } +} + +/// L1 table entry. +/// +/// - Bit 0 - 8: Reserved (set to 0) +/// - Bit 9 – 55: Bits 9-55 of the offset into the image file at which the L2 table starts. Must +/// be aligned to a cluster boundary. If the offset is 0, the L2 table and all clusters +/// described by this L2 table are unallocated. +/// - Bit 56 - 62: Reserved (set to 0) +/// - Bit 63: 0 for an L2 table that is unused or requires COW, 1 if its refcount is exactly one. +/// This information is only accurate in the active L1 table. +#[derive(Copy, Clone, Default, Debug)] +pub(super) struct L1Entry(u64); + +impl L1Entry { + /// Offset of the L2 table, if any. + pub fn l2_offset(&self) -> Option { + let ofs = self.0 & 0x00ff_ffff_ffff_fe00u64; + if ofs == 0 { + None + } else { + Some(HostOffset(ofs)) + } + } + + /// Whether the L2 table’s cluster is “copied”. + /// + /// `true` means is refcount is one, `false` means modifying it will require COW. + pub fn is_copied(&self) -> bool { + self.0 & (1u64 << 63) != 0 + } + + /// Return all reserved bits. + pub fn reserved_bits(&self) -> u64 { + self.0 & 0x7f00_0000_0000_01feu64 + } +} + +impl TableEntry for L1Entry { + fn try_from_plain(value: u64, header: &Header) -> io::Result { + let entry = L1Entry(value); + + if entry.reserved_bits() != 0 { + return Err(invalid_data(format!( + "Invalid L1 entry 0x{:x}, reserved bits set (0x{:x})", + value, + entry.reserved_bits(), + ))); + } + + if let Some(l2_ofs) = entry.l2_offset() { + if l2_ofs.in_cluster_offset(header.cluster_bits()) != 0 { + return Err(invalid_data(format!( + "Invalid L1 entry 0x{:x}, offset ({}) is not aligned to cluster size (0x{:x})", + value, + l2_ofs, + header.cluster_size(), + ))); + } + } + + Ok(entry) + } + + fn to_plain(&self) -> u64 { + self.0 + } +} + +/// L1 table. +#[derive(Debug)] +pub(super) struct L1Table { + /// First cluster in the image file. + cluster: Option, + + /// Table data. + data: Box<[L1Entry]>, + + /// log2 of the cluster size. + cluster_bits: u32, + + /// Whether this table has been modified since it was last written. + modified: AtomicBool, +} + +impl L1Table { + /// Create a clone that covers at least `at_least_index`. 
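To make the L1 entry bit layout above concrete, here is a decode of one hypothetical entry value using the same masks as `L1Entry` (standalone sketch, plain bit math):

// Sketch: decoding a hypothetical L1 entry value.
fn decode_l1_entry_example() {
    let value: u64 = (1 << 63) | 0x5_0000; // COPIED flag set, L2 table at offset 0x50000
    let l2_offset = value & 0x00ff_ffff_ffff_fe00; // -> 0x50000
    let copied = value & (1 << 63) != 0;           // -> true
    let reserved = value & 0x7f00_0000_0000_01fe;  // -> 0, so the entry is valid
    assert_eq!((l2_offset, copied, reserved), (0x5_0000, true, 0));
}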
+ pub fn clone_and_grow(&self, at_least_index: usize, header: &Header) -> io::Result { + let new_entry_count = cmp::max(at_least_index + 1, self.data.len()); + let new_entry_count = + new_entry_count.next_multiple_of(header.cluster_size() / size_of::()); + + if new_entry_count > ::MAX_ENTRIES { + return Err(io::Error::other( + "Cannot grow the image to this size; L1 table would become too big", + )); + } + + let mut new_data = vec![L1Entry::default(); new_entry_count]; + new_data[..self.data.len()].copy_from_slice(&self.data); + + Ok(Self { + cluster: None, + data: new_data.into_boxed_slice(), + cluster_bits: header.cluster_bits(), + modified: true.into(), + }) + } + + /// Check whether `index` is in bounds. + pub fn in_bounds(&self, index: usize) -> bool { + index < self.data.len() + } + + /// Enter the given L2 table into this L1 table. + pub fn enter_l2_table(&mut self, index: usize, l2: &L2Table) -> io::Result<()> { + let l2_offset = l2.get_offset().ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidInput, + "L2 table has no assigned offset", + ) + })?; + + let l1entry = L1Entry((1 << 63) | l2_offset.0); + debug_assert!(l1entry.reserved_bits() == 0); + self.data[index] = l1entry; + self.modified.store(true, Ordering::Relaxed); + + Ok(()) + } +} + +impl Table for L1Table { + type InternalEntry = L1Entry; + type Entry = L1Entry; + const NAME: &'static str = "L1 table"; + + /// Maximum number of L1 table entries. + /// + /// Limit taken from QEMU; if QEMU rejects this, we can, too. + const MAX_ENTRIES: usize = 4 * 1024 * 1024; + + fn from_data(data: Box<[L1Entry]>, header: &Header) -> Self { + Self { + cluster: None, + data, + cluster_bits: header.cluster_bits(), + modified: true.into(), + } + } + + fn entries(&self) -> usize { + self.data.len() + } + + fn get_ref(&self, index: usize) -> Option<&L1Entry> { + self.data.get(index) + } + + fn get(&self, index: usize) -> L1Entry { + self.data.get(index).copied().unwrap_or(L1Entry(0)) + } + + fn get_cluster(&self) -> Option { + self.cluster + } + + fn get_offset(&self) -> Option { + self.cluster.map(|index| index.offset(self.cluster_bits)) + } + + fn set_cluster(&mut self, cluster: HostCluster) { + self.cluster = Some(cluster); + self.modified.store(true, Ordering::Relaxed); + } + + fn unset_cluster(&mut self) { + self.cluster = None; + } + + fn is_modified(&self) -> bool { + self.modified.load(Ordering::Relaxed) + } + + fn clear_modified(&self) { + self.modified.store(false, Ordering::Relaxed); + } + + fn set_modified(&self) { + self.modified.store(true, Ordering::Relaxed); + } + + fn cluster_bits(&self) -> u32 { + self.cluster_bits + } +} + +/// L2 table entry. +/// +/// - Bit 0 - 61: Cluster descriptor +/// - Bit 62: 0 for standard clusters, 1 for compressed clusters +/// - Bit 63: 0 for clusters that are unused, compressed or require COW. 1 for standard clusters +/// whose refcount is exactly one. This information is only accurate in L2 tables that are +/// reachable from the active L1 table. With external data files, all guest clusters have an +/// implicit refcount of 1 (because of the fixed host = guest mapping for guest cluster offsets), +/// so this bit should be 1 for all allocated clusters. +/// +/// Standard Cluster Descriptor: +/// - Bit 0: If set to 1, the cluster reads as all zeros. The host cluster offset can be used to +/// describe a preallocation, but it won’t be used for reading data from this cluster, nor is +/// data read from the backing file if the cluster is unallocated. 
With version 2 or with +/// extended L2 entries (see the next section), this is always 0. +/// - Bit 1 – 8: Reserved (set to 0) +/// - Bit 9 – 55: Bits 9-55 of host cluster offset. Must be aligned to a cluster boundary. If the +/// offset is 0 and bit 63 is clear, the cluster is unallocated. The offset may only be 0 with +/// bit 63 set (indicating a host cluster offset of 0) when an external data file is used. +/// - Bit 56 - 61: Reserved (set to 0) +/// +/// Compressed Cluster Descriptor (`x = 62 - (cluster_bits - 8)`): +/// - Bit 0 - x-1: Host cluster offset. This is usually _not_ aligned to a cluster or sector +/// boundary! If cluster_bits is small enough that this field includes bits beyond 55, those +/// upper bits must be set to 0. +/// - Bit x - 61: Number of additional 512-byte sectors used for the compressed data, beyond the +/// sector containing the offset in the previous field. Some of these sectors may reside in the +/// next contiguous host cluster. Note that the compressed data does not necessarily occupy all +/// of the bytes in the final sector; rather, decompression stops when it has produced a cluster +/// of data. Another compressed cluster may map to the tail of the final sector used by this +/// compressed cluster. +#[derive(Copy, Clone, Default, Debug)] +pub(super) struct L2Entry(u64); + +/// Internal actual type of L2 entries. +/// +/// Using atomic allows flushing L2 tables from the cache while they are write-locked. +#[derive(Default, Debug)] +pub(super) struct AtomicL2Entry(AtomicU64); + +/// High-level representation of an L2 entry. +#[derive(Debug, Clone)] +pub(super) enum L2Mapping { + /// Data is in the data file. + DataFile { + /// Cluster in the data file. + host_cluster: HostCluster, + + /// Whether the cluster has a refcount of exactly 1. + copied: bool, + }, + + /// Data is in the backing file. + Backing { + /// Guest cluster index. + backing_offset: u64, + }, + + /// Data is zero. + Zero { + /// Preallocated cluster in the data file, if any. + host_cluster: Option, + + /// Whether the preallocated cluster has a refcount of exactly 1. + copied: bool, + }, + + /// Data is compressed. + Compressed { + /// Offset in the data file. + host_offset: HostOffset, + + /// Upper limit on the number of bytes that comprise the compressed data. + length: u64, + }, +} + +impl L2Entry { + /// Offset of the data cluster, if any. + /// + /// Assumes the L2 entry references a data cluster, not a compressed cluster. + /// + /// `external_data_file` must be true when using an external data file; in this case, offset 0 + /// is a valid offset, and can only be distinguished from “unallocated” by whether the COPIED + /// flag is set or not (which it always is when using an external data file). + pub fn cluster_offset(&self, external_data_file: bool) -> Option { + let ofs = self.0 & 0x00ff_ffff_ffff_fe00u64; + if ofs != 0 || (external_data_file && self.is_copied()) { + Some(HostOffset(ofs)) + } else { + None + } + } + + /// Whether the cluster is compressed. + pub fn is_compressed(&self) -> bool { + self.0 & (1u64 << 62) != 0 + } + + /// Whether the cluster is “copied”. + /// + /// `true` means is refcount is one, `false` means modifying it will require COW. + pub fn is_copied(&self) -> bool { + self.0 & (1u64 << 63) != 0 + } + + /// Clear “copied” flag. + #[must_use] + pub fn without_copied(self) -> Self { + L2Entry(self.0 & !(1u64 << 63)) + } + + /// Whether the cluster is a zero cluster. + /// + /// Assumes the L2 entry references a data cluster, not a compressed cluster. 
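As a worked example of the standard cluster descriptor described above, the following sketch classifies a few hypothetical entry values with plain bit math (it does not use the private `L2Entry` type and ignores validity checks):

// Sketch: classifying hypothetical standard (non-compressed) L2 entry values.
fn classify_l2_entry_example(value: u64) -> &'static str {
    let offset = value & 0x00ff_ffff_ffff_fe00;
    let copied = value & (1 << 63) != 0;
    let zero = value & 1 != 0;
    match (zero, offset, copied) {
        (true, _, _) => "reads as zeros (possibly with a preallocated host cluster)",
        (false, 0, false) => "unallocated: falls through to the backing file",
        (false, _, _) if copied => "data cluster with refcount 1 (writable in place)",
        _ => "data cluster that needs COW before writing",
    }
}
// classify_l2_entry_example((1 << 63) | 0x10000) -> "data cluster with refcount 1 ..."
// classify_l2_entry_example(0)                   -> "unallocated: falls through ..."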
+ pub fn is_zero(&self) -> bool { + self.0 & (1u64 << 0) != 0 + } + + /// Return all reserved bits. + pub fn reserved_bits(&self) -> u64 { + if self.is_compressed() { + self.0 & 0x8000_0000_0000_0000u64 + } else { + self.0 & 0x3f00_0000_0000_01feu64 + } + } + + /// Return the full compressed cluster descriptor. + pub fn compressed_descriptor(&self) -> u64 { + self.0 & 0x3fff_ffff_ffff_ffffu64 + } + + /// If this entry is compressed, return the start host offset and upper limit on the compressed + /// number of bytes. + pub fn compressed_range(&self, cluster_bits: u32) -> Option<(HostOffset, u64)> { + if self.is_compressed() { + let desc = self.compressed_descriptor(); + let compressed_offset_bits = 62 - (cluster_bits - 8); + let offset = desc & ((1 << compressed_offset_bits) - 1) & 0x00ff_ffff_ffff_ffffu64; + let sectors = desc >> compressed_offset_bits; + // The first sector is not considered in `sectors`, so we add it and subtract the + // number of bytes there that do not belong to this compressed cluster + let length = (sectors + 1) * 512 - (offset & 511); + + Some((HostOffset(offset), length)) + } else { + None + } + } + + /// If this entry is allocated, return the first host cluster and the number of clusters it + /// references. + /// + /// `external_data_file` must be true when using an external data file. + fn allocation( + &self, + cluster_bits: u32, + external_data_file: bool, + ) -> Option<(HostCluster, ClusterCount)> { + if let Some((offset, length)) = self.compressed_range(cluster_bits) { + // Compressed clusters can cross host cluster boundaries, and thus occupy two clusters + let first_cluster = offset.cluster(cluster_bits); + let cluster_count = ClusterCount::from_byte_size( + offset + length - first_cluster.offset(cluster_bits), + cluster_bits, + ); + Some((first_cluster, cluster_count)) + } else { + self.cluster_offset(external_data_file) + .map(|ofs| (ofs.cluster(cluster_bits), ClusterCount(1))) + } + } + + /// Return the high-level `L2Mapping` representation. + /// + /// `guest_cluster` is the guest cluster being accessed, `cluster_bits` is log2 of the cluster + /// size. `external_data_file` must be true when using an external data file. + fn into_mapping( + self, + guest_cluster: GuestCluster, + cluster_bits: u32, + external_data_file: bool, + ) -> io::Result { + let mapping = if let Some((offset, length)) = self.compressed_range(cluster_bits) { + L2Mapping::Compressed { + host_offset: offset, + length, + } + } else if self.is_zero() { + let host_cluster = self + .cluster_offset(external_data_file) + .map(|ofs| { + ofs.checked_cluster(cluster_bits).ok_or_else(|| { + let offset = guest_cluster.offset(cluster_bits); + io::Error::other(format!( + "Unaligned pre-allocated zero cluster at {offset}; L2 entry: {self:?}" + )) + }) + }) + .transpose()?; + + L2Mapping::Zero { + host_cluster, + copied: host_cluster.is_some() && self.is_copied(), + } + } else if let Some(host_offset) = self.cluster_offset(external_data_file) { + let host_cluster = host_offset.checked_cluster(cluster_bits).ok_or_else(|| { + let offset = guest_cluster.offset(cluster_bits); + io::Error::other(format!( + "Unaligned data cluster at {offset}; L2 entry: {self:?}" + )) + })?; + + L2Mapping::DataFile { + host_cluster, + copied: self.is_copied(), + } + } else { + L2Mapping::Backing { + backing_offset: guest_cluster.offset(cluster_bits).0, + } + }; + + Ok(mapping) + } + + /// Create an L2 entry from its high-level `L2Mapping` representation. 
+ fn from_mapping(value: L2Mapping, cluster_bits: u32) -> Self { + let num_val: u64 = match value { + L2Mapping::DataFile { + host_cluster, + copied, + } => { + debug_assert!(host_cluster.offset(cluster_bits) <= MAX_OFFSET); + if copied { + (1 << 63) | host_cluster.offset(cluster_bits).0 + } else { + host_cluster.offset(cluster_bits).0 + } + } + + L2Mapping::Backing { backing_offset: _ } => 0, + + L2Mapping::Zero { + host_cluster, + copied, + } => { + let host_offset = host_cluster.map(|hc| hc.offset(cluster_bits)); + debug_assert!(host_offset.unwrap_or(HostOffset(0)) <= MAX_OFFSET); + if copied { + (1 << 63) | host_offset.unwrap().0 | 0x1 + } else { + host_offset.unwrap_or(HostOffset(0)).0 | 0x1 + } + } + + L2Mapping::Compressed { + host_offset, + length, + } => { + let compressed_offset_bits = 62 - (cluster_bits - 8); + assert!(length < 1 << cluster_bits); + assert!(host_offset.0 < 1 << compressed_offset_bits); + + // The first sector is not considered, so we subtract the number of bytes in it + // that belong to this compressed cluster from `length`: + // ceil((length - (512 - (host_offset & 511))) / 512) + // = (length + 511 - 512 + (host_offset & 511)) / 512 + let sectors = (length - 1 + (host_offset.0 & 511)) / 512; + + (1 << 62) | (sectors << compressed_offset_bits) | host_offset.0 + } + }; + + let entry = L2Entry(num_val); + debug_assert!(entry.reserved_bits() == 0); + entry + } +} + +impl AtomicL2Entry { + /// Get the contained value. + fn get(&self) -> L2Entry { + L2Entry(self.0.load(Ordering::Relaxed)) + } + + /// Exchange the contained value. + /// + /// # Safety + /// Caller must ensure that: + /// (1) No reader sees invalid intermediate states. + /// (2) Updates are done atomically (do not depend on prior state of the L2 table), or there is + /// only one writer at a time. + unsafe fn swap(&self, l2e: L2Entry) -> L2Entry { + L2Entry(self.0.swap(l2e.0, Ordering::Relaxed)) + } +} + +impl TableEntry for AtomicL2Entry { + fn try_from_plain(value: u64, header: &Header) -> io::Result { + let entry = L2Entry(value); + + if entry.reserved_bits() != 0 { + return Err(invalid_data(format!( + "Invalid L2 entry 0x{:x}, reserved bits set (0x{:x})", + value, + entry.reserved_bits(), + ))); + } + + if let Some(offset) = entry.cluster_offset(header.external_data_file()) { + if !entry.is_compressed() && offset.in_cluster_offset(header.cluster_bits()) != 0 { + return Err(invalid_data(format!( + "Invalid L2 entry 0x{:x}, offset ({}) is not aligned to cluster size (0x{:x})", + value, + offset, + header.cluster_size(), + ))); + } + } + + Ok(AtomicL2Entry(AtomicU64::new(entry.0))) + } + + fn to_plain(&self) -> u64 { + self.get().0 + } +} + +impl L2Mapping { + /// Check whether two mappings are consecutive. + /// + /// Given the `preceding` mapping, check whether `self` is consecutive to it, i.e. is the same + /// kind of mapping, and the offsets are consecutive. 
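A numeric round trip through the compressed-cluster math above may help; this standalone sketch mirrors `from_mapping()` and `compressed_range()` for assumed values and `cluster_bits = 16`:

// Sketch: encode and decode a hypothetical compressed cluster descriptor.
fn compressed_round_trip_example() {
    let cluster_bits = 16u32;
    let x = 62 - (cluster_bits - 8); // 54 offset bits
    let host_offset: u64 = 0x1_0200; // start of the compressed data
    let length: u64 = 1300;          // actual compressed byte count

    // Encode (as in from_mapping): additional 512-byte sectors beyond the first one.
    let sectors = (length - 1 + (host_offset & 511)) / 512; // -> 2
    let descriptor = (1 << 62) | (sectors << x) | host_offset;

    // Decode (as in compressed_range): recovers an upper bound on the length.
    let desc = descriptor & 0x3fff_ffff_ffff_ffff;
    let dec_offset = desc & ((1 << x) - 1) & 0x00ff_ffff_ffff_ffff;
    let dec_sectors = desc >> x;
    let dec_length = (dec_sectors + 1) * 512 - (dec_offset & 511);
    assert_eq!((dec_offset, dec_length), (0x1_0200, 1536)); // 1536 >= 1300
}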
+ pub fn is_consecutive(&self, preceding: &L2Mapping, cluster_bits: u32) -> bool { + match preceding { + L2Mapping::DataFile { + host_cluster: prior_cluster, + copied, + } => { + if let L2Mapping::DataFile { + host_cluster: next_cluster, + copied: next_copied, + } = self + { + *next_cluster == *prior_cluster + ClusterCount(1) && *next_copied == *copied + } else { + false + } + } + + L2Mapping::Backing { + backing_offset: prior_backing_offset, + } => { + let Some(expected_next) = prior_backing_offset.checked_add(1 << cluster_bits) + else { + return false; + }; + + if let L2Mapping::Backing { + backing_offset: next_offset, + } = self + { + *next_offset == expected_next + } else { + false + } + } + + L2Mapping::Zero { + host_cluster: _, + copied: _, + } => { + // Cluster and copied do not matter; every read is continuous regardless (always + // zero), and every write is, too (always allocate) + matches!( + self, + L2Mapping::Zero { + host_cluster: _, + copied: _, + } + ) + } + + L2Mapping::Compressed { + host_offset: _, + length: _, + } => { + // Not really true, but in practice it is. Reads need to go through a special + // function anyway, and every write will need COW anyway. + matches!( + self, + L2Mapping::Compressed { + host_offset: _, + length: _, + } + ) + } + } + } +} + +/// L2 table. +#[derive(Debug)] +pub(super) struct L2Table { + /// Cluster of the L2 table. + cluster: Option, + + /// Table data. + data: Box<[AtomicL2Entry]>, + + /// log2 of the cluster size. + cluster_bits: u32, + + /// Whether this image uses an external data file. + external_data_file: bool, + + /// Whether this table has been modified since it was last written. + modified: AtomicBool, + + /// Lock for creating `L2TableWriteGuard`. + writer_lock: Mutex<()>, +} + +/// Write guard for an L2 table. +#[derive(Debug)] +pub(super) struct L2TableWriteGuard<'a> { + /// Referenced L2 table. + table: &'a L2Table, + + /// Held guard mutex on that L2 table. + _lock: MutexGuard<'a, ()>, +} + +impl L2Table { + /// Create a new zeroed L2 table. + pub fn new_cleared(header: &Header) -> Self { + let mut data = Vec::with_capacity(header.l2_entries()); + data.resize_with(header.l2_entries(), Default::default); + + L2Table { + cluster: None, + data: data.into_boxed_slice(), + cluster_bits: header.cluster_bits(), + external_data_file: header.external_data_file(), + modified: true.into(), + writer_lock: Default::default(), + } + } + + /// Look up a cluster mapping. + pub fn get_mapping(&self, lookup_cluster: GuestCluster) -> io::Result { + self.get(lookup_cluster.l2_index(self.cluster_bits)) + .into_mapping(lookup_cluster, self.cluster_bits, self.external_data_file) + } + + /// Allow modifying this L2 table. + /// + /// Note that readers are allowed to exist while modifications are happening. + pub async fn lock_write(&self) -> L2TableWriteGuard<'_> { + L2TableWriteGuard { + table: self, + _lock: self.writer_lock.lock().await, + } + } +} + +impl L2TableWriteGuard<'_> { + /// Look up a cluster mapping. + pub fn get_mapping(&self, lookup_cluster: GuestCluster) -> io::Result { + self.table.get_mapping(lookup_cluster) + } + + /// Enter the given raw data cluster mapping into the L2 table. + /// + /// If the previous entry pointed to an allocated cluster, return the old allocation so its + /// refcount can be decreased (offset of the first cluster and number of clusters -- compressed + /// clusters can span across host cluster boundaries). 
+ /// + /// If the allocation is reused, `None` is returned, so this function only returns `Some(_)` if + /// some cluster is indeed leaked. + #[must_use] + pub fn map_cluster( + &mut self, + index: usize, + host_cluster: HostCluster, + ) -> Option<(HostCluster, ClusterCount)> { + let new = L2Entry::from_mapping( + L2Mapping::DataFile { + host_cluster, + copied: true, + }, + self.table.cluster_bits, + ); + // Safe: We set a full valid mapping, and there is only one writer (thanks to + // `L2TableWriteGuard`). + let l2e = unsafe { self.table.data[index].swap(new) }; + self.table.modified.store(true, Ordering::Relaxed); + + let allocation = l2e.allocation(self.table.cluster_bits, self.table.external_data_file); + if let Some((a_cluster, a_count)) = allocation { + if a_cluster == host_cluster && a_count == ClusterCount(1) { + None + } else { + allocation + } + } else { + None + } + } +} + +impl Table for L2Table { + type InternalEntry = AtomicL2Entry; + type Entry = L2Entry; + const NAME: &'static str = "L2 table"; + const MAX_ENTRIES: usize = MAX_CLUSTER_SIZE / 8; + + fn from_data(data: Box<[AtomicL2Entry]>, header: &Header) -> Self { + assert!(data.len() == header.l2_entries()); + + Self { + cluster: None, + data, + cluster_bits: header.cluster_bits(), + external_data_file: header.external_data_file(), + modified: true.into(), + writer_lock: Default::default(), + } + } + + fn entries(&self) -> usize { + self.data.len() + } + + fn get_ref(&self, index: usize) -> Option<&AtomicL2Entry> { + self.data.get(index) + } + + fn get(&self, index: usize) -> L2Entry { + self.data + .get(index) + .map(|l2e| l2e.get()) + .unwrap_or(L2Entry(0)) + } + + fn get_cluster(&self) -> Option { + self.cluster + } + + fn get_offset(&self) -> Option { + self.cluster.map(|index| index.offset(self.cluster_bits)) + } + + fn set_cluster(&mut self, cluster: HostCluster) { + self.cluster = Some(cluster); + self.modified.store(true, Ordering::Relaxed); + } + + fn unset_cluster(&mut self) { + self.cluster = None; + } + + fn is_modified(&self) -> bool { + self.modified.load(Ordering::Relaxed) + } + + fn clear_modified(&self) { + self.modified.store(false, Ordering::Relaxed); + } + + fn set_modified(&self) { + self.modified.store(true, Ordering::Relaxed); + } + + fn cluster_bits(&self) -> u32 { + self.cluster_bits + } +} + +impl Clone for L2Table { + fn clone(&self) -> Self { + let mut data = Vec::with_capacity(self.data.len()); + for entry in &self.data { + // None of these can be `copied` + let entry = entry.get().without_copied(); + data.push(AtomicL2Entry(AtomicU64::new(entry.0))); + } + + let modified = AtomicBool::new(self.is_modified()); + + L2Table { + cluster: None, + data: data.into_boxed_slice(), + cluster_bits: self.cluster_bits, + external_data_file: self.external_data_file, + modified, + writer_lock: Default::default(), + } + } +} + +impl Drop for L2Table { + fn drop(&mut self) { + if self.is_modified() { + error!("L2 table dropped while modified; was the image closed before being flushed?"); + } + } +} + +/// Refcount table entry. +#[derive(Copy, Clone, Default, Debug)] +pub(super) struct RefTableEntry(u64); + +impl RefTableEntry { + /// Offset of the referenced refblock, if any. + pub fn refblock_offset(&self) -> Option { + let ofs = self.0 & 0xffff_ffff_ffff_fe00u64; + if ofs == 0 { + None + } else { + Some(HostOffset(ofs)) + } + } + + /// Return all reserved bits. 
+ pub fn reserved_bits(&self) -> u64 { + self.0 & 0x0000_0000_0000_01ffu64 + } +} + +impl TableEntry for RefTableEntry { + fn try_from_plain(value: u64, header: &Header) -> io::Result { + let entry = RefTableEntry(value); + + if entry.reserved_bits() != 0 { + return Err(invalid_data(format!( + "Invalid reftable entry 0x{:x}, reserved bits set (0x{:x})", + value, + entry.reserved_bits(), + ))); + } + + if let Some(rb_ofs) = entry.refblock_offset() { + if rb_ofs.in_cluster_offset(header.cluster_bits()) != 0 { + return Err(invalid_data( + format!( + "Invalid reftable entry 0x{:x}, offset ({}) is not aligned to cluster size (0x{:x})", + value, + rb_ofs, + header.cluster_size(), + ), + )); + } + } + + Ok(entry) + } + + fn to_plain(&self) -> u64 { + self.0 + } +} + +/// Refcount table. +#[derive(Debug)] +pub(super) struct RefTable { + /// First cluster in the image file. + cluster: Option, + + /// Table data. + data: Box<[RefTableEntry]>, + + /// log2 of the cluster size. + cluster_bits: u32, + + /// Whether this table has been modified since it was last written. + modified: AtomicBool, +} + +impl RefTable { + /// Create a clone that covers at least `at_least_index`. + /// + /// Also ensure that beyond `at_least_index`, there are enough entries to self-describe the new + /// refcount table (so that it can actually be allocated). + pub fn clone_and_grow(&self, header: &Header, at_least_index: usize) -> io::Result { + let cluster_size = header.cluster_size(); + let rb_entries = header.rb_entries(); + + // There surely is an optimal O(1) solution, but probably would look less clear, and this + // is not a hot path. + let mut extra_rbs = 1; + let new_entry_count = loop { + let entry_count = cmp::max(at_least_index + 1 + extra_rbs, self.data.len()); + let entry_count = entry_count.next_multiple_of(cluster_size / size_of::()); + let size = entry_count * size_of::(); + // Full number of clusters needed to both the new reftable *and* the `extra_rbs` + let refcount_clusters = size / cluster_size + extra_rbs; + let rbs_needed = refcount_clusters.div_ceil(rb_entries); + if extra_rbs == rbs_needed { + break entry_count; + } + extra_rbs = rbs_needed; + }; + + if new_entry_count > ::MAX_ENTRIES { + return Err(io::Error::other( + "Cannot grow the image to this size; refcount table would become too big", + )); + } + + let mut new_data = vec![RefTableEntry::default(); new_entry_count]; + new_data[..self.data.len()].copy_from_slice(&self.data); + + Ok(Self { + cluster: None, + data: new_data.into_boxed_slice(), + cluster_bits: header.cluster_bits(), + modified: true.into(), + }) + } + + /// Check whether `index` is in bounds. + pub fn in_bounds(&self, index: usize) -> bool { + index < self.data.len() + } + + /// Enter the given refcount block into this refcount table. + pub fn enter_refblock(&mut self, index: usize, rb: &RefBlock) -> io::Result<()> { + let rb_offset = rb.get_offset().ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidInput, + "Refcount block as no assigned offset", + ) + })?; + + let rt_entry = RefTableEntry(rb_offset.0); + debug_assert!(rt_entry.reserved_bits() == 0); + self.data[index] = rt_entry; + self.modified.store(true, Ordering::Relaxed); + + Ok(()) + } +} + +impl Table for RefTable { + type InternalEntry = RefTableEntry; + type Entry = RefTableEntry; + const NAME: &'static str = "Refcount table"; + + /// Maximum number of refcount table entries. + /// + /// Not in QEMU, but makes sense to limit to the same as the L1 table. 
Note that refcount + /// blocks usually cover more clusters than an L2 table, so this generally allows larger image + /// files than would be necessary for the maximum guest disk size determined by the maximum + /// number of L1 entries. + const MAX_ENTRIES: usize = ::MAX_ENTRIES; + + fn from_data(data: Box<[RefTableEntry]>, header: &Header) -> Self { + Self { + cluster: None, + data, + cluster_bits: header.cluster_bits(), + modified: true.into(), + } + } + + fn entries(&self) -> usize { + self.data.len() + } + + fn get_ref(&self, index: usize) -> Option<&RefTableEntry> { + self.data.get(index) + } + + fn get(&self, index: usize) -> RefTableEntry { + self.data.get(index).copied().unwrap_or(RefTableEntry(0)) + } + + fn get_cluster(&self) -> Option { + self.cluster + } + + fn get_offset(&self) -> Option { + self.cluster.map(|index| index.offset(self.cluster_bits)) + } + + fn set_cluster(&mut self, cluster: HostCluster) { + self.cluster = Some(cluster); + self.modified.store(true, Ordering::Relaxed); + } + + fn unset_cluster(&mut self) { + self.cluster = None; + } + + fn is_modified(&self) -> bool { + self.modified.load(Ordering::Relaxed) + } + + fn clear_modified(&self) { + self.modified.store(false, Ordering::Relaxed); + } + + fn set_modified(&self) { + self.modified.store(true, Ordering::Relaxed); + } + + fn cluster_bits(&self) -> u32 { + self.cluster_bits + } +} + +/// Refcount block. +pub(super) struct RefBlock { + /// Cluster in the image file. + cluster: Option, + + /// Raw table data (big endian). + raw_data: IoBuffer, + + /// log2 of the refcount bits. + refcount_order: u32, + + /// log2 of the cluster size. + cluster_bits: u32, + + /// Whether this block has been modified since it was last written. + modified: AtomicBool, + + /// Lock for creating `RefBlockWriteGuard`. + writer_lock: Mutex<()>, +} + +/// Write guard for a refblock. +pub(super) struct RefBlockWriteGuard<'a> { + /// Referenced refblock. + rb: &'a RefBlock, + + /// Held guard mutex on that refblock. + _lock: MutexGuard<'a, ()>, +} + +impl RefBlock { + /// Create a new zeroed refcount block. + pub fn new_cleared(for_image: &S, header: &Header) -> io::Result { + let mut raw_data = IoBuffer::new(header.cluster_size(), for_image.mem_align())?; + raw_data.as_mut().into_slice().fill(0); + + Ok(RefBlock { + cluster: None, + raw_data, + refcount_order: header.refcount_order(), + cluster_bits: header.cluster_bits(), + modified: true.into(), + writer_lock: Default::default(), + }) + } + + /// Load a refcount block from disk. + pub async fn load( + image: &S, + header: &Header, + cluster: HostCluster, + ) -> io::Result { + let cluster_bits = header.cluster_bits(); + let cluster_size = 1 << cluster_bits; + let refcount_order = header.refcount_order(); + let offset = cluster.offset(cluster_bits); + + check_table( + "Refcount block", + offset.0, + cluster_size, + 1, + MAX_CLUSTER_SIZE, + cluster_size, + )?; + + let mut raw_data = + IoBuffer::new(cluster_size, cmp::max(image.mem_align(), size_of::()))?; + image.read(&mut raw_data, offset.0).await?; + + Ok(RefBlock { + cluster: Some(cluster), + raw_data, + refcount_order, + cluster_bits, + modified: false.into(), + writer_lock: Default::default(), + }) + } + + /// Write a refcount block to disk. 
+ pub async fn write(&self, image: &S) -> io::Result<()> { + let offset = self + .get_offset() + .ok_or_else(|| io::Error::other("Cannot write qcow2 refcount block, no offset set"))?; + + self.clear_modified(); + if let Err(err) = image.write(self.raw_data.as_ref(), offset.0).await { + self.set_modified(); + return Err(err); + } + + Ok(()) + } + + /// Get the block’s cluster in the image file. + pub fn get_cluster(&self) -> Option { + self.cluster + } + + /// Get the block’s offset in the image file. + pub fn get_offset(&self) -> Option { + self.cluster.map(|index| index.offset(self.cluster_bits)) + } + + /// Change the block’s cluster in the image file (for writing). + pub fn set_cluster(&mut self, cluster: HostCluster) { + self.cluster = Some(cluster); + self.set_modified(); + } + + /// Calculate sub-byte refcount access parameters. + /// + /// For a given refcount index, return its: + /// - byte index, + /// - access mask, + /// - in-byte shift. + fn sub_byte_refcount_access(&self, index: usize) -> (usize, u8, usize) { + let order = self.refcount_order; + debug_assert!(order < 3); + + // Note that `order` is in bits, i.e. `1 << order` is the number of bits. `index` is in + // units of refcounts, so `index << order` is the bit index, and `index << (order - 3)` is + // then the byte index, which is equal to `index >> (3 - order)`. + let byte_index = index >> (3 - order); + // `1 << order` is the bits per refcount (bprc), so `(1 << bprc) - 1` is the mask for one + // refcount (its maximum value). + let mask = (1 << (1 << order)) - 1; + // `index` is in units of refcounts, so `index << order` is the bit index. `% 8`, we get + // the base index inside of a byte. + let shift = (index << order) % 8; + + (byte_index, mask, shift) + } + + /// Get the given cluster’s refcount. + pub fn get(&self, index: usize) -> u64 { + match self.refcount_order { + // refcount_bits == 1, 2, 4 + 0..=2 => { + let (index, mask, shift) = self.sub_byte_refcount_access(index); + let raw_data_slice = unsafe { self.raw_data.as_ref().into_typed_slice::() }; + let atomic = + unsafe { AtomicU8::from_ptr(&raw_data_slice[index] as *const u8 as *mut u8) }; + ((atomic.load(Ordering::Relaxed) >> shift) & mask) as u64 + } + + // refcount_bits == 8 + 3 => { + let raw_data_slice = unsafe { self.raw_data.as_ref().into_typed_slice::() }; + let atomic = + unsafe { AtomicU8::from_ptr(&raw_data_slice[index] as *const u8 as *mut u8) }; + atomic.load(Ordering::Relaxed) as u64 + } + + // refcount_bits == 16 + 4 => { + let raw_data_slice = unsafe { self.raw_data.as_ref().into_typed_slice::() }; + let atomic = unsafe { + AtomicU16::from_ptr(&raw_data_slice[index] as *const u16 as *mut u16) + }; + u16::from_be(atomic.load(Ordering::Relaxed)) as u64 + } + + // refcount_bits == 32 + 5 => { + let raw_data_slice = unsafe { self.raw_data.as_ref().into_typed_slice::() }; + let atomic = unsafe { + AtomicU32::from_ptr(&raw_data_slice[index] as *const u32 as *mut u32) + }; + u32::from_be(atomic.load(Ordering::Relaxed)) as u64 + } + + // refcount_bits == 64 + 6 => { + let raw_data_slice = unsafe { self.raw_data.as_ref().into_typed_slice::() }; + let atomic = unsafe { + AtomicU64::from_ptr(&raw_data_slice[index] as *const u64 as *mut u64) + }; + u64::from_be(atomic.load(Ordering::Relaxed)) + } + + _ => unreachable!(), + } + } + + /// Allow modifying this refcount block. + /// + /// Note that readers are allowed to exist while modifications are happening. 
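To illustrate the sub-byte refcount arithmetic above, here is a standalone sketch assuming `refcount_order = 1` (2-bit refcounts); it reproduces the index/mask/shift computation of `sub_byte_refcount_access()` and the read-modify-write done under the write guard:

// Sketch: locating and bumping a 2-bit refcount at index 13.
fn two_bit_refcount_example() {
    let order = 1u32;
    let index = 13usize;
    let byte_index = index >> (3 - order);  // 13 >> 2 = 3: fourth byte of the refblock
    let mask: u8 = (1 << (1 << order)) - 1; // 0b11: value mask for one refcount
    let shift = (index << order) % 8;       // 26 % 8 = 2: position inside that byte

    let mut byte = 0b0110_0100u8;           // hypothetical contents of byte 3
    let old = (byte >> shift) & mask;       // -> 0b01 = 1
    let new = old + 1;                      // increment; the real code errors if new > mask
    byte = (byte & !(mask << shift)) | (new << shift);
    assert_eq!((byte_index, old, byte), (3, 1, 0b0110_1000));
}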
+ pub async fn lock_write(&self) -> RefBlockWriteGuard<'_> { + RefBlockWriteGuard { + rb: self, + _lock: self.writer_lock.lock().await, + } + } + + /// Check whether this block has been modified since it was last written. + pub fn is_modified(&self) -> bool { + self.modified.load(Ordering::Relaxed) + } + + /// Clear the modified flag. + pub fn clear_modified(&self) { + self.modified.store(false, Ordering::Relaxed); + } + + /// Set the modified flag. + pub fn set_modified(&self) { + self.modified.store(true, Ordering::Relaxed); + } + + /// Check whether the given cluster’s refcount is 0. + pub fn is_zero(&self, index: usize) -> bool { + self.get(index) == 0 + } +} + +impl RefBlockWriteGuard<'_> { + /// # Safety + /// Caller must ensure there are no concurrent writers. + unsafe fn fetch_update_bitset( + bitset: &AtomicU8, + change: i64, + base_mask: u8, + shift: usize, + ) -> io::Result { + let mask = base_mask << shift; + + // load + store is OK without concurrent writers + let full = bitset.load(Ordering::Relaxed); + let old = (full & mask) >> shift; + let new = if change > 0 { + let change = change.try_into().map_err(|_| { + io::Error::new( + io::ErrorKind::InvalidInput, + format!("Requested refcount change of {change} is too big for the image’s refcount width"), + ) + })?; + old.checked_add(change) + } else { + let change = (-change).try_into().map_err(|_| { + io::Error::new( + io::ErrorKind::InvalidInput, + format!("Requested refcount change of {change} is too big for the image’s refcount width"), + ) + })?; + old.checked_sub(change) + }; + let new = new.ok_or_else(|| { + invalid_data(format!( + "Changing refcount from {old} by {change} would overflow" + )) + })?; + if new > base_mask { + return Err(invalid_data(format!( + "Changing refcount from {old} to {new} (by {change}) would overflow" + ))); + } + + let full = (full & !mask) | (new << shift); + bitset.store(full, Ordering::Relaxed); + Ok(old as u64) + } + + /// # Safety + /// Caller must ensure there are no concurrent writers. + unsafe fn fetch_update_full< + T, + L: FnOnce(&T) -> u64, + S: FnOnce(&T, u64) -> Result<(), TryFromIntError>, + >( + atomic: &T, + change: i64, + load: L, + store: S, + ) -> io::Result { + // load + store is OK without concurrent writers + let old = load(atomic); + + let new = if change > 0 { + old.checked_add(change as u64) + } else { + old.checked_sub(-change as u64) + }; + let new = new.ok_or_else(|| { + invalid_data(format!( + "Changing refcount from {old} by {change} would overflow" + )) + })?; + + store(atomic, new).map_err(|_| { + invalid_data(format!( + "Changing refcount from {old} to {new} (by {change}) would overflow" + )) + })?; + + Ok(old) + } + + /// Modify the given cluster’s refcount. + fn modify(&mut self, index: usize, change: i64) -> io::Result { + let result = match self.rb.refcount_order { + // refcount_bits == 1, 2, 4 + 0..=2 => { + let (index, mask, shift) = self.rb.sub_byte_refcount_access(index); + let raw_data_slice = unsafe { self.rb.raw_data.as_ref().into_typed_slice::() }; + let atomic = + unsafe { AtomicU8::from_ptr(&raw_data_slice[index] as *const u8 as *mut u8) }; + // Safe: `RefBlockWriteGuard` ensures there are no concurrent writers. 
+ unsafe { Self::fetch_update_bitset(atomic, change, mask, shift) } + } + + // refcount_bits == 8 + 3 => { + let raw_data_slice = unsafe { self.rb.raw_data.as_ref().into_typed_slice::() }; + let atomic = + unsafe { AtomicU8::from_ptr(&raw_data_slice[index] as *const u8 as *mut u8) }; + // Safe: `RefBlockWriteGuard` ensures there are no concurrent writers. + unsafe { + Self::fetch_update_full( + atomic, + change, + |a| a.load(Ordering::Relaxed) as u64, + |a, v| { + a.store(v.try_into()?, Ordering::Relaxed); + Ok(()) + }, + ) + } + } + + // refcount_bits == 16 + 4 => { + let raw_data_slice = unsafe { self.rb.raw_data.as_ref().into_typed_slice::() }; + let atomic = unsafe { + AtomicU16::from_ptr(&raw_data_slice[index] as *const u16 as *mut u16) + }; + unsafe { + Self::fetch_update_full( + atomic, + change, + |a| u16::from_be(a.load(Ordering::Relaxed)) as u64, + |a, v| { + a.store(u16::try_from(v)?.to_be(), Ordering::Relaxed); + Ok(()) + }, + ) + } + } + + // refcount_bits == 32 + 5 => { + let raw_data_slice = unsafe { self.rb.raw_data.as_ref().into_typed_slice::() }; + let atomic = unsafe { + AtomicU32::from_ptr(&raw_data_slice[index] as *const u32 as *mut u32) + }; + unsafe { + Self::fetch_update_full( + atomic, + change, + |a| u32::from_be(a.load(Ordering::Relaxed)) as u64, + |a, v| { + a.store(u32::try_from(v)?.to_be(), Ordering::Relaxed); + Ok(()) + }, + ) + } + } + + // refcount_bits == 64 + 6 => { + let raw_data_slice = unsafe { self.rb.raw_data.as_ref().into_typed_slice::() }; + let atomic = unsafe { + AtomicU64::from_ptr(&raw_data_slice[index] as *const u64 as *mut u64) + }; + unsafe { + Self::fetch_update_full( + atomic, + change, + |a| u64::from_be(a.load(Ordering::Relaxed)), + |a, v| { + a.store(v.to_be(), Ordering::Relaxed); + Ok(()) + }, + ) + } + } + + _ => unreachable!(), + }; + + let result = result?; + self.rb.modified.store(true, Ordering::Relaxed); + Ok(result) + } + + /// Increment the given cluster’s refcount. + /// + /// Returns the old value. + pub fn increment(&mut self, index: usize) -> io::Result { + self.modify(index, 1) + } + + /// Decrement the given cluster’s refcount. + /// + /// Returns the old value. + pub fn decrement(&mut self, index: usize) -> io::Result { + self.modify(index, -1) + } + + /// Check whether the given cluster’s refcount is 0. + pub fn is_zero(&self, index: usize) -> bool { + self.rb.is_zero(index) + } +} + +impl Drop for RefBlock { + fn drop(&mut self) { + if self.is_modified() { + error!( + "Refcount block dropped while modified; was the image closed before being flushed?" + ); + } + } +} + +/// Generic trait for qcow2 table entries (L1, L2, refcount table). +pub trait TableEntry +where + Self: Sized, +{ + /// Load the given raw value, checking it for validity. + fn try_from_plain(value: u64, header: &Header) -> io::Result; + + /// Return the contained raw value. + fn to_plain(&self) -> u64; +} + +/// Generic trait for qcow2 metadata tables (L1, L2, refcount table). +pub trait Table: Sized { + /// Internal type for each table entry. + type InternalEntry: TableEntry; + /// Externally visible type for each table entry. + type Entry: Copy; + /// User-readable struct name. + const NAME: &'static str; + /// Maximum allowable number of entries. + const MAX_ENTRIES: usize; + + /// Create a new table with the given contents + fn from_data(data: Box<[Self::InternalEntry]>, header: &Header) -> Self; + + /// Number of entries. + fn entries(&self) -> usize; + /// Get the given entry (as reference). 
+ fn get_ref(&self, index: usize) -> Option<&Self::InternalEntry>; + /// Get the given entry (copied). + fn get(&self, index: usize) -> Self::Entry; + /// Get this table’s (first) cluster in the image file. + fn get_cluster(&self) -> Option; + /// Get this table’s offset in the image file. + fn get_offset(&self) -> Option; + /// Set this table’s (first) cluster in the image file (for writing). + fn set_cluster(&mut self, cluster: HostCluster); + /// Remove the table’s association with any cluster in the image file. + fn unset_cluster(&mut self); + + /// Return log2 of the cluster size. + /// + /// All tables store this anyway. + fn cluster_bits(&self) -> u32; + + /// Check whether this table has been modified since it was last written. + fn is_modified(&self) -> bool; + /// Clear the modified flag. + fn clear_modified(&self); + /// Set the modified flag. + fn set_modified(&self); + + /// Table size in bytes. + fn byte_size(&self) -> usize { + self.entries() * size_of::() + } + + /// Number of clusters used by this table. + fn cluster_count(&self) -> ClusterCount { + ClusterCount::from_byte_size(self.byte_size() as u64, self.cluster_bits()) + } + + /// Load a table from the image file. + async fn load( + image: &S, + header: &Header, + cluster: HostCluster, + entries: usize, + ) -> io::Result { + let offset = cluster.offset(header.cluster_bits()); + + check_table( + Self::NAME, + offset.0, + entries, + size_of::(), + Self::MAX_ENTRIES, + header.cluster_size(), + )?; + + let byte_size = entries * size_of::(); + let mut buffer = IoBuffer::new(byte_size, cmp::max(image.mem_align(), size_of::()))?; + + image.read(&mut buffer, offset.0).await?; + + // Safe because `u64` is a plain type, and the alignment fits + let raw_table = unsafe { buffer.as_ref().into_typed_slice::() }; + + let mut table = Vec::::with_capacity(entries); + for be_value in raw_table { + table.push(Self::InternalEntry::try_from_plain( + u64::from_be(*be_value), + header, + )?) + } + + let mut table = Self::from_data(table.into_boxed_slice(), header); + table.set_cluster(cluster); + table.clear_modified(); + Ok(table) + } + + /// Write a table to the image file. + /// + /// Callers must ensure the table is copied, i.e. its refcount is 1. + async fn write(&self, image: &S) -> io::Result<()> { + let offset = self + .get_offset() + .ok_or_else(|| io::Error::other("Cannot write qcow2 metadata table, no offset set"))?; + + check_table( + Self::NAME, + offset.0, + self.entries(), + size_of::(), + Self::MAX_ENTRIES, + 1 << self.cluster_bits(), + )?; + + let byte_size = self.byte_size(); + let mut buffer = IoBuffer::new(byte_size, cmp::max(image.mem_align(), size_of::()))?; + + self.clear_modified(); + + // Safe because we have just allocated this, and it fits the alignment + let raw_table = unsafe { buffer.as_mut().into_typed_slice::() }; + for (i, be_value) in raw_table.iter_mut().enumerate() { + // 0 always works, that’s by design. + *be_value = self.get_ref(i).map(|e| e.to_plain()).unwrap_or(0).to_be(); + } + + if let Err(err) = image.write(&buffer, offset.0).await { + self.set_modified(); + return Err(err); + } + + Ok(()) + } + + /// Write at least the given single (modified) entry to the image file. + /// + /// Potentially writes more of the table, if alignment requirements ask for that. + async fn write_entry(&self, image: &S, index: usize) -> io::Result<()> { + // This alignment calculation code implicitly assumes that the cluster size is aligned to + // the storage’s request/memory alignment, but that is often fair. 
If that is not the + // case, there is not much we can do anyway. + let byte_size = self.byte_size(); + let power_of_two_up_to_byte_size = if byte_size.is_power_of_two() { + byte_size + } else { + ((byte_size + 1) / 2).next_power_of_two() + }; + let alignment = cmp::min( + power_of_two_up_to_byte_size, + cmp::max( + cmp::max(image.mem_align(), image.req_align()), + size_of::(), + ), + ); + let alignment_in_entries = alignment / size_of::(); + + let offset = self + .get_offset() + .ok_or_else(|| io::Error::other("Cannot write qcow2 metadata table, no offset set"))?; + + check_table( + Self::NAME, + offset.0, + self.entries(), + size_of::(), + Self::MAX_ENTRIES, + 1 << self.cluster_bits(), + )?; + + let mut buffer = IoBuffer::new(alignment, cmp::max(image.mem_align(), size_of::()))?; + + // Safe because we have just allocated this, and it fits the alignment + let raw_entries = unsafe { buffer.as_mut().into_typed_slice::() }; + let first_index = (index / alignment_in_entries) * alignment_in_entries; + #[allow(clippy::needless_range_loop)] + for i in 0..alignment_in_entries { + // 0 always works, that’s by design. + raw_entries[i] = self + .get_ref(first_index + i) + .map(|e| e.to_plain()) + .unwrap_or(0) + .to_be(); + } + + image + .write(&buffer, offset.0 + (first_index * size_of::()) as u64) + .await + } +} + +/// Check whether the given table offset/size is valid. +/// +/// Also works for refcount blocks (with cheating, because their entry size can be less than a +/// byte), which is why it is outside of [`Table`]. +fn check_table( + name: &str, + offset: u64, + entries: usize, + entry_size: usize, + max_entries: usize, + cluster_size: usize, +) -> io::Result<()> { + if entries > max_entries { + return Err(invalid_data(format!( + "{name} too big: {entries} > {max_entries}", + ))); + } + + if offset % (cluster_size as u64) != 0 { + return Err(invalid_data(format!("{name}: Unaligned offset: {offset}"))); + } + + let byte_size = entries + .checked_mul(entry_size) + .ok_or_else(|| invalid_data(format!("{name} size overflow: {entries} * {entry_size}")))?; + let end_offset = offset + .checked_add(byte_size as u64) + .ok_or_else(|| invalid_data(format!("{name} offset overflow: {offset} + {byte_size}")))?; + if end_offset > MAX_FILE_LENGTH { + return Err(invalid_data(format!( + "{name}: Invalid end offset: {end_offset} > {MAX_FILE_LENGTH}" + ))); + } + + Ok(()) +} diff --git a/src/imago/src/qcow2/mod.rs b/src/imago/src/qcow2/mod.rs new file mode 100644 index 00000000..9922e4df --- /dev/null +++ b/src/imago/src/qcow2/mod.rs @@ -0,0 +1,425 @@ +//! Qcow2 implementation. + +mod allocation; +mod cache; +mod compressed; +mod cow; +mod io_func; +mod mappings; +mod metadata; +#[cfg(feature = "sync-wrappers")] +mod sync_wrappers; +mod types; + +use crate::async_lru_cache::AsyncLruCache; +use crate::format::drivers::{FormatDriverInstance, Mapping}; +use crate::format::wrapped::WrappedFormat; +use crate::io_buffers::IoVectorMut; +use crate::misc_helpers::{invalid_data, ResultErrorContext}; +use crate::raw::Raw; +use crate::{FormatAccess, Storage, StorageExt, StorageOpenOptions}; +use allocation::Allocator; +use async_trait::async_trait; +use cache::L2CacheBackend; +use metadata::*; +use std::fmt::{self, Debug, Display, Formatter}; +use std::ops::Range; +use std::path::Path; +use std::sync::Arc; +use std::{cmp, io}; +use tokio::sync::{Mutex, RwLock}; +use types::*; + +/// Access qcow2 images. 
+/// +/// Allows access to qcow2 images (v2 and v3), referencing the following objects: +/// - Metadata storage object: The image file itself +/// - Data file (storage object): May be the image file itself, or an external data file +/// - Backing image `WrappedFormat`: A backing disk image in any format +#[must_use = "qcow2 images must be flushed before closing"] +pub struct Qcow2<S: Storage + 'static, F: WrappedFormat<S> + 'static = FormatAccess<S>> { + /// Image file (which contains the qcow2 metadata). + metadata: Arc<S>, + + /// Whether this image may be modified. + writable: bool, + + /// Whether the user explicitly assigned a data file storage object (or `None`). + storage_set: bool, + /// Data file storage object; will use `metadata` if `None`. + storage: Option<S>, + /// Whether the user explicitly assigned a backing file (or `None`). + backing_set: bool, + /// Backing image. + backing: Option<F>, + + /// Qcow2 header. + header: Arc<Header>
, + /// L1 table. + l1_table: RwLock, + + /// L2 table cache. + l2_cache: AsyncLruCache>, + + /// Allocates clusters. + /// + /// Is `None` for read-only images. + allocator: Option>>, +} + +impl + 'static> Qcow2 { + /// Opens a qcow2 file. + /// + /// `metadata` is the file containing the qcow2 metadata. If `writable` is not set, no + /// modifications are permitted. + /// + /// This will not open any other storage objects needed, i.e. no backing image, no external + /// data file. If you want to handle those manually, check whether an external data file is + /// needed via [`Qcow2::requires_external_data_file()`], and, if necessary, assign one via + /// [`Qcow2::set_data_file()`]; and assign a backing image via [`Qcow2::set_backing()`]. + /// + /// If you want to use the implicit references given in the image header, use + /// [`Qcow2::open_implicit_dependencies()`]. + pub async fn open_image(metadata: S, writable: bool) -> io::Result { + let header = Arc::new(Header::load(&metadata, writable).await?); + + let cb = header.cluster_bits(); + let l1_offset = header.l1_table_offset(); + let l1_cluster = l1_offset + .checked_cluster(cb) + .ok_or_else(|| invalid_data("Unaligned L1 table: {l1_offset}"))?; + + let l1_table = + L1Table::load(&metadata, &header, l1_cluster, header.l1_table_entries()).await?; + + let metadata = Arc::new(metadata); + + let allocator = if writable { + let allocator = Allocator::new(Arc::clone(&metadata), Arc::clone(&header)).await?; + Some(Mutex::new(allocator)) + } else { + None + }; + + let l2_cache_backend = L2CacheBackend::new(Arc::clone(&metadata), Arc::clone(&header)); + let l2_cache = AsyncLruCache::new(l2_cache_backend, 128); + + Ok(Qcow2 { + metadata, + + writable, + + storage_set: false, + storage: None, + backing_set: false, + backing: None, + + header, + l1_table: RwLock::new(l1_table), + + l2_cache, + allocator, + }) + } + + /// Open a qcow2 file at the given path. + /// + /// Open the file as a storage object via [`Storage::open()`], with write access if specified, + /// then pass that object to [`Qcow2::open_image()`]. + /// + /// This will not open any other storage objects needed, i.e. no backing image, no external + /// data file. If you want to handle those manually, check whether an external data file is + /// needed via [`Qcow2::requires_external_data_file()`], and, if necessary, assign one via + /// [`Qcow2::set_data_file()`]; and assign a backing image via [`Qcow2::set_backing()`]. + /// + /// If you want to use the implicit references given in the image header, use + /// [`Qcow2::open_implicit_dependencies()`]. + pub async fn open_path>(path: P, writable: bool) -> io::Result { + let storage_opts = StorageOpenOptions::new().write(writable).filename(path); + let metadata = S::open(storage_opts).await?; + Self::open_image(metadata, writable).await + } + + /// Check whether the given image file is a qcow2 file. + pub(crate) async fn probe(metadata: &S) -> io::Result<()> { + Header::load(metadata, true).await?; + Ok(()) + } + + /// Does this qcow2 image require an external data file? + /// + /// Conversely, if this is `false`, this image must not use an external data file. + pub fn requires_external_data_file(&self) -> bool { + self.header.external_data_file() + } + + /// External data file filename given in the image header. + /// + /// Note that even if an image requires an external data file, the header may not contain its + /// filename. In this case, an external data file must be set explicitly via + /// [`Qcow2::set_data_file()`]. 
+ pub fn implicit_external_data_file(&self) -> Option<&String> { + self.header.external_data_filename() + } + + /// Backing image filename given in the image header. + pub fn implicit_backing_file(&self) -> Option<&String> { + self.header.backing_filename() + } + + /// Backing image format given in the image header. + /// + /// If this is `None`, the backing image’s format should be probed. Note that this may be + /// dangerous if guests have write access to the backing file: Given a raw image, a guest can + /// write a qcow2 header into it, resulting in the image being opened as qcow2 the next time, + /// allowing the guest to read arbitrary files (e.g. by setting them as backing files). + pub fn implicit_backing_format(&self) -> Option<&String> { + self.header.backing_format() + } + + /// Assign the data file. + /// + /// `None` means using the same data storage for both metadata and data, which should be used + /// if [`Qcow2::requires_external_data_file()`] is `false`. + pub fn set_data_file(&mut self, file: Option) { + self.storage = file; + self.storage_set = true; + } + + /// Assign a backing image. + /// + /// `None` means no backing image, i.e. reading from unallocated areas will produce zeroes. + pub fn set_backing(&mut self, backing: Option) { + self.backing = backing; + self.backing_set = true; + } + + /// Get the data storage object. + /// + /// If we have an external data file, return that. Otherwise, return the image (metadata) + /// file. + fn storage(&self) -> &S { + self.storage.as_ref().unwrap_or(&self.metadata) + } + + /// Return the image’s implicit data file (as given in the image header). + async fn open_implicit_data_file(&self) -> io::Result> { + if !self.header.external_data_file() { + return Ok(None); + } + + let Some(filename) = self.header.external_data_filename() else { + return Err(io::Error::other( + "Image requires external data file, but no filename given", + )); + }; + + let absolute = self + .metadata + .resolve_relative_path(filename) + .err_context(|| format!("Cannot resolve external data file name {filename}"))?; + + let opts = StorageOpenOptions::new() + .write(true) + .filename(absolute.clone()); + + Ok(Some(S::open(opts).await.err_context(|| { + format!("External data file {absolute:?}") + })?)) + } + + /// Wrap `file` in the `Raw` format. Helper for [`Qcow2::implicit_backing_file()`]. + async fn open_raw_backing_file(&self, file: S) -> io::Result { + let raw = Raw::open_image(file, false).await?; + Ok(F::wrap(FormatAccess::new(raw))) + } + + /// Wrap `file` in the `Qcow2` format. Helper for [`Qcow2::implicit_backing_file()`]. + async fn open_qcow2_backing_file(&self, file: S) -> io::Result { + let mut qcow2 = Self::open_image(file, false).await?; + // Recursive, so needs to be boxed + Box::pin(qcow2.open_implicit_dependencies()).await?; + Ok(F::wrap(FormatAccess::new(qcow2))) + } + + /// Return the image’s implicit backing image (as given in the image header). 
+ async fn open_implicit_backing_file(&self) -> io::Result> { + let Some(filename) = self.header.backing_filename() else { + return Ok(None); + }; + + let absolute = self + .metadata + .resolve_relative_path(filename) + .err_context(|| format!("Cannot resolve backing file name {filename}"))?; + + let opts = StorageOpenOptions::new().filename(absolute.clone()); + let file = S::open(opts) + .await + .err_context(|| format!("Backing file {absolute:?}"))?; + + let result = match self.header.backing_format().map(|f| f.as_str()) { + Some("qcow2") => self.open_qcow2_backing_file(file).await.map(Some), + Some("raw") | Some("file") => self.open_raw_backing_file(file).await.map(Some), + + Some(fmt) => Err(io::Error::other(format!("Unknown backing format {fmt}"))), + + None => { + if Self::probe(&file).await.is_ok() { + self.open_qcow2_backing_file(file).await.map(Some) + } else { + self.open_raw_backing_file(file).await.map(Some) + } + } + }; + + result.err_context(|| format!("Backing file {absolute:?}")) + } + + /// Open all implicit dependencies. + /// + /// Qcow2 images have dependencies: + /// - The metadata file, which is the image file itself. + /// - The data file, which may be the same as the metadata file, or may be an external data + /// file. + /// - A backing disk image in any format. + /// + /// All of this can be set explicitly: + /// - The metadata file is always given explicitly to [`Qcow2::open_image()`]. + /// - The data file can be set via [`Qcow2::set_data_file()`]. + /// - The backing image can be set via [`Qcow2::set_backing()`]. + /// + /// But the image header can also provide “default” references to the data file and a backing + /// image, which we call *implicit* dependencies. This function opens all such implicit + /// dependencies if they have not been overridden with prior calls to + /// [`Qcow2::set_data_file()`] or [`Qcow2::set_backing()`], respectively. + pub async fn open_implicit_dependencies(&mut self) -> io::Result<()> { + if !self.storage_set { + self.storage = self.open_implicit_data_file().await?; + self.storage_set = true; + } + + if !self.backing_set { + self.backing = self.open_implicit_backing_file().await?; + self.backing_set = true; + } + + Ok(()) + } + + /// Require write access, i.e. return an error for read-only images. 
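For orientation, a typical open sequence built from the methods above might look as follows; this is a minimal sketch over a generic storage type `S` (assuming the default `FormatAccess` wrapper), with the eventual `FormatAccess` wrapping for I/O omitted:

// Sketch: open a qcow2 image read-only and let the header's implicit references
// supply the external data file and backing image, if any.
async fn open_readonly_example<S: Storage + 'static>(
    path: &std::path::Path,
) -> std::io::Result<Qcow2<S>> {
    let mut qcow2 = Qcow2::<S>::open_path(path, false).await?;
    qcow2.open_implicit_dependencies().await?;
    Ok(qcow2)
}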
+ fn need_writable(&self) -> io::Result<()> { + self.writable + .then_some(()) + .ok_or_else(|| io::Error::other("Image is read-only")) + } +} + +#[async_trait(?Send)] +impl> FormatDriverInstance for Qcow2 { + type Storage = S; + + fn size(&self) -> u64 { + self.header.size() + } + + fn collect_storage_dependencies(&self) -> Vec<&S> { + let mut v = self + .backing + .as_ref() + .map(|b| b.unwrap().collect_storage_dependencies()) + .unwrap_or_default(); + + v.push(&self.metadata); + if let Some(storage) = self.storage.as_ref() { + v.push(storage); + } + + v + } + + fn writable(&self) -> bool { + self.writable + } + + async fn get_mapping<'a>( + &'a self, + offset: u64, + max_length: u64, + ) -> io::Result<(Mapping<'a, S>, u64)> { + let length_until_eof = match self.header.size().checked_sub(offset) { + None | Some(0) => return Ok((Mapping::Eof, 0)), + Some(length) => length, + }; + + let max_length = cmp::min(max_length, length_until_eof); + let offset = GuestOffset(offset); + self.do_get_mapping(offset, max_length).await + } + + async fn ensure_data_mapping<'a>( + &'a self, + offset: u64, + length: u64, + overwrite: bool, + ) -> io::Result<(&'a S, u64, u64)> { + let length_until_eof = self.header.size().saturating_sub(offset); + if length_until_eof < length { + return Err(io::Error::other("Cannot allocate beyond the disk size")); + } + + if length == 0 { + return Ok((self.storage(), 0, 0)); + } + + self.need_writable()?; + let offset = GuestOffset(offset); + self.do_ensure_data_mapping(offset, length, overwrite).await + } + + async fn readv_special(&self, bufv: IoVectorMut<'_>, offset: u64) -> io::Result<()> { + let offset = GuestOffset(offset); + self.do_readv_special(bufv, offset).await + } + + async fn flush(&self) -> io::Result<()> { + self.l2_cache.flush().await?; + if let Some(allocator) = self.allocator.as_ref() { + allocator.lock().await.flush_rb_cache().await?; + } + + self.metadata.flush().await?; + if let Some(storage) = self.storage.as_ref() { + storage.flush().await?; + } + // Backing file is read-only, so need not be flushed from us. + Ok(()) + } + + async fn sync(&self) -> io::Result<()> { + self.metadata.sync().await?; + if let Some(storage) = self.storage.as_ref() { + storage.sync().await?; + } + // Backing file is read-only, so need not be synced from us. + Ok(()) + } +} + +impl> Debug for Qcow2 { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct("Qcow2") + .field("metadata", &self.metadata) + .field("storage_set", &self.storage_set) + .field("storage", &self.storage) + .field("backing_set", &self.backing_set) + .field("backing", &self.backing) + .finish() + } +} + +impl> Display for Qcow2 { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "qcow2[{}]", self.metadata) + } +} diff --git a/src/imago/src/qcow2/sync_wrappers.rs b/src/imago/src/qcow2/sync_wrappers.rs new file mode 100644 index 00000000..f78d41bf --- /dev/null +++ b/src/imago/src/qcow2/sync_wrappers.rs @@ -0,0 +1,32 @@ +//! Synchronous wrapper around qcow2 functions. + +use super::*; + +impl + 'static> Qcow2 { + /// Synchronous wrapper around [`Qcow2::open_image()`]. + /// + /// Runs the async function in an ephemeral tokio runtime. + pub fn open_image_sync(metadata: S, writable: bool) -> io::Result { + tokio::runtime::Builder::new_current_thread() + .build()? + .block_on(Self::open_image(metadata, writable)) + } + + /// Synchronous wrapper around [`Qcow2::open_path()`]. + /// + /// Runs the async function in an ephemeral tokio runtime. 
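+    ///
+    /// A minimal sketch (path, storage type, and type parameters are illustrative;
+    /// `StdFile` stands in for whichever storage driver is used):
+    ///
+    /// ```ignore
+    /// let mut img: Qcow2<StdFile> = Qcow2::open_path_sync("image.qcow2", false)?;
+    /// img.open_implicit_dependencies_sync()?;
+    /// ```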
+ pub fn open_path_sync>(path: P, writable: bool) -> io::Result { + tokio::runtime::Builder::new_current_thread() + .build()? + .block_on(Self::open_path(path, writable)) + } + + /// Synchronous wrapper around [`Qcow2::open_implicit_dependencies()`]. + /// + /// Runs the async function in an ephemeral tokio runtime. + pub fn open_implicit_dependencies_sync(&mut self) -> io::Result<()> { + tokio::runtime::Builder::new_current_thread() + .build()? + .block_on(self.open_implicit_dependencies()) + } +} diff --git a/src/imago/src/qcow2/types.rs b/src/imago/src/qcow2/types.rs new file mode 100644 index 00000000..a80f6ea7 --- /dev/null +++ b/src/imago/src/qcow2/types.rs @@ -0,0 +1,294 @@ +//! Helper types. +//! +//! Contains types like `GuestOffset` or `HostCluster`. This strong typing ensures there is no +//! confusion between what is what. + +use super::*; +use std::fmt::{self, Display, Formatter}; +use std::ops::{Add, AddAssign, Sub, SubAssign}; + +/// Guest offset. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub(super) struct GuestOffset(pub u64); + +/// Guest cluster index. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub(super) struct GuestCluster(pub u64); + +/// Host cluster offset. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub(super) struct HostOffset(pub u64); + +/// Host cluster index. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub(super) struct HostCluster(pub u64); + +/// Cluster count. +#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub(super) struct ClusterCount(pub u64); + +impl GuestOffset { + /// Return the offset from the start of the containing guest clusters. + pub fn in_cluster_offset(self, cluster_bits: u32) -> usize { + (self.0 % (1 << cluster_bits)) as usize + } + + /// Return the containing cluster’s index in its L2 table. + pub fn l2_index(self, cluster_bits: u32) -> usize { + self.cluster(cluster_bits).l2_index(cluster_bits) + } + + /// Return the containing cluster’s L2 table’s index in the L1 table. + pub fn l1_index(self, cluster_bits: u32) -> usize { + self.cluster(cluster_bits).l1_index(cluster_bits) + } + + /// Return the containing cluster’s index. + pub fn cluster(self, cluster_bits: u32) -> GuestCluster { + GuestCluster(self.0 >> cluster_bits) + } + + /// How many bytes remain in this cluster after this offset. + pub fn remaining_in_cluster(self, cluster_bits: u32) -> u64 { + ((1 << cluster_bits) - self.in_cluster_offset(cluster_bits)) as u64 + } + + /// How many bytes remain in this L2 table after this offset. + pub fn remaining_in_l2_table(self, cluster_bits: u32) -> u64 { + // See `Header::l2_entries()` + let l2_entries = 1 << (cluster_bits - 3); + let after_this = ((l2_entries - (self.l2_index(cluster_bits) + 1)) as u64) << cluster_bits; + self.remaining_in_cluster(cluster_bits) + after_this + } +} + +impl GuestCluster { + /// Return this cluster’s offset. + pub fn offset(self, cluster_bits: u32) -> GuestOffset { + GuestOffset(self.0 << cluster_bits) + } + + /// Return this cluster’s index in its L2 table. + pub fn l2_index(self, cluster_bits: u32) -> usize { + // See `Header::l2_entries()` + let l2_entries = 1 << (cluster_bits - 3); + (self.0 % l2_entries) as usize + } + + /// Return this cluster’s L2 table’s index in the L1 table. + pub fn l1_index(self, cluster_bits: u32) -> usize { + let l2_entries_shift = cluster_bits - 3; + (self.0 >> l2_entries_shift) as usize + } + + /// Return the cluster at the given L1 and L2 table indices. 
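+    ///
+    /// For example, with 64 KiB clusters (`cluster_bits == 16`) there are
+    /// 2^(16 - 3) = 8192 L2 entries per table, so L1 index 1 and L2 index 5 yield
+    /// guest cluster 8197.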
+ pub fn from_l1_l2_indices(l1_index: usize, l2_index: usize, cluster_bits: u32) -> Self { + let l2_entries_shift = cluster_bits - 3; + GuestCluster(((l1_index as u64) << l2_entries_shift) + l2_index as u64) + } + + /// Return the next cluster in this L2 table, if any. + /// + /// Return `None` if this is the last cluster in this L2 table. + pub fn next_in_l2(self, cluster_bits: u32) -> Option { + // See `Header::l2_entries()` + let l2_entries = 1 << (cluster_bits - 3); + let l1_index = self.l1_index(cluster_bits); + let l2_index = self.l2_index(cluster_bits); + let l2_index = l2_index.checked_add(1)?; + if l2_index >= l2_entries { + None + } else { + Some(GuestCluster::from_l1_l2_indices( + l1_index, + l2_index, + cluster_bits, + )) + } + } + + /// Return the first cluster in the next L2 table. + pub fn first_in_next_l2(self, cluster_bits: u32) -> GuestCluster { + let l2_entries = 1 << (cluster_bits - 3); + GuestCluster((self.0 + 1).next_multiple_of(l2_entries)) + } +} + +impl HostOffset { + /// Return the offset from the start of the containing host cluster. + pub fn in_cluster_offset(self, cluster_bits: u32) -> usize { + (self.0 % (1 << cluster_bits)) as usize + } + + /// Return the containing cluster’s index. + pub fn cluster(self, cluster_bits: u32) -> HostCluster { + HostCluster(self.0 >> cluster_bits) + } + + /// If this offset points to the start of a cluster, get its index. + /// + /// If this offset points inside of a cluster, return `None`. As oposed to just `cluster()`, + /// this will not discard information: `self.checked_cluster(cb).unwrap().offset() == self`, + /// because there is no in-cluster offset that could be lost. + pub fn checked_cluster(self, cluster_bits: u32) -> Option { + (self.in_cluster_offset(cluster_bits) == 0).then_some(self.cluster(cluster_bits)) + } +} + +impl HostCluster { + /// Return this cluster’s offset. + pub fn offset(self, cluster_bits: u32) -> HostOffset { + HostOffset(self.0 << cluster_bits) + } + + /// Get this cluster’s index in its refcount block. + pub fn rb_index(self, rb_bits: u32) -> usize { + let rb_entries = 1 << rb_bits; + (self.0 % rb_entries) as usize + } + + /// Get this cluster’s refcount block’s index in the refcount table. + pub fn rt_index(self, rb_bits: u32) -> usize { + (self.0 >> rb_bits) as usize + } + + /// Get both the reftable and refblock indices for this cluster. + pub fn rt_rb_indices(self, rb_bits: u32) -> (usize, usize) { + (self.rt_index(rb_bits), self.rb_index(rb_bits)) + } + + /// Construct a cluster index from its reftable and refblock indices. + pub fn from_ref_indices(rt_index: usize, rb_index: usize, rb_bits: u32) -> Self { + HostCluster(((rt_index as u64) << rb_bits) + rb_index as u64) + } + + /// Returns the host offset corresponding to `guest_offset`. + /// + /// Assuming `guest_offset.cluster()` is mapped to `self`, return the exact host offset + /// matching `guest_offset`. + /// + /// Same as `self.offset(cb) + guest_offset.in_cluster_offset`. + pub fn relative_offset(self, guest_offset: GuestOffset, cluster_bits: u32) -> HostOffset { + self.offset(cluster_bits) + guest_offset.in_cluster_offset(cluster_bits) as u64 + } +} + +impl ClusterCount { + /// Get how many clusters are required to cover `byte_size`. + /// + /// This rounds up. + pub fn from_byte_size(byte_size: u64, cluster_bits: u32) -> Self { + ClusterCount(byte_size.div_ceil(1 << cluster_bits)) + } + + /// Return the full byte size of this many clusters. 
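+    ///
+    /// For example, with 64 KiB clusters (`cluster_bits == 16`),
+    /// `ClusterCount::from_byte_size(65537, 16)` rounds up to 2 clusters, and
+    /// `ClusterCount(3).byte_size(16)` is 3 * 64 KiB = 196608 bytes.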
+ pub fn byte_size(self, cluster_bits: u32) -> u64 { + self.0 << cluster_bits + } +} + +impl Add for HostCluster { + type Output = Self; + + fn add(self, rhs: ClusterCount) -> Self { + HostCluster(self.0 + rhs.0) + } +} + +impl AddAssign for HostCluster { + fn add_assign(&mut self, rhs: ClusterCount) { + self.0 += rhs.0; + } +} + +impl Sub for HostCluster { + type Output = Self; + + fn sub(self, rhs: ClusterCount) -> Self { + HostCluster(self.0 - rhs.0) + } +} + +impl SubAssign for HostCluster { + fn sub_assign(&mut self, rhs: ClusterCount) { + self.0 -= rhs.0; + } +} + +impl Sub for HostCluster { + type Output = ClusterCount; + + fn sub(self, rhs: Self) -> ClusterCount { + ClusterCount(self.0 - rhs.0) + } +} + +impl Add for ClusterCount { + type Output = Self; + + fn add(self, rhs: ClusterCount) -> Self { + ClusterCount(self.0 + rhs.0) + } +} + +impl AddAssign for ClusterCount { + fn add_assign(&mut self, rhs: ClusterCount) { + self.0 += rhs.0; + } +} + +impl Sub for ClusterCount { + type Output = Self; + + fn sub(self, rhs: ClusterCount) -> Self { + ClusterCount(self.0 - rhs.0) + } +} + +impl SubAssign for ClusterCount { + fn sub_assign(&mut self, rhs: ClusterCount) { + self.0 -= rhs.0; + } +} + +impl Add for HostOffset { + type Output = Self; + + fn add(self, rhs: u64) -> Self { + HostOffset(self.0 + rhs) + } +} + +impl Sub for HostOffset { + type Output = Self; + + fn sub(self, rhs: u64) -> Self { + HostOffset(self.0 - rhs) + } +} + +impl Sub for HostOffset { + type Output = u64; + + fn sub(self, rhs: Self) -> u64 { + self.0 - rhs.0 + } +} + +impl Display for GuestOffset { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "0x{:x}", self.0) + } +} + +impl Display for HostOffset { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "0x{:x}", self.0) + } +} + +impl Display for ClusterCount { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} diff --git a/src/imago/src/raw.rs b/src/imago/src/raw.rs new file mode 100644 index 00000000..ed1a2531 --- /dev/null +++ b/src/imago/src/raw.rs @@ -0,0 +1,128 @@ +//! Access generic files as images. +//! +//! Allows accessing generic storage objects (`Storage`) as images (i.e. `FormatAccess`). + +use crate::format::drivers::{FormatDriverInstance, Mapping}; +use crate::{Storage, StorageOpenOptions}; +use async_trait::async_trait; +use std::fmt::{self, Display, Formatter}; +use std::io; +use std::path::Path; + +/// Wraps a storage object without any translation. +#[derive(Debug)] +pub struct Raw { + /// Wrapped storage object. + inner: S, + + /// Whether this image may be modified. + writable: bool, + + /// Disk size, which is the file size when this object was created. + size: u64, +} + +impl Raw { + /// Wrap `inner`, allowing it to be used as a disk image in raw format. + pub async fn open_image(inner: S, writable: bool) -> io::Result { + let size = inner.size()?; + Ok(Raw { + inner, + writable, + size, + }) + } + + /// Open the given path as a storage object, and wrap it in `Raw`. + pub async fn open_path>(path: P, writable: bool) -> io::Result { + let storage_opts = StorageOpenOptions::new().write(writable).filename(path); + let inner = S::open(storage_opts).await?; + Self::open_image(inner, writable).await + } + + /// Wrap `inner`, allowing it to be used as a disk image in raw format. 
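+    ///
+    /// Unlike the other `*_sync` wrappers, no tokio runtime is needed here, because
+    /// only `inner.size()` is queried.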
+ #[cfg(feature = "sync-wrappers")] + pub fn open_image_sync(inner: S, writable: bool) -> io::Result { + let size = inner.size()?; + Ok(Raw { + inner, + writable, + size, + }) + } + + /// Synchronous wrapper around [`Raw::open_path()`]. + pub fn open_path_sync>(path: P, writable: bool) -> io::Result { + tokio::runtime::Builder::new_current_thread() + .build()? + .block_on(Self::open_path(path, writable)) + } +} + +#[async_trait(?Send)] +impl FormatDriverInstance for Raw { + type Storage = S; + + fn size(&self) -> u64 { + self.size + } + + fn collect_storage_dependencies(&self) -> Vec<&S> { + vec![&self.inner] + } + + fn writable(&self) -> bool { + self.writable + } + + async fn get_mapping<'a>( + &'a self, + offset: u64, + max_length: u64, + ) -> io::Result<(Mapping<'a, S>, u64)> { + let remaining = match self.size.checked_sub(offset) { + None | Some(0) => return Ok((Mapping::Eof, 0)), + Some(remaining) => remaining, + }; + + Ok(( + Mapping::Raw { + storage: &self.inner, + offset, + writable: true, + }, + std::cmp::min(max_length, remaining), + )) + } + + async fn ensure_data_mapping<'a>( + &'a self, + offset: u64, + length: u64, + _overwrite: bool, + ) -> io::Result<(&'a S, u64, u64)> { + let Some(remaining) = self.size.checked_sub(offset) else { + return Err(io::Error::other("Cannot allocate past the end of file")); + }; + if length > remaining { + return Err(io::Error::other("Cannot allocate past the end of file")); + } + + Ok((&self.inner, offset, length)) + } + + async fn flush(&self) -> io::Result<()> { + // No internal buffers to flush + self.inner.flush().await + } + + async fn sync(&self) -> io::Result<()> { + self.inner.sync().await + } +} + +impl Display for Raw { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "raw[{}]", self.inner) + } +} diff --git a/src/imago/src/storage/drivers.rs b/src/imago/src/storage/drivers.rs new file mode 100644 index 00000000..1edd5377 --- /dev/null +++ b/src/imago/src/storage/drivers.rs @@ -0,0 +1,184 @@ +//! Internal functionality for storage drivers. + +use crate::misc_helpers::Overlaps; +use crate::vector_select::FutureVector; +use std::ops::Range; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use tokio::sync::oneshot; + +/// Helper object for the [`StorageExt`](crate::StorageExt) implementation. +/// +/// State such as write blockers needs to be kept somewhere, and instead of introducing a wrapper +/// (that might be bypassed), we store it directly in the [`Storage`](crate::Storage) objects so it +/// cannot be bypassed (at least when using the [`StorageExt`](crate::StorageExt) methods). +#[derive(Debug, Default)] +pub struct CommonStorageHelper { + /// Current in-flight write that allow concurrent writes to the same region. + /// + /// Normal non-async RwLock, so do not await while locked! + weak_write_blockers: std::sync::RwLock, + + /// Current in-flight write that do not allow concurrent writes to the same region. + strong_write_blockers: std::sync::RwLock, +} + +/// A list of ranges blocked for some kind of concurrent access. +/// +/// Depending on the use, some will block all concurrent access (i.e. serializing writes will block +/// both serializing and non-serializing writes (strong blockers)), while others will only block a +/// subset (non-serializing writes will only block serializing writes (weak blockers)). +#[derive(Debug, Default)] +struct RangeBlockedList { + /// The list of ranges. 
+ /// + /// Serializing writes (strong write blockers) are supposed to be rare, so it is important that + /// entering and removing items into/from this list is cheap, not that iterating it is. + blocked: Vec>, +} + +/// A range blocked for some kind of concurrent access. +#[derive(Debug)] +struct RangeBlocked { + /// The range. + range: Range, + + /// List of requests awaiting the range to become unblocked. + /// + /// When the corresponding `RangeBlockedGuard` is dropped, these will all be awoken (via + /// `oneshot::Sender::send(())`). + /// + /// Normal non-async mutex, so do not await while locked! + waitlist: std::sync::Mutex>>, + + /// Index in the corresponding `RangeBlockedList.blocked` list, so it can be dropped quickly. + /// + /// (When the corresponding `RangeBlockedGuard` is dropped, this entry is swap-removed from the + /// `blocked` list, and the other entry taking its place has its `index` updated.) + /// + /// Only access under `blocked` lock! + index: AtomicUsize, +} + +/// Keeps a `RangeBlocked` alive. +/// +/// When dropped, removes the `RangeBlocked` from its list, and wakes all requests in the `waitlist`. +#[derive(Debug)] +pub struct RangeBlockedGuard<'a> { + /// List where this blocker resides. + list: &'a std::sync::RwLock, + + /// `Option`, so `drop()` can `take()` it and unwrap the `Arc`. + /// + /// Consequently, do not clone: Must have refcount 1 when dropped. (The only clone must be in + /// `self.list.blocked`, under index `self.block.index`.) + block: Option>, +} + +impl CommonStorageHelper { + /// Await concurrent strong write blockers for the given range. + /// + /// Strong write blockers are set up for writes that must not be intersected by any other + /// write. Await such intersecting concurrent write requests, and return a guard that will + /// delay such new writes until the guard is dropped. + pub async fn weak_write_blocker(&self, range: Range) -> RangeBlockedGuard<'_> { + let mut intersecting = FutureVector::new(); + + let range_block = { + // Acquire write lock first + let mut weak = self.weak_write_blockers.write().unwrap(); + let strong = self.strong_write_blockers.read().unwrap(); + + strong.collect_intersecting_await_futures(&range, &mut intersecting); + weak.block(range) + }; + + intersecting.discarding_join().await.unwrap(); + + RangeBlockedGuard { + list: &self.weak_write_blockers, + block: Some(range_block), + } + } + + /// Await any concurrent write request for the given range. + /// + /// Block the given range for any concurrent write requests until the returned guard object is + /// dropped. Existing requests are awaited, and new ones will be delayed. + pub async fn strong_write_blocker(&self, range: Range) -> RangeBlockedGuard<'_> { + let mut intersecting = FutureVector::new(); + + let range_block = { + // Acquire write lock first + let mut strong = self.strong_write_blockers.write().unwrap(); + let weak = self.weak_write_blockers.read().unwrap(); + + weak.collect_intersecting_await_futures(&range, &mut intersecting); + strong.collect_intersecting_await_futures(&range, &mut intersecting); + strong.block(range) + }; + + intersecting.discarding_join().await.unwrap(); + + RangeBlockedGuard { + list: &self.strong_write_blockers, + block: Some(range_block), + } + } +} + +impl RangeBlockedList { + /// Collects futures to await intersecting request. + /// + /// Adds a future to `future_vector` for every intersecting request; awaiting that future will + /// await the request. 
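+    ///
+    /// Each such future is a `oneshot::Receiver` whose sender sits on the blocker's
+    /// waitlist, so it resolves once the corresponding guard is dropped.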
+ fn collect_intersecting_await_futures( + &self, + check_range: &Range, + future_vector: &mut FutureVector<(), oneshot::error::RecvError, oneshot::Receiver<()>>, + ) { + for range_block in self.blocked.iter() { + if range_block.range.overlaps(check_range) { + let (s, r) = oneshot::channel::<()>(); + range_block.waitlist.lock().unwrap().push(s); + future_vector.push(r); + } + } + } + + /// Enter a new blocked range into the list. + /// + /// This only blocks new requests, old requests must separately be awaited by awaiting all + /// futures returned by `collect_intersecting_await_futures()`. + fn block(&mut self, range: Range) -> Arc { + let range_block = Arc::new(RangeBlocked { + range, + waitlist: Default::default(), + index: self.blocked.len().into(), + }); + self.blocked.push(Arc::clone(&range_block)); + range_block + } +} + +impl Drop for RangeBlockedGuard<'_> { + fn drop(&mut self) { + let block = self.block.take().unwrap(); + + { + let mut list = self.list.write().unwrap(); + let i = block.index.load(Ordering::Relaxed); + let removed = list.blocked.swap_remove(i); + debug_assert!(Arc::ptr_eq(&removed, &block)); + if let Some(block) = list.blocked.get(i) { + block.index.store(i, Ordering::Relaxed); + } + } + + let block = Arc::into_inner(block).unwrap(); + let waitlist = block.waitlist.into_inner().unwrap(); + for waiting in waitlist { + waiting.send(()).unwrap(); + } + } +} diff --git a/src/imago/src/storage/ext.rs b/src/imago/src/storage/ext.rs new file mode 100644 index 00000000..54b4ab8b --- /dev/null +++ b/src/imago/src/storage/ext.rs @@ -0,0 +1,338 @@ +//! Provides the `StorageExt` struct for more convenient access. +//! +//! `Storage` is provided by the driver, so is supposed to be simple and only contain what’s +//! necessary. `StorageExt` builds on that to provide more convenient access, e.g. allows +//! unaligned requests and provides write serialization. + +use super::drivers::RangeBlockedGuard; +use crate::io_buffers::{IoBuffer, IoVector, IoVectorMut, IoVectorTrait}; +use crate::Storage; +use std::ops::Range; +use std::{cmp, io}; +use tracing::trace; + +/// Helper methods for storage objects. +/// +/// Provides some more convenient methods for accessing storage objects. +pub trait StorageExt: Storage { + /// Read data at `offset` into `bufv`. + /// + /// Reads until `bufv` is filled completely, i.e. will not do short reads. When reaching the + /// end of file, the rest of `bufv` is filled with 0. + /// + /// Checks alignment. If anything does not meet the requirements, enforces it (using ephemeral + /// bounce buffers). + #[allow(async_fn_in_trait)] // No need for Send + async fn readv(&self, bufv: IoVectorMut<'_>, offset: u64) -> io::Result<()>; + + /// Write data from `bufv` to `offset`. + /// + /// Writes all data from `bufv`, i.e. will not do short writes. When reaching the end of file, + /// it is grown as necessary so that the new end of file will be at `offset + bufv.len()`. + /// + /// If growing is not possible, expect writes beyond the end of file (even if only partially) + /// to fail. + /// + /// Checks alignment. If anything does not meet the requirements, enforces it using bounce + /// buffers and a read-modify-write cycle that blocks concurrent writes to the affected area. + #[allow(async_fn_in_trait)] // No need for Send + async fn writev(&self, bufv: IoVector<'_>, offset: u64) -> io::Result<()>; + + /// Read data at `offset` into `buf`. + /// + /// Reads until `buf` is filled completely, i.e. will not do short reads. 
When reaching the + /// end of file, the rest of `buf` is filled with 0. + /// + /// Checks alignment. If anything does not meet the requirements, enforces it (using ephemeral + /// bounce buffers). + #[allow(async_fn_in_trait)] // No need for Send + async fn read(&self, buf: impl Into>, offset: u64) -> io::Result<()>; + + /// Write data from `buf` to `offset`. + /// + /// Writes all data from `buf`, i.e. will not do short writes. When reaching the end of file, + /// it is grown as necessary so that the new end of file will be at `offset + buf.len()`. + /// + /// If growing is not possible, expect writes beyond the end of file (even if only partially) + /// to fail. + /// + /// Checks alignment. If anything does not meet the requirements, enforces it using bounce + /// buffers and a read-modify-write cycle that blocks concurrent writes to the affected area. + #[allow(async_fn_in_trait)] // No need for Send + async fn write(&self, buf: impl Into>, offset: u64) -> io::Result<()>; + + /// Ensure the given range reads back as zeroes. + #[allow(async_fn_in_trait)] // No need for Send + async fn write_zeroes(&self, offset: u64, length: u64) -> io::Result<()>; + + /// Discard the given range, with undefined contents when read back. + /// + /// Tell the storage layer this range is no longer needed and need not be backed by actual + /// storage. When read back, the data read will be undefined, i.e. not necessarily zeroes. + #[allow(async_fn_in_trait)] // No need for Send + async fn discard(&self, offset: u64, length: u64) -> io::Result<()>; + + /// Await concurrent strong write blockers for the given range. + /// + /// Strong write blockers are set up for writes that must not be intersected by any other + /// write. Await such intersecting concurrent write requests, and return a guard that will + /// delay such new writes until the guard is dropped. + #[allow(async_fn_in_trait)] // No need for Send + async fn weak_write_blocker(&self, range: Range) -> RangeBlockedGuard<'_>; + + /// Await any concurrent write request for the given range. + /// + /// Block the given range for any concurrent write requests until the returned guard object is + /// dropped. Existing requests are awaited, and new ones will be delayed. + #[allow(async_fn_in_trait)] // No need for Send + async fn strong_write_blocker(&self, range: Range) -> RangeBlockedGuard<'_>; +} + +impl StorageExt for S { + async fn readv(&self, mut bufv: IoVectorMut<'_>, offset: u64) -> io::Result<()> { + if bufv.is_empty() { + return Ok(()); + } + + let mem_align = self.mem_align(); + let req_align = self.req_align(); + + if is_aligned(&bufv, offset, mem_align, req_align) { + // Safe: Alignment checked + return unsafe { self.pure_readv(bufv, offset) }.await; + } + + trace!( + "Unaligned read: 0x{:x} + {} (size: {:#x})", + offset, + bufv.len(), + self.size().unwrap() + ); + + let req_align_mask = req_align as u64 - 1; + // Length must be aligned to both memory and request alignments + let len_align_mask = req_align_mask | (mem_align as u64 - 1); + debug_assert!((len_align_mask + 1) % (req_align as u64) == 0); + + let unpadded_end = offset + bufv.len(); + let padded_offset = offset & !req_align_mask; + // This will over-align at the end of file (aligning to exactly the end of file would be + // sufficient), but it is easier this way. 
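+        // (Illustrative: with `req_align == 512`, a read at offset 520 of length 100
+        // is padded to at least the byte range [512, 1024); memory alignment can widen
+        // the length further.)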
+ let padded_end = (unpadded_end + req_align_mask) & !req_align_mask; + // Now also align to memory alignment + let padded_len = (padded_end - padded_offset + len_align_mask) & !(len_align_mask); + let padded_end = padded_offset + padded_len; + + let padded_len: usize = (padded_end - padded_offset) + .try_into() + .map_err(|e| io::Error::other(format!("Cannot realign read: {e}")))?; + + trace!("Padded read: {padded_offset:#x} + {padded_len}"); + + let mut bounce_buf = IoBuffer::new(padded_len, mem_align)?; + + // Safe: Alignment enforced + unsafe { self.pure_readv(bounce_buf.as_mut().into(), padded_offset) }.await?; + + let in_buf_ofs = (offset - padded_offset) as usize; + // Must fit in `usize` because `padded_len: usize` + let in_buf_end = (unpadded_end - padded_offset) as usize; + + bufv.copy_from_slice(bounce_buf.as_ref_range(in_buf_ofs..in_buf_end).into_slice()); + + Ok(()) + } + + async fn writev(&self, bufv: IoVector<'_>, offset: u64) -> io::Result<()> { + if bufv.is_empty() { + return Ok(()); + } + + let mem_align = self.mem_align(); + let req_align = self.req_align(); + + if is_aligned(&bufv, offset, mem_align, req_align) { + let _sw_guard = self.weak_write_blocker(offset..(offset + bufv.len())).await; + + // Safe: Alignment checked, and weak write blocker set up + return unsafe { self.pure_writev(bufv, offset) }.await; + } + + trace!( + "Unaligned write: {:#x} + {} (size: {:#x})", + offset, + bufv.len(), + self.size().unwrap() + ); + + let req_align_mask = req_align - 1; + // Length must be aligned to both memory and request alignments + let len_align_mask = req_align_mask | (mem_align - 1); + let len_align = req_align_mask + 1; + debug_assert!(len_align % req_align == 0); + + let unpadded_end = offset + bufv.len(); + let padded_offset = offset & !(req_align_mask as u64); + // This will over-align at the end of file (aligning to exactly the end of file would be + // sufficient), but it is easier this way. Small TODO, as this will indeed increase the + // file length (which the over-alignment in `unaligned_readv()` does not). + let padded_end = (unpadded_end + req_align_mask as u64) & !(req_align_mask as u64); + // Now also align to memory alignment + let padded_len = + (padded_end - padded_offset + len_align_mask as u64) & !(len_align_mask as u64); + let padded_end = padded_offset + padded_len; + + let padded_len: usize = (padded_end - padded_offset) + .try_into() + .map_err(|e| io::Error::other(format!("Cannot realign write: {e}")))?; + + trace!("Padded write: {padded_offset:#x} + {padded_len}"); + + let mut bounce_buf = IoBuffer::new(padded_len, mem_align)?; + assert!(padded_len >= len_align && padded_len & len_align_mask == 0); + + // For the strong blocker, just the RMW regions (head and tail) would be enough. However, + // we don’t expect any concurrent writes to the non-RMW (pure write) regions (it is + // unlikely that the guest would write to the same area twice concurrently), so we don’t + // need to optimize for it. On the other hand, writes to the RMW regions are likely + // (adjacent writes), so those will be blocked either way. + // Instating fewer blockers makes them less expensive to check, though. 
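+        // (Illustrative: with `req_align == 4096`, a write at offset 5000 of length 100
+        // becomes a read-modify-write over at least [4096, 8192).)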
+ let _sw_guard = self.strong_write_blocker(padded_offset..padded_end).await; + + let in_buf_ofs = (offset - padded_offset) as usize; + // Must fit in `usize` because `padded_len: usize` + let in_buf_end = (unpadded_end - padded_offset) as usize; + + // RMW part 1: Read + + let head_len = in_buf_ofs; + let aligned_head_len = (head_len + len_align_mask) & !len_align_mask; + + let tail_len = padded_len - in_buf_end; + let aligned_tail_len = (tail_len + len_align_mask) & !len_align_mask; + + if aligned_head_len + aligned_tail_len == padded_len { + // Must read the whole bounce buffer + // Safe: Alignment enforced + unsafe { self.pure_readv(bounce_buf.as_mut().into(), padded_offset) }.await?; + } else { + if aligned_head_len > 0 { + let head_bufv = bounce_buf.as_mut_range(0..aligned_head_len).into(); + // Safe: Alignment enforced + unsafe { self.pure_readv(head_bufv, padded_offset) }.await?; + } + if aligned_tail_len > 0 { + let tail_start = padded_len - aligned_tail_len; + let tail_bufv = bounce_buf.as_mut_range(tail_start..padded_len).into(); + // Safe: Alignment enforced + unsafe { self.pure_readv(tail_bufv, padded_offset + tail_start as u64) }.await?; + } + } + + // RMW part 2: Modify + bufv.copy_into_slice(bounce_buf.as_mut_range(in_buf_ofs..in_buf_end).into_slice()); + + // RMW part 3: Write + // Safe: Alignment enforced, and strong write blocker set up + unsafe { self.pure_writev(bounce_buf.as_ref().into(), padded_offset) }.await + } + + async fn read(&self, buf: impl Into>, offset: u64) -> io::Result<()> { + self.readv(buf.into(), offset).await + } + + async fn write(&self, buf: impl Into>, offset: u64) -> io::Result<()> { + self.writev(buf.into(), offset).await + } + + async fn write_zeroes(&self, offset: u64, length: u64) -> io::Result<()> { + let zero_align = self.zero_align(); + debug_assert!(zero_align.is_power_of_two()); + let align_mask = zero_align as u64 - 1; + + let unaligned_end = offset + .checked_add(length) + .ok_or_else(|| io::Error::other("Zero-write wrap-around"))?; + let aligned_offset = (offset + align_mask) & !align_mask; + let aligned_end = unaligned_end & !align_mask; + + if aligned_end > aligned_offset { + let _sw_guard = self.weak_write_blocker(aligned_offset..aligned_end).await; + // Safe: Alignment checked, and weak write blocker set up + unsafe { self.pure_write_zeroes(aligned_offset, aligned_end - aligned_offset) }.await?; + } + + let zero_buf = if aligned_offset > offset || aligned_end < unaligned_end { + let mut buf = IoBuffer::new( + cmp::max(aligned_offset - offset, unaligned_end - aligned_end) as usize, + self.mem_align(), + )?; + buf.as_mut().into_slice().fill(0); + Some(buf) + } else { + None + }; + + if aligned_offset > offset { + let buf = zero_buf + .as_ref() + .unwrap() + .as_ref_range(0..((aligned_offset - offset) as usize)); + self.write(buf, offset).await?; + } + if aligned_end < unaligned_end { + let buf = zero_buf + .as_ref() + .unwrap() + .as_ref_range(0..((unaligned_end - aligned_end) as usize)); + self.write(buf, aligned_end).await?; + } + + Ok(()) + } + + async fn discard(&self, offset: u64, length: u64) -> io::Result<()> { + let discard_align = self.discard_align(); + debug_assert!(discard_align.is_power_of_two()); + let align_mask = discard_align as u64 - 1; + + let unaligned_end = offset + .checked_add(length) + .ok_or_else(|| io::Error::other("Discard wrap-around"))?; + let aligned_offset = (offset + align_mask) & !align_mask; + let aligned_end = unaligned_end & !align_mask; + + if aligned_end > aligned_offset { + let _sw_guard = 
self.weak_write_blocker(offset..(offset + length)).await; + // Safe: Alignment checked, and weak write blocker set up + unsafe { self.pure_discard(offset, length) }.await?; + } + + // Nothing to do for the unaligned part; discarding is always just advisory. + + Ok(()) + } + + async fn weak_write_blocker(&self, range: Range) -> RangeBlockedGuard<'_> { + self.get_storage_helper().weak_write_blocker(range).await + } + + async fn strong_write_blocker(&self, range: Range) -> RangeBlockedGuard<'_> { + self.get_storage_helper().strong_write_blocker(range).await + } +} + +/// Check whether the given request is aligned. +fn is_aligned(bufv: &V, offset: u64, mem_align: usize, req_align: usize) -> bool { + debug_assert!(mem_align.is_power_of_two() && req_align.is_power_of_two()); + + let req_align_mask = req_align as u64 - 1; + + if offset & req_align_mask != 0 { + false + } else if bufv.len() & req_align_mask == 0 { + bufv.is_aligned(mem_align, req_align) + } else { + false + } +} diff --git a/src/imago/src/storage/mod.rs b/src/imago/src/storage/mod.rs new file mode 100644 index 00000000..f464e11e --- /dev/null +++ b/src/imago/src/storage/mod.rs @@ -0,0 +1,535 @@ +//! Helper functionality to access storage. +//! +//! While not the primary purpose of this crate, to open VM images, we need to be able to access +//! different kinds of storage objects. Such objects are abstracted behind the `Storage` trait. + +pub(crate) mod drivers; +pub mod ext; + +use crate::io_buffers::{IoBuffer, IoVector, IoVectorMut}; +use drivers::CommonStorageHelper; +use ext::StorageExt; +use std::fmt::{Debug, Display}; +use std::future::Future; +use std::path::{Path, PathBuf}; +use std::pin::Pin; +use std::sync::Arc; +use std::{cmp, io}; + +/// Parameters from which a storage object can be constructed. +#[derive(Clone, Default)] +pub struct StorageOpenOptions { + /// Filename to open. + pub(crate) filename: Option, + + /// Whether the object should be opened as writable or read-only. + pub(crate) writable: bool, + + /// Whether to bypass the host page cache (if applicable). + pub(crate) direct: bool, +} + +/// Implementation for storage objects. +pub trait Storage: Debug + Display + Send + Sized + Sync { + /// Open a storage object. + /// + /// Different storage implementations may require different options. + #[allow(async_fn_in_trait)] // No need for Send + async fn open(_opts: StorageOpenOptions) -> io::Result { + Err(io::Error::new( + io::ErrorKind::Unsupported, + format!( + "Cannot open storage objects of type {}", + std::any::type_name::() + ), + )) + } + + /// Synchronous wrapper around [`Storage::open()`]. + #[cfg(feature = "sync-wrappers")] + fn open_sync(opts: StorageOpenOptions) -> io::Result { + tokio::runtime::Builder::new_current_thread() + .build()? + .block_on(Self::open(opts)) + } + + /// Minimum required alignment for memory buffers. + fn mem_align(&self) -> usize { + 1 + } + + /// Minimum required alignment for offsets and lengths. + fn req_align(&self) -> usize { + 1 + } + + /// Minimum required alignment for zero writes. + fn zero_align(&self) -> usize { + 1 + } + + /// Minimum required alignment for effective discards. + fn discard_align(&self) -> usize { + 1 + } + + /// Storage object length. + fn size(&self) -> io::Result; + + /// Resolve the given path relative to this storage object. + /// + /// `relative` need not really be a relative path; it is up to the storage driver to check + /// whether it is an absolute path that does not need to be changed, or a relative path that + /// needs to be resolved. 
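+    /// Typically this means joining `relative` onto the directory containing the
+    /// storage object's own filename, but the exact behavior is driver-defined.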
+ /// + /// Must not return a relative path. + /// + /// The returned `PathBuf` should be usable with `StorageOpenOptions::filename()`. + fn resolve_relative_path>(&self, _relative: P) -> io::Result { + Err(io::ErrorKind::Unsupported.into()) + } + + /// Read data at `offset` into `bufv`. + /// + /// Reads until `bufv` is filled completely, i.e. will not do short reads. When reaching the + /// end of file, the rest of `bufv` is filled with 0. + /// + /// # Safety + /// This is a pure read from storage. The request must be fully aligned to + /// [`Self::mem_align()`] and [`Self::req_align()`], and safeguards we want to implement for + /// safe concurrent access may not be available. + /// + /// Use [`StorageExt::readv()`] instead. + #[allow(async_fn_in_trait)] // No need for Send + async unsafe fn pure_readv(&self, bufv: IoVectorMut<'_>, offset: u64) -> io::Result<()>; + + /// Write data from `bufv` to `offset`. + /// + /// Writes all data from `bufv`, i.e. will not do short writes. When reaching the end of file, + /// grow it as necessary so that the new end of file will be at `offset + bufv.len()`. + /// + /// If growing is not possible, writes beyond the end of file (even if only partially) should + /// fail. + /// + /// # Safety + /// This is a pure write to storage. The request must be fully aligned to + /// [`Self::mem_align()`] and [`Self::req_align()`], and safeguards we want to implement for + /// safe concurrent access may not be available. + /// + /// Use [`StorageExt::writev()`] instead. + #[allow(async_fn_in_trait)] // No need for Send + async unsafe fn pure_writev(&self, bufv: IoVector<'_>, offset: u64) -> io::Result<()>; + + /// Ensure the given range reads back as zeroes. + /// + /// The default implementation writes actual zeroes as data, which is inefficient. Storage + /// drivers should override it with a more efficient implementation. + /// + /// # Safety + /// This is a pure write to storage. The request must be fully aligned to + /// [`Self::zero_align()`], and safeguards we want to implement for safe concurrent access may + /// not be available. + /// + /// Use [`StorageExt::write_zeroes()`] instead. + #[allow(async_fn_in_trait)] // No need for Send + async unsafe fn pure_write_zeroes(&self, mut offset: u64, mut length: u64) -> io::Result<()> { + let buflen = cmp::min(length, 1048576) as usize; + let mut buf = IoBuffer::new(buflen, self.mem_align())?; + buf.as_mut().into_slice().fill(0); + + while length > 0 { + let chunk_length = cmp::min(length, 1048576) as usize; + self.writev(buf.as_ref_range(0..chunk_length).into(), offset) + .await?; + offset += chunk_length as u64; + length -= chunk_length as u64; + } + + Ok(()) + } + + /// Discard the given range, with undefined contents when read back. + /// + /// Tell the storage layer this range is no longer needed and need not be backed by actual + /// storage. When read back, the data read will be undefined, i.e. not necessarily zeroes. + /// + /// No-op implementations therefore explicitly fulfill the interface contract. + /// + /// # Safety + /// This is a pure write to storage. The request must be fully aligned to + /// [`Self::discard_align()`], and safeguards we want to implement for safe concurrent access + /// may not be available. + /// + /// Use [`StorageExt::discard()`] instead. + #[allow(async_fn_in_trait)] // No need for Send + async unsafe fn pure_discard(&self, _offset: u64, _length: u64) -> io::Result<()> { + Ok(()) + } + + /// Flush internal buffers. 
+ /// + /// Does not necessarily sync those buffers to disk. When using `flush()`, consider whether + /// you want to call `sync()` afterwards. + #[allow(async_fn_in_trait)] // No need for Send + async fn flush(&self) -> io::Result<()>; + + /// Sync data already written to the storage hardware. + /// + /// This does not necessarily include flushing internal buffers, i.e. `flush`. When using + /// `sync()`, consider whether you want to call `flush()` before it. + #[allow(async_fn_in_trait)] // No need for Send + async fn sync(&self) -> io::Result<()>; + + /// Return the storage helper object (used by the [`StorageExt`] implementation). + fn get_storage_helper(&self) -> &CommonStorageHelper; +} + +/// Allow dynamic use of storage objects (i.e. is object safe). +/// +/// When using normal `Storage` objects, they must all be of the same type within a single disk +/// image chain. For example, every storage object underneath a `FormatAccess` object +/// must be of type `StdFile`. +/// +/// `DynStorage` allows the use of `Box`, which implements `Storage`, to allow +/// mixed storage object types. Therefore, a `FormatAccess>` allows e.g. the +/// use of both `Box` and `Box` storage objects together. (`Arc` instead of `Box` +/// works, too.) +/// +/// Async functions in `DynStorage` return boxed futures (`Pin>`), which makes them +/// slighly less efficient than async functions in `Storage`, hence the distinction. +pub trait DynStorage: Debug + Display + Send + Sync { + /// Wrapper around [`Storage::mem_align()`]. + fn dyn_mem_align(&self) -> usize; + + /// Wrapper around [`Storage::req_align()`]. + fn dyn_req_align(&self) -> usize; + + /// Wrapper around [`Storage::zero_align()`]. + fn dyn_zero_align(&self) -> usize; + + /// Wrapper around [`Storage::discard_align()`]. + fn dyn_discard_align(&self) -> usize; + + /// Wrapper around [`Storage::size()`]. + fn dyn_size(&self) -> io::Result; + + /// Wrapper around [`Storage::resolve_relative_path()`]. + fn dyn_resolve_relative_path(&self, relative: &Path) -> io::Result; + + /// Object-safe wrapper around [`Storage::pure_readv()`]. + /// + /// # Safety + /// Same considerations are for [`Storage::pure_readv()`] apply. + unsafe fn dyn_pure_readv<'a>( + &'a self, + bufv: IoVectorMut<'a>, + offset: u64, + ) -> Pin> + 'a>>; + + /// Object-safe wrapper around [`Storage::pure_writev()`]. + /// + /// # Safety + /// Same considerations are for [`Storage::pure_writev()`] apply. + unsafe fn dyn_pure_writev<'a>( + &'a self, + bufv: IoVector<'a>, + offset: u64, + ) -> Pin> + 'a>>; + + /// Object-safe wrapper around [`Storage::pure_write_zeroes()`]. + /// + /// # Safety + /// Same considerations are for [`Storage::pure_write_zeroes()`] apply. + unsafe fn dyn_pure_write_zeroes( + &self, + offset: u64, + length: u64, + ) -> Pin> + '_>>; + + /// Object-safe wrapper around [`Storage::pure_discard()`]. + /// + /// # Safety + /// Same considerations are for [`Storage::pure_discard()`] apply. + unsafe fn dyn_pure_discard( + &self, + offset: u64, + length: u64, + ) -> Pin> + '_>>; + + /// Object-safe wrapper around [`Storage::flush()`]. + fn dyn_flush(&self) -> Pin> + '_>>; + + /// Object-safe wrapper around [`Storage::sync()`]. + fn dyn_sync(&self) -> Pin> + '_>>; + + /// Wrapper around [`Storage::get_storage_helper()`]. 
+ fn dyn_get_storage_helper(&self) -> &CommonStorageHelper; +} + +impl Storage for &S { + fn mem_align(&self) -> usize { + (*self).mem_align() + } + + fn req_align(&self) -> usize { + (*self).req_align() + } + + fn zero_align(&self) -> usize { + (*self).zero_align() + } + + fn discard_align(&self) -> usize { + (*self).discard_align() + } + + fn size(&self) -> io::Result { + (*self).size() + } + + fn resolve_relative_path>(&self, relative: P) -> io::Result { + (*self).resolve_relative_path(relative) + } + + async unsafe fn pure_readv(&self, bufv: IoVectorMut<'_>, offset: u64) -> io::Result<()> { + unsafe { (*self).pure_readv(bufv, offset).await } + } + + async unsafe fn pure_writev(&self, bufv: IoVector<'_>, offset: u64) -> io::Result<()> { + unsafe { (*self).pure_writev(bufv, offset).await } + } + + async unsafe fn pure_write_zeroes(&self, offset: u64, length: u64) -> io::Result<()> { + unsafe { (*self).pure_write_zeroes(offset, length).await } + } + + async unsafe fn pure_discard(&self, offset: u64, length: u64) -> io::Result<()> { + unsafe { (*self).pure_discard(offset, length).await } + } + + async fn flush(&self) -> io::Result<()> { + (*self).flush().await + } + + async fn sync(&self) -> io::Result<()> { + (*self).sync().await + } + + fn get_storage_helper(&self) -> &CommonStorageHelper { + (*self).get_storage_helper() + } +} + +impl DynStorage for S { + fn dyn_mem_align(&self) -> usize { + ::mem_align(self) + } + + fn dyn_req_align(&self) -> usize { + ::req_align(self) + } + + fn dyn_zero_align(&self) -> usize { + ::zero_align(self) + } + + fn dyn_discard_align(&self) -> usize { + ::discard_align(self) + } + + fn dyn_size(&self) -> io::Result { + ::size(self) + } + + fn dyn_resolve_relative_path(&self, relative: &Path) -> io::Result { + ::resolve_relative_path(self, relative) + } + + unsafe fn dyn_pure_readv<'a>( + &'a self, + bufv: IoVectorMut<'a>, + offset: u64, + ) -> Pin> + 'a>> { + Box::pin(unsafe { ::pure_readv(self, bufv, offset) }) + } + + unsafe fn dyn_pure_writev<'a>( + &'a self, + bufv: IoVector<'a>, + offset: u64, + ) -> Pin> + 'a>> { + Box::pin(unsafe { ::pure_writev(self, bufv, offset) }) + } + + unsafe fn dyn_pure_write_zeroes( + &self, + offset: u64, + length: u64, + ) -> Pin> + '_>> { + Box::pin(unsafe { ::pure_write_zeroes(self, offset, length) }) + } + + unsafe fn dyn_pure_discard( + &self, + offset: u64, + length: u64, + ) -> Pin> + '_>> { + Box::pin(unsafe { ::pure_discard(self, offset, length) }) + } + + fn dyn_flush(&self) -> Pin> + '_>> { + Box::pin(::flush(self)) + } + + fn dyn_sync(&self) -> Pin> + '_>> { + Box::pin(::sync(self)) + } + + fn dyn_get_storage_helper(&self) -> &CommonStorageHelper { + ::get_storage_helper(self) + } +} + +impl Storage for Box { + async fn open(opts: StorageOpenOptions) -> io::Result { + // TODO: When we have more drivers, choose different defaults depending on the options + // given. Right now, only `File` really supports being opened through options, so it is an + // obvious choice. 
+ Ok(Box::new(crate::file::File::open(opts).await?)) + } + + fn mem_align(&self) -> usize { + self.as_ref().dyn_mem_align() + } + + fn req_align(&self) -> usize { + self.as_ref().dyn_req_align() + } + + fn zero_align(&self) -> usize { + self.as_ref().dyn_zero_align() + } + + fn discard_align(&self) -> usize { + self.as_ref().dyn_discard_align() + } + + fn size(&self) -> io::Result { + self.as_ref().dyn_size() + } + + fn resolve_relative_path>(&self, relative: P) -> io::Result { + self.as_ref().dyn_resolve_relative_path(relative.as_ref()) + } + + async unsafe fn pure_readv(&self, bufv: IoVectorMut<'_>, offset: u64) -> io::Result<()> { + unsafe { self.as_ref().dyn_pure_readv(bufv, offset).await } + } + + async unsafe fn pure_writev(&self, bufv: IoVector<'_>, offset: u64) -> io::Result<()> { + unsafe { self.as_ref().dyn_pure_writev(bufv, offset).await } + } + + async unsafe fn pure_write_zeroes(&self, offset: u64, length: u64) -> io::Result<()> { + unsafe { self.as_ref().dyn_pure_write_zeroes(offset, length).await } + } + + async unsafe fn pure_discard(&self, offset: u64, length: u64) -> io::Result<()> { + unsafe { self.as_ref().dyn_pure_discard(offset, length).await } + } + + async fn flush(&self) -> io::Result<()> { + self.as_ref().dyn_flush().await + } + + async fn sync(&self) -> io::Result<()> { + self.as_ref().dyn_sync().await + } + + fn get_storage_helper(&self) -> &CommonStorageHelper { + self.as_ref().dyn_get_storage_helper() + } +} + +impl Storage for Arc { + async fn open(opts: StorageOpenOptions) -> io::Result { + Box::::open(opts).await.map(Into::into) + } + + fn mem_align(&self) -> usize { + self.as_ref().dyn_mem_align() + } + + fn req_align(&self) -> usize { + self.as_ref().dyn_req_align() + } + + fn zero_align(&self) -> usize { + self.as_ref().dyn_zero_align() + } + + fn discard_align(&self) -> usize { + self.as_ref().dyn_discard_align() + } + + fn size(&self) -> io::Result { + self.as_ref().dyn_size() + } + + fn resolve_relative_path>(&self, relative: P) -> io::Result { + self.as_ref().dyn_resolve_relative_path(relative.as_ref()) + } + + async unsafe fn pure_readv(&self, bufv: IoVectorMut<'_>, offset: u64) -> io::Result<()> { + unsafe { self.as_ref().dyn_pure_readv(bufv, offset) }.await + } + + async unsafe fn pure_writev(&self, bufv: IoVector<'_>, offset: u64) -> io::Result<()> { + unsafe { self.as_ref().dyn_pure_writev(bufv, offset) }.await + } + + async unsafe fn pure_write_zeroes(&self, offset: u64, length: u64) -> io::Result<()> { + unsafe { self.as_ref().dyn_pure_write_zeroes(offset, length) }.await + } + + async unsafe fn pure_discard(&self, offset: u64, length: u64) -> io::Result<()> { + unsafe { self.as_ref().dyn_pure_discard(offset, length) }.await + } + + async fn flush(&self) -> io::Result<()> { + self.as_ref().dyn_flush().await + } + + async fn sync(&self) -> io::Result<()> { + self.as_ref().dyn_sync().await + } + + fn get_storage_helper(&self) -> &CommonStorageHelper { + self.as_ref().dyn_get_storage_helper() + } +} + +impl StorageOpenOptions { + /// Create default options. + pub fn new() -> Self { + StorageOpenOptions::default() + } + + /// Set a filename to open. + pub fn filename>(mut self, filename: P) -> Self { + self.filename = Some(filename.as_ref().to_owned()); + self + } + + /// Whether the storage should be writable or not. + pub fn write(mut self, write: bool) -> Self { + self.writable = write; + self + } + + /// Whether to bypass the host page cache (if applicable). 
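+    ///
+    /// Builder-style usage (illustrative):
+    ///
+    /// ```ignore
+    /// let opts = StorageOpenOptions::new()
+    ///     .filename("disk.raw")
+    ///     .write(true)
+    ///     .direct(true);
+    /// ```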
+ pub fn direct(mut self, direct: bool) -> Self { + self.direct = direct; + self + } +} diff --git a/src/imago/src/vector_select.rs b/src/imago/src/vector_select.rs new file mode 100644 index 00000000..1252b85c --- /dev/null +++ b/src/imago/src/vector_select.rs @@ -0,0 +1,114 @@ +//! Async select over future vectors. +//! +//! Allows collecting `dyn Future` objects (i.e. async function instances) in a vector, and +//! `select`ing (awaiting one) or `join`ing (awaiting all) them. + +use std::future::Future; +use std::marker::Unpin; +use std::pin::Pin; +use std::task::{Context, Poll}; + +/// Collect futures and await one or all of them. +pub(crate) struct FutureVector> + Unpin> { + /// Pending futures. + vec: Vec, +} + +/// Await a single future. +pub(crate) struct FutureVectorSelect<'a, R, E, F: Future> + Unpin>( + &'a mut FutureVector, +); + +/// Await all futures, discarding successful results. +pub(crate) struct FutureVectorDiscardingJoin<'a, R, E, F: Future> + Unpin>( + &'a mut FutureVector, +); + +impl> + Unpin> FutureVector { + /// Create a new `FutureVector`. + pub fn new() -> Self { + FutureVector { vec: Vec::new() } + } + + /// Add a future. + pub fn push(&mut self, future: F) { + self.vec.push(future); + } + + /// `true` if and only if there are no pending futures. + pub fn is_empty(&self) -> bool { + self.vec.is_empty() + } + + /// Number of pending futures. + pub fn len(&self) -> usize { + self.vec.len() + } + + /// Await any one future. + /// + /// Return the result of the first future that becomes ready, removing it from the vector. + /// + /// Functionally, behaves like: + /// ```ignore + /// async fn select(&mut self) -> Result; + /// ``` + pub fn select(&mut self) -> FutureVectorSelect<'_, R, E, F> { + FutureVectorSelect(self) + } + + /// Join all futures, discarding successful results. + /// + /// If an error occurs, return it immediately. All pending futures remain. + /// + /// Functionally, behaves like: + /// ```ignore + /// async fn discarding_join(&mut self) -> Result<(), E>; + /// ``` + pub fn discarding_join(&mut self) -> FutureVectorDiscardingJoin<'_, R, E, F> { + FutureVectorDiscardingJoin(self) + } +} + +impl> + Unpin> Future for FutureVectorSelect<'_, R, E, F> { + type Output = F::Output; + + fn poll(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll { + assert!(!self.0.is_empty()); + + for (i, fut) in self.0.vec.iter_mut().enumerate() { + if let Poll::Ready(result) = F::poll(Pin::new(fut), ctx) { + self.0.vec.swap_remove(i); + return Poll::Ready(result); + } + } + + Poll::Pending + } +} + +impl> + Unpin> Future + for FutureVectorDiscardingJoin<'_, R, E, F> +{ + type Output = Result<(), E>; + + fn poll(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll> { + let mut i = 0; + while i < self.0.len() { + if let Poll::Ready(result) = F::poll(Pin::new(&mut self.0.vec[i]), ctx) { + self.0.vec.swap_remove(i); + if let Err(err) = result { + return Poll::Ready(Err(err)); + } + } else { + i += 1; + } + } + + if self.0.is_empty() { + Poll::Ready(Ok(())) + } else { + Poll::Pending + } + } +}
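+
+// A minimal illustrative test of the intended usage (not part of the upstream file;
+// it only relies on the tokio current-thread runtime that the sync wrappers already
+// use, and on `oneshot` receivers, which are `Unpin` futures).
+#[cfg(test)]
+mod tests {
+    use super::FutureVector;
+    use tokio::sync::oneshot;
+
+    #[test]
+    fn discarding_join_awaits_all() {
+        let rt = tokio::runtime::Builder::new_current_thread()
+            .build()
+            .unwrap();
+        rt.block_on(async {
+            let mut v = FutureVector::new();
+            let (s1, r1) = oneshot::channel::<()>();
+            let (s2, r2) = oneshot::channel::<()>();
+            v.push(r1);
+            v.push(r2);
+            assert_eq!(v.len(), 2);
+
+            // Both senders fire before we await, so the join completes immediately.
+            s1.send(()).unwrap();
+            s2.send(()).unwrap();
+            v.discarding_join().await.unwrap();
+            assert!(v.is_empty());
+        });
+    }
+}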