
chore(bors): merge pull request #620
620: Cherry-pick #619 r=tiagolobocastro a=tiagolobocastro

    feat(snapshot/commitment): use commitment when creating a snapshot
    
    Snapshot creation now uses the same rules as volume/replica creation.
    
    Signed-off-by: Tiago Castro <[email protected]>

---

    ci: fix test race condition
    
    Set the cache reload period very high so that we don't try to refresh
    whilst testing a timeout condition.
    
    Signed-off-by: Tiago Castro <[email protected]>

Co-authored-by: Tiago Castro <[email protected]>
mayastor-bors and tiagolobocastro committed Jul 5, 2023
2 parents 7ec0bad + 3485efb commit 6b7a416
Showing 17 changed files with 319 additions and 58 deletions.
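
At its core, the PR extends the free-space commitment rule from volume/replica creation to snapshot creation: a pool may only host a snapshot replica if its free space exceeds a configured percentage of the volume size. A standalone restatement of that rule with the arithmetic spelled out (the real check lives in SimplePolicy::min_free_space further down; this helper is illustrative only, not the scheduler's actual code):

    // Illustrative restatement of the snapshot commitment check added in
    // SimplePolicy::min_free_space below; not the actual scheduler code.
    fn pool_can_host_snapshot(free_space: u64, volume_size: u64, snapshot_commitment: u64) -> bool {
        free_space > (snapshot_commitment * volume_size) / 100
    }

    fn main() {
        const GIB: u64 = 1024 * 1024 * 1024;
        // With the 40% default, a 100GiB volume needs more than 40GiB free:
        assert!(pool_can_host_snapshot(41 * GIB, 100 * GIB, 40));
        assert!(!pool_can_host_snapshot(40 * GIB, 100 * GIB, 40));
    }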
@@ -95,6 +95,8 @@ pub(super) async fn child_enospc_onlineable(
for r in children.flat_map(|c| c.as_replica()) {
replicas.push(registry.replica(r.uuid()).await?);
}
// todo: this won't work for volumes with snapshots
// we need to figure out which blocks would require rebuild
let min_allocated_bytes = replicas
.into_iter()
.flat_map(|r| r.space)
@@ -42,14 +42,6 @@ impl GetPersistedNexusChildren {
pub(crate) fn new_snapshot(spec: &VolumeSpec) -> Self {
Self::Snapshot(spec.clone())
}
-    /// Check if the snapshotting volume is published.
-    pub(crate) fn snapshot_target_published(&self) -> bool {
-        match self {
-            Self::Create(_) => false,
-            Self::ReCreate(_) => false,
-            Self::Snapshot(vol) => vol.target().is_some(),
-        }
-    }
/// Get the optional volume spec (used for nexus creation).
pub(crate) fn vol_spec(&self) -> Option<&VolumeSpec> {
match self {
@@ -9,7 +9,7 @@ use stor_port::types::v0::{
nexus_persistence::{ChildInfo, NexusInfo},
replica::ReplicaSpec,
},
-    transport::{Child, ChildUri, PoolId, Replica},
+    transport::{Child, ChildUri, NodeId, PoolId, Replica},
};

/// Item for pool scheduling logic.
@@ -88,6 +88,15 @@ impl PoolItemLister {
.collect();
pools
}
/// Get a list of pool items for snapshot replica placement (the pool hosting the given child item).
pub(crate) async fn list_for_snaps(registry: &Registry, item: &ChildItem) -> Vec<PoolItem> {
let nodes = Self::nodes(registry).await;

match nodes.iter().find(|n| n.id() == item.node()) {
Some(node) => vec![PoolItem::new(node.clone(), item.pool().clone(), None)],
None => vec![],
}
}
}

/// A replica item used for scheduling.
@@ -246,6 +255,10 @@ impl ChildItem {
pub(crate) fn pool(&self) -> &PoolWrapper {
&self.pool_state
}
/// Get a reference to the node id.
pub(crate) fn node(&self) -> &NodeId {
&self.pool_state.node
}
/// Check if we can rebuild this child.
pub(crate) fn rebuildable(&self) -> &Option<bool> {
&self.rebuildable
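Together, the new node getter and list_for_snaps above encode the placement rule for snapshot replicas: only the pool that already hosts the replica being snapped is offered as a candidate, and only while its node is known to the registry. A simplified, self-contained sketch of that rule (types reduced to strings for illustration; not the actual code):

    // Simplified restatement of PoolItemLister::list_for_snaps: the candidate
    // list is either the source replica's own pool or empty.
    fn snapshot_pool_candidates<'a>(
        known_nodes: &[&str],
        replica_node: &str,
        replica_pool: &'a str,
    ) -> Vec<&'a str> {
        if known_nodes.contains(&replica_node) {
            vec![replica_pool]
        } else {
            vec![]
        }
    }

    fn main() {
        assert_eq!(snapshot_pool_candidates(&["node-a", "node-b"], "node-a", "pool-1"), vec!["pool-1"]);
        assert!(snapshot_pool_candidates(&["node-b"], "node-a", "pool-1").is_empty());
    }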
77 changes: 75 additions & 2 deletions control-plane/agents/src/bin/core/controller/scheduling/volume.rs
@@ -17,7 +17,7 @@ use stor_port::types::v0::{
};

/// Move replica to another pool.
-#[derive(Clone)]
+#[derive(Default, Clone)]
pub(crate) struct MoveReplica {
/// Current replica node.
node: NodeId,
@@ -66,6 +66,7 @@ pub(crate) struct GetSuitablePoolsContext {
spec: VolumeSpec,
allocated_bytes: Option<u64>,
move_repl: Option<MoveReplica>,
snap_repl: bool,
ag_restricted_nodes: Option<Vec<NodeId>>,
}
impl GetSuitablePoolsContext {
@@ -81,9 +82,16 @@ impl GetSuitablePoolsContext {
pub(crate) fn move_repl(&self) -> Option<&MoveReplica> {
self.move_repl.as_ref()
}
/// Check if this is a replica snapshot request.
pub(crate) fn snap_repl(&self) -> bool {
self.snap_repl
}
pub(crate) fn ag_restricted_nodes(&self) -> &Option<Vec<NodeId>> {
&self.ag_restricted_nodes
}
pub fn as_thin(&self) -> bool {
self.spec.as_thin() || self.snap_repl()
}
}
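
Note the new as_thin above: a snapshot request is scheduled as if the volume were thin-provisioned even when the volume itself is thick, which routes it through the overcommit-aware SimplePolicy (see with_default_policy further down). A minimal restatement follows; the copy-on-write rationale is an assumption, not stated in the commit:

    // Restatement of GetSuitablePoolsContext::as_thin: snapshot requests take
    // the thin path, presumably because post-snapshot writes to a thick volume
    // allocate new blocks copy-on-write style (assumed rationale).
    fn schedules_as_thin(spec_is_thin: bool, snap_repl: bool) -> bool {
        spec_is_thin || snap_repl
    }

    fn main() {
        assert!(schedules_as_thin(false, true)); // thick volume, snapshot: simple policy
        assert!(!schedules_as_thin(false, false)); // thick volume, replica add: thick policy
    }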

impl Deref for GetSuitablePoolsContext {
Expand Down Expand Up @@ -121,7 +129,7 @@ impl AddVolumeReplica {
for spec in replicas {
if let Ok(state) = registry.replica(spec.uuid()).await {
if let Some(space) = state.space {
-                used_bytes.push(space.allocated_bytes);
+                used_bytes.push(space.allocated_distinct());
}
}
}
@@ -156,6 +164,7 @@ impl AddVolumeReplica {
spec: volume_spec,
allocated_bytes,
move_repl: request.move_repl,
snap_repl: false,
ag_restricted_nodes,
},
PoolItemLister::list(registry, &pool_ag_replica_count_map).await,
@@ -610,3 +619,67 @@ impl ResourceFilter for AddVolumeNexusReplicas {
self.data.list
}
}

/// Snapshot a volume replica.
#[derive(Clone)]
pub(crate) struct SnapshotVolumeReplica {
data: ResourceData<GetSuitablePoolsContext, PoolItem>,
}

impl SnapshotVolumeReplica {
async fn builder(registry: &Registry, volume: &VolumeSpec, item: &ChildItem) -> Self {
let allocated_bytes = AddVolumeReplica::allocated_bytes(registry, volume).await;

Self {
data: ResourceData::new(
GetSuitablePoolsContext {
registry: registry.clone(),
spec: volume.clone(),
allocated_bytes,
move_repl: None,
snap_repl: true,
ag_restricted_nodes: None,
},
PoolItemLister::list_for_snaps(registry, item).await,
),
}
}
fn with_default_policy(self) -> Self {
match self.data.context.as_thin() {
true => self.with_simple_policy(),
false => self.with_thick_policy(),
}
}
fn with_thick_policy(self) -> Self {
self.policy(ThickPolicy::new())
}
fn with_simple_policy(self) -> Self {
let simple = SimplePolicy::new(&self.data.context().registry);
self.policy(simple)
}
/// Default rules for replica snapshot selection when snapshotting replicas for a volume.
pub(crate) async fn builder_with_defaults(
registry: &Registry,
volume: &VolumeSpec,
// todo: only 1 replica snapshot supported atm
items: &ChildItem,
) -> Self {
Self::builder(registry, volume, items)
.await
.with_default_policy()
}
}

#[async_trait::async_trait(?Send)]
impl ResourceFilter for SnapshotVolumeReplica {
type Request = GetSuitablePoolsContext;
type Item = PoolItem;

fn data(&mut self) -> &mut ResourceData<Self::Request, Self::Item> {
&mut self.data
}

fn collect(self) -> Vec<Self::Item> {
self.data.list
}
}
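
The snapshot operation that drives this builder sits outside this diff. A hypothetical call site, mirroring how the other scheduling builders are consumed, where registry, volume, and item are all assumed bindings, not part of the commit:

    // Hypothetical usage (not part of this commit): `registry: &Registry`,
    // `volume: &VolumeSpec` and `item: &ChildItem` assumed in scope.
    let pools = SnapshotVolumeReplica::builder_with_defaults(registry, volume, item)
        .await
        .collect();
    // An empty list would mean no pool survived the snapshot commitment
    // filters, so the snapshot request should fail with an out-of-space error.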
@@ -1,5 +1,8 @@
use super::ResourceFilter;
-use crate::controller::scheduling::{volume::AddVolumeReplica, NodeFilters};
+use crate::controller::scheduling::{
+    volume::{AddVolumeReplica, SnapshotVolumeReplica},
+    NodeFilters,
+};

mod affinity_group;
pub(crate) mod pool;
@@ -28,4 +31,18 @@ impl DefaultBasePolicy {
.filter(pool::PoolBaseFilters::min_free_space)
.filter(pool::PoolBaseFilters::topology)
}
fn filter_snapshot(request: SnapshotVolumeReplica) -> SnapshotVolumeReplica {
Self::filter_snapshot_pools(Self::filter_snapshot_nodes(request))
}
fn filter_snapshot_nodes(request: SnapshotVolumeReplica) -> SnapshotVolumeReplica {
request
.filter(NodeFilters::cordoned_for_pool)
.filter(NodeFilters::online_for_pool)
}
fn filter_snapshot_pools(request: SnapshotVolumeReplica) -> SnapshotVolumeReplica {
request
.filter(pool::PoolBaseFilters::usable)
.filter(pool::PoolBaseFilters::capacity)
.filter(pool::PoolBaseFilters::min_free_space)
}
}
@@ -3,7 +3,7 @@ use crate::{
registry::Registry,
scheduling::{
resources::PoolItem,
-        volume::{AddVolumeReplica, GetSuitablePoolsContext},
+        volume::{AddVolumeReplica, GetSuitablePoolsContext, SnapshotVolumeReplica},
volume_policy::{affinity_group, pool::PoolBaseFilters, DefaultBasePolicy},
ResourceFilter, ResourcePolicy, SortBuilder, SortCriteria,
},
@@ -45,6 +45,16 @@ impl ResourcePolicy<AddVolumeReplica> for SimplePolicy {
}
}

#[async_trait::async_trait(?Send)]
impl ResourcePolicy<SnapshotVolumeReplica> for SimplePolicy {
fn apply(self, to: SnapshotVolumeReplica) -> SnapshotVolumeReplica {
DefaultBasePolicy::filter_snapshot(to)
.filter(PoolBaseFilters::min_free_space)
.filter_param(&self, SimplePolicy::min_free_space)
.filter_param(&self, SimplePolicy::pool_overcommit)
}
}

const TOTAL_REPLICA_COUNT_WEIGHT: Ranged = Ranged::new_const(25);
const FREE_SPACE_WEIGHT: Ranged = Ranged::new_const(40);
const OVER_COMMIT_WEIGHT: Ranged = Ranged::new_const(35);
@@ -131,6 +141,11 @@ impl SimplePolicy {
if !request.as_thin() {
return item.pool.free_space() > request.size;
}
if request.snap_repl() {
return item.pool.free_space() > {
(self.cli_args.snapshot_commitment * request.size) / 100
};
}
item.pool.free_space()
> match request.allocated_bytes() {
Some(bytes) => {
@@ -1,6 +1,6 @@
use crate::controller::scheduling::{
resources::PoolItem,
-    volume::{AddVolumeReplica, GetSuitablePoolsContext},
+    volume::{AddVolumeReplica, GetSuitablePoolsContext, SnapshotVolumeReplica},
volume_policy::{affinity_group, pool::PoolBaseFilters, DefaultBasePolicy},
ResourceFilter, ResourcePolicy, SortBuilder, SortCriteria,
};
@@ -27,6 +27,13 @@ impl ResourcePolicy<AddVolumeReplica> for ThickPolicy {
}
}

#[async_trait::async_trait(?Send)]
impl ResourcePolicy<SnapshotVolumeReplica> for ThickPolicy {
fn apply(self, to: SnapshotVolumeReplica) -> SnapshotVolumeReplica {
DefaultBasePolicy::filter_snapshot(to)
}
}

impl ThickPolicy {
/// Create a new thick policy.
pub(crate) fn new() -> Self {
6 changes: 6 additions & 0 deletions control-plane/agents/src/bin/core/main.rs
@@ -120,6 +120,12 @@ pub(crate) struct ThinArgs {
/// to be created on the pool is 100GiB.
#[clap(long, env = "VOLUME_COMMITMENT_%", value_parser = value_parse_percent, default_value = "40%")]
volume_commitment: u64,
/// When creating snapshots for an existing volume, each replica pool must have free space
/// of at least this percentage of the volume size.
/// Example: if this value is 40 and the pool has 40GiB free, then the max volume size
/// allowed to be snapped on the pool is 100GiB.
#[clap(long, env = "SNAPSHOT_COMMITMENT_%", value_parser = value_parse_percent, default_value = "40%")]
snapshot_commitment: u64,
/// Same as the `volume_commitment` argument, but applicable only when creating replicas for
/// a new volume.
#[clap(long, env = "VOLUME_COMMITMENT_%_INITIAL", value_parser = value_parse_percent, default_value = "40%")]
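
The value_parse_percent parser referenced by these clap attributes is defined elsewhere in the crate. A plausible standalone sketch of its behaviour, accepting either 40 or 40%, offered as an assumption rather than the crate's actual implementation:

    // Assumed behaviour of a percent parser like `value_parse_percent`;
    // the real one may differ (e.g. in range checks).
    fn value_parse_percent(value: &str) -> Result<u64, String> {
        value
            .trim()
            .trim_end_matches('%')
            .parse()
            .map_err(|error| format!("invalid percent {value:?}: {error}"))
    }

    fn main() {
        assert_eq!(value_parse_percent("40%"), Ok(40));
        assert_eq!(value_parse_percent("40"), Ok(40));
        assert!(value_parse_percent("forty").is_err());
    }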
(diffs for the remaining changed files are not shown)
