NAS-130440 / 24.10 / fix pool.replace regression (introduced in 026691d) (#14151)

* revert 026691d

* remove unused import and improve logs

* update and improve pool.replace api test
yocalebo authored Aug 6, 2024
1 parent c416bf7 commit 69da705
Showing 3 changed files with 67 additions and 83 deletions.
18 changes: 5 additions & 13 deletions src/middlewared/middlewared/plugins/pool_/replace_disk.py
@@ -1,5 +1,4 @@
 import errno
-import os
 
 from middlewared.schema import accepts, Bool, Dict, Int, returns, Str
 from middlewared.service import item_method, job, Service, ValidationErrors
@@ -46,23 +45,19 @@ async def replace(self, job, oid, options):
         verrors = ValidationErrors()
         unused_disks = await self.middleware.call('disk.get_unused')
         if not (disk := list(filter(lambda x: x['identifier'] == options['disk'], unused_disks))):
-            verrors.add('options.disk', 'Disk not found.', errno.ENOENT)
+            verrors.add('options.disk', f'Disk {options["disk"]!r} not found.', errno.ENOENT)
         else:
             disk = disk[0]
             if not options['force'] and not await self.middleware.call('disk.check_clean', disk['devname']):
-                verrors.add('options.force', 'Disk is not clean, partitions were found.')
+                verrors.add('options.force', f'Disk {options["disk"]!r} is not clean, partitions were found.')
 
-        if not (found := await self.middleware.call('pool.find_disk_from_topology', options['label'], pool, {
-            'include_siblings': True,
-        })):
+        if not await self.middleware.call(
+            'pool.find_disk_from_topology', options['label'], pool, {'include_siblings': True}
+        ):
             verrors.add('options.label', f'Label {options["label"]} not found.', errno.ENOENT)
 
         verrors.check()
 
-        from_disk = None
-        if found[1] and await self.middleware.run_in_thread(os.path.exists, found[1]['path']):
-            from_disk = await self.middleware.call('disk.label_to_disk', found[1]['path'].replace('/dev/', ''))
-
         vdev = []
         await self.middleware.call('pool.format_disks', job, {
             disk['devname']: {
@@ -77,9 +72,6 @@ async def replace(self, job, oid, options):
             await self.middleware.call('zfs.pool.replace', pool['name'], options['label'], new_devname)
         except Exception:
             raise
-        else:
-            if from_disk:
-                await self.middleware.call('disk.wipe', from_disk, 'QUICK')
 
         if options['preserve_settings']:
             filters = [['zfs_guid', '=', options['label']]]
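For context, the replace endpoint above stitches together three other middleware calls: disk.get_unused gates which disks qualify as replacements, disk.check_clean backs the force option, and pool.find_disk_from_topology resolves the label. A minimal sketch of exercising the endpoint the same way the updated test below does (the pool id and vdev guid here are hypothetical placeholders):

from middlewared.test.integration.utils import call

# pool.replace only accepts a replacement disk that appears in disk.get_unused
unused = call("disk.get_unused")
assert unused, "no spare disks available"

# 'label' is the zfs_guid of the vdev member being replaced (hypothetical value
# below); 'force' skips the "is not clean, partitions were found" validation.
call("pool.replace", 1, {
    "label": "1234567890123456789",
    "disk": unused[0]["identifier"],
    "force": True,
}, job=True)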
31 changes: 11 additions & 20 deletions src/middlewared/middlewared/test/integration/assets/pool.py
@@ -7,28 +7,19 @@
 from middlewared.service_exception import InstanceNotFound
 from middlewared.test.integration.utils import call, fail, pool
 
 
-mirror_topology = (2, lambda disks: {
-    "data": [
-        {"type": "MIRROR", "disks": disks[0:2]}
-    ],
+_1_disk_stripe_topology = (1, lambda disks: {
+    "data": [{"type": "STRIPE", "disks": disks[0:1]}],
 })
+_2_disk_mirror_topology = (2, lambda disks: {
+    "data": [{"type": "MIRROR", "disks": disks[0:2]}],
+})
+_4_disk_raidz2_topology = (4, lambda disks: {
+    "data": [{"type": "RAIDZ2", "disks": disks[0:4]}],
+})
 
 another_pool_topologies = [
-    (1, lambda disks: {
-        "data": [
-            {"type": "STRIPE", "disks": disks[0:1]},
-        ],
-    }),
-    mirror_topology,
-    (4, lambda disks: {
-        "data": [
-            {
-                "type": "RAIDZ2",
-                "disks": disks[0:4]
-            }
-        ],
-    }),
+    _1_disk_stripe_topology,
+    _2_disk_mirror_topology,
+    _4_disk_raidz2_topology,
 ]
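Each topology constant is a (disk_count, builder) tuple: the first element is how many unused disks are needed, and the second builds the topology dictionary from a list of disk names. The test below passes the whole tuple to another_pool, which presumably unpacks it the same way as this small sketch (the device names are made up):

_2_disk_mirror_topology = (2, lambda disks: {
    "data": [{"type": "MIRROR", "disks": disks[0:2]}],
})

disk_count, build_topology = _2_disk_mirror_topology
available = ["sdb", "sdc", "sdd"]  # hypothetical unused device names
assert len(available) >= disk_count
topology = build_topology(available)
# topology == {"data": [{"type": "MIRROR", "disks": ["sdb", "sdc"]}]}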
101 changes: 51 additions & 50 deletions tests/api2/test_pool_replace_disk.py
@@ -1,67 +1,68 @@
 import pytest
 
 from time import sleep
-from middlewared.test.integration.assets.pool import another_pool_topologies, another_pool
+from middlewared.test.integration.assets.pool import _2_disk_mirror_topology, _4_disk_raidz2_topology, another_pool
 from middlewared.test.integration.utils import call
 
 
-def disks(topology):
-    flat = call("pool.flatten_topology", topology)
-    return [vdev for vdev in flat if vdev["type"] == "DISK"]
-
-
-@pytest.mark.parametrize("topology", another_pool_topologies[1:])
-@pytest.mark.parametrize("i", list(range(0, max(topology[0] for topology in another_pool_topologies))))
-def test_pool_replace_disk(topology, i):
-    count = topology[0]
-    if i >= count:
-        return
-
-    with another_pool(topology=topology) as pool:
-        assert len(disks(pool["topology"])) == count
+@pytest.mark.parametrize("topology", [_2_disk_mirror_topology, _4_disk_raidz2_topology])
+def test_pool_replace_disk(topology):
+    """This tests the following:
+        1. create a zpool based on the `topology`
+        2. flatten the newly created zpool's topology
+        3. verify the zpool vdev size matches reality
+        4. choose 1st vdev from newly created zpool
+        5. choose 1st disk in vdev from step #4
+        6. choose 1st disk in disk.get_unused as replacement disk
+        7. call pool.replace using disk from step #5 with disk from step #6
+        8. validate that the disk being replaced still has zfs partitions
+        9. validate pool.get_instance topology info shows the replacement disk
+        10. validate disk.get_instance associates the replacement disk with the zpool
+    """
+    with another_pool(topology=topology) as pool:  # step 1
+        # step 2
+        flat_top = call("pool.flatten_topology", pool["topology"])
+        pool_top = [vdev for vdev in flat_top if vdev["type"] == "DISK"]
+        # step 3
+        assert len(pool_top) == topology[0]
 
-        to_replace_vdev = disks(pool["topology"])[i]
-        to_replace_disk = call("disk.query", [["devname", "=", to_replace_vdev["disk"]]],
-                               {"get": True, "extra": {"pools": True}})
+        # step 4
+        to_replace_vdev = pool_top[0]
+        # step 5
+        to_replace_disk = call(
+            "disk.query", [["devname", "=", to_replace_vdev["disk"]]], {"get": True, "extra": {"pools": True}}
+        )
         assert to_replace_disk["pool"] == pool["name"]
 
+        # step 6
         new_disk = call("disk.get_unused")[0]
 
-        call("pool.replace", pool["id"], {
-            "label": to_replace_vdev["guid"],
-            "disk": new_disk["identifier"],
-            "force": True,
-        }, job=True)
+        # step 7
+        call("pool.replace", pool["id"], {"label": to_replace_vdev["guid"], "disk": new_disk["identifier"]}, job=True)
 
-        # Sometimes the VM is slow so look 10 times with 1 second in between
+        # step 8
+        assert call("disk.gptid_from_part_type", to_replace_disk["devname"], call("disk.get_zfs_part_type"))
+
+        # step 9
+        found = False
         for _ in range(10):
-            pool = call("pool.get_instance", pool["id"])
-            if len(disks(pool["topology"])) == count:
-                break
-            sleep(1)
+            if not found:
+                for i in call("pool.flatten_topology", call("pool.get_instance", pool["id"])["topology"]):
+                    if i["type"] == "DISK" and i["disk"] == new_disk["devname"]:
+                        found = True
+                        break
+                else:
+                    sleep(1)
 
-        assert len(disks(pool["topology"])) == count
-        assert disks(pool["topology"])[i]["disk"] == new_disk["devname"]
+        assert found, f'Failed to detect replacement disk {new_disk["devname"]!r} in zpool {pool["name"]!r}'
 
-        # this is flakey on our VMs as well, give it a bit of time before we assert
+        # step 10 (NOTE: disk.sync_all takes a while so we retry a few times here)
+        new = to_replace = None
+        cmd = ("disk.get_instance", new_disk["identifier"], {"extra": {"pools": True}})
+        for _ in range(30):
+            if all((new, to_replace)):
+                if call(*cmd)["pool"] == pool["name"]:
+                    break
+            elif new is None:
+                p = call("disk.get_instance", new_disk["identifier"], {"extra": {"pools": True}})
+                if p["pool"] == pool["name"]:
+                    new = True
+                else:
+                    sleep(1)
+            elif to_replace is None:
+                t = call("disk.get_instance", to_replace_disk["identifier"], {"extra": {"pools": True}})
+                if t["pool"] is None:
+                    to_replace = True
+                else:
+                    sleep(1)
+            else:
+                sleep(1)
+        else:
+            if new is None:
+                assert False, f'disk.get_instance failed on {new_disk["identifier"]!r}'
+            if to_replace is None:
+                assert False, f'disk.get_instance failed on {to_replace_disk["identifier"]!r}'
+            assert False, f'{cmd!r} failed to update with pool information'

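Steps 9 and 10 of the rewritten test hand-roll the same poll-until-true pattern twice. A generic helper along these lines (a sketch, not part of the commit) would express both retry loops once:

from time import sleep

def poll(fn, predicate, tries=30, delay=1.0):
    """Call fn() up to `tries` times, sleeping `delay` seconds between
    attempts, until predicate(fn()) is true; raise TimeoutError otherwise."""
    for _ in range(tries):
        result = fn()
        if predicate(result):
            return result
        sleep(delay)
    raise TimeoutError(f"condition not met after {tries} attempts")

# hypothetical usage mirroring step 10:
# poll(
#     lambda: call("disk.get_instance", new_disk["identifier"], {"extra": {"pools": True}}),
#     lambda disk: disk["pool"] == pool["name"],
# )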