-
Notifications
You must be signed in to change notification settings - Fork 1.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Unable to handle kernel NULL pointer #5295
Comments
This seems to be related to a [z_upgrade] thread |
Same system after recompiling with --enable-debug [ 1194.503581] VERIFY(!dmu_objset_is_dirty(os, t)) failed |
Aren't z_upgrade and zfs rollback compatible? |
For now I'm disabling zfs receive until the upgrade is finished :-( |
The first stack trace looks like the objset has been freed. @leelists, for the 2nd stack trace, is the upgrade task still running? I will try to reproduce it on my local node. |
'zfs recv' could disown a living objset without calling dmu_objset_disown(). This will cause the problem that the objset would be released while upgrading thread is still running. This patch calls dmu_objset_upgrade_stop() in dmu_recv_end(). ZoL-bug-id: openzfs#5295 Signed-off-by: Jinshan Xiong <[email protected]>
'zfs recv' could disown a living objset without calling dmu_objset_disown(). This will cause the problem that the objset would be released while upgrading thread is still running. This patch avoids the problem by checking if a dataset is snapshot before calling dmu_objset_userobjspace_upgrade(). ZoL-bug-id: openzfs#5295 Signed-off-by: Jinshan Xiong <[email protected]>
I recently also hit this issue on 0.7.2 and reproduced it with the following scenario:
The
occasionally it causes the same error as reported by @leelists. This also may happen when the rollback is performed as it calls For testing purposes I patched the ZoL source to return EBUSY when there is an upgradeable objset on dirty dataset and it resolved the issue. The upgrade task can't be stopped here because forcing synchronization causes a deadlock in this path. For the rollback code it seems to be safe to execute diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h
index 11b8fc6..e53aa00 100644
--- a/include/sys/dmu_objset.h
+++ b/include/sys/dmu_objset.h
@@ -212,6 +212,7 @@ void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx);
void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx);
boolean_t dmu_objset_userused_enabled(objset_t *os);
int dmu_objset_userspace_upgrade(objset_t *os);
+void dmu_objset_upgrade_stop(objset_t *os);
boolean_t dmu_objset_userspace_present(objset_t *os);
boolean_t dmu_objset_userobjused_enabled(objset_t *os);
boolean_t dmu_objset_userobjspace_upgradable(objset_t *os);
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 609e43f..ab837ec 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -82,7 +82,6 @@ int dmu_rescan_dnode_threshold = 1 << DN_MAX_INDBLKSHIFT;
static void dmu_objset_find_dp_cb(void *arg);
static void dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb);
-static void dmu_objset_upgrade_stop(objset_t *os);
void
dmu_objset_init(void)
@@ -1334,7 +1333,7 @@ dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb)
mutex_exit(&os->os_upgrade_lock);
}
-static void
+void
dmu_objset_upgrade_stop(objset_t *os)
{
mutex_enter(&os->os_upgrade_lock);
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index 1984e71..dc2a49f 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -3831,10 +3831,18 @@ dmu_recv_end_check(void *arg, dmu_tx_t *tx)
if (!drc->drc_newfs) {
dsl_dataset_t *origin_head;
+ objset_t* os = NULL;
error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head);
if (error != 0)
return (error);
+
+ os = origin_head->ds_objset;
+ if (os && dmu_objset_userobjspace_upgradable(os)
+ && dsl_dataset_is_dirty(origin_head)) {
+ return (SET_ERROR(EBUSY));
+ }
+
if (drc->drc_force) {
/*
* We will destroy any snapshots in tofs (i.e. before
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 5f333a4..8c0b923 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -3751,6 +3751,7 @@ zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
dsl_dataset_t *ds;
ds = dmu_objset_ds(zfsvfs->z_os);
+ dmu_objset_upgrade_stop(zfsvfs->z_os);
error = zfs_suspend_fs(zfsvfs);
if (error == 0) {
int resume_err; |
@ab-oe thanks for investigating this. Since the upgrade only runs when a filesystem is mounted and thus has a long hold I would have thought this case would already be covered by the |
@behlendorf I checked the
There are no other long holds/releases for tested dataset. The |
@behlendorf I secured the upgrade code with a long hold instead of the previous work-around and it also works well. Tests have been running for 3h now without any crash. The previous work-around also led to some race conditions that end with:
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 609e43f..3dc320f 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -64,6 +64,8 @@
*/
krwlock_t os_lock;
+static char *upgrade_tag = "z_upgrade";
+
/*
* Tunable to overwrite the maximum number of threads for the parallelization
* of dmu_objset_find_dp, needed to speed up the import of pools with many
@@ -1313,6 +1315,7 @@ dmu_objset_upgrade_task_cb(void *data)
os->os_upgrade_exit = B_TRUE;
os->os_upgrade_id = 0;
mutex_exit(&os->os_upgrade_lock);
+ dsl_dataset_long_rele(dmu_objset_ds(os), upgrade_tag);
}
static void
@@ -1321,6 +1324,7 @@ dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb)
if (os->os_upgrade_id != 0)
return;
+ dsl_dataset_long_hold(dmu_objset_ds(os), upgrade_tag);
mutex_enter(&os->os_upgrade_lock);
if (os->os_upgrade_id == 0 && os->os_upgrade_status == 0) {
os->os_upgrade_exit = B_FALSE;
@@ -1328,8 +1332,10 @@ dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb)
os->os_upgrade_id = taskq_dispatch(
os->os_spa->spa_upgrade_taskq,
dmu_objset_upgrade_task_cb, os, TQ_SLEEP);
- if (os->os_upgrade_id == TASKQID_INVALID)
+ if (os->os_upgrade_id == TASKQID_INVALID) {
+ dsl_dataset_long_rele(dmu_objset_ds(os), upgrade_tag);
os->os_upgrade_status = ENOMEM;
+ }
}
mutex_exit(&os->os_upgrade_lock);
} |
@ab-oe OK, that makes sense. Once you're happy with your local tests it would be great if you could open a PR for review with the long hold version of the fix. There is an existing similar test case in |
Ok. I can prepare the PR with the fix. Could you give me any tips on how to make sure that the upgrade is running on the objset during the send/receive? |
I'd suggest using |
@behlendorf thank you, but I still have problems with this test. I have no idea how to make the upgrade task run for long enough within the restrictions of the testing environment. Could you give me more clues? |
@ab-oe I see. We could potentially use |
@behlendorf thank you. I already tried with a delay, but it seems that the upgrade starts much later and the delay doesn't affect the duration of the upgrade itself, or I can't catch it. However, the receive/rollback always ended with success instead of returning a busy error. I have one more question. This mechanism handles the case when we do a rollback/receive while the upgrade is running. What about the opposite situation, when the upgrade is started while a receive/rollback is in progress? |
@ab-oe good question. In the opposite case I don't believe there's an issue since an upgrade can't be started on a snapshot which is what both |
@behlendorf I was thinking about the origin dataset here, not the snapshot. If the upgrade starts after the |
Yes, but I don't believe the origin dataset can have an upgrade running at this time so it should be safe to evict. |
@behlendorf I understand. Thank you for clearing this up. |
If the receive or rollback is performed while filesystem is upgrading the objset may be evicted in `dsl_dataset_clone_swap_sync_impl`. This will lead to NULL pointer dereference when upgrade tries to access evicted objset. This commit adds long hold of dataset during whole upgrade process. The receive and rollback will return an EBUSY error until the upgrade is not finished. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Arkadiusz Bubała <[email protected]> Closes #5295 Closes #6837
If the receive or rollback is performed while filesystem is upgrading the objset may be evicted in `dsl_dataset_clone_swap_sync_impl`. This will lead to NULL pointer dereference when upgrade tries to access evicted objset. This commit adds long hold of dataset during whole upgrade process. The receive and rollback will return an EBUSY error until the upgrade is not finished. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Arkadiusz Bubała <[email protected]> Closes openzfs#5295 Closes openzfs#6837
If the receive or rollback is performed while filesystem is upgrading the objset may be evicted in `dsl_dataset_clone_swap_sync_impl`. This will lead to NULL pointer dereference when upgrade tries to access evicted objset. This commit adds long hold of dataset during whole upgrade process. The receive and rollback will return an EBUSY error until the upgrade is not finished. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Arkadiusz Bubała <[email protected]> Closes openzfs#5295 Closes openzfs#6837
If the receive or rollback is performed while filesystem is upgrading the objset may be evicted in `dsl_dataset_clone_swap_sync_impl`. This will lead to NULL pointer dereference when upgrade tries to access evicted objset. This commit adds long hold of dataset during whole upgrade process. The receive and rollback will return an EBUSY error until the upgrade is not finished. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Arkadiusz Bubała <[email protected]> Closes openzfs#5295 Closes openzfs#6837
If the receive or rollback is performed while filesystem is upgrading the objset may be evicted in `dsl_dataset_clone_swap_sync_impl`. This will lead to NULL pointer dereference when upgrade tries to access evicted objset. This commit adds long hold of dataset during whole upgrade process. The receive and rollback will return an EBUSY error until the upgrade is not finished. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Arkadiusz Bubała <[email protected]> Closes openzfs#5295 Closes openzfs#6837
After porting the fix for openzfs#5295 over to illumos, we started hitting an assertion failure when running the testsuite: assertion failed: rc->rc_count == number, file: .../refcount.c and the unexpected hold has this stack: dsl_dataset_long_hold+0x59 dmu_objset_upgrade+0x73 dmu_objset_id_quota_upgrade+0x15 dmu_objset_own+0x14f The simplest reproducer for this in illumos is zpool create -f -O version=1 testpool c3t0d0; zpool destroy testpool which is run as part of the zpool_create_tempname test, but I can't get this to trigger on FreeBSD. This appears to be because of the call to txg_wait_synced() in dmu_objset_upgrade_stop() (which was missing in illumos), slows down dmu_objset_disown() enough to avoid the condition. Signed-off-by: Andy Fiddaman <[email protected]>
After porting the fix for openzfs#5295 over to illumos, we started hitting an assertion failure when running the testsuite: assertion failed: rc->rc_count == number, file: .../refcount.c and the unexpected hold has this stack: dsl_dataset_long_hold+0x59 dmu_objset_upgrade+0x73 dmu_objset_id_quota_upgrade+0x15 dmu_objset_own+0x14f The simplest reproducer for this in illumos is zpool create -f -O version=1 testpool c3t0d0; zpool destroy testpool which is run as part of the zpool_create_tempname test, but I can't get this to trigger on FreeBSD. This appears to be because of the call to txg_wait_synced() in dmu_objset_upgrade_stop() (which was missing in illumos), slows down dmu_objset_disown() enough to avoid the condition. Signed-off-by: Andy Fiddaman <[email protected]>
After porting the fix for openzfs#5295 over to illumos, we started hitting an assertion failure when running the testsuite: assertion failed: rc->rc_count == number, file: .../refcount.c and the unexpected hold has this stack: dsl_dataset_long_hold+0x59 dmu_objset_upgrade+0x73 dmu_objset_id_quota_upgrade+0x15 dmu_objset_own+0x14f The simplest reproducer for this in illumos is zpool create -f -O version=1 testpool c3t0d0; zpool destroy testpool which is run as part of the zpool_create_tempname test, but I can't get this to trigger on FreeBSD. This appears to be because of the call to txg_wait_synced() in dmu_objset_upgrade_stop() (which was missing in illumos), slows down dmu_objset_disown() enough to avoid the condition. Signed-off-by: Andy Fiddaman <[email protected]>
After porting the fix for #5295 over to illumos, we started hitting an assertion failure when running the testsuite: assertion failed: rc->rc_count == number, file: .../refcount.c and the unexpected hold has this stack: dsl_dataset_long_hold+0x59 dmu_objset_upgrade+0x73 dmu_objset_id_quota_upgrade+0x15 dmu_objset_own+0x14f The simplest reproducer for this in illumos is zpool create -f -O version=1 testpool c3t0d0; zpool destroy testpool which is run as part of the zpool_create_tempname test, but I can't get this to trigger on FreeBSD. This appears to be because of the call to txg_wait_synced() in dmu_objset_upgrade_stop() (which was missing in illumos), slows down dmu_objset_disown() enough to avoid the condition. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Andy Fiddaman <[email protected]> Closes #11368
After porting the fix for openzfs#5295 over to illumos, we started hitting an assertion failure when running the testsuite: assertion failed: rc->rc_count == number, file: .../refcount.c and the unexpected hold has this stack: dsl_dataset_long_hold+0x59 dmu_objset_upgrade+0x73 dmu_objset_id_quota_upgrade+0x15 dmu_objset_own+0x14f The simplest reproducer for this in illumos is zpool create -f -O version=1 testpool c3t0d0; zpool destroy testpool which is run as part of the zpool_create_tempname test, but I can't get this to trigger on FreeBSD. This appears to be because of the call to txg_wait_synced() in dmu_objset_upgrade_stop() (which was missing in illumos), slows down dmu_objset_disown() enough to avoid the condition. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Andy Fiddaman <[email protected]> Closes openzfs#11368
After porting the fix for #5295 over to illumos, we started hitting an assertion failure when running the testsuite: assertion failed: rc->rc_count == number, file: .../refcount.c and the unexpected hold has this stack: dsl_dataset_long_hold+0x59 dmu_objset_upgrade+0x73 dmu_objset_id_quota_upgrade+0x15 dmu_objset_own+0x14f The simplest reproducer for this in illumos is zpool create -f -O version=1 testpool c3t0d0; zpool destroy testpool which is run as part of the zpool_create_tempname test, but I can't get this to trigger on FreeBSD. This appears to be because of the call to txg_wait_synced() in dmu_objset_upgrade_stop() (which was missing in illumos), slows down dmu_objset_disown() enough to avoid the condition. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Andy Fiddaman <[email protected]> Closes #11368
After porting the fix for openzfs#5295 over to illumos, we started hitting an assertion failure when running the testsuite: assertion failed: rc->rc_count == number, file: .../refcount.c and the unexpected hold has this stack: dsl_dataset_long_hold+0x59 dmu_objset_upgrade+0x73 dmu_objset_id_quota_upgrade+0x15 dmu_objset_own+0x14f The simplest reproducer for this in illumos is zpool create -f -O version=1 testpool c3t0d0; zpool destroy testpool which is run as part of the zpool_create_tempname test, but I can't get this to trigger on FreeBSD. This appears to be because of the call to txg_wait_synced() in dmu_objset_upgrade_stop() (which was missing in illumos), slows down dmu_objset_disown() enough to avoid the condition. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Andy Fiddaman <[email protected]> Closes openzfs#11368
After porting the fix for openzfs#5295 over to illumos, we started hitting an assertion failure when running the testsuite: assertion failed: rc->rc_count == number, file: .../refcount.c and the unexpected hold has this stack: dsl_dataset_long_hold+0x59 dmu_objset_upgrade+0x73 dmu_objset_id_quota_upgrade+0x15 dmu_objset_own+0x14f The simplest reproducer for this in illumos is zpool create -f -O version=1 testpool c3t0d0; zpool destroy testpool which is run as part of the zpool_create_tempname test, but I can't get this to trigger on FreeBSD. This appears to be because of the call to txg_wait_synced() in dmu_objset_upgrade_stop() (which was missing in illumos), slows down dmu_objset_disown() enough to avoid the condition. Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Andy Fiddaman <[email protected]> Closes openzfs#11368
Kernel Oops with
spl 0.7.0-rc1_8_g0d26756
zfs 0.7.0-rc1_125_gb60eac3
on ubuntu 16.06 / 4.4.0-43-generic
this system is a zfs receiver and an rsync target
[ 1980.612566] BUG: unable to handle kernel NULL pointer dereference at (null)
[ 1980.612570] IP: [] __mutex_lock_slowpath+0x98/0x130
[ 1980.612574] PGD 0
[ 1980.612575] Oops: 0002 [#4] SMP
[ 1980.612577] Modules linked in: ipt_REJECT nf_reject_ipv4 xt_multiport iptable_filter ip_tables x_tables 8021q garp mrp vxlan ip6_udp_tunnel udp_tunnel bridge stp llc binfmt_misc zfs(POE) zunicode(POE) icp(POE) zcommon(POE) znvpair(POE) spl(OE) zavl(POE) kvm_intel kvm ppdev irqbypass crct10dif_pclmul crc32_pclmul aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper serio_raw cryptd parport_pc parport autofs4 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid0 multipath linear raid10 raid1 e1000e psmouse ahci libahci ptp pps_core
[ 1980.612609] CPU: 1 PID: 885 Comm: z_upgrade Tainted: P D OE 4.4.0-43-generic #63-Ubuntu
[ 1980.612611] Hardware name: /DH67BL, BIOS BLH6710H.86A.0160.2012.1204.1156 12/04/2012
[ 1980.612612] task: ffff8802142bd280 ti: ffff8800d4a8c000 task.ti: ffff8800d4a8c000
[ 1980.612613] RIP: 0010:[] [] __mutex_lock_slowpath+0x98/0x130
[ 1980.612615] RSP: 0018:ffff8800d4a8fd80 EFLAGS: 00010282
[ 1980.612616] RAX: 0000000000000000 RBX: ffff8800bd635510 RCX: 0000000000000001
[ 1980.612617] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff8800bd635514
[ 1980.612618] RBP: ffff8800d4a8fdd0 R08: 0000000000000002 R09: 0000000100150003
[ 1980.612619] R10: ffff88021f3a0300 R11: 0000000000000000 R12: ffff8800bd635514
[ 1980.612620] R13: ffff8802142bd280 R14: 00000000ffffffff R15: ffff8800bd635518
[ 1980.612621] FS: 0000000000000000(0000) GS:ffff88021ea80000(0000) knlGS:0000000000000000
[ 1980.612622] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1980.612623] CR2: 0000000000000000 CR3: 000000021de0a000 CR4: 00000000000406e0
[ 1980.612624] Stack:
[ 1980.612625] ffff8800bd635518 0000000000000000 ffff8801c8ac3d80 000000003267c79e
[ 1980.612627] 00000000c0284e3a ffff8800bd635510 ffff8800bd635538 ffff8800bd635510
[ 1980.612629] ffff880210db2c60 ffff88021f0da100 ffff8800d4a8fde8 ffffffff8182f73f
[ 1980.612630] Call Trace:
[ 1980.612633] [] mutex_lock+0x1f/0x30
[ 1980.612668] [] dmu_objset_upgrade_task_cb+0x27/0xb0 [zfs]
[ 1980.612673] [] taskq_thread+0x255/0x440 [spl]
[ 1980.612677] [] ? wake_up_q+0x70/0x70
[ 1980.612680] [] ? taskq_cancel_id+0x130/0x130 [spl]
[ 1980.612682] [] kthread+0xd8/0xf0
[ 1980.612684] [] ? kthread_create_on_node+0x1e0/0x1e0
[ 1980.612686] [] ret_from_fork+0x3f/0x70
[ 1980.612687] [] ? kthread_create_on_node+0x1e0/0x1e0
[ 1980.612688] Code: e8 ae 1f 00 00 8b 03 83 f8 01 0f 84 94 00 00 00 48 8b 43 10 4c 8d 7b 08 48 89 63 10 41 be ff ff ff ff 4c 89 3c 24 48 89 44 24 08 <48> 89 20 4c 89 6c 24 10 eb 1f 49 c7 45 00 02 00 00 00 4c 89 e7
[ 1980.612706] RIP [] __mutex_lock_slowpath+0x98/0x130
[ 1980.612708] RSP
[ 1980.612708] CR2: 0000000000000000
[ 1980.612710] ---[ end trace bc05199d55331642 ]---
The text was updated successfully, but these errors were encountered: