diff --git a/src/client/dfs/dfs.c b/src/client/dfs/dfs.c index cb8bd81ea93..ea6bf1b12f1 100644 --- a/src/client/dfs/dfs.c +++ b/src/client/dfs/dfs.c @@ -1078,6 +1078,10 @@ entry_stat(dfs_t *dfs, daos_handle_t th, daos_handle_t oh, const char *name, siz } rc = daos_array_close(file_oh, NULL); + /* Try again in case of out of memory. Needed to avoid memory leak in + * NLT fault injection */ + if (rc == -DER_NOMEM) + rc = daos_array_close(file_oh, NULL); if (rc) return daos_der2errno(rc); } diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index 9b91b429331..6b450e55972 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -274,7 +274,7 @@ rec_free(struct d_hash_table *htable, d_list_t *rlink) rc = dfs_release(hdl->oh); if (rc == ENOMEM) - rc = dfs_release(hdl->oh); + dfs_release(hdl->oh); if (rc) D_ERROR("dfs_release() failed: %d (%s)\n", rc, strerror(rc)); D_FREE(hdl); @@ -315,7 +315,7 @@ lookup_insert_dir(struct dfs_mt *mt, const char *name, size_t len, dfs_obj_t **o dfs_obj_t *oh; d_list_t *rlink; mode_t mode; - int rc; + int rc, rc2; /* TODO: Remove this after testing. */ D_ASSERT(strlen(name) == len); @@ -333,7 +333,9 @@ lookup_insert_dir(struct dfs_mt *mt, const char *name, size_t len, dfs_obj_t **o if (!S_ISDIR(mode)) { /* Not a directory, return ENOENT*/ - dfs_release(oh); + rc = dfs_release(oh); + if (rc == ENOMEM) + dfs_release(oh); return ENOTDIR; } @@ -357,7 +359,9 @@ lookup_insert_dir(struct dfs_mt *mt, const char *name, size_t len, dfs_obj_t **o return 0; out_release: - dfs_release(oh); + rc2 = dfs_release(oh); + if (rc2 == ENOMEM) + dfs_release(oh); D_FREE(hdl); return rc; } @@ -619,8 +623,10 @@ discover_daos_mount_with_env(void) D_FATAL("DAOS_CONTAINER is not set.\n"); D_GOTO(out, rc = EINVAL); } - D_STRNDUP(dfs_list[num_dfs].fs_root, fs_root, len_fs_root); + /* Added to avoid issues due to fault injection. Try again in case of ENOMEM */ + if (dfs_list[num_dfs].fs_root == NULL) + D_STRNDUP(dfs_list[num_dfs].fs_root, fs_root, len_fs_root); if (dfs_list[num_dfs].fs_root == NULL) D_GOTO(out, rc = ENOMEM); @@ -1381,6 +1387,8 @@ free_fd(int idx) if (saved_obj) { rc = dfs_release(saved_obj->file); + if (rc == ENOMEM) + dfs_release(saved_obj->file); /** This memset() is not necessary. It is left here intended. In case of duplicated * fd exists, multiple fd could point to same struct file_obj. struct file_obj is * supposed to be freed only when reference count reaches zero. With zeroing out @@ -1391,8 +1399,6 @@ free_fd(int idx) */ memset(saved_obj, 0, sizeof(struct file_obj)); D_FREE(saved_obj); - if (rc) - D_ERROR("dfs_release() failed: %d (%s)\n", rc, strerror(rc)); } } @@ -1429,8 +1435,8 @@ free_dirfd(int idx) D_FREE(saved_obj->path); D_FREE(saved_obj->ents); rc = dfs_release(saved_obj->dir); - if (rc) - D_ERROR("dfs_release() failed: %d (%s)\n", rc, strerror(rc)); + if (rc == ENOMEM) + dfs_release(saved_obj->dir); /** This memset() is not necessary. It is left here intended. In case of duplicated * fd exists, multiple fd could point to same struct dir_obj. struct dir_obj is * supposed to be freed only when reference count reaches zero. With zeroing out @@ -2641,19 +2647,19 @@ opendir(const char *path) D_ALLOC_ARRAY(dir_list[idx_dirfd]->ents, READ_DIR_BATCH_SIZE); if (dir_list[idx_dirfd]->ents == NULL) { free_dirfd(idx_dirfd); - D_GOTO(out_err, rc = ENOMEM); + D_GOTO(out_err_ret, rc = ENOMEM); } /* allocate memory for path[]. */ D_ASPRINTF(dir_list[idx_dirfd]->path, "%s%s", dfs_mt->fs_root, full_path); if (dir_list[idx_dirfd]->path == NULL) { free_dirfd(idx_dirfd); - D_GOTO(out_err, rc = ENOMEM); + D_GOTO(out_err_ret, rc = ENOMEM); } if (strnlen(dir_list[idx_dirfd]->path, DFS_MAX_PATH) >= DFS_MAX_PATH) { D_DEBUG(DB_ANY, "path is longer than DFS_MAX_PATH: %d (%s)\n", ENAMETOOLONG, strerror(ENAMETOOLONG)); free_dirfd(idx_dirfd); - D_GOTO(out_err, rc = ENAMETOOLONG); + D_GOTO(out_err_ret, rc = ENAMETOOLONG); } FREE(parent_dir); @@ -3197,6 +3203,8 @@ readlink(const char *path, char *buf, size_t size) if (rc) goto out_release; rc = dfs_release(obj); + if (rc == ENOMEM) + rc = dfs_release(obj); if (rc) goto out_err; /* The NULL at the end should not be included in the length */ @@ -3209,8 +3217,8 @@ readlink(const char *path, char *buf, size_t size) out_release: rc2 = dfs_release(obj); - if (rc2) - D_ERROR("dfs_release() failed: %d (%s)\n", rc2, strerror(rc2)); + if (rc2 == ENOMEM) + dfs_release(obj); out_err: FREE(parent_dir); @@ -3627,8 +3635,8 @@ rename(const char *old_name, const char *new_name) D_FREE(buff); errno_save = rc; rc = dfs_release(obj_old); - if (rc) - D_ERROR("dfs_release() failed: %d (%s)\n", rc, strerror(rc)); + if (rc == ENOMEM) + dfs_release(obj_old); errno = errno_save; return (-1); @@ -3639,8 +3647,8 @@ rename(const char *old_name, const char *new_name) fclose(fIn); errno_save = rc; rc = dfs_release(obj_new); - if (rc) - D_ERROR("dfs_release() failed: %d (%s)\n", rc, strerror(rc)); + if (rc == ENOMEM) + dfs_release(obj_new); errno = errno_save; return (-1); @@ -5498,7 +5506,7 @@ finalize_dfs(void) continue; } rc = daos_pool_disconnect(dfs_list[i].poh, NULL); - if (rc != 0) { + if (rc != -DER_SUCCESS) { D_ERROR("error in daos_pool_disconnect(%s): " DF_RC "\n", dfs_list[i].fs_root, DP_RC(rc)); continue; diff --git a/src/object/cli_obj.c b/src/object/cli_obj.c index 8bf5f39d102..df6e22f34f4 100644 --- a/src/object/cli_obj.c +++ b/src/object/cli_obj.c @@ -1414,6 +1414,8 @@ dc_obj_open(tse_task_t *task) args = dc_task_get_args(task); D_ASSERTF(args != NULL, "Task Argument OPC does not match DC OPC\n"); + /* initialize with an invalid handle */ + (*args->oh).cookie = 0; obj = obj_alloc(); if (obj == NULL) diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 0b2d151b6de..63fc50a5479 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -5516,6 +5516,7 @@ def test_alloc_pil4dfs_ls(server, conf, wf): test_cmd.use_pil4dfs(container) test_cmd.check_daos_stderr = False test_cmd.check_post_stdout = False + test_cmd.ignore_busy = True return test_cmd.launch() @@ -5877,7 +5878,7 @@ def run(wf, args): fatal_errors.add_result(test_alloc_cont_create(server, conf, wf_client)) # Disabled for now because of errors - # fatal_errors.add_result(test_alloc_pil4dfs_ls(server, conf, wf_client)) + fatal_errors.add_result(test_alloc_pil4dfs_ls(server, conf, wf_client)) wf_client.close()