diff --git a/lib/libzfs/Makefile.am b/lib/libzfs/Makefile.am index 9a32a3013082..f1260ea7195d 100644 --- a/lib/libzfs/Makefile.am +++ b/lib/libzfs/Makefile.am @@ -15,7 +15,6 @@ USER_C = \ libzfs_dataset.c \ libzfs_diff.c \ libzfs_fru.c \ - libzfs_graph.c \ libzfs_import.c \ libzfs_iter.c \ libzfs_mount.c \ diff --git a/lib/libzfs/libzfs_graph.c b/lib/libzfs/libzfs_graph.c deleted file mode 100644 index a398514c16cf..000000000000 --- a/lib/libzfs/libzfs_graph.c +++ /dev/null @@ -1,653 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Iterate over all children of the current object. This includes the normal - * dataset hierarchy, but also arbitrary hierarchies due to clones. We want to - * walk all datasets in the pool, and construct a directed graph of the form: - * - * home - * | - * +----+----+ - * | | - * v v ws - * bar baz | - * | | - * v v - * @yesterday ----> foo - * - * In order to construct this graph, we have to walk every dataset in the pool, - * because the clone parent is stored as a property of the child, not the - * parent. The parent only keeps track of the number of clones. - * - * In the normal case (without clones) this would be rather expensive. To avoid - * unnecessary computation, we first try a walk of the subtree hierarchy - * starting from the initial node. At each dataset, we construct a node in the - * graph and an edge leading from its parent. If we don't see any snapshots - * with a non-zero clone count, then we are finished. - * - * If we do find a cloned snapshot, then we finish the walk of the current - * subtree, but indicate that we need to do a complete walk. We then perform a - * global walk of all datasets, avoiding the subtree we already processed. - * - * At the end of this, we'll end up with a directed graph of all relevant (and - * possible some irrelevant) datasets in the system. We need to both find our - * limiting subgraph and determine a safe ordering in which to destroy the - * datasets. We do a topological ordering of our graph starting at our target - * dataset, and then walk the results in reverse. - * - * It's possible for the graph to have cycles if, for example, the user renames - * a clone to be the parent of its origin snapshot. The user can request to - * generate an error in this case, or ignore the cycle and continue. - * - * When removing datasets, we want to destroy the snapshots in chronological - * order (because this is the most efficient method). In order to accomplish - * this, we store the creation transaction group with each vertex and keep each - * vertex's edges sorted according to this value. 
The topological sort will - * automatically walk the snapshots in the correct order. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "libzfs_impl.h" -#include "zfs_namecheck.h" - -#define MIN_EDGECOUNT 4 - -/* - * Vertex structure. Indexed by dataset name, this structure maintains a list - * of edges to other vertices. - */ -struct zfs_edge; -typedef struct zfs_vertex { - char zv_dataset[ZFS_MAX_DATASET_NAME_LEN]; - struct zfs_vertex *zv_next; - int zv_visited; - uint64_t zv_txg; - struct zfs_edge **zv_edges; - int zv_edgecount; - int zv_edgealloc; -} zfs_vertex_t; - -enum { - VISIT_SEEN = 1, - VISIT_SORT_PRE, - VISIT_SORT_POST -}; - -/* - * Edge structure. Simply maintains a pointer to the destination vertex. There - * is no need to store the source vertex, since we only use edges in the context - * of the source vertex. - */ -typedef struct zfs_edge { - zfs_vertex_t *ze_dest; - struct zfs_edge *ze_next; -} zfs_edge_t; - -#define ZFS_GRAPH_SIZE 1027 /* this could be dynamic some day */ - -/* - * Graph structure. Vertices are maintained in a hash indexed by dataset name. - */ -typedef struct zfs_graph { - zfs_vertex_t **zg_hash; - size_t zg_size; - size_t zg_nvertex; - const char *zg_root; - int zg_clone_count; -} zfs_graph_t; - -/* - * Allocate a new edge pointing to the target vertex. - */ -static zfs_edge_t * -zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest) -{ - zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t)); - - if (zep == NULL) - return (NULL); - - zep->ze_dest = dest; - - return (zep); -} - -/* - * Destroy an edge. - */ -static void -zfs_edge_destroy(zfs_edge_t *zep) -{ - free(zep); -} - -/* - * Allocate a new vertex with the given name. - */ -static zfs_vertex_t * -zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset) -{ - zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t)); - - if (zvp == NULL) - return (NULL); - - assert(strlen(dataset) < ZFS_MAX_DATASET_NAME_LEN); - - (void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset)); - - if ((zvp->zv_edges = zfs_alloc(hdl, - MIN_EDGECOUNT * sizeof (void *))) == NULL) { - free(zvp); - return (NULL); - } - - zvp->zv_edgealloc = MIN_EDGECOUNT; - - return (zvp); -} - -/* - * Destroy a vertex. Frees up any associated edges. - */ -static void -zfs_vertex_destroy(zfs_vertex_t *zvp) -{ - int i; - - for (i = 0; i < zvp->zv_edgecount; i++) - zfs_edge_destroy(zvp->zv_edges[i]); - - free(zvp->zv_edges); - free(zvp); -} - -/* - * Given a vertex, add an edge to the destination vertex. - */ -static int -zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp, - zfs_vertex_t *dest) -{ - zfs_edge_t *zep = zfs_edge_create(hdl, dest); - - if (zep == NULL) - return (-1); - - if (zvp->zv_edgecount == zvp->zv_edgealloc) { - void *ptr; - - if ((ptr = zfs_realloc(hdl, zvp->zv_edges, - zvp->zv_edgealloc * sizeof (void *), - zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL) - return (-1); - - zvp->zv_edges = ptr; - zvp->zv_edgealloc *= 2; - } - - zvp->zv_edges[zvp->zv_edgecount++] = zep; - - return (0); -} - -static int -zfs_edge_compare(const void *a, const void *b) -{ - const zfs_edge_t *ea = *((zfs_edge_t **)a); - const zfs_edge_t *eb = *((zfs_edge_t **)b); - - if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg) - return (-1); - if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg) - return (1); - return (0); -} - -/* - * Sort the given vertex edges according to the creation txg of each vertex. 
- */ -static void -zfs_vertex_sort_edges(zfs_vertex_t *zvp) -{ - if (zvp->zv_edgecount == 0) - return; - - qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *), - zfs_edge_compare); -} - -/* - * Construct a new graph object. We allow the size to be specified as a - * parameter so in the future we can size the hash according to the number of - * datasets in the pool. - */ -static zfs_graph_t * -zfs_graph_create(libzfs_handle_t *hdl, const char *dataset, size_t size) -{ - zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t)); - - if (zgp == NULL) - return (NULL); - - zgp->zg_size = size; - if ((zgp->zg_hash = zfs_alloc(hdl, - size * sizeof (zfs_vertex_t *))) == NULL) { - free(zgp); - return (NULL); - } - - zgp->zg_root = dataset; - zgp->zg_clone_count = 0; - - return (zgp); -} - -/* - * Destroy a graph object. We have to iterate over all the hash chains, - * destroying each vertex in the process. - */ -static void -zfs_graph_destroy(zfs_graph_t *zgp) -{ - int i; - zfs_vertex_t *current, *next; - - for (i = 0; i < zgp->zg_size; i++) { - current = zgp->zg_hash[i]; - while (current != NULL) { - next = current->zv_next; - zfs_vertex_destroy(current); - current = next; - } - } - - free(zgp->zg_hash); - free(zgp); -} - -/* - * Graph hash function. Classic bernstein k=33 hash function, taken from - * usr/src/cmd/sgs/tools/common/strhash.c - */ -static size_t -zfs_graph_hash(zfs_graph_t *zgp, const char *str) -{ - size_t hash = 5381; - int c; - - while ((c = *str++) != 0) - hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ - - return (hash % zgp->zg_size); -} - -/* - * Given a dataset name, finds the associated vertex, creating it if necessary. - */ -static zfs_vertex_t * -zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset, - uint64_t txg) -{ - size_t idx = zfs_graph_hash(zgp, dataset); - zfs_vertex_t *zvp; - - for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) { - if (strcmp(zvp->zv_dataset, dataset) == 0) { - if (zvp->zv_txg == 0) - zvp->zv_txg = txg; - return (zvp); - } - } - - if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL) - return (NULL); - - zvp->zv_next = zgp->zg_hash[idx]; - zvp->zv_txg = txg; - zgp->zg_hash[idx] = zvp; - zgp->zg_nvertex++; - - return (zvp); -} - -/* - * Given two dataset names, create an edge between them. For the source vertex, - * mark 'zv_visited' to indicate that we have seen this vertex, and not simply - * created it as a destination of another edge. If 'dest' is NULL, then this - * is an individual vertex (i.e. the starting vertex), so don't add an edge. - */ -static int -zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source, - const char *dest, uint64_t txg) -{ - zfs_vertex_t *svp, *dvp; - - if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL) - return (-1); - svp->zv_visited = VISIT_SEEN; - if (dest != NULL) { - dvp = zfs_graph_lookup(hdl, zgp, dest, txg); - if (dvp == NULL) - return (-1); - if (zfs_vertex_add_edge(hdl, svp, dvp) != 0) - return (-1); - } - - return (0); -} - -/* - * Iterate over all children of the given dataset, adding any vertices - * as necessary. Returns -1 if there was an error, or 0 otherwise. - * This is a simple recursive algorithm - the ZFS namespace typically - * is very flat. We manually invoke the necessary ioctl() calls to - * avoid the overhead and additional semantics of zfs_open(). 
- */ -static int -iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) -{ - zfs_cmd_t zc = {"\0"}; - zfs_vertex_t *zvp; - - /* - * Look up the source vertex, and avoid it if we've seen it before. - */ - zvp = zfs_graph_lookup(hdl, zgp, dataset, 0); - if (zvp == NULL) - return (-1); - if (zvp->zv_visited == VISIT_SEEN) - return (0); - - /* - * Iterate over all children - */ - for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0; - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) { - /* - * Get statistics for this dataset, to determine the type of the - * dataset and clone statistics. If this fails, the dataset has - * since been removed, and we're pretty much screwed anyway. - */ - zc.zc_objset_stats.dds_origin[0] = '\0'; - if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) - continue; - - if (zc.zc_objset_stats.dds_origin[0] != '\0') { - if (zfs_graph_add(hdl, zgp, - zc.zc_objset_stats.dds_origin, zc.zc_name, - zc.zc_objset_stats.dds_creation_txg) != 0) - return (-1); - /* - * Count origins only if they are contained in the graph - */ - if (isa_child_of(zc.zc_objset_stats.dds_origin, - zgp->zg_root)) - zgp->zg_clone_count--; - } - - /* - * Add an edge between the parent and the child. - */ - if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name, - zc.zc_objset_stats.dds_creation_txg) != 0) - return (-1); - - /* - * Recursively visit child - */ - if (iterate_children(hdl, zgp, zc.zc_name)) - return (-1); - } - - /* - * Now iterate over all snapshots. - */ - bzero(&zc, sizeof (zc)); - - for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0; - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) { - - /* - * Get statistics for this dataset, to determine the type of the - * dataset and clone statistics. If this fails, the dataset has - * since been removed, and we're pretty much screwed anyway. - */ - if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) - continue; - - /* - * Add an edge between the parent and the child. - */ - if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name, - zc.zc_objset_stats.dds_creation_txg) != 0) - return (-1); - - zgp->zg_clone_count += zc.zc_objset_stats.dds_num_clones; - } - - zvp->zv_visited = VISIT_SEEN; - - return (0); -} - -/* - * Returns false if there are no snapshots with dependent clones in this - * subtree or if all of those clones are also in this subtree. Returns - * true if there is an error or there are external dependents. - */ -static boolean_t -external_dependents(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) -{ - zfs_cmd_t zc = {"\0"}; - - /* - * Check whether this dataset is a clone or has clones since - * iterate_children() only checks the children. - */ - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) - return (B_TRUE); - - if (zc.zc_objset_stats.dds_origin[0] != '\0') { - if (zfs_graph_add(hdl, zgp, - zc.zc_objset_stats.dds_origin, zc.zc_name, - zc.zc_objset_stats.dds_creation_txg) != 0) - return (B_TRUE); - if (isa_child_of(zc.zc_objset_stats.dds_origin, dataset)) - zgp->zg_clone_count--; - } - - if ((zc.zc_objset_stats.dds_num_clones) || - iterate_children(hdl, zgp, dataset)) - return (B_TRUE); - - return (zgp->zg_clone_count != 0); -} - -/* - * Construct a complete graph of all necessary vertices. First, iterate over - * only our object's children. 
If no cloned snapshots are found, or all of - * the cloned snapshots are in this subtree then return a graph of the subtree. - * Otherwise, start at the root of the pool and iterate over all datasets. - */ -static zfs_graph_t * -construct_graph(libzfs_handle_t *hdl, const char *dataset) -{ - zfs_graph_t *zgp = zfs_graph_create(hdl, dataset, ZFS_GRAPH_SIZE); - int ret = 0; - - if (zgp == NULL) - return (zgp); - - if ((strchr(dataset, '/') == NULL) || - (external_dependents(hdl, zgp, dataset))) { - /* - * Determine pool name and try again. - */ - int len = strcspn(dataset, "/@") + 1; - char *pool = zfs_alloc(hdl, len); - - if (pool == NULL) { - zfs_graph_destroy(zgp); - return (NULL); - } - (void) strlcpy(pool, dataset, len); - - if (iterate_children(hdl, zgp, pool) == -1 || - zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) { - free(pool); - zfs_graph_destroy(zgp); - return (NULL); - } - free(pool); - } - - if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) { - zfs_graph_destroy(zgp); - return (NULL); - } - - return (zgp); -} - -/* - * Given a graph, do a recursive topological sort into the given array. This is - * really just a depth first search, so that the deepest nodes appear first. - * hijack the 'zv_visited' marker to avoid visiting the same vertex twice. - */ -static int -topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result, - size_t *idx, zfs_vertex_t *zgv) -{ - int i; - - if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) { - /* - * If we've already seen this vertex as part of our depth-first - * search, then we have a cyclic dependency, and we must return - * an error. - */ - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "recursive dependency at '%s'"), - zgv->zv_dataset); - return (zfs_error(hdl, EZFS_RECURSIVE, - dgettext(TEXT_DOMAIN, - "cannot determine dependent datasets"))); - } else if (zgv->zv_visited >= VISIT_SORT_PRE) { - /* - * If we've already processed this as part of the topological - * sort, then don't bother doing so again. - */ - return (0); - } - - zgv->zv_visited = VISIT_SORT_PRE; - - /* avoid doing a search if we don't have to */ - zfs_vertex_sort_edges(zgv); - for (i = 0; i < zgv->zv_edgecount; i++) { - if (topo_sort(hdl, allowrecursion, result, idx, - zgv->zv_edges[i]->ze_dest) != 0) - return (-1); - } - - /* we may have visited this in the course of the above */ - if (zgv->zv_visited == VISIT_SORT_POST) - return (0); - - if ((result[*idx] = zfs_alloc(hdl, - strlen(zgv->zv_dataset) + 1)) == NULL) - return (-1); - - (void) strcpy(result[*idx], zgv->zv_dataset); - *idx += 1; - zgv->zv_visited = VISIT_SORT_POST; - return (0); -} - -/* - * The only public interface for this file. Do the dirty work of constructing a - * child list for the given object. Construct the graph, do the toplogical - * sort, and then return the array of strings to the caller. - * - * The 'allowrecursion' parameter controls behavior when cycles are found. If - * it is set, the the cycle is ignored and the results returned as if the cycle - * did not exist. If it is not set, then the routine will generate an error if - * a cycle is found. 
- */ -int -get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion, - const char *dataset, char ***result, size_t *count) -{ - zfs_graph_t *zgp; - zfs_vertex_t *zvp; - - if ((zgp = construct_graph(hdl, dataset)) == NULL) - return (-1); - - if ((*result = zfs_alloc(hdl, - zgp->zg_nvertex * sizeof (char *))) == NULL) { - zfs_graph_destroy(zgp); - return (-1); - } - - if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) { - free(*result); - zfs_graph_destroy(zgp); - return (-1); - } - - *count = 0; - if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) { - free(*result); - zfs_graph_destroy(zgp); - return (-1); - } - - /* - * Get rid of the last entry, which is our starting vertex and not - * strictly a dependent. - */ - assert(*count > 0); - free((*result)[*count - 1]); - (*count)--; - - zfs_graph_destroy(zgp); - - return (0); -}
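
For reference, the header comment of the removed file describes the core technique: build a directed graph of parent/child and origin/clone dependencies, then emit a safe ordering with a depth-first topological sort, treating re-entry of a vertex that is still being visited as a cycle that can either be reported or ignored (the old 'allowrecursion' behaviour). Below is a minimal standalone sketch of that idea. It is not libzfs code; the type names and the tiny example hierarchy (loosely echoing the home/bar, home/baz@yesterday -> foo picture in the removed comment) are hypothetical.

#include <stdio.h>

/* Visit states, playing the role of VISIT_SORT_PRE/VISIT_SORT_POST above. */
enum { UNSEEN = 0, IN_PROGRESS, DONE };

typedef struct node {
	const char *name;
	struct node **edges;	/* datasets that depend on this one */
	int nedges;
	int state;
} node_t;

/*
 * Depth-first post-order walk.  A vertex re-entered while still IN_PROGRESS
 * closes a cycle; report it unless allowcycle is set.  Returns 0 on success,
 * -1 on a detected cycle.
 */
static int
topo_visit(node_t *n, int allowcycle, const char **out, size_t *idx)
{
	if (n->state == IN_PROGRESS)
		return (allowcycle ? 0 : -1);
	if (n->state == DONE)
		return (0);

	n->state = IN_PROGRESS;
	for (int i = 0; i < n->nedges; i++) {
		if (topo_visit(n->edges[i], allowcycle, out, idx) != 0)
			return (-1);
	}
	n->state = DONE;
	out[(*idx)++] = n->name;	/* post-order: dependents were emitted first */
	return (0);
}

int
main(void)
{
	/* home -> { bar, baz }, baz -> @yesterday -> foo (clone of the snapshot) */
	node_t foo = { "foo", NULL, 0, 0 };
	node_t *snap_edges[] = { &foo };
	node_t snap = { "home/baz@yesterday", snap_edges, 1, 0 };
	node_t *baz_edges[] = { &snap };
	node_t baz = { "home/baz", baz_edges, 1, 0 };
	node_t bar = { "home/bar", NULL, 0, 0 };
	node_t *home_edges[] = { &bar, &baz };
	node_t home = { "home", home_edges, 2, 0 };

	const char *order[8];
	size_t n = 0;

	if (topo_visit(&home, 0, order, &n) != 0)
		return (1);

	for (size_t i = 0; i < n; i++)
		(void) printf("%s\n", order[i]);
	return (0);
}

Reading the output front to back lists each name only after everything reachable through it, i.e. the deepest dependents come first, which is the same post-order shape the removed topo_sort() filled into its result array.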
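
The removed zfs_graph_hash()/zfs_graph_lookup() pair indexes vertices by dataset name in a fixed-size hash table with chaining, using the classic Bernstein k=33 string hash, and creates the vertex on a miss. A self-contained sketch of that lookup-or-create pattern follows; the names are made up, and portable snprintf() stands in for strlcpy().

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TABLE_SIZE 1027			/* same fixed size as ZFS_GRAPH_SIZE */

typedef struct vertex {
	char name[256];			/* stand-in for ZFS_MAX_DATASET_NAME_LEN */
	struct vertex *next;		/* hash-chain link */
} vertex_t;

typedef struct graph {
	vertex_t *hash[TABLE_SIZE];
} graph_t;

/* Classic Bernstein k=33 string hash (hash * 33 + c), reduced mod table size. */
size_t
name_hash(const char *s)
{
	size_t h = 5381;
	int c;

	while ((c = *s++) != 0)
		h = ((h << 5) + h) + c;
	return (h % TABLE_SIZE);
}

/* Find a vertex by dataset name, creating and chaining it on a miss. */
vertex_t *
graph_lookup(graph_t *g, const char *name)
{
	size_t idx = name_hash(name);
	vertex_t *v;

	for (v = g->hash[idx]; v != NULL; v = v->next)
		if (strcmp(v->name, name) == 0)
			return (v);

	if ((v = calloc(1, sizeof (*v))) == NULL)
		return (NULL);
	(void) snprintf(v->name, sizeof (v->name), "%s", name);
	v->next = g->hash[idx];
	g->hash[idx] = v;
	return (v);
}

A real implementation also has to walk every bucket and free the chains on teardown, which is what the removed zfs_graph_destroy() did.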
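
Finally, each vertex in the removed code kept its outgoing edges in a small array that starts at MIN_EDGECOUNT slots and doubles when full, and the edges were qsort()ed by the destination's creation txg so snapshots get visited in chronological order. A rough, self-contained equivalent follows, again with hypothetical names; the original grew the array with zfs_realloc() and never started it empty.

#include <stdint.h>
#include <stdlib.h>

typedef struct edge {
	struct vtx *dest;
} edge_t;

typedef struct vtx {
	uint64_t txg;			/* creation transaction group */
	edge_t **edges;
	int nedges;
	int alloc;
} vtx_t;

/* Append an edge, doubling the array when it fills (starts at 4 slots). */
int
add_edge(vtx_t *v, vtx_t *dest)
{
	if (v->nedges == v->alloc) {
		int newalloc = (v->alloc == 0) ? 4 : v->alloc * 2;
		edge_t **p = realloc(v->edges, newalloc * sizeof (edge_t *));

		if (p == NULL)
			return (-1);
		v->edges = p;
		v->alloc = newalloc;
	}

	edge_t *e = malloc(sizeof (edge_t));
	if (e == NULL)
		return (-1);
	e->dest = dest;
	v->edges[v->nedges++] = e;
	return (0);
}

/* Order edges by the destination's creation txg, oldest first. */
static int
edge_cmp(const void *a, const void *b)
{
	const edge_t *ea = *(edge_t * const *)a;
	const edge_t *eb = *(edge_t * const *)b;

	return ((ea->dest->txg > eb->dest->txg) - (ea->dest->txg < eb->dest->txg));
}

void
sort_edges(vtx_t *v)
{
	if (v->nedges > 1)
		qsort(v->edges, v->nedges, sizeof (edge_t *), edge_cmp);
}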