Skip to content

Commit

Permalink
btrfs: track refs in a rb_tree instead of a list
Browse files Browse the repository at this point in the history
If we get a significant amount of delayed refs for a single block (think
modifying multiple snapshots) we can end up spending an ungodly amount
of time looping through all of the entries trying to see if they can be
merged.  This is because we only add them to a list, so we have O(2n)
for every ref head.  This doesn't make any sense as we likely have refs
for different roots, and so they cannot be merged.  Tracking in a tree
will allow us to break as soon as we hit an entry that doesn't match,
making our worst case O(n).

With this we can also merge entries more easily.  Before we had to hope
that matching refs were on the ends of our list, but with the tree we
can search down to exact matches and merge them at insert time.

Signed-off-by: Josef Bacik <[email protected]>
Signed-off-by: David Sterba <[email protected]>
  • Loading branch information
josefbacik authored and kdave committed Nov 1, 2017
1 parent 1d148e5 commit 0e0adbc
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 67 deletions.
5 changes: 4 additions & 1 deletion fs/btrfs/backref.c
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
struct btrfs_key key;
struct btrfs_key tmp_op_key;
struct btrfs_key *op_key = NULL;
struct rb_node *n;
int count;
int ret = 0;

Expand All @@ -782,7 +783,9 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
}

spin_lock(&head->lock);
list_for_each_entry(node, &head->ref_list, list) {
for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
node = rb_entry(n, struct btrfs_delayed_ref_node,
ref_node);
if (node->seq > seq)
continue;

Expand Down
108 changes: 56 additions & 52 deletions fs/btrfs/delayed-ref.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,34 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
return NULL;
}

/*
 * Insert @ins into @root, ordered by comp_refs().
 *
 * Returns the already-present node when an exact match (comp_refs() == 0)
 * exists — in that case @ins is NOT linked and the caller is expected to
 * merge the two refs.  Returns NULL once @ins has been linked and
 * rebalanced into the tree.
 */
static struct btrfs_delayed_ref_node* tree_insert(struct rb_root *root,
		struct btrfs_delayed_ref_node *ins)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_delayed_ref_node *cur;
	int cmp;

	/* Walk down to the insertion point, bailing out on an exact match. */
	while (*link) {
		parent = *link;
		cur = rb_entry(parent, struct btrfs_delayed_ref_node,
			       ref_node);
		cmp = comp_refs(ins, cur, true);
		if (cmp < 0)
			link = &parent->rb_left;
		else if (cmp > 0)
			link = &parent->rb_right;
		else
			return cur;
	}

	rb_link_node(&ins->ref_node, parent, link);
	rb_insert_color(&ins->ref_node, root);
	return NULL;
}

/*
* find an head entry based on bytenr. This returns the delayed ref
* head if it was able to find one, or NULL if nothing was in that spot.
Expand Down Expand Up @@ -212,7 +240,8 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref)
{
assert_spin_locked(&head->lock);
list_del(&ref->list);
rb_erase(&ref->ref_node, &head->ref_tree);
RB_CLEAR_NODE(&ref->ref_node);
if (!list_empty(&ref->add_list))
list_del(&ref->add_list);
ref->in_tree = 0;
Expand All @@ -229,24 +258,18 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
u64 seq)
{
struct btrfs_delayed_ref_node *next;
struct rb_node *node = rb_next(&ref->ref_node);
bool done = false;

next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
list);
while (!done && &next->list != &head->ref_list) {
while (!done && node) {
int mod;
struct btrfs_delayed_ref_node *next2;

next2 = list_next_entry(next, list);

if (next == ref)
goto next;

next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
node = rb_next(node);
if (seq && next->seq >= seq)
goto next;

break;
if (comp_refs(ref, next, false))
goto next;
break;

if (ref->action == next->action) {
mod = next->ref_mod;
Expand All @@ -270,8 +293,6 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
}
next:
next = next2;
}

return done;
Expand All @@ -283,11 +304,12 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head)
{
struct btrfs_delayed_ref_node *ref;
struct rb_node *node;
u64 seq = 0;

assert_spin_locked(&head->lock);

if (list_empty(&head->ref_list))
if (RB_EMPTY_ROOT(&head->ref_tree))
return;

/* We don't have too many refs to merge for data. */
Expand All @@ -304,22 +326,13 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
}
spin_unlock(&fs_info->tree_mod_seq_lock);

ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
list);
while (&ref->list != &head->ref_list) {
again:
for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
if (seq && ref->seq >= seq)
goto next;

if (merge_ref(trans, delayed_refs, head, ref, seq)) {
if (list_empty(&head->ref_list))
break;
ref = list_first_entry(&head->ref_list,
struct btrfs_delayed_ref_node,
list);
continue;
}
next:
ref = list_next_entry(ref, list);
if (merge_ref(trans, delayed_refs, head, ref, seq))
goto again;
}
}

Expand Down Expand Up @@ -402,25 +415,19 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans)
* Return 0 for insert.
* Return >0 for merge.
*/
static int
add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *root,
struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *ref)
static int insert_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *root,
struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *ref)
{
struct btrfs_delayed_ref_node *exist;
int mod;
int ret = 0;

spin_lock(&href->lock);
/* Check whether we can merge the tail node with ref */
if (list_empty(&href->ref_list))
goto add_tail;
exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
list);
/* No need to compare bytenr nor is_head */
if (comp_refs(exist, ref, true))
goto add_tail;
exist = tree_insert(&href->ref_tree, ref);
if (!exist)
goto inserted;

/* Now we are sure we can merge */
ret = 1;
Expand Down Expand Up @@ -451,9 +458,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
drop_delayed_ref(trans, root, href, exist);
spin_unlock(&href->lock);
return ret;

add_tail:
list_add_tail(&ref->list, &href->ref_list);
inserted:
if (ref->action == BTRFS_ADD_DELAYED_REF)
list_add_tail(&ref->add_list, &href->ref_add_list);
atomic_inc(&root->num_entries);
Expand Down Expand Up @@ -593,7 +598,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
head_ref->ref_mod = count_mod;
head_ref->must_insert_reserved = must_insert_reserved;
head_ref->is_data = is_data;
INIT_LIST_HEAD(&head_ref->ref_list);
head_ref->ref_tree = RB_ROOT;
INIT_LIST_HEAD(&head_ref->ref_add_list);
RB_CLEAR_NODE(&head_ref->href_node);
head_ref->processing = 0;
Expand Down Expand Up @@ -685,7 +690,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
ref->seq = seq;
INIT_LIST_HEAD(&ref->list);
RB_CLEAR_NODE(&ref->ref_node);
INIT_LIST_HEAD(&ref->add_list);

full_ref = btrfs_delayed_node_to_tree_ref(ref);
Expand All @@ -699,7 +704,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,

trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);

ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);

/*
* XXX: memory should be freed at the same level allocated.
Expand Down Expand Up @@ -742,7 +747,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
ref->seq = seq;
INIT_LIST_HEAD(&ref->list);
RB_CLEAR_NODE(&ref->ref_node);
INIT_LIST_HEAD(&ref->add_list);

full_ref = btrfs_delayed_node_to_data_ref(ref);
Expand All @@ -758,8 +763,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,

trace_add_delayed_data_ref(fs_info, ref, full_ref, action);

ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);

ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
if (ret > 0)
kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
}
Expand Down
5 changes: 2 additions & 3 deletions fs/btrfs/delayed-ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@
#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */

struct btrfs_delayed_ref_node {
/*data/tree ref use list, stored in ref_head->ref_list. */
struct list_head list;
struct rb_node ref_node;
/*
* If action is BTRFS_ADD_DELAYED_REF, also link this node to
* ref_head->ref_add_list, then we do not need to iterate the
Expand Down Expand Up @@ -92,7 +91,7 @@ struct btrfs_delayed_ref_head {
struct mutex mutex;

spinlock_t lock;
struct list_head ref_list;
struct rb_root ref_tree;
/* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
struct list_head ref_add_list;

Expand Down
10 changes: 6 additions & 4 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -4113,7 +4113,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,

while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
struct btrfs_delayed_ref_head *head;
struct btrfs_delayed_ref_node *tmp;
struct rb_node *n;
bool pin_bytes = false;

head = rb_entry(node, struct btrfs_delayed_ref_head,
Expand All @@ -4129,10 +4129,12 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
continue;
}
spin_lock(&head->lock);
list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
list) {
while ((n = rb_first(&head->ref_tree)) != NULL) {
ref = rb_entry(n, struct btrfs_delayed_ref_node,
ref_node);
ref->in_tree = 0;
list_del(&ref->list);
rb_erase(&ref->ref_node, &head->ref_tree);
RB_CLEAR_NODE(&ref->ref_node);
if (!list_empty(&ref->add_list))
list_del(&ref->add_list);
atomic_dec(&delayed_refs->num_entries);
Expand Down
21 changes: 14 additions & 7 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -2519,7 +2519,7 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
struct btrfs_delayed_ref_node *ref;

if (list_empty(&head->ref_list))
if (RB_EMPTY_ROOT(&head->ref_tree))
return NULL;

/*
Expand All @@ -2532,8 +2532,8 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
return list_first_entry(&head->ref_add_list,
struct btrfs_delayed_ref_node, add_list);

ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
list);
ref = rb_entry(rb_first(&head->ref_tree),
struct btrfs_delayed_ref_node, ref_node);
ASSERT(list_empty(&ref->add_list));
return ref;
}
Expand Down Expand Up @@ -2593,7 +2593,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
spin_unlock(&head->lock);
spin_lock(&delayed_refs->lock);
spin_lock(&head->lock);
if (!list_empty(&head->ref_list) || head->extent_op) {
if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op) {
spin_unlock(&head->lock);
spin_unlock(&delayed_refs->lock);
return 1;
Expand Down Expand Up @@ -2740,7 +2740,8 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,

actual_count++;
ref->in_tree = 0;
list_del(&ref->list);
rb_erase(&ref->ref_node, &locked_ref->ref_tree);
RB_CLEAR_NODE(&ref->ref_node);
if (!list_empty(&ref->add_list))
list_del(&ref->add_list);
/*
Expand Down Expand Up @@ -3138,6 +3139,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
struct btrfs_delayed_data_ref *data_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_transaction *cur_trans;
struct rb_node *node;
int ret = 0;

cur_trans = root->fs_info->running_transaction;
Expand Down Expand Up @@ -3170,7 +3172,12 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
spin_unlock(&delayed_refs->lock);

spin_lock(&head->lock);
list_for_each_entry(ref, &head->ref_list, list) {
/*
* XXX: We should replace this with a proper search function in the
* future.
*/
for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
/* If it's a shared ref we know a cross reference exists */
if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
ret = 1;
Expand Down Expand Up @@ -7141,7 +7148,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
goto out_delayed_unlock;

spin_lock(&head->lock);
if (!list_empty(&head->ref_list))
if (!RB_EMPTY_ROOT(&head->ref_tree))
goto out;

if (head->extent_op) {
Expand Down

0 comments on commit 0e0adbc

Please sign in to comment.