Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (C) 2009 Oracle. All rights reserved.
4 : */
5 :
6 : #include <linux/sched.h>
7 : #include <linux/slab.h>
8 : #include <linux/sort.h>
9 : #include "messages.h"
10 : #include "ctree.h"
11 : #include "delayed-ref.h"
12 : #include "transaction.h"
13 : #include "qgroup.h"
14 : #include "space-info.h"
15 : #include "tree-mod-log.h"
16 : #include "fs.h"
17 :
18 : struct kmem_cache *btrfs_delayed_ref_head_cachep;
19 : struct kmem_cache *btrfs_delayed_tree_ref_cachep;
20 : struct kmem_cache *btrfs_delayed_data_ref_cachep;
21 : struct kmem_cache *btrfs_delayed_extent_op_cachep;
22 : /*
23 : * delayed back reference update tracking. For subvolume trees
24 : * we queue up extent allocations and backref maintenance for
25 : * delayed processing. This avoids deep call chains where we
26 : * add extents in the middle of btrfs_search_slot, and it allows
27 : * us to buffer up frequently modified backrefs in an rb tree instead
28 : * of hammering updates on the extent allocation tree.
29 : */
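Editorial aside: a minimal, standalone sketch of the buffering idea described above, assuming nothing about btrfs internals. Reference-count deltas are keyed by bytenr, opposite modifications cancel before anything touches the extent allocation tree, and only net non-zero deltas are ever applied. All names here (struct pending_mod, queue_mod(), flush_mods()) are hypothetical simplifications, not btrfs APIs; the kernel uses rbtrees of delayed ref heads and nodes rather than this list.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* One buffered modification: the net ref-count delta for one extent. */
struct pending_mod {
	uint64_t bytenr;		/* logical address of the extent */
	long ref_mod;			/* accumulated +/- reference count */
	struct pending_mod *next;
};

/* Queue a +1/-1 ref change, merging with an existing entry for the same bytenr. */
static void queue_mod(struct pending_mod **head, uint64_t bytenr, int delta)
{
	struct pending_mod *m;

	for (m = *head; m; m = m->next) {
		if (m->bytenr == bytenr) {
			m->ref_mod += delta;	/* opposite mods cancel here */
			return;
		}
	}
	m = malloc(sizeof(*m));
	if (!m)
		return;			/* sketch: ignore allocation failure */
	m->bytenr = bytenr;
	m->ref_mod = delta;
	m->next = *head;
	*head = m;
}

/* "Run" the delayed mods: only net, non-zero deltas would reach the tree. */
static void flush_mods(struct pending_mod *head)
{
	for (; head; head = head->next)
		if (head->ref_mod)
			printf("apply %+ld to extent at %llu\n", head->ref_mod,
			       (unsigned long long)head->bytenr);
}

int main(void)
{
	struct pending_mod *mods = NULL;

	queue_mod(&mods, 4096, +1);	/* add a backref */
	queue_mod(&mods, 4096, -1);	/* drop it again: net 0, never applied */
	queue_mod(&mods, 8192, +1);
	flush_mods(mods);		/* prints only the 8192 entry */
	return 0;
}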
30 :
31 0 : bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
32 : {
33 0 : struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
34 0 : struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
35 0 : bool ret = false;
36 0 : u64 reserved;
37 :
38 0 : spin_lock(&global_rsv->lock);
39 0 : reserved = global_rsv->reserved;
40 0 : spin_unlock(&global_rsv->lock);
41 :
42 : /*
43 : * Since the global reserve is just kind of magic we don't really want
44 : * to rely on it to save our bacon, so if our size is more than the
45 : * delayed_refs_rsv and the global rsv then it's time to think about
46 : * bailing.
47 : */
48 0 : spin_lock(&delayed_refs_rsv->lock);
49 0 : reserved += delayed_refs_rsv->reserved;
50 0 : if (delayed_refs_rsv->size >= reserved)
51 0 : ret = true;
52 0 : spin_unlock(&delayed_refs_rsv->lock);
53 0 : return ret;
54 : }
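A worked example of the check above, with purely illustrative numbers: if global_rsv->reserved is 8 MiB, delayed_refs_rsv->reserved is 2 MiB, and delayed_refs_rsv->size has grown to 12 MiB, the combined 10 MiB of reservation no longer covers what the delayed refs want, so the function returns true. A standalone model of that comparison (names are simplified stand-ins for the btrfs_block_rsv fields):

#include <stdbool.h>
#include <stdint.h>

/* True when the delayed refs want more space than both reserves hold together. */
static bool low_on_delayed_ref_space(uint64_t global_reserved,
				     uint64_t delayed_reserved,
				     uint64_t delayed_size)
{
	return delayed_size >= global_reserved + delayed_reserved;
}

/* low_on_delayed_ref_space(8 << 20, 2 << 20, 12 << 20) == true */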
55 :
56 : /*
57 : * Release a ref head's reservation.
58 : *
59 : * @fs_info: the filesystem
60 : * @nr: number of items to drop
61 : *
62 : * Drops the delayed ref head's count from the delayed refs rsv and free any
63 : * excess reservation we had.
64 : */
65 0 : void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
66 : {
67 0 : struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
68 0 : const u64 num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, nr);
69 0 : u64 released = 0;
70 :
71 0 : released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
72 0 : if (released)
73 0 : trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
74 : 0, released, 0);
75 0 : }
76 :
77 : /*
78 : * Adjust the size of the delayed refs rsv.
79 : *
80 : * This is to be called any time we may have adjusted trans->delayed_ref_updates;
81 : * it calculates the additional size and adds it to the delayed_refs_rsv.
82 : */
83 0 : void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
84 : {
85 0 : struct btrfs_fs_info *fs_info = trans->fs_info;
86 0 : struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
87 0 : u64 num_bytes;
88 :
89 0 : if (!trans->delayed_ref_updates)
90 : return;
91 :
92 0 : num_bytes = btrfs_calc_delayed_ref_bytes(fs_info,
93 : trans->delayed_ref_updates);
94 :
95 0 : spin_lock(&delayed_rsv->lock);
96 0 : delayed_rsv->size += num_bytes;
97 0 : delayed_rsv->full = false;
98 0 : spin_unlock(&delayed_rsv->lock);
99 0 : trans->delayed_ref_updates = 0;
100 : }
101 :
102 : /*
103 : * Transfer bytes to our delayed refs rsv.
104 : *
105 : * @fs_info: the filesystem
106 : * @src: source block rsv to transfer from
107 : * @num_bytes: number of bytes to transfer
108 : *
109 : * This transfers up to the num_bytes amount from the src rsv to the
110 : * delayed_refs_rsv. Any extra bytes are returned to the space info.
111 : */
112 0 : void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
113 : struct btrfs_block_rsv *src,
114 : u64 num_bytes)
115 : {
116 0 : struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
117 0 : u64 to_free = 0;
118 :
119 0 : spin_lock(&src->lock);
120 0 : src->reserved -= num_bytes;
121 0 : src->size -= num_bytes;
122 0 : spin_unlock(&src->lock);
123 :
124 0 : spin_lock(&delayed_refs_rsv->lock);
125 0 : if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
126 0 : u64 delta = delayed_refs_rsv->size -
127 : delayed_refs_rsv->reserved;
128 0 : if (num_bytes > delta) {
129 0 : to_free = num_bytes - delta;
130 0 : num_bytes = delta;
131 : }
132 : } else {
133 : to_free = num_bytes;
134 : num_bytes = 0;
135 : }
136 :
137 0 : if (num_bytes)
138 0 : delayed_refs_rsv->reserved += num_bytes;
139 0 : if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
140 0 : delayed_refs_rsv->full = true;
141 0 : spin_unlock(&delayed_refs_rsv->lock);
142 :
143 0 : if (num_bytes)
144 0 : trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
145 : 0, num_bytes, 1);
146 0 : if (to_free)
147 0 : btrfs_space_info_free_bytes_may_use(fs_info,
148 : delayed_refs_rsv->space_info, to_free);
149 0 : }
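A worked example of the clamping above, with illustrative numbers: if the delayed refs rsv has size 10 MiB but only 7 MiB reserved, it is still missing delta = 3 MiB; migrating num_bytes = 5 MiB from the source rsv therefore moves 3 MiB into it and returns the remaining 2 MiB to the space info. A standalone model of just that split (names are simplified stand-ins, not btrfs APIs):

#include <stdint.h>

/*
 * Split a migration into the part the delayed refs rsv still needs
 * (size - reserved, at most) and the excess that goes back to the space info.
 */
static void split_migration(uint64_t rsv_size, uint64_t rsv_reserved,
			    uint64_t num_bytes,
			    uint64_t *moved, uint64_t *returned)
{
	uint64_t delta = rsv_size > rsv_reserved ? rsv_size - rsv_reserved : 0;

	*moved = num_bytes < delta ? num_bytes : delta;
	*returned = num_bytes - *moved;
}

/* split_migration(10 MiB, 7 MiB, 5 MiB, ...) yields moved = 3 MiB, returned = 2 MiB. */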
150 :
151 : /*
152 : * Refill based on our delayed refs usage.
153 : *
154 : * @fs_info: the filesystem
155 : * @flush: control how we can flush for this reservation.
156 : *
157 : * This will refill the delayed block_rsv with up to one item's worth of space and
158 : * will return -ENOSPC if we can't make the reservation.
159 : */
160 0 : int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
161 : enum btrfs_reserve_flush_enum flush)
162 : {
163 0 : struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
164 0 : u64 limit = btrfs_calc_delayed_ref_bytes(fs_info, 1);
165 0 : u64 num_bytes = 0;
166 0 : int ret = -ENOSPC;
167 :
168 0 : spin_lock(&block_rsv->lock);
169 0 : if (block_rsv->reserved < block_rsv->size) {
170 0 : num_bytes = block_rsv->size - block_rsv->reserved;
171 0 : num_bytes = min(num_bytes, limit);
172 : }
173 0 : spin_unlock(&block_rsv->lock);
174 :
175 0 : if (!num_bytes)
176 : return 0;
177 :
178 0 : ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush);
179 0 : if (ret)
180 : return ret;
181 0 : btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false);
182 0 : trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
183 : 0, num_bytes, 1);
184 0 : return 0;
185 : }
186 :
187 : /*
188 : * Compare two delayed tree backrefs with the same bytenr and type.
189 : */
190 0 : static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
191 : struct btrfs_delayed_tree_ref *ref2)
192 : {
193 0 : if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
194 0 : if (ref1->root < ref2->root)
195 : return -1;
196 0 : if (ref1->root > ref2->root)
197 0 : return 1;
198 : } else {
199 0 : if (ref1->parent < ref2->parent)
200 : return -1;
201 0 : if (ref1->parent > ref2->parent)
202 0 : return 1;
203 : }
204 : return 0;
205 : }
206 :
207 : /*
208 : * Compare two delayed data backrefs with the same bytenr and type.
209 : */
210 0 : static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
211 : struct btrfs_delayed_data_ref *ref2)
212 : {
213 0 : if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
214 0 : if (ref1->root < ref2->root)
215 : return -1;
216 0 : if (ref1->root > ref2->root)
217 : return 1;
218 0 : if (ref1->objectid < ref2->objectid)
219 : return -1;
220 0 : if (ref1->objectid > ref2->objectid)
221 : return 1;
222 0 : if (ref1->offset < ref2->offset)
223 : return -1;
224 0 : if (ref1->offset > ref2->offset)
225 0 : return 1;
226 : } else {
227 0 : if (ref1->parent < ref2->parent)
228 : return -1;
229 0 : if (ref1->parent > ref2->parent)
230 0 : return 1;
231 : }
232 : return 0;
233 : }
234 :
235 0 : static int comp_refs(struct btrfs_delayed_ref_node *ref1,
236 : struct btrfs_delayed_ref_node *ref2,
237 : bool check_seq)
238 : {
239 0 : int ret = 0;
240 :
241 0 : if (ref1->type < ref2->type)
242 : return -1;
243 0 : if (ref1->type > ref2->type)
244 : return 1;
245 0 : if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
246 : ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
247 0 : ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
248 : btrfs_delayed_node_to_tree_ref(ref2));
249 : else
250 0 : ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
251 : btrfs_delayed_node_to_data_ref(ref2));
252 0 : if (ret)
253 : return ret;
254 0 : if (check_seq) {
255 0 : if (ref1->seq < ref2->seq)
256 : return -1;
257 0 : if (ref1->seq > ref2->seq)
258 0 : return 1;
259 : }
260 : return 0;
261 : }
262 :
263 : /* Insert a new head ref into the head ref rbtree. */
264 0 : static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
265 : struct rb_node *node)
266 : {
267 0 : struct rb_node **p = &root->rb_root.rb_node;
268 0 : struct rb_node *parent_node = NULL;
269 0 : struct btrfs_delayed_ref_head *entry;
270 0 : struct btrfs_delayed_ref_head *ins;
271 0 : u64 bytenr;
272 0 : bool leftmost = true;
273 :
274 0 : ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
275 0 : bytenr = ins->bytenr;
276 0 : while (*p) {
277 0 : parent_node = *p;
278 0 : entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
279 : href_node);
280 :
281 0 : if (bytenr < entry->bytenr) {
282 0 : p = &(*p)->rb_left;
283 0 : } else if (bytenr > entry->bytenr) {
284 0 : p = &(*p)->rb_right;
285 0 : leftmost = false;
286 : } else {
287 0 : return entry;
288 : }
289 : }
290 :
291 0 : rb_link_node(node, parent_node, p);
292 0 : rb_insert_color_cached(node, root, leftmost);
293 0 : return NULL;
294 : }
295 :
296 0 : static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
297 : struct btrfs_delayed_ref_node *ins)
298 : {
299 0 : struct rb_node **p = &root->rb_root.rb_node;
300 0 : struct rb_node *node = &ins->ref_node;
301 0 : struct rb_node *parent_node = NULL;
302 0 : struct btrfs_delayed_ref_node *entry;
303 0 : bool leftmost = true;
304 :
305 0 : while (*p) {
306 0 : int comp;
307 :
308 0 : parent_node = *p;
309 0 : entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
310 : ref_node);
311 0 : comp = comp_refs(ins, entry, true);
312 0 : if (comp < 0) {
313 0 : p = &(*p)->rb_left;
314 0 : } else if (comp > 0) {
315 0 : p = &(*p)->rb_right;
316 0 : leftmost = false;
317 : } else {
318 0 : return entry;
319 : }
320 : }
321 :
322 0 : rb_link_node(node, parent_node, p);
323 0 : rb_insert_color_cached(node, root, leftmost);
324 0 : return NULL;
325 : }
326 :
327 : static struct btrfs_delayed_ref_head *find_first_ref_head(
328 : struct btrfs_delayed_ref_root *dr)
329 : {
330 0 : struct rb_node *n;
331 0 : struct btrfs_delayed_ref_head *entry;
332 :
333 0 : n = rb_first_cached(&dr->href_root);
334 0 : if (!n)
335 : return NULL;
336 :
337 0 : entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
338 :
339 0 : return entry;
340 : }
341 :
342 : /*
343 : * Find a head entry based on bytenr. This returns the delayed ref head if it
344 : * was able to find one, or NULL if nothing was in that spot. If return_bigger
345 : * is given, the next bigger entry is returned if no exact match is found.
346 : */
347 0 : static struct btrfs_delayed_ref_head *find_ref_head(
348 : struct btrfs_delayed_ref_root *dr, u64 bytenr,
349 : bool return_bigger)
350 : {
351 0 : struct rb_root *root = &dr->href_root.rb_root;
352 0 : struct rb_node *n;
353 0 : struct btrfs_delayed_ref_head *entry;
354 :
355 0 : n = root->rb_node;
356 0 : entry = NULL;
357 0 : while (n) {
358 0 : entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
359 :
360 0 : if (bytenr < entry->bytenr)
361 0 : n = n->rb_left;
362 0 : else if (bytenr > entry->bytenr)
363 0 : n = n->rb_right;
364 : else
365 0 : return entry;
366 : }
367 0 : if (entry && return_bigger) {
368 0 : if (bytenr > entry->bytenr) {
369 0 : n = rb_next(&entry->href_node);
370 0 : if (!n)
371 : return NULL;
372 0 : entry = rb_entry(n, struct btrfs_delayed_ref_head,
373 : href_node);
374 : }
375 0 : return entry;
376 : }
377 : return NULL;
378 : }
379 :
380 0 : int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
381 : struct btrfs_delayed_ref_head *head)
382 : {
383 0 : lockdep_assert_held(&delayed_refs->lock);
384 0 : if (mutex_trylock(&head->mutex))
385 : return 0;
386 :
387 0 : refcount_inc(&head->refs);
388 0 : spin_unlock(&delayed_refs->lock);
389 :
390 0 : mutex_lock(&head->mutex);
391 0 : spin_lock(&delayed_refs->lock);
392 0 : if (RB_EMPTY_NODE(&head->href_node)) {
393 0 : mutex_unlock(&head->mutex);
394 0 : btrfs_put_delayed_ref_head(head);
395 0 : return -EAGAIN;
396 : }
397 0 : btrfs_put_delayed_ref_head(head);
398 0 : return 0;
399 : }
400 :
401 0 : static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs,
402 : struct btrfs_delayed_ref_head *head,
403 : struct btrfs_delayed_ref_node *ref)
404 : {
405 0 : lockdep_assert_held(&head->lock);
406 0 : rb_erase_cached(&ref->ref_node, &head->ref_tree);
407 0 : RB_CLEAR_NODE(&ref->ref_node);
408 0 : if (!list_empty(&ref->add_list))
409 0 : list_del(&ref->add_list);
410 0 : btrfs_put_delayed_ref(ref);
411 0 : atomic_dec(&delayed_refs->num_entries);
412 0 : }
413 :
414 0 : static bool merge_ref(struct btrfs_delayed_ref_root *delayed_refs,
415 : struct btrfs_delayed_ref_head *head,
416 : struct btrfs_delayed_ref_node *ref,
417 : u64 seq)
418 : {
419 0 : struct btrfs_delayed_ref_node *next;
420 0 : struct rb_node *node = rb_next(&ref->ref_node);
421 0 : bool done = false;
422 :
423 0 : while (!done && node) {
424 0 : int mod;
425 :
426 0 : next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
427 0 : node = rb_next(node);
428 0 : if (seq && next->seq >= seq)
429 : break;
430 0 : if (comp_refs(ref, next, false))
431 : break;
432 :
433 0 : if (ref->action == next->action) {
434 0 : mod = next->ref_mod;
435 : } else {
436 0 : if (ref->ref_mod < next->ref_mod) {
437 0 : swap(ref, next);
438 0 : done = true;
439 : }
440 0 : mod = -next->ref_mod;
441 : }
442 :
443 0 : drop_delayed_ref(delayed_refs, head, next);
444 0 : ref->ref_mod += mod;
445 0 : if (ref->ref_mod == 0) {
446 0 : drop_delayed_ref(delayed_refs, head, ref);
447 0 : done = true;
448 : } else {
449 : /*
450 : * Can't have multiples of the same ref on a tree block.
451 : */
452 0 : WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
453 : ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
454 : }
455 : }
456 :
457 0 : return done;
458 : }
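A standalone model of the merge arithmetic in merge_ref(): two delayed refs for the same extent and key either add up (same action) or partially cancel (opposite actions), and a result of zero means both nodes are dropped. The enum and struct below are simplified stand-ins; the kernel additionally respects tree-mod-log sequence numbers, which this sketch ignores.

/* Simplified stand-ins for BTRFS_ADD_DELAYED_REF / BTRFS_DROP_DELAYED_REF. */
enum action { ADD, DROP };

struct mod {
	enum action action;
	int ref_mod;		/* always positive; the sign comes from the action */
};

/* Merge b into a; returns the surviving ref_mod, 0 if the pair cancels out. */
static int merge_pair(struct mod *a, const struct mod *b)
{
	if (a->action == b->action) {
		a->ref_mod += b->ref_mod;
	} else if (a->ref_mod >= b->ref_mod) {
		a->ref_mod -= b->ref_mod;
	} else {
		a->action = b->action;		/* the larger side wins */
		a->ref_mod = b->ref_mod - a->ref_mod;
	}
	return a->ref_mod;
}

/*
 * e.g. ADD(3) merged with DROP(1) leaves ADD(2);
 *      ADD(1) merged with DROP(1) leaves nothing, so both nodes are freed.
 */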
459 :
460 0 : void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
461 : struct btrfs_delayed_ref_root *delayed_refs,
462 : struct btrfs_delayed_ref_head *head)
463 : {
464 0 : struct btrfs_delayed_ref_node *ref;
465 0 : struct rb_node *node;
466 0 : u64 seq = 0;
467 :
468 0 : lockdep_assert_held(&head->lock);
469 :
470 0 : if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
471 : return;
472 :
473 : /* We don't have too many refs to merge for data. */
474 0 : if (head->is_data)
475 : return;
476 :
477 0 : seq = btrfs_tree_mod_log_lowest_seq(fs_info);
478 0 : again:
479 0 : for (node = rb_first_cached(&head->ref_tree); node;
480 0 : node = rb_next(node)) {
481 0 : ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
482 0 : if (seq && ref->seq >= seq)
483 0 : continue;
484 0 : if (merge_ref(delayed_refs, head, ref, seq))
485 0 : goto again;
486 : }
487 : }
488 :
489 0 : int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
490 : {
491 0 : int ret = 0;
492 0 : u64 min_seq = btrfs_tree_mod_log_lowest_seq(fs_info);
493 :
494 0 : if (min_seq != 0 && seq >= min_seq) {
495 0 : btrfs_debug(fs_info,
496 : "holding back delayed_ref %llu, lowest is %llu",
497 : seq, min_seq);
498 0 : ret = 1;
499 : }
500 :
501 0 : return ret;
502 : }
503 :
504 0 : struct btrfs_delayed_ref_head *btrfs_select_ref_head(
505 : struct btrfs_delayed_ref_root *delayed_refs)
506 : {
507 0 : struct btrfs_delayed_ref_head *head;
508 :
509 0 : lockdep_assert_held(&delayed_refs->lock);
510 0 : again:
511 0 : head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
512 : true);
513 0 : if (!head && delayed_refs->run_delayed_start != 0) {
514 0 : delayed_refs->run_delayed_start = 0;
515 0 : head = find_first_ref_head(delayed_refs);
516 : }
517 0 : if (!head)
518 0 : return NULL;
519 :
520 0 : while (head->processing) {
521 0 : struct rb_node *node;
522 :
523 0 : node = rb_next(&head->href_node);
524 0 : if (!node) {
525 0 : if (delayed_refs->run_delayed_start == 0)
526 : return NULL;
527 0 : delayed_refs->run_delayed_start = 0;
528 0 : goto again;
529 : }
530 0 : head = rb_entry(node, struct btrfs_delayed_ref_head,
531 : href_node);
532 : }
533 :
534 0 : head->processing = true;
535 0 : WARN_ON(delayed_refs->num_heads_ready == 0);
536 0 : delayed_refs->num_heads_ready--;
537 0 : delayed_refs->run_delayed_start = head->bytenr +
538 0 : head->num_bytes;
539 0 : return head;
540 : }
541 :
542 0 : void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
543 : struct btrfs_delayed_ref_head *head)
544 : {
545 0 : lockdep_assert_held(&delayed_refs->lock);
546 0 : lockdep_assert_held(&head->lock);
547 :
548 0 : rb_erase_cached(&head->href_node, &delayed_refs->href_root);
549 0 : RB_CLEAR_NODE(&head->href_node);
550 0 : atomic_dec(&delayed_refs->num_entries);
551 0 : delayed_refs->num_heads--;
552 0 : if (!head->processing)
553 0 : delayed_refs->num_heads_ready--;
554 0 : }
555 :
556 : /*
557 : * Insert the ref node into the head's ref tree, or merge it with an existing one.
558 : *
559 : * Return false if the ref was inserted.
560 : * Return true if the ref was merged into an existing one (and therefore can be
561 : * freed by the caller).
562 : */
563 0 : static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root,
564 : struct btrfs_delayed_ref_head *href,
565 : struct btrfs_delayed_ref_node *ref)
566 : {
567 0 : struct btrfs_delayed_ref_node *exist;
568 0 : int mod;
569 :
570 0 : spin_lock(&href->lock);
571 0 : exist = tree_insert(&href->ref_tree, ref);
572 0 : if (!exist) {
573 0 : if (ref->action == BTRFS_ADD_DELAYED_REF)
574 0 : list_add_tail(&ref->add_list, &href->ref_add_list);
575 0 : atomic_inc(&root->num_entries);
576 0 : spin_unlock(&href->lock);
577 0 : return false;
578 : }
579 :
580 : /* Now we are sure we can merge */
581 0 : if (exist->action == ref->action) {
582 0 : mod = ref->ref_mod;
583 : } else {
584 : /* Need to change action */
585 0 : if (exist->ref_mod < ref->ref_mod) {
586 0 : exist->action = ref->action;
587 0 : mod = -exist->ref_mod;
588 0 : exist->ref_mod = ref->ref_mod;
589 0 : if (ref->action == BTRFS_ADD_DELAYED_REF)
590 0 : list_add_tail(&exist->add_list,
591 : &href->ref_add_list);
592 0 : else if (ref->action == BTRFS_DROP_DELAYED_REF) {
593 0 : ASSERT(!list_empty(&exist->add_list));
594 0 : list_del(&exist->add_list);
595 : } else {
596 : ASSERT(0);
597 : }
598 : } else
599 0 : mod = -ref->ref_mod;
600 : }
601 0 : exist->ref_mod += mod;
602 :
603 : /* remove the existing ref if its ref_mod dropped to zero */
604 0 : if (exist->ref_mod == 0)
605 0 : drop_delayed_ref(root, href, exist);
606 0 : spin_unlock(&href->lock);
607 0 : return true;
608 : }
609 :
610 : /*
611 : * Helper function to update the accounting in the head ref;
612 : * existing and update must have the same bytenr.
613 : */
614 0 : static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
615 : struct btrfs_delayed_ref_head *existing,
616 : struct btrfs_delayed_ref_head *update)
617 : {
618 0 : struct btrfs_delayed_ref_root *delayed_refs =
619 0 : &trans->transaction->delayed_refs;
620 0 : struct btrfs_fs_info *fs_info = trans->fs_info;
621 0 : int old_ref_mod;
622 :
623 0 : BUG_ON(existing->is_data != update->is_data);
624 :
625 0 : spin_lock(&existing->lock);
626 0 : if (update->must_insert_reserved) {
627 : /* if the extent was freed and then
628 : * reallocated before the delayed ref
629 : * entries were processed, we can end up
630 : * with an existing head ref without
631 : * the must_insert_reserved flag set.
632 : * Set it again here
633 : */
634 0 : existing->must_insert_reserved = update->must_insert_reserved;
635 :
636 : /*
637 : * update the num_bytes so we make sure the accounting
638 : * is done correctly
639 : */
640 0 : existing->num_bytes = update->num_bytes;
641 :
642 : }
643 :
644 0 : if (update->extent_op) {
645 0 : if (!existing->extent_op) {
646 0 : existing->extent_op = update->extent_op;
647 : } else {
648 0 : if (update->extent_op->update_key) {
649 0 : memcpy(&existing->extent_op->key,
650 : &update->extent_op->key,
651 : sizeof(update->extent_op->key));
652 0 : existing->extent_op->update_key = true;
653 : }
654 0 : if (update->extent_op->update_flags) {
655 0 : existing->extent_op->flags_to_set |=
656 0 : update->extent_op->flags_to_set;
657 0 : existing->extent_op->update_flags = true;
658 : }
659 0 : btrfs_free_delayed_extent_op(update->extent_op);
660 : }
661 : }
662 : /*
663 : * Update the reference mod on the head to reflect this new operation. We
664 : * only need the lock for this case because we could be processing it
665 : * currently; for refs we just added we know we're a-ok.
666 : */
667 0 : old_ref_mod = existing->total_ref_mod;
668 0 : existing->ref_mod += update->ref_mod;
669 0 : existing->total_ref_mod += update->ref_mod;
670 :
671 : /*
672 : * If we are going from a positive ref mod to a negative one, or vice
673 : * versa, we need to make sure to adjust pending_csums accordingly.
674 : */
675 0 : if (existing->is_data) {
676 0 : u64 csum_leaves =
677 0 : btrfs_csum_bytes_to_leaves(fs_info,
678 : existing->num_bytes);
679 :
680 0 : if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
681 0 : delayed_refs->pending_csums -= existing->num_bytes;
682 0 : btrfs_delayed_refs_rsv_release(fs_info, csum_leaves);
683 : }
684 0 : if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
685 0 : delayed_refs->pending_csums += existing->num_bytes;
686 0 : trans->delayed_ref_updates += csum_leaves;
687 : }
688 : }
689 :
690 0 : spin_unlock(&existing->lock);
691 0 : }
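A worked example of the sign-change accounting above (numbers are illustrative): suppose a data extent's head currently has total_ref_mod = +1 and an update with ref_mod = -2 arrives. The total drops to -1, crossing from non-negative to negative, so the extent's num_bytes are added to pending_csums and the transaction reserves btrfs_csum_bytes_to_leaves(fs_info, num_bytes) more items for the checksum deletions that freeing it will require. If a later +1 update brings the total back to 0, that crossing is undone and the extra reservation is released again.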
692 :
693 0 : static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
694 : struct btrfs_qgroup_extent_record *qrecord,
695 : u64 bytenr, u64 num_bytes, u64 ref_root,
696 : u64 reserved, int action, bool is_data,
697 : bool is_system)
698 : {
699 0 : int count_mod = 1;
700 0 : bool must_insert_reserved = false;
701 :
702 : /* If reserved is provided, it must be a data extent. */
703 0 : BUG_ON(!is_data && reserved);
704 :
705 0 : switch (action) {
706 : case BTRFS_UPDATE_DELAYED_HEAD:
707 : count_mod = 0;
708 : break;
709 : case BTRFS_DROP_DELAYED_REF:
710 : /*
711 : * The head node stores the sum of all the mods, so dropping a ref
712 : * should drop the sum in the head node by one.
713 : */
714 : count_mod = -1;
715 : break;
716 : case BTRFS_ADD_DELAYED_EXTENT:
717 : /*
718 : * BTRFS_ADD_DELAYED_EXTENT means that we need to update the
719 : * reserved accounting when the extent is finally added, or if a
720 : * later modification deletes the delayed ref without ever
721 : * inserting the extent into the extent allocation tree.
722 : * ref->must_insert_reserved is the flag used to record that
723 : * accounting mods are required.
724 : *
725 : * Once we record must_insert_reserved, switch the action to
726 : * BTRFS_ADD_DELAYED_REF because other special casing is not
727 : * required.
728 : */
729 : must_insert_reserved = true;
730 : break;
731 : }
732 :
733 0 : refcount_set(&head_ref->refs, 1);
734 0 : head_ref->bytenr = bytenr;
735 0 : head_ref->num_bytes = num_bytes;
736 0 : head_ref->ref_mod = count_mod;
737 0 : head_ref->must_insert_reserved = must_insert_reserved;
738 0 : head_ref->is_data = is_data;
739 0 : head_ref->is_system = is_system;
740 0 : head_ref->ref_tree = RB_ROOT_CACHED;
741 0 : INIT_LIST_HEAD(&head_ref->ref_add_list);
742 0 : RB_CLEAR_NODE(&head_ref->href_node);
743 0 : head_ref->processing = false;
744 0 : head_ref->total_ref_mod = count_mod;
745 0 : spin_lock_init(&head_ref->lock);
746 0 : mutex_init(&head_ref->mutex);
747 :
748 0 : if (qrecord) {
749 0 : if (ref_root && reserved) {
750 0 : qrecord->data_rsv = reserved;
751 0 : qrecord->data_rsv_refroot = ref_root;
752 : }
753 0 : qrecord->bytenr = bytenr;
754 0 : qrecord->num_bytes = num_bytes;
755 0 : qrecord->old_roots = NULL;
756 : }
757 0 : }
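An illustrative walk through the count_mod values above: allocating a brand-new data extent arrives as BTRFS_ADD_DELAYED_EXTENT, so the head starts with ref_mod = total_ref_mod = +1 and must_insert_reserved set. If that extent is freed again within the same transaction, the drop arrives as an update with ref_mod = -1, the head's total falls back to 0, and the extent item never needs to be inserted into the extent allocation tree at all. A BTRFS_UPDATE_DELAYED_HEAD (count_mod = 0) only attaches an extent_op without changing the count.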
758 :
759 : /*
760 : * Helper function to actually insert a head node into the rbtree.
761 : * This does all the dirty work in terms of maintaining the correct
762 : * overall modification count.
763 : */
764 : static noinline struct btrfs_delayed_ref_head *
765 0 : add_delayed_ref_head(struct btrfs_trans_handle *trans,
766 : struct btrfs_delayed_ref_head *head_ref,
767 : struct btrfs_qgroup_extent_record *qrecord,
768 : int action, bool *qrecord_inserted_ret)
769 : {
770 0 : struct btrfs_delayed_ref_head *existing;
771 0 : struct btrfs_delayed_ref_root *delayed_refs;
772 0 : bool qrecord_inserted = false;
773 :
774 0 : delayed_refs = &trans->transaction->delayed_refs;
775 :
776 : /* Record qgroup extent info if provided */
777 0 : if (qrecord) {
778 0 : if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
779 : delayed_refs, qrecord))
780 0 : kfree(qrecord);
781 : else
782 : qrecord_inserted = true;
783 : }
784 :
785 0 : trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
786 :
787 0 : existing = htree_insert(&delayed_refs->href_root,
788 : &head_ref->href_node);
789 0 : if (existing) {
790 0 : update_existing_head_ref(trans, existing, head_ref);
791 : /*
792 : * We've updated the existing ref, so free the newly
793 : * allocated one.
794 : */
795 0 : kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
796 0 : head_ref = existing;
797 : } else {
798 0 : if (head_ref->is_data && head_ref->ref_mod < 0) {
799 0 : delayed_refs->pending_csums += head_ref->num_bytes;
800 0 : trans->delayed_ref_updates +=
801 0 : btrfs_csum_bytes_to_leaves(trans->fs_info,
802 : head_ref->num_bytes);
803 : }
804 0 : delayed_refs->num_heads++;
805 0 : delayed_refs->num_heads_ready++;
806 0 : atomic_inc(&delayed_refs->num_entries);
807 0 : trans->delayed_ref_updates++;
808 : }
809 0 : if (qrecord_inserted_ret)
810 0 : *qrecord_inserted_ret = qrecord_inserted;
811 :
812 0 : return head_ref;
813 : }
814 :
815 : /*
816 : * init_delayed_ref_common - Initialize the structure which represents a
817 : * modification to an extent.
818 : *
819 : * @fs_info: The in-memory information structure of the mounted filesystem.
820 : *
821 : * @ref: The structure which is going to be initialized.
822 : *
823 : * @bytenr: The logical address of the extent for which a modification is
824 : * going to be recorded.
825 : *
826 : * @num_bytes: Size of the extent whose modification is being recorded.
827 : *
828 : * @ref_root: The id of the root where this modification has originated, this
829 : * can be either one of the well-known metadata trees or the
830 : * subvolume id which references this extent.
831 : *
832 : * @action: Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
833 : * BTRFS_ADD_DELAYED_EXTENT
834 : *
835 : * @ref_type: Holds the type of the extent which is being recorded, can be
836 : * one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
837 : * when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
838 : * BTRFS_EXTENT_DATA_REF_KEY when recording a data extent.
839 : */
840 0 : static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
841 : struct btrfs_delayed_ref_node *ref,
842 : u64 bytenr, u64 num_bytes, u64 ref_root,
843 : int action, u8 ref_type)
844 : {
845 0 : u64 seq = 0;
846 :
847 0 : if (action == BTRFS_ADD_DELAYED_EXTENT)
848 0 : action = BTRFS_ADD_DELAYED_REF;
849 :
850 0 : if (is_fstree(ref_root))
851 0 : seq = atomic64_read(&fs_info->tree_mod_seq);
852 :
853 0 : refcount_set(&ref->refs, 1);
854 0 : ref->bytenr = bytenr;
855 0 : ref->num_bytes = num_bytes;
856 0 : ref->ref_mod = 1;
857 0 : ref->action = action;
858 0 : ref->seq = seq;
859 0 : ref->type = ref_type;
860 0 : RB_CLEAR_NODE(&ref->ref_node);
861 0 : INIT_LIST_HEAD(&ref->add_list);
862 0 : }
863 :
864 : /*
865 : * add a delayed tree ref. This does all of the accounting required
866 : * to make sure the delayed ref is eventually processed before this
867 : * transaction commits.
868 : */
869 0 : int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
870 : struct btrfs_ref *generic_ref,
871 : struct btrfs_delayed_extent_op *extent_op)
872 : {
873 0 : struct btrfs_fs_info *fs_info = trans->fs_info;
874 0 : struct btrfs_delayed_tree_ref *ref;
875 0 : struct btrfs_delayed_ref_head *head_ref;
876 0 : struct btrfs_delayed_ref_root *delayed_refs;
877 0 : struct btrfs_qgroup_extent_record *record = NULL;
878 0 : bool qrecord_inserted;
879 0 : bool is_system;
880 0 : bool merged;
881 0 : int action = generic_ref->action;
882 0 : int level = generic_ref->tree_ref.level;
883 0 : u64 bytenr = generic_ref->bytenr;
884 0 : u64 num_bytes = generic_ref->len;
885 0 : u64 parent = generic_ref->parent;
886 0 : u8 ref_type;
887 :
888 0 : is_system = (generic_ref->tree_ref.owning_root == BTRFS_CHUNK_TREE_OBJECTID);
889 :
890 0 : ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
891 0 : ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
892 0 : if (!ref)
893 : return -ENOMEM;
894 :
895 0 : head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
896 0 : if (!head_ref) {
897 0 : kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
898 0 : return -ENOMEM;
899 : }
900 :
901 0 : if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
902 0 : !generic_ref->skip_qgroup) {
903 0 : record = kzalloc(sizeof(*record), GFP_NOFS);
904 0 : if (!record) {
905 0 : kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
906 0 : kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
907 0 : return -ENOMEM;
908 : }
909 : }
910 :
911 0 : if (parent)
912 : ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
913 : else
914 0 : ref_type = BTRFS_TREE_BLOCK_REF_KEY;
915 :
916 0 : init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
917 : generic_ref->tree_ref.owning_root, action,
918 : ref_type);
919 0 : ref->root = generic_ref->tree_ref.owning_root;
920 0 : ref->parent = parent;
921 0 : ref->level = level;
922 :
923 0 : init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
924 : generic_ref->tree_ref.owning_root, 0, action,
925 : false, is_system);
926 0 : head_ref->extent_op = extent_op;
927 :
928 0 : delayed_refs = &trans->transaction->delayed_refs;
929 0 : spin_lock(&delayed_refs->lock);
930 :
931 : /*
932 : * insert both the head node and the new ref without dropping
933 : * the spin lock
934 : */
935 0 : head_ref = add_delayed_ref_head(trans, head_ref, record,
936 : action, &qrecord_inserted);
937 :
938 0 : merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
939 0 : spin_unlock(&delayed_refs->lock);
940 :
941 : /*
942 : * Need to update the delayed_refs_rsv with any changes we may have
943 : * made.
944 : */
945 0 : btrfs_update_delayed_refs_rsv(trans);
946 :
947 0 : trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
948 : action == BTRFS_ADD_DELAYED_EXTENT ?
949 : BTRFS_ADD_DELAYED_REF : action);
950 0 : if (merged)
951 0 : kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
952 :
953 0 : if (qrecord_inserted)
954 0 : btrfs_qgroup_trace_extent_post(trans, record);
955 :
956 : return 0;
957 : }
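A hedged sketch of a caller, illustrative only: it fills just the btrfs_ref fields that btrfs_add_delayed_tree_ref() reads above (type, action, bytenr, len, parent and tree_ref) to queue one more reference on a level-0 tree block. Real callers normally build the ref through the initialisation helpers in delayed-ref.h, this fragment is not intended to compile outside the kernel tree, and queue_tree_block_ref() is a hypothetical name.

/* Hypothetical helper: queue a +1 reference on a level-0 block of the FS tree. */
static int queue_tree_block_ref(struct btrfs_trans_handle *trans,
				u64 bytenr, u64 nodesize)
{
	struct btrfs_ref ref = {
		.type = BTRFS_REF_METADATA,
		.action = BTRFS_ADD_DELAYED_REF,
		.bytenr = bytenr,
		.len = nodesize,
		.parent = 0,	/* keyed by owning root, not a shared parent */
	};

	ref.tree_ref.level = 0;
	ref.tree_ref.owning_root = BTRFS_FS_TREE_OBJECTID;

	/* No extent_op: nothing about the key or flags needs updating here. */
	return btrfs_add_delayed_tree_ref(trans, &ref, NULL);
}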
958 :
959 : /*
960 : * Add a delayed data ref. It's similar to btrfs_add_delayed_tree_ref().
961 : */
962 0 : int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
963 : struct btrfs_ref *generic_ref,
964 : u64 reserved)
965 : {
966 0 : struct btrfs_fs_info *fs_info = trans->fs_info;
967 0 : struct btrfs_delayed_data_ref *ref;
968 0 : struct btrfs_delayed_ref_head *head_ref;
969 0 : struct btrfs_delayed_ref_root *delayed_refs;
970 0 : struct btrfs_qgroup_extent_record *record = NULL;
971 0 : bool qrecord_inserted;
972 0 : int action = generic_ref->action;
973 0 : bool merged;
974 0 : u64 bytenr = generic_ref->bytenr;
975 0 : u64 num_bytes = generic_ref->len;
976 0 : u64 parent = generic_ref->parent;
977 0 : u64 ref_root = generic_ref->data_ref.owning_root;
978 0 : u64 owner = generic_ref->data_ref.ino;
979 0 : u64 offset = generic_ref->data_ref.offset;
980 0 : u8 ref_type;
981 :
982 0 : ASSERT(generic_ref->type == BTRFS_REF_DATA && action);
983 0 : ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
984 0 : if (!ref)
985 : return -ENOMEM;
986 :
987 0 : if (parent)
988 : ref_type = BTRFS_SHARED_DATA_REF_KEY;
989 : else
990 0 : ref_type = BTRFS_EXTENT_DATA_REF_KEY;
991 0 : init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
992 : ref_root, action, ref_type);
993 0 : ref->root = ref_root;
994 0 : ref->parent = parent;
995 0 : ref->objectid = owner;
996 0 : ref->offset = offset;
997 :
998 :
999 0 : head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
1000 0 : if (!head_ref) {
1001 0 : kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
1002 0 : return -ENOMEM;
1003 : }
1004 :
1005 0 : if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
1006 0 : !generic_ref->skip_qgroup) {
1007 0 : record = kzalloc(sizeof(*record), GFP_NOFS);
1008 0 : if (!record) {
1009 0 : kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
1010 0 : kmem_cache_free(btrfs_delayed_ref_head_cachep,
1011 : head_ref);
1012 0 : return -ENOMEM;
1013 : }
1014 : }
1015 :
1016 0 : init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
1017 : reserved, action, true, false);
1018 0 : head_ref->extent_op = NULL;
1019 :
1020 0 : delayed_refs = &trans->transaction->delayed_refs;
1021 0 : spin_lock(&delayed_refs->lock);
1022 :
1023 : /*
1024 : * insert both the head node and the new ref without dropping
1025 : * the spin lock
1026 : */
1027 0 : head_ref = add_delayed_ref_head(trans, head_ref, record,
1028 : action, &qrecord_inserted);
1029 :
1030 0 : merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
1031 0 : spin_unlock(&delayed_refs->lock);
1032 :
1033 : /*
1034 : * Need to update the delayed_refs_rsv with any changes we may have
1035 : * made.
1036 : */
1037 0 : btrfs_update_delayed_refs_rsv(trans);
1038 :
1039 0 : trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
1040 : action == BTRFS_ADD_DELAYED_EXTENT ?
1041 : BTRFS_ADD_DELAYED_REF : action);
1042 0 : if (merged)
1043 0 : kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
1044 :
1045 :
1046 0 : if (qrecord_inserted)
1047 0 : return btrfs_qgroup_trace_extent_post(trans, record);
1048 : return 0;
1049 : }
1050 :
1051 0 : int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
1052 : u64 bytenr, u64 num_bytes,
1053 : struct btrfs_delayed_extent_op *extent_op)
1054 : {
1055 0 : struct btrfs_delayed_ref_head *head_ref;
1056 0 : struct btrfs_delayed_ref_root *delayed_refs;
1057 :
1058 0 : head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
1059 0 : if (!head_ref)
1060 : return -ENOMEM;
1061 :
1062 0 : init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
1063 : BTRFS_UPDATE_DELAYED_HEAD, false, false);
1064 0 : head_ref->extent_op = extent_op;
1065 :
1066 0 : delayed_refs = &trans->transaction->delayed_refs;
1067 0 : spin_lock(&delayed_refs->lock);
1068 :
1069 0 : add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
1070 : NULL);
1071 :
1072 0 : spin_unlock(&delayed_refs->lock);
1073 :
1074 : /*
1075 : * Need to update the delayed_refs_rsv with any changes we may have
1076 : * made.
1077 : */
1078 0 : btrfs_update_delayed_refs_rsv(trans);
1079 0 : return 0;
1080 : }
1081 :
1082 : /*
1083 : * This does a simple search for the head node for a given extent. Returns the
1084 : * head node if found, or NULL if not.
1085 : */
1086 : struct btrfs_delayed_ref_head *
1087 0 : btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
1088 : {
1089 0 : lockdep_assert_held(&delayed_refs->lock);
1090 :
1091 0 : return find_ref_head(delayed_refs, bytenr, false);
1092 : }
1093 :
1094 0 : void __cold btrfs_delayed_ref_exit(void)
1095 : {
1096 0 : kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
1097 0 : kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
1098 0 : kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
1099 0 : kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
1100 0 : }
1101 :
1102 2 : int __init btrfs_delayed_ref_init(void)
1103 : {
1104 2 : btrfs_delayed_ref_head_cachep = kmem_cache_create(
1105 : "btrfs_delayed_ref_head",
1106 : sizeof(struct btrfs_delayed_ref_head), 0,
1107 : SLAB_MEM_SPREAD, NULL);
1108 2 : if (!btrfs_delayed_ref_head_cachep)
1109 0 : goto fail;
1110 :
1111 2 : btrfs_delayed_tree_ref_cachep = kmem_cache_create(
1112 : "btrfs_delayed_tree_ref",
1113 : sizeof(struct btrfs_delayed_tree_ref), 0,
1114 : SLAB_MEM_SPREAD, NULL);
1115 2 : if (!btrfs_delayed_tree_ref_cachep)
1116 0 : goto fail;
1117 :
1118 2 : btrfs_delayed_data_ref_cachep = kmem_cache_create(
1119 : "btrfs_delayed_data_ref",
1120 : sizeof(struct btrfs_delayed_data_ref), 0,
1121 : SLAB_MEM_SPREAD, NULL);
1122 2 : if (!btrfs_delayed_data_ref_cachep)
1123 0 : goto fail;
1124 :
1125 2 : btrfs_delayed_extent_op_cachep = kmem_cache_create(
1126 : "btrfs_delayed_extent_op",
1127 : sizeof(struct btrfs_delayed_extent_op), 0,
1128 : SLAB_MEM_SPREAD, NULL);
1129 2 : if (!btrfs_delayed_extent_op_cachep)
1130 0 : goto fail;
1131 :
1132 : return 0;
1133 0 : fail:
1134 0 : btrfs_delayed_ref_exit();
1135 0 : return -ENOMEM;
1136 : }