Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 :
3 : #include <linux/slab.h>
4 : #include <trace/events/btrfs.h>
5 : #include "messages.h"
6 : #include "ctree.h"
7 : #include "extent-io-tree.h"
8 : #include "btrfs_inode.h"
9 : #include "misc.h"
10 :
11 : static struct kmem_cache *extent_state_cache;
12 :
13 : static inline bool extent_state_in_tree(const struct extent_state *state)
14 : {
15 686090775 : return !RB_EMPTY_NODE(&state->rb_node);
16 : }
17 :
18 : #ifdef CONFIG_BTRFS_DEBUG
19 : static LIST_HEAD(states);
20 : static DEFINE_SPINLOCK(leak_lock);
21 :
22 : static inline void btrfs_leak_debug_add_state(struct extent_state *state)
23 : {
24 : unsigned long flags;
25 :
26 : spin_lock_irqsave(&leak_lock, flags);
27 : list_add(&state->leak_list, &states);
28 : spin_unlock_irqrestore(&leak_lock, flags);
29 : }
30 :
31 : static inline void btrfs_leak_debug_del_state(struct extent_state *state)
32 : {
33 : unsigned long flags;
34 :
35 : spin_lock_irqsave(&leak_lock, flags);
36 : list_del(&state->leak_list);
37 : spin_unlock_irqrestore(&leak_lock, flags);
38 : }
39 :
40 : static inline void btrfs_extent_state_leak_debug_check(void)
41 : {
42 : struct extent_state *state;
43 :
44 : while (!list_empty(&states)) {
45 : state = list_entry(states.next, struct extent_state, leak_list);
46 : pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
47 : state->start, state->end, state->state,
48 : extent_state_in_tree(state),
49 : refcount_read(&state->refs));
50 : list_del(&state->leak_list);
51 : kmem_cache_free(extent_state_cache, state);
52 : }
53 : }
54 :
55 : #define btrfs_debug_check_extent_io_range(tree, start, end) \
56 : __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
57 : static inline void __btrfs_debug_check_extent_io_range(const char *caller,
58 : struct extent_io_tree *tree,
59 : u64 start, u64 end)
60 : {
61 : struct btrfs_inode *inode = tree->inode;
62 : u64 isize;
63 :
64 : if (!inode)
65 : return;
66 :
67 : isize = i_size_read(&inode->vfs_inode);
68 : if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
69 : btrfs_debug_rl(inode->root->fs_info,
70 : "%s: ino %llu isize %llu odd range [%llu,%llu]",
71 : caller, btrfs_ino(inode), isize, start, end);
72 : }
73 : }
74 : #else
75 : #define btrfs_leak_debug_add_state(state) do {} while (0)
76 : #define btrfs_leak_debug_del_state(state) do {} while (0)
77 : #define btrfs_extent_state_leak_debug_check() do {} while (0)
78 : #define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
79 : #endif
80 :
81 : /*
82 : * For the file_extent_tree, we want to hold the inode lock when we look up
83 : * and update the disk_i_size, but lockdep will complain because in our io_tree
84 : * we hold the tree lock and then take the inode lock when setting delalloc.
85 : * These two things are unrelated, so make a lockdep class for the
86 : * file_extent_tree so we don't get the two locking patterns mixed up.
87 : */
88 : static struct lock_class_key file_extent_tree_class;
89 :
90 : struct tree_entry {
91 : u64 start;
92 : u64 end;
93 : struct rb_node rb_node;
94 : };
95 :
96 8310657 : void extent_io_tree_init(struct btrfs_fs_info *fs_info,
97 : struct extent_io_tree *tree, unsigned int owner)
98 : {
99 8310657 : tree->fs_info = fs_info;
100 8310657 : tree->state = RB_ROOT;
101 8310657 : spin_lock_init(&tree->lock);
102 8304884 : tree->inode = NULL;
103 8304884 : tree->owner = owner;
104 8304884 : if (owner == IO_TREE_INODE_FILE_EXTENT)
105 8304884 : lockdep_set_class(&tree->lock, &file_extent_tree_class);
106 8304884 : }
107 :
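/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * initialize an io tree whose owner selects the file_extent_tree lockdep
 * class, exactly as the function above handles it.
 */
static void example_init_file_extent_tree(struct btrfs_fs_info *fs_info,
					  struct extent_io_tree *tree)
{
	extent_io_tree_init(fs_info, tree, IO_TREE_INODE_FILE_EXTENT);
}
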
108 1240360 : void extent_io_tree_release(struct extent_io_tree *tree)
109 : {
110 1240360 : spin_lock(&tree->lock);
111 : /*
112 : * Do a single barrier for the waitqueue_active check here; the state
113 : * of the waitqueue should not change once extent_io_tree_release is
114 : * called.
115 : */
116 1240360 : smp_mb();
117 1240389 : while (!RB_EMPTY_ROOT(&tree->state)) {
118 29 : struct rb_node *node;
119 29 : struct extent_state *state;
120 :
121 29 : node = rb_first(&tree->state);
122 29 : state = rb_entry(node, struct extent_state, rb_node);
123 29 : rb_erase(&state->rb_node, &tree->state);
124 29 : RB_CLEAR_NODE(&state->rb_node);
125 : /*
126 : * btree io trees aren't supposed to have tasks waiting for
127 : * changes in the flags of extent states ever.
128 : */
129 58 : ASSERT(!waitqueue_active(&state->wq));
130 29 : free_extent_state(state);
131 :
132 29 : cond_resched_lock(&tree->lock);
133 : }
134 1240360 : spin_unlock(&tree->lock);
135 1240360 : }
136 :
137 422588681 : static struct extent_state *alloc_extent_state(gfp_t mask)
138 : {
139 422588681 : struct extent_state *state;
140 :
141 : /*
142 : * The given mask might not be appropriate for the slab allocator,
143 : * so drop the unsupported bits.
144 : */
145 422588681 : mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
146 422588681 : state = kmem_cache_alloc(extent_state_cache, mask);
147 422672657 : if (!state)
148 : return state;
149 422672657 : state->state = 0;
150 422672657 : RB_CLEAR_NODE(&state->rb_node);
151 422672657 : btrfs_leak_debug_add_state(state);
152 422672657 : refcount_set(&state->refs, 1);
153 422672657 : init_waitqueue_head(&state->wq);
154 422565062 : trace_alloc_extent_state(state, mask, _RET_IP_);
155 422592345 : return state;
156 : }
157 :
158 : static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
159 : {
160 160823912 : if (!prealloc)
161 3504 : prealloc = alloc_extent_state(GFP_ATOMIC);
162 :
163 160823912 : return prealloc;
164 : }
165 :
166 505008330 : void free_extent_state(struct extent_state *state)
167 : {
168 505008330 : if (!state)
169 : return;
170 443623386 : if (refcount_dec_and_test(&state->refs)) {
171 423236298 : WARN_ON(extent_state_in_tree(state));
172 423236298 : btrfs_leak_debug_del_state(state);
173 423236298 : trace_free_extent_state(state, _RET_IP_);
174 423122274 : kmem_cache_free(extent_state_cache, state);
175 : }
176 : }
177 :
178 333715427 : static int add_extent_changeset(struct extent_state *state, u32 bits,
179 : struct extent_changeset *changeset,
180 : int set)
181 : {
182 333715427 : int ret;
183 :
184 333715427 : if (!changeset)
185 : return 0;
186 2584413 : if (set && (state->state & bits) == bits)
187 : return 0;
188 2584269 : if (!set && (state->state & bits) == 0)
189 : return 0;
190 2584269 : changeset->bytes_changed += state->end - state->start + 1;
191 2584269 : ret = ulist_add(&changeset->range_changed, state->start, state->end,
192 : GFP_ATOMIC);
193 2584269 : return ret;
194 : }
195 :
196 : static inline struct extent_state *next_state(struct extent_state *state)
197 : {
198 396400619 : struct rb_node *next = rb_next(&state->rb_node);
199 :
200 396415409 : if (next)
201 92355263 : return rb_entry(next, struct extent_state, rb_node);
202 : else
203 : return NULL;
204 : }
205 :
206 : static inline struct extent_state *prev_state(struct extent_state *state)
207 : {
208 71334785 : struct rb_node *prev = rb_prev(&state->rb_node);
209 :
210 71288582 : if (prev)
211 1 : return rb_entry(prev, struct extent_state, rb_node);
212 : else
213 : return NULL;
214 : }
215 :
216 : /*
217 : * Search @tree for an entry that contains @offset. Such entry would have
218 : * entry->start <= offset && entry->end >= offset.
219 : *
220 : * @tree: the tree to search
221 : * @offset: offset that should fall within an entry in @tree
222 : * @node_ret: pointer where new node should be anchored (used when inserting an
223 : * entry in the tree)
224 : * @parent_ret: points to entry which would have been the parent of the entry,
225 : * containing @offset
226 : *
227 : * Return a pointer to the entry that contains the @offset byte address, and
228 : * don't change @node_ret and @parent_ret.
229 : *
230 : * If no such entry exists, fill @node_ret and @parent_ret and return a
231 : * pointer to the first entry that begins after @offset, or NULL if none.
232 : */
233 393000869 : static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
234 : u64 offset,
235 : struct rb_node ***node_ret,
236 : struct rb_node **parent_ret)
237 : {
238 393000869 : struct rb_root *root = &tree->state;
239 393000869 : struct rb_node **node = &root->rb_node;
240 393000869 : struct rb_node *prev = NULL;
241 393000869 : struct extent_state *entry = NULL;
242 :
243 805291376 : while (*node) {
244 532255671 : prev = *node;
245 532255671 : entry = rb_entry(prev, struct extent_state, rb_node);
246 :
247 532255671 : if (offset < entry->start)
248 132452712 : node = &(*node)->rb_left;
249 399802959 : else if (offset > entry->end)
250 279837795 : node = &(*node)->rb_right;
251 : else
252 119965164 : return entry;
253 : }
254 :
255 273035705 : if (node_ret)
256 111388474 : *node_ret = node;
257 273035705 : if (parent_ret)
258 111393337 : *parent_ret = prev;
259 :
260 : /* Search neighbors until we find the first one past the end */
261 286255679 : while (entry && offset > entry->end)
262 89673043 : entry = next_state(entry);
263 :
264 : return entry;
265 : }
266 :
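/*
 * To make the search contract above concrete, here is a small standalone
 * model in plain C (sorted array instead of an rbtree; all names are
 * hypothetical): return the range containing the offset, else the first
 * range that begins after it, else NULL.
 */
#include <stdio.h>

struct model_range { unsigned long long start, end; };

static const struct model_range *model_search(const struct model_range *r,
					      int n, unsigned long long offset)
{
	for (int i = 0; i < n; i++) {
		if (offset > r[i].end)
			continue;	/* range ends before offset, keep going */
		return &r[i];		/* contains offset, or starts after it */
	}
	return NULL;			/* offset is past the last range */
}

int main(void)
{
	const struct model_range t[] = { { 0, 4095 }, { 8192, 12287 } };
	const struct model_range *hit = model_search(t, 2, 5000);

	if (hit)	/* prints 8192..12287, the first range past the hole */
		printf("%llu..%llu\n", hit->start, hit->end);
	return 0;
}
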
267 : /*
268 : * Search offset in the tree or fill neighbor rbtree node pointers.
269 : *
270 : * @tree: the tree to search
271 : * @offset: offset that should fall within an entry in @tree
272 : * @next_ret: pointer to the first entry whose range ends at or after @offset
273 : * @prev_ret: pointer to the last entry whose range begins at or before @offset
274 : *
275 : * Return a pointer to the entry that contains @offset byte address. If no
276 : * such entry exists, then return NULL and fill @prev_ret and @next_ret.
277 : * Otherwise return the found entry and leave the other pointers untouched.
278 : */
279 3376 : static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
280 : u64 offset,
281 : struct extent_state **prev_ret,
282 : struct extent_state **next_ret)
283 : {
284 3376 : struct rb_root *root = &tree->state;
285 3376 : struct rb_node **node = &root->rb_node;
286 3376 : struct extent_state *orig_prev;
287 3376 : struct extent_state *entry = NULL;
288 :
289 3376 : ASSERT(prev_ret);
290 3376 : ASSERT(next_ret);
291 :
292 6760 : while (*node) {
293 5887 : entry = rb_entry(*node, struct extent_state, rb_node);
294 :
295 5887 : if (offset < entry->start)
296 846 : node = &(*node)->rb_left;
297 5041 : else if (offset > entry->end)
298 2538 : node = &(*node)->rb_right;
299 : else
300 2503 : return entry;
301 : }
302 :
303 : orig_prev = entry;
304 875 : while (entry && offset > entry->end)
305 860 : entry = next_state(entry);
306 873 : *next_ret = entry;
307 873 : entry = orig_prev;
308 :
309 874 : while (entry && offset < entry->start)
310 13 : entry = prev_state(entry);
311 873 : *prev_ret = entry;
312 :
313 873 : return NULL;
314 : }
315 :
316 : /*
317 : * Inexact rb-tree search, return the next entry if @offset is not found
318 : */
319 : static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
320 : {
321 253675057 : return tree_search_for_insert(tree, offset, NULL, NULL);
322 : }
323 :
324 0 : static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
325 : {
326 0 : btrfs_panic(tree->fs_info, err,
327 : "locking error: extent tree was modified by another thread while locked");
328 : }
329 :
330 : /*
331 : * Utility function to look for merge candidates inside a given range. Any
332 : * extents with matching state are merged together into a single extent in the
333 : * tree. Extents with EXTENT_LOCKED or EXTENT_BOUNDARY set are not merged because
334 : * the end_io handlers need to be able to do operations on them without
335 : * sleeping (or doing allocations/splits).
336 : *
337 : * This should be called with the tree lock held.
338 : */
339 232055156 : static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
340 : {
341 232055156 : struct extent_state *other;
342 :
343 232055156 : if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
344 : return;
345 :
346 71333945 : other = prev_state(state);
347 64037914 : if (other && other->end == state->start - 1 &&
348 55158594 : other->state == state->state) {
349 52960473 : if (tree->inode)
350 36446965 : btrfs_merge_delalloc_extent(tree->inode, state, other);
351 53082063 : state->start = other->start;
352 53082063 : rb_erase(&other->rb_node, &tree->state);
353 53058919 : RB_CLEAR_NODE(&other->rb_node);
354 53058919 : free_extent_state(other);
355 : }
356 71430223 : other = next_state(state);
357 21137583 : if (other && other->start == state->end + 1 &&
358 9806939 : other->state == state->state) {
359 8426355 : if (tree->inode)
360 7804455 : btrfs_merge_delalloc_extent(tree->inode, state, other);
361 8426365 : state->end = other->end;
362 8426365 : rb_erase(&other->rb_node, &tree->state);
363 8426327 : RB_CLEAR_NODE(&other->rb_node);
364 8426327 : free_extent_state(other);
365 : }
366 : }
367 :
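/*
 * The merge rule itself is simple: two states coalesce only when they are
 * byte-adjacent and carry identical bits. A standalone sketch of that
 * predicate (hypothetical types, not the kernel structures):
 */
#include <stdbool.h>
#include <stdint.h>

struct model_span { uint64_t start, end; uint32_t state; };

/* Mirrors merge_state()'s test: prev ends exactly one byte before next
 * starts and both carry the same bits. */
static bool model_can_merge(const struct model_span *prev,
			    const struct model_span *next)
{
	return prev->end == next->start - 1 && prev->state == next->state;
}

/* Fold next into prev, as merge_state() does by extending the surviving
 * state and erasing the other one from the tree. */
static void model_merge(struct model_span *prev, const struct model_span *next)
{
	prev->end = next->end;
}
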
368 184531164 : static void set_state_bits(struct extent_io_tree *tree,
369 : struct extent_state *state,
370 : u32 bits, struct extent_changeset *changeset)
371 : {
372 184531164 : u32 bits_to_set = bits & ~EXTENT_CTLBITS;
373 184531164 : int ret;
374 :
375 184531164 : if (tree->inode)
376 157276250 : btrfs_set_delalloc_extent(tree->inode, state, bits);
377 :
378 184586888 : ret = add_extent_changeset(state, bits_to_set, changeset, 1);
379 184589719 : BUG_ON(ret < 0);
380 184589719 : state->state |= bits_to_set;
381 184589719 : }
382 :
383 : /*
384 : * Insert an extent_state struct into the tree. 'bits' are set on the
385 : * struct before it is inserted.
386 : *
387 : * This may return -EEXIST if the extent is already there, in which case the
388 : * state struct is freed.
389 : *
390 : * The tree lock is not taken internally. This is a utility function and
391 : * probably isn't what you want to call (see set/clear_extent_bit).
392 : */
393 18443727 : static int insert_state(struct extent_io_tree *tree,
394 : struct extent_state *state,
395 : u32 bits, struct extent_changeset *changeset)
396 : {
397 18443727 : struct rb_node **node;
398 18443727 : struct rb_node *parent = NULL;
399 18443727 : const u64 end = state->end;
400 :
401 18443727 : set_state_bits(tree, state, bits, changeset);
402 :
403 18443730 : node = &tree->state.rb_node;
404 109037365 : while (*node) {
405 90593635 : struct extent_state *entry;
406 :
407 90593635 : parent = *node;
408 90593635 : entry = rb_entry(parent, struct extent_state, rb_node);
409 :
410 90593635 : if (end < entry->start) {
411 36801001 : node = &(*node)->rb_left;
412 53792634 : } else if (end > entry->end) {
413 53792634 : node = &(*node)->rb_right;
414 : } else {
415 0 : btrfs_err(tree->fs_info,
416 : "found node %llu %llu on insert of %llu %llu",
417 : entry->start, entry->end, state->start, end);
418 0 : return -EEXIST;
419 : }
420 : }
421 :
422 18443730 : rb_link_node(&state->rb_node, parent, node);
423 18443730 : rb_insert_color(&state->rb_node, &tree->state);
424 :
425 18443735 : merge_state(tree, state);
426 18443735 : return 0;
427 : }
428 :
429 : /*
430 : * Insert state to @tree to the location given by @node and @parent.
431 : */
432 92870396 : static void insert_state_fast(struct extent_io_tree *tree,
433 : struct extent_state *state, struct rb_node **node,
434 : struct rb_node *parent, unsigned bits,
435 : struct extent_changeset *changeset)
436 : {
437 92870396 : set_state_bits(tree, state, bits, changeset);
438 92917441 : rb_link_node(&state->rb_node, parent, node);
439 92917441 : rb_insert_color(&state->rb_node, &tree->state);
440 92924159 : merge_state(tree, state);
441 92915079 : }
442 :
443 : /*
444 : * Split a given extent state struct in two, inserting the preallocated
445 : * struct 'prealloc' as the newly created second half. 'split' indicates an
446 : * offset inside 'orig' where it should be split.
447 : *
448 : * Before calling, the tree has 'orig' at [orig->start, orig->end].
449 : * After calling, there are two extent state structs in the tree:
450 : *
451 : * prealloc: [orig->start, split - 1]
452 : * orig: [ split, orig->end ]
453 : *
454 : * The tree locks are not taken by this function. They need to be held
455 : * by the caller.
456 : */
457 49497388 : static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
458 : struct extent_state *prealloc, u64 split)
459 : {
460 49497388 : struct rb_node *parent = NULL;
461 49497388 : struct rb_node **node;
462 :
463 49497388 : if (tree->inode)
464 49432179 : btrfs_split_delalloc_extent(tree->inode, orig, split);
465 :
466 49578448 : prealloc->start = orig->start;
467 49578448 : prealloc->end = split - 1;
468 49578448 : prealloc->state = orig->state;
469 49578448 : orig->start = split;
470 :
471 49578448 : parent = &orig->rb_node;
472 49578448 : node = &parent;
473 114188570 : while (*node) {
474 64610122 : struct extent_state *entry;
475 :
476 64610122 : parent = *node;
477 64610122 : entry = rb_entry(parent, struct extent_state, rb_node);
478 :
479 64610122 : if (prealloc->end < entry->start) {
480 49577193 : node = &(*node)->rb_left;
481 15032929 : } else if (prealloc->end > entry->end) {
482 15032929 : node = &(*node)->rb_right;
483 : } else {
484 0 : free_extent_state(prealloc);
485 0 : return -EEXIST;
486 : }
487 : }
488 :
489 49578448 : rb_link_node(&prealloc->rb_node, parent, node);
490 49578448 : rb_insert_color(&prealloc->rb_node, &tree->state);
491 :
492 49578448 : return 0;
493 : }
494 :
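/*
 * A worked example of the split arithmetic above (standalone sketch with a
 * hypothetical struct): splitting [0, 8191] at split = 4096 leaves
 * prealloc = [0, 4095] and orig = [4096, 8191], both keeping the bits.
 */
#include <stdint.h>

struct model_state { uint64_t start, end; uint32_t state; };

static void model_split(struct model_state *orig, struct model_state *prealloc,
			uint64_t split)
{
	prealloc->start = orig->start;	/* front half goes to prealloc */
	prealloc->end = split - 1;
	prealloc->state = orig->state;	/* both halves keep the same bits */
	orig->start = split;		/* orig shrinks to the back half */
}
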
495 : /*
496 : * Utility function to clear some bits in an extent state struct. It will
497 : * optionally wake up anyone waiting on this state (wake == 1).
498 : *
499 : * If no bits are set on the state struct after clearing things, the
500 : * struct is freed and removed from the tree
501 : */
502 149287676 : static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
503 : struct extent_state *state,
504 : u32 bits, int wake,
505 : struct extent_changeset *changeset)
506 : {
507 149287676 : struct extent_state *next;
508 149287676 : u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
509 149287676 : int ret;
510 :
511 149287676 : if (tree->inode)
512 139137172 : btrfs_clear_delalloc_extent(tree->inode, state, bits);
513 :
514 149209635 : ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
515 149237768 : BUG_ON(ret < 0);
516 149237768 : state->state &= ~bits_to_clear;
517 149237768 : if (wake)
518 123122602 : wake_up(&state->wq);
519 149313095 : if (state->state == 0) {
520 99467674 : next = next_state(state);
521 99481654 : if (extent_state_in_tree(state)) {
522 99481654 : rb_erase(&state->rb_node, &tree->state);
523 99440452 : RB_CLEAR_NODE(&state->rb_node);
524 99440452 : free_extent_state(state);
525 : } else {
526 0 : WARN_ON(1);
527 : }
528 : } else {
529 49845421 : merge_state(tree, state);
530 49897458 : next = next_state(state);
531 : }
532 149408657 : return next;
533 : }
534 :
535 : /*
536 : * Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly,
537 : * and unset the EXTENT_NOWAIT bit.
538 : */
539 : static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
540 : {
541 413419138 : *mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
542 413419138 : *bits &= EXTENT_NOWAIT - 1;
543 : }
544 :
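/*
 * The mask arithmetic works because EXTENT_NOWAIT is the highest bit used
 * here: EXTENT_NOWAIT - 1 is a mask of every lower bit, so the AND clears
 * EXTENT_NOWAIT in one step. A standalone illustration with a stand-in
 * bit position (the real definition lives in extent-io-tree.h):
 */
#include <assert.h>
#include <stdint.h>

#define DEMO_NOWAIT (1U << 15)	/* stand-in for EXTENT_NOWAIT's position */

int main(void)
{
	uint32_t bits = DEMO_NOWAIT | 0x5;	/* NOWAIT plus two payload bits */

	bits &= DEMO_NOWAIT - 1;		/* keep only bits below NOWAIT */
	assert(bits == 0x5);
	return 0;
}
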
545 : /*
546 : * Clear some bits on a range in the tree. This may require splitting or
547 : * inserting elements in the tree, so the gfp mask is used to indicate which
548 : * allocations or sleeping are allowed.
549 : *
550 : * Clearing EXTENT_LOCKED wakes any sleepers, and EXTENT_CLEAR_ALL_BITS removes
551 : * the given range from the tree regardless of state (ie for truncate).
552 : *
553 : * The range [start, end] is inclusive.
554 : *
555 : * This takes the tree lock, and returns 0 on success and < 0 on error.
556 : */
557 231826413 : int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
558 : u32 bits, struct extent_state **cached_state,
559 : struct extent_changeset *changeset)
560 : {
561 231826413 : struct extent_state *state;
562 231826413 : struct extent_state *cached;
563 231826413 : struct extent_state *prealloc = NULL;
564 231826413 : u64 last_end;
565 231826413 : int err;
566 231826413 : int clear = 0;
567 231826413 : int wake;
568 231826413 : int delete = (bits & EXTENT_CLEAR_ALL_BITS);
569 231826413 : gfp_t mask;
570 :
571 231826413 : set_gfp_mask_from_bits(&bits, &mask);
572 231826413 : btrfs_debug_check_extent_io_range(tree, start, end);
573 231826413 : trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
574 :
575 231705606 : if (delete)
576 32937135 : bits |= ~EXTENT_CTLBITS;
577 :
578 231705606 : if (bits & EXTENT_DELALLOC)
579 135056155 : bits |= EXTENT_NORESERVE;
580 :
581 231705606 : wake = (bits & EXTENT_LOCKED) ? 1 : 0;
582 231705606 : if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
583 177131171 : clear = 1;
584 231705606 : again:
585 231870587 : if (!prealloc) {
586 : /*
587 : * Don't care for allocation failure here because we might end
588 : * up not needing the pre-allocated extent state at all, which
589 : * is the case if the extent states already in the tree cover our
590 : * input range and none of them extends past its boundaries.
591 : * If we end up needing a new extent state we allocate it later.
592 : */
593 231808252 : prealloc = alloc_extent_state(mask);
594 : }
595 :
596 231921316 : spin_lock(&tree->lock);
597 232132784 : if (cached_state) {
598 121803172 : cached = *cached_state;
599 :
600 121803172 : if (clear) {
601 74222478 : *cached_state = NULL;
602 74222478 : cached_state = NULL;
603 : }
604 :
605 121803172 : if (cached && extent_state_in_tree(cached) &&
606 107034615 : cached->start <= start && cached->end > start) {
607 106859331 : if (clear)
608 72335988 : refcount_dec(&cached->refs);
609 106909322 : state = cached;
610 106909322 : goto hit_next;
611 : }
612 14943841 : if (clear)
613 1871455 : free_extent_state(cached);
614 : }
615 :
616 : /* This search will find the extents that end after our range starts. */
617 125273424 : state = tree_search(tree, start);
618 125176680 : if (!state)
619 64837631 : goto out;
620 167248371 : hit_next:
621 169066924 : if (state->start > end)
622 7568526 : goto out;
623 161498398 : WARN_ON(state->end < start);
624 161498398 : last_end = state->end;
625 :
626 : /* The state doesn't have the wanted bits, go ahead. */
627 161498398 : if (!(state->state & bits)) {
628 14305189 : state = next_state(state);
629 14304710 : goto next;
630 : }
631 :
632 : /*
633 : * | ---- desired range ---- |
634 : * | state | or
635 : * | ------------- state -------------- |
636 : *
637 : * We need to split the extent we found, and may flip bits on second
638 : * half.
639 : *
640 : * If the extent we found extends past our range, we just split and
641 : * search again. It'll get split again the next time though.
642 : *
643 : * If the extent we found is inside our range, we clear the desired bit
644 : * on it.
645 : */
646 :
647 147193209 : if (state->start < start) {
648 522533 : prealloc = alloc_extent_state_atomic(prealloc);
649 522533 : if (!prealloc)
650 0 : goto search_again;
651 522533 : err = split_state(tree, state, prealloc, start);
652 522242 : if (err)
653 0 : extent_io_tree_panic(tree, err);
654 :
655 522242 : prealloc = NULL;
656 522242 : if (err)
657 : goto out;
658 522242 : if (state->end <= end) {
659 367771 : state = clear_state_bit(tree, state, bits, wake, changeset);
660 367979 : goto next;
661 : }
662 154471 : goto search_again;
663 : }
664 : /*
665 : * | ---- desired range ---- |
666 : * | state |
667 : * We need to split the extent, and clear the bit on the first half.
668 : */
669 146670676 : if (state->start <= end && state->end > end) {
670 25447310 : prealloc = alloc_extent_state_atomic(prealloc);
671 25447310 : if (!prealloc)
672 0 : goto search_again;
673 25447310 : err = split_state(tree, state, prealloc, end + 1);
674 25447701 : if (err)
675 0 : extent_io_tree_panic(tree, err);
676 :
677 25447701 : if (wake)
678 24769942 : wake_up(&state->wq);
679 :
680 25452252 : clear_state_bit(tree, prealloc, bits, wake, changeset);
681 :
682 25455831 : prealloc = NULL;
683 25455831 : goto out;
684 : }
685 :
686 121223366 : state = clear_state_bit(tree, state, bits, wake, changeset);
687 135924716 : next:
688 135924716 : if (last_end == (u64)-1)
689 273238 : goto out;
690 135651478 : start = last_end + 1;
691 135651478 : if (start <= end && state && !need_resched())
692 1818553 : goto hit_next;
693 :
694 133832925 : search_again:
695 133987396 : if (start > end)
696 133804503 : goto out;
697 182893 : spin_unlock(&tree->lock);
698 164981 : if (gfpflags_allow_blocking(mask))
699 163511 : cond_resched();
700 164981 : goto again;
701 :
702 231939729 : out:
703 231939729 : spin_unlock(&tree->lock);
704 232075864 : if (prealloc)
705 206253266 : free_extent_state(prealloc);
706 :
707 232016702 : return 0;
708 :
709 : }
710 :
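/*
 * A hedged usage sketch of the public wrapper used later in this file
 * (clear_extent_bit(), declared in extent-io-tree.h): drop EXTENT_DELALLOC
 * from an inclusive range, letting the tree split any state straddling a
 * boundary. The helper itself is hypothetical.
 */
static int example_clear_delalloc(struct extent_io_tree *tree,
				  u64 start, u64 end,
				  struct extent_state **cached)
{
	return clear_extent_bit(tree, start, end, EXTENT_DELALLOC, cached);
}
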
711 910391 : static void wait_on_state(struct extent_io_tree *tree,
712 : struct extent_state *state)
713 : __releases(tree->lock)
714 : __acquires(tree->lock)
715 : {
716 910391 : DEFINE_WAIT(wait);
717 910391 : prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
718 910390 : spin_unlock(&tree->lock);
719 910388 : schedule();
720 909178 : spin_lock(&tree->lock);
721 910390 : finish_wait(&state->wq, &wait);
722 910389 : }
723 :
724 : /*
725 : * Wait for one or more bits to clear on a range in the state tree.
726 : * The range [start, end] is inclusive.
727 : * The tree lock is taken by this function
728 : */
729 152667 : void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
730 : struct extent_state **cached_state)
731 : {
732 152667 : struct extent_state *state;
733 :
734 152667 : btrfs_debug_check_extent_io_range(tree, start, end);
735 :
736 152667 : spin_lock(&tree->lock);
737 1063063 : again:
738 : /*
739 : * Maintain cached_state, as we may not remove it from the tree if there
740 : * are more bits than the bits we're waiting on set on this state.
741 : */
742 1063063 : if (cached_state && *cached_state) {
743 1063063 : state = *cached_state;
744 1063063 : if (extent_state_in_tree(state) &&
745 152960 : state->start <= start && start < state->end)
746 152030 : goto process_node;
747 : }
748 911040 : while (1) {
749 : /*
750 : * This search will find all the extents that end after our
751 : * range starts.
752 : */
753 911040 : state = tree_search(tree, start);
754 : process_node:
755 1063077 : if (!state)
756 : break;
757 1034284 : if (state->start > end)
758 122997 : goto out;
759 :
760 911287 : if (state->state & bits) {
761 910390 : start = state->start;
762 910390 : refcount_inc(&state->refs);
763 910391 : wait_on_state(tree, state);
764 910389 : free_extent_state(state);
765 910391 : goto again;
766 : }
767 897 : start = state->end + 1;
768 :
769 897 : if (start > end)
770 : break;
771 :
772 21 : if (!cond_resched_lock(&tree->lock)) {
773 14 : state = next_state(state);
774 14 : goto process_node;
775 : }
776 : }
777 29669 : out:
778 : /* This state is no longer useful, clear it and free it up. */
779 152672 : if (cached_state && *cached_state) {
780 152672 : state = *cached_state;
781 152672 : *cached_state = NULL;
782 152672 : free_extent_state(state);
783 : }
784 152672 : spin_unlock(&tree->lock);
785 152673 : }
786 :
787 194060839 : static void cache_state_if_flags(struct extent_state *state,
788 : struct extent_state **cached_ptr,
789 : unsigned flags)
790 : {
791 194060839 : if (cached_ptr && !(*cached_ptr)) {
792 112092526 : if (!flags || (state->state & flags)) {
793 98974826 : *cached_ptr = state;
794 98974826 : refcount_inc(&state->refs);
795 : }
796 : }
797 194163075 : }
798 :
799 : static void cache_state(struct extent_state *state,
800 : struct extent_state **cached_ptr)
801 : {
802 184796436 : return cache_state_if_flags(state, cached_ptr,
803 : EXTENT_LOCKED | EXTENT_BOUNDARY);
804 : }
805 :
806 : /*
807 : * Find the first state struct with 'bits' set after 'start', and return it.
808 : * tree->lock must be held. NULL will be returned if nothing was found after
809 : * 'start'.
810 : */
811 12301210 : static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
812 : u64 start, u32 bits)
813 : {
814 12301210 : struct extent_state *state;
815 :
816 : /*
817 : * This search will find all the extents that end after our range
818 : * starts.
819 : */
820 12301210 : state = tree_search(tree, start);
821 12301294 : while (state) {
822 9335221 : if (state->end >= start && (state->state & bits))
823 9333447 : return state;
824 1774 : state = next_state(state);
825 : }
826 : return NULL;
827 : }
828 :
829 : /*
830 : * Find the first offset in the io tree with one or more @bits set.
831 : *
832 : * Note: If there are multiple bits set in @bits, any of them will match.
833 : *
834 : * Return 0 if we find something, and update @start_ret and @end_ret.
835 : * Return 1 if we found nothing.
836 : */
837 12300814 : int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
838 : u64 *start_ret, u64 *end_ret, u32 bits,
839 : struct extent_state **cached_state)
840 : {
841 12300814 : struct extent_state *state;
842 12300814 : int ret = 1;
843 :
844 12300814 : spin_lock(&tree->lock);
845 12300794 : if (cached_state && *cached_state) {
846 0 : state = *cached_state;
847 0 : if (state->end == start - 1 && extent_state_in_tree(state)) {
848 0 : while ((state = next_state(state)) != NULL) {
849 0 : if (state->state & bits)
850 0 : goto got_it;
851 : }
852 0 : free_extent_state(*cached_state);
853 0 : *cached_state = NULL;
854 0 : goto out;
855 : }
856 0 : free_extent_state(*cached_state);
857 0 : *cached_state = NULL;
858 : }
859 :
860 12300794 : state = find_first_extent_bit_state(tree, start, bits);
861 12300783 : got_it:
862 12300783 : if (state) {
863 9333038 : cache_state_if_flags(state, cached_state, 0);
864 9333087 : *start_ret = state->start;
865 9333087 : *end_ret = state->end;
866 9333087 : ret = 0;
867 : }
868 2967745 : out:
869 12300832 : spin_unlock(&tree->lock);
870 12300828 : return ret;
871 : }
872 :
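/*
 * The return convention (0 = found, 1 = nothing) lends itself to a scan
 * loop. A hedged sketch of walking every range with a given bit set
 * (hypothetical caller; EXTENT_DIRTY is just an example bit):
 */
static void example_walk_dirty(struct extent_io_tree *tree, u64 start)
{
	u64 found_start, found_end;

	while (!find_first_extent_bit(tree, start, &found_start, &found_end,
				      EXTENT_DIRTY, NULL)) {
		/* ... process the inclusive range [found_start, found_end] ... */
		start = found_end + 1;
	}
}
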
873 : /*
874 : * Find a contiguous area of bits
875 : *
876 : * @tree: io tree to check
877 : * @start: offset to start the search from
878 : * @start_ret: the first offset we found with the bits set
879 : * @end_ret: the final contiguous range of the bits that were set
880 : * @bits: bits to look for
881 : *
882 : * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
883 : * to set bits appropriately, and then merge them again. During this time they
884 : * can drop the tree->lock, so use this helper if you want to find the actual
885 : * contiguous area for given bits. We will search to the first bit we find, and
886 : * then walk down the tree until we find a non-contiguous area. The area
887 : * returned will be the full contiguous area with the bits set.
888 : */
889 400 : int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
890 : u64 *start_ret, u64 *end_ret, u32 bits)
891 : {
892 400 : struct extent_state *state;
893 400 : int ret = 1;
894 :
895 400 : spin_lock(&tree->lock);
896 400 : state = find_first_extent_bit_state(tree, start, bits);
897 400 : if (state) {
898 395 : *start_ret = state->start;
899 395 : *end_ret = state->end;
900 495 : while ((state = next_state(state)) != NULL) {
901 100 : if (state->start > (*end_ret + 1))
902 : break;
903 0 : *end_ret = state->end;
904 : }
905 : ret = 0;
906 : }
907 400 : spin_unlock(&tree->lock);
908 400 : return ret;
909 : }
910 :
911 : /*
912 : * Find a contiguous range of bytes in the file marked as delalloc, not more
913 : * than 'max_bytes'. 'start' and 'end' are used to return the range.
914 : *
915 : * True is returned if we find something, false if nothing was in the tree.
916 : */
917 56932990 : bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
918 : u64 *end, u64 max_bytes,
919 : struct extent_state **cached_state)
920 : {
921 56932990 : struct extent_state *state;
922 56932990 : u64 cur_start = *start;
923 56932990 : bool found = false;
924 56932990 : u64 total_bytes = 0;
925 :
926 56932990 : spin_lock(&tree->lock);
927 :
928 : /*
929 : * This search will find all the extents that end after our range
930 : * starts.
931 : */
932 56934346 : state = tree_search(tree, cur_start);
933 56926985 : if (!state) {
934 12983084 : *end = (u64)-1;
935 12983084 : goto out;
936 : }
937 :
938 51186888 : while (state) {
939 47566541 : if (found && (state->start != cur_start ||
940 2692854 : (state->state & EXTENT_BOUNDARY))) {
941 2035035 : goto out;
942 : }
943 45531506 : if (!(state->state & EXTENT_DELALLOC)) {
944 33416648 : if (!found)
945 33405667 : *end = state->end;
946 33416648 : goto out;
947 : }
948 12114858 : if (!found) {
949 10540078 : *start = state->start;
950 10540078 : *cached_state = state;
951 10540078 : refcount_inc(&state->refs);
952 : }
953 12115107 : found = true;
954 12115107 : *end = state->end;
955 12115107 : cur_start = state->end + 1;
956 12115107 : total_bytes += state->end - state->start + 1;
957 12115107 : if (total_bytes >= max_bytes)
958 : break;
959 7240972 : state = next_state(state);
960 : }
961 8494482 : out:
962 56929249 : spin_unlock(&tree->lock);
963 56934258 : return found;
964 : }
965 :
966 : /*
967 : * Set some bits on a range in the tree. This may require allocations or
968 : * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
969 : * GFP_NOWAIT.
970 : *
971 : * If any of the exclusive bits are set, this will fail with -EEXIST if some
972 : * part of the range already has the desired bits set. The extent_state of the
973 : * existing range is returned in failed_state in this case, and the start of the
974 : * existing range is returned in failed_start. failed_state is used as an
975 : * optimization for wait_extent_bit; failed_start must be used as the source of
976 : * truth as failed_state may have changed since we returned.
977 : *
978 : * [start, end] is inclusive. This takes the tree lock.
979 : */
980 181592725 : static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
981 : u32 bits, u64 *failed_start,
982 : struct extent_state **failed_state,
983 : struct extent_state **cached_state,
984 : struct extent_changeset *changeset)
985 : {
986 181592725 : struct extent_state *state;
987 181592725 : struct extent_state *prealloc = NULL;
988 181592725 : struct rb_node **p = NULL;
989 181592725 : struct rb_node *parent = NULL;
990 181592725 : int err = 0;
991 181592725 : u64 last_start;
992 181592725 : u64 last_end;
993 181592725 : u32 exclusive_bits = (bits & EXTENT_LOCKED);
994 181592725 : gfp_t mask;
995 :
996 181592725 : set_gfp_mask_from_bits(&bits, &mask);
997 181592725 : btrfs_debug_check_extent_io_range(tree, start, end);
998 181592725 : trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
999 :
1000 181592725 : if (exclusive_bits)
1001 : ASSERT(failed_start);
1002 : else
1003 : ASSERT(failed_start == NULL && failed_state == NULL);
1004 : again:
1005 188586425 : if (!prealloc) {
1006 : /*
1007 : * Don't care for allocation failure here because we might end
1008 : * up not needing the pre-allocated extent state at all, which
1009 : * is the case if the extent states already in the tree cover our
1010 : * input range and none of them extends past its boundaries.
1011 : * If we end up needing a new extent state we allocate it later.
1012 : */
1013 188427235 : prealloc = alloc_extent_state(mask);
1014 : }
1015 :
1016 188565481 : spin_lock(&tree->lock);
1017 188862226 : if (cached_state && *cached_state) {
1018 49950110 : state = *cached_state;
1019 49950110 : if (state->start <= start && state->end > start &&
1020 : extent_state_in_tree(state))
1021 49386275 : goto hit_next;
1022 : }
1023 : /*
1024 : * This search will find all the extents that end after our range
1025 : * starts.
1026 : */
1027 139475951 : state = tree_search_for_insert(tree, start, &p, &parent);
1028 139288931 : if (!state) {
1029 92868195 : prealloc = alloc_extent_state_atomic(prealloc);
1030 92868195 : if (!prealloc)
1031 0 : goto search_again;
1032 92868195 : prealloc->start = start;
1033 92868195 : prealloc->end = end;
1034 92868195 : insert_state_fast(tree, prealloc, p, parent, bits, changeset);
1035 92916210 : cache_state(prealloc, cached_state);
1036 92953918 : prealloc = NULL;
1037 92953918 : goto out;
1038 : }
1039 46420736 : hit_next:
1040 96428217 : last_start = state->start;
1041 96428217 : last_end = state->end;
1042 :
1043 : /*
1044 : * | ---- desired range ---- |
1045 : * | state |
1046 : *
1047 : * Just lock what we found and keep going
1048 : */
1049 96428217 : if (state->start == start && state->end <= end) {
1050 54413199 : if (state->state & exclusive_bits) {
1051 148723 : *failed_start = state->start;
1052 148723 : cache_state(state, failed_state);
1053 148724 : err = -EEXIST;
1054 148724 : goto out;
1055 : }
1056 :
1057 54264476 : set_state_bits(tree, state, bits, changeset);
1058 54223180 : cache_state(state, cached_state);
1059 54209543 : merge_state(tree, state);
1060 54183925 : if (last_end == (u64)-1)
1061 0 : goto out;
1062 54183925 : start = last_end + 1;
1063 54183925 : state = next_state(state);
1064 54191530 : if (start < end && state && state->start == start &&
1065 : !need_resched())
1066 612020 : goto hit_next;
1067 53579510 : goto search_again;
1068 : }
1069 :
1070 : /*
1071 : * | ---- desired range ---- |
1072 : * | state |
1073 : * or
1074 : * | ------------- state -------------- |
1075 : *
1076 : * We need to split the extent we found, and may flip bits on second
1077 : * half.
1078 : *
1079 : * If the extent we found extends past our range, we just split and
1080 : * search again. It'll get split again the next time though.
1081 : *
1082 : * If the extent we found is inside our range, we set the desired bit
1083 : * on it.
1084 : */
1085 42015018 : if (state->start < start) {
1086 15338912 : if (state->state & exclusive_bits) {
1087 2518 : *failed_start = start;
1088 2518 : cache_state(state, failed_state);
1089 2518 : err = -EEXIST;
1090 2518 : goto out;
1091 : }
1092 :
1093 : /*
1094 : * If this extent already has all the bits we want set, then
1095 : * skip it, not necessary to split it or do anything with it.
1096 : */
1097 15336394 : if ((state->state & bits) == bits) {
1098 25194 : start = state->end + 1;
1099 25194 : cache_state(state, cached_state);
1100 25194 : goto search_again;
1101 : }
1102 :
1103 15311200 : prealloc = alloc_extent_state_atomic(prealloc);
1104 15311200 : if (!prealloc)
1105 0 : goto search_again;
1106 15311200 : err = split_state(tree, state, prealloc, start);
1107 15342606 : if (err)
1108 0 : extent_io_tree_panic(tree, err);
1109 :
1110 15342606 : prealloc = NULL;
1111 15342606 : if (err)
1112 : goto out;
1113 15342606 : if (state->end <= end) {
1114 8555738 : set_state_bits(tree, state, bits, changeset);
1115 8509574 : cache_state(state, cached_state);
1116 8559788 : merge_state(tree, state);
1117 8560664 : if (last_end == (u64)-1)
1118 0 : goto out;
1119 8560664 : start = last_end + 1;
1120 8560664 : state = next_state(state);
1121 8572161 : if (start < end && state && state->start == start &&
1122 : !need_resched())
1123 9186 : goto hit_next;
1124 : }
1125 15349843 : goto search_again;
1126 : }
1127 : /*
1128 : * | ---- desired range ---- |
1129 : * | state | or | state |
1130 : *
1131 : * There's a hole, we need to insert something in it and ignore the
1132 : * extent we found.
1133 : */
1134 26676106 : if (state->start > start) {
1135 18443744 : u64 this_end;
1136 18443744 : if (end < last_start)
1137 : this_end = end;
1138 : else
1139 144675 : this_end = last_start - 1;
1140 :
1141 18443744 : prealloc = alloc_extent_state_atomic(prealloc);
1142 18443744 : if (!prealloc)
1143 0 : goto search_again;
1144 :
1145 : /*
1146 : * Avoid freeing 'prealloc' if it can be merged with the later
1147 : * extent.
1148 : */
1149 18443744 : prealloc->start = start;
1150 18443744 : prealloc->end = this_end;
1151 18443744 : err = insert_state(tree, prealloc, bits, changeset);
1152 18443732 : if (err)
1153 0 : extent_io_tree_panic(tree, err);
1154 :
1155 18443732 : cache_state(prealloc, cached_state);
1156 18443876 : prealloc = NULL;
1157 18443876 : start = this_end + 1;
1158 18443876 : goto search_again;
1159 : }
1160 : /*
1161 : * | ---- desired range ---- |
1162 : * | state |
1163 : *
1164 : * We need to split the extent, and set the bit on the first half
1165 : */
1166 8232362 : if (state->start <= end && state->end > end) {
1167 8232362 : if (state->state & exclusive_bits) {
1168 1442 : *failed_start = start;
1169 1442 : cache_state(state, failed_state);
1170 1442 : err = -EEXIST;
1171 1442 : goto out;
1172 : }
1173 :
1174 8230920 : prealloc = alloc_extent_state_atomic(prealloc);
1175 8230920 : if (!prealloc)
1176 52 : goto search_again;
1177 8230868 : err = split_state(tree, state, prealloc, end + 1);
1178 8234405 : if (err)
1179 0 : extent_io_tree_panic(tree, err);
1180 :
1181 8234405 : set_state_bits(tree, prealloc, bits, changeset);
1182 8230895 : cache_state(prealloc, cached_state);
1183 8232742 : merge_state(tree, prealloc);
1184 8232244 : prealloc = NULL;
1185 8232244 : goto out;
1186 : }
1187 :
1188 0 : search_again:
1189 87398475 : if (start > end)
1190 80245544 : goto out;
1191 7152931 : spin_unlock(&tree->lock);
1192 7159502 : if (gfpflags_allow_blocking(mask))
1193 7159499 : cond_resched();
1194 7159444 : goto again;
1195 :
1196 181584390 : out:
1197 181584390 : spin_unlock(&tree->lock);
1198 181811321 : if (prealloc)
1199 53765991 : free_extent_state(prealloc);
1200 :
1201 181803323 : return err;
1202 :
1203 : }
1204 :
1205 82630244 : int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1206 : u32 bits, struct extent_state **cached_state)
1207 : {
1208 82630244 : return __set_extent_bit(tree, start, end, bits, NULL, NULL,
1209 : cached_state, NULL);
1210 : }
1211 :
1212 : /*
1213 : * Convert all bits in a given range from one bit to another
1214 : *
1215 : * @tree: the io tree to search
1216 : * @start: the start offset in bytes
1217 : * @end: the end offset in bytes (inclusive)
1218 : * @bits: the bits to set in this range
1219 : * @clear_bits: the bits to clear in this range
1220 : * @cached_state: state that we're going to cache
1221 : *
1222 : * This will go through and set bits for the given range. If any states exist
1223 : * already in this range they are set with the given bit and cleared of the
1224 : * clear_bits. This is only meant to be used by things that are mergeable, ie.
1225 : * converting from say DELALLOC to DIRTY. This is not meant to be used with
1226 : * boundary bits like LOCK.
1227 : *
1228 : * All allocations are done with GFP_NOFS.
1229 : */
1230 2295000 : int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1231 : u32 bits, u32 clear_bits,
1232 : struct extent_state **cached_state)
1233 : {
1234 2295000 : struct extent_state *state;
1235 2295000 : struct extent_state *prealloc = NULL;
1236 2295000 : struct rb_node **p = NULL;
1237 2295000 : struct rb_node *parent = NULL;
1238 2295000 : int err = 0;
1239 2295000 : u64 last_start;
1240 2295000 : u64 last_end;
1241 2295000 : bool first_iteration = true;
1242 :
1243 2295000 : btrfs_debug_check_extent_io_range(tree, start, end);
1244 2295000 : trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
1245 : clear_bits);
1246 :
1247 2294971 : again:
1248 2294971 : if (!prealloc) {
1249 : /*
1250 : * Best effort, don't worry if extent state allocation fails
1251 : * here for the first iteration. We might have a cached state
1252 : * that matches exactly the target range, in which case no
1253 : * extent state allocations are needed. We'll only know this
1254 : * after locking the tree.
1255 : */
1256 2294966 : prealloc = alloc_extent_state(GFP_NOFS);
1257 2294984 : if (!prealloc && !first_iteration)
1258 : return -ENOMEM;
1259 : }
1260 :
1261 2294989 : spin_lock(&tree->lock);
1262 2295004 : if (cached_state && *cached_state) {
1263 2295004 : state = *cached_state;
1264 2295004 : if (state->start <= start && state->end > start &&
1265 : extent_state_in_tree(state))
1266 2295001 : goto hit_next;
1267 : }
1268 :
1269 : /*
1270 : * This search will find all the extents that end after our range
1271 : * starts.
1272 : */
1273 3 : state = tree_search_for_insert(tree, start, &p, &parent);
1274 0 : if (!state) {
1275 0 : prealloc = alloc_extent_state_atomic(prealloc);
1276 0 : if (!prealloc) {
1277 0 : err = -ENOMEM;
1278 0 : goto out;
1279 : }
1280 0 : prealloc->start = start;
1281 0 : prealloc->end = end;
1282 0 : insert_state_fast(tree, prealloc, p, parent, bits, NULL);
1283 0 : cache_state(prealloc, cached_state);
1284 0 : prealloc = NULL;
1285 0 : goto out;
1286 : }
1287 0 : hit_next:
1288 2295001 : last_start = state->start;
1289 2295001 : last_end = state->end;
1290 :
1291 : /*
1292 : * | ---- desired range ---- |
1293 : * | state |
1294 : *
1295 : * Just lock what we found and keep going.
1296 : */
1297 2295001 : if (state->start == start && state->end <= end) {
1298 2294991 : set_state_bits(tree, state, bits, NULL);
1299 2294968 : cache_state(state, cached_state);
1300 2294960 : state = clear_state_bit(tree, state, clear_bits, 0, NULL);
1301 2294955 : if (last_end == (u64)-1)
1302 0 : goto out;
1303 2294955 : start = last_end + 1;
1304 2294955 : if (start < end && state && state->start == start &&
1305 : !need_resched())
1306 0 : goto hit_next;
1307 2294955 : goto search_again;
1308 : }
1309 :
1310 : /*
1311 : * | ---- desired range ---- |
1312 : * | state |
1313 : * or
1314 : * | ------------- state -------------- |
1315 : *
1316 : * We need to split the extent we found, and may flip bits on second
1317 : * half.
1318 : *
1319 : * If the extent we found extends past our range, we just split and
1320 : * search again. It'll get split again the next time though.
1321 : *
1322 : * If the extent we found is inside our range, we set the desired bit
1323 : * on it.
1324 : */
1325 10 : if (state->start < start) {
1326 10 : prealloc = alloc_extent_state_atomic(prealloc);
1327 10 : if (!prealloc) {
1328 0 : err = -ENOMEM;
1329 0 : goto out;
1330 : }
1331 10 : err = split_state(tree, state, prealloc, start);
1332 0 : if (err)
1333 0 : extent_io_tree_panic(tree, err);
1334 0 : prealloc = NULL;
1335 0 : if (err)
1336 : goto out;
1337 0 : if (state->end <= end) {
1338 0 : set_state_bits(tree, state, bits, NULL);
1339 0 : cache_state(state, cached_state);
1340 0 : state = clear_state_bit(tree, state, clear_bits, 0, NULL);
1341 0 : if (last_end == (u64)-1)
1342 0 : goto out;
1343 0 : start = last_end + 1;
1344 0 : if (start < end && state && state->start == start &&
1345 : !need_resched())
1346 0 : goto hit_next;
1347 : }
1348 0 : goto search_again;
1349 : }
1350 : /*
1351 : * | ---- desired range ---- |
1352 : * | state | or | state |
1353 : *
1354 : * There's a hole, we need to insert something in it and ignore the
1355 : * extent we found.
1356 : */
1357 0 : if (state->start > start) {
1358 0 : u64 this_end;
1359 0 : if (end < last_start)
1360 : this_end = end;
1361 : else
1362 0 : this_end = last_start - 1;
1363 :
1364 0 : prealloc = alloc_extent_state_atomic(prealloc);
1365 0 : if (!prealloc) {
1366 0 : err = -ENOMEM;
1367 0 : goto out;
1368 : }
1369 :
1370 : /*
1371 : * Avoid freeing 'prealloc' if it can be merged with the later
1372 : * extent.
1373 : */
1374 0 : prealloc->start = start;
1375 0 : prealloc->end = this_end;
1376 0 : err = insert_state(tree, prealloc, bits, NULL);
1377 0 : if (err)
1378 0 : extent_io_tree_panic(tree, err);
1379 0 : cache_state(prealloc, cached_state);
1380 0 : prealloc = NULL;
1381 0 : start = this_end + 1;
1382 0 : goto search_again;
1383 : }
1384 : /*
1385 : * | ---- desired range ---- |
1386 : * | state |
1387 : *
1388 : * We need to split the extent, and set the bit on the first half.
1389 : */
1390 0 : if (state->start <= end && state->end > end) {
1391 0 : prealloc = alloc_extent_state_atomic(prealloc);
1392 0 : if (!prealloc) {
1393 0 : err = -ENOMEM;
1394 0 : goto out;
1395 : }
1396 :
1397 0 : err = split_state(tree, state, prealloc, end + 1);
1398 0 : if (err)
1399 0 : extent_io_tree_panic(tree, err);
1400 :
1401 0 : set_state_bits(tree, prealloc, bits, NULL);
1402 0 : cache_state(prealloc, cached_state);
1403 0 : clear_state_bit(tree, prealloc, clear_bits, 0, NULL);
1404 0 : prealloc = NULL;
1405 0 : goto out;
1406 : }
1407 :
1408 0 : search_again:
1409 2294955 : if (start > end)
1410 2294987 : goto out;
1411 0 : spin_unlock(&tree->lock);
1412 0 : cond_resched();
1413 0 : first_iteration = false;
1414 0 : goto again;
1415 :
1416 2294987 : out:
1417 2294987 : spin_unlock(&tree->lock);
1418 2295012 : if (prealloc)
1419 2295012 : free_extent_state(prealloc);
1420 :
1421 : return err;
1422 : }
1423 :
1424 : /*
1425 : * Find the first range that has @bits not set. This range could start before
1426 : * @start.
1427 : *
1428 : * @tree: the tree to search
1429 : * @start: offset at/after which the found extent should start
1430 : * @start_ret: records the beginning of the range
1431 : * @end_ret: records the end of the range (inclusive)
1432 : * @bits: the set of bits which must be unset
1433 : *
1434 : * Since an unallocated range is also considered one which doesn't have the bits
1435 : * set, it's possible that @end_ret contains -1; this happens when the range
1436 : * spans (last_range_end, end of device]. In this case it's up to the caller to
1437 : * trim @end_ret to the appropriate size.
1438 : */
1439 873 : void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
1440 : u64 *start_ret, u64 *end_ret, u32 bits)
1441 : {
1442 873 : struct extent_state *state;
1443 873 : struct extent_state *prev = NULL, *next = NULL;
1444 :
1445 873 : spin_lock(&tree->lock);
1446 :
1447 : /* Find first extent with bits cleared */
1448 3376 : while (1) {
1449 3376 : state = tree_search_prev_next(tree, start, &prev, &next);
1450 3376 : if (!state && !next && !prev) {
1451 : /*
1452 : * Tree is completely empty, send full range and let
1453 : * caller deal with it
1454 : */
1455 0 : *start_ret = 0;
1456 0 : *end_ret = -1;
1457 0 : goto out;
1458 3376 : } else if (!state && !next) {
1459 : /*
1460 : * We are past the last allocated chunk, set start at
1461 : * the end of the last extent.
1462 : */
1463 858 : *start_ret = prev->end + 1;
1464 858 : *end_ret = -1;
1465 858 : goto out;
1466 2518 : } else if (!state) {
1467 15 : state = next;
1468 : }
1469 :
1470 : /*
1471 : * At this point 'state' either contains 'start' or start is
1472 : * before 'state'
1473 : */
1474 2518 : if (in_range(start, state->start, state->end - state->start + 1)) {
1475 2503 : if (state->state & bits) {
1476 : /*
1477 : * |--range with bits sets--|
1478 : * |
1479 : * start
1480 : */
1481 : start = state->end + 1;
1482 : } else {
1483 : /*
1484 : * 'start' falls within a range that doesn't
1485 : * have the bits set, so take its start as the
1486 : * beginning of the desired range
1487 : *
1488 : * |--range with bits cleared----|
1489 : * |
1490 : * start
1491 : */
1492 0 : *start_ret = state->start;
1493 0 : break;
1494 : }
1495 : } else {
1496 : /*
1497 : * |---prev range---|---hole/unset---|---node range---|
1498 : * |
1499 : * start
1500 : *
1501 : * or
1502 : *
1503 : * |---hole/unset--||--first node--|
1504 : * 0 |
1505 : * start
1506 : */
1507 15 : if (prev)
1508 3 : *start_ret = prev->end + 1;
1509 : else
1510 12 : *start_ret = 0;
1511 : break;
1512 : }
1513 : }
1514 :
1515 : /*
1516 : * Find the longest stretch from start until an entry which has the
1517 : * bits set
1518 : */
1519 15 : while (state) {
1520 15 : if (state->end >= start && !(state->state & bits)) {
1521 0 : *end_ret = state->end;
1522 : } else {
1523 15 : *end_ret = state->start - 1;
1524 15 : break;
1525 : }
1526 0 : state = next_state(state);
1527 : }
1528 0 : out:
1529 873 : spin_unlock(&tree->lock);
1530 873 : }
1531 :
1532 : /*
1533 : * Count the number of bytes in the tree that have a given bit(s) set for a
1534 : * given range.
1535 : *
1536 : * @tree: The io tree to search.
1537 : * @start: The start offset of the range. This value is updated to the
1538 : * offset of the first byte found with the given bit(s), so it
1539 : * can end up being bigger than the initial value.
1540 : * @search_end: The end offset (inclusive value) of the search range.
1541 : * @max_bytes: The maximum byte count we are interested in. The search stops
1542 : * once it reaches this count.
1543 : * @bits: The bits the range must have in order to be accounted for.
1544 : * If multiple bits are set, then only subranges that have all
1545 : * the bits set are accounted for.
1546 : * @contig: Indicate if we should ignore holes in the range or not. If
1547 : * this is true, then stop once we find a hole.
1548 : * @cached_state: A cached state to be used across multiple calls to this
1549 : * function in order to speedup searches. Use NULL if this is
1550 : * called only once or if each call does not start where the
1551 : * previous one ended.
1552 : *
1553 : * Returns the total number of bytes found within the given range that have
1554 : * all given bits set. If the returned number of bytes is greater than zero
1555 : * then @start is updated with the offset of the first byte with the bits set.
1556 : */
1557 1135425 : u64 count_range_bits(struct extent_io_tree *tree,
1558 : u64 *start, u64 search_end, u64 max_bytes,
1559 : u32 bits, int contig,
1560 : struct extent_state **cached_state)
1561 : {
1562 1135425 : struct extent_state *state = NULL;
1563 1135425 : struct extent_state *cached;
1564 1135425 : u64 cur_start = *start;
1565 1135425 : u64 total_bytes = 0;
1566 1135425 : u64 last = 0;
1567 1135425 : int found = 0;
1568 :
1569 1135425 : if (WARN_ON(search_end < cur_start))
1570 : return 0;
1571 :
1572 1135425 : spin_lock(&tree->lock);
1573 :
1574 1135423 : if (!cached_state || !*cached_state)
1575 1111229 : goto search;
1576 :
1577 24194 : cached = *cached_state;
1578 :
1579 24194 : if (!extent_state_in_tree(cached))
1580 222 : goto search;
1581 :
1582 23972 : if (cached->start <= cur_start && cur_start <= cached->end) {
1583 : state = cached;
1584 928 : } else if (cached->start > cur_start) {
1585 827 : struct extent_state *prev;
1586 :
1587 : /*
1588 : * The cached state starts after our search range's start. Check
1589 : * if the previous state record starts at or before the range we
1590 : * are looking for, and if so, use it - this is a common case
1591 : * when there are holes between records in the tree. If there is
1592 : * no previous state record, we can start from our cached state.
1593 : */
1594 827 : prev = prev_state(cached);
1595 827 : if (!prev)
1596 : state = cached;
1597 827 : else if (prev->start <= cur_start && cur_start <= prev->end)
1598 : state = prev;
1599 : }
1600 :
1601 : /*
1602 : * This search will find all the extents that end after our range
1603 : * starts.
1604 : */
1605 139 : search:
1606 1135284 : if (!state)
1607 1111590 : state = tree_search(tree, cur_start);
1608 :
1609 2486914 : while (state) {
1610 1678158 : if (state->start > search_end)
1611 : break;
1612 1374683 : if (contig && found && state->start > last + 1)
1613 : break;
1614 1374683 : if (state->end >= cur_start && (state->state & bits) == bits) {
1615 23367 : total_bytes += min(search_end, state->end) + 1 -
1616 23367 : max(cur_start, state->start);
1617 23367 : if (total_bytes >= max_bytes)
1618 : break;
1619 1791 : if (!found) {
1620 1791 : *start = max(cur_start, state->start);
1621 1791 : found = 1;
1622 : }
1623 1791 : last = state->end;
1624 1351316 : } else if (contig && found) {
1625 : break;
1626 : }
1627 1351502 : state = next_state(state);
1628 : }
1629 :
1630 1135412 : if (cached_state) {
1631 53446 : free_extent_state(*cached_state);
1632 53446 : *cached_state = state;
1633 53446 : if (state)
1634 24204 : refcount_inc(&state->refs);
1635 : }
1636 :
1637 1135413 : spin_unlock(&tree->lock);
1638 :
1639 1135413 : return total_bytes;
1640 : }
1641 :
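/*
 * A hedged usage sketch (hypothetical caller; SZ_1M is the kernel's size
 * constant from linux/sizes.h): count up to 1MiB of delalloc bytes from
 * offset zero, with @start updated to the first matching byte.
 */
static u64 example_count_delalloc(struct extent_io_tree *tree)
{
	u64 start = 0;

	/* contig=0: holes between delalloc ranges do not stop the count. */
	return count_range_bits(tree, &start, SZ_1M - 1, SZ_1M,
				EXTENT_DELALLOC, 0, NULL);
}
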
1642 : /*
1643 : * Search a range in the state tree for a given mask. If 'filled' == 1, this
1644 : * returns 1 only if every extent in the range has the bits set. Otherwise, 1
1645 : * is returned if any bit in the range is found set.
1646 : */
1647 60705130 : int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1648 : u32 bits, int filled, struct extent_state *cached)
1649 : {
1650 60705130 : struct extent_state *state = NULL;
1651 60705130 : int bitset = 0;
1652 :
1653 60705130 : spin_lock(&tree->lock);
1654 60707871 : if (cached && extent_state_in_tree(cached) && cached->start <= start &&
1655 3564424 : cached->end > start)
1656 : state = cached;
1657 : else
1658 57143447 : state = tree_search(tree, start);
1659 60987359 : while (state && start <= end) {
1660 13351357 : if (filled && state->start > start) {
1661 : bitset = 0;
1662 : break;
1663 : }
1664 :
1665 13346672 : if (state->start > end)
1666 : break;
1667 :
1668 5781542 : if (state->state & bits) {
1669 4259095 : bitset = 1;
1670 4259095 : if (!filled)
1671 : break;
1672 1522447 : } else if (filled) {
1673 : bitset = 0;
1674 : break;
1675 : }
1676 :
1677 4441443 : if (state->end == (u64)-1)
1678 : break;
1679 :
1680 4441443 : start = state->end + 1;
1681 4441443 : if (start > end)
1682 : break;
1683 286926 : state = next_state(state);
1684 : }
1685 :
1686 : /* We ran out of states and were still inside of our range. */
1687 60700433 : if (filled && !state)
1688 351 : bitset = 0;
1689 60700433 : spin_unlock(&tree->lock);
1690 60705783 : return bitset;
1691 : }
1692 :
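/*
 * A hedged sketch of the two query modes (hypothetical caller):
 * filled == 1 asks "does the whole range have the bits",
 * filled == 0 asks "does any part of it".
 */
static void example_test_range(struct extent_io_tree *tree, u64 start, u64 end)
{
	int all = test_range_bit(tree, start, end, EXTENT_DELALLOC, 1, NULL);
	int any = test_range_bit(tree, start, end, EXTENT_DELALLOC, 0, NULL);

	/* 'all' implies 'any'; the reverse does not hold. */
	(void)all;
	(void)any;
}
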
1693 : /* Wrappers around set/clear extent bit */
1694 1909813 : int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1695 : u32 bits, struct extent_changeset *changeset)
1696 : {
1697 : /*
1698 : * We don't support EXTENT_LOCKED yet, as current changeset will
1699 : * record any bits changed, so in the EXTENT_LOCKED case it will
1700 : * either fail with -EEXIST or changeset will record the whole
1701 : * range.
1702 : */
1703 1909813 : ASSERT(!(bits & EXTENT_LOCKED));
1704 :
1705 1909813 : return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
1706 : }
1707 :
1708 6870231 : int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1709 : u32 bits, struct extent_changeset *changeset)
1710 : {
1711 : /*
1712 : * Don't support EXTENT_LOCKED case, same reason as
1713 : * set_record_extent_bits().
1714 : */
1715 6870231 : ASSERT(!(bits & EXTENT_LOCKED));
1716 :
1717 6870231 : return __clear_extent_bit(tree, start, end, bits, NULL, changeset);
1718 : }
1719 :
1720 4876 : int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
1721 : struct extent_state **cached)
1722 : {
1723 4876 : int err;
1724 4876 : u64 failed_start;
1725 :
1726 4876 : err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
1727 : NULL, cached, NULL);
1728 4876 : if (err == -EEXIST) {
1729 11 : if (failed_start > start)
1730 4 : clear_extent_bit(tree, start, failed_start - 1,
1731 : EXTENT_LOCKED, cached);
1732 11 : return 0;
1733 : }
1734 : return 1;
1735 : }
1736 :
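/*
 * Note the return convention above: 1 means the whole range is now locked,
 * 0 means it was contended (any partially taken lock is rolled back first).
 * A hedged sketch of a non-blocking caller (hypothetical):
 */
static bool example_try_lock(struct extent_io_tree *tree, u64 start, u64 end,
			     struct extent_state **cached)
{
	if (!try_lock_extent(tree, start, end, cached))
		return false;	/* contended: caller backs off or blocks */
	/* ... caller now owns [start, end] and must unlock it later ... */
	return true;
}
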
1737 : /*
1738 : * Either insert or lock the state struct between start and end, waiting
1739 : * (on EXTENT_LOCKED) until the whole range can be locked.
1740 : */
1741 96993266 : int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
1742 : struct extent_state **cached_state)
1743 : {
1744 96993266 : struct extent_state *failed_state = NULL;
1745 96993266 : int err;
1746 96993266 : u64 failed_start;
1747 :
1748 96993266 : err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
1749 : &failed_state, cached_state, NULL);
1750 97146284 : while (err == -EEXIST) {
1751 152669 : if (failed_start != start)
1752 4158 : clear_extent_bit(tree, start, failed_start - 1,
1753 : EXTENT_LOCKED, cached_state);
1754 :
1755 152669 : wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED,
1756 : &failed_state);
1757 152673 : err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
1758 : &failed_start, &failed_state,
1759 : cached_state, NULL);
1760 : }
1761 96991431 : return err;
1762 : }
1763 :
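/*
 * The canonical pattern pairs lock_extent() with unlock_extent(), which is
 * assumed here from extent-io-tree.h (it clears EXTENT_LOCKED and wakes
 * waiters); the sketch below is hypothetical.
 */
static int example_locked_op(struct extent_io_tree *tree, u64 start, u64 end)
{
	struct extent_state *cached = NULL;

	lock_extent(tree, start, end, &cached);
	/* ... operate on data covered by the inclusive range ... */
	unlock_extent(tree, start, end, &cached);
	return 0;
}
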
1764 0 : void __cold extent_state_free_cachep(void)
1765 : {
1766 0 : btrfs_extent_state_leak_debug_check();
1767 0 : kmem_cache_destroy(extent_state_cache);
1768 0 : }
1769 :
1770 11 : int __init extent_state_init_cachep(void)
1771 : {
1772 11 : extent_state_cache = kmem_cache_create("btrfs_extent_state",
1773 : sizeof(struct extent_state), 0,
1774 : SLAB_MEM_SPREAD, NULL);
1775 11 : if (!extent_state_cache)
1776 0 : return -ENOMEM;
1777 :
1778 : return 0;
1779 : }
|