Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 :
3 : #include <linux/slab.h>
4 : #include <trace/events/btrfs.h>
5 : #include "messages.h"
6 : #include "ctree.h"
7 : #include "extent-io-tree.h"
8 : #include "btrfs_inode.h"
9 : #include "misc.h"
10 :
11 : static struct kmem_cache *extent_state_cache;
12 :
13 : static inline bool extent_state_in_tree(const struct extent_state *state)
14 : {
15 660312778 : return !RB_EMPTY_NODE(&state->rb_node);
16 : }
17 :
18 : #ifdef CONFIG_BTRFS_DEBUG
19 : static LIST_HEAD(states);
20 : static DEFINE_SPINLOCK(leak_lock);
21 :
22 : static inline void btrfs_leak_debug_add_state(struct extent_state *state)
23 : {
24 : unsigned long flags;
25 :
26 : spin_lock_irqsave(&leak_lock, flags);
27 : list_add(&state->leak_list, &states);
28 : spin_unlock_irqrestore(&leak_lock, flags);
29 : }
30 :
31 : static inline void btrfs_leak_debug_del_state(struct extent_state *state)
32 : {
33 : unsigned long flags;
34 :
35 : spin_lock_irqsave(&leak_lock, flags);
36 : list_del(&state->leak_list);
37 : spin_unlock_irqrestore(&leak_lock, flags);
38 : }
39 :
40 : static inline void btrfs_extent_state_leak_debug_check(void)
41 : {
42 : struct extent_state *state;
43 :
44 : while (!list_empty(&states)) {
45 : state = list_entry(states.next, struct extent_state, leak_list);
46 : pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
47 : state->start, state->end, state->state,
48 : extent_state_in_tree(state),
49 : refcount_read(&state->refs));
50 : list_del(&state->leak_list);
51 : kmem_cache_free(extent_state_cache, state);
52 : }
53 : }
54 :
55 : #define btrfs_debug_check_extent_io_range(tree, start, end) \
56 : __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
57 : static inline void __btrfs_debug_check_extent_io_range(const char *caller,
58 : struct extent_io_tree *tree,
59 : u64 start, u64 end)
60 : {
61 : struct btrfs_inode *inode = tree->inode;
62 : u64 isize;
63 :
64 : if (!inode)
65 : return;
66 :
67 : isize = i_size_read(&inode->vfs_inode);
68 : if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
69 : btrfs_debug_rl(inode->root->fs_info,
70 : "%s: ino %llu isize %llu odd range [%llu,%llu]",
71 : caller, btrfs_ino(inode), isize, start, end);
72 : }
73 : }
74 : #else
75 : #define btrfs_leak_debug_add_state(state) do {} while (0)
76 : #define btrfs_leak_debug_del_state(state) do {} while (0)
77 : #define btrfs_extent_state_leak_debug_check() do {} while (0)
78 : #define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
79 : #endif
80 :
81 : /*
82 : * For the file_extent_tree, we want to hold the inode lock when we look up
83 : * and update the disk_i_size, but lockdep will complain, because with the
84 : * io_tree we hold the tree lock and then take the inode lock when setting
85 : * delalloc. These two locking patterns are unrelated, so give the
86 : * file_extent_tree its own lockdep class to keep them from being mixed up.
87 : */
88 : static struct lock_class_key file_extent_tree_class;
89 :
90 : struct tree_entry {
91 : u64 start;
92 : u64 end;
93 : struct rb_node rb_node;
94 : };
95 :
96 8223763 : void extent_io_tree_init(struct btrfs_fs_info *fs_info,
97 : struct extent_io_tree *tree, unsigned int owner)
98 : {
99 8223763 : tree->fs_info = fs_info;
100 8223763 : tree->state = RB_ROOT;
101 8223763 : spin_lock_init(&tree->lock);
102 8216601 : tree->inode = NULL;
103 8216601 : tree->owner = owner;
104 8216601 : if (owner == IO_TREE_INODE_FILE_EXTENT)
105 8216601 : lockdep_set_class(&tree->lock, &file_extent_tree_class);
106 8216601 : }
107 :
108 1216407 : void extent_io_tree_release(struct extent_io_tree *tree)
109 : {
110 1216407 : spin_lock(&tree->lock);
111 : /*
112 : * Do a single barrier for the waitqueue_active check here; the state
113 : * of the waitqueue should not change once extent_io_tree_release is
114 : * called.
115 : */
116 1216407 : smp_mb();
117 1216437 : while (!RB_EMPTY_ROOT(&tree->state)) {
118 30 : struct rb_node *node;
119 30 : struct extent_state *state;
120 :
121 30 : node = rb_first(&tree->state);
122 30 : state = rb_entry(node, struct extent_state, rb_node);
123 30 : rb_erase(&state->rb_node, &tree->state);
124 30 : RB_CLEAR_NODE(&state->rb_node);
125 : /*
126 : * btree io trees are never supposed to have tasks waiting for
127 : * changes in the flags of extent states.
128 : */
129 60 : ASSERT(!waitqueue_active(&state->wq));
130 30 : free_extent_state(state);
131 :
132 30 : cond_resched_lock(&tree->lock);
133 : }
134 1216407 : spin_unlock(&tree->lock);
135 1216407 : }
136 :
137 406166106 : static struct extent_state *alloc_extent_state(gfp_t mask)
138 : {
139 406166106 : struct extent_state *state;
140 :
141 : /*
142 : * The given mask might not be appropriate for the slab allocator,
143 : * so drop the unsupported bits.
144 : */
145 406166106 : mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
146 406166106 : state = kmem_cache_alloc(extent_state_cache, mask);
147 406257377 : if (!state)
148 : return state;
149 406257377 : state->state = 0;
150 406257377 : RB_CLEAR_NODE(&state->rb_node);
151 406257377 : btrfs_leak_debug_add_state(state);
152 406257377 : refcount_set(&state->refs, 1);
153 406257377 : init_waitqueue_head(&state->wq);
154 406203411 : trace_alloc_extent_state(state, mask, _RET_IP_);
155 406236203 : return state;
156 : }
157 :
158 : static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
159 : {
160 155636467 : if (!prealloc)
161 3638 : prealloc = alloc_extent_state(GFP_ATOMIC);
162 :
163 155636467 : return prealloc;
164 : }
165 :
166 482123039 : void free_extent_state(struct extent_state *state)
167 : {
168 482123039 : if (!state)
169 : return;
170 425960330 : if (refcount_dec_and_test(&state->refs)) {
171 406700951 : WARN_ON(extent_state_in_tree(state));
172 406700951 : btrfs_leak_debug_del_state(state);
173 406700951 : trace_free_extent_state(state, _RET_IP_);
174 406608832 : kmem_cache_free(extent_state_cache, state);
175 : }
176 : }
177 :
178 323638455 : static int add_extent_changeset(struct extent_state *state, u32 bits,
179 : struct extent_changeset *changeset,
180 : int set)
181 : {
182 323638455 : int ret;
183 :
184 323638455 : if (!changeset)
185 : return 0;
186 2593002 : if (set && (state->state & bits) == bits)
187 : return 0;
188 2592874 : if (!set && (state->state & bits) == 0)
189 : return 0;
190 2592874 : changeset->bytes_changed += state->end - state->start + 1;
191 2592874 : ret = ulist_add(&changeset->range_changed, state->start, state->end,
192 : GFP_ATOMIC);
193 2592874 : return ret;
194 : }
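
The accounting above relies on the bounds being inclusive: a state covering
[start, end] contributes end - start + 1 bytes, and ranges whose bits already
match the request are skipped. A minimal user-space sketch of that logic
(names and bit values here are hypothetical, chosen only for illustration):

    #include <stdio.h>

    struct changeset { unsigned long long bytes_changed; };

    static void record(struct changeset *cs, unsigned int state_bits,
                       unsigned int bits, int set,
                       unsigned long long start, unsigned long long end)
    {
            if (set && (state_bits & bits) == bits)
                    return;         /* all requested bits already set */
            if (!set && (state_bits & bits) == 0)
                    return;         /* nothing to clear */
            cs->bytes_changed += end - start + 1;   /* inclusive bounds */
    }

    int main(void)
    {
            struct changeset cs = { 0 };

            record(&cs, 0x0, 0x4, 1, 4096, 8191);   /* counted: 4096 bytes */
            record(&cs, 0x4, 0x4, 1, 8192, 12287);  /* skipped: already set */
            printf("%llu\n", cs.bytes_changed);     /* prints 4096 */
            return 0;
    }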
195 :
196 : static inline struct extent_state *next_state(struct extent_state *state)
197 : {
198 382482108 : struct rb_node *next = rb_next(&state->rb_node);
199 :
200 382478710 : if (next)
201 90826236 : return rb_entry(next, struct extent_state, rb_node);
202 : else
203 : return NULL;
204 : }
205 :
206 : static inline struct extent_state *prev_state(struct extent_state *state)
207 : {
208 68757526 : struct rb_node *next = rb_prev(&state->rb_node);
209 :
210 68717559 : if (next)
211 1 : return rb_entry(next, struct extent_state, rb_node);
212 : else
213 : return NULL;
214 : }
215 :
216 : /*
217 : * Search @tree for an entry that contains @offset. Such entry would have
218 : * entry->start <= offset && entry->end >= offset.
219 : *
220 : * @tree: the tree to search
221 : * @offset: offset that should fall within an entry in @tree
222 : * @node_ret: pointer where new node should be anchored (used when inserting an
223 : * entry in the tree)
224 : * @parent_ret: points to entry which would have been the parent of the entry,
225 : * containing @offset
226 : *
227 : * Return a pointer to the entry that contains @offset byte address and don't change
228 : * @node_ret and @parent_ret.
229 : *
230 : * If no such entry exists, fill @node_ret and @parent_ret, and return a
231 : * pointer to the first entry that begins after @offset (or NULL if none).
232 : */
233 369724465 : static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
234 : u64 offset,
235 : struct rb_node ***node_ret,
236 : struct rb_node **parent_ret)
237 : {
238 369724465 : struct rb_root *root = &tree->state;
239 369724465 : struct rb_node **node = &root->rb_node;
240 369724465 : struct rb_node *prev = NULL;
241 369724465 : struct extent_state *entry = NULL;
242 :
243 762463702 : while (*node) {
244 508852234 : prev = *node;
245 508852234 : entry = rb_entry(prev, struct extent_state, rb_node);
246 :
247 508852234 : if (offset < entry->start)
248 127877749 : node = &(*node)->rb_left;
249 380974485 : else if (offset > entry->end)
250 264861488 : node = &(*node)->rb_right;
251 : else
252 116112997 : return entry;
253 : }
254 :
255 253611468 : if (node_ret)
256 106707474 : *node_ret = node;
257 253611468 : if (parent_ret)
258 106709280 : *parent_ret = prev;
259 :
260 : /* Search neighbors until we find the first one past the end */
261 266055802 : while (entry && offset > entry->end)
262 84955615 : entry = next_state(entry);
263 :
264 : return entry;
265 : }
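
The descent above is a standard rbtree walk specialized for non-overlapping
inclusive ranges: go left when @offset sorts before the entry, right when it
sorts after, and stop on containment. A self-contained sketch of the same
comparison logic over a hand-rolled (unbalanced) binary tree, with
hypothetical names:

    #include <stdio.h>
    #include <stddef.h>

    struct node {
            unsigned long long start, end;  /* inclusive, non-overlapping */
            struct node *left, *right;
    };

    static struct node *search(struct node *n, unsigned long long offset)
    {
            while (n) {
                    if (offset < n->start)
                            n = n->left;
                    else if (offset > n->end)
                            n = n->right;
                    else
                            return n;       /* start <= offset <= end */
            }
            return NULL;    /* no entry contains offset */
    }

    int main(void)
    {
            struct node a = { 0, 4095, NULL, NULL };
            struct node c = { 8192, 12287, NULL, NULL };
            struct node root = { 4096, 8191, &a, &c };
            struct node *hit = search(&root, 9000);

            if (hit)
                    printf("[%llu,%llu]\n", hit->start, hit->end);
            return 0;       /* prints [8192,12287] */
    }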
266 :
267 : /*
268 : * Search offset in the tree or fill neighbor rbtree node pointers.
269 : *
270 : * @tree: the tree to search
271 : * @offset: offset that should fall within an entry in @tree
272 : * @next_ret: pointer to the first entry whose range ends after @offset
273 : * @prev_ret: pointer to the last entry whose range ends before @offset
274 : *
275 : * Return a pointer to the entry that contains @offset byte address. If no
276 : * such entry exists, then return NULL and fill @prev_ret and @next_ret.
277 : * Otherwise return the found entry and other pointers are left untouched.
278 : */
279 2372 : static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
280 : u64 offset,
281 : struct extent_state **prev_ret,
282 : struct extent_state **next_ret)
283 : {
284 2372 : struct rb_root *root = &tree->state;
285 2372 : struct rb_node **node = &root->rb_node;
286 2372 : struct extent_state *orig_prev;
287 2372 : struct extent_state *entry = NULL;
288 :
289 2372 : ASSERT(prev_ret);
290 2372 : ASSERT(next_ret);
291 :
292 4764 : while (*node) {
293 4144 : entry = rb_entry(*node, struct extent_state, rb_node);
294 :
295 4144 : if (offset < entry->start)
296 610 : node = &(*node)->rb_left;
297 3534 : else if (offset > entry->end)
298 1782 : node = &(*node)->rb_right;
299 : else
300 1752 : return entry;
301 : }
302 :
303 : orig_prev = entry;
304 623 : while (entry && offset > entry->end)
305 607 : entry = next_state(entry);
306 620 : *next_ret = entry;
307 620 : entry = orig_prev;
308 :
309 621 : while (entry && offset < entry->start)
310 13 : entry = prev_state(entry);
311 620 : *prev_ret = entry;
312 :
313 620 : return NULL;
314 : }
315 :
316 : /*
317 : * Inexact rb-tree search, return the next entry if @offset is not found
318 : */
319 : static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
320 : {
321 235102615 : return tree_search_for_insert(tree, offset, NULL, NULL);
322 : }
323 :
324 0 : static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
325 : {
326 0 : btrfs_panic(tree->fs_info, err,
327 : "locking error: extent tree was modified by another thread while locked");
328 : }
329 :
330 : /*
331 : * Utility function to look for merge candidates inside a given range. Any
332 : * extents with matching state are merged together into a single extent in the
333 : * tree. Extents with EXTENT_LOCKED or EXTENT_BOUNDARY in their state field
334 : * are not merged because the end_io handlers need to be able to do operations
335 : * on them without sleeping (or doing allocations/splits).
336 : *
337 : * This should be called with the tree lock held.
338 : */
339 225485710 : static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
340 : {
341 225485710 : struct extent_state *other;
342 :
343 225485710 : if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
344 : return;
345 :
346 68749369 : other = prev_state(state);
347 61653778 : if (other && other->end == state->start - 1 &&
348 53498899 : other->state == state->state) {
349 51297314 : if (tree->inode)
350 35783839 : btrfs_merge_delalloc_extent(tree->inode, state, other);
351 51397502 : state->start = other->start;
352 51397502 : rb_erase(&other->rb_node, &tree->state);
353 51396352 : RB_CLEAR_NODE(&other->rb_node);
354 51396352 : free_extent_state(other);
355 : }
356 68816408 : other = next_state(state);
357 20388174 : if (other && other->start == state->end + 1 &&
358 9794506 : other->state == state->state) {
359 8402968 : if (tree->inode)
360 7915787 : btrfs_merge_delalloc_extent(tree->inode, state, other);
361 8402973 : state->end = other->end;
362 8402973 : rb_erase(&other->rb_node, &tree->state);
363 8402949 : RB_CLEAR_NODE(&other->rb_node);
364 8402949 : free_extent_state(other);
365 : }
366 : }
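
Concretely, a neighbor is absorbed only when it is byte-adjacent (its end is
exactly state->start - 1, or its start is state->end + 1) and its state bits
are identical. A small user-space model of the two checks, with made-up bit
values:

    #include <stdio.h>

    struct range { unsigned long long start, end; unsigned int state; };

    int main(void)
    {
            /* Three sorted states; the middle one was just modified. */
            struct range prev = { 0, 4095, 0x4 };
            struct range cur  = { 4096, 8191, 0x4 };
            struct range next = { 8192, 12287, 0x8 };

            /* Backward merge: adjacent and identical bits, absorb prev. */
            if (prev.end == cur.start - 1 && prev.state == cur.state)
                    cur.start = prev.start;

            /* Forward merge: bits differ (0x8 != 0x4), nothing happens. */
            if (next.start == cur.end + 1 && next.state == cur.state)
                    cur.end = next.end;

            printf("cur [%llu,%llu]\n", cur.start, cur.end);
            return 0;       /* prints cur [0,8191] */
    }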
367 :
368 178701889 : static void set_state_bits(struct extent_io_tree *tree,
369 : struct extent_state *state,
370 : u32 bits, struct extent_changeset *changeset)
371 : {
372 178701889 : u32 bits_to_set = bits & ~EXTENT_CTLBITS;
373 178701889 : int ret;
374 :
375 178701889 : if (tree->inode)
376 153427279 : btrfs_set_delalloc_extent(tree->inode, state, bits);
377 :
378 178793927 : ret = add_extent_changeset(state, bits_to_set, changeset, 1);
379 178776977 : BUG_ON(ret < 0);
380 178776977 : state->state |= bits_to_set;
381 178776977 : }
382 :
383 : /*
384 : * Insert an extent_state struct into the tree. 'bits' are set on the
385 : * struct before it is inserted.
386 : *
387 : * This may return -EEXIST if the extent is already there, in which case the
388 : * state struct is freed.
389 : *
390 : * The tree lock is not taken internally. This is a utility function and
391 : * probably isn't what you want to call (see set/clear_extent_bit).
392 : */
393 18249917 : static int insert_state(struct extent_io_tree *tree,
394 : struct extent_state *state,
395 : u32 bits, struct extent_changeset *changeset)
396 : {
397 18249917 : struct rb_node **node;
398 18249917 : struct rb_node *parent = NULL;
399 18249917 : const u64 end = state->end;
400 :
401 18249917 : set_state_bits(tree, state, bits, changeset);
402 :
403 18249923 : node = &tree->state.rb_node;
404 104865302 : while (*node) {
405 86615379 : struct extent_state *entry;
406 :
407 86615379 : parent = *node;
408 86615379 : entry = rb_entry(parent, struct extent_state, rb_node);
409 :
410 86615379 : if (end < entry->start) {
411 35425222 : node = &(*node)->rb_left;
412 51190157 : } else if (end > entry->end) {
413 51190157 : node = &(*node)->rb_right;
414 : } else {
415 0 : btrfs_err(tree->fs_info,
416 : "found node %llu %llu on insert of %llu %llu",
417 : entry->start, entry->end, state->start, end);
418 0 : return -EEXIST;
419 : }
420 : }
421 :
422 18249923 : rb_link_node(&state->rb_node, parent, node);
423 18249923 : rb_insert_color(&state->rb_node, &tree->state);
424 :
425 18249961 : merge_state(tree, state);
426 18249961 : return 0;
427 : }
428 :
429 : /*
430 : * Insert state to @tree to the location given by @node and @parent.
431 : */
432 88398424 : static void insert_state_fast(struct extent_io_tree *tree,
433 : struct extent_state *state, struct rb_node **node,
434 : struct rb_node *parent, unsigned bits,
435 : struct extent_changeset *changeset)
436 : {
437 88398424 : set_state_bits(tree, state, bits, changeset);
438 88433293 : rb_link_node(&state->rb_node, parent, node);
439 88433293 : rb_insert_color(&state->rb_node, &tree->state);
440 88441614 : merge_state(tree, state);
441 88425657 : }
442 :
443 : /*
444 : * Split a given extent state struct in two, inserting the preallocated
445 : * struct 'prealloc' as the newly created second half. 'split' indicates an
446 : * offset inside 'orig' where it should be split.
447 : *
448 : * Before calling, the tree has 'orig' at [orig->start, orig->end]. After
449 : * calling, there are two extent state structs in the tree:
450 : * are two extent state structs in the tree:
451 : * prealloc: [orig->start, split - 1]
452 : * orig: [ split, orig->end ]
453 : *
454 : * The tree locks are not taken by this function. They need to be held
455 : * by the caller.
456 : */
457 48972153 : static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
458 : struct extent_state *prealloc, u64 split)
459 : {
460 48972153 : struct rb_node *parent = NULL;
461 48972153 : struct rb_node **node;
462 :
463 48972153 : if (tree->inode)
464 48916984 : btrfs_split_delalloc_extent(tree->inode, orig, split);
465 :
466 49036223 : prealloc->start = orig->start;
467 49036223 : prealloc->end = split - 1;
468 49036223 : prealloc->state = orig->state;
469 49036223 : orig->start = split;
470 :
471 49036223 : parent = &orig->rb_node;
472 49036223 : node = &parent;
473 113216619 : while (*node) {
474 64180396 : struct extent_state *entry;
475 :
476 64180396 : parent = *node;
477 64180396 : entry = rb_entry(parent, struct extent_state, rb_node);
478 :
479 64180396 : if (prealloc->end < entry->start) {
480 49034833 : node = &(*node)->rb_left;
481 15145563 : } else if (prealloc->end > entry->end) {
482 15145563 : node = &(*node)->rb_right;
483 : } else {
484 0 : free_extent_state(prealloc);
485 0 : return -EEXIST;
486 : }
487 : }
488 :
489 49036223 : rb_link_node(&prealloc->rb_node, parent, node);
490 49036223 : rb_insert_color(&prealloc->rb_node, &tree->state);
491 :
492 49036223 : return 0;
493 : }
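
For example, splitting orig == [0, 8191] at offset 4096 leaves prealloc as
[0, 4095] and orig as [4096, 8191]. A minimal sketch of just the interval
bookkeeping (the rbtree re-linking done above is omitted):

    #include <stdio.h>

    struct range { unsigned long long start, end; };    /* inclusive */

    static void split(struct range *orig, struct range *first,
                      unsigned long long split_at)
    {
            first->start = orig->start;
            first->end = split_at - 1;
            orig->start = split_at; /* orig keeps the second half */
    }

    int main(void)
    {
            struct range orig = { 0, 8191 }, left;

            split(&orig, &left, 4096);
            printf("left [%llu,%llu] orig [%llu,%llu]\n",
                   left.start, left.end, orig.start, orig.end);
            return 0;       /* left [0,4095] orig [4096,8191] */
    }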
494 :
495 : /*
496 : * Utility function to clear some bits in an extent state struct. It will
497 : * optionally wake up anyone waiting on this state (wake == 1).
498 : *
499 : * If no bits are set on the state struct after clearing things, the
500 : * struct is freed and removed from the tree
501 : */
502 144905180 : static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
503 : struct extent_state *state,
504 : u32 bits, int wake,
505 : struct extent_changeset *changeset)
506 : {
507 144905180 : struct extent_state *next;
508 144905180 : u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
509 144905180 : int ret;
510 :
511 144905180 : if (tree->inode)
512 135607526 : btrfs_clear_delalloc_extent(tree->inode, state, bits);
513 :
514 144896541 : ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
515 144921179 : BUG_ON(ret < 0);
516 144921179 : state->state &= ~bits_to_clear;
517 144921179 : if (wake)
518 119146519 : wake_up(&state->wq);
519 144974166 : if (state->state == 0) {
520 95958340 : next = next_state(state);
521 95959925 : if (extent_state_in_tree(state)) {
522 95959925 : rb_erase(&state->rb_node, &tree->state);
523 95936220 : RB_CLEAR_NODE(&state->rb_node);
524 95936220 : free_extent_state(state);
525 : } else {
526 0 : WARN_ON(1);
527 : }
528 : } else {
529 49015826 : merge_state(tree, state);
530 49033564 : next = next_state(state);
531 : }
532 145017680 : return next;
533 : }
534 :
535 : /*
536 : * Detect if extent bits request NOWAIT semantics, set the gfp mask accordingly,
537 : * and unset the EXTENT_NOWAIT bit.
538 : */
539 : static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
540 : {
541 396956103 : *mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
542 396956103 : *bits &= EXTENT_NOWAIT - 1;
543 : }
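
The "*bits &= EXTENT_NOWAIT - 1" trick works because EXTENT_NOWAIT is the
highest bit used here, so subtracting 1 yields a mask of every lower bit.
A runnable illustration with hypothetical bit values (the real definitions
live in extent-io-tree.h):

    #include <stdio.h>

    #define EXTENT_DELALLOC (1U << 2)   /* made-up value */
    #define EXTENT_NOWAIT   (1U << 15)  /* made-up, but the top bit */

    int main(void)
    {
            unsigned int bits = EXTENT_DELALLOC | EXTENT_NOWAIT;
            const char *mask = (bits & EXTENT_NOWAIT) ? "GFP_NOWAIT"
                                                      : "GFP_NOFS";

            bits &= EXTENT_NOWAIT - 1;  /* strip NOWAIT, keep the rest */
            printf("mask=%s bits=0x%x\n", mask, bits);
            return 0;       /* prints mask=GFP_NOWAIT bits=0x4 */
    }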
544 :
545 : /*
546 : * Clear some bits on a range in the tree. This may require splitting or
547 : * inserting elements in the tree, so the gfp mask is used to indicate which
548 : * allocations or sleeping are allowed.
549 : *
550 : * Clearing EXTENT_LOCKED in @bits wakes any sleepers; EXTENT_CLEAR_ALL_BITS
551 : * removes the given range from the tree regardless of state (i.e. for truncate).
552 : *
553 : * The range [start, end] is inclusive.
554 : *
555 : * This takes the tree lock, and returns 0 on success and < 0 on error.
556 : */
557 221227460 : int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
558 : u32 bits, struct extent_state **cached_state,
559 : struct extent_changeset *changeset)
560 : {
561 221227460 : struct extent_state *state;
562 221227460 : struct extent_state *cached;
563 221227460 : struct extent_state *prealloc = NULL;
564 221227460 : u64 last_end;
565 221227460 : int err;
566 221227460 : int clear = 0;
567 221227460 : int wake;
568 221227460 : int delete = (bits & EXTENT_CLEAR_ALL_BITS);
569 221227460 : gfp_t mask;
570 :
571 221227460 : set_gfp_mask_from_bits(&bits, &mask);
572 221227460 : btrfs_debug_check_extent_io_range(tree, start, end);
573 221227460 : trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
574 :
575 221134175 : if (delete)
576 31606928 : bits |= ~EXTENT_CTLBITS;
577 :
578 221134175 : if (bits & EXTENT_DELALLOC)
579 127181847 : bits |= EXTENT_NORESERVE;
580 :
581 221134175 : wake = (bits & EXTENT_LOCKED) ? 1 : 0;
582 221134175 : if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
583 167808595 : clear = 1;
584 221134175 : again:
585 221294639 : if (!prealloc) {
586 : /*
587 : * Don't care for allocation failure here because we might end
588 : * up not needing the pre-allocated extent state at all, which
589 : * is the case if the extent states in the tree cover our input
590 : * range exactly and don't extend into any other range.
591 : * If we end up needing a new extent state we allocate it later.
592 : */
593 221250225 : prealloc = alloc_extent_state(mask);
594 : }
595 :
596 221345316 : spin_lock(&tree->lock);
597 221499068 : if (cached_state) {
598 117820987 : cached = *cached_state;
599 :
600 117820987 : if (clear) {
601 71414124 : *cached_state = NULL;
602 71414124 : cached_state = NULL;
603 : }
604 :
605 117820987 : if (cached && extent_state_in_tree(cached) &&
606 103145810 : cached->start <= start && cached->end > start) {
607 102997347 : if (clear)
608 69485090 : refcount_dec(&cached->refs);
609 103027366 : state = cached;
610 103027366 : goto hit_next;
611 : }
612 14823640 : if (clear)
613 1918574 : free_extent_state(cached);
614 : }
615 :
616 : /* This search will find the extents that end after our range starts. */
617 118501677 : state = tree_search(tree, start);
618 118441294 : if (!state)
619 59265763 : goto out;
620 162202897 : hit_next:
621 164014216 : if (state->start > end)
622 7791479 : goto out;
623 156222737 : WARN_ON(state->end < start);
624 156222737 : last_end = state->end;
625 :
626 : /* The state doesn't have the wanted bits, go ahead. */
627 156222737 : if (!(state->state & bits)) {
628 13329120 : state = next_state(state);
629 13324616 : goto next;
630 : }
631 :
632 : /*
633 : * | ---- desired range ---- |
634 : * | state | or
635 : * | ------------- state -------------- |
636 : *
637 : * We need to split the extent we found, and may flip bits on second
638 : * half.
639 : *
640 : * If the extent we found extends past our range, we just split and
641 : * search again. It'll get split again the next time though.
642 : *
643 : * If the extent we found is inside our range, we clear the desired bit
644 : * on it.
645 : */
646 :
647 142893617 : if (state->start < start) {
648 452760 : prealloc = alloc_extent_state_atomic(prealloc);
649 452760 : if (!prealloc)
650 0 : goto search_again;
651 452760 : err = split_state(tree, state, prealloc, start);
652 452694 : if (err)
653 0 : extent_io_tree_panic(tree, err);
654 :
655 452694 : prealloc = NULL;
656 452694 : if (err)
657 : goto out;
658 452694 : if (state->end <= end) {
659 303065 : state = clear_state_bit(tree, state, bits, wake, changeset);
660 303077 : goto next;
661 : }
662 149629 : goto search_again;
663 : }
664 : /*
665 : * | ---- desired range ---- |
666 : * | state |
667 : * We need to split the extent, and clear the bit on the first half.
668 : */
669 142440857 : if (state->start <= end && state->end > end) {
670 24794834 : prealloc = alloc_extent_state_atomic(prealloc);
671 24794834 : if (!prealloc)
672 0 : goto search_again;
673 24794834 : err = split_state(tree, state, prealloc, end + 1);
674 24796104 : if (err)
675 0 : extent_io_tree_panic(tree, err);
676 :
677 24796104 : if (wake)
678 24117957 : wake_up(&state->wq);
679 :
680 24798993 : clear_state_bit(tree, prealloc, bits, wake, changeset);
681 :
682 24800885 : prealloc = NULL;
683 24800885 : goto out;
684 : }
685 :
686 117646023 : state = clear_state_bit(tree, state, bits, wake, changeset);
687 131291867 : next:
688 131291867 : if (last_end == (u64)-1)
689 266328 : goto out;
690 131025539 : start = last_end + 1;
691 131025539 : if (start <= end && state && !need_resched())
692 1811319 : goto hit_next;
693 :
694 129214220 : search_again:
695 129363849 : if (start > end)
696 129191813 : goto out;
697 172036 : spin_unlock(&tree->lock);
698 160464 : if (gfpflags_allow_blocking(mask))
699 159917 : cond_resched();
700 160464 : goto again;
701 :
702 221316268 : out:
703 221316268 : spin_unlock(&tree->lock);
704 221404816 : if (prealloc)
705 196302811 : free_extent_state(prealloc);
706 :
707 221361780 : return 0;
708 :
709 : }
710 :
711 373804 : static void wait_on_state(struct extent_io_tree *tree,
712 : struct extent_state *state)
713 : __releases(tree->lock)
714 : __acquires(tree->lock)
715 : {
716 373804 : DEFINE_WAIT(wait);
717 373804 : prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
718 373804 : spin_unlock(&tree->lock);
719 373802 : schedule();
720 372159 : spin_lock(&tree->lock);
721 373804 : finish_wait(&state->wq, &wait);
722 373804 : }
723 :
724 : /*
725 : * Wait for one or more bits to clear on a range in the state tree.
726 : * The range [start, end] is inclusive.
727 : * The tree lock is taken by this function.
728 : */
729 71848 : void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
730 : struct extent_state **cached_state)
731 : {
732 71848 : struct extent_state *state;
733 :
734 71848 : btrfs_debug_check_extent_io_range(tree, start, end);
735 :
736 71848 : spin_lock(&tree->lock);
737 446279 : again:
738 : /*
739 : * Maintain cached_state, as we may not remove it from the tree if there
740 : * are more bits set on this state than the bits we're waiting on.
741 : */
742 446279 : if (cached_state && *cached_state) {
743 446279 : state = *cached_state;
744 446279 : if (extent_state_in_tree(state) &&
745 72897 : state->start <= start && start < state->end)
746 71982 : goto process_node;
747 : }
748 374305 : while (1) {
749 : /*
750 : * This search will find all the extents that end after our
751 : * range starts.
752 : */
753 374305 : state = tree_search(tree, start);
754 : process_node:
755 446291 : if (!state)
756 : break;
757 427363 : if (state->start > end)
758 52750 : goto out;
759 :
760 374613 : if (state->state & bits) {
761 373804 : start = state->start;
762 373804 : refcount_inc(&state->refs);
763 373804 : wait_on_state(tree, state);
764 373804 : free_extent_state(state);
765 373804 : goto again;
766 : }
767 809 : start = state->end + 1;
768 :
769 809 : if (start > end)
770 : break;
771 :
772 15 : if (!cond_resched_lock(&tree->lock)) {
773 7 : state = next_state(state);
774 7 : goto process_node;
775 : }
776 : }
777 19722 : out:
778 : /* This state is no longer useful, clear it and free it up. */
779 72475 : if (cached_state && *cached_state) {
780 72475 : state = *cached_state;
781 72475 : *cached_state = NULL;
782 72475 : free_extent_state(state);
783 : }
784 72475 : spin_unlock(&tree->lock);
785 72475 : }
786 :
787 187680422 : static void cache_state_if_flags(struct extent_state *state,
788 : struct extent_state **cached_ptr,
789 : unsigned flags)
790 : {
791 187680422 : if (cached_ptr && !(*cached_ptr)) {
792 108304228 : if (!flags || (state->state & flags)) {
793 95315725 : *cached_ptr = state;
794 95315725 : refcount_inc(&state->refs);
795 : }
796 : }
797 187778311 : }
798 :
799 : static void cache_state(struct extent_state *state,
800 : struct extent_state **cached_ptr)
801 : {
802 178887917 : return cache_state_if_flags(state, cached_ptr,
803 : EXTENT_LOCKED | EXTENT_BOUNDARY);
804 : }
805 :
806 : /*
807 : * Find the first state struct with 'bits' set after 'start', and return it.
808 : * tree->lock must be held. NULL will be returned if nothing was found
809 : * after 'start'.
810 : */
811 11732594 : static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
812 : u64 start, u32 bits)
813 : {
814 11732594 : struct extent_state *state;
815 :
816 : /*
817 : * This search will find all the extents that end after our range
818 : * starts.
819 : */
820 11732594 : state = tree_search(tree, start);
821 11732761 : while (state) {
822 8855365 : if (state->end >= start && (state->state & bits))
823 8853547 : return state;
824 1818 : state = next_state(state);
825 : }
826 : return NULL;
827 : }
828 :
829 : /*
830 : * Find the first offset in the io tree with one or more @bits set.
831 : *
832 : * Note: If there are multiple bits set in @bits, any of them will match.
833 : *
834 : * Return 0 if we find something, and update @start_ret and @end_ret.
835 : * Return 1 if we found nothing.
836 : */
837 11732178 : int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
838 : u64 *start_ret, u64 *end_ret, u32 bits,
839 : struct extent_state **cached_state)
840 : {
841 11732178 : struct extent_state *state;
842 11732178 : int ret = 1;
843 :
844 11732178 : spin_lock(&tree->lock);
845 11732208 : if (cached_state && *cached_state) {
846 0 : state = *cached_state;
847 0 : if (state->end == start - 1 && extent_state_in_tree(state)) {
848 0 : while ((state = next_state(state)) != NULL) {
849 0 : if (state->state & bits)
850 0 : goto got_it;
851 : }
852 0 : free_extent_state(*cached_state);
853 0 : *cached_state = NULL;
854 0 : goto out;
855 : }
856 0 : free_extent_state(*cached_state);
857 0 : *cached_state = NULL;
858 : }
859 :
860 11732208 : state = find_first_extent_bit_state(tree, start, bits);
861 11732171 : got_it:
862 11732171 : if (state) {
863 8853143 : cache_state_if_flags(state, cached_state, 0);
864 8853184 : *start_ret = state->start;
865 8853184 : *end_ret = state->end;
866 8853184 : ret = 0;
867 : }
868 2879028 : out:
869 11732212 : spin_unlock(&tree->lock);
870 11732220 : return ret;
871 : }
872 :
873 : /*
874 : * Find a contiguous area of bits
875 : *
876 : * @tree: io tree to check
877 : * @start: offset to start the search from
878 : * @start_ret: the first offset we found with the bits set
879 : * @end_ret: the end of the final contiguous range of the bits that were set
880 : * @bits: bits to look for
881 : *
882 : * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
883 : * to set bits appropriately, and then merge them again. During this time it
884 : * will drop the tree->lock, so use this helper if you want to find the actual
885 : * contiguous area for the given bits. We will search for the first bit we
886 : * find, then walk down the tree until we find a non-contiguous area. The area
887 : * returned will be the full contiguous area with the bits set.
888 : */
889 400 : int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
890 : u64 *start_ret, u64 *end_ret, u32 bits)
891 : {
892 400 : struct extent_state *state;
893 400 : int ret = 1;
894 :
895 400 : spin_lock(&tree->lock);
896 400 : state = find_first_extent_bit_state(tree, start, bits);
897 400 : if (state) {
898 395 : *start_ret = state->start;
899 395 : *end_ret = state->end;
900 495 : while ((state = next_state(state)) != NULL) {
901 100 : if (state->start > (*end_ret + 1))
902 : break;
903 0 : *end_ret = state->end;
904 : }
905 : ret = 0;
906 : }
907 400 : spin_unlock(&tree->lock);
908 400 : return ret;
909 : }
910 :
911 : /*
912 : * Find a contiguous range of bytes in the file marked as delalloc, not more
913 : * than 'max_bytes'. 'start' and 'end' are used to return the range.
914 : *
915 : * True is returned if we find something, false if nothing was in the tree.
916 : */
917 51580837 : bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
918 : u64 *end, u64 max_bytes,
919 : struct extent_state **cached_state)
920 : {
921 51580837 : struct extent_state *state;
922 51580837 : u64 cur_start = *start;
923 51580837 : bool found = false;
924 51580837 : u64 total_bytes = 0;
925 :
926 51580837 : spin_lock(&tree->lock);
927 :
928 : /*
929 : * This search will find all the extents that end after our range
930 : * starts.
931 : */
932 51581070 : state = tree_search(tree, cur_start);
933 51578473 : if (!state) {
934 9352278 : *end = (u64)-1;
935 9352278 : goto out;
936 : }
937 :
938 49192269 : while (state) {
939 45717611 : if (found && (state->start != cur_start ||
940 2632332 : (state->state & EXTENT_BOUNDARY))) {
941 1971175 : goto out;
942 : }
943 43746436 : if (!(state->state & EXTENT_DELALLOC)) {
944 31754748 : if (!found)
945 31743343 : *end = state->end;
946 31754748 : goto out;
947 : }
948 11991688 : if (!found) {
949 10483655 : *start = state->start;
950 10483655 : *cached_state = state;
951 10483655 : refcount_inc(&state->refs);
952 : }
953 11991902 : found = true;
954 11991902 : *end = state->end;
955 11991902 : cur_start = state->end + 1;
956 11991902 : total_bytes += state->end - state->start + 1;
957 11991902 : if (total_bytes >= max_bytes)
958 : break;
959 6965332 : state = next_state(state);
960 : }
961 8501228 : out:
962 51579429 : spin_unlock(&tree->lock);
963 51581060 : return found;
964 : }
965 :
966 : /*
967 : * Set some bits on a range in the tree. This may require allocations or
968 : * sleeping. By default all allocations use GFP_NOFS; pass EXTENT_NOWAIT for
969 : * GFP_NOWAIT.
970 : *
971 : * If any of the exclusive bits are set, this will fail with -EEXIST if some
972 : * part of the range already has the desired bits set. The extent_state of the
973 : * existing range is returned in failed_state in this case, and the start of the
974 : * existing range is returned in failed_start. failed_state is used as an
975 : * optimization for wait_extent_bit; failed_start must be used as the source of
976 : * truth, as failed_state may have changed since we returned.
977 : *
978 : * [start, end] is inclusive. This takes the tree lock.
979 : */
980 175728643 : static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
981 : u32 bits, u64 *failed_start,
982 : struct extent_state **failed_state,
983 : struct extent_state **cached_state,
984 : struct extent_changeset *changeset)
985 : {
986 175728643 : struct extent_state *state;
987 175728643 : struct extent_state *prealloc = NULL;
988 175728643 : struct rb_node **p = NULL;
989 175728643 : struct rb_node *parent = NULL;
990 175728643 : int err = 0;
991 175728643 : u64 last_start;
992 175728643 : u64 last_end;
993 175728643 : u32 exclusive_bits = (bits & EXTENT_LOCKED);
994 175728643 : gfp_t mask;
995 :
996 175728643 : set_gfp_mask_from_bits(&bits, &mask);
997 175728643 : btrfs_debug_check_extent_io_range(tree, start, end);
998 175728643 : trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
999 :
1000 175728643 : if (exclusive_bits)
1001 : ASSERT(failed_start);
1002 : else
1003 : ASSERT(failed_start == NULL && failed_state == NULL);
1004 : again:
1005 182851469 : if (!prealloc) {
1006 : /*
1007 : * Don't care for allocation failure here because we might end
1008 : * up not needing the pre-allocated extent state at all, which
1009 : * is the case if the extent states in the tree cover our input
1010 : * range exactly and don't extend into any other range.
1011 : * If we end up needing a new extent state we allocate it later.
1012 : */
1013 182678317 : prealloc = alloc_extent_state(mask);
1014 : }
1015 :
1016 182865660 : spin_lock(&tree->lock);
1017 183100746 : if (cached_state && *cached_state) {
1018 48962751 : state = *cached_state;
1019 48962751 : if (state->start <= start && state->end > start &&
1020 : extent_state_in_tree(state))
1021 48392518 : goto hit_next;
1022 : }
1023 : /*
1024 : * This search will find all the extents that end after our range
1025 : * starts.
1026 : */
1027 134708228 : state = tree_search_for_insert(tree, start, &p, &parent);
1028 134567753 : if (!state) {
1029 88392033 : prealloc = alloc_extent_state_atomic(prealloc);
1030 88392033 : if (!prealloc)
1031 0 : goto search_again;
1032 88392033 : prealloc->start = start;
1033 88392033 : prealloc->end = end;
1034 88392033 : insert_state_fast(tree, prealloc, p, parent, bits, changeset);
1035 88427675 : cache_state(prealloc, cached_state);
1036 88461608 : prealloc = NULL;
1037 88461608 : goto out;
1038 : }
1039 46175720 : hit_next:
1040 95187541 : last_start = state->start;
1041 95187541 : last_end = state->end;
1042 :
1043 : /*
1044 : * | ---- desired range ---- |
1045 : * | state |
1046 : *
1047 : * Just lock what we found and keep going
1048 : */
1049 95187541 : if (state->start == start && state->end <= end) {
1050 53162449 : if (state->state & exclusive_bits) {
1051 68367 : *failed_start = state->start;
1052 68367 : cache_state(state, failed_state);
1053 68367 : err = -EEXIST;
1054 68367 : goto out;
1055 : }
1056 :
1057 53094082 : set_state_bits(tree, state, bits, changeset);
1058 53064183 : cache_state(state, cached_state);
1059 53056100 : merge_state(tree, state);
1060 53041646 : if (last_end == (u64)-1)
1061 0 : goto out;
1062 53041646 : start = last_end + 1;
1063 53041646 : state = next_state(state);
1064 53046229 : if (start < end && state && state->start == start &&
1065 : !need_resched())
1066 610616 : goto hit_next;
1067 52435613 : goto search_again;
1068 : }
1069 :
1070 : /*
1071 : * | ---- desired range ---- |
1072 : * | state |
1073 : * or
1074 : * | ------------- state -------------- |
1075 : *
1076 : * We need to split the extent we found, and may flip bits on second
1077 : * half.
1078 : *
1079 : * If the extent we found extends past our range, we just split and
1080 : * search again. It'll get split again the next time though.
1081 : *
1082 : * If the extent we found is inside our range, we set the desired bit
1083 : * on it.
1084 : */
1085 42025092 : if (state->start < start) {
1086 15478208 : if (state->state & exclusive_bits) {
1087 2346 : *failed_start = start;
1088 2346 : cache_state(state, failed_state);
1089 2346 : err = -EEXIST;
1090 2346 : goto out;
1091 : }
1092 :
1093 : /*
1094 : * If this extent already has all the bits we want set, then
1095 : * skip it, not necessary to split it or do anything with it.
1096 : */
1097 15475862 : if ((state->state & bits) == bits) {
1098 24132 : start = state->end + 1;
1099 24132 : cache_state(state, cached_state);
1100 24132 : goto search_again;
1101 : }
1102 :
1103 15451730 : prealloc = alloc_extent_state_atomic(prealloc);
1104 15451730 : if (!prealloc)
1105 0 : goto search_again;
1106 15451730 : err = split_state(tree, state, prealloc, start);
1107 15467651 : if (err)
1108 0 : extent_io_tree_panic(tree, err);
1109 :
1110 15467651 : prealloc = NULL;
1111 15467651 : if (err)
1112 : goto out;
1113 15467651 : if (state->end <= end) {
1114 8569523 : set_state_bits(tree, state, bits, changeset);
1115 8538466 : cache_state(state, cached_state);
1116 8571632 : merge_state(tree, state);
1117 8569764 : if (last_end == (u64)-1)
1118 0 : goto out;
1119 8569764 : start = last_end + 1;
1120 8569764 : state = next_state(state);
1121 8574242 : if (start < end && state && state->start == start &&
1122 : !need_resched())
1123 8687 : goto hit_next;
1124 : }
1125 15463683 : goto search_again;
1126 : }
1127 : /*
1128 : * | ---- desired range ---- |
1129 : * | state | or | state |
1130 : *
1131 : * There's a hole, we need to insert something in it and ignore the
1132 : * extent we found.
1133 : */
1134 26546884 : if (state->start > start) {
1135 18249929 : u64 this_end;
1136 18249929 : if (end < last_start)
1137 : this_end = end;
1138 : else
1139 152579 : this_end = last_start - 1;
1140 :
1141 18249929 : prealloc = alloc_extent_state_atomic(prealloc);
1142 18249929 : if (!prealloc)
1143 0 : goto search_again;
1144 :
1145 : /*
1146 : * Avoid to free 'prealloc' if it can be merged with the later
1147 : * extent.
1148 : */
1149 18249929 : prealloc->start = start;
1150 18249929 : prealloc->end = this_end;
1151 18249929 : err = insert_state(tree, prealloc, bits, changeset);
1152 18249974 : if (err)
1153 0 : extent_io_tree_panic(tree, err);
1154 :
1155 18249974 : cache_state(prealloc, cached_state);
1156 18250031 : prealloc = NULL;
1157 18250031 : start = this_end + 1;
1158 18250031 : goto search_again;
1159 : }
1160 : /*
1161 : * | ---- desired range ---- |
1162 : * | state |
1163 : *
1164 : * We need to split the extent, and set the bit on the first half
1165 : */
1166 8296955 : if (state->start <= end && state->end > end) {
1167 8296955 : if (state->state & exclusive_bits) {
1168 1777 : *failed_start = start;
1169 1777 : cache_state(state, failed_state);
1170 1777 : err = -EEXIST;
1171 1777 : goto out;
1172 : }
1173 :
1174 8295178 : prealloc = alloc_extent_state_atomic(prealloc);
1175 8295178 : if (!prealloc)
1176 0 : goto search_again;
1177 8295192 : err = split_state(tree, state, prealloc, end + 1);
1178 8295581 : if (err)
1179 0 : extent_io_tree_panic(tree, err);
1180 :
1181 8295581 : set_state_bits(tree, prealloc, bits, changeset);
1182 8295365 : cache_state(prealloc, cached_state);
1183 8295545 : merge_state(tree, prealloc);
1184 8295495 : prealloc = NULL;
1185 8295495 : goto out;
1186 : }
1187 :
1188 0 : search_again:
1189 86173445 : if (start > end)
1190 78887843 : goto out;
1191 7285602 : spin_unlock(&tree->lock);
1192 7285966 : if (gfpflags_allow_blocking(mask))
1193 7285959 : cond_resched();
1194 7285940 : goto again;
1195 :
1196 175717436 : out:
1197 175717436 : spin_unlock(&tree->lock);
1198 175896644 : if (prealloc)
1199 52488275 : free_extent_state(prealloc);
1200 :
1201 175896580 : return err;
1202 :
1203 : }
1204 :
1205 80152178 : int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1206 : u32 bits, struct extent_state **cached_state)
1207 : {
1208 80152178 : return __set_extent_bit(tree, start, end, bits, NULL, NULL,
1209 : cached_state, NULL);
1210 : }
1211 :
1212 : /*
1213 : * Convert all bits in a given range from one bit to another
1214 : *
1215 : * @tree: the io tree to search
1216 : * @start: the start offset in bytes
1217 : * @end: the end offset in bytes (inclusive)
1218 : * @bits: the bits to set in this range
1219 : * @clear_bits: the bits to clear in this range
1220 : * @cached_state: state that we're going to cache
1221 : *
1222 : * This will go through and set bits for the given range. If any states exist
1223 : * already in this range they are set with the given bit and cleared of the
1224 : * clear_bits. This is only meant to be used by things that are mergeable, i.e.
1225 : * converting from, say, EXTENT_DELALLOC to EXTENT_DIRTY. This is not meant to
1226 : * be used with boundary bits like EXTENT_LOCKED.
1227 : *
1228 : * All allocations are done with GFP_NOFS.
1229 : */
1230 2215676 : int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1231 : u32 bits, u32 clear_bits,
1232 : struct extent_state **cached_state)
1233 : {
1234 2215676 : struct extent_state *state;
1235 2215676 : struct extent_state *prealloc = NULL;
1236 2215676 : struct rb_node **p = NULL;
1237 2215676 : struct rb_node *parent = NULL;
1238 2215676 : int err = 0;
1239 2215676 : u64 last_start;
1240 2215676 : u64 last_end;
1241 2215676 : bool first_iteration = true;
1242 :
1243 2215676 : btrfs_debug_check_extent_io_range(tree, start, end);
1244 2215676 : trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
1245 : clear_bits);
1246 :
1247 2215642 : again:
1248 2215642 : if (!prealloc) {
1249 : /*
1250 : * Best effort, don't worry if extent state allocation fails
1251 : * here for the first iteration. We might have a cached state
1252 : * that matches exactly the target range, in which case no
1253 : * extent state allocations are needed. We'll only know this
1254 : * after locking the tree.
1255 : */
1256 2215643 : prealloc = alloc_extent_state(GFP_NOFS);
1257 2215652 : if (!prealloc && !first_iteration)
1258 : return -ENOMEM;
1259 : }
1260 :
1261 2215651 : spin_lock(&tree->lock);
1262 2215674 : if (cached_state && *cached_state) {
1263 2215674 : state = *cached_state;
1264 2215674 : if (state->start <= start && state->end > start &&
1265 : extent_state_in_tree(state))
1266 2215662 : goto hit_next;
1267 : }
1268 :
1269 : /*
1270 : * This search will find all the extents that end after our range
1271 : * starts.
1272 : */
1273 12 : state = tree_search_for_insert(tree, start, &p, &parent);
1274 0 : if (!state) {
1275 0 : prealloc = alloc_extent_state_atomic(prealloc);
1276 0 : if (!prealloc) {
1277 0 : err = -ENOMEM;
1278 0 : goto out;
1279 : }
1280 0 : prealloc->start = start;
1281 0 : prealloc->end = end;
1282 0 : insert_state_fast(tree, prealloc, p, parent, bits, NULL);
1283 0 : cache_state(prealloc, cached_state);
1284 0 : prealloc = NULL;
1285 0 : goto out;
1286 : }
1287 0 : hit_next:
1288 2215662 : last_start = state->start;
1289 2215662 : last_end = state->end;
1290 :
1291 : /*
1292 : * | ---- desired range ---- |
1293 : * | state |
1294 : *
1295 : * Just lock what we found and keep going.
1296 : */
1297 2215662 : if (state->start == start && state->end <= end) {
1298 2215659 : set_state_bits(tree, state, bits, NULL);
1299 2215632 : cache_state(state, cached_state);
1300 2215625 : state = clear_state_bit(tree, state, clear_bits, 0, NULL);
1301 2215620 : if (last_end == (u64)-1)
1302 0 : goto out;
1303 2215620 : start = last_end + 1;
1304 2215620 : if (start < end && state && state->start == start &&
1305 : !need_resched())
1306 0 : goto hit_next;
1307 2215620 : goto search_again;
1308 : }
1309 :
1310 : /*
1311 : * | ---- desired range ---- |
1312 : * | state |
1313 : * or
1314 : * | ------------- state -------------- |
1315 : *
1316 : * We need to split the extent we found, and may flip bits on second
1317 : * half.
1318 : *
1319 : * If the extent we found extends past our range, we just split and
1320 : * search again. It'll get split again the next time though.
1321 : *
1322 : * If the extent we found is inside our range, we set the desired bit
1323 : * on it.
1324 : */
1325 3 : if (state->start < start) {
1326 3 : prealloc = alloc_extent_state_atomic(prealloc);
1327 3 : if (!prealloc) {
1328 0 : err = -ENOMEM;
1329 0 : goto out;
1330 : }
1331 3 : err = split_state(tree, state, prealloc, start);
1332 0 : if (err)
1333 0 : extent_io_tree_panic(tree, err);
1334 0 : prealloc = NULL;
1335 0 : if (err)
1336 : goto out;
1337 0 : if (state->end <= end) {
1338 0 : set_state_bits(tree, state, bits, NULL);
1339 0 : cache_state(state, cached_state);
1340 0 : state = clear_state_bit(tree, state, clear_bits, 0, NULL);
1341 0 : if (last_end == (u64)-1)
1342 0 : goto out;
1343 0 : start = last_end + 1;
1344 0 : if (start < end && state && state->start == start &&
1345 : !need_resched())
1346 0 : goto hit_next;
1347 : }
1348 0 : goto search_again;
1349 : }
1350 : /*
1351 : * | ---- desired range ---- |
1352 : * | state | or | state |
1353 : *
1354 : * There's a hole, we need to insert something in it and ignore the
1355 : * extent we found.
1356 : */
1357 0 : if (state->start > start) {
1358 0 : u64 this_end;
1359 0 : if (end < last_start)
1360 : this_end = end;
1361 : else
1362 0 : this_end = last_start - 1;
1363 :
1364 0 : prealloc = alloc_extent_state_atomic(prealloc);
1365 0 : if (!prealloc) {
1366 0 : err = -ENOMEM;
1367 0 : goto out;
1368 : }
1369 :
1370 : /*
1371 : * Avoid to free 'prealloc' if it can be merged with the later
1372 : * extent.
1373 : */
1374 0 : prealloc->start = start;
1375 0 : prealloc->end = this_end;
1376 0 : err = insert_state(tree, prealloc, bits, NULL);
1377 0 : if (err)
1378 0 : extent_io_tree_panic(tree, err);
1379 0 : cache_state(prealloc, cached_state);
1380 0 : prealloc = NULL;
1381 0 : start = this_end + 1;
1382 0 : goto search_again;
1383 : }
1384 : /*
1385 : * | ---- desired range ---- |
1386 : * | state |
1387 : *
1388 : * We need to split the extent, and set the bit on the first half.
1389 : */
1390 0 : if (state->start <= end && state->end > end) {
1391 0 : prealloc = alloc_extent_state_atomic(prealloc);
1392 0 : if (!prealloc) {
1393 0 : err = -ENOMEM;
1394 0 : goto out;
1395 : }
1396 :
1397 0 : err = split_state(tree, state, prealloc, end + 1);
1398 0 : if (err)
1399 0 : extent_io_tree_panic(tree, err);
1400 :
1401 0 : set_state_bits(tree, prealloc, bits, NULL);
1402 0 : cache_state(prealloc, cached_state);
1403 0 : clear_state_bit(tree, prealloc, clear_bits, 0, NULL);
1404 0 : prealloc = NULL;
1405 0 : goto out;
1406 : }
1407 :
1408 0 : search_again:
1409 2215620 : if (start > end)
1410 2215664 : goto out;
1411 0 : spin_unlock(&tree->lock);
1412 0 : cond_resched();
1413 0 : first_iteration = false;
1414 0 : goto again;
1415 :
1416 2215664 : out:
1417 2215664 : spin_unlock(&tree->lock);
1418 2215698 : if (prealloc)
1419 2215698 : free_extent_state(prealloc);
1420 :
1421 : return err;
1422 : }
1423 :
1424 : /*
1425 : * Find the first range that has @bits not set. This range could start before
1426 : * @start.
1427 : *
1428 : * @tree: the tree to search
1429 : * @start: offset at/after which the found extent should start
1430 : * @start_ret: records the beginning of the range
1431 : * @end_ret: records the end of the range (inclusive)
1432 : * @bits: the set of bits which must be unset
1433 : *
1434 : * Since an unallocated range is also considered one which doesn't have the bits
1435 : * set, it's possible that @end_ret contains -1. This happens in case the range
1436 : * spans (last_range_end, end of device]. In this case it's up to the caller to
1437 : * trim @end_ret to the appropriate size.
1438 : */
1439 620 : void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
1440 : u64 *start_ret, u64 *end_ret, u32 bits)
1441 : {
1442 620 : struct extent_state *state;
1443 620 : struct extent_state *prev = NULL, *next = NULL;
1444 :
1445 620 : spin_lock(&tree->lock);
1446 :
1447 : /* Find first extent with bits cleared */
1448 2372 : while (1) {
1449 2372 : state = tree_search_prev_next(tree, start, &prev, &next);
1450 2372 : if (!state && !next && !prev) {
1451 : /*
1452 : * Tree is completely empty, send full range and let
1453 : * caller deal with it
1454 : */
1455 0 : *start_ret = 0;
1456 0 : *end_ret = -1;
1457 0 : goto out;
1458 2372 : } else if (!state && !next) {
1459 : /*
1460 : * We are past the last allocated chunk, set start at
1461 : * the end of the last extent.
1462 : */
1463 604 : *start_ret = prev->end + 1;
1464 604 : *end_ret = -1;
1465 604 : goto out;
1466 1768 : } else if (!state) {
1467 16 : state = next;
1468 : }
1469 :
1470 : /*
1471 : * At this point 'state' either contains 'start' or start is
1472 : * before 'state'
1473 : */
1474 1768 : if (in_range(start, state->start, state->end - state->start + 1)) {
1475 1752 : if (state->state & bits) {
1476 : /*
1477 : * |--range with bits sets--|
1478 : * |
1479 : * start
1480 : */
1481 : start = state->end + 1;
1482 : } else {
1483 : /*
1484 : * 'start' falls within a range that doesn't
1485 : * have the bits set, so take its start as the
1486 : * beginning of the desired range
1487 : *
1488 : * |--range with bits cleared----|
1489 : * |
1490 : * start
1491 : */
1492 0 : *start_ret = state->start;
1493 0 : break;
1494 : }
1495 : } else {
1496 : /*
1497 : * |---prev range---|---hole/unset---|---node range---|
1498 : * |
1499 : * start
1500 : *
1501 : * or
1502 : *
1503 : * |---hole/unset--||--first node--|
1504 : * 0 |
1505 : * start
1506 : */
1507 16 : if (prev)
1508 4 : *start_ret = prev->end + 1;
1509 : else
1510 12 : *start_ret = 0;
1511 : break;
1512 : }
1513 : }
1514 :
1515 : /*
1516 : * Find the longest stretch from start until an entry which has the
1517 : * bits set
1518 : */
1519 16 : while (state) {
1520 16 : if (state->end >= start && !(state->state & bits)) {
1521 0 : *end_ret = state->end;
1522 : } else {
1523 16 : *end_ret = state->start - 1;
1524 16 : break;
1525 : }
1526 0 : state = next_state(state);
1527 : }
1528 0 : out:
1529 620 : spin_unlock(&tree->lock);
1530 620 : }
1531 :
1532 : /*
1533 : * Count the number of bytes in the tree that have a given bit(s) set for a
1534 : * given range.
1535 : *
1536 : * @tree: The io tree to search.
1537 : * @start: The start offset of the range. This value is updated to the
1538 : * offset of the first byte found with the given bit(s), so it
1539 : * can end up being bigger than the initial value.
1540 : * @search_end: The end offset (inclusive value) of the search range.
1541 : * @max_bytes: The maximum byte count we are interested in. The search stops
1542 : * once it reaches this count.
1543 : * @bits: The bits the range must have in order to be accounted for.
1544 : * If multiple bits are set, then only subranges that have all
1545 : * the bits set are accounted for.
1546 : * @contig: Indicate if the range must be contiguous. If this is true,
1547 : * the search stops once it finds a hole.
1548 : * @cached_state: A cached state to be used across multiple calls to this
1549 : * function in order to speed up searches. Use NULL if this is
1550 : * called only once or if each call does not start where the
1551 : * previous one ended.
1552 : *
1553 : * Returns the total number of bytes found within the given range that have
1554 : * all given bits set. If the returned number of bytes is greater than zero
1555 : * then @start is updated with the offset of the first byte with the bits set.
1556 : */
1557 1304655 : u64 count_range_bits(struct extent_io_tree *tree,
1558 : u64 *start, u64 search_end, u64 max_bytes,
1559 : u32 bits, int contig,
1560 : struct extent_state **cached_state)
1561 : {
1562 1304655 : struct extent_state *state = NULL;
1563 1304655 : struct extent_state *cached;
1564 1304655 : u64 cur_start = *start;
1565 1304655 : u64 total_bytes = 0;
1566 1304655 : u64 last = 0;
1567 1304655 : int found = 0;
1568 :
1569 1304655 : if (WARN_ON(search_end < cur_start))
1570 : return 0;
1571 :
1572 1304655 : spin_lock(&tree->lock);
1573 :
1574 1304656 : if (!cached_state || !*cached_state)
1575 1258621 : goto search;
1576 :
1577 46035 : cached = *cached_state;
1578 :
1579 46035 : if (!extent_state_in_tree(cached))
1580 222 : goto search;
1581 :
1582 45813 : if (cached->start <= cur_start && cur_start <= cached->end) {
1583 : state = cached;
1584 8267 : } else if (cached->start > cur_start) {
1585 8144 : struct extent_state *prev;
1586 :
1587 : /*
1588 : * The cached state starts after our search range's start. Check
1589 : * if the previous state record starts at or before the range we
1590 : * are looking for, and if so, use it - this is a common case
1591 : * when there are holes between records in the tree. If there is
1592 : * no previous state record, we can start from our cached state.
1593 : */
1594 8144 : prev = prev_state(cached);
1595 8144 : if (!prev)
1596 : state = cached;
1597 8144 : else if (prev->start <= cur_start && cur_start <= prev->end)
1598 : state = prev;
1599 : }
1600 :
1601 : /*
1602 : * This search will find all the extents that end after our range
1603 : * starts.
1604 : */
1605 161 : search:
1606 1304495 : if (!state)
1607 1259004 : state = tree_search(tree, cur_start);
1608 :
1609 2827127 : while (state) {
1610 1869614 : if (state->start > search_end)
1611 : break;
1612 1555759 : if (contig && found && state->start > last + 1)
1613 : break;
1614 1555759 : if (state->end >= cur_start && (state->state & bits) == bits) {
1615 37740 : total_bytes += min(search_end, state->end) + 1 -
1616 37740 : max(cur_start, state->start);
1617 37740 : if (total_bytes >= max_bytes)
1618 : break;
1619 13765 : if (!found) {
1620 13765 : *start = max(cur_start, state->start);
1621 13765 : found = 1;
1622 : }
1623 13765 : last = state->end;
1624 1518019 : } else if (contig && found) {
1625 : break;
1626 : }
1627 1522477 : state = next_state(state);
1628 : }
1629 :
1630 1304650 : if (cached_state) {
1631 222493 : free_extent_state(*cached_state);
1632 222493 : *cached_state = state;
1633 222493 : if (state)
1634 46047 : refcount_inc(&state->refs);
1635 : }
1636 :
1637 1304650 : spin_unlock(&tree->lock);
1638 :
1639 1304650 : return total_bytes;
1640 : }
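
The per-state accounting above clamps each state to the search window before
counting: min(search_end, state->end) + 1 - max(cur_start, state->start),
with all bounds inclusive. A worked example:

    #include <stdio.h>

    static unsigned long long min64(unsigned long long a, unsigned long long b)
    {
            return a < b ? a : b;
    }

    static unsigned long long max64(unsigned long long a, unsigned long long b)
    {
            return a > b ? a : b;
    }

    int main(void)
    {
            /* State [8192, 16383] overlaps window [12288, 20479]. */
            unsigned long long s_start = 8192, s_end = 16383;
            unsigned long long cur_start = 12288, search_end = 20479;
            unsigned long long bytes;

            bytes = min64(search_end, s_end) + 1 - max64(cur_start, s_start);
            printf("%llu\n", bytes);    /* 4096: bytes 12288..16383 */
            return 0;
    }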
1641 :
1642 : /*
1643 : * Search a range in the state tree for a given mask. If 'filled' == 1, this
1644 : * returns 1 only if every extent in the range has the bits set. Otherwise, 1
1645 : * is returned if any bit in the range is found set.
1646 : */
1647 55053595 : int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1648 : u32 bits, int filled, struct extent_state *cached)
1649 : {
1650 55053595 : struct extent_state *state = NULL;
1651 55053595 : int bitset = 0;
1652 :
1653 55053595 : spin_lock(&tree->lock);
1654 55054754 : if (cached && extent_state_in_tree(cached) && cached->start <= start &&
1655 3400789 : cached->end > start)
1656 : state = cached;
1657 : else
1658 51653965 : state = tree_search(tree, start);
1659 55338827 : while (state && start <= end) {
1660 13083516 : if (filled && state->start > start) {
1661 : bitset = 0;
1662 : break;
1663 : }
1664 :
1665 13077711 : if (state->start > end)
1666 : break;
1667 :
1668 5286661 : if (state->state & bits) {
1669 4009390 : bitset = 1;
1670 4009390 : if (!filled)
1671 : break;
1672 1277271 : } else if (filled) {
1673 : bitset = 0;
1674 : break;
1675 : }
1676 :
1677 4124417 : if (state->end == (u64)-1)
1678 : break;
1679 :
1680 4124417 : start = state->end + 1;
1681 4124417 : if (start > end)
1682 : break;
1683 287015 : state = next_state(state);
1684 : }
1685 :
1686 : /* We ran out of states and were still inside of our range. */
1687 55051812 : if (filled && !state)
1688 732 : bitset = 0;
1689 55051812 : spin_unlock(&tree->lock);
1690 55053617 : return bitset;
1691 : }
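
A compact user-space model of the walk above: with filled == 1, a gap before
the next state, a state missing the bits, or running out of states mid-range
all yield 0; with filled == 0, the first state carrying any of the bits is
enough. All names here are hypothetical:

    #include <stdio.h>

    struct range { unsigned long long start, end; unsigned int state; };

    static int test_bits(const struct range *r, int n,
                         unsigned long long start, unsigned long long end,
                         unsigned int bits, int filled)
    {
            int bitset = 0;

            for (int i = 0; i < n && start <= end; i++) {
                    if (filled && r[i].start > start)
                            return 0;   /* hole before next state */
                    if (r[i].start > end)
                            break;
                    if (r[i].state & bits) {
                            bitset = 1;
                            if (!filled)
                                    break;  /* one hit is enough */
                    } else if (filled) {
                            return 0;   /* bits missing mid-range */
                    }
                    start = r[i].end + 1;
            }
            if (filled && start <= end)
                    return 0;   /* ran out of states mid-range */
            return bitset;
    }

    int main(void)
    {
            struct range r[] = { { 0, 4095, 0x4 }, { 4096, 8191, 0x4 } };

            printf("%d\n", test_bits(r, 2, 0, 8191, 0x4, 1));  /* 1 */
            printf("%d\n", test_bits(r, 2, 0, 12287, 0x4, 1)); /* 0 */
            return 0;
    }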
1692 :
1693 : /* Wrappers around set/clear extent bit */
1694 1911442 : int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1695 : u32 bits, struct extent_changeset *changeset)
1696 : {
1697 : /*
1698 : * We don't support EXTENT_LOCKED yet, as the current changeset will
1699 : * record any bits changed, so for the EXTENT_LOCKED case it will
1700 : * either fail with -EEXIST or the changeset will record the whole
1701 : * range.
1702 : */
1703 1911442 : ASSERT(!(bits & EXTENT_LOCKED));
1704 :
1705 1911442 : return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
1706 : }
1707 :
1708 6796630 : int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1709 : u32 bits, struct extent_changeset *changeset)
1710 : {
1711 : /*
1712 : * Don't support EXTENT_LOCKED case, same reason as
1713 : * set_record_extent_bits().
1714 : */
1715 6796630 : ASSERT(!(bits & EXTENT_LOCKED));
1716 :
1717 6796630 : return __clear_extent_bit(tree, start, end, bits, NULL, changeset);
1718 : }
1719 :
1720 4572 : int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
1721 : struct extent_state **cached)
1722 : {
1723 4572 : int err;
1724 4572 : u64 failed_start;
1725 :
1726 4572 : err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
1727 : NULL, cached, NULL);
1728 4572 : if (err == -EEXIST) {
1729 15 : if (failed_start > start)
1730 6 : clear_extent_bit(tree, start, failed_start - 1,
1731 : EXTENT_LOCKED, cached);
1732 15 : return 0;
1733 : }
1734 : return 1;
1735 : }
1736 :
1737 : /*
1738 : * Either insert or lock the state struct between start and end. If the range
1739 : * is already locked, wait for the conflicting lock to be released and retry.
1740 : */
1741 93670595 : int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
1742 : struct extent_state **cached_state)
1743 : {
1744 93670595 : struct extent_state *failed_state = NULL;
1745 93670595 : int err;
1746 93670595 : u64 failed_start;
1747 :
1748 93670595 : err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
1749 : &failed_state, cached_state, NULL);
1750 93742547 : while (err == -EEXIST) {
1751 72153 : if (failed_start != start)
1752 4359 : clear_extent_bit(tree, start, failed_start - 1,
1753 : EXTENT_LOCKED, cached_state);
1754 :
1755 72153 : wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED,
1756 : &failed_state);
1757 72475 : err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
1758 : &failed_start, &failed_state,
1759 : cached_state, NULL);
1760 : }
1761 93677122 : return err;
1762 : }
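
A hedged sketch of the usual caller pattern (not taken from this file;
unlock_extent() is the EXTENT_LOCKED-clearing wrapper declared in
extent-io-tree.h, and the cached state lets the unlock skip a second tree
search):

    struct extent_state *cached = NULL;

    lock_extent(&inode->io_tree, start, end, &cached);
    /* ... operate on [start, end] while no one else can lock it ... */
    unlock_extent(&inode->io_tree, start, end, &cached);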
1763 :
1764 0 : void __cold extent_state_free_cachep(void)
1765 : {
1766 0 : btrfs_extent_state_leak_debug_check();
1767 0 : kmem_cache_destroy(extent_state_cache);
1768 0 : }
1769 :
1770 11 : int __init extent_state_init_cachep(void)
1771 : {
1772 11 : extent_state_cache = kmem_cache_create("btrfs_extent_state",
1773 : sizeof(struct extent_state), 0,
1774 : SLAB_MEM_SPREAD, NULL);
1775 11 : if (!extent_state_cache)
1776 0 : return -ENOMEM;
1777 :
1778 : return 0;
1779 : }