Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 :
3 : #include <linux/slab.h>
4 : #include <trace/events/btrfs.h>
5 : #include "messages.h"
6 : #include "ctree.h"
7 : #include "extent-io-tree.h"
8 : #include "btrfs_inode.h"
9 : #include "misc.h"
10 :
11 : static struct kmem_cache *extent_state_cache;
12 :
13 : static inline bool extent_state_in_tree(const struct extent_state *state)
14 : {
15 0 : return !RB_EMPTY_NODE(&state->rb_node);
16 : }
17 :
18 : #ifdef CONFIG_BTRFS_DEBUG
19 : static LIST_HEAD(states);
20 : static DEFINE_SPINLOCK(leak_lock);
21 :
22 : static inline void btrfs_leak_debug_add_state(struct extent_state *state)
23 : {
24 : unsigned long flags;
25 :
26 : spin_lock_irqsave(&leak_lock, flags);
27 : list_add(&state->leak_list, &states);
28 : spin_unlock_irqrestore(&leak_lock, flags);
29 : }
30 :
31 : static inline void btrfs_leak_debug_del_state(struct extent_state *state)
32 : {
33 : unsigned long flags;
34 :
35 : spin_lock_irqsave(&leak_lock, flags);
36 : list_del(&state->leak_list);
37 : spin_unlock_irqrestore(&leak_lock, flags);
38 : }
39 :
40 : static inline void btrfs_extent_state_leak_debug_check(void)
41 : {
42 : struct extent_state *state;
43 :
44 : while (!list_empty(&states)) {
45 : state = list_entry(states.next, struct extent_state, leak_list);
46 : pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
47 : state->start, state->end, state->state,
48 : extent_state_in_tree(state),
49 : refcount_read(&state->refs));
50 : list_del(&state->leak_list);
51 : kmem_cache_free(extent_state_cache, state);
52 : }
53 : }
54 :
55 : #define btrfs_debug_check_extent_io_range(tree, start, end) \
56 : __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
57 : static inline void __btrfs_debug_check_extent_io_range(const char *caller,
58 : struct extent_io_tree *tree,
59 : u64 start, u64 end)
60 : {
61 : struct btrfs_inode *inode = tree->inode;
62 : u64 isize;
63 :
64 : if (!inode)
65 : return;
66 :
67 : isize = i_size_read(&inode->vfs_inode);
68 : if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
69 : btrfs_debug_rl(inode->root->fs_info,
70 : "%s: ino %llu isize %llu odd range [%llu,%llu]",
71 : caller, btrfs_ino(inode), isize, start, end);
72 : }
73 : }
74 : #else
75 : #define btrfs_leak_debug_add_state(state) do {} while (0)
76 : #define btrfs_leak_debug_del_state(state) do {} while (0)
77 : #define btrfs_extent_state_leak_debug_check() do {} while (0)
78 : #define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
79 : #endif
80 :
81 : /*
82 : * For the file_extent_tree, we want to hold the inode lock when we lookup and
83 : * update the disk_i_size, but lockdep will complain because with our io_tree we
84 : * hold the tree lock and then take the inode lock when setting delalloc. These two things
85 : * are unrelated, so make a class for the file_extent_tree so we don't get the
86 : * two locking patterns mixed up.
87 : */
88 : static struct lock_class_key file_extent_tree_class;
89 :
90 : struct tree_entry {
91 : u64 start;
92 : u64 end;
93 : struct rb_node rb_node;
94 : };
95 :
96 2 : void extent_io_tree_init(struct btrfs_fs_info *fs_info,
97 : struct extent_io_tree *tree, unsigned int owner)
98 : {
99 2 : tree->fs_info = fs_info;
100 2 : tree->state = RB_ROOT;
101 2 : spin_lock_init(&tree->lock);
102 2 : tree->inode = NULL;
103 2 : tree->owner = owner;
104 2 : if (owner == IO_TREE_INODE_FILE_EXTENT)
105 2 : lockdep_set_class(&tree->lock, &file_extent_tree_class);
106 2 : }
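
/*
 * Illustrative sketch of how an owner sets up one of these trees (hypothetical
 * caller code, assuming the usual file_extent_tree member embedded in struct
 * btrfs_inode):
 *
 *	extent_io_tree_init(inode->root->fs_info, &inode->file_extent_tree,
 *			    IO_TREE_INODE_FILE_EXTENT);
 *
 * The owner constant labels trace events and, for IO_TREE_INODE_FILE_EXTENT,
 * selects the separate lockdep class above. After this the tree is empty
 * (RB_ROOT) and ready for the set/clear/lock helpers below.
 */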
107 :
108 0 : void extent_io_tree_release(struct extent_io_tree *tree)
109 : {
110 0 : spin_lock(&tree->lock);
111 : /*
112 : * Do a single barrier for the waitqueue_active check here, the state
113 : * of the waitqueue should not change once extent_io_tree_release is
114 : * called.
115 : */
116 0 : smp_mb();
117 0 : while (!RB_EMPTY_ROOT(&tree->state)) {
118 0 : struct rb_node *node;
119 0 : struct extent_state *state;
120 :
121 0 : node = rb_first(&tree->state);
122 0 : state = rb_entry(node, struct extent_state, rb_node);
123 0 : rb_erase(&state->rb_node, &tree->state);
124 0 : RB_CLEAR_NODE(&state->rb_node);
125 : /*
126 : * btree io trees aren't supposed to have tasks waiting for
127 : * changes in the flags of extent states ever.
128 : */
129 0 : ASSERT(!waitqueue_active(&state->wq));
130 0 : free_extent_state(state);
131 :
132 0 : cond_resched_lock(&tree->lock);
133 : }
134 0 : spin_unlock(&tree->lock);
135 0 : }
136 :
137 0 : static struct extent_state *alloc_extent_state(gfp_t mask)
138 : {
139 0 : struct extent_state *state;
140 :
141 : /*
142 : * The given mask might not be appropriate for the slab allocator,
143 : * drop the unsupported bits
144 : */
145 0 : mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
146 0 : state = kmem_cache_alloc(extent_state_cache, mask);
147 0 : if (!state)
148 : return state;
149 0 : state->state = 0;
150 0 : RB_CLEAR_NODE(&state->rb_node);
151 0 : btrfs_leak_debug_add_state(state);
152 0 : refcount_set(&state->refs, 1);
153 0 : init_waitqueue_head(&state->wq);
154 0 : trace_alloc_extent_state(state, mask, _RET_IP_);
155 0 : return state;
156 : }
157 :
158 : static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc)
159 : {
160 0 : if (!prealloc)
161 0 : prealloc = alloc_extent_state(GFP_ATOMIC);
162 :
163 0 : return prealloc;
164 : }
165 :
166 0 : void free_extent_state(struct extent_state *state)
167 : {
168 0 : if (!state)
169 : return;
170 0 : if (refcount_dec_and_test(&state->refs)) {
171 0 : WARN_ON(extent_state_in_tree(state));
172 0 : btrfs_leak_debug_del_state(state);
173 0 : trace_free_extent_state(state, _RET_IP_);
174 0 : kmem_cache_free(extent_state_cache, state);
175 : }
176 : }
177 :
178 0 : static int add_extent_changeset(struct extent_state *state, u32 bits,
179 : struct extent_changeset *changeset,
180 : int set)
181 : {
182 0 : int ret;
183 :
184 0 : if (!changeset)
185 : return 0;
186 0 : if (set && (state->state & bits) == bits)
187 : return 0;
188 0 : if (!set && (state->state & bits) == 0)
189 : return 0;
190 0 : changeset->bytes_changed += state->end - state->start + 1;
191 0 : ret = ulist_add(&changeset->range_changed, state->start, state->end,
192 : GFP_ATOMIC);
193 0 : return ret;
194 : }
195 :
196 : static inline struct extent_state *next_state(struct extent_state *state)
197 : {
198 0 : struct rb_node *next = rb_next(&state->rb_node);
199 :
200 0 : if (next)
201 0 : return rb_entry(next, struct extent_state, rb_node);
202 : else
203 : return NULL;
204 : }
205 :
206 : static inline struct extent_state *prev_state(struct extent_state *state)
207 : {
208 0 : struct rb_node *next = rb_prev(&state->rb_node);
209 :
210 0 : if (next)
211 0 : return rb_entry(next, struct extent_state, rb_node);
212 : else
213 : return NULL;
214 : }
215 :
216 : /*
217 : * Search @tree for an entry that contains @offset. Such entry would have
218 : * entry->start <= offset && entry->end >= offset.
219 : *
220 : * @tree: the tree to search
221 : * @offset: offset that should fall within an entry in @tree
222 : * @node_ret: pointer where new node should be anchored (used when inserting an
223 : * entry in the tree)
224 : * @parent_ret: points to entry which would have been the parent of the entry,
225 : * containing @offset
226 : *
227 : * Return a pointer to the entry that contains @offset byte address and don't change
228 : * @node_ret and @parent_ret.
229 : *
230 : * If no such entry exists, fill @node_ret and @parent_ret, and return a pointer
231 : * to the first entry that begins after @offset, or NULL if there is none.
232 : */
233 0 : static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree,
234 : u64 offset,
235 : struct rb_node ***node_ret,
236 : struct rb_node **parent_ret)
237 : {
238 0 : struct rb_root *root = &tree->state;
239 0 : struct rb_node **node = &root->rb_node;
240 0 : struct rb_node *prev = NULL;
241 0 : struct extent_state *entry = NULL;
242 :
243 0 : while (*node) {
244 0 : prev = *node;
245 0 : entry = rb_entry(prev, struct extent_state, rb_node);
246 :
247 0 : if (offset < entry->start)
248 0 : node = &(*node)->rb_left;
249 0 : else if (offset > entry->end)
250 0 : node = &(*node)->rb_right;
251 : else
252 0 : return entry;
253 : }
254 :
255 0 : if (node_ret)
256 0 : *node_ret = node;
257 0 : if (parent_ret)
258 0 : *parent_ret = prev;
259 :
260 : /* Search neighbors until we find the first one past the end */
261 0 : while (entry && offset > entry->end)
262 0 : entry = next_state(entry);
263 :
264 : return entry;
265 : }
266 :
267 : /*
268 : * Search offset in the tree or fill neighbor rbtree node pointers.
269 : *
270 : * @tree: the tree to search
271 : * @offset: offset that should fall within an entry in @tree
272 : * @next_ret: pointer to the first entry whose range ends after @offset
273 : * @prev_ret: pointer to the first entry whose range begins before @offset
274 : *
275 : * Return a pointer to the entry that contains @offset byte address. If no
276 : * such entry exists, then return NULL and fill @prev_ret and @next_ret.
277 : * Otherwise return the found entry and other pointers are left untouched.
278 : */
279 0 : static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree,
280 : u64 offset,
281 : struct extent_state **prev_ret,
282 : struct extent_state **next_ret)
283 : {
284 0 : struct rb_root *root = &tree->state;
285 0 : struct rb_node **node = &root->rb_node;
286 0 : struct extent_state *orig_prev;
287 0 : struct extent_state *entry = NULL;
288 :
289 0 : ASSERT(prev_ret);
290 0 : ASSERT(next_ret);
291 :
292 0 : while (*node) {
293 0 : entry = rb_entry(*node, struct extent_state, rb_node);
294 :
295 0 : if (offset < entry->start)
296 0 : node = &(*node)->rb_left;
297 0 : else if (offset > entry->end)
298 0 : node = &(*node)->rb_right;
299 : else
300 0 : return entry;
301 : }
302 :
303 : orig_prev = entry;
304 0 : while (entry && offset > entry->end)
305 0 : entry = next_state(entry);
306 0 : *next_ret = entry;
307 0 : entry = orig_prev;
308 :
309 0 : while (entry && offset < entry->start)
310 0 : entry = prev_state(entry);
311 0 : *prev_ret = entry;
312 :
313 0 : return NULL;
314 : }
315 :
316 : /*
317 : * Inexact rb-tree search, return the next entry if @offset is not found
318 : */
319 : static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset)
320 : {
321 0 : return tree_search_for_insert(tree, offset, NULL, NULL);
322 : }
323 :
324 0 : static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
325 : {
326 0 : btrfs_panic(tree->fs_info, err,
327 : "locking error: extent tree was modified by another thread while locked");
328 : }
329 :
330 : /*
331 : * Utility function to look for merge candidates inside a given range. Any
332 : * extents with matching state are merged together into a single extent in the
333 : * tree. Extents with EXTENT_LOCKED or EXTENT_BOUNDARY in their state field
334 : * are not merged because the end_io handlers need to be able to do operations
335 : * on them without sleeping (or doing allocations/splits).
336 : *
337 : * This should be called with the tree lock held.
338 : */
339 0 : static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
340 : {
341 0 : struct extent_state *other;
342 :
343 0 : if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
344 : return;
345 :
346 0 : other = prev_state(state);
347 0 : if (other && other->end == state->start - 1 &&
348 0 : other->state == state->state) {
349 0 : if (tree->inode)
350 0 : btrfs_merge_delalloc_extent(tree->inode, state, other);
351 0 : state->start = other->start;
352 0 : rb_erase(&other->rb_node, &tree->state);
353 0 : RB_CLEAR_NODE(&other->rb_node);
354 0 : free_extent_state(other);
355 : }
356 0 : other = next_state(state);
357 0 : if (other && other->start == state->end + 1 &&
358 0 : other->state == state->state) {
359 0 : if (tree->inode)
360 0 : btrfs_merge_delalloc_extent(tree->inode, state, other);
361 0 : state->end = other->end;
362 0 : rb_erase(&other->rb_node, &tree->state);
363 0 : RB_CLEAR_NODE(&other->rb_node);
364 0 : free_extent_state(other);
365 : }
366 : }
367 :
368 0 : static void set_state_bits(struct extent_io_tree *tree,
369 : struct extent_state *state,
370 : u32 bits, struct extent_changeset *changeset)
371 : {
372 0 : u32 bits_to_set = bits & ~EXTENT_CTLBITS;
373 0 : int ret;
374 :
375 0 : if (tree->inode)
376 0 : btrfs_set_delalloc_extent(tree->inode, state, bits);
377 :
378 0 : ret = add_extent_changeset(state, bits_to_set, changeset, 1);
379 0 : BUG_ON(ret < 0);
380 0 : state->state |= bits_to_set;
381 0 : }
382 :
383 : /*
384 : * Insert an extent_state struct into the tree. 'bits' are set on the
385 : * struct before it is inserted.
386 : *
387 : * This may return -EEXIST if the extent is already there, in which case the
388 : * state struct is freed.
389 : *
390 : * The tree lock is not taken internally. This is a utility function and
391 : * probably isn't what you want to call (see set/clear_extent_bit).
392 : */
393 0 : static int insert_state(struct extent_io_tree *tree,
394 : struct extent_state *state,
395 : u32 bits, struct extent_changeset *changeset)
396 : {
397 0 : struct rb_node **node;
398 0 : struct rb_node *parent = NULL;
399 0 : const u64 end = state->end;
400 :
401 0 : set_state_bits(tree, state, bits, changeset);
402 :
403 0 : node = &tree->state.rb_node;
404 0 : while (*node) {
405 0 : struct extent_state *entry;
406 :
407 0 : parent = *node;
408 0 : entry = rb_entry(parent, struct extent_state, rb_node);
409 :
410 0 : if (end < entry->start) {
411 0 : node = &(*node)->rb_left;
412 0 : } else if (end > entry->end) {
413 0 : node = &(*node)->rb_right;
414 : } else {
415 0 : btrfs_err(tree->fs_info,
416 : "found node %llu %llu on insert of %llu %llu",
417 : entry->start, entry->end, state->start, end);
418 0 : return -EEXIST;
419 : }
420 : }
421 :
422 0 : rb_link_node(&state->rb_node, parent, node);
423 0 : rb_insert_color(&state->rb_node, &tree->state);
424 :
425 0 : merge_state(tree, state);
426 0 : return 0;
427 : }
428 :
429 : /*
430 : * Insert state to @tree to the location given by @node and @parent.
431 : */
432 0 : static void insert_state_fast(struct extent_io_tree *tree,
433 : struct extent_state *state, struct rb_node **node,
434 : struct rb_node *parent, unsigned bits,
435 : struct extent_changeset *changeset)
436 : {
437 0 : set_state_bits(tree, state, bits, changeset);
438 0 : rb_link_node(&state->rb_node, parent, node);
439 0 : rb_insert_color(&state->rb_node, &tree->state);
440 0 : merge_state(tree, state);
441 0 : }
442 :
443 : /*
444 : * Split a given extent state struct in two, inserting the preallocated
445 : * struct 'prealloc' as the newly created second half. 'split' indicates an
446 : * offset inside 'orig' where it should be split.
447 : *
448 : * Before calling,
449 : * the tree has 'orig' at [orig->start, orig->end]. After calling, there
450 : * are two extent state structs in the tree:
451 : * prealloc: [orig->start, split - 1]
452 : * orig: [ split, orig->end ]
453 : *
454 : * The tree locks are not taken by this function. They need to be held
455 : * by the caller.
456 : */
457 0 : static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
458 : struct extent_state *prealloc, u64 split)
459 : {
460 0 : struct rb_node *parent = NULL;
461 0 : struct rb_node **node;
462 :
463 0 : if (tree->inode)
464 0 : btrfs_split_delalloc_extent(tree->inode, orig, split);
465 :
466 0 : prealloc->start = orig->start;
467 0 : prealloc->end = split - 1;
468 0 : prealloc->state = orig->state;
469 0 : orig->start = split;
470 :
471 0 : parent = &orig->rb_node;
472 0 : node = &parent;
473 0 : while (*node) {
474 0 : struct extent_state *entry;
475 :
476 0 : parent = *node;
477 0 : entry = rb_entry(parent, struct extent_state, rb_node);
478 :
479 0 : if (prealloc->end < entry->start) {
480 0 : node = &(*node)->rb_left;
481 0 : } else if (prealloc->end > entry->end) {
482 0 : node = &(*node)->rb_right;
483 : } else {
484 0 : free_extent_state(prealloc);
485 0 : return -EEXIST;
486 : }
487 : }
488 :
489 0 : rb_link_node(&prealloc->rb_node, parent, node);
490 0 : rb_insert_color(&prealloc->rb_node, &tree->state);
491 :
492 0 : return 0;
493 : }
494 :
495 : /*
496 : * Utility function to clear some bits in an extent state struct. It will
497 : * optionally wake up anyone waiting on this state (wake == 1).
498 : *
499 : * If no bits are set on the state struct after clearing things, the
500 : * struct is freed and removed from the tree
501 : */
502 0 : static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
503 : struct extent_state *state,
504 : u32 bits, int wake,
505 : struct extent_changeset *changeset)
506 : {
507 0 : struct extent_state *next;
508 0 : u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
509 0 : int ret;
510 :
511 0 : if (tree->inode)
512 0 : btrfs_clear_delalloc_extent(tree->inode, state, bits);
513 :
514 0 : ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
515 0 : BUG_ON(ret < 0);
516 0 : state->state &= ~bits_to_clear;
517 0 : if (wake)
518 0 : wake_up(&state->wq);
519 0 : if (state->state == 0) {
520 0 : next = next_state(state);
521 0 : if (extent_state_in_tree(state)) {
522 0 : rb_erase(&state->rb_node, &tree->state);
523 0 : RB_CLEAR_NODE(&state->rb_node);
524 0 : free_extent_state(state);
525 : } else {
526 0 : WARN_ON(1);
527 : }
528 : } else {
529 0 : merge_state(tree, state);
530 0 : next = next_state(state);
531 : }
532 0 : return next;
533 : }
534 :
535 : /*
536 : * Detect if extent bits request NOWAIT semantics, set the gfp mask accordingly
537 : * and unset the EXTENT_NOWAIT bit.
538 : */
539 : static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
540 : {
541 0 : *mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
542 0 : *bits &= EXTENT_NOWAIT - 1;
543 : }
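
/*
 * Illustrative example of how a caller asks for the non-blocking mode
 * (hypothetical caller code):
 *
 *	ret = set_extent_bit(&inode->io_tree, start, end,
 *			     EXTENT_DELALLOC | EXTENT_NOWAIT, &cached_state);
 *
 * set_gfp_mask_from_bits() turns that into GFP_NOWAIT and strips the bit, so
 * EXTENT_NOWAIT is never stored in an extent_state. Note the masking with
 * EXTENT_NOWAIT - 1 only works as long as EXTENT_NOWAIT stays the highest
 * defined extent bit.
 */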
544 :
545 : /*
546 : * Clear some bits on a range in the tree. This may require splitting or
547 : * inserting elements in the tree, so the gfp mask is used to indicate which
548 : * allocations or sleeping are allowed.
549 : *
550 : * Clearing EXTENT_LOCKED wakes up any sleepers, and passing EXTENT_CLEAR_ALL_BITS
551 : * in @bits removes the given range from the tree regardless of state (ie for truncate).
552 : *
553 : * The range [start, end] is inclusive.
554 : *
555 : * This takes the tree lock, and returns 0 on success and < 0 on error.
556 : */
557 0 : int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
558 : u32 bits, struct extent_state **cached_state,
559 : struct extent_changeset *changeset)
560 : {
561 0 : struct extent_state *state;
562 0 : struct extent_state *cached;
563 0 : struct extent_state *prealloc = NULL;
564 0 : u64 last_end;
565 0 : int err;
566 0 : int clear = 0;
567 0 : int wake;
568 0 : int delete = (bits & EXTENT_CLEAR_ALL_BITS);
569 0 : gfp_t mask;
570 :
571 0 : set_gfp_mask_from_bits(&bits, &mask);
572 0 : btrfs_debug_check_extent_io_range(tree, start, end);
573 0 : trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
574 :
575 0 : if (delete)
576 0 : bits |= ~EXTENT_CTLBITS;
577 :
578 0 : if (bits & EXTENT_DELALLOC)
579 0 : bits |= EXTENT_NORESERVE;
580 :
581 0 : wake = (bits & EXTENT_LOCKED) ? 1 : 0;
582 0 : if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
583 0 : clear = 1;
584 0 : again:
585 0 : if (!prealloc) {
586 : /*
587 : * Don't care for allocation failure here because we might end
588 : * up not needing the pre-allocated extent state at all, which
589 : * is the case if we only have in the tree extent states that
590 : * cover our input range and don't cover any other range.
591 : * If we end up needing a new extent state we allocate it later.
592 : */
593 0 : prealloc = alloc_extent_state(mask);
594 : }
595 :
596 0 : spin_lock(&tree->lock);
597 0 : if (cached_state) {
598 0 : cached = *cached_state;
599 :
600 0 : if (clear) {
601 0 : *cached_state = NULL;
602 0 : cached_state = NULL;
603 : }
604 :
605 0 : if (cached && extent_state_in_tree(cached) &&
606 0 : cached->start <= start && cached->end > start) {
607 0 : if (clear)
608 0 : refcount_dec(&cached->refs);
609 0 : state = cached;
610 0 : goto hit_next;
611 : }
612 0 : if (clear)
613 0 : free_extent_state(cached);
614 : }
615 :
616 : /* This search will find the extents that end after our range starts. */
617 0 : state = tree_search(tree, start);
618 0 : if (!state)
619 0 : goto out;
620 0 : hit_next:
621 0 : if (state->start > end)
622 0 : goto out;
623 0 : WARN_ON(state->end < start);
624 0 : last_end = state->end;
625 :
626 : /* The state doesn't have the wanted bits, go ahead. */
627 0 : if (!(state->state & bits)) {
628 0 : state = next_state(state);
629 0 : goto next;
630 : }
631 :
632 : /*
633 : * | ---- desired range ---- |
634 : * | state | or
635 : * | ------------- state -------------- |
636 : *
637 : * We need to split the extent we found, and may flip bits on second
638 : * half.
639 : *
640 : * If the extent we found extends past our range, we just split and
641 : * search again. It'll get split again the next time though.
642 : *
643 : * If the extent we found is inside our range, we clear the desired bit
644 : * on it.
645 : */
646 :
647 0 : if (state->start < start) {
648 0 : prealloc = alloc_extent_state_atomic(prealloc);
649 0 : if (!prealloc)
650 0 : goto search_again;
651 0 : err = split_state(tree, state, prealloc, start);
652 0 : if (err)
653 0 : extent_io_tree_panic(tree, err);
654 :
655 0 : prealloc = NULL;
656 0 : if (err)
657 : goto out;
658 0 : if (state->end <= end) {
659 0 : state = clear_state_bit(tree, state, bits, wake, changeset);
660 0 : goto next;
661 : }
662 0 : goto search_again;
663 : }
664 : /*
665 : * | ---- desired range ---- |
666 : * | state |
667 : * We need to split the extent, and clear the bit on the first half.
668 : */
669 0 : if (state->start <= end && state->end > end) {
670 0 : prealloc = alloc_extent_state_atomic(prealloc);
671 0 : if (!prealloc)
672 0 : goto search_again;
673 0 : err = split_state(tree, state, prealloc, end + 1);
674 0 : if (err)
675 0 : extent_io_tree_panic(tree, err);
676 :
677 0 : if (wake)
678 0 : wake_up(&state->wq);
679 :
680 0 : clear_state_bit(tree, prealloc, bits, wake, changeset);
681 :
682 0 : prealloc = NULL;
683 0 : goto out;
684 : }
685 :
686 0 : state = clear_state_bit(tree, state, bits, wake, changeset);
687 0 : next:
688 0 : if (last_end == (u64)-1)
689 0 : goto out;
690 0 : start = last_end + 1;
691 0 : if (start <= end && state && !need_resched())
692 0 : goto hit_next;
693 :
694 0 : search_again:
695 0 : if (start > end)
696 0 : goto out;
697 0 : spin_unlock(&tree->lock);
698 0 : if (gfpflags_allow_blocking(mask))
699 0 : cond_resched();
700 0 : goto again;
701 :
702 0 : out:
703 0 : spin_unlock(&tree->lock);
704 0 : if (prealloc)
705 0 : free_extent_state(prealloc);
706 :
707 0 : return 0;
708 :
709 : }
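
/*
 * Illustrative sketch of a typical caller (hypothetical code, assuming the
 * clear_extent_bit() wrapper from extent-io-tree.h that passes a NULL
 * changeset):
 *
 *	clear_extent_bit(&inode->io_tree, start, end,
 *			 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, &cached_state);
 *
 * Passing EXTENT_CLEAR_ALL_BITS instead removes every state record in the
 * range no matter which bits it carries, which is what truncate wants.
 */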
710 :
711 0 : static void wait_on_state(struct extent_io_tree *tree,
712 : struct extent_state *state)
713 : __releases(tree->lock)
714 : __acquires(tree->lock)
715 : {
716 0 : DEFINE_WAIT(wait);
717 0 : prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
718 0 : spin_unlock(&tree->lock);
719 0 : schedule();
720 0 : spin_lock(&tree->lock);
721 0 : finish_wait(&state->wq, &wait);
722 0 : }
723 :
724 : /*
725 : * Wait for one or more bits to clear on a range in the state tree.
726 : * The range [start, end] is inclusive.
727 : * The tree lock is taken by this function
728 : */
729 0 : void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
730 : struct extent_state **cached_state)
731 : {
732 0 : struct extent_state *state;
733 :
734 0 : btrfs_debug_check_extent_io_range(tree, start, end);
735 :
736 0 : spin_lock(&tree->lock);
737 0 : again:
738 : /*
739 : * Maintain cached_state, as we may not remove it from the tree if there
740 : * are more bits than the bits we're waiting on set on this state.
741 : */
742 0 : if (cached_state && *cached_state) {
743 0 : state = *cached_state;
744 0 : if (extent_state_in_tree(state) &&
745 0 : state->start <= start && start < state->end)
746 0 : goto process_node;
747 : }
748 0 : while (1) {
749 : /*
750 : * This search will find all the extents that end after our
751 : * range starts.
752 : */
753 0 : state = tree_search(tree, start);
754 : process_node:
755 0 : if (!state)
756 : break;
757 0 : if (state->start > end)
758 0 : goto out;
759 :
760 0 : if (state->state & bits) {
761 0 : start = state->start;
762 0 : refcount_inc(&state->refs);
763 0 : wait_on_state(tree, state);
764 0 : free_extent_state(state);
765 0 : goto again;
766 : }
767 0 : start = state->end + 1;
768 :
769 0 : if (start > end)
770 : break;
771 :
772 0 : if (!cond_resched_lock(&tree->lock)) {
773 0 : state = next_state(state);
774 0 : goto process_node;
775 : }
776 : }
777 0 : out:
778 : /* This state is no longer useful, clear it and free it up. */
779 0 : if (cached_state && *cached_state) {
780 0 : state = *cached_state;
781 0 : *cached_state = NULL;
782 0 : free_extent_state(state);
783 : }
784 0 : spin_unlock(&tree->lock);
785 0 : }
786 :
787 0 : static void cache_state_if_flags(struct extent_state *state,
788 : struct extent_state **cached_ptr,
789 : unsigned flags)
790 : {
791 0 : if (cached_ptr && !(*cached_ptr)) {
792 0 : if (!flags || (state->state & flags)) {
793 0 : *cached_ptr = state;
794 0 : refcount_inc(&state->refs);
795 : }
796 : }
797 0 : }
798 :
799 : static void cache_state(struct extent_state *state,
800 : struct extent_state **cached_ptr)
801 : {
802 0 : return cache_state_if_flags(state, cached_ptr,
803 : EXTENT_LOCKED | EXTENT_BOUNDARY);
804 : }
805 :
806 : /*
807 : * Find the first state struct with 'bits' set after 'start', and return it.
808 : * tree->lock must be held. NULL will be returned if nothing was found after
809 : * 'start'.
810 : */
811 0 : static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
812 : u64 start, u32 bits)
813 : {
814 0 : struct extent_state *state;
815 :
816 : /*
817 : * This search will find all the extents that end after our range
818 : * starts.
819 : */
820 0 : state = tree_search(tree, start);
821 0 : while (state) {
822 0 : if (state->end >= start && (state->state & bits))
823 0 : return state;
824 0 : state = next_state(state);
825 : }
826 : return NULL;
827 : }
828 :
829 : /*
830 : * Find the first offset in the io tree with one or more @bits set.
831 : *
832 : * Note: If there are multiple bits set in @bits, any of them will match.
833 : *
834 : * Return 0 if we find something, and update @start_ret and @end_ret.
835 : * Return 1 if we found nothing.
836 : */
837 0 : int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
838 : u64 *start_ret, u64 *end_ret, u32 bits,
839 : struct extent_state **cached_state)
840 : {
841 0 : struct extent_state *state;
842 0 : int ret = 1;
843 :
844 0 : spin_lock(&tree->lock);
845 0 : if (cached_state && *cached_state) {
846 0 : state = *cached_state;
847 0 : if (state->end == start - 1 && extent_state_in_tree(state)) {
848 0 : while ((state = next_state(state)) != NULL) {
849 0 : if (state->state & bits)
850 0 : goto got_it;
851 : }
852 0 : free_extent_state(*cached_state);
853 0 : *cached_state = NULL;
854 0 : goto out;
855 : }
856 0 : free_extent_state(*cached_state);
857 0 : *cached_state = NULL;
858 : }
859 :
860 0 : state = find_first_extent_bit_state(tree, start, bits);
861 0 : got_it:
862 0 : if (state) {
863 0 : cache_state_if_flags(state, cached_state, 0);
864 0 : *start_ret = state->start;
865 0 : *end_ret = state->end;
866 0 : ret = 0;
867 : }
868 0 : out:
869 0 : spin_unlock(&tree->lock);
870 0 : return ret;
871 : }
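
/*
 * Illustrative sketch of walking every range that has a bit set (hypothetical
 * caller code; a return value of 0 means a range was found):
 *
 *	struct extent_state *cached = NULL;
 *	u64 start = 0, found_start, found_end;
 *
 *	while (!find_first_extent_bit(tree, start, &found_start, &found_end,
 *				      EXTENT_DIRTY, &cached)) {
 *		... use [found_start, found_end] ...
 *		start = found_end + 1;
 *	}
 *
 * The cached state lets the next iteration resume from the previous record
 * instead of searching the rbtree from the root again.
 */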
872 :
873 : /*
874 : * Find a contiguous area of bits
875 : *
876 : * @tree: io tree to check
877 : * @start: offset to start the search from
878 : * @start_ret: the first offset we found with the bits set
879 : * @end_ret: the final contiguous range of the bits that were set
880 : * @bits: bits to look for
881 : *
882 : * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
883 : * to set bits appropriately, and then merge them again. During this time it
884 : * will drop the tree->lock, so use this helper if you want to find the actual
885 : * contiguous area for given bits. We will search to the first bit we find, and
886 : * then walk down the tree until we find a non-contiguous area. The area
887 : * returned will be the full contiguous area with the bits set.
888 : */
889 0 : int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
890 : u64 *start_ret, u64 *end_ret, u32 bits)
891 : {
892 0 : struct extent_state *state;
893 0 : int ret = 1;
894 :
895 0 : spin_lock(&tree->lock);
896 0 : state = find_first_extent_bit_state(tree, start, bits);
897 0 : if (state) {
898 0 : *start_ret = state->start;
899 0 : *end_ret = state->end;
900 0 : while ((state = next_state(state)) != NULL) {
901 0 : if (state->start > (*end_ret + 1))
902 : break;
903 0 : *end_ret = state->end;
904 : }
905 : ret = 0;
906 : }
907 0 : spin_unlock(&tree->lock);
908 0 : return ret;
909 : }
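
/*
 * Illustrative example of the difference from find_first_extent_bit()
 * (hypothetical tree contents): if EXTENT_DIRTY is set on the two adjacent
 * records [0, 4095] and [4096, 8191], then
 *
 *	find_contiguous_extent_bit(tree, 0, &start_ret, &end_ret, EXTENT_DIRTY);
 *
 * returns 0 with start_ret == 0 and end_ret == 8191, i.e. the full contiguous
 * area, even if the records have not been merged back together yet.
 */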
910 :
911 : /*
912 : * Find a contiguous range of bytes in the file marked as delalloc, not more
913 : * than 'max_bytes'. 'start' and 'end' are used to return the range.
914 : *
915 : * True is returned if we find something, false if nothing was in the tree.
916 : */
917 0 : bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
918 : u64 *end, u64 max_bytes,
919 : struct extent_state **cached_state)
920 : {
921 0 : struct extent_state *state;
922 0 : u64 cur_start = *start;
923 0 : bool found = false;
924 0 : u64 total_bytes = 0;
925 :
926 0 : spin_lock(&tree->lock);
927 :
928 : /*
929 : * This search will find all the extents that end after our range
930 : * starts.
931 : */
932 0 : state = tree_search(tree, cur_start);
933 0 : if (!state) {
934 0 : *end = (u64)-1;
935 0 : goto out;
936 : }
937 :
938 0 : while (state) {
939 0 : if (found && (state->start != cur_start ||
940 0 : (state->state & EXTENT_BOUNDARY))) {
941 0 : goto out;
942 : }
943 0 : if (!(state->state & EXTENT_DELALLOC)) {
944 0 : if (!found)
945 0 : *end = state->end;
946 0 : goto out;
947 : }
948 0 : if (!found) {
949 0 : *start = state->start;
950 0 : *cached_state = state;
951 0 : refcount_inc(&state->refs);
952 : }
953 0 : found = true;
954 0 : *end = state->end;
955 0 : cur_start = state->end + 1;
956 0 : total_bytes += state->end - state->start + 1;
957 0 : if (total_bytes >= max_bytes)
958 : break;
959 0 : state = next_state(state);
960 : }
961 0 : out:
962 0 : spin_unlock(&tree->lock);
963 0 : return found;
964 : }
965 :
966 : /*
967 : * Set some bits on a range in the tree. This may require allocations or
968 : * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
969 : * GFP_NOWAIT.
970 : *
971 : * If any of the exclusive bits are set, this will fail with -EEXIST if some
972 : * part of the range already has the desired bits set. The extent_state of the
973 : * existing range is returned in failed_state in this case, and the start of the
974 : * existing range is returned in failed_start. failed_state is used as an
975 : * optimization for wait_extent_bit(); failed_start must be used as the source of
976 : * truth as failed_state may have changed since we returned.
977 : *
978 : * [start, end] is inclusive. This takes the tree lock.
979 : */
980 0 : static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
981 : u32 bits, u64 *failed_start,
982 : struct extent_state **failed_state,
983 : struct extent_state **cached_state,
984 : struct extent_changeset *changeset)
985 : {
986 0 : struct extent_state *state;
987 0 : struct extent_state *prealloc = NULL;
988 0 : struct rb_node **p = NULL;
989 0 : struct rb_node *parent = NULL;
990 0 : int err = 0;
991 0 : u64 last_start;
992 0 : u64 last_end;
993 0 : u32 exclusive_bits = (bits & EXTENT_LOCKED);
994 0 : gfp_t mask;
995 :
996 0 : set_gfp_mask_from_bits(&bits, &mask);
997 0 : btrfs_debug_check_extent_io_range(tree, start, end);
998 0 : trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
999 :
1000 0 : if (exclusive_bits)
1001 : ASSERT(failed_start);
1002 : else
1003 : ASSERT(failed_start == NULL && failed_state == NULL);
1004 : again:
1005 0 : if (!prealloc) {
1006 : /*
1007 : * Don't care for allocation failure here because we might end
1008 : * up not needing the pre-allocated extent state at all, which
1009 : * is the case if we only have in the tree extent states that
1010 : * cover our input range and don't cover any other range.
1011 : * If we end up needing a new extent state we allocate it later.
1012 : */
1013 0 : prealloc = alloc_extent_state(mask);
1014 : }
1015 :
1016 0 : spin_lock(&tree->lock);
1017 0 : if (cached_state && *cached_state) {
1018 0 : state = *cached_state;
1019 0 : if (state->start <= start && state->end > start &&
1020 : extent_state_in_tree(state))
1021 0 : goto hit_next;
1022 : }
1023 : /*
1024 : * This search will find all the extents that end after our range
1025 : * starts.
1026 : */
1027 0 : state = tree_search_for_insert(tree, start, &p, &parent);
1028 0 : if (!state) {
1029 0 : prealloc = alloc_extent_state_atomic(prealloc);
1030 0 : if (!prealloc)
1031 0 : goto search_again;
1032 0 : prealloc->start = start;
1033 0 : prealloc->end = end;
1034 0 : insert_state_fast(tree, prealloc, p, parent, bits, changeset);
1035 0 : cache_state(prealloc, cached_state);
1036 0 : prealloc = NULL;
1037 0 : goto out;
1038 : }
1039 0 : hit_next:
1040 0 : last_start = state->start;
1041 0 : last_end = state->end;
1042 :
1043 : /*
1044 : * | ---- desired range ---- |
1045 : * | state |
1046 : *
1047 : * Just lock what we found and keep going
1048 : */
1049 0 : if (state->start == start && state->end <= end) {
1050 0 : if (state->state & exclusive_bits) {
1051 0 : *failed_start = state->start;
1052 0 : cache_state(state, failed_state);
1053 0 : err = -EEXIST;
1054 0 : goto out;
1055 : }
1056 :
1057 0 : set_state_bits(tree, state, bits, changeset);
1058 0 : cache_state(state, cached_state);
1059 0 : merge_state(tree, state);
1060 0 : if (last_end == (u64)-1)
1061 0 : goto out;
1062 0 : start = last_end + 1;
1063 0 : state = next_state(state);
1064 0 : if (start < end && state && state->start == start &&
1065 : !need_resched())
1066 0 : goto hit_next;
1067 0 : goto search_again;
1068 : }
1069 :
1070 : /*
1071 : * | ---- desired range ---- |
1072 : * | state |
1073 : * or
1074 : * | ------------- state -------------- |
1075 : *
1076 : * We need to split the extent we found, and may flip bits on second
1077 : * half.
1078 : *
1079 : * If the extent we found extends past our range, we just split and
1080 : * search again. It'll get split again the next time though.
1081 : *
1082 : * If the extent we found is inside our range, we set the desired bit
1083 : * on it.
1084 : */
1085 0 : if (state->start < start) {
1086 0 : if (state->state & exclusive_bits) {
1087 0 : *failed_start = start;
1088 0 : cache_state(state, failed_state);
1089 0 : err = -EEXIST;
1090 0 : goto out;
1091 : }
1092 :
1093 : /*
1094 : * If this extent already has all the bits we want set, then
1095 : * skip it, not necessary to split it or do anything with it.
1096 : */
1097 0 : if ((state->state & bits) == bits) {
1098 0 : start = state->end + 1;
1099 0 : cache_state(state, cached_state);
1100 0 : goto search_again;
1101 : }
1102 :
1103 0 : prealloc = alloc_extent_state_atomic(prealloc);
1104 0 : if (!prealloc)
1105 0 : goto search_again;
1106 0 : err = split_state(tree, state, prealloc, start);
1107 0 : if (err)
1108 0 : extent_io_tree_panic(tree, err);
1109 :
1110 0 : prealloc = NULL;
1111 0 : if (err)
1112 : goto out;
1113 0 : if (state->end <= end) {
1114 0 : set_state_bits(tree, state, bits, changeset);
1115 0 : cache_state(state, cached_state);
1116 0 : merge_state(tree, state);
1117 0 : if (last_end == (u64)-1)
1118 0 : goto out;
1119 0 : start = last_end + 1;
1120 0 : state = next_state(state);
1121 0 : if (start < end && state && state->start == start &&
1122 : !need_resched())
1123 0 : goto hit_next;
1124 : }
1125 0 : goto search_again;
1126 : }
1127 : /*
1128 : * | ---- desired range ---- |
1129 : * | state | or | state |
1130 : *
1131 : * There's a hole, we need to insert something in it and ignore the
1132 : * extent we found.
1133 : */
1134 0 : if (state->start > start) {
1135 0 : u64 this_end;
1136 0 : if (end < last_start)
1137 : this_end = end;
1138 : else
1139 0 : this_end = last_start - 1;
1140 :
1141 0 : prealloc = alloc_extent_state_atomic(prealloc);
1142 0 : if (!prealloc)
1143 0 : goto search_again;
1144 :
1145 : /*
1146 : * Avoid freeing 'prealloc' if it can be merged with the later
1147 : * extent.
1148 : */
1149 0 : prealloc->start = start;
1150 0 : prealloc->end = this_end;
1151 0 : err = insert_state(tree, prealloc, bits, changeset);
1152 0 : if (err)
1153 0 : extent_io_tree_panic(tree, err);
1154 :
1155 0 : cache_state(prealloc, cached_state);
1156 0 : prealloc = NULL;
1157 0 : start = this_end + 1;
1158 0 : goto search_again;
1159 : }
1160 : /*
1161 : * | ---- desired range ---- |
1162 : * | state |
1163 : *
1164 : * We need to split the extent, and set the bit on the first half
1165 : */
1166 0 : if (state->start <= end && state->end > end) {
1167 0 : if (state->state & exclusive_bits) {
1168 0 : *failed_start = start;
1169 0 : cache_state(state, failed_state);
1170 0 : err = -EEXIST;
1171 0 : goto out;
1172 : }
1173 :
1174 0 : prealloc = alloc_extent_state_atomic(prealloc);
1175 0 : if (!prealloc)
1176 0 : goto search_again;
1177 0 : err = split_state(tree, state, prealloc, end + 1);
1178 0 : if (err)
1179 0 : extent_io_tree_panic(tree, err);
1180 :
1181 0 : set_state_bits(tree, prealloc, bits, changeset);
1182 0 : cache_state(prealloc, cached_state);
1183 0 : merge_state(tree, prealloc);
1184 0 : prealloc = NULL;
1185 0 : goto out;
1186 : }
1187 :
1188 0 : search_again:
1189 0 : if (start > end)
1190 0 : goto out;
1191 0 : spin_unlock(&tree->lock);
1192 0 : if (gfpflags_allow_blocking(mask))
1193 0 : cond_resched();
1194 0 : goto again;
1195 :
1196 0 : out:
1197 0 : spin_unlock(&tree->lock);
1198 0 : if (prealloc)
1199 0 : free_extent_state(prealloc);
1200 :
1201 0 : return err;
1202 :
1203 : }
1204 :
1205 0 : int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1206 : u32 bits, struct extent_state **cached_state)
1207 : {
1208 0 : return __set_extent_bit(tree, start, end, bits, NULL, NULL,
1209 : cached_state, NULL);
1210 : }
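
/*
 * Illustrative sketch of a common caller pattern (hypothetical code, assuming
 * the unlock_extent() helper from extent-io-tree.h): bits like EXTENT_DELALLOC
 * are usually set while the range is locked, reusing the cached state recorded
 * by lock_extent():
 *
 *	struct extent_state *cached = NULL;
 *
 *	lock_extent(&inode->io_tree, start, end, &cached);
 *	set_extent_bit(&inode->io_tree, start, end, EXTENT_DELALLOC, &cached);
 *	...
 *	unlock_extent(&inode->io_tree, start, end, &cached);
 *
 * Note that cache_state() only records states carrying EXTENT_LOCKED or
 * EXTENT_BOUNDARY, so the cached state is effectively tied to the lock.
 */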
1211 :
1212 : /*
1213 : * Convert all bits in a given range from one bit to another
1214 : *
1215 : * @tree: the io tree to search
1216 : * @start: the start offset in bytes
1217 : * @end: the end offset in bytes (inclusive)
1218 : * @bits: the bits to set in this range
1219 : * @clear_bits: the bits to clear in this range
1220 : * @cached_state: state that we're going to cache
1221 : *
1222 : * This will go through and set bits for the given range. If any states exist
1223 : * already in this range they are set with the given bit and cleared of the
1224 : * clear_bits. This is only meant to be used by things that are mergeable, ie.
1225 : * converting from say DELALLOC to DIRTY. This is not meant to be used with
1226 : * boundary bits like LOCK.
1227 : *
1228 : * All allocations are done with GFP_NOFS.
1229 : */
1230 0 : int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1231 : u32 bits, u32 clear_bits,
1232 : struct extent_state **cached_state)
1233 : {
1234 0 : struct extent_state *state;
1235 0 : struct extent_state *prealloc = NULL;
1236 0 : struct rb_node **p = NULL;
1237 0 : struct rb_node *parent = NULL;
1238 0 : int err = 0;
1239 0 : u64 last_start;
1240 0 : u64 last_end;
1241 0 : bool first_iteration = true;
1242 :
1243 0 : btrfs_debug_check_extent_io_range(tree, start, end);
1244 0 : trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
1245 : clear_bits);
1246 :
1247 0 : again:
1248 0 : if (!prealloc) {
1249 : /*
1250 : * Best effort, don't worry if extent state allocation fails
1251 : * here for the first iteration. We might have a cached state
1252 : * that matches exactly the target range, in which case no
1253 : * extent state allocations are needed. We'll only know this
1254 : * after locking the tree.
1255 : */
1256 0 : prealloc = alloc_extent_state(GFP_NOFS);
1257 0 : if (!prealloc && !first_iteration)
1258 : return -ENOMEM;
1259 : }
1260 :
1261 0 : spin_lock(&tree->lock);
1262 0 : if (cached_state && *cached_state) {
1263 0 : state = *cached_state;
1264 0 : if (state->start <= start && state->end > start &&
1265 : extent_state_in_tree(state))
1266 0 : goto hit_next;
1267 : }
1268 :
1269 : /*
1270 : * This search will find all the extents that end after our range
1271 : * starts.
1272 : */
1273 0 : state = tree_search_for_insert(tree, start, &p, &parent);
1274 0 : if (!state) {
1275 0 : prealloc = alloc_extent_state_atomic(prealloc);
1276 0 : if (!prealloc) {
1277 0 : err = -ENOMEM;
1278 0 : goto out;
1279 : }
1280 0 : prealloc->start = start;
1281 0 : prealloc->end = end;
1282 0 : insert_state_fast(tree, prealloc, p, parent, bits, NULL);
1283 0 : cache_state(prealloc, cached_state);
1284 0 : prealloc = NULL;
1285 0 : goto out;
1286 : }
1287 0 : hit_next:
1288 0 : last_start = state->start;
1289 0 : last_end = state->end;
1290 :
1291 : /*
1292 : * | ---- desired range ---- |
1293 : * | state |
1294 : *
1295 : * Just lock what we found and keep going.
1296 : */
1297 0 : if (state->start == start && state->end <= end) {
1298 0 : set_state_bits(tree, state, bits, NULL);
1299 0 : cache_state(state, cached_state);
1300 0 : state = clear_state_bit(tree, state, clear_bits, 0, NULL);
1301 0 : if (last_end == (u64)-1)
1302 0 : goto out;
1303 0 : start = last_end + 1;
1304 0 : if (start < end && state && state->start == start &&
1305 : !need_resched())
1306 0 : goto hit_next;
1307 0 : goto search_again;
1308 : }
1309 :
1310 : /*
1311 : * | ---- desired range ---- |
1312 : * | state |
1313 : * or
1314 : * | ------------- state -------------- |
1315 : *
1316 : * We need to split the extent we found, and may flip bits on second
1317 : * half.
1318 : *
1319 : * If the extent we found extends past our range, we just split and
1320 : * search again. It'll get split again the next time though.
1321 : *
1322 : * If the extent we found is inside our range, we set the desired bit
1323 : * on it.
1324 : */
1325 0 : if (state->start < start) {
1326 0 : prealloc = alloc_extent_state_atomic(prealloc);
1327 0 : if (!prealloc) {
1328 0 : err = -ENOMEM;
1329 0 : goto out;
1330 : }
1331 0 : err = split_state(tree, state, prealloc, start);
1332 0 : if (err)
1333 0 : extent_io_tree_panic(tree, err);
1334 0 : prealloc = NULL;
1335 0 : if (err)
1336 : goto out;
1337 0 : if (state->end <= end) {
1338 0 : set_state_bits(tree, state, bits, NULL);
1339 0 : cache_state(state, cached_state);
1340 0 : state = clear_state_bit(tree, state, clear_bits, 0, NULL);
1341 0 : if (last_end == (u64)-1)
1342 0 : goto out;
1343 0 : start = last_end + 1;
1344 0 : if (start < end && state && state->start == start &&
1345 : !need_resched())
1346 0 : goto hit_next;
1347 : }
1348 0 : goto search_again;
1349 : }
1350 : /*
1351 : * | ---- desired range ---- |
1352 : * | state | or | state |
1353 : *
1354 : * There's a hole, we need to insert something in it and ignore the
1355 : * extent we found.
1356 : */
1357 0 : if (state->start > start) {
1358 0 : u64 this_end;
1359 0 : if (end < last_start)
1360 : this_end = end;
1361 : else
1362 0 : this_end = last_start - 1;
1363 :
1364 0 : prealloc = alloc_extent_state_atomic(prealloc);
1365 0 : if (!prealloc) {
1366 0 : err = -ENOMEM;
1367 0 : goto out;
1368 : }
1369 :
1370 : /*
1371 : * Avoid freeing 'prealloc' if it can be merged with the later
1372 : * extent.
1373 : */
1374 0 : prealloc->start = start;
1375 0 : prealloc->end = this_end;
1376 0 : err = insert_state(tree, prealloc, bits, NULL);
1377 0 : if (err)
1378 0 : extent_io_tree_panic(tree, err);
1379 0 : cache_state(prealloc, cached_state);
1380 0 : prealloc = NULL;
1381 0 : start = this_end + 1;
1382 0 : goto search_again;
1383 : }
1384 : /*
1385 : * | ---- desired range ---- |
1386 : * | state |
1387 : *
1388 : * We need to split the extent, and set the bit on the first half.
1389 : */
1390 0 : if (state->start <= end && state->end > end) {
1391 0 : prealloc = alloc_extent_state_atomic(prealloc);
1392 0 : if (!prealloc) {
1393 0 : err = -ENOMEM;
1394 0 : goto out;
1395 : }
1396 :
1397 0 : err = split_state(tree, state, prealloc, end + 1);
1398 0 : if (err)
1399 0 : extent_io_tree_panic(tree, err);
1400 :
1401 0 : set_state_bits(tree, prealloc, bits, NULL);
1402 0 : cache_state(prealloc, cached_state);
1403 0 : clear_state_bit(tree, prealloc, clear_bits, 0, NULL);
1404 0 : prealloc = NULL;
1405 0 : goto out;
1406 : }
1407 :
1408 0 : search_again:
1409 0 : if (start > end)
1410 0 : goto out;
1411 0 : spin_unlock(&tree->lock);
1412 0 : cond_resched();
1413 0 : first_iteration = false;
1414 0 : goto again;
1415 :
1416 0 : out:
1417 0 : spin_unlock(&tree->lock);
1418 0 : if (prealloc)
1419 0 : free_extent_state(prealloc);
1420 :
1421 : return err;
1422 : }
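
/*
 * Illustrative example matching the DELALLOC to DIRTY case mentioned above
 * (hypothetical caller code):
 *
 *	ret = convert_extent_bit(tree, start, end, EXTENT_DIRTY,
 *				 EXTENT_DELALLOC, &cached_state);
 *
 * Every state overlapping [start, end] ends up with EXTENT_DIRTY set and
 * EXTENT_DELALLOC cleared; both must be mergeable bits, never EXTENT_LOCKED.
 */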
1423 :
1424 : /*
1425 : * Find the first range that has @bits not set. This range could start before
1426 : * @start.
1427 : *
1428 : * @tree: the tree to search
1429 : * @start: offset at/after which the found extent should start
1430 : * @start_ret: records the beginning of the range
1431 : * @end_ret: records the end of the range (inclusive)
1432 : * @bits: the set of bits which must be unset
1433 : *
1434 : * Since an unallocated range is also considered one which doesn't have the bits
1435 : * set, it's possible that @end_ret contains -1; this happens in case the range
1436 : * spans (last_range_end, end of device]. In this case it's up to the caller to
1437 : * trim @end_ret to the appropriate size.
1438 : */
1439 0 : void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
1440 : u64 *start_ret, u64 *end_ret, u32 bits)
1441 : {
1442 0 : struct extent_state *state;
1443 0 : struct extent_state *prev = NULL, *next = NULL;
1444 :
1445 0 : spin_lock(&tree->lock);
1446 :
1447 : /* Find first extent with bits cleared */
1448 0 : while (1) {
1449 0 : state = tree_search_prev_next(tree, start, &prev, &next);
1450 0 : if (!state && !next && !prev) {
1451 : /*
1452 : * Tree is completely empty, send full range and let
1453 : * caller deal with it
1454 : */
1455 0 : *start_ret = 0;
1456 0 : *end_ret = -1;
1457 0 : goto out;
1458 0 : } else if (!state && !next) {
1459 : /*
1460 : * We are past the last allocated chunk, set start at
1461 : * the end of the last extent.
1462 : */
1463 0 : *start_ret = prev->end + 1;
1464 0 : *end_ret = -1;
1465 0 : goto out;
1466 0 : } else if (!state) {
1467 0 : state = next;
1468 : }
1469 :
1470 : /*
1471 : * At this point 'state' either contains 'start' or start is
1472 : * before 'state'
1473 : */
1474 0 : if (in_range(start, state->start, state->end - state->start + 1)) {
1475 0 : if (state->state & bits) {
1476 : /*
1477 : * |--range with bits sets--|
1478 : * |
1479 : * start
1480 : */
1481 : start = state->end + 1;
1482 : } else {
1483 : /*
1484 : * 'start' falls within a range that doesn't
1485 : * have the bits set, so take its start as the
1486 : * beginning of the desired range
1487 : *
1488 : * |--range with bits cleared----|
1489 : * |
1490 : * start
1491 : */
1492 0 : *start_ret = state->start;
1493 0 : break;
1494 : }
1495 : } else {
1496 : /*
1497 : * |---prev range---|---hole/unset---|---node range---|
1498 : * |
1499 : * start
1500 : *
1501 : * or
1502 : *
1503 : * |---hole/unset--||--first node--|
1504 : * 0 |
1505 : * start
1506 : */
1507 0 : if (prev)
1508 0 : *start_ret = prev->end + 1;
1509 : else
1510 0 : *start_ret = 0;
1511 : break;
1512 : }
1513 : }
1514 :
1515 : /*
1516 : * Find the longest stretch from start until an entry which has the
1517 : * bits set
1518 : */
1519 0 : while (state) {
1520 0 : if (state->end >= start && !(state->state & bits)) {
1521 0 : *end_ret = state->end;
1522 : } else {
1523 0 : *end_ret = state->start - 1;
1524 0 : break;
1525 : }
1526 0 : state = next_state(state);
1527 : }
1528 0 : out:
1529 0 : spin_unlock(&tree->lock);
1530 0 : }
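
/*
 * Illustrative example (hypothetical tree contents): if the only record with
 * the searched bits set is [64K, 128K - 1], then
 *
 *	find_first_clear_extent_bit(tree, 0, &start_ret, &end_ret, bits);
 *
 * reports the hole in front of it (start_ret == 0, end_ret == 64K - 1), while
 * a search starting inside the record, say at 100K, skips past it and reports
 * start_ret == 128K with end_ret == (u64)-1, because everything after the
 * last record counts as clear.
 */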
1531 :
1532 : /*
1533 : * Count the number of bytes in the tree that have a given bit(s) set for a
1534 : * given range.
1535 : *
1536 : * @tree: The io tree to search.
1537 : * @start: The start offset of the range. This value is updated to the
1538 : * offset of the first byte found with the given bit(s), so it
1539 : * can end up being bigger than the initial value.
1540 : * @search_end: The end offset (inclusive value) of the search range.
1541 : * @max_bytes: The maximum byte count we are interested in. The search stops
1542 : * once it reaches this count.
1543 : * @bits: The bits the range must have in order to be accounted for.
1544 : * If multiple bits are set, then only subranges that have all
1545 : * the bits set are accounted for.
1546 : * @contig: Indicate if we should ignore holes in the range or not. If
1547 : * this is true, then stop once we find a hole.
1548 : * @cached_state: A cached state to be used across multiple calls to this
1549 : * function in order to speedup searches. Use NULL if this is
1550 : * called only once or if each call does not start where the
1551 : * previous one ended.
1552 : *
1553 : * Returns the total number of bytes found within the given range that have
1554 : * all given bits set. If the returned number of bytes is greater than zero
1555 : * then @start is updated with the offset of the first byte with the bits set.
1556 : */
1557 0 : u64 count_range_bits(struct extent_io_tree *tree,
1558 : u64 *start, u64 search_end, u64 max_bytes,
1559 : u32 bits, int contig,
1560 : struct extent_state **cached_state)
1561 : {
1562 0 : struct extent_state *state = NULL;
1563 0 : struct extent_state *cached;
1564 0 : u64 cur_start = *start;
1565 0 : u64 total_bytes = 0;
1566 0 : u64 last = 0;
1567 0 : int found = 0;
1568 :
1569 0 : if (WARN_ON(search_end < cur_start))
1570 : return 0;
1571 :
1572 0 : spin_lock(&tree->lock);
1573 :
1574 0 : if (!cached_state || !*cached_state)
1575 0 : goto search;
1576 :
1577 0 : cached = *cached_state;
1578 :
1579 0 : if (!extent_state_in_tree(cached))
1580 0 : goto search;
1581 :
1582 0 : if (cached->start <= cur_start && cur_start <= cached->end) {
1583 : state = cached;
1584 0 : } else if (cached->start > cur_start) {
1585 0 : struct extent_state *prev;
1586 :
1587 : /*
1588 : * The cached state starts after our search range's start. Check
1589 : * if the previous state record starts at or before the range we
1590 : * are looking for, and if so, use it - this is a common case
1591 : * when there are holes between records in the tree. If there is
1592 : * no previous state record, we can start from our cached state.
1593 : */
1594 0 : prev = prev_state(cached);
1595 0 : if (!prev)
1596 : state = cached;
1597 0 : else if (prev->start <= cur_start && cur_start <= prev->end)
1598 : state = prev;
1599 : }
1600 :
1601 : /*
1602 : * This search will find all the extents that end after our range
1603 : * starts.
1604 : */
1605 0 : search:
1606 0 : if (!state)
1607 0 : state = tree_search(tree, cur_start);
1608 :
1609 0 : while (state) {
1610 0 : if (state->start > search_end)
1611 : break;
1612 0 : if (contig && found && state->start > last + 1)
1613 : break;
1614 0 : if (state->end >= cur_start && (state->state & bits) == bits) {
1615 0 : total_bytes += min(search_end, state->end) + 1 -
1616 0 : max(cur_start, state->start);
1617 0 : if (total_bytes >= max_bytes)
1618 : break;
1619 0 : if (!found) {
1620 0 : *start = max(cur_start, state->start);
1621 0 : found = 1;
1622 : }
1623 0 : last = state->end;
1624 0 : } else if (contig && found) {
1625 : break;
1626 : }
1627 0 : state = next_state(state);
1628 : }
1629 :
1630 0 : if (cached_state) {
1631 0 : free_extent_state(*cached_state);
1632 0 : *cached_state = state;
1633 0 : if (state)
1634 0 : refcount_inc(&state->refs);
1635 : }
1636 :
1637 0 : spin_unlock(&tree->lock);
1638 :
1639 0 : return total_bytes;
1640 : }
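
/*
 * Illustrative sketch (hypothetical caller code): count the delalloc bytes in
 * the first megabyte of an inode, with 'start' updated to the first byte that
 * actually has the bit set:
 *
 *	u64 start = 0;
 *	u64 bytes;
 *
 *	bytes = count_range_bits(&inode->io_tree, &start, SZ_1M - 1, (u64)-1,
 *				 EXTENT_DELALLOC, 0, NULL);
 *
 * Passing 1 for 'contig' would stop the accounting at the first hole instead
 * of summing every matching subrange.
 */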
1641 :
1642 : /*
1643 : * Search a range in the state tree for a given mask. If 'filled' == 1, this
1644 : * returns 1 only if the whole [start, end] range has the bits set. Otherwise, 1
1645 : * is returned if any bit in the range is found set.
1646 : */
1647 0 : int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1648 : u32 bits, int filled, struct extent_state *cached)
1649 : {
1650 0 : struct extent_state *state = NULL;
1651 0 : int bitset = 0;
1652 :
1653 0 : spin_lock(&tree->lock);
1654 0 : if (cached && extent_state_in_tree(cached) && cached->start <= start &&
1655 0 : cached->end > start)
1656 : state = cached;
1657 : else
1658 0 : state = tree_search(tree, start);
1659 0 : while (state && start <= end) {
1660 0 : if (filled && state->start > start) {
1661 : bitset = 0;
1662 : break;
1663 : }
1664 :
1665 0 : if (state->start > end)
1666 : break;
1667 :
1668 0 : if (state->state & bits) {
1669 0 : bitset = 1;
1670 0 : if (!filled)
1671 : break;
1672 0 : } else if (filled) {
1673 : bitset = 0;
1674 : break;
1675 : }
1676 :
1677 0 : if (state->end == (u64)-1)
1678 : break;
1679 :
1680 0 : start = state->end + 1;
1681 0 : if (start > end)
1682 : break;
1683 0 : state = next_state(state);
1684 : }
1685 :
1686 : /* We ran out of states and were still inside of our range. */
1687 0 : if (filled && !state)
1688 0 : bitset = 0;
1689 0 : spin_unlock(&tree->lock);
1690 0 : return bitset;
1691 : }
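
/*
 * Illustrative sketch of the two modes (hypothetical caller code): with
 * 'filled' == 1 the whole range must be covered, with 'filled' == 0 a single
 * matching byte is enough:
 *
 *	all_uptodate = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1,
 *				      cached);
 *	any_locked = test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL);
 */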
1692 :
1693 : /* Wrappers around set/clear extent bit */
1694 0 : int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1695 : u32 bits, struct extent_changeset *changeset)
1696 : {
1697 : /*
1698 : * We don't support EXTENT_LOCKED yet, as current changeset will
1699 : * record any bits changed, so for EXTENT_LOCKED case, it will
1700 : * either fail with -EEXIST or changeset will record the whole
1701 : * range.
1702 : */
1703 0 : ASSERT(!(bits & EXTENT_LOCKED));
1704 :
1705 0 : return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
1706 : }
1707 :
1708 0 : int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1709 : u32 bits, struct extent_changeset *changeset)
1710 : {
1711 : /*
1712 : * Don't support EXTENT_LOCKED case, same reason as
1713 : * set_record_extent_bits().
1714 : */
1715 0 : ASSERT(!(bits & EXTENT_LOCKED));
1716 :
1717 0 : return __clear_extent_bit(tree, start, end, bits, NULL, changeset);
1718 : }
1719 :
1720 0 : int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
1721 : struct extent_state **cached)
1722 : {
1723 0 : int err;
1724 0 : u64 failed_start;
1725 :
1726 0 : err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
1727 : NULL, cached, NULL);
1728 0 : if (err == -EEXIST) {
1729 0 : if (failed_start > start)
1730 0 : clear_extent_bit(tree, start, failed_start - 1,
1731 : EXTENT_LOCKED, cached);
1732 0 : return 0;
1733 : }
1734 : return 1;
1735 : }
1736 :
1737 : /*
1738 : * Either insert or lock the state struct between start and end, waiting for any
1739 : * conflicting EXTENT_LOCKED range to be released before returning.
1740 : */
1741 0 : int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
1742 : struct extent_state **cached_state)
1743 : {
1744 0 : struct extent_state *failed_state = NULL;
1745 0 : int err;
1746 0 : u64 failed_start;
1747 :
1748 0 : err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
1749 : &failed_state, cached_state, NULL);
1750 0 : while (err == -EEXIST) {
1751 0 : if (failed_start != start)
1752 0 : clear_extent_bit(tree, start, failed_start - 1,
1753 : EXTENT_LOCKED, cached_state);
1754 :
1755 0 : wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED,
1756 : &failed_state);
1757 0 : err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
1758 : &failed_start, &failed_state,
1759 : cached_state, NULL);
1760 : }
1761 0 : return err;
1762 : }
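
/*
 * Illustrative sketch of the locking pattern these helpers implement
 * (hypothetical caller code, assuming the unlock_extent() helper from
 * extent-io-tree.h):
 *
 *	struct extent_state *cached = NULL;
 *
 *	if (!try_lock_extent(tree, start, end, &cached))
 *		lock_extent(tree, start, end, &cached);
 *	... work on [start, end] ...
 *	unlock_extent(tree, start, end, &cached);
 *
 * try_lock_extent() returns 1 on success and 0 if part of the range was
 * already locked, after undoing whatever partial lock it managed to take;
 * lock_extent() instead waits for the conflicting range to be unlocked.
 */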
1763 :
1764 0 : void __cold extent_state_free_cachep(void)
1765 : {
1766 0 : btrfs_extent_state_leak_debug_check();
1767 0 : kmem_cache_destroy(extent_state_cache);
1768 0 : }
1769 :
1770 2 : int __init extent_state_init_cachep(void)
1771 : {
1772 2 : extent_state_cache = kmem_cache_create("btrfs_extent_state",
1773 : sizeof(struct extent_state), 0,
1774 : SLAB_MEM_SPREAD, NULL);
1775 2 : if (!extent_state_cache)
1776 0 : return -ENOMEM;
1777 :
1778 : return 0;
1779 : }