Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (C) 2007 Oracle. All rights reserved.
4 : */
5 :
6 : #include <linux/slab.h>
7 : #include <linux/blkdev.h>
8 : #include <linux/writeback.h>
9 : #include <linux/sched/mm.h>
10 : #include "messages.h"
11 : #include "misc.h"
12 : #include "ctree.h"
13 : #include "transaction.h"
14 : #include "btrfs_inode.h"
15 : #include "extent_io.h"
16 : #include "disk-io.h"
17 : #include "compression.h"
18 : #include "delalloc-space.h"
19 : #include "qgroup.h"
20 : #include "subpage.h"
21 : #include "file.h"
22 : #include "super.h"
23 :
24 : static struct kmem_cache *btrfs_ordered_extent_cache;
25 :
26 : static u64 entry_end(struct btrfs_ordered_extent *entry)
27 : {
28 6424966 : if (entry->file_offset + entry->num_bytes < entry->file_offset)
29 0 : return (u64)-1;
30 : return entry->file_offset + entry->num_bytes;
31 : }
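/*
 * Illustrative userspace sketch of the saturating-add guard in
 * entry_end() above; all names here are hypothetical stand-ins, not
 * kernel code. If start + len wraps past U64_MAX, clamp the range end
 * instead of returning a wrapped, smaller value.
 */
#include <assert.h>
#include <stdint.h>

static uint64_t range_end(uint64_t start, uint64_t len)
{
	if (start + len < start)	/* unsigned addition wrapped around */
		return UINT64_MAX;	/* clamp to the maximum offset */
	return start + len;
}

int main(void)
{
	assert(range_end(100, 20) == 120);
	assert(range_end(UINT64_MAX - 1, 16) == UINT64_MAX);	/* clamped */
	return 0;
}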
32 :
33 : /* Returns NULL if the insertion worked, or the already-existing node it
34 : * found in the tree.
35 : */
36 3659746 : static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
37 : struct rb_node *node)
38 : {
39 3659746 : struct rb_node **p = &root->rb_node;
40 3659746 : struct rb_node *parent = NULL;
41 3659746 : struct btrfs_ordered_extent *entry;
42 :
43 10516988 : while (*p) {
44 6857242 : parent = *p;
45 6857242 : entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);
46 :
47 6857242 : if (file_offset < entry->file_offset)
48 718769 : p = &(*p)->rb_left;
49 6138473 : else if (file_offset >= entry_end(entry))
50 6138473 : p = &(*p)->rb_right;
51 : else
52 0 : return parent;
53 : }
54 :
55 3659746 : rb_link_node(node, parent, p);
56 3659746 : rb_insert_color(node, root);
57 3659746 : return NULL;
58 : }
59 :
60 : /*
61 : * Look for a given offset in the tree, and if it can't be found, return the
62 : * entry with the first lesser offset.
63 : */
64 104120149 : static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
65 : struct rb_node **prev_ret)
66 : {
67 104120149 : struct rb_node *n = root->rb_node;
68 104120149 : struct rb_node *prev = NULL;
69 104120149 : struct rb_node *test;
70 104120149 : struct btrfs_ordered_extent *entry;
71 104120149 : struct btrfs_ordered_extent *prev_entry = NULL;
72 :
73 113223225 : while (n) {
74 11672544 : entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
75 11672544 : prev = n;
76 11672544 : prev_entry = entry;
77 :
78 11672544 : if (file_offset < entry->file_offset)
79 2329510 : n = n->rb_left;
80 9343034 : else if (file_offset >= entry_end(entry))
81 6773566 : n = n->rb_right;
82 : else
83 2569468 : return n;
84 : }
85 101550681 : if (!prev_ret)
86 : return NULL;
87 :
88 104650044 : while (prev && file_offset >= entry_end(prev_entry)) {
89 1629931 : test = rb_next(prev);
90 1625380 : if (!test)
91 : break;
92 191064 : prev_entry = rb_entry(test, struct btrfs_ordered_extent,
93 : rb_node);
94 191064 : if (file_offset < entry_end(prev_entry))
95 : break;
96 :
97 : prev = test;
98 : }
99 101546130 : if (prev)
100 3086645 : prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
101 : rb_node);
102 105114006 : while (prev && file_offset < entry_end(prev_entry)) {
103 1458312 : test = rb_prev(prev);
104 1469395 : if (!test)
105 : break;
106 242270 : prev_entry = rb_entry(test, struct btrfs_ordered_extent,
107 : rb_node);
108 242270 : prev = test;
109 : }
110 101557213 : *prev_ret = prev;
111 101557213 : return NULL;
112 : }
113 :
114 : static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
115 : u64 len)
116 : {
117 8946438 : if (file_offset + len <= entry->file_offset ||
118 3132327 : entry->file_offset + entry->num_bytes <= file_offset)
119 2209243 : return 0;
120 : return 1;
121 : }
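/*
 * Illustrative userspace sketch of the same half-open overlap test as
 * range_overlaps() above (hypothetical names, not kernel code): ranges
 * [a, a + alen) and [b, b + blen) are disjoint exactly when one ends at
 * or before the start of the other.
 */
#include <assert.h>
#include <stdint.h>

static int ranges_overlap(uint64_t a, uint64_t alen, uint64_t b, uint64_t blen)
{
	if (a + alen <= b || b + blen <= a)
		return 0;	/* disjoint: the end offset is exclusive */
	return 1;
}

int main(void)
{
	assert(ranges_overlap(0, 4096, 4095, 1));	/* share byte 4095 */
	assert(!ranges_overlap(0, 4096, 4096, 1));	/* touch only at 4096 */
	assert(ranges_overlap(4096, 4096, 0, 8192));	/* fully contained */
	return 0;
}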
122 :
123 : /*
124 : * Look for the first ordered struct that has this offset, otherwise
125 : * the first one less than this offset.
126 : */
127 105173084 : static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
128 : u64 file_offset)
129 : {
130 105173084 : struct rb_root *root = &tree->tree;
131 105173084 : struct rb_node *prev = NULL;
132 105173084 : struct rb_node *ret;
133 105173084 : struct btrfs_ordered_extent *entry;
134 :
135 105173084 : if (tree->last) {
136 4500911 : entry = rb_entry(tree->last, struct btrfs_ordered_extent,
137 : rb_node);
138 4500911 : if (in_range(file_offset, entry->file_offset, entry->num_bytes))
139 : return tree->last;
140 : }
141 104248716 : ret = __tree_search(root, file_offset, &prev);
142 104140595 : if (!ret)
143 101561032 : ret = prev;
144 104140595 : if (ret)
145 5663586 : tree->last = ret;
146 : return ret;
147 : }
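/*
 * Illustrative userspace sketch of the tree->last idea used by
 * tree_search() above: remember the most recently returned entry and
 * try it before walking the structure again, since endio tends to hit
 * the same ordered extent repeatedly. A sorted array stands in for the
 * rbtree; every name here is a hypothetical stand-in, not kernel code.
 */
#include <stddef.h>
#include <stdint.h>

struct interval { uint64_t start, len; };

struct itree {
	struct interval *items;	/* sorted, non-overlapping */
	size_t count;
	struct interval *last;	/* most recent hit, may be stale */
};

static int contains(const struct interval *iv, uint64_t off)
{
	return off >= iv->start && off - iv->start < iv->len;
}

static struct interval *itree_search(struct itree *t, uint64_t off)
{
	/* Fast path: repeated lookups in the same interval hit the cache. */
	if (t->last && contains(t->last, off))
		return t->last;

	for (size_t i = 0; i < t->count; i++) {	/* stand-in for the rbtree walk */
		if (contains(&t->items[i], off)) {
			t->last = &t->items[i];
			return t->last;
		}
	}
	return NULL;
}

int main(void)
{
	struct interval items[] = { { 0, 4096 }, { 8192, 4096 } };
	struct itree t = { items, 2, NULL };

	itree_search(&t, 100);	/* full walk, caches items[0] */
	return itree_search(&t, 200) == &items[0] ? 0 : 1;	/* cache hit */
}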
148 :
149 3658608 : static struct btrfs_ordered_extent *alloc_ordered_extent(
150 : struct btrfs_inode *inode, u64 file_offset, u64 num_bytes,
151 : u64 ram_bytes, u64 disk_bytenr, u64 disk_num_bytes,
152 : u64 offset, unsigned long flags, int compress_type)
153 : {
154 3658608 : struct btrfs_ordered_extent *entry;
155 3658608 : int ret;
156 :
157 3658608 : if (flags &
158 : ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
159 : /* For nocow write, we can release the qgroup rsv right now */
160 280859 : ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
161 280858 : if (ret < 0)
162 0 : return ERR_PTR(ret);
163 : } else {
164 : /*
165 : * The ordered extent has reserved qgroup space, release now
166 : * and pass the reserved number for qgroup_record to free.
167 : */
168 3377749 : ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes);
169 3377892 : if (ret < 0)
170 0 : return ERR_PTR(ret);
171 : }
172 3658750 : entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
173 3659009 : if (!entry)
174 : return ERR_PTR(-ENOMEM);
175 :
176 3659009 : entry->file_offset = file_offset;
177 3659009 : entry->num_bytes = num_bytes;
178 3659009 : entry->ram_bytes = ram_bytes;
179 3659009 : entry->disk_bytenr = disk_bytenr;
180 3659009 : entry->disk_num_bytes = disk_num_bytes;
181 3659009 : entry->offset = offset;
182 3659009 : entry->bytes_left = num_bytes;
183 3659009 : entry->inode = igrab(&inode->vfs_inode);
184 3659086 : entry->compress_type = compress_type;
185 3659086 : entry->truncated_len = (u64)-1;
186 3659086 : entry->qgroup_rsv = ret;
187 3659086 : entry->flags = flags;
188 3659086 : refcount_set(&entry->refs, 1);
189 3659086 : init_waitqueue_head(&entry->wait);
190 3659003 : INIT_LIST_HEAD(&entry->list);
191 3659003 : INIT_LIST_HEAD(&entry->log_list);
192 3659003 : INIT_LIST_HEAD(&entry->root_extent_list);
193 3659003 : INIT_LIST_HEAD(&entry->work_list);
194 3659003 : init_completion(&entry->completion);
195 :
196 : /*
197 : * We don't need the count_max_extents here, we can assume that all of
198 : * that work has been done at higher layers, so this is truly the
199 : * smallest the extent is going to get.
200 : */
201 3658987 : spin_lock(&inode->lock);
202 3659021 : btrfs_mod_outstanding_extents(inode, 1);
203 3658576 : spin_unlock(&inode->lock);
204 :
205 3658576 : return entry;
206 : }
207 :
208 3658179 : static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
209 : {
210 3658179 : struct btrfs_inode *inode = BTRFS_I(entry->inode);
211 3658179 : struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
212 3658179 : struct btrfs_root *root = inode->root;
213 3658179 : struct btrfs_fs_info *fs_info = root->fs_info;
214 3658179 : struct rb_node *node;
215 :
216 3658179 : trace_btrfs_ordered_extent_add(inode, entry);
217 :
218 3658110 : percpu_counter_add_batch(&fs_info->ordered_bytes, entry->num_bytes,
219 : fs_info->delalloc_batch);
220 :
221 : /* One ref for the tree. */
222 3657987 : refcount_inc(&entry->refs);
223 :
224 3658313 : spin_lock_irq(&tree->lock);
225 3658361 : node = tree_insert(&tree->tree, entry->file_offset, &entry->rb_node);
226 3658085 : if (node)
227 0 : btrfs_panic(fs_info, -EEXIST,
228 : "inconsistency in ordered tree at offset %llu",
229 : entry->file_offset);
230 3658085 : spin_unlock_irq(&tree->lock);
231 :
232 3658269 : spin_lock(&root->ordered_extent_lock);
233 3658509 : list_add_tail(&entry->root_extent_list,
234 : &root->ordered_extents);
235 3658503 : root->nr_ordered_extents++;
236 3658503 : if (root->nr_ordered_extents == 1) {
237 1242150 : spin_lock(&fs_info->ordered_root_lock);
238 1242181 : BUG_ON(!list_empty(&root->ordered_root));
239 1242181 : list_add_tail(&root->ordered_root, &fs_info->ordered_roots);
240 1242181 : spin_unlock(&fs_info->ordered_root_lock);
241 : }
242 3658534 : spin_unlock(&root->ordered_extent_lock);
243 3658535 : }
244 :
245 : /*
246 : * Add an ordered extent to the per-inode tree.
247 : *
248 : * @inode: Inode that this extent is for.
249 : * @file_offset: Logical offset in file where the extent starts.
250 : * @num_bytes: Logical length of extent in file.
251 : * @ram_bytes: Full length of unencoded data.
252 : * @disk_bytenr: Offset of extent on disk.
253 : * @disk_num_bytes: Size of extent on disk.
254 : * @offset: Offset into unencoded data where file data starts.
255 : * @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*).
256 : * @compress_type: Compression algorithm used for data.
257 : *
258 : * Most of these parameters correspond to &struct btrfs_file_extent_item. The
259 : * tree is given a single reference on the ordered extent that was inserted, and
260 : * the returned pointer is given a second reference.
261 : *
262 : * Return: the new ordered extent or error pointer.
263 : */
264 3658161 : struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
265 : struct btrfs_inode *inode, u64 file_offset,
266 : u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
267 : u64 disk_num_bytes, u64 offset, unsigned long flags,
268 : int compress_type)
269 : {
270 3658161 : struct btrfs_ordered_extent *entry;
271 :
272 3658161 : ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
273 :
274 3658161 : entry = alloc_ordered_extent(inode, file_offset, num_bytes, ram_bytes,
275 : disk_bytenr, disk_num_bytes, offset, flags,
276 : compress_type);
277 3658232 : if (!IS_ERR(entry))
278 3658232 : insert_ordered_extent(entry);
279 3658535 : return entry;
280 : }
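/*
 * Illustrative userspace sketch of the reference-counting contract
 * documented above, with a toy counter standing in for struct
 * btrfs_ordered_extent (hypothetical code, not the kernel API): the
 * tree holds one reference and the pointer returned to the caller
 * holds a second, so the object outlives whichever side finishes first.
 */
#include <assert.h>
#include <stdbool.h>

struct obj { int refs; };

static void obj_get(struct obj *o) { o->refs++; }
static bool obj_put(struct obj *o) { return --o->refs == 0; /* free point */ }

int main(void)
{
	struct obj oe = { .refs = 1 };	/* the caller's reference from alloc */

	obj_get(&oe);			/* second reference taken for the tree */
	assert(!obj_put(&oe));		/* caller drops: object still live */
	assert(obj_put(&oe));		/* tree removal drops the final ref */
	return 0;
}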
281 :
282 : /*
283 : * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
284 : * when an ordered extent is finished. If the list covers more than one
285 : * ordered extent, it is split across multiple ordered extents.
286 : */
287 3717775 : void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
288 : struct btrfs_ordered_sum *sum)
289 : {
290 3717775 : struct btrfs_ordered_inode_tree *tree;
291 :
292 3717775 : tree = &BTRFS_I(entry->inode)->ordered_tree;
293 3717775 : spin_lock_irq(&tree->lock);
294 3718448 : list_add_tail(&sum->list, &entry->list);
295 3717969 : spin_unlock_irq(&tree->lock);
296 3718410 : }
297 :
298 3659080 : static void finish_ordered_fn(struct btrfs_work *work)
299 : {
300 3659080 : struct btrfs_ordered_extent *ordered_extent;
301 :
302 3659080 : ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
303 3659080 : btrfs_finish_ordered_io(ordered_extent);
304 3658712 : }
305 :
306 56937535 : static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
307 : struct page *page, u64 file_offset,
308 : u64 len, bool uptodate)
309 : {
310 56937535 : struct btrfs_inode *inode = BTRFS_I(ordered->inode);
311 56937535 : struct btrfs_fs_info *fs_info = inode->root->fs_info;
312 :
313 56937535 : lockdep_assert_held(&inode->ordered_tree.lock);
314 :
315 56937535 : if (page) {
316 55843128 : ASSERT(page->mapping);
317 55843128 : ASSERT(page_offset(page) <= file_offset);
318 55843128 : ASSERT(file_offset + len <= page_offset(page) + PAGE_SIZE);
319 :
320 : /*
321 : * The Ordered (Private2) bit indicates whether we still have
322 : * pending unfinished IO for the ordered extent.
323 : *
324 : * If the bit is not set, we need to skip to the next range.
325 : */
326 55843128 : if (!btrfs_page_test_ordered(fs_info, page, file_offset, len))
327 : return false;
328 55843128 : btrfs_page_clear_ordered(fs_info, page, file_offset, len);
329 : }
330 :
331 : /* Now we're fine to update the accounting. */
332 56937535 : if (WARN_ON_ONCE(len > ordered->bytes_left)) {
333 0 : btrfs_crit(fs_info,
334 : "bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%llu left=%llu",
335 : inode->root->root_key.objectid, btrfs_ino(inode),
336 : ordered->file_offset, ordered->num_bytes,
337 : len, ordered->bytes_left);
338 0 : ordered->bytes_left = 0;
339 : } else {
340 56937535 : ordered->bytes_left -= len;
341 : }
342 :
343 56937535 : if (!uptodate)
344 129552 : set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
345 :
346 56937535 : if (ordered->bytes_left)
347 : return false;
348 :
349 : /*
350 : * All the IO of the ordered extent is finished, we need to queue
351 : * the finish_func to be executed.
352 : */
353 3659101 : set_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags);
354 3659101 : cond_wake_up(&ordered->wait);
355 3659101 : refcount_inc(&ordered->refs);
356 3659101 : trace_btrfs_ordered_extent_mark_finished(inode, ordered);
357 3659101 : return true;
358 : }
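/*
 * Illustrative userspace sketch of the bytes_left accounting above
 * (hypothetical names, not kernel code): every completed sub-range
 * decrements the remaining byte count, and only the call that drives
 * it to zero reports the whole extent as finished.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct oe { uint64_t bytes_left; };

static bool finish_range(struct oe *o, uint64_t len)
{
	assert(len <= o->bytes_left);	/* the kernel WARNs and clamps here */
	o->bytes_left -= len;
	return o->bytes_left == 0;	/* true only for the final sub-range */
}

int main(void)
{
	struct oe o = { .bytes_left = 12288 };

	assert(!finish_range(&o, 4096));
	assert(!finish_range(&o, 4096));
	assert(finish_range(&o, 4096));	/* last range queues the completion */
	return 0;
}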
359 :
360 3659101 : static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)
361 : {
362 3659101 : struct btrfs_inode *inode = BTRFS_I(ordered->inode);
363 3659101 : struct btrfs_fs_info *fs_info = inode->root->fs_info;
364 3659101 : struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ?
365 3659101 : fs_info->endio_freespace_worker : fs_info->endio_write_workers;
366 :
367 3659101 : btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL);
368 3659098 : btrfs_queue_work(wq, &ordered->work);
369 3659101 : }
370 :
371 56937532 : bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
372 : struct page *page, u64 file_offset, u64 len,
373 : bool uptodate)
374 : {
375 56937532 : struct btrfs_inode *inode = BTRFS_I(ordered->inode);
376 56937532 : unsigned long flags;
377 56937532 : bool ret;
378 :
379 56937532 : trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate);
380 :
381 56937533 : spin_lock_irqsave(&inode->ordered_tree.lock, flags);
382 56937535 : ret = can_finish_ordered_extent(ordered, page, file_offset, len, uptodate);
383 56937535 : spin_unlock_irqrestore(&inode->ordered_tree.lock, flags);
384 :
385 56937535 : if (ret)
386 3659101 : btrfs_queue_ordered_fn(ordered);
387 56937535 : return ret;
388 : }
389 :
390 : /*
391 : * Mark the IO of all ordered extents inside the specified range as finished.
392 : *
393 : * @page: The involved page for the operation.
394 : * For uncompressed buffered IO, the page status also needs to be
395 : * updated to indicate whether the pending ordered io is finished.
396 : * Can be NULL for direct IO and compressed write.
397 : * For these cases, the callers ensure they won't execute the
398 : * endio function twice.
399 : *
400 : * This function is called for endio, thus the range must have ordered
401 : * extent(s) covering it.
402 : */
403 0 : void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
404 : struct page *page, u64 file_offset,
405 : u64 num_bytes, bool uptodate)
406 : {
407 0 : struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
408 0 : struct rb_node *node;
409 0 : struct btrfs_ordered_extent *entry = NULL;
410 0 : unsigned long flags;
411 0 : u64 cur = file_offset;
412 :
413 0 : spin_lock_irqsave(&tree->lock, flags);
414 0 : while (cur < file_offset + num_bytes) {
415 0 : u64 entry_end;
416 0 : u64 end;
417 0 : u32 len;
418 :
419 0 : node = tree_search(tree, cur);
420 : /* No ordered extents at all */
421 0 : if (!node)
422 : break;
423 :
424 0 : entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
425 0 : entry_end = entry->file_offset + entry->num_bytes;
426 : /*
427 : * |<-- OE --->| |
428 : * cur
429 : * Go to next OE.
430 : */
431 0 : if (cur >= entry_end) {
432 0 : node = rb_next(node);
433 : /* No more ordered extents, exit */
434 0 : if (!node)
435 : break;
436 0 : entry = rb_entry(node, struct btrfs_ordered_extent,
437 : rb_node);
438 :
439 : /* Go to next ordered extent and continue */
440 0 : cur = entry->file_offset;
441 0 : continue;
442 : }
443 : /*
444 : * | |<--- OE --->|
445 : * cur
446 : * Go to the start of OE.
447 : */
448 0 : if (cur < entry->file_offset) {
449 0 : cur = entry->file_offset;
450 0 : continue;
451 : }
452 :
453 : /*
454 : * Now we are definitely inside one ordered extent.
455 : *
456 : * |<--- OE --->|
457 : * |
458 : * cur
459 : */
460 0 : end = min(entry->file_offset + entry->num_bytes,
461 : file_offset + num_bytes) - 1;
462 0 : ASSERT(end + 1 - cur < U32_MAX);
463 0 : len = end + 1 - cur;
464 :
465 0 : if (can_finish_ordered_extent(entry, page, cur, len, uptodate)) {
466 0 : spin_unlock_irqrestore(&tree->lock, flags);
467 0 : btrfs_queue_ordered_fn(entry);
468 0 : spin_lock_irqsave(&tree->lock, flags);
469 : }
470 0 : cur += len;
471 : }
472 0 : spin_unlock_irqrestore(&tree->lock, flags);
473 0 : }
474 :
475 : /*
476 : * Finish IO for one ordered extent across a given range. The range can only
477 : * contain one ordered extent.
478 : *
479 : * @cached: The cached ordered extent. If not NULL, we can skip the tree
480 : * search and use the ordered extent directly.
481 : * Will also be used to store the finished ordered extent.
482 : * @file_offset: File offset for the finished IO
483 : * @io_size: Length of the finish IO range
484 : *
485 : * Return true if the ordered extent is finished in the range, and update
486 : * @cached.
487 : * Return false otherwise.
488 : *
489 : * NOTE: The range can NOT cross multiple ordered extents, so the
490 : * caller must ensure it stays within a single ordered extent.
491 : */
492 20118 : bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
493 : struct btrfs_ordered_extent **cached,
494 : u64 file_offset, u64 io_size)
495 : {
496 20118 : struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
497 20118 : struct rb_node *node;
498 20118 : struct btrfs_ordered_extent *entry = NULL;
499 20118 : unsigned long flags;
500 20118 : bool finished = false;
501 :
502 20118 : spin_lock_irqsave(&tree->lock, flags);
503 20118 : if (cached && *cached) {
504 20118 : entry = *cached;
505 20118 : goto have_entry;
506 : }
507 :
508 0 : node = tree_search(tree, file_offset);
509 0 : if (!node)
510 0 : goto out;
511 :
512 0 : entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
513 20118 : have_entry:
514 20118 : if (!in_range(file_offset, entry->file_offset, entry->num_bytes))
515 0 : goto out;
516 :
517 20118 : if (io_size > entry->bytes_left)
518 0 : btrfs_crit(inode->root->fs_info,
519 : "bad ordered accounting left %llu size %llu",
520 : entry->bytes_left, io_size);
521 :
522 20118 : entry->bytes_left -= io_size;
523 :
524 20118 : if (entry->bytes_left == 0) {
525 : /*
526 : * Ensure only one caller can set the flag and finished_ret
527 : * accordingly
528 : */
529 209 : finished = !test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
530 : /* test_and_set_bit implies a barrier */
531 209 : cond_wake_up_nomb(&entry->wait);
532 : }
533 19909 : out:
534 20118 : if (finished && cached && entry) {
535 209 : *cached = entry;
536 209 : refcount_inc(&entry->refs);
537 209 : trace_btrfs_ordered_extent_dec_test_pending(inode, entry);
538 : }
539 20118 : spin_unlock_irqrestore(&tree->lock, flags);
540 20118 : return finished;
541 : }
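/*
 * Illustrative userspace sketch of the "only one caller observes
 * completion" idiom above, using a C11 atomic_flag in place of
 * test_and_set_bit() (hypothetical code, not the kernel API).
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_flag io_done = ATOMIC_FLAG_INIT;

static bool mark_done(void)
{
	/* The first caller gets true; every later caller gets false. */
	return !atomic_flag_test_and_set(&io_done);
}

int main(void)
{
	assert(mark_done());	/* this caller is responsible for cleanup */
	assert(!mark_done());	/* already finished by someone else */
	return 0;
}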
542 :
543 : /*
544 : * Drop a reference on an ordered extent. This will free
545 : * the extent if the last reference is dropped.
546 : */
547 17189776 : void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
548 : {
549 17189776 : struct list_head *cur;
550 17189776 : struct btrfs_ordered_sum *sum;
551 :
552 17189776 : trace_btrfs_ordered_extent_put(BTRFS_I(entry->inode), entry);
553 :
554 17188687 : if (refcount_dec_and_test(&entry->refs)) {
555 3658929 : ASSERT(list_empty(&entry->root_extent_list));
556 3658929 : ASSERT(list_empty(&entry->log_list));
557 3658929 : ASSERT(RB_EMPTY_NODE(&entry->rb_node));
558 3658929 : if (entry->inode)
559 3658929 : btrfs_add_delayed_iput(BTRFS_I(entry->inode));
560 7377961 : while (!list_empty(&entry->list)) {
561 3719109 : cur = entry->list.next;
562 3719109 : sum = list_entry(cur, struct btrfs_ordered_sum, list);
563 3719109 : list_del(&sum->list);
564 3718959 : kvfree(sum);
565 : }
566 3658852 : kmem_cache_free(btrfs_ordered_extent_cache, entry);
567 : }
568 17190166 : }
569 :
570 : /*
571 : * Remove an ordered extent from the tree and wake up any waiters.
572 : * No references are dropped.
573 : */
574 3658820 : void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
575 : struct btrfs_ordered_extent *entry)
576 : {
577 3658820 : struct btrfs_ordered_inode_tree *tree;
578 3658820 : struct btrfs_root *root = btrfs_inode->root;
579 3658820 : struct btrfs_fs_info *fs_info = root->fs_info;
580 3658820 : struct rb_node *node;
581 3658820 : bool pending;
582 3658820 : bool freespace_inode;
583 :
584 : /*
585 : * If this is a free space inode the thread has not acquired the ordered
586 : * extents lockdep map.
587 : */
588 3658820 : freespace_inode = btrfs_is_free_space_inode(btrfs_inode);
589 :
590 3658820 : btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered);
591 : /* This is paired with btrfs_alloc_ordered_extent. */
592 3658820 : spin_lock(&btrfs_inode->lock);
593 3659137 : btrfs_mod_outstanding_extents(btrfs_inode, -1);
594 3658688 : spin_unlock(&btrfs_inode->lock);
595 3658938 : if (root != fs_info->tree_root) {
596 3658897 : u64 release;
597 :
598 7317794 : if (test_bit(BTRFS_ORDERED_ENCODED, &entry->flags))
599 0 : release = entry->disk_num_bytes;
600 : else
601 3658897 : release = entry->num_bytes;
602 3658897 : btrfs_delalloc_release_metadata(btrfs_inode, release, false);
603 : }
604 :
605 3658458 : percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
606 : fs_info->delalloc_batch);
607 :
608 3658384 : tree = &btrfs_inode->ordered_tree;
609 3658384 : spin_lock_irq(&tree->lock);
610 3658797 : node = &entry->rb_node;
611 3658797 : rb_erase(node, &tree->tree);
612 3658664 : RB_CLEAR_NODE(node);
613 3658664 : if (tree->last == node)
614 2087411 : tree->last = NULL;
615 3658664 : set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
616 3659006 : pending = test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags);
617 3658987 : spin_unlock_irq(&tree->lock);
618 :
619 : /*
620 : * The current running transaction is waiting on us, we need to let it
621 : * know that we're complete and wake it up.
622 : */
623 3659049 : if (pending) {
624 8171 : struct btrfs_transaction *trans;
625 :
626 : /*
627 : * The checks for trans are just a formality: it should be set,
628 : * but if it isn't we don't want to deref/assert under the spin
629 : * lock. So be nice and check if trans is set, but ASSERT() so
630 : * that a developer will notice if it isn't set.
631 : */
632 8171 : spin_lock(&fs_info->trans_lock);
633 8171 : trans = fs_info->running_transaction;
634 8171 : if (trans)
635 8171 : refcount_inc(&trans->use_count);
636 8171 : spin_unlock(&fs_info->trans_lock);
637 :
638 8171 : ASSERT(trans);
639 8171 : if (trans) {
640 8171 : if (atomic_dec_and_test(&trans->pending_ordered))
641 1030 : wake_up(&trans->pending_wait);
642 8170 : btrfs_put_transaction(trans);
643 : }
644 : }
645 :
646 3659049 : btrfs_lockdep_release(fs_info, btrfs_trans_pending_ordered);
647 :
648 3659049 : spin_lock(&root->ordered_extent_lock);
649 3659307 : list_del_init(&entry->root_extent_list);
650 3659306 : root->nr_ordered_extents--;
651 :
652 3659306 : trace_btrfs_ordered_extent_remove(btrfs_inode, entry);
653 :
654 3659305 : if (!root->nr_ordered_extents) {
655 1242181 : spin_lock(&fs_info->ordered_root_lock);
656 1242181 : BUG_ON(list_empty(&root->ordered_root));
657 1242181 : list_del_init(&root->ordered_root);
658 1242181 : spin_unlock(&fs_info->ordered_root_lock);
659 : }
660 3659304 : spin_unlock(&root->ordered_extent_lock);
661 3659300 : wake_up(&entry->wait);
662 3659240 : if (!freespace_inode)
663 3659240 : btrfs_lockdep_release(fs_info, btrfs_ordered_extent);
664 3659240 : }
665 :
666 113773 : static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
667 : {
668 113773 : struct btrfs_ordered_extent *ordered;
669 :
670 113773 : ordered = container_of(work, struct btrfs_ordered_extent, flush_work);
671 113773 : btrfs_start_ordered_extent(ordered);
672 113697 : complete(&ordered->completion);
673 113693 : }
674 :
675 : /*
676 : * wait for all the ordered extents in a root. This is done when balancing
677 : * space between drives.
678 : */
679 24295 : u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
680 : const u64 range_start, const u64 range_len)
681 : {
682 24295 : struct btrfs_fs_info *fs_info = root->fs_info;
683 24295 : LIST_HEAD(splice);
684 24295 : LIST_HEAD(skipped);
685 24295 : LIST_HEAD(works);
686 24295 : struct btrfs_ordered_extent *ordered, *next;
687 24295 : u64 count = 0;
688 24295 : const u64 range_end = range_start + range_len;
689 :
690 24295 : mutex_lock(&root->ordered_extent_mutex);
691 24295 : spin_lock(&root->ordered_extent_lock);
692 24294 : list_splice_init(&root->ordered_extents, &splice);
693 138346 : while (!list_empty(&splice) && nr) {
694 114051 : ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
695 : root_extent_list);
696 :
697 114051 : if (range_end <= ordered->disk_bytenr ||
698 113991 : ordered->disk_bytenr + ordered->disk_num_bytes <= range_start) {
699 275 : list_move_tail(&ordered->root_extent_list, &skipped);
700 276 : cond_resched_lock(&root->ordered_extent_lock);
701 276 : continue;
702 : }
703 :
704 113776 : list_move_tail(&ordered->root_extent_list,
705 : &root->ordered_extents);
706 113776 : refcount_inc(&ordered->refs);
707 113776 : spin_unlock(&root->ordered_extent_lock);
708 :
709 113776 : btrfs_init_work(&ordered->flush_work,
710 : btrfs_run_ordered_extent_work, NULL, NULL);
711 113776 : list_add_tail(&ordered->work_list, &works);
712 113776 : btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work);
713 :
714 113776 : cond_resched();
715 113776 : spin_lock(&root->ordered_extent_lock);
716 113776 : if (nr != U64_MAX)
717 21347 : nr--;
718 113776 : count++;
719 : }
720 24295 : list_splice_tail(&skipped, &root->ordered_extents);
721 24295 : list_splice_tail(&splice, &root->ordered_extents);
722 24295 : spin_unlock(&root->ordered_extent_lock);
723 :
724 138069 : list_for_each_entry_safe(ordered, next, &works, work_list) {
725 113776 : list_del_init(&ordered->work_list);
726 113776 : wait_for_completion(&ordered->completion);
727 113776 : btrfs_put_ordered_extent(ordered);
728 113775 : cond_resched();
729 : }
730 24293 : mutex_unlock(&root->ordered_extent_mutex);
731 :
732 24295 : return count;
733 : }
734 :
735 40255 : void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
736 : const u64 range_start, const u64 range_len)
737 : {
738 40255 : struct btrfs_root *root;
739 40255 : struct list_head splice;
740 40255 : u64 done;
741 :
742 40255 : INIT_LIST_HEAD(&splice);
743 :
744 40255 : mutex_lock(&fs_info->ordered_operations_mutex);
745 40258 : spin_lock(&fs_info->ordered_root_lock);
746 40259 : list_splice_init(&fs_info->ordered_roots, &splice);
747 54056 : while (!list_empty(&splice) && nr) {
748 13797 : root = list_first_entry(&splice, struct btrfs_root,
749 : ordered_root);
750 13797 : root = btrfs_grab_root(root);
751 13797 : BUG_ON(!root);
752 13797 : list_move_tail(&root->ordered_root,
753 : &fs_info->ordered_roots);
754 13797 : spin_unlock(&fs_info->ordered_root_lock);
755 :
756 13797 : done = btrfs_wait_ordered_extents(root, nr,
757 : range_start, range_len);
758 13797 : btrfs_put_root(root);
759 :
760 13797 : spin_lock(&fs_info->ordered_root_lock);
761 13797 : if (nr != U64_MAX) {
762 1349 : nr -= done;
763 : }
764 : }
765 40259 : list_splice_tail(&splice, &fs_info->ordered_roots);
766 40259 : spin_unlock(&fs_info->ordered_root_lock);
767 40259 : mutex_unlock(&fs_info->ordered_operations_mutex);
768 40259 : }
769 :
770 : /*
771 : * Start IO and wait for a given ordered extent to finish.
772 : *
773 : * Wait on page writeback for all the pages in the extent and the IO completion
774 : * code to insert metadata into the btree corresponding to the extent.
775 : */
776 332805 : void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry)
777 : {
778 332805 : u64 start = entry->file_offset;
779 332805 : u64 end = start + entry->num_bytes - 1;
780 332805 : struct btrfs_inode *inode = BTRFS_I(entry->inode);
781 332805 : bool freespace_inode;
782 :
783 332805 : trace_btrfs_ordered_extent_start(inode, entry);
784 :
785 : /*
786 : * If this is a free space inode do not take the ordered extents lockdep
787 : * map.
788 : */
789 332800 : freespace_inode = btrfs_is_free_space_inode(inode);
790 :
791 : /*
792 : * pages in the range can be dirty, clean or writeback. We
793 : * start IO on any dirty ones so the wait doesn't stall waiting
794 : * for the flusher thread to find them
795 : */
796 332800 : if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
797 294769 : filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end);
798 :
799 332761 : if (!freespace_inode)
800 332761 : btrfs_might_wait_for_event(inode->root->fs_info, btrfs_ordered_extent);
801 1200433 : wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags));
802 332723 : }
803 :
804 : /*
805 : * Used to wait on ordered extents across a large range of bytes.
806 : */
807 8602965 : int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
808 : {
809 8602965 : int ret = 0;
810 8602965 : int ret_wb = 0;
811 8602965 : u64 end;
812 8602965 : u64 orig_end;
813 8602965 : struct btrfs_ordered_extent *ordered;
814 :
815 8602965 : if (start + len < start) {
816 : orig_end = OFFSET_MAX;
817 : } else {
818 8438833 : orig_end = start + len - 1;
819 8438833 : if (orig_end > OFFSET_MAX)
820 : orig_end = OFFSET_MAX;
821 : }
822 :
823 : /* start IO across the range first to instantiate any delalloc
824 : * extents
825 : */
826 8602965 : ret = btrfs_fdatawrite_range(inode, start, orig_end);
827 8603810 : if (ret)
828 : return ret;
829 :
830 : /*
831 : * If we have a writeback error don't return immediately. Wait first
832 : * for any ordered extents that haven't completed yet. This is to make
833 : * sure no one can dirty the same page ranges and call writepages()
834 : * before the ordered extents complete - to avoid failures (-EEXIST)
835 : * when adding the new ordered extents to the ordered tree.
836 : */
837 8603850 : ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
838 :
839 8603850 : end = orig_end;
840 8652654 : while (1) {
841 8627900 : ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode), end);
842 8628193 : if (!ordered)
843 : break;
844 1366908 : if (ordered->file_offset > orig_end) {
845 852466 : btrfs_put_ordered_extent(ordered);
846 852466 : break;
847 : }
848 514442 : if (ordered->file_offset + ordered->num_bytes <= start) {
849 312113 : btrfs_put_ordered_extent(ordered);
850 312113 : break;
851 : }
852 202329 : btrfs_start_ordered_extent(ordered);
853 202328 : end = ordered->file_offset;
854 : /*
855 : * If the ordered extent had an error save the error but don't
856 : * exit without waiting first for all other ordered extents in
857 : * the range to complete.
858 : */
859 404656 : if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
860 1 : ret = -EIO;
861 202328 : btrfs_put_ordered_extent(ordered);
862 202328 : if (end == 0 || end == start)
863 : break;
864 24754 : end--;
865 : }
866 8603438 : return ret_wb ? ret_wb : ret;
867 : }
868 :
869 : /*
870 : * Find an ordered extent corresponding to @file_offset. Return NULL if
871 : * nothing is found, otherwise take a reference on the extent and return it.
872 : */
873 4583913 : struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
874 : u64 file_offset)
875 : {
876 4583913 : struct btrfs_ordered_inode_tree *tree;
877 4583913 : struct rb_node *node;
878 4583913 : struct btrfs_ordered_extent *entry = NULL;
879 4583913 : unsigned long flags;
880 :
881 4583913 : tree = &inode->ordered_tree;
882 4583913 : spin_lock_irqsave(&tree->lock, flags);
883 4588595 : node = tree_search(tree, file_offset);
884 4573254 : if (!node)
885 1897706 : goto out;
886 :
887 2675548 : entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
888 2675548 : if (!in_range(file_offset, entry->file_offset, entry->num_bytes))
889 : entry = NULL;
890 2627195 : if (entry) {
891 2627195 : refcount_inc(&entry->refs);
892 2627265 : trace_btrfs_ordered_extent_lookup(inode, entry);
893 : }
894 0 : out:
895 4573316 : spin_unlock_irqrestore(&tree->lock, flags);
896 4586755 : return entry;
897 : }
898 :
899 : /* Since the DIO code tries to lock a wide area we need to look for any ordered
900 : * extents that exist in the range, rather than just the start of the range.
901 : */
902 45378840 : struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
903 : struct btrfs_inode *inode, u64 file_offset, u64 len)
904 : {
905 45378840 : struct btrfs_ordered_inode_tree *tree;
906 45378840 : struct rb_node *node;
907 45378840 : struct btrfs_ordered_extent *entry = NULL;
908 :
909 45378840 : tree = &inode->ordered_tree;
910 45378840 : spin_lock_irq(&tree->lock);
911 45444850 : node = tree_search(tree, file_offset);
912 45330609 : if (!node) {
913 42754227 : node = tree_search(tree, file_offset + len);
914 42759626 : if (!node)
915 42790500 : goto out;
916 : }
917 :
918 2901189 : while (1) {
919 2901189 : entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
920 2901189 : if (range_overlaps(entry, file_offset, len))
921 : break;
922 :
923 2209243 : if (entry->file_offset >= file_offset + len) {
924 : entry = NULL;
925 : break;
926 : }
927 1500845 : entry = NULL;
928 1500845 : node = rb_next(node);
929 1500923 : if (!node)
930 : break;
931 : }
932 1145242 : out:
933 45336086 : if (entry) {
934 692049 : refcount_inc(&entry->refs);
935 692049 : trace_btrfs_ordered_extent_lookup_range(inode, entry);
936 : }
937 45336086 : spin_unlock_irq(&tree->lock);
938 45404262 : return entry;
939 : }
940 :
941 : /*
942 : * Adds all ordered extents to the given list. The list ends up sorted by the
943 : * file_offset of the ordered extents.
944 : */
945 141220 : void btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode,
946 : struct list_head *list)
947 : {
948 141220 : struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
949 141220 : struct rb_node *n;
950 :
951 141220 : ASSERT(inode_is_locked(&inode->vfs_inode));
952 :
953 141220 : spin_lock_irq(&tree->lock);
954 437106 : for (n = rb_first(&tree->tree); n; n = rb_next(n)) {
955 295862 : struct btrfs_ordered_extent *ordered;
956 :
957 295862 : ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
958 :
959 591724 : if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
960 87912 : continue;
961 :
962 207950 : ASSERT(list_empty(&ordered->log_list));
963 207950 : list_add_tail(&ordered->log_list, list);
964 207940 : refcount_inc(&ordered->refs);
965 207964 : trace_btrfs_ordered_extent_lookup_for_logging(inode, ordered);
966 : }
967 141232 : spin_unlock_irq(&tree->lock);
968 141233 : }
969 :
970 : /*
971 : * lookup and return any extent before 'file_offset'. NULL is returned
972 : * if none is found
973 : */
974 : struct btrfs_ordered_extent *
975 12510508 : btrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset)
976 : {
977 12510508 : struct btrfs_ordered_inode_tree *tree;
978 12510508 : struct rb_node *node;
979 12510508 : struct btrfs_ordered_extent *entry = NULL;
980 :
981 12510508 : tree = &inode->ordered_tree;
982 12510508 : spin_lock_irq(&tree->lock);
983 12511368 : node = tree_search(tree, file_offset);
984 12509639 : if (!node)
985 11142731 : goto out;
986 :
987 1366908 : entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
988 1366908 : refcount_inc(&entry->refs);
989 1366908 : trace_btrfs_ordered_extent_lookup_first(inode, entry);
990 12509639 : out:
991 12509639 : spin_unlock_irq(&tree->lock);
992 12511242 : return entry;
993 : }
994 :
995 : /*
996 : * Lookup the first ordered extent that overlaps the range
997 : * [@file_offset, @file_offset + @len).
998 : *
999 : * The difference between this and btrfs_lookup_first_ordered_extent() is
1000 : * that this one won't return any ordered extent that does not overlap the range.
1001 : * And the difference from btrfs_lookup_ordered_extent() is that this
1002 : * function ensures the first overlapping ordered extent gets returned.
1003 : */
1004 87288633 : struct btrfs_ordered_extent *btrfs_lookup_first_ordered_range(
1005 : struct btrfs_inode *inode, u64 file_offset, u64 len)
1006 : {
1007 87288633 : struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
1008 87288633 : struct rb_node *node;
1009 87288633 : struct rb_node *cur;
1010 87288633 : struct rb_node *prev;
1011 87288633 : struct rb_node *next;
1012 87288633 : struct btrfs_ordered_extent *entry = NULL;
1013 :
1014 87288633 : spin_lock_irq(&tree->lock);
1015 87291185 : node = tree->tree.rb_node;
1016 : /*
1017 : * Here we don't want to use tree_search() which will use tree->last
1018 : * and screw up the search order.
1019 : * And __tree_search() can't return the adjacent ordered extents
1020 : * either, thus here we do our own search.
1021 : */
1022 91290903 : while (node) {
1023 4271310 : entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
1024 :
1025 4271310 : if (file_offset < entry->file_offset) {
1026 2786085 : node = node->rb_left;
1027 1485225 : } else if (file_offset >= entry_end(entry)) {
1028 1213633 : node = node->rb_right;
1029 : } else {
1030 : /*
1031 : * Direct hit, got an ordered extent that starts at
1032 : * @file_offset
1033 : */
1034 271592 : goto out;
1035 : }
1036 : }
1037 87019593 : if (!entry) {
1038 : /* Empty tree */
1039 84315899 : goto out;
1040 : }
1041 :
1042 2703694 : cur = &entry->rb_node;
1043 : /* We got an entry around @file_offset, check adjacent entries */
1044 2703694 : if (entry->file_offset < file_offset) {
1045 803383 : prev = cur;
1046 803383 : next = rb_next(cur);
1047 : } else {
1048 1900311 : prev = rb_prev(cur);
1049 1900311 : next = cur;
1050 : }
1051 2703694 : if (prev) {
1052 939391 : entry = rb_entry(prev, struct btrfs_ordered_extent, rb_node);
1053 939391 : if (range_overlaps(entry, file_offset, len))
1054 0 : goto out;
1055 : }
1056 2703694 : if (next) {
1057 1973531 : entry = rb_entry(next, struct btrfs_ordered_extent, rb_node);
1058 1973531 : if (range_overlaps(entry, file_offset, len))
1059 94 : goto out;
1060 : }
1061 : /* No ordered extent in the range */
1062 : entry = NULL;
1063 84587585 : out:
1064 84587585 : if (entry) {
1065 274173 : refcount_inc(&entry->refs);
1066 271704 : trace_btrfs_ordered_extent_lookup_first_range(inode, entry);
1067 : }
1068 :
1069 87288711 : spin_unlock_irq(&tree->lock);
1070 87290965 : return entry;
1071 : }
1072 :
1073 : /*
1074 : * Lock the passed range and ensures all pending ordered extents in it are run
1075 : * to completion.
1076 : *
1077 : * @inode: Inode whose ordered tree is to be searched
1078 : * @start: Beginning of range to flush
1079 : * @end: Last byte of range to lock
1080 : * @cached_state: If passed, will return the extent state responsible for the
1081 : * locked range. It's the caller's responsibility to free the
1082 : * cached state.
1083 : *
1084 : * Always return with the given range locked, ensuring after it's called no
1085 : * ordered extent can be pending.
1086 : */
1087 19158288 : void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
1088 : u64 end,
1089 : struct extent_state **cached_state)
1090 : {
1091 19158288 : struct btrfs_ordered_extent *ordered;
1092 19158288 : struct extent_state *cache = NULL;
1093 19158288 : struct extent_state **cachedp = &cache;
1094 :
1095 19158288 : if (cached_state)
1096 1661809 : cachedp = cached_state;
1097 :
1098 19179563 : while (1) {
1099 19168925 : lock_extent(&inode->io_tree, start, end, cachedp);
1100 19154708 : ordered = btrfs_lookup_ordered_range(inode, start,
1101 19154708 : end - start + 1);
1102 19120770 : if (!ordered) {
1103 : /*
1104 : * If no external cached_state has been passed then
1105 : * decrement the extra ref taken for cachedp since we
1106 : * aren't exposing it outside of this function
1107 : */
1108 19110131 : if (!cached_state)
1109 17476812 : refcount_dec(&cache->refs);
1110 19125319 : break;
1111 : }
1112 10639 : unlock_extent(&inode->io_tree, start, end, cachedp);
1113 10639 : btrfs_start_ordered_extent(ordered);
1114 10638 : btrfs_put_ordered_extent(ordered);
1115 : }
1116 19125319 : }
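/*
 * Illustrative userspace sketch of the lock / recheck / retry loop
 * above, with a pthread mutex standing in for the extent lock and a
 * plain counter for the pending ordered extents; all names are
 * hypothetical and the flush step is simulated (not the kernel API).
 */
#include <assert.h>
#include <pthread.h>

static pthread_mutex_t range_lock = PTHREAD_MUTEX_INITIALIZER;
static int pending_ordered = 2;	/* pretend two extents are in flight */

/* Return with range_lock held and nothing pending in the range. */
static void lock_and_flush(void)
{
	for (;;) {
		pthread_mutex_lock(&range_lock);
		if (pending_ordered == 0)
			return;	/* locked, and no ordered extent pending */
		pthread_mutex_unlock(&range_lock);
		pending_ordered--;	/* stands in for start + wait + put */
	}
}

int main(void)
{
	lock_and_flush();
	assert(pending_ordered == 0);
	pthread_mutex_unlock(&range_lock);
	return 0;
}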
1117 :
1118 : /*
1119 : * Lock the passed range and ensure all pending ordered extents in it are run
1120 : * to completion in nowait mode.
1121 : *
1122 : * Return true if the range was locked and no pending ordered extents were
1123 : * found in it, otherwise false.
1124 : */
1125 0 : bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
1126 : struct extent_state **cached_state)
1127 : {
1128 0 : struct btrfs_ordered_extent *ordered;
1129 :
1130 0 : if (!try_lock_extent(&inode->io_tree, start, end, cached_state))
1131 : return false;
1132 :
1133 0 : ordered = btrfs_lookup_ordered_range(inode, start, end - start + 1);
1134 0 : if (!ordered)
1135 : return true;
1136 :
1137 0 : btrfs_put_ordered_extent(ordered);
1138 0 : unlock_extent(&inode->io_tree, start, end, cached_state);
1139 :
1140 0 : return false;
1141 : }
1142 :
1143 : /* Split out a new ordered extent for the first @len bytes of @ordered. */
1144 770 : struct btrfs_ordered_extent *btrfs_split_ordered_extent(
1145 : struct btrfs_ordered_extent *ordered, u64 len)
1146 : {
1147 770 : struct btrfs_inode *inode = BTRFS_I(ordered->inode);
1148 770 : struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
1149 770 : struct btrfs_root *root = inode->root;
1150 770 : struct btrfs_fs_info *fs_info = root->fs_info;
1151 770 : u64 file_offset = ordered->file_offset;
1152 770 : u64 disk_bytenr = ordered->disk_bytenr;
1153 770 : unsigned long flags = ordered->flags;
1154 770 : struct btrfs_ordered_sum *sum, *tmpsum;
1155 770 : struct btrfs_ordered_extent *new;
1156 770 : struct rb_node *node;
1157 770 : u64 offset = 0;
1158 :
1159 770 : trace_btrfs_ordered_extent_split(inode, ordered);
1160 :
1161 770 : ASSERT(!(flags & (1U << BTRFS_ORDERED_COMPRESSED)));
1162 :
1163 : /*
1164 : * The entire bio must be covered by the ordered extent, but we can't
1165 : * reduce the original extent to a zero length either.
1166 : */
1167 770 : if (WARN_ON_ONCE(len >= ordered->num_bytes))
1168 : return ERR_PTR(-EINVAL);
1169 : /* We cannot split partially completed ordered extents. */
1170 770 : if (ordered->bytes_left) {
1171 770 : ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS));
1172 770 : if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes))
1173 : return ERR_PTR(-EINVAL);
1174 : }
1175 : /* We cannot split a compressed ordered extent. */
1176 770 : if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes))
1177 : return ERR_PTR(-EINVAL);
1178 :
1179 770 : new = alloc_ordered_extent(inode, file_offset, len, len, disk_bytenr,
1180 : len, 0, flags, ordered->compress_type);
1181 770 : if (IS_ERR(new))
1182 : return new;
1183 :
1184 : /* One ref for the tree. */
1185 770 : refcount_inc(&new->refs);
1186 :
1187 770 : spin_lock_irq(&root->ordered_extent_lock);
1188 770 : spin_lock(&tree->lock);
1189 : /* Remove from tree once */
1190 770 : node = &ordered->rb_node;
1191 770 : rb_erase(node, &tree->tree);
1192 770 : RB_CLEAR_NODE(node);
1193 770 : if (tree->last == node)
1194 0 : tree->last = NULL;
1195 :
1196 770 : ordered->file_offset += len;
1197 770 : ordered->disk_bytenr += len;
1198 770 : ordered->num_bytes -= len;
1199 770 : ordered->disk_num_bytes -= len;
1200 :
1201 1540 : if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
1202 0 : ASSERT(ordered->bytes_left == 0);
1203 0 : new->bytes_left = 0;
1204 : } else {
1205 770 : ordered->bytes_left -= len;
1206 : }
1207 :
1208 1540 : if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) {
1209 0 : if (ordered->truncated_len > len) {
1210 0 : ordered->truncated_len -= len;
1211 : } else {
1212 0 : new->truncated_len = ordered->truncated_len;
1213 0 : ordered->truncated_len = 0;
1214 : }
1215 : }
1216 :
1217 770 : list_for_each_entry_safe(sum, tmpsum, &ordered->list, list) {
1218 0 : if (offset == len)
1219 : break;
1220 0 : list_move_tail(&sum->list, &new->list);
1221 0 : offset += sum->len;
1222 : }
1223 :
1224 : /* Re-insert the node */
1225 770 : node = tree_insert(&tree->tree, ordered->file_offset, &ordered->rb_node);
1226 770 : if (node)
1227 0 : btrfs_panic(fs_info, -EEXIST,
1228 : "zoned: inconsistency in ordered tree at offset %llu",
1229 : ordered->file_offset);
1230 :
1231 770 : node = tree_insert(&tree->tree, new->file_offset, &new->rb_node);
1232 770 : if (node)
1233 0 : btrfs_panic(fs_info, -EEXIST,
1234 : "zoned: inconsistency in ordered tree at offset %llu",
1235 : new->file_offset);
1236 770 : spin_unlock(&tree->lock);
1237 :
1238 770 : list_add_tail(&new->root_extent_list, &root->ordered_extents);
1239 770 : root->nr_ordered_extents++;
1240 770 : spin_unlock_irq(&root->ordered_extent_lock);
1241 770 : return new;
1242 : }
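/*
 * Illustrative userspace sketch of the front-split arithmetic above
 * (hypothetical names, not kernel code): carve the first @len bytes
 * off an extent and shift the remainder forward by @len in both file
 * and disk space, mirroring the file_offset/disk_bytenr/num_bytes
 * updates in btrfs_split_ordered_extent().
 */
#include <assert.h>
#include <stdint.h>

struct ext { uint64_t file_offset, disk_bytenr, num_bytes; };

static struct ext split_front(struct ext *e, uint64_t len)
{
	struct ext front = { e->file_offset, e->disk_bytenr, len };

	e->file_offset += len;	/* remainder starts @len further in */
	e->disk_bytenr += len;
	e->num_bytes -= len;
	return front;
}

int main(void)
{
	struct ext e = { 0, 1 << 20, 8192 };
	struct ext f = split_front(&e, 4096);

	assert(f.file_offset == 0 && f.num_bytes == 4096);
	assert(e.file_offset == 4096 && e.num_bytes == 4096);
	assert(e.disk_bytenr == (1 << 20) + 4096);
	return 0;
}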
1243 :
1244 11 : int __init ordered_data_init(void)
1245 : {
1246 11 : btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
1247 : sizeof(struct btrfs_ordered_extent), 0,
1248 : SLAB_MEM_SPREAD,
1249 : NULL);
1250 11 : if (!btrfs_ordered_extent_cache)
1251 0 : return -ENOMEM;
1252 :
1253 : return 0;
1254 : }
1255 :
1256 0 : void __cold ordered_data_exit(void)
1257 : {
1258 0 : kmem_cache_destroy(btrfs_ordered_extent_cache);
1259 0 : }