Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (C) 2007 Oracle. All rights reserved.
4 : */
5 :
6 : #include <linux/slab.h>
7 : #include <linux/blkdev.h>
8 : #include <linux/writeback.h>
9 : #include <linux/sched/mm.h>
10 : #include "messages.h"
11 : #include "misc.h"
12 : #include "ctree.h"
13 : #include "transaction.h"
14 : #include "btrfs_inode.h"
15 : #include "extent_io.h"
16 : #include "disk-io.h"
17 : #include "compression.h"
18 : #include "delalloc-space.h"
19 : #include "qgroup.h"
20 : #include "subpage.h"
21 : #include "file.h"
22 : #include "super.h"
23 :
24 : static struct kmem_cache *btrfs_ordered_extent_cache;
25 :
26 : static u64 entry_end(struct btrfs_ordered_extent *entry)
27 : {
28 7246131 : if (entry->file_offset + entry->num_bytes < entry->file_offset)
29 0 : return (u64)-1;
30 : return entry->file_offset + entry->num_bytes;
31 : }
32 :
/*
 * Insert @node into the rb-tree keyed by file offset.
 *
 * Returns NULL if the insertion worked, or it returns the node it did find
 * in the tree (the one whose range already contains @file_offset; the tree
 * must never hold overlapping ordered extents).
 */
static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_ordered_extent *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);

		if (file_offset < entry->file_offset)
			p = &(*p)->rb_left;
		else if (file_offset >= entry_end(entry))
			p = &(*p)->rb_right;
		else
			/* @file_offset falls inside an existing extent. */
			return parent;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}
59 :
/*
 * Look for a given offset in the tree.  If an ordered extent contains
 * @file_offset its node is returned.  Otherwise NULL is returned and, when
 * @prev_ret is not NULL, *prev_ret is set to the node of the last ordered
 * extent that ends at or before @file_offset (or NULL if there is none).
 */
static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
				     struct rb_node **prev_ret)
{
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *test;
	struct btrfs_ordered_extent *entry;
	struct btrfs_ordered_extent *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
		prev = n;
		prev_entry = entry;

		if (file_offset < entry->file_offset)
			n = n->rb_left;
		else if (file_offset >= entry_end(entry))
			n = n->rb_right;
		else
			/* Exact hit: @file_offset lies inside this extent. */
			return n;
	}
	if (!prev_ret)
		return NULL;

	/*
	 * The descent may have stopped on an extent entirely before
	 * @file_offset; walk forward while successors still end at or
	 * before the offset.
	 */
	while (prev && file_offset >= entry_end(prev_entry)) {
		test = rb_next(prev);
		if (!test)
			break;
		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
				      rb_node);
		if (file_offset < entry_end(prev_entry))
			break;

		prev = test;
	}
	if (prev)
		prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
				      rb_node);
	/*
	 * Walk backwards while the candidate still ends beyond @file_offset,
	 * so we hand back the last extent ending at or before the offset.
	 */
	while (prev && file_offset < entry_end(prev_entry)) {
		test = rb_prev(prev);
		if (!test)
			break;
		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
				      rb_node);
		prev = test;
	}
	*prev_ret = prev;
	return NULL;
}
113 :
114 : static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
115 : u64 len)
116 : {
117 9815005 : if (file_offset + len <= entry->file_offset ||
118 3315029 : entry->file_offset + entry->num_bytes <= file_offset)
119 2121299 : return 0;
120 : return 1;
121 : }
122 :
/*
 * Find the first ordered extent that contains this offset, otherwise
 * the first one less than this offset.  The result is cached in
 * tree->last so repeated lookups of the same range short-circuit.
 * Caller must hold tree->lock.
 */
static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
					  u64 file_offset)
{
	struct rb_root *root = &tree->tree;
	struct rb_node *prev = NULL;
	struct rb_node *ret;
	struct btrfs_ordered_extent *entry;

	/* Fast path: the cached node may already cover this offset. */
	if (tree->last) {
		entry = rb_entry(tree->last, struct btrfs_ordered_extent,
				 rb_node);
		if (in_range(file_offset, entry->file_offset, entry->num_bytes))
			return tree->last;
	}
	ret = __tree_search(root, file_offset, &prev);
	if (!ret)
		ret = prev;
	if (ret)
		tree->last = ret;
	return ret;
}
148 :
/*
 * Allocate and initialize an ordered extent covering the file range
 * [@file_offset, @file_offset + @num_bytes).
 *
 * Settles the qgroup data reservation for the range (freed immediately
 * for nocow/prealloc writes, otherwise released with the returned byte
 * count remembered in qgroup_rsv) and accounts one outstanding extent
 * on the inode.
 *
 * Returns the new entry holding a single reference, or an ERR_PTR on
 * qgroup or allocation failure.
 */
static struct btrfs_ordered_extent *alloc_ordered_extent(
			struct btrfs_inode *inode, u64 file_offset, u64 num_bytes,
			u64 ram_bytes, u64 disk_bytenr, u64 disk_num_bytes,
			u64 offset, unsigned long flags, int compress_type)
{
	struct btrfs_ordered_extent *entry;
	int ret;

	if (flags &
	    ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
		/* For nocow write, we can release the qgroup rsv right now */
		ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
		if (ret < 0)
			return ERR_PTR(ret);
	} else {
		/*
		 * The ordered extent has reserved qgroup space, release now
		 * and pass the reserved number for qgroup_record to free.
		 */
		ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes);
		if (ret < 0)
			return ERR_PTR(ret);
	}
	entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
	if (!entry)
		return ERR_PTR(-ENOMEM);

	entry->file_offset = file_offset;
	entry->num_bytes = num_bytes;
	entry->ram_bytes = ram_bytes;
	entry->disk_bytenr = disk_bytenr;
	entry->disk_num_bytes = disk_num_bytes;
	entry->offset = offset;
	/* All bytes of the range still have IO pending. */
	entry->bytes_left = num_bytes;
	entry->inode = igrab(&inode->vfs_inode);
	entry->compress_type = compress_type;
	entry->truncated_len = (u64)-1;
	/* Qgroup bytes returned by the release/free call above. */
	entry->qgroup_rsv = ret;
	entry->flags = flags;
	refcount_set(&entry->refs, 1);
	init_waitqueue_head(&entry->wait);
	INIT_LIST_HEAD(&entry->list);
	INIT_LIST_HEAD(&entry->log_list);
	INIT_LIST_HEAD(&entry->root_extent_list);
	INIT_LIST_HEAD(&entry->work_list);
	init_completion(&entry->completion);

	/*
	 * We don't need the count_max_extents here, we can assume that all of
	 * that work has been done at higher layers, so this is truly the
	 * smallest the extent is going to get.
	 */
	spin_lock(&inode->lock);
	btrfs_mod_outstanding_extents(inode, 1);
	spin_unlock(&inode->lock);

	return entry;
}
207 :
/*
 * Insert an already-allocated ordered extent into the inode's ordered
 * tree and the root's ordered extent list.  Takes one extra reference
 * for the tree.  Panics on an existing overlapping entry, since callers
 * must never create overlapping ordered extents.
 */
static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
{
	struct btrfs_inode *inode = BTRFS_I(entry->inode);
	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *node;

	trace_btrfs_ordered_extent_add(inode, entry);

	percpu_counter_add_batch(&fs_info->ordered_bytes, entry->num_bytes,
				 fs_info->delalloc_batch);

	/* One ref for the tree. */
	refcount_inc(&entry->refs);

	spin_lock_irq(&tree->lock);
	node = tree_insert(&tree->tree, entry->file_offset, &entry->rb_node);
	if (node)
		btrfs_panic(fs_info, -EEXIST,
				"inconsistency in ordered tree at offset %llu",
				entry->file_offset);
	spin_unlock_irq(&tree->lock);

	spin_lock(&root->ordered_extent_lock);
	list_add_tail(&entry->root_extent_list,
		      &root->ordered_extents);
	root->nr_ordered_extents++;
	/* First ordered extent of this root: hook the root into fs_info. */
	if (root->nr_ordered_extents == 1) {
		spin_lock(&fs_info->ordered_root_lock);
		BUG_ON(!list_empty(&root->ordered_root));
		list_add_tail(&root->ordered_root, &fs_info->ordered_roots);
		spin_unlock(&fs_info->ordered_root_lock);
	}
	spin_unlock(&root->ordered_extent_lock);
}
244 :
245 : /*
246 : * Add an ordered extent to the per-inode tree.
247 : *
248 : * @inode: Inode that this extent is for.
249 : * @file_offset: Logical offset in file where the extent starts.
250 : * @num_bytes: Logical length of extent in file.
251 : * @ram_bytes: Full length of unencoded data.
252 : * @disk_bytenr: Offset of extent on disk.
253 : * @disk_num_bytes: Size of extent on disk.
254 : * @offset: Offset into unencoded data where file data starts.
255 : * @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*).
256 : * @compress_type: Compression algorithm used for data.
257 : *
258 : * Most of these parameters correspond to &struct btrfs_file_extent_item. The
259 : * tree is given a single reference on the ordered extent that was inserted, and
260 : * the returned pointer is given a second reference.
261 : *
262 : * Return: the new ordered extent or error pointer.
263 : */
264 3410305 : struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
265 : struct btrfs_inode *inode, u64 file_offset,
266 : u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
267 : u64 disk_num_bytes, u64 offset, unsigned long flags,
268 : int compress_type)
269 : {
270 3410305 : struct btrfs_ordered_extent *entry;
271 :
272 3410305 : ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
273 :
274 3410305 : entry = alloc_ordered_extent(inode, file_offset, num_bytes, ram_bytes,
275 : disk_bytenr, disk_num_bytes, offset, flags,
276 : compress_type);
277 3410420 : if (!IS_ERR(entry))
278 3410420 : insert_ordered_extent(entry);
279 3410536 : return entry;
280 : }
281 :
282 : /*
283 : * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
284 : * when an ordered extent is finished. If the list covers more than one
285 : * ordered extent, it is split across multiples.
286 : */
287 3467844 : void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
288 : struct btrfs_ordered_sum *sum)
289 : {
290 3467844 : struct btrfs_ordered_inode_tree *tree;
291 :
292 3467844 : tree = &BTRFS_I(entry->inode)->ordered_tree;
293 3467844 : spin_lock_irq(&tree->lock);
294 3468138 : list_add_tail(&sum->list, &entry->list);
295 3467936 : spin_unlock_irq(&tree->lock);
296 3468120 : }
297 :
298 3410583 : static void finish_ordered_fn(struct btrfs_work *work)
299 : {
300 3410583 : struct btrfs_ordered_extent *ordered_extent;
301 :
302 3410583 : ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
303 3410583 : btrfs_finish_ordered_io(ordered_extent);
304 3410282 : }
305 :
/*
 * Account @len finished IO bytes against @ordered.
 *
 * For page-backed IO the page's Ordered (Private2) bit for the range is
 * consumed; if it is already clear the range was accounted before and we
 * bail out.  Must be called with the inode's ordered tree lock held.
 *
 * Returns true when this call brought bytes_left to zero; in that case an
 * extra reference is taken and the caller must queue the completion work.
 */
static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
				      struct page *page, u64 file_offset,
				      u64 len, bool uptodate)
{
	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
	struct btrfs_fs_info *fs_info = inode->root->fs_info;

	lockdep_assert_held(&inode->ordered_tree.lock);

	if (page) {
		ASSERT(page->mapping);
		ASSERT(page_offset(page) <= file_offset);
		ASSERT(file_offset + len <= page_offset(page) + PAGE_SIZE);

		/*
		 * Ordered (Private2) bit indicates whether we still have
		 * pending io unfinished for the ordered extent.
		 *
		 * If there's no such bit, we need to skip to next range.
		 */
		if (!btrfs_page_test_ordered(fs_info, page, file_offset, len))
			return false;
		btrfs_page_clear_ordered(fs_info, page, file_offset, len);
	}

	/* Now we're fine to update the accounting. */
	if (WARN_ON_ONCE(len > ordered->bytes_left)) {
		btrfs_crit(fs_info,
"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%llu left=%llu",
			   inode->root->root_key.objectid, btrfs_ino(inode),
			   ordered->file_offset, ordered->num_bytes,
			   len, ordered->bytes_left);
		ordered->bytes_left = 0;
	} else {
		ordered->bytes_left -= len;
	}

	if (!uptodate)
		set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);

	if (ordered->bytes_left)
		return false;

	/*
	 * All the IO of the ordered extent is finished, we need to queue
	 * the finish_func to be executed.
	 */
	set_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags);
	cond_wake_up(&ordered->wait);
	/* Extra ref handed to the caller along with the "finished" result. */
	refcount_inc(&ordered->refs);
	trace_btrfs_ordered_extent_mark_finished(inode, ordered);
	return true;
}
359 :
360 3410603 : static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)
361 : {
362 3410603 : struct btrfs_inode *inode = BTRFS_I(ordered->inode);
363 3410603 : struct btrfs_fs_info *fs_info = inode->root->fs_info;
364 3410603 : struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ?
365 3410603 : fs_info->endio_freespace_worker : fs_info->endio_write_workers;
366 :
367 3410603 : btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL);
368 3410602 : btrfs_queue_work(wq, &ordered->work);
369 3410603 : }
370 :
/*
 * Account @len finished IO bytes at @file_offset against @ordered and, if
 * that completes the ordered extent, queue its completion work.
 *
 * @page may be NULL for direct IO and compressed writes.  Returns true
 * when the ordered extent finished with this call.
 */
bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
				 struct page *page, u64 file_offset, u64 len,
				 bool uptodate)
{
	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
	unsigned long flags;
	bool ret;

	trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate);

	spin_lock_irqsave(&inode->ordered_tree.lock, flags);
	ret = can_finish_ordered_extent(ordered, page, file_offset, len, uptodate);
	spin_unlock_irqrestore(&inode->ordered_tree.lock, flags);

	/* Queue outside the lock; can_finish_ordered_extent took a ref. */
	if (ret)
		btrfs_queue_ordered_fn(ordered);
	return ret;
}
389 :
390 : /*
391 : * Mark all ordered extents io inside the specified range finished.
392 : *
393 : * @page: The involved page for the operation.
394 : * For uncompressed buffered IO, the page status also needs to be
395 : * updated to indicate whether the pending ordered io is finished.
396 : * Can be NULL for direct IO and compressed write.
397 : * For these cases, callers are ensured they won't execute the
398 : * endio function twice.
399 : *
400 : * This function is called for endio, thus the range must have ordered
401 : * extent(s) covering it.
402 : */
void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
				    struct page *page, u64 file_offset,
				    u64 num_bytes, bool uptodate)
{
	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;
	unsigned long flags;
	u64 cur = file_offset;

	spin_lock_irqsave(&tree->lock, flags);
	/* Walk [file_offset, file_offset + num_bytes) one extent at a time. */
	while (cur < file_offset + num_bytes) {
		u64 entry_end;
		u64 end;
		u32 len;

		node = tree_search(tree, cur);
		/* No ordered extents at all */
		if (!node)
			break;

		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
		entry_end = entry->file_offset + entry->num_bytes;
		/*
		 * |<-- OE --->|  |
		 *		cur
		 * Go to next OE.
		 */
		if (cur >= entry_end) {
			node = rb_next(node);
			/* No more ordered extents, exit */
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_ordered_extent,
					 rb_node);

			/* Go to next ordered extent and continue */
			cur = entry->file_offset;
			continue;
		}
		/*
		 * |	|<--- OE --->|
		 * cur
		 * Go to the start of OE.
		 */
		if (cur < entry->file_offset) {
			cur = entry->file_offset;
			continue;
		}

		/*
		 * Now we are definitely inside one ordered extent.
		 *
		 * |<--- OE --->|
		 *	|
		 *	cur
		 */
		end = min(entry->file_offset + entry->num_bytes,
			  file_offset + num_bytes) - 1;
		ASSERT(end + 1 - cur < U32_MAX);
		len = end + 1 - cur;

		if (can_finish_ordered_extent(entry, page, cur, len, uptodate)) {
			/* Drop the lock while queueing the completion work. */
			spin_unlock_irqrestore(&tree->lock, flags);
			btrfs_queue_ordered_fn(entry);
			spin_lock_irqsave(&tree->lock, flags);
		}
		cur += len;
	}
	spin_unlock_irqrestore(&tree->lock, flags);
}
474 :
475 : /*
476 : * Finish IO for one ordered extent across a given range. The range can only
477 : * contain one ordered extent.
478 : *
479 : * @cached: The cached ordered extent. If not NULL, we can skip the tree
480 : * search and use the ordered extent directly.
481 : * Will be also used to store the finished ordered extent.
482 : * @file_offset: File offset for the finished IO
483 : * @io_size: Length of the finish IO range
484 : *
485 : * Return true if the ordered extent is finished in the range, and update
486 : * @cached.
487 : * Return false otherwise.
488 : *
489 : * NOTE: The range can NOT cross multiple ordered extents.
490 : * Thus caller should ensure the range doesn't cross ordered extents.
491 : */
bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
				    struct btrfs_ordered_extent **cached,
				    u64 file_offset, u64 io_size)
{
	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;
	unsigned long flags;
	bool finished = false;

	spin_lock_irqsave(&tree->lock, flags);
	/* Use the caller's cached ordered extent to skip the tree search. */
	if (cached && *cached) {
		entry = *cached;
		goto have_entry;
	}

	node = tree_search(tree, file_offset);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
have_entry:
	/* The found extent must actually contain @file_offset. */
	if (!in_range(file_offset, entry->file_offset, entry->num_bytes))
		goto out;

	if (io_size > entry->bytes_left)
		btrfs_crit(inode->root->fs_info,
			   "bad ordered accounting left %llu size %llu",
			   entry->bytes_left, io_size);

	entry->bytes_left -= io_size;

	if (entry->bytes_left == 0) {
		/*
		 * Ensure only one caller can set the flag and finished_ret
		 * accordingly
		 */
		finished = !test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
		/* test_and_set_bit implies a barrier */
		cond_wake_up_nomb(&entry->wait);
	}
out:
	/* Hand the finished extent back to the caller with an extra ref. */
	if (finished && cached && entry) {
		*cached = entry;
		refcount_inc(&entry->refs);
		trace_btrfs_ordered_extent_dec_test_pending(inode, entry);
	}
	spin_unlock_irqrestore(&tree->lock, flags);
	return finished;
}
542 :
543 : /*
544 : * used to drop a reference on an ordered extent. This will free
545 : * the extent if the last reference is dropped
546 : */
547 16438603 : void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
548 : {
549 16438603 : struct list_head *cur;
550 16438603 : struct btrfs_ordered_sum *sum;
551 :
552 16438603 : trace_btrfs_ordered_extent_put(BTRFS_I(entry->inode), entry);
553 :
554 16437793 : if (refcount_dec_and_test(&entry->refs)) {
555 3410677 : ASSERT(list_empty(&entry->root_extent_list));
556 3410677 : ASSERT(list_empty(&entry->log_list));
557 3410677 : ASSERT(RB_EMPTY_NODE(&entry->rb_node));
558 3410677 : if (entry->inode)
559 3410677 : btrfs_add_delayed_iput(BTRFS_I(entry->inode));
560 6878605 : while (!list_empty(&entry->list)) {
561 3468384 : cur = entry->list.next;
562 3468384 : sum = list_entry(cur, struct btrfs_ordered_sum, list);
563 3468384 : list_del(&sum->list);
564 3468233 : kvfree(sum);
565 : }
566 3410221 : kmem_cache_free(btrfs_ordered_extent_cache, entry);
567 : }
568 16438899 : }
569 :
/*
 * Remove an ordered extent from the tree.  No references are dropped
 * and waiters are woken up.
 *
 * Undoes the outstanding extent count and delalloc metadata reservation
 * taken for the write, unhooks the entry from the inode's rb-tree and the
 * root's list, and wakes anyone waiting in btrfs_start_ordered_extent()
 * or on the transaction's pending-ordered counter.
 */
void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
				 struct btrfs_ordered_extent *entry)
{
	struct btrfs_ordered_inode_tree *tree;
	struct btrfs_root *root = btrfs_inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *node;
	bool pending;
	bool freespace_inode;

	/*
	 * If this is a free space inode the thread has not acquired the ordered
	 * extents lockdep map.
	 */
	freespace_inode = btrfs_is_free_space_inode(btrfs_inode);

	btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered);
	/* This is paired with btrfs_alloc_ordered_extent. */
	spin_lock(&btrfs_inode->lock);
	btrfs_mod_outstanding_extents(btrfs_inode, -1);
	spin_unlock(&btrfs_inode->lock);
	if (root != fs_info->tree_root) {
		u64 release;

		/* Encoded writes reserved metadata for the on-disk size. */
		if (test_bit(BTRFS_ORDERED_ENCODED, &entry->flags))
			release = entry->disk_num_bytes;
		else
			release = entry->num_bytes;
		btrfs_delalloc_release_metadata(btrfs_inode, release, false);
	}

	percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
				 fs_info->delalloc_batch);

	tree = &btrfs_inode->ordered_tree;
	spin_lock_irq(&tree->lock);
	node = &entry->rb_node;
	rb_erase(node, &tree->tree);
	RB_CLEAR_NODE(node);
	/* Drop the lookup cache if it points at the node we just erased. */
	if (tree->last == node)
		tree->last = NULL;
	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
	pending = test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags);
	spin_unlock_irq(&tree->lock);

	/*
	 * The current running transaction is waiting on us, we need to let it
	 * know that we're complete and wake it up.
	 */
	if (pending) {
		struct btrfs_transaction *trans;

		/*
		 * The checks for trans are just a formality, it should be set,
		 * but if it isn't we don't want to deref/assert under the spin
		 * lock, so be nice and check if trans is set, but ASSERT() so
		 * if it isn't set a developer will notice.
		 */
		spin_lock(&fs_info->trans_lock);
		trans = fs_info->running_transaction;
		if (trans)
			refcount_inc(&trans->use_count);
		spin_unlock(&fs_info->trans_lock);

		ASSERT(trans);
		if (trans) {
			if (atomic_dec_and_test(&trans->pending_ordered))
				wake_up(&trans->pending_wait);
			btrfs_put_transaction(trans);
		}
	}

	btrfs_lockdep_release(fs_info, btrfs_trans_pending_ordered);

	spin_lock(&root->ordered_extent_lock);
	list_del_init(&entry->root_extent_list);
	root->nr_ordered_extents--;

	trace_btrfs_ordered_extent_remove(btrfs_inode, entry);

	/* Last ordered extent of this root: unhook it from fs_info. */
	if (!root->nr_ordered_extents) {
		spin_lock(&fs_info->ordered_root_lock);
		BUG_ON(list_empty(&root->ordered_root));
		list_del_init(&root->ordered_root);
		spin_unlock(&fs_info->ordered_root_lock);
	}
	spin_unlock(&root->ordered_extent_lock);
	wake_up(&entry->wait);
	if (!freespace_inode)
		btrfs_lockdep_release(fs_info, btrfs_ordered_extent);
}
665 :
666 86747 : static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
667 : {
668 86747 : struct btrfs_ordered_extent *ordered;
669 :
670 86747 : ordered = container_of(work, struct btrfs_ordered_extent, flush_work);
671 86747 : btrfs_start_ordered_extent(ordered);
672 86737 : complete(&ordered->completion);
673 86728 : }
674 :
/*
 * Wait for pending ordered extents of a root whose disk byte range
 * intersects [@range_start, @range_start + @range_len), flushing at most
 * @nr of them (U64_MAX means no limit).  This is done when balancing
 * space between drives.
 *
 * Returns the number of ordered extents flushed and waited for.
 */
u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
			       const u64 range_start, const u64 range_len)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	LIST_HEAD(splice);
	LIST_HEAD(skipped);
	LIST_HEAD(works);
	struct btrfs_ordered_extent *ordered, *next;
	u64 count = 0;
	const u64 range_end = range_start + range_len;

	mutex_lock(&root->ordered_extent_mutex);
	spin_lock(&root->ordered_extent_lock);
	list_splice_init(&root->ordered_extents, &splice);
	while (!list_empty(&splice) && nr) {
		ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
					   root_extent_list);

		/* Outside the requested disk range: set aside and move on. */
		if (range_end <= ordered->disk_bytenr ||
		    ordered->disk_bytenr + ordered->disk_num_bytes <= range_start) {
			list_move_tail(&ordered->root_extent_list, &skipped);
			cond_resched_lock(&root->ordered_extent_lock);
			continue;
		}

		list_move_tail(&ordered->root_extent_list,
			       &root->ordered_extents);
		refcount_inc(&ordered->refs);
		spin_unlock(&root->ordered_extent_lock);

		btrfs_init_work(&ordered->flush_work,
				btrfs_run_ordered_extent_work, NULL, NULL);
		list_add_tail(&ordered->work_list, &works);
		btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work);

		cond_resched();
		spin_lock(&root->ordered_extent_lock);
		if (nr != U64_MAX)
			nr--;
		count++;
	}
	/* Put back everything we did not process. */
	list_splice_tail(&skipped, &root->ordered_extents);
	list_splice_tail(&splice, &root->ordered_extents);
	spin_unlock(&root->ordered_extent_lock);

	/* Wait for every queued flush work to run to completion. */
	list_for_each_entry_safe(ordered, next, &works, work_list) {
		list_del_init(&ordered->work_list);
		wait_for_completion(&ordered->completion);
		btrfs_put_ordered_extent(ordered);
		cond_resched();
	}
	mutex_unlock(&root->ordered_extent_mutex);

	return count;
}
734 :
735 38258 : void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
736 : const u64 range_start, const u64 range_len)
737 : {
738 38258 : struct btrfs_root *root;
739 38258 : struct list_head splice;
740 38258 : u64 done;
741 :
742 38258 : INIT_LIST_HEAD(&splice);
743 :
744 38258 : mutex_lock(&fs_info->ordered_operations_mutex);
745 38258 : spin_lock(&fs_info->ordered_root_lock);
746 38257 : list_splice_init(&fs_info->ordered_roots, &splice);
747 52166 : while (!list_empty(&splice) && nr) {
748 13909 : root = list_first_entry(&splice, struct btrfs_root,
749 : ordered_root);
750 13909 : root = btrfs_grab_root(root);
751 13909 : BUG_ON(!root);
752 13909 : list_move_tail(&root->ordered_root,
753 : &fs_info->ordered_roots);
754 13909 : spin_unlock(&fs_info->ordered_root_lock);
755 :
756 13909 : done = btrfs_wait_ordered_extents(root, nr,
757 : range_start, range_len);
758 13909 : btrfs_put_root(root);
759 :
760 13909 : spin_lock(&fs_info->ordered_root_lock);
761 13909 : if (nr != U64_MAX) {
762 1236 : nr -= done;
763 : }
764 : }
765 38257 : list_splice_tail(&splice, &fs_info->ordered_roots);
766 38257 : spin_unlock(&fs_info->ordered_root_lock);
767 38259 : mutex_unlock(&fs_info->ordered_operations_mutex);
768 38258 : }
769 :
/*
 * Start IO and wait for a given ordered extent to finish.
 *
 * Wait on page writeback for all the pages in the extent and the IO completion
 * code to insert metadata into the btree corresponding to the extent.
 */
void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry)
{
	u64 start = entry->file_offset;
	u64 end = start + entry->num_bytes - 1;
	struct btrfs_inode *inode = BTRFS_I(entry->inode);
	bool freespace_inode;

	trace_btrfs_ordered_extent_start(inode, entry);

	/*
	 * If this is a free space inode do not take the ordered extents lockdep
	 * map.
	 */
	freespace_inode = btrfs_is_free_space_inode(inode);

	/*
	 * pages in the range can be dirty, clean or writeback. We
	 * start IO on any dirty ones so the wait doesn't stall waiting
	 * for the flusher thread to find them
	 */
	if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
		filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end);

	if (!freespace_inode)
		btrfs_might_wait_for_event(inode->root->fs_info, btrfs_ordered_extent);
	/* Sleep until btrfs_remove_ordered_extent() marks the entry complete. */
	wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags));
}
803 :
/*
 * Used to wait on ordered extents across a large range of bytes.
 *
 * Returns the first writeback error seen, else the first ordered extent
 * IO error, else 0.
 */
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
	int ret = 0;
	int ret_wb = 0;
	u64 end;
	u64 orig_end;
	struct btrfs_ordered_extent *ordered;

	/* Clamp the end offset, guarding against u64 overflow of start+len. */
	if (start + len < start) {
		orig_end = OFFSET_MAX;
	} else {
		orig_end = start + len - 1;
		if (orig_end > OFFSET_MAX)
			orig_end = OFFSET_MAX;
	}

	/* start IO across the range first to instantiate any delalloc
	 * extents
	 */
	ret = btrfs_fdatawrite_range(inode, start, orig_end);
	if (ret)
		return ret;

	/*
	 * If we have a writeback error don't return immediately. Wait first
	 * for any ordered extents that haven't completed yet. This is to make
	 * sure no one can dirty the same page ranges and call writepages()
	 * before the ordered extents complete - to avoid failures (-EEXIST)
	 * when adding the new ordered extents to the ordered tree.
	 */
	ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);

	/* Walk backwards from the end of the range, waiting on each extent. */
	end = orig_end;
	while (1) {
		ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode), end);
		if (!ordered)
			break;
		/* Extent starts past the range: nothing left to wait for. */
		if (ordered->file_offset > orig_end) {
			btrfs_put_ordered_extent(ordered);
			break;
		}
		/* Extent ends before the range: done. */
		if (ordered->file_offset + ordered->num_bytes <= start) {
			btrfs_put_ordered_extent(ordered);
			break;
		}
		btrfs_start_ordered_extent(ordered);
		end = ordered->file_offset;
		/*
		 * If the ordered extent had an error save the error but don't
		 * exit without waiting first for all other ordered extents in
		 * the range to complete.
		 */
		if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
			ret = -EIO;
		btrfs_put_ordered_extent(ordered);
		if (end == 0 || end == start)
			break;
		end--;
	}
	/* Writeback errors take precedence over ordered IO errors. */
	return ret_wb ? ret_wb : ret;
}
868 :
869 : /*
870 : * find an ordered extent corresponding to file_offset. return NULL if
871 : * nothing is found, otherwise take a reference on the extent and return it
872 : */
873 4418589 : struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
874 : u64 file_offset)
875 : {
876 4418589 : struct btrfs_ordered_inode_tree *tree;
877 4418589 : struct rb_node *node;
878 4418589 : struct btrfs_ordered_extent *entry = NULL;
879 4418589 : unsigned long flags;
880 :
881 4418589 : tree = &inode->ordered_tree;
882 4418589 : spin_lock_irqsave(&tree->lock, flags);
883 4422427 : node = tree_search(tree, file_offset);
884 4412914 : if (!node)
885 1902391 : goto out;
886 :
887 2510523 : entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
888 2510523 : if (!in_range(file_offset, entry->file_offset, entry->num_bytes))
889 : entry = NULL;
890 2462907 : if (entry) {
891 2462907 : refcount_inc(&entry->refs);
892 2462967 : trace_btrfs_ordered_extent_lookup(inode, entry);
893 : }
894 0 : out:
895 4412936 : spin_unlock_irqrestore(&tree->lock, flags);
896 4421340 : return entry;
897 : }
898 :
899 : /* Since the DIO code tries to lock a wide area we need to look for any ordered
900 : * extents that exist in the range, rather than just the start of the range.
901 : */
902 44458969 : struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
903 : struct btrfs_inode *inode, u64 file_offset, u64 len)
904 : {
905 44458969 : struct btrfs_ordered_inode_tree *tree;
906 44458969 : struct rb_node *node;
907 44458969 : struct btrfs_ordered_extent *entry = NULL;
908 :
909 44458969 : tree = &inode->ordered_tree;
910 44458969 : spin_lock_irq(&tree->lock);
911 44509699 : node = tree_search(tree, file_offset);
912 44449677 : if (!node) {
913 41856521 : node = tree_search(tree, file_offset + len);
914 41844694 : if (!node)
915 41874201 : goto out;
916 : }
917 :
918 2827655 : while (1) {
919 2827655 : entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
920 2827655 : if (range_overlaps(entry, file_offset, len))
921 : break;
922 :
923 2121299 : if (entry->file_offset >= file_offset + len) {
924 : entry = NULL;
925 : break;
926 : }
927 1484875 : entry = NULL;
928 1484875 : node = rb_next(node);
929 1485153 : if (!node)
930 : break;
931 : }
932 1221147 : out:
933 44438128 : if (entry) {
934 706393 : refcount_inc(&entry->refs);
935 706393 : trace_btrfs_ordered_extent_lookup_range(inode, entry);
936 : }
937 44438128 : spin_unlock_irq(&tree->lock);
938 44510828 : return entry;
939 : }
940 :
941 : /*
942 : * Adds all ordered extents to the given list. The list ends up sorted by the
943 : * file_offset of the ordered extents.
944 : */
945 145424 : void btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode,
946 : struct list_head *list)
947 : {
948 145424 : struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
949 145424 : struct rb_node *n;
950 :
951 145424 : ASSERT(inode_is_locked(&inode->vfs_inode));
952 :
953 145424 : spin_lock_irq(&tree->lock);
954 433826 : for (n = rb_first(&tree->tree); n; n = rb_next(n)) {
955 288393 : struct btrfs_ordered_extent *ordered;
956 :
957 288393 : ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
958 :
959 576786 : if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
960 79468 : continue;
961 :
962 208925 : ASSERT(list_empty(&ordered->log_list));
963 208925 : list_add_tail(&ordered->log_list, list);
964 208918 : refcount_inc(&ordered->refs);
965 208932 : trace_btrfs_ordered_extent_lookup_for_logging(inode, ordered);
966 : }
967 145432 : spin_unlock_irq(&tree->lock);
968 145429 : }
969 :
970 : /*
971 : * lookup and return any extent before 'file_offset'. NULL is returned
972 : * if none is found
973 : */
974 : struct btrfs_ordered_extent *
975 12436365 : btrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset)
976 : {
977 12436365 : struct btrfs_ordered_inode_tree *tree;
978 12436365 : struct rb_node *node;
979 12436365 : struct btrfs_ordered_extent *entry = NULL;
980 :
981 12436365 : tree = &inode->ordered_tree;
982 12436365 : spin_lock_irq(&tree->lock);
983 12436978 : node = tree_search(tree, file_offset);
984 12436094 : if (!node)
985 10660173 : goto out;
986 :
987 1775921 : entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
988 1775921 : refcount_inc(&entry->refs);
989 1775922 : trace_btrfs_ordered_extent_lookup_first(inode, entry);
990 12436095 : out:
991 12436095 : spin_unlock_irq(&tree->lock);
992 12436479 : return entry;
993 : }
994 :
/*
 * Lookup the first ordered extent that overlaps the range
 * [@file_offset, @file_offset + @len).
 *
 * The difference between this and btrfs_lookup_first_ordered_extent() is
 * that this one won't return any ordered extent that does not overlap the range.
 * And the difference against btrfs_lookup_ordered_extent() is, this function
 * ensures the first ordered extent gets returned.
 *
 * Returns NULL when nothing overlaps; otherwise the extent is returned with
 * an extra reference that the caller must drop with btrfs_put_ordered_extent().
 */
struct btrfs_ordered_extent *btrfs_lookup_first_ordered_range(
			struct btrfs_inode *inode, u64 file_offset, u64 len)
{
	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
	struct rb_node *node;
	struct rb_node *cur;
	struct rb_node *prev;
	struct rb_node *next;
	struct btrfs_ordered_extent *entry = NULL;

	spin_lock_irq(&tree->lock);
	node = tree->tree.rb_node;
	/*
	 * Here we don't want to use tree_search() which will use tree->last
	 * and screw up the search order.
	 * And __tree_search() can't return the adjacent ordered extents
	 * either, thus here we do our own search.
	 */
	while (node) {
		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);

		if (file_offset < entry->file_offset) {
			node = node->rb_left;
		} else if (file_offset >= entry_end(entry)) {
			node = node->rb_right;
		} else {
			/*
			 * Direct hit, got an ordered extent that starts at
			 * @file_offset
			 */
			goto out;
		}
	}
	if (!entry) {
		/* Empty tree */
		goto out;
	}

	/*
	 * The search missed, but @entry still points at the last node visited
	 * during the descent, i.e. an ordered extent adjacent to @file_offset.
	 * Use it as the anchor for checking its neighbours below.
	 */
	cur = &entry->rb_node;
	/* We got an entry around @file_offset, check adjacent entries */
	if (entry->file_offset < file_offset) {
		prev = cur;
		next = rb_next(cur);
	} else {
		prev = rb_prev(cur);
		next = cur;
	}
	if (prev) {
		entry = rb_entry(prev, struct btrfs_ordered_extent, rb_node);
		if (range_overlaps(entry, file_offset, len))
			goto out;
	}
	if (next) {
		entry = rb_entry(next, struct btrfs_ordered_extent, rb_node);
		if (range_overlaps(entry, file_offset, len))
			goto out;
	}
	/* No ordered extent in the range */
	entry = NULL;
out:
	if (entry) {
		refcount_inc(&entry->refs);
		trace_btrfs_ordered_extent_lookup_first_range(inode, entry);
	}

	spin_unlock_irq(&tree->lock);
	return entry;
}
1072 :
1073 : /*
1074 : * Lock the passed range and ensures all pending ordered extents in it are run
1075 : * to completion.
1076 : *
1077 : * @inode: Inode whose ordered tree is to be searched
1078 : * @start: Beginning of range to flush
1079 : * @end: Last byte of range to lock
1080 : * @cached_state: If passed, will return the extent state responsible for the
1081 : * locked range. It's the caller's responsibility to free the
1082 : * cached state.
1083 : *
1084 : * Always return with the given range locked, ensuring after it's called no
1085 : * order extent can be pending.
1086 : */
1087 19003358 : void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
1088 : u64 end,
1089 : struct extent_state **cached_state)
1090 : {
1091 19003358 : struct btrfs_ordered_extent *ordered;
1092 19003358 : struct extent_state *cache = NULL;
1093 19003358 : struct extent_state **cachedp = &cache;
1094 :
1095 19003358 : if (cached_state)
1096 1651485 : cachedp = cached_state;
1097 :
1098 19025814 : while (1) {
1099 19014586 : lock_extent(&inode->io_tree, start, end, cachedp);
1100 19008535 : ordered = btrfs_lookup_ordered_range(inode, start,
1101 19008535 : end - start + 1);
1102 19010040 : if (!ordered) {
1103 : /*
1104 : * If no external cached_state has been passed then
1105 : * decrement the extra ref taken for cachedp since we
1106 : * aren't exposing it outside of this function
1107 : */
1108 18998812 : if (!cached_state)
1109 17344991 : refcount_dec(&cache->refs);
1110 19006981 : break;
1111 : }
1112 11228 : unlock_extent(&inode->io_tree, start, end, cachedp);
1113 11228 : btrfs_start_ordered_extent(ordered);
1114 11228 : btrfs_put_ordered_extent(ordered);
1115 : }
1116 19006981 : }
1117 :
1118 : /*
1119 : * Lock the passed range and ensure all pending ordered extents in it are run
1120 : * to completion in nowait mode.
1121 : *
1122 : * Return true if btrfs_lock_ordered_range does not return any extents,
1123 : * otherwise false.
1124 : */
1125 0 : bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
1126 : struct extent_state **cached_state)
1127 : {
1128 0 : struct btrfs_ordered_extent *ordered;
1129 :
1130 0 : if (!try_lock_extent(&inode->io_tree, start, end, cached_state))
1131 : return false;
1132 :
1133 0 : ordered = btrfs_lookup_ordered_range(inode, start, end - start + 1);
1134 0 : if (!ordered)
1135 : return true;
1136 :
1137 0 : btrfs_put_ordered_extent(ordered);
1138 0 : unlock_extent(&inode->io_tree, start, end, cached_state);
1139 :
1140 0 : return false;
1141 : }
1142 :
/*
 * Split out a new ordered extent for this first @len bytes of @ordered.
 *
 * The new extent takes over [file_offset, file_offset + len) and @ordered is
 * shrunk to the remainder. Both extents end up in the inode's ordered tree
 * and on the root's ordered_extents list. Returns the new extent (with an
 * extra tree reference) or an ERR_PTR on invalid split requests.
 */
struct btrfs_ordered_extent *btrfs_split_ordered_extent(
			struct btrfs_ordered_extent *ordered, u64 len)
{
	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 file_offset = ordered->file_offset;
	u64 disk_bytenr = ordered->disk_bytenr;
	unsigned long flags = ordered->flags;
	struct btrfs_ordered_sum *sum, *tmpsum;
	struct btrfs_ordered_extent *new;
	struct rb_node *node;
	u64 offset = 0;

	trace_btrfs_ordered_extent_split(inode, ordered);

	ASSERT(!(flags & (1U << BTRFS_ORDERED_COMPRESSED)));

	/*
	 * The entire bio must be covered by the ordered extent, but we can't
	 * reduce the original extent to a zero length either.
	 */
	if (WARN_ON_ONCE(len >= ordered->num_bytes))
		return ERR_PTR(-EINVAL);
	/* We cannot split partially completed ordered extents. */
	if (ordered->bytes_left) {
		ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS));
		if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes))
			return ERR_PTR(-EINVAL);
	}
	/* We cannot split a compressed ordered extent. */
	if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes))
		return ERR_PTR(-EINVAL);

	new = alloc_ordered_extent(inode, file_offset, len, len, disk_bytenr,
				   len, 0, flags, ordered->compress_type);
	if (IS_ERR(new))
		return new;

	/* One ref for the tree. */
	refcount_inc(&new->refs);

	/*
	 * Take the root's lock first, then the tree's: the extent is removed,
	 * adjusted and re-inserted while both are held so no lookup can see a
	 * half-split state.
	 */
	spin_lock_irq(&root->ordered_extent_lock);
	spin_lock(&tree->lock);
	/* Remove from tree once */
	node = &ordered->rb_node;
	rb_erase(node, &tree->tree);
	RB_CLEAR_NODE(node);
	if (tree->last == node)
		tree->last = NULL;

	/* Shrink the original extent to the tail that remains after the split. */
	ordered->file_offset += len;
	ordered->disk_bytenr += len;
	ordered->num_bytes -= len;
	ordered->disk_num_bytes -= len;

	if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
		ASSERT(ordered->bytes_left == 0);
		new->bytes_left = 0;
	} else {
		ordered->bytes_left -= len;
	}

	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) {
		if (ordered->truncated_len > len) {
			ordered->truncated_len -= len;
		} else {
			/* Truncation point falls inside the split-off head. */
			new->truncated_len = ordered->truncated_len;
			ordered->truncated_len = 0;
		}
	}

	/* Move the checksums covering the first @len bytes to the new extent. */
	list_for_each_entry_safe(sum, tmpsum, &ordered->list, list) {
		if (offset == len)
			break;
		list_move_tail(&sum->list, &new->list);
		offset += sum->len;
	}

	/* Re-insert the node */
	node = tree_insert(&tree->tree, ordered->file_offset, &ordered->rb_node);
	if (node)
		btrfs_panic(fs_info, -EEXIST,
			"zoned: inconsistency in ordered tree at offset %llu",
			ordered->file_offset);

	node = tree_insert(&tree->tree, new->file_offset, &new->rb_node);
	if (node)
		btrfs_panic(fs_info, -EEXIST,
			"zoned: inconsistency in ordered tree at offset %llu",
			new->file_offset);
	spin_unlock(&tree->lock);

	list_add_tail(&new->root_extent_list, &root->ordered_extents);
	root->nr_ordered_extents++;
	spin_unlock_irq(&root->ordered_extent_lock);
	return new;
}
1243 :
1244 11 : int __init ordered_data_init(void)
1245 : {
1246 11 : btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
1247 : sizeof(struct btrfs_ordered_extent), 0,
1248 : SLAB_MEM_SPREAD,
1249 : NULL);
1250 11 : if (!btrfs_ordered_extent_cache)
1251 0 : return -ENOMEM;
1252 :
1253 : return 0;
1254 : }
1255 :
/* Destroy the ordered extent slab cache; called on module exit. */
void __cold ordered_data_exit(void)
{
	kmem_cache_destroy(btrfs_ordered_extent_cache);
}
|