LCOV - code coverage report
Current view: top level - fs/btrfs - extent-tree.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023 Lines: 2292 3008 76.2 %
Date: 2023-07-31 20:08:34 Functions: 95 100 95.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2007 Oracle.  All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/sched.h>
       7             : #include <linux/sched/signal.h>
       8             : #include <linux/pagemap.h>
       9             : #include <linux/writeback.h>
      10             : #include <linux/blkdev.h>
      11             : #include <linux/sort.h>
      12             : #include <linux/rcupdate.h>
      13             : #include <linux/kthread.h>
      14             : #include <linux/slab.h>
      15             : #include <linux/ratelimit.h>
      16             : #include <linux/percpu_counter.h>
      17             : #include <linux/lockdep.h>
      18             : #include <linux/crc32c.h>
      19             : #include "ctree.h"
      20             : #include "extent-tree.h"
      21             : #include "tree-log.h"
      22             : #include "disk-io.h"
      23             : #include "print-tree.h"
      24             : #include "volumes.h"
      25             : #include "raid56.h"
      26             : #include "locking.h"
      27             : #include "free-space-cache.h"
      28             : #include "free-space-tree.h"
      29             : #include "sysfs.h"
      30             : #include "qgroup.h"
      31             : #include "ref-verify.h"
      32             : #include "space-info.h"
      33             : #include "block-rsv.h"
      34             : #include "delalloc-space.h"
      35             : #include "discard.h"
      36             : #include "rcu-string.h"
      37             : #include "zoned.h"
      38             : #include "dev-replace.h"
      39             : #include "fs.h"
      40             : #include "accessors.h"
      41             : #include "root-tree.h"
      42             : #include "file-item.h"
      43             : #include "orphan.h"
      44             : #include "tree-checker.h"
      45             : 
      46             : #undef SCRAMBLE_DELAYED_REFS
      47             : 
      48             : 
      49             : static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
      50             :                                struct btrfs_delayed_ref_node *node, u64 parent,
      51             :                                u64 root_objectid, u64 owner_objectid,
      52             :                                u64 owner_offset, int refs_to_drop,
      53             :                                struct btrfs_delayed_extent_op *extra_op);
      54             : static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
      55             :                                     struct extent_buffer *leaf,
      56             :                                     struct btrfs_extent_item *ei);
      57             : static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
      58             :                                       u64 parent, u64 root_objectid,
      59             :                                       u64 flags, u64 owner, u64 offset,
      60             :                                       struct btrfs_key *ins, int ref_mod);
      61             : static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
      62             :                                      struct btrfs_delayed_ref_node *node,
      63             :                                      struct btrfs_delayed_extent_op *extent_op);
      64             : static int find_next_key(struct btrfs_path *path, int level,
      65             :                          struct btrfs_key *key);
      66             : 
      67             : static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
      68             : {
      69    22809167 :         return (cache->flags & bits) == bits;
      70             : }
      71             : 
      72        3248 : int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
      73             :                               u64 start, u64 num_bytes)
      74             : {
      75        3248 :         u64 end = start + num_bytes - 1;
      76        3248 :         set_extent_bit(&fs_info->excluded_extents, start, end,
      77             :                        EXTENT_UPTODATE, NULL);
      78        3248 :         return 0;
      79             : }
      80             : 
      81       31306 : void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
      82             : {
      83       31306 :         struct btrfs_fs_info *fs_info = cache->fs_info;
      84       31306 :         u64 start, end;
      85             : 
      86       31306 :         start = cache->start;
      87       31306 :         end = start + cache->length - 1;
      88             : 
      89       31306 :         clear_extent_bits(&fs_info->excluded_extents, start, end,
      90             :                           EXTENT_UPTODATE);
      91       31306 : }
      92             : 
      93             : /* simple helper to search for an existing data extent at a given offset */
      94       95201 : int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
      95             : {
      96       95201 :         struct btrfs_root *root = btrfs_extent_root(fs_info, start);
      97       95201 :         int ret;
      98       95201 :         struct btrfs_key key;
      99       95201 :         struct btrfs_path *path;
     100             : 
     101       95201 :         path = btrfs_alloc_path();
     102       95201 :         if (!path)
     103             :                 return -ENOMEM;
     104             : 
     105       95201 :         key.objectid = start;
     106       95201 :         key.offset = len;
     107       95201 :         key.type = BTRFS_EXTENT_ITEM_KEY;
     108       95201 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
     109       95201 :         btrfs_free_path(path);
     110       95201 :         return ret;
     111             : }
     112             : 
     113             : /*
     114             :  * Helper function to look up the reference count and flags of a tree block.
     115             :  *
     116             :  * The head node for a delayed ref stores the sum of all the reference
     117             :  * count modifications queued up in the rbtree. The head node may also
     118             :  * store the extent flags to set. This way you can see what the reference
     119             :  * count and extent flags will be once the queued delayed refs are applied,
     120             :  * even though they have not been processed yet.
     121             :  */
     122    10406873 : int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
     123             :                              struct btrfs_fs_info *fs_info, u64 bytenr,
     124             :                              u64 offset, int metadata, u64 *refs, u64 *flags)
     125             : {
     126    10406873 :         struct btrfs_root *extent_root;
     127    10406873 :         struct btrfs_delayed_ref_head *head;
     128    10406873 :         struct btrfs_delayed_ref_root *delayed_refs;
     129    10406873 :         struct btrfs_path *path;
     130    10406873 :         struct btrfs_extent_item *ei;
     131    10406873 :         struct extent_buffer *leaf;
     132    10406873 :         struct btrfs_key key;
     133    10406873 :         u32 item_size;
     134    10406873 :         u64 num_refs;
     135    10406873 :         u64 extent_flags;
     136    10406873 :         int ret;
     137             : 
     138             :         /*
     139             :          * If we don't have skinny metadata, don't bother doing anything
     140             :          * different
     141             :          */
     142    10406873 :         if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
     143           0 :                 offset = fs_info->nodesize;
     144           0 :                 metadata = 0;
     145             :         }
     146             : 
     147    10406873 :         path = btrfs_alloc_path();
     148    10406873 :         if (!path)
     149             :                 return -ENOMEM;
     150             : 
     151    10406873 :         if (!trans) {
     152           0 :                 path->skip_locking = 1;
     153           0 :                 path->search_commit_root = 1;
     154             :         }
     155             : 
     156    10406873 : search_again:
     157    10406939 :         key.objectid = bytenr;
     158    10406939 :         key.offset = offset;
     159    10406939 :         if (metadata)
     160    10406939 :                 key.type = BTRFS_METADATA_ITEM_KEY;
     161             :         else
     162           0 :                 key.type = BTRFS_EXTENT_ITEM_KEY;
     163             : 
     164    10406939 :         extent_root = btrfs_extent_root(fs_info, bytenr);
     165    10406939 :         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
     166    10406938 :         if (ret < 0)
     167           0 :                 goto out_free;
     168             : 
     169    10406938 :         if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
     170         721 :                 if (path->slots[0]) {
     171         721 :                         path->slots[0]--;
     172         721 :                         btrfs_item_key_to_cpu(path->nodes[0], &key,
     173             :                                               path->slots[0]);
     174         721 :                         if (key.objectid == bytenr &&
     175           0 :                             key.type == BTRFS_EXTENT_ITEM_KEY &&
     176           0 :                             key.offset == fs_info->nodesize)
     177             :                                 ret = 0;
     178             :                 }
     179             :         }
     180             : 
     181    10406938 :         if (ret == 0) {
     182    10406217 :                 leaf = path->nodes[0];
     183    10406217 :                 item_size = btrfs_item_size(leaf, path->slots[0]);
     184    10406217 :                 if (item_size >= sizeof(*ei)) {
     185    10406217 :                         ei = btrfs_item_ptr(leaf, path->slots[0],
     186             :                                             struct btrfs_extent_item);
     187    10406218 :                         num_refs = btrfs_extent_refs(leaf, ei);
     188    10406218 :                         extent_flags = btrfs_extent_flags(leaf, ei);
     189             :                 } else {
     190           0 :                         ret = -EINVAL;
     191           0 :                         btrfs_print_v0_err(fs_info);
     192           0 :                         if (trans)
     193           0 :                                 btrfs_abort_transaction(trans, ret);
     194             :                         else
     195           0 :                                 btrfs_handle_fs_error(fs_info, ret, NULL);
     196             : 
     197           0 :                         goto out_free;
     198             :                 }
     199             : 
     200    10406218 :                 BUG_ON(num_refs == 0);
     201             :         } else {
     202             :                 num_refs = 0;
     203             :                 extent_flags = 0;
     204             :                 ret = 0;
     205             :         }
     206             : 
     207    10406939 :         if (!trans)
     208           0 :                 goto out;
     209             : 
     210    10406939 :         delayed_refs = &trans->transaction->delayed_refs;
     211    10406939 :         spin_lock(&delayed_refs->lock);
     212    10406939 :         head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
     213    10406939 :         if (head) {
     214     3824061 :                 if (!mutex_trylock(&head->mutex)) {
     215          66 :                         refcount_inc(&head->refs);
     216          66 :                         spin_unlock(&delayed_refs->lock);
     217             : 
     218          66 :                         btrfs_release_path(path);
     219             : 
     220             :                         /*
     221             :                          * Mutex was contended, block until it's released and try
     222             :                          * again
     223             :                          */
     224          66 :                         mutex_lock(&head->mutex);
     225          66 :                         mutex_unlock(&head->mutex);
     226          66 :                         btrfs_put_delayed_ref_head(head);
     227          66 :                         goto search_again;
     228             :                 }
     229     3823995 :                 spin_lock(&head->lock);
     230     3823995 :                 if (head->extent_op && head->extent_op->update_flags)
     231        2426 :                         extent_flags |= head->extent_op->flags_to_set;
     232             :                 else
     233     3821569 :                         BUG_ON(num_refs == 0);
     234             : 
     235     3823995 :                 num_refs += head->ref_mod;
     236     3823995 :                 spin_unlock(&head->lock);
     237     3823995 :                 mutex_unlock(&head->mutex);
     238             :         }
     239    10406873 :         spin_unlock(&delayed_refs->lock);
     240    10406873 : out:
     241    10406873 :         WARN_ON(num_refs == 0);
     242    10406873 :         if (refs)
     243    10406873 :                 *refs = num_refs;
     244    10406873 :         if (flags)
     245    10406873 :                 *flags = extent_flags;
     246           0 : out_free:
     247    10406873 :         btrfs_free_path(path);
     248    10406873 :         return ret;
     249             : }
     250             : 
     251             : /*
     252             :  * Back reference rules.  Back refs have three main goals:
     253             :  *
     254             :  * 1) differentiate between all holders of references to an extent so that
     255             :  *    when a reference is dropped we can make sure it was a valid reference
     256             :  *    before freeing the extent.
     257             :  *
     258             :  * 2) Provide enough information to quickly find the holders of an extent
     259             :  *    if we notice a given block is corrupted or bad.
     260             :  *
     261             :  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
     262             :  *    maintenance.  This is actually the same as #2, but with a slightly
     263             :  *    different use case.
     264             :  *
     265             :  * There are two kinds of back refs. Implicit back refs are optimized
     266             :  * for pointers in non-shared tree blocks. For a given pointer in a block,
     267             :  * back refs of this kind provide information about the block's owner tree
     268             :  * and the pointer's key. This information allows us to find the block by
     269             :  * b-tree searching. Full back refs are for pointers in tree blocks not
     270             :  * referenced by their owner trees. The location of the tree block is
     271             :  * recorded in the back ref. Full back refs are actually generic and can be
     272             :  * used in all cases where implicit back refs are used. The major shortcoming
     273             :  * of full back refs is their overhead: every time a tree block gets
     274             :  * COWed, we have to update the back ref entries for all pointers in it.
     275             :  *
     276             :  * For a newly allocated tree block, we use implicit back refs for
     277             :  * pointers in it. This means most tree related operations only involve
     278             :  * implicit back refs. For a tree block created in an old transaction, the
     279             :  * only way to drop a reference to it is to COW it. So we can detect the
     280             :  * event that the tree block loses its owner tree's reference and do the
     281             :  * back refs conversion.
     282             :  *
     283             :  * When a tree block is COWed through a tree, there are four cases:
     284             :  *
     285             :  * The reference count of the block is one and the tree is the block's
     286             :  * owner tree. Nothing to do in this case.
     287             :  *
     288             :  * The reference count of the block is one and the tree is not the
     289             :  * block's owner tree. In this case, full back refs are used for pointers
     290             :  * in the block. Remove these full back refs and add implicit back refs
     291             :  * for every pointer in the new block.
     292             :  *
     293             :  * The reference count of the block is greater than one and the tree is
     294             :  * the block's owner tree. In this case, implicit back refs are used for
     295             :  * pointers in the block. Add full back refs for every pointer in the
     296             :  * block and increase lower level extents' reference counts. The original
     297             :  * implicit back refs are entailed to the new block.
     298             :  *
     299             :  * The reference count of the block is greater than one and the tree is
     300             :  * not the block's owner tree. Add implicit back refs for every pointer in
     301             :  * the new block, increase lower level extents' reference count.
     302             :  *
     303             :  * Back Reference Key composing:
     304             :  *
     305             :  * The key objectid corresponds to the first byte in the extent,
     306             :  * The key type is used to differentiate between types of back refs.
     307             :  * There are different meanings of the key offset for different types
     308             :  * of back refs.
     309             :  *
     310             :  * File extents can be referenced by:
     311             :  *
     312             :  * - multiple snapshots, subvolumes, or different generations in one subvol
     313             :  * - different files inside a single subvolume
     314             :  * - different offsets inside a file (bookend extents in file.c)
     315             :  *
     316             :  * The extent ref structure for the implicit back refs has fields for:
     317             :  *
     318             :  * - Objectid of the subvolume root
     319             :  * - objectid of the file holding the reference
     320             :  * - original offset in the file
     321             :  * - how many bookend extents
     322             :  *
     323             :  * The key offset for the implicit back refs is the hash of the first
     324             :  * three fields.
     325             :  *
     326             :  * The extent ref structure for the full back refs has field for:
     327             :  *
     328             :  * - number of pointers in the tree leaf
     329             :  *
     330             :  * The key offset for the full back refs is the first byte of
     331             :  * the tree leaf.
     332             :  *
     333             :  * When a file extent is allocated, implicit back refs are used.
     334             :  * The fields are filled in:
     335             :  *
     336             :  *     (root_key.objectid, inode objectid, offset in file, 1)
     337             :  *
     338             :  * When a file extent is removed by file truncation, we find the
     339             :  * corresponding implicit back refs and check the following fields:
     340             :  *
     341             :  *     (btrfs_header_owner(leaf), inode objectid, offset in file)
     342             :  *
     343             :  * Btree extents can be referenced by:
     344             :  *
     345             :  * - Different subvolumes
     346             :  *
     347             :  * Both the implicit back refs and the full back refs for tree blocks
     348             :  * only consist of key. The key offset for the implicit back refs is
     349             :  * objectid of block's owner tree. The key offset for the full back refs
     350             :  * is the first byte of parent block.
     351             :  *
     352             :  * When implicit back refs are used, information about the lowest key and
     353             :  * level of the tree block is required. This information is stored in the
     354             :  * tree block info structure.
     355             :  */
     356             : 
     357             : /*
     358             :  * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
     359             :  * is_data == BTRFS_REF_TYPE_DATA, data type is required,
     360             :  * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
     361             :  */
     362   334951002 : int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
     363             :                                      struct btrfs_extent_inline_ref *iref,
     364             :                                      enum btrfs_inline_ref_type is_data)
     365             : {
     366   334951002 :         int type = btrfs_extent_inline_ref_type(eb, iref);
     367   334917746 :         u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
     368             : 
     369   334926753 :         if (type == BTRFS_TREE_BLOCK_REF_KEY ||
     370   334926753 :             type == BTRFS_SHARED_BLOCK_REF_KEY ||
     371   274601921 :             type == BTRFS_SHARED_DATA_REF_KEY ||
     372   274601921 :             type == BTRFS_EXTENT_DATA_REF_KEY) {
     373   334928890 :                 if (is_data == BTRFS_REF_TYPE_BLOCK) {
     374    36494169 :                         if (type == BTRFS_TREE_BLOCK_REF_KEY)
     375             :                                 return type;
     376    29939967 :                         if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
     377    29939977 :                                 ASSERT(eb->fs_info);
     378             :                                 /*
     379             :                                  * Every shared one has parent tree block,
     380             :                                  * which must be aligned to sector size.
     381             :                                  */
     382    29939977 :                                 if (offset &&
     383    29939965 :                                     IS_ALIGNED(offset, eb->fs_info->sectorsize))
     384             :                                         return type;
     385             :                         }
     386   298434721 :                 } else if (is_data == BTRFS_REF_TYPE_DATA) {
     387   264947791 :                         if (type == BTRFS_EXTENT_DATA_REF_KEY)
     388             :                                 return type;
     389    39253402 :                         if (type == BTRFS_SHARED_DATA_REF_KEY) {
     390    39253408 :                                 ASSERT(eb->fs_info);
     391             :                                 /*
     392             :                                  * Every shared one has parent tree block,
     393             :                                  * which must be aligned to sector size.
     394             :                                  */
     395    39253408 :                                 if (offset &&
     396    39253490 :                                     IS_ALIGNED(offset, eb->fs_info->sectorsize))
     397             :                                         return type;
     398             :                         }
     399             :                 } else {
     400             :                         ASSERT(is_data == BTRFS_REF_TYPE_ANY);
     401             :                         return type;
     402             :                 }
     403             :         }
     404             : 
     405           0 :         btrfs_print_leaf(eb);
     406           0 :         btrfs_err(eb->fs_info,
     407             :                   "eb %llu iref 0x%lx invalid extent inline ref type %d",
     408             :                   eb->start, (unsigned long)iref, type);
     409           0 :         WARN_ON(1);
     410             : 
     411             :         return BTRFS_REF_TYPE_INVALID;
     412             : }
     413             : 
     414   449826866 : u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
     415             : {
     416   449826866 :         u32 high_crc = ~(u32)0;
     417   449826866 :         u32 low_crc = ~(u32)0;
     418   449826866 :         __le64 lenum;
     419             : 
     420   449826866 :         lenum = cpu_to_le64(root_objectid);
     421   449826866 :         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
     422   449844770 :         lenum = cpu_to_le64(owner);
     423   449844770 :         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
     424   449893803 :         lenum = cpu_to_le64(offset);
     425   449893803 :         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
     426             : 
     427   449939944 :         return ((u64)high_crc << 31) ^ (u64)low_crc;
     428             : }
     429             : 
     430   218854697 : static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
     431             :                                      struct btrfs_extent_data_ref *ref)
     432             : {
     433   437703611 :         return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
     434             :                                     btrfs_extent_data_ref_objectid(leaf, ref),
     435             :                                     btrfs_extent_data_ref_offset(leaf, ref));
     436             : }
     437             : 
     438   220349688 : static int match_extent_data_ref(struct extent_buffer *leaf,
     439             :                                  struct btrfs_extent_data_ref *ref,
     440             :                                  u64 root_objectid, u64 owner, u64 offset)
     441             : {
     442   440530200 :         if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
     443   100359522 :             btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
     444             :             btrfs_extent_data_ref_offset(leaf, ref) != offset)
     445   218850334 :                 return 0;
     446             :         return 1;
     447             : }
     448             : 
     449      205686 : static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
     450             :                                            struct btrfs_path *path,
     451             :                                            u64 bytenr, u64 parent,
     452             :                                            u64 root_objectid,
     453             :                                            u64 owner, u64 offset)
     454             : {
     455      205686 :         struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
     456      205686 :         struct btrfs_key key;
     457      205686 :         struct btrfs_extent_data_ref *ref;
     458      205686 :         struct extent_buffer *leaf;
     459      205686 :         u32 nritems;
     460      205686 :         int ret;
     461      205686 :         int recow;
     462      205686 :         int err = -ENOENT;
     463             : 
     464      205686 :         key.objectid = bytenr;
     465      205686 :         if (parent) {
     466       72553 :                 key.type = BTRFS_SHARED_DATA_REF_KEY;
     467       72553 :                 key.offset = parent;
     468             :         } else {
     469      133133 :                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
     470      133133 :                 key.offset = hash_extent_data_ref(root_objectid,
     471             :                                                   owner, offset);
     472             :         }
     473             : again:
     474      205686 :         recow = 0;
     475      205686 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
     476      205686 :         if (ret < 0) {
     477           0 :                 err = ret;
     478           0 :                 goto fail;
     479             :         }
     480             : 
     481      205686 :         if (parent) {
     482       72553 :                 if (!ret)
     483             :                         return 0;
     484           0 :                 goto fail;
     485             :         }
     486             : 
     487      133133 :         leaf = path->nodes[0];
     488      133133 :         nritems = btrfs_header_nritems(leaf);
     489      133133 :         while (1) {
     490      133133 :                 if (path->slots[0] >= nritems) {
     491           0 :                         ret = btrfs_next_leaf(root, path);
     492           0 :                         if (ret < 0)
     493           0 :                                 err = ret;
     494           0 :                         if (ret)
     495           0 :                                 goto fail;
     496             : 
     497           0 :                         leaf = path->nodes[0];
     498           0 :                         nritems = btrfs_header_nritems(leaf);
     499           0 :                         recow = 1;
     500             :                 }
     501             : 
     502      133133 :                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
     503      133133 :                 if (key.objectid != bytenr ||
     504      133133 :                     key.type != BTRFS_EXTENT_DATA_REF_KEY)
     505           0 :                         goto fail;
     506             : 
     507      133133 :                 ref = btrfs_item_ptr(leaf, path->slots[0],
     508             :                                      struct btrfs_extent_data_ref);
     509             : 
     510      133133 :                 if (match_extent_data_ref(leaf, ref, root_objectid,
     511             :                                           owner, offset)) {
     512      133133 :                         if (recow) {
     513           0 :                                 btrfs_release_path(path);
     514           0 :                                 goto again;
     515             :                         }
     516             :                         err = 0;
     517             :                         break;
     518             :                 }
     519           0 :                 path->slots[0]++;
     520             :         }
     521             : fail:
     522             :         return err;
     523             : }
     524             : /*
                     :  * Insert a keyed data back ref item for the extent at @bytenr, or add
                     :  * @refs_to_add to the count of an already existing matching item.
                     :  *
                     :  * A non-zero @parent selects a BTRFS_SHARED_DATA_REF_KEY item keyed by
                     :  * @parent.  Otherwise a BTRFS_EXTENT_DATA_REF_KEY item is used, keyed by
                     :  * hash_extent_data_ref(@root_objectid, @owner, @offset); if that key is
                     :  * already taken by a different ref, the key offset is incremented until
                     :  * a free key or the matching ref is found.
                     :  *
                     :  * The path is released before returning.  Returns 0 on success, or the
                     :  * error from btrfs_insert_empty_item() (anything other than -EEXIST).
                     :  */
     525    12623358 : static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
     526             :                                            struct btrfs_path *path,
     527             :                                            u64 bytenr, u64 parent,
     528             :                                            u64 root_objectid, u64 owner,
     529             :                                            u64 offset, int refs_to_add)
     530             : {
     531    12623358 :         struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
     532    12623306 :         struct btrfs_key key;
     533    12623306 :         struct extent_buffer *leaf;
     534    12623306 :         u32 size;
     535    12623306 :         u32 num_refs;
     536    12623306 :         int ret;
     537             : 
     538    12623306 :         key.objectid = bytenr;
     539    12623306 :         if (parent) {
     540      500590 :                 key.type = BTRFS_SHARED_DATA_REF_KEY;
     541      500590 :                 key.offset = parent;
     542      500590 :                 size = sizeof(struct btrfs_shared_data_ref);
     543             :         } else {
     544    12122716 :                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
     545    12122716 :                 key.offset = hash_extent_data_ref(root_objectid,
     546             :                                                   owner, offset);
     547    12122746 :                 size = sizeof(struct btrfs_extent_data_ref);
     548             :         }
     549             : 
     550    12623336 :         ret = btrfs_insert_empty_item(trans, root, path, &key, size);
     551    12622793 :         if (ret && ret != -EEXIST) /* -EEXIST is handled below by updating the existing item */
     552           0 :                 goto fail;
     553             : 
     554    12622793 :         leaf = path->nodes[0];
     555    12622793 :         if (parent) {
     556      500568 :                 struct btrfs_shared_data_ref *ref;
     557      500568 :                 ref = btrfs_item_ptr(leaf, path->slots[0],
     558             :                                      struct btrfs_shared_data_ref);
     559      500551 :                 if (ret == 0) {
     560      500480 :                         btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
     561             :                 } else {
     562          71 :                         num_refs = btrfs_shared_data_ref_count(leaf, ref); /* item existed: add to its count */
     563          71 :                         num_refs += refs_to_add;
     564          71 :                         btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
     565             :                 }
     566             :         } else {
     567             :                 struct btrfs_extent_data_ref *ref;
     568    12122225 :                 while (ret == -EEXIST) { /* key is taken: same ref, or a hash collision? */
     569        3367 :                         ref = btrfs_item_ptr(leaf, path->slots[0],
     570             :                                              struct btrfs_extent_data_ref);
     571        3355 :                         if (match_extent_data_ref(leaf, ref, root_objectid,
     572             :                                                   owner, offset))
     573             :                                 break; /* same ref: its count is bumped below */
     574           0 :                         btrfs_release_path(path);
     575           0 :                         key.offset++; /* collision: retry at the next key offset */
     576           0 :                         ret = btrfs_insert_empty_item(trans, root, path, &key,
     577             :                                                       size);
     578           0 :                         if (ret && ret != -EEXIST)
     579           0 :                                 goto fail;
     580             : 
     581           0 :                         leaf = path->nodes[0];
     582             :                 }
     583    12122213 :                 ref = btrfs_item_ptr(leaf, path->slots[0],
     584             :                                      struct btrfs_extent_data_ref);
     585    12122113 :                 if (ret == 0) { /* freshly inserted item: fill in every field */
     586    12118758 :                         btrfs_set_extent_data_ref_root(leaf, ref,
     587             :                                                        root_objectid);
     588    12118630 :                         btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
     589    12118617 :                         btrfs_set_extent_data_ref_offset(leaf, ref, offset);
     590    12118633 :                         btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
     591             :                 } else {
     592        3355 :                         num_refs = btrfs_extent_data_ref_count(leaf, ref);
     593        3355 :                         num_refs += refs_to_add;
     594        3355 :                         btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
     595             :                 }
     596             :         }
     597    12622750 :         btrfs_mark_buffer_dirty(leaf);
     598    12622750 :         ret = 0; /* an -EEXIST left in ret was handled above */
     599    12622995 : fail:
     600    12622995 :         btrfs_release_path(path);
     601    12623365 :         return ret;
     602             : }
     603             : /*
                     :  * Drop @refs_to_drop references from the keyed data back ref item that
                     :  * @path currently points at (an extent data ref or a shared data ref).
                     :  *
                     :  * The item is deleted when its count reaches zero; otherwise the reduced
                     :  * count is written back and the leaf is marked dirty.
                     :  *
                     :  * Returns 0, or the result of btrfs_del_item() when the item is deleted.
                     :  * A v0 ref key aborts the transaction and returns -EINVAL.  Any other key
                     :  * type hits BUG(), and dropping more refs than the item holds hits
                     :  * BUG_ON().
                     :  */
     604      205462 : static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
     605             :                                            struct btrfs_root *root,
     606             :                                            struct btrfs_path *path,
     607             :                                            int refs_to_drop)
     608             : {
     609      205462 :         struct btrfs_key key;
     610      205462 :         struct btrfs_extent_data_ref *ref1 = NULL;
     611      205462 :         struct btrfs_shared_data_ref *ref2 = NULL;
     612      205462 :         struct extent_buffer *leaf;
     613      205462 :         u32 num_refs = 0;
     614      205462 :         int ret = 0;
     615             : 
     616      205462 :         leaf = path->nodes[0];
     617      205462 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
     618             : 
     619      205462 :         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { /* the key type tells which ref layout the item uses */
     620      133003 :                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
     621             :                                       struct btrfs_extent_data_ref);
     622      133003 :                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
     623       72459 :         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
     624       72459 :                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
     625             :                                       struct btrfs_shared_data_ref);
     626       72459 :                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
     627           0 :         } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
     628           0 :                 btrfs_print_v0_err(trans->fs_info);
     629           0 :                 btrfs_abort_transaction(trans, -EINVAL);
     630           0 :                 return -EINVAL;
     631             :         } else {
     632           0 :                 BUG();
     633             :         }
     634             : 
     635      205462 :         BUG_ON(num_refs < refs_to_drop);
     636      205462 :         num_refs -= refs_to_drop;
     637             : 
     638      205462 :         if (num_refs == 0) {
     639      202199 :                 ret = btrfs_del_item(trans, root, path); /* last reference gone: remove the whole item */
     640             :         } else {
     641        3263 :                 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
     642        3263 :                         btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
     643           0 :                 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
     644           0 :                         btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
     645        3263 :                 btrfs_mark_buffer_dirty(leaf);
     646             :         }
     647             :         return ret;
     648             : }
     649             : /*
                     :  * Return the reference count stored in a data back ref.
                     :  *
                     :  * If @iref is non-NULL the count is read from that inline ref.  Otherwise
                     :  * it is read from the keyed ref item at path->slots[0], whose key type
                     :  * selects the ref layout.  An unexpected key type triggers WARN_ON() and
                     :  * 0 is returned.  A v0 ref key hits BUG_ON().
                     :  */
     650     1065541 : static noinline u32 extent_data_ref_count(struct btrfs_path *path,
     651             :                                           struct btrfs_extent_inline_ref *iref)
     652             : {
     653     1065541 :         struct btrfs_key key;
     654     1065541 :         struct extent_buffer *leaf;
     655     1065541 :         struct btrfs_extent_data_ref *ref1;
     656     1065541 :         struct btrfs_shared_data_ref *ref2;
     657     1065541 :         u32 num_refs = 0;
     658     1065541 :         int type;
     659             : 
     660     1065541 :         leaf = path->nodes[0];
     661     1065541 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
     662             : 
     663     1065541 :         BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
     664     1065541 :         if (iref) {
     665             :                 /*
     666             :                  * If type is invalid, we should have bailed out earlier than
     667             :                  * this call.
     668             :                  */
     669     1065317 :                 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
     670     1065317 :                 ASSERT(type != BTRFS_REF_TYPE_INVALID);
     671     1065317 :                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
     672     1041970 :                         ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); /* the data ref is read starting at the inline ref's offset field */
     673     1041970 :                         num_refs = btrfs_extent_data_ref_count(leaf, ref1);
     674             :                 } else {
     675       23347 :                         ref2 = (struct btrfs_shared_data_ref *)(iref + 1); /* the shared data ref is read right after the inline ref header */
     676       23347 :                         num_refs = btrfs_shared_data_ref_count(leaf, ref2);
     677             :                 }
     678         224 :         } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
     679         130 :                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
     680             :                                       struct btrfs_extent_data_ref);
     681         130 :                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
     682          94 :         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
     683          94 :                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
     684             :                                       struct btrfs_shared_data_ref);
     685          94 :                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
     686             :         } else {
     687           0 :                 WARN_ON(1);
     688             :         }
     689     1065541 :         return num_refs;
     690             : }
     691             : /*
                     :  * Search the extent root for the keyed tree block back ref of @bytenr:
                     :  * a BTRFS_SHARED_BLOCK_REF_KEY item keyed by @parent when @parent is
                     :  * non-zero, otherwise a BTRFS_TREE_BLOCK_REF_KEY item keyed by
                     :  * @root_objectid.
                     :  *
                     :  * The path is not released here.  Returns 0 when btrfs_search_slot()
                     :  * returns 0, -ENOENT when it returns a positive value, or its negative
                     :  * error.
                     :  */
     692           0 : static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
     693             :                                           struct btrfs_path *path,
     694             :                                           u64 bytenr, u64 parent,
     695             :                                           u64 root_objectid)
     696             : {
     697           0 :         struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
     698           0 :         struct btrfs_key key;
     699           0 :         int ret;
     700             : 
     701           0 :         key.objectid = bytenr;
     702           0 :         if (parent) {
     703           0 :                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
     704           0 :                 key.offset = parent;
     705             :         } else {
     706           0 :                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
     707           0 :                 key.offset = root_objectid;
     708             :         }
     709             : 
     710           0 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1); /* NOTE(review): ins_len -1 with cow 1 presumably prepares the path for a later deletion - confirm */
     711           0 :         if (ret > 0) /* presumably "no exact match"; reported to callers as -ENOENT */
     712           0 :                 ret = -ENOENT;
     713           0 :         return ret;
     714             : }
     715             : /*
                     :  * Insert a zero-sized keyed tree block back ref item for @bytenr: a
                     :  * BTRFS_SHARED_BLOCK_REF_KEY item keyed by @parent when @parent is
                     :  * non-zero, otherwise a BTRFS_TREE_BLOCK_REF_KEY item keyed by
                     :  * @root_objectid.  The key alone carries the reference; no item data is
                     :  * written.
                     :  *
                     :  * The path is always released.  Returns the result of
                     :  * btrfs_insert_empty_item() unchanged.
                     :  */
     716           0 : static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
     717             :                                           struct btrfs_path *path,
     718             :                                           u64 bytenr, u64 parent,
     719             :                                           u64 root_objectid)
     720             : {
     721           0 :         struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
     722           0 :         struct btrfs_key key;
     723           0 :         int ret;
     724             : 
     725           0 :         key.objectid = bytenr;
     726           0 :         if (parent) {
     727           0 :                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
     728           0 :                 key.offset = parent;
     729             :         } else {
     730           0 :                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
     731           0 :                 key.offset = root_objectid;
     732             :         }
     733             : 
     734           0 :         ret = btrfs_insert_empty_item(trans, root, path, &key, 0); /* data size 0: nothing to fill in afterwards */
     735           0 :         btrfs_release_path(path);
     736           0 :         return ret;
     737             : }
     738             : /*
                     :  * Pick the back ref key type for a reference.
                     :  *
                     :  * An @owner below BTRFS_FIRST_FREE_OBJECTID selects a block ref type,
                     :  * any other @owner a data ref type.  Within each group a non-zero @parent
                     :  * selects the shared variant (BTRFS_SHARED_BLOCK_REF_KEY or
                     :  * BTRFS_SHARED_DATA_REF_KEY) and a zero @parent the non-shared one
                     :  * (BTRFS_TREE_BLOCK_REF_KEY or BTRFS_EXTENT_DATA_REF_KEY).
                     :  */
     739             : static inline int extent_ref_type(u64 parent, u64 owner)
     740             : {
     741    70765281 :         int type;
     742    70765281 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
     743    30927659 :                 if (parent > 0)
     744             :                         type = BTRFS_SHARED_BLOCK_REF_KEY;
     745             :                 else
     746     5945045 :                         type = BTRFS_TREE_BLOCK_REF_KEY;
     747             :         } else {
     748    39837622 :                 if (parent > 0)
     749             :                         type = BTRFS_SHARED_DATA_REF_KEY;
     750             :                 else
     751    15232138 :                         type = BTRFS_EXTENT_DATA_REF_KEY;
     752             :         }
     753    70765281 :         return type;
     754             : }
     755             : /*
                     :  * Find the key that follows the current position of @path.
                     :  *
                     :  * Starting at @level and walking upwards, take the first node that has
                     :  * a slot after path->slots[level] and copy that slot's key into @key.
                     :  * The walk stops at the first level that has no node in the path.
                     :  *
                     :  * Returns 0 if a key was stored in @key, 1 if no following slot exists
                     :  * at any walked level (@key is left untouched).
                     :  */
     756    22823604 : static int find_next_key(struct btrfs_path *path, int level,
     757             :                          struct btrfs_key *key)
     758             : 
     759             : {
     760    25204709 :         for (; level < BTRFS_MAX_LEVEL; level++) {
     761    25204709 :                 if (!path->nodes[level])
     762             :                         break;
     763    24473346 :                 if (path->slots[level] + 1 >=
     764             :                     btrfs_header_nritems(path->nodes[level]))
     765     2381105 :                         continue; /* current slot is the last one here: look one level up */
     766    22092241 :                 if (level == 0)
     767    16358547 :                         btrfs_item_key_to_cpu(path->nodes[level], key,
     768             :                                               path->slots[level] + 1);
     769             :                 else
     770     5733694 :                         btrfs_node_key_to_cpu(path->nodes[level], key,
     771             :                                               path->slots[level] + 1);
     772             :                 return 0;
     773             :         }
     774             :         return 1;
     775             : }
     776             : 
     777             : /*
     778             :  * Look for an inline back ref. If the back ref is found, *ref_ret is set
     779             :  * to the address of the inline back ref, and 0 is returned.
     780             :  *
     781             :  * If the back ref isn't found, *ref_ret is set to the address where it
     782             :  * should be inserted, and -ENOENT is returned.
     783             :  *
     784             :  * If insert is true and there are too many inline back refs, the path
     785             :  * points to the extent item, and -EAGAIN is returned.  -EAGAIN is also
                     :  * returned (insert only) when the item following the extent item still
                     :  * belongs to this extent, i.e. keyed back ref items exist for it.
     786             :  *
                     :  * Other return values:
                     :  *   a negative error from btrfs_search_slot();
                     :  *   -ENOENT if the extent item itself is missing and insert is false;
                     :  *   -EIO (after a WARN) if it is missing and insert is true;
                     :  *   -EINVAL, after aborting the transaction, if the item is smaller than
                     :  *    struct btrfs_extent_item;
                     :  *   -EUCLEAN for an invalid inline ref type or when the inline refs run
                     :  *    past the end of the item.
                     :  *
                     :  * *ref_ret is not written on the paths that jump straight to the exit
                     :  * label (-EAGAIN, invalid ref type and the lookup failures above).
                     :  *
     787             :  * NOTE: inline back refs are ordered in the same way that back ref
     788             :  *       items in the tree are ordered.
     789             :  */
     790             : static noinline_for_stack
     791    53868393 : int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
     792             :                                  struct btrfs_path *path,
     793             :                                  struct btrfs_extent_inline_ref **ref_ret,
     794             :                                  u64 bytenr, u64 num_bytes,
     795             :                                  u64 parent, u64 root_objectid,
     796             :                                  u64 owner, u64 offset, int insert)
     797             : {
     798    53868393 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     799    53868393 :         struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr);
     800    53868521 :         struct btrfs_key key;
     801    53868521 :         struct extent_buffer *leaf;
     802    53868521 :         struct btrfs_extent_item *ei;
     803    53868521 :         struct btrfs_extent_inline_ref *iref;
     804    53868521 :         u64 flags;
     805    53868521 :         u64 item_size;
     806    53868521 :         unsigned long ptr;
     807    53868521 :         unsigned long end;
     808    53868521 :         int extra_size;
     809    53868521 :         int type;
     810    53868521 :         int want;
     811    53868521 :         int ret;
     812    53868521 :         int err = 0;
     813    53868521 :         bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
     814    53868521 :         int needed;
     815             : 
     816    53868521 :         key.objectid = bytenr;
     817    53868521 :         key.type = BTRFS_EXTENT_ITEM_KEY;
     818    53868521 :         key.offset = num_bytes;
     819             : 
     820    53868521 :         want = extent_ref_type(parent, owner);
     821    53868521 :         if (insert) {
     822    29591231 :                 extra_size = btrfs_extent_inline_ref_size(want); /* room the search must reserve for the new inline ref */
     823    29591231 :                 path->search_for_extension = 1;
     824    29591231 :                 path->keep_locks = 1; /* both flags are cleared again at the out label */
     825             :         } else
     826             :                 extra_size = -1;
     827             : 
     828             :         /*
     829             :          * Owner is our level, so we can just add one to get the level for the
     830             :          * block we are interested in.
     831             :          */
     832    53868521 :         if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
     833    23087363 :                 key.type = BTRFS_METADATA_ITEM_KEY;
     834    23087363 :                 key.offset = owner;
     835             :         }
     836             : 
     837    30781158 : again:
     838    53868521 :         ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
     839    53869132 :         if (ret < 0) {
     840           0 :                 err = ret;
     841           0 :                 goto out;
     842             :         }
     843             : 
     844             :         /*
     845             :          * We may be a newly converted file system which still has the old fat
     846             :          * extent entries for metadata, so try and see if we have one of those.
     847             :          */
     848    53869132 :         if (ret > 0 && skinny_metadata) {
     849           0 :                 skinny_metadata = false; /* the fallback is attempted only once */
     850           0 :                 if (path->slots[0]) {
     851           0 :                         path->slots[0]--; /* check whether the previous item is the extent item we want */
     852           0 :                         btrfs_item_key_to_cpu(path->nodes[0], &key,
     853             :                                               path->slots[0]);
     854           0 :                         if (key.objectid == bytenr &&
     855           0 :                             key.type == BTRFS_EXTENT_ITEM_KEY &&
     856           0 :                             key.offset == num_bytes)
     857             :                                 ret = 0;
     858             :                 }
     859           0 :                 if (ret) {
     860           0 :                         key.objectid = bytenr;
     861           0 :                         key.type = BTRFS_EXTENT_ITEM_KEY;
     862           0 :                         key.offset = num_bytes;
     863           0 :                         btrfs_release_path(path);
     864           0 :                         goto again; /* search again with the EXTENT_ITEM key */
     865             :                 }
     866             :         }
     867             : 
     868    53869132 :         if (ret && !insert) {
     869           0 :                 err = -ENOENT;
     870           0 :                 goto out;
     871    53869132 :         } else if (WARN_ON(ret)) {
     872           0 :                 err = -EIO;
     873           0 :                 goto out;
     874             :         }
     875             : 
     876    53869132 :         leaf = path->nodes[0];
     877    53869132 :         item_size = btrfs_item_size(leaf, path->slots[0]);
     878    53869109 :         if (unlikely(item_size < sizeof(*ei))) {
     879           0 :                 err = -EINVAL;
     880           0 :                 btrfs_print_v0_err(fs_info);
     881           0 :                 btrfs_abort_transaction(trans, err);
     882           0 :                 goto out;
     883             :         }
     884             : 
     885    53869109 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
     886    53869127 :         flags = btrfs_extent_flags(leaf, ei);
     887             : 
     888    53869123 :         ptr = (unsigned long)(ei + 1); /* scanning starts right after the extent item header */
     889    53869123 :         end = (unsigned long)ei + item_size;
     890             : 
     891    53869123 :         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
     892           0 :                 ptr += sizeof(struct btrfs_tree_block_info); /* non-skinny tree block items: skip the tree block info first */
     893           0 :                 BUG_ON(ptr > end);
     894             :         }
     895             : 
     896    53869123 :         if (owner >= BTRFS_FIRST_FREE_OBJECTID)
     897             :                 needed = BTRFS_REF_TYPE_DATA;
     898             :         else
     899    23087431 :                 needed = BTRFS_REF_TYPE_BLOCK;
     900             : 
     901    53869123 :         err = -ENOENT; /* stays -ENOENT unless a matching ref is found below */
     902   299656287 :         while (1) {
     903   299656287 :                 if (ptr >= end) {
     904     2315258 :                         if (ptr > end) {
     905           0 :                                 err = -EUCLEAN;
     906           0 :                                 btrfs_print_leaf(path->nodes[0]);
     907           0 :                                 btrfs_crit(fs_info,
     908             : "overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu",
     909             :                                         path->slots[0], root_objectid, owner, offset, parent);
     910             :                         }
     911             :                         break;
     912             :                 }
     913   297341029 :                 iref = (struct btrfs_extent_inline_ref *)ptr;
     914   297341029 :                 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
     915   297321911 :                 if (type == BTRFS_REF_TYPE_INVALID) {
     916           0 :                         err = -EUCLEAN;
     917           0 :                         goto out;
     918             :                 }
     919             : 
     920   297321911 :                 if (want < type) /* refs are ordered by type: ptr is now the insertion position */
     921             :                         break;
     922   297236288 :                 if (want > type) {
     923     4870965 :                         ptr += btrfs_extent_inline_ref_size(type);
     924     4870965 :                         continue;
     925             :                 }
     926             : 
     927   292365323 :                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
     928   220213174 :                         struct btrfs_extent_data_ref *dref;
     929   220213174 :                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
     930   220213174 :                         if (match_extent_data_ref(leaf, dref, root_objectid,
     931             :                                                   owner, offset)) {
     932             :                                 err = 0;
     933             :                                 break;
     934             :                         }
     935   437754717 :                         if (hash_extent_data_ref_item(leaf, dref) < /* same type: stop at the first ref whose hash is below the wanted one */
     936   218894299 :                             hash_extent_data_ref(root_objectid, owner, offset))
     937             :                                 break;
     938             :                 } else {
     939    72152149 :                         u64 ref_offset;
     940    72152149 :                         ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
     941    72152097 :                         if (parent > 0) {
     942    66324317 :                                 if (parent == ref_offset) {
     943             :                                         err = 0;
     944             :                                         break;
     945             :                                 }
     946    49177065 :                                 if (ref_offset < parent) /* same type: stop at the first ref with a smaller offset */
     947             :                                         break;
     948             :                         } else {
     949     5827780 :                                 if (root_objectid == ref_offset) {
     950             :                                         err = 0;
     951             :                                         break;
     952             :                                 }
     953      165557 :                                 if (ref_offset < root_objectid)
     954             :                                         break;
     955             :                         }
     956             :                 }
     957   470332366 :                 ptr += btrfs_extent_inline_ref_size(type);
     958             :         }
     959    53868941 :         if (err == -ENOENT && insert) {
     960    29519903 :                 if (item_size + extra_size >= /* item would become too large with one more inline ref */
     961    29519903 :                     BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
     962    12255341 :                         err = -EAGAIN;
     963    12255341 :                         goto out;
     964             :                 }
     965             :                 /*
     966             :                  * To add new inline back ref, we have to make sure
     967             :                  * there is no corresponding back ref item.
     968             :                  * For simplicity, we just do not add new inline back
     969             :                  * ref if there is any kind of item for this block
     970             :                  */
     971    17264562 :                 if (find_next_key(path, 0, &key) == 0 &&
     972    16533693 :                     key.objectid == bytenr &&
     973      371695 :                     key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
     974      367813 :                         err = -EAGAIN;
     975      367813 :                         goto out;
     976             :                 }
     977             :         }
     978    41245781 :         *ref_ret = (struct btrfs_extent_inline_ref *)ptr; /* the matching ref, or the position where it would be inserted */
     979    53868935 : out:
     980    53868935 :         if (insert) {
     981    29591550 :                 path->keep_locks = 0;
     982    29591550 :                 path->search_for_extension = 0;
     983    29591550 :                 btrfs_unlock_up_safe(path, 1);
     984             :         }
     985    53868960 :         return err;
     986             : }
     987             : 
     988             : /*
     989             :  * Helper to add a new inline back ref.
                     :  *
                     :  * Extends the extent item at path->slots[0] by the size of the new ref,
                     :  * adds @refs_to_add to the item's ref count, applies @extent_op if one
                     :  * is given, then writes the new inline ref at the position @iref points
                     :  * to, moving any inline refs behind that position up to make room.  The
                     :  * leaf is marked dirty before returning.
                     :  *
                     :  * The ref type is derived from @parent and @owner via extent_ref_type().
                     :  * Data refs store @root_objectid/@owner/@offset and a count, shared data
                     :  * refs store @parent and a count, shared block refs store @parent and
                     :  * tree block refs store @root_objectid.
                     :  *
                     :  * @iref must point into the extent item at path->slots[0]; presumably it
                     :  * is the insertion position reported by lookup_inline_extent_backref().
     990             :  */
     991             : static noinline_for_stack
     992    16896754 : void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
     993             :                                  struct btrfs_path *path,
     994             :                                  struct btrfs_extent_inline_ref *iref,
     995             :                                  u64 parent, u64 root_objectid,
     996             :                                  u64 owner, u64 offset, int refs_to_add,
     997             :                                  struct btrfs_delayed_extent_op *extent_op)
     998             : {
     999    16896754 :         struct extent_buffer *leaf;
    1000    16896754 :         struct btrfs_extent_item *ei;
    1001    16896754 :         unsigned long ptr;
    1002    16896754 :         unsigned long end;
    1003    16896754 :         unsigned long item_offset;
    1004    16896754 :         u64 refs;
    1005    16896754 :         int size;
    1006    16896754 :         int type;
    1007             : 
    1008    16896754 :         leaf = path->nodes[0];
    1009    16896754 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1010    16896760 :         item_offset = (unsigned long)iref - (unsigned long)ei; /* keep the position as an offset; ei is looked up again after the item is extended */
    1011             : 
    1012    16896760 :         type = extent_ref_type(parent, owner);
    1013    16896760 :         size = btrfs_extent_inline_ref_size(type);
    1014             : 
    1015    16896760 :         btrfs_extend_item(path, size);
    1016             : 
    1017    16896745 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); /* NOTE(review): re-read presumably because extending can move the item data - confirm */
    1018    16896737 :         refs = btrfs_extent_refs(leaf, ei);
    1019    16896740 :         refs += refs_to_add;
    1020    16896740 :         btrfs_set_extent_refs(leaf, ei, refs);
    1021    16896735 :         if (extent_op)
    1022       22834 :                 __run_delayed_extent_op(extent_op, leaf, ei);
    1023             : 
    1024    16896735 :         ptr = (unsigned long)ei + item_offset;
    1025    16896735 :         end = (unsigned long)ei + btrfs_item_size(leaf, path->slots[0]);
    1026    16896738 :         if (ptr < end - size) /* refs exist behind the insertion point: shift them up by size */
    1027    14866504 :                 memmove_extent_buffer(leaf, ptr + size, ptr,
    1028             :                                       end - size - ptr);
    1029             : 
    1030    16896738 :         iref = (struct btrfs_extent_inline_ref *)ptr;
    1031    16896738 :         btrfs_set_extent_inline_ref_type(leaf, iref, type);
    1032    16896738 :         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
    1033      821406 :                 struct btrfs_extent_data_ref *dref;
    1034      821406 :                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
    1035      821406 :                 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
    1036      821406 :                 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
    1037      821406 :                 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
    1038      821406 :                 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
    1039    16075332 :         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
    1040     8235049 :                 struct btrfs_shared_data_ref *sref;
    1041     8235049 :                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
    1042     8235049 :                 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
    1043     8235044 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
    1044     7840283 :         } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
    1045     7698868 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
    1046             :         } else {
    1047      141415 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); /* remaining type: BTRFS_TREE_BLOCK_REF_KEY */
    1048             :         }
    1049    16896735 :         btrfs_mark_buffer_dirty(leaf);
    1050    16896747 : }
    1051             : /*
                     :  * Look up the back reference of an extent.
                     :  *
                     :  * The inline lookup (lookup_inline_extent_backref() with its last argument
                     :  * set to 0) is tried first; any result other than -ENOENT is returned
                     :  * unchanged.  On -ENOENT the path is released, *ref_ret is reset to NULL and
                     :  * the search falls back to lookup_tree_block_ref() when @owner is below
                     :  * BTRFS_FIRST_FREE_OBJECTID, or to lookup_extent_data_ref() otherwise.
                     :  *
                     :  * Returns the result of the last lookup that was run.
                     :  */
    1052    24277302 : static int lookup_extent_backref(struct btrfs_trans_handle *trans,
    1053             :                                  struct btrfs_path *path,
    1054             :                                  struct btrfs_extent_inline_ref **ref_ret,
    1055             :                                  u64 bytenr, u64 num_bytes, u64 parent,
    1056             :                                  u64 root_objectid, u64 owner, u64 offset)
    1057             : {
    1058    24277302 :         int ret;
    1059             : 
    1060    24277302 :         ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
    1061             :                                            num_bytes, parent, root_objectid,
    1062             :                                            owner, offset, 0);
    1063    24277331 :         if (ret != -ENOENT)
    1064             :                 return ret;
    1065             :         /* No inline ref: drop the path before trying the fallback lookups. */
    1066      205686 :         btrfs_release_path(path);
    1067      205686 :         *ref_ret = NULL;
    1068             : 
    1069      205686 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1070           0 :                 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
    1071             :                                             root_objectid);
    1072             :         } else {
    1073      205686 :                 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
    1074             :                                              root_objectid, owner, offset);
    1075             :         }
    1076             :         return ret;
    1077             : }
    1078             : 
    1079             : /*
    1080             :  * helper to update/remove inline back ref
                     :  *
                     :  * @path:        path whose nodes[0]/slots[0] point at the extent item
                     :  * @iref:        the inline ref inside that item to modify
                     :  * @refs_to_mod: signed change to apply to the reference counts
                     :  * @extent_op:   optional extent op, run on the extent item when non-NULL
                     :  *
                     :  * Adds @refs_to_mod to the total ref count stored in the extent item and
                     :  * then to the count of the inline ref itself.  If the inline ref's count
                     :  * drops to zero, the ref is cut out of the item: the bytes that follow it
                     :  * are moved down and the item is truncated by the ref's size.  The leaf is
                     :  * marked dirty in all cases.
                     :  *
                     :  * NOTE(review): refs is a u64, so the "<= 0" in the WARN_ON() below can
                     :  * only be true when the sum is exactly 0, and the function merely warns
                     :  * and carries on.
    1081             :  */
    1082             : static noinline_for_stack
    1083    15580265 : void update_inline_extent_backref(struct btrfs_path *path,
    1084             :                                   struct btrfs_extent_inline_ref *iref,
    1085             :                                   int refs_to_mod,
    1086             :                                   struct btrfs_delayed_extent_op *extent_op)
    1087             : {
    1088    15580265 :         struct extent_buffer *leaf = path->nodes[0];
    1089    15580265 :         struct btrfs_extent_item *ei;
    1090    15580265 :         struct btrfs_extent_data_ref *dref = NULL;
    1091    15580265 :         struct btrfs_shared_data_ref *sref = NULL;
    1092    15580265 :         unsigned long ptr;
    1093    15580265 :         unsigned long end;
    1094    15580265 :         u32 item_size;
    1095    15580265 :         int size;
    1096    15580265 :         int type;
    1097    15580265 :         u64 refs;
    1098             :         /* First adjust the total ref count kept in the extent item itself. */
    1099    15580265 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1100    15580265 :         refs = btrfs_extent_refs(leaf, ei);
    1101    31160530 :         WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
    1102    15580265 :         refs += refs_to_mod;
    1103    15580265 :         btrfs_set_extent_refs(leaf, ei, refs);
    1104    15580265 :         if (extent_op)
    1105           0 :                 __run_delayed_extent_op(extent_op, leaf, ei);
    1106             : 
    1107             :         /*
    1108             :          * If type is invalid, we should have bailed out after
    1109             :          * lookup_inline_extent_backref().
    1110             :          */
    1111    15580265 :         type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
    1112    15580265 :         ASSERT(type != BTRFS_REF_TYPE_INVALID);
    1113             :         /*
                     :          * Fetch the count held by this inline ref.  Types other than the two
                     :          * data ref types are treated as holding exactly one reference, so
                     :          * the only modification accepted for them is dropping it (-1).
                     :          */
    1114    15580265 :         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
    1115      291899 :                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
    1116      291899 :                 refs = btrfs_extent_data_ref_count(leaf, dref);
    1117    15288366 :         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
    1118     7538984 :                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
    1119     7538984 :                 refs = btrfs_shared_data_ref_count(leaf, sref);
    1120             :         } else {
    1121     7749382 :                 refs = 1;
    1122     7749382 :                 BUG_ON(refs_to_mod != -1);
    1123             :         }
    1124             : 
    1125    15580265 :         BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
    1126    15580265 :         refs += refs_to_mod;
    1127             :         /*
                     :          * Still referenced: store the new count.  Otherwise remove the
                     :          * inline ref by shifting the tail of the item over it and shrinking
                     :          * the item by the ref's size.
                     :          */
    1128    15580265 :         if (refs > 0) {
    1129       89695 :                 if (type == BTRFS_EXTENT_DATA_REF_KEY)
    1130       89692 :                         btrfs_set_extent_data_ref_count(leaf, dref, refs);
    1131             :                 else
    1132           3 :                         btrfs_set_shared_data_ref_count(leaf, sref, refs);
    1133             :         } else {
    1134    15490570 :                 size =  btrfs_extent_inline_ref_size(type);
    1135    15490570 :                 item_size = btrfs_item_size(leaf, path->slots[0]);
    1136    15490570 :                 ptr = (unsigned long)iref;
    1137    15490570 :                 end = (unsigned long)ei + item_size;
    1138    15490570 :                 if (ptr + size < end)
    1139     5779506 :                         memmove_extent_buffer(leaf, ptr, ptr + size,
    1140     5779506 :                                               end - ptr - size);
    1141    15490570 :                 item_size -= size;
    1142    15490570 :                 btrfs_truncate_item(path, item_size, 1);
    1143             :         }
    1144    15580265 :         btrfs_mark_buffer_dirty(leaf);
    1145    15580265 : }
    1146             : /*
                     :  * Add @refs_to_add references to an extent through an inline back ref.
                     :  *
                     :  * Runs lookup_inline_extent_backref() with its last argument set to 1 and
                     :  * acts on the result:
                     :  *   0        - a matching inline ref exists.  For tree blocks (@owner below
                     :  *              BTRFS_FIRST_FREE_OBJECTID) this is reported as corruption:
                     :  *              the leaf is printed, a critical message is logged and
                     :  *              -EUCLEAN is returned.  Otherwise the existing ref is bumped
                     :  *              via update_inline_extent_backref().
                     :  *   -ENOENT  - no such ref yet; a new one is created with
                     :  *              setup_inline_extent_backref() and 0 is returned.
                     :  *   other    - returned to the caller unchanged.
                     :  */
    1147             : static noinline_for_stack
    1148    29590769 : int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
    1149             :                                  struct btrfs_path *path,
    1150             :                                  u64 bytenr, u64 num_bytes, u64 parent,
    1151             :                                  u64 root_objectid, u64 owner,
    1152             :                                  u64 offset, int refs_to_add,
    1153             :                                  struct btrfs_delayed_extent_op *extent_op)
    1154             : {
    1155    29590769 :         struct btrfs_extent_inline_ref *iref;
    1156    29590769 :         int ret;
    1157             : 
    1158    29590769 :         ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
    1159             :                                            num_bytes, parent, root_objectid,
    1160             :                                            owner, offset, 1);
    1161    29591656 :         if (ret == 0) {
    1162             :                 /*
    1163             :                  * We're adding refs to a tree block we already own, this
    1164             :                  * should not happen at all.
    1165             :                  */
    1166       71699 :                 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1167           0 :                         btrfs_print_leaf(path->nodes[0]);
    1168           0 :                         btrfs_crit(trans->fs_info,
    1169             : "adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu slot %u",
    1170             :                                    bytenr, num_bytes, root_objectid, path->slots[0]);
    1171           0 :                         return -EUCLEAN;
    1172             :                 }
    1173       71699 :                 update_inline_extent_backref(path, iref, refs_to_add, extent_op);
    1174    29519957 :         } else if (ret == -ENOENT) {
    1175    16896755 :                 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
    1176             :                                             root_objectid, owner, offset,
    1177             :                                             refs_to_add, extent_op);
    1178    16896755 :                 ret = 0;
    1179             :         }
    1180             :         return ret;
    1181             : }
    1182             : /*
                     :  * Drop @refs_to_drop references from the back ref the path points at.
                     :  *
                     :  * With a non-NULL @iref the inline ref is updated in place through
                     :  * update_inline_extent_backref() (with a negated count) and 0 is returned.
                     :  * Without one, data extents go through remove_extent_data_ref() and
                     :  * everything else has the item deleted with btrfs_del_item(); the helper's
                     :  * return value is passed on.
                     :  *
                     :  * For non-data extents exactly one reference must be dropped, which is
                     :  * enforced by the BUG_ON() below.
                     :  */
    1183    15714028 : static int remove_extent_backref(struct btrfs_trans_handle *trans,
    1184             :                                  struct btrfs_root *root,
    1185             :                                  struct btrfs_path *path,
    1186             :                                  struct btrfs_extent_inline_ref *iref,
    1187             :                                  int refs_to_drop, int is_data)
    1188             : {
    1189    15714028 :         int ret = 0;
    1190             : 
    1191    15714028 :         BUG_ON(!is_data && refs_to_drop != 1);
    1192    15714028 :         if (iref)
    1193    15508566 :                 update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
    1194      205462 :         else if (is_data)
    1195      205462 :                 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
    1196             :         else
    1197           0 :                 ret = btrfs_del_item(trans, root, path);
    1198    15714028 :         return ret;
    1199             : }
    1200             : /*
                     :  * Discard the byte range [@start, @start + @len) of @bdev while leaving
                     :  * every superblock mirror location untouched.
                     :  *
                     :  * @bdev:            block device to discard on
                     :  * @start:           start of the range in bytes (shifted by SECTOR_SHIFT
                     :  *                   before being handed to blkdev_issue_discard())
                     :  * @len:             length of the range in bytes
                     :  * @discarded_bytes: out, reset to 0 and then increased by the size of
                     :  *                   every piece that was discarded successfully
                     :  *
                     :  * The range is walked against each of the BTRFS_SUPER_MIRROR_MAX superblock
                     :  * offsets; any overlap is skipped and only the pieces around it are
                     :  * discarded.
                     :  *
                     :  * Returns 0 when @len is (or becomes) 0.  An error other than -EOPNOTSUPP
                     :  * from a piece in front of a superblock is returned immediately; otherwise
                     :  * the value of the last blkdev_issue_discard() call is returned.
                     :  */
    1201       62663 : static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
    1202             :                                u64 *discarded_bytes)
    1203             : {
    1204       62663 :         int j, ret = 0;
    1205       62663 :         u64 bytes_left, end;
    1206       62663 :         u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);
    1207             :         /* An unaligned start triggers the WARN; shrink the range to its aligned part. */
    1208       62663 :         if (WARN_ON(start != aligned_start)) {
    1209           0 :                 len -= aligned_start - start;
    1210           0 :                 len = round_down(len, 1 << SECTOR_SHIFT);
    1211           0 :                 start = aligned_start;
    1212             :         }
    1213             : 
    1214       62663 :         *discarded_bytes = 0;
    1215             : 
    1216       62663 :         if (!len)
    1217             :                 return 0;
    1218             : 
    1219       62663 :         end = start + len;
    1220       62663 :         bytes_left = len;
    1221             : 
    1222             :         /* Skip any superblocks on this device. */
    1223      250652 :         for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
    1224      187989 :                 u64 sb_start = btrfs_sb_offset(j);
    1225      187989 :                 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
    1226      187989 :                 u64 size = sb_start - start;
    1227             :                 /* Nothing to do for a mirror that does not overlap the remaining range. */
    1228      187989 :                 if (!in_range(sb_start, start, bytes_left) &&
    1229      187988 :                     !in_range(sb_end, start, bytes_left) &&
    1230      122416 :                     !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
    1231      187988 :                         continue;
    1232             : 
    1233             :                 /*
    1234             :                  * Superblock spans beginning of range.  Adjust start and
    1235             :                  * try again.
    1236             :                  */
    1237           1 :                 if (sb_start <= start) {
    1238           0 :                         start += sb_end - start;
    1239           0 :                         if (start > end) {
    1240             :                                 bytes_left = 0;
    1241             :                                 break;
    1242             :                         }
    1243           0 :                         bytes_left = end - start;
    1244           0 :                         continue;
    1245             :                 }
    1246             :                 /* Discard the piece in front of the superblock, then resume right after it. */
    1247           1 :                 if (size) {
    1248           1 :                         ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
    1249             :                                                    size >> SECTOR_SHIFT,
    1250             :                                                    GFP_NOFS);
    1251           1 :                         if (!ret)
    1252           1 :                                 *discarded_bytes += size;
    1253           0 :                         else if (ret != -EOPNOTSUPP)
    1254           0 :                                 return ret;
    1255             :                 }
    1256             : 
    1257           1 :                 start = sb_end;
    1258           1 :                 if (start > end) {
    1259             :                         bytes_left = 0;
    1260             :                         break;
    1261             :                 }
    1262           1 :                 bytes_left = end - start;
    1263             :         }
    1264             :         /* Discard whatever is left once all superblock mirrors have been considered. */
    1265       62663 :         if (bytes_left) {
    1266       62663 :                 ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
    1267             :                                            bytes_left >> SECTOR_SHIFT,
    1268             :                                            GFP_NOFS);
    1269       62662 :                 if (!ret)
    1270       62660 :                         *discarded_bytes += bytes_left;
    1271             :         }
    1272             :         return ret;
    1273             : }
    1274             : /*
                     :  * Discard the physical range described by one discard stripe.
                     :  *
                     :  * @stripe: supplies the device, physical start and length
                     :  * @bytes:  out, set to the number of bytes reported as discarded
                     :  *
                     :  * Three cases are handled:
                     :  *  - btrfs_can_zone_reset() is true: the zone is reset with
                     :  *    btrfs_reset_device_zone().  If a device replace is ongoing and this
                     :  *    device is its source, the same range is also reset on the replace
                     :  *    target and both byte counts are added up.
                     :  *  - the block device reports a non-zero bdev_max_discard_sectors(): a
                     :  *    regular discard is issued through btrfs_issue_discard().
                     :  *  - otherwise nothing is done and 0 bytes are reported.
                     :  *
                     :  * Returns 0 or the error of the last helper that was called.
                     :  */
    1275       62633 : static int do_discard_extent(struct btrfs_discard_stripe *stripe, u64 *bytes)
    1276             : {
    1277       62633 :         struct btrfs_device *dev = stripe->dev;
    1278       62633 :         struct btrfs_fs_info *fs_info = dev->fs_info;
    1279       62633 :         struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
    1280       62633 :         u64 phys = stripe->physical;
    1281       62633 :         u64 len = stripe->length;
    1282       62633 :         u64 discarded = 0;
    1283       62633 :         int ret = 0;
    1284             : 
    1285             :         /* Zone reset on a zoned filesystem */
    1286       62633 :         if (btrfs_can_zone_reset(dev, phys, len)) {
    1287           0 :                 u64 src_disc;
    1288             : 
    1289           0 :                 ret = btrfs_reset_device_zone(dev, phys, len, &discarded);
    1290           0 :                 if (ret)
    1291             :                         goto out;
    1292             :                 /* Done unless this device is the source of a running replace. */
    1293           0 :                 if (!btrfs_dev_replace_is_ongoing(dev_replace) ||
    1294           0 :                     dev != dev_replace->srcdev)
    1295           0 :                         goto out;
    1296             : 
    1297             :                 src_disc = discarded;
    1298             : 
    1299             :                 /* Send to replace target as well */
    1300             :                 ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
    1301             :                                               &discarded);
    1302             :                 discarded += src_disc;
    1303       62633 :         } else if (bdev_max_discard_sectors(stripe->dev->bdev)) {
    1304       62633 :                 ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
    1305             :         } else {
    1306             :                 ret = 0;
    1307             :                 *bytes = 0; /* redundant: overwritten at "out" with discarded, which is still 0 */
    1308             :         }
    1309             : 
    1310       62632 : out:
    1311       62632 :         *bytes = discarded;
    1312       62632 :         return ret;
    1313             : }
    1314             : /*
                     :  * Discard the logical range [@bytenr, @bytenr + @num_bytes).
                     :  *
                     :  * @fs_info:      filesystem the range belongs to
                     :  * @bytenr:       logical start of the range
                     :  * @num_bytes:    length of the range
                     :  * @actual_bytes: optional out, receives the sum of the bytes reported as
                     :  *                discarded by do_discard_extent() for every stripe
                     :  *
                     :  * The range is translated with btrfs_map_discard() one mapping at a time and
                     :  * each returned stripe is discarded through do_discard_extent().  Stripes
                     :  * whose device has no bdev or is not writeable are skipped.  -EOPNOTSUPP,
                     :  * whether from the mapping or from a stripe, is turned into success; any
                     :  * other error stops the walk and is returned.
                     :  *
                     :  * Returns 0 on success or the first such error.
                     :  */
    1315       47919 : int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
    1316             :                          u64 num_bytes, u64 *actual_bytes)
    1317             : {
    1318       47919 :         int ret = 0;
    1319       47919 :         u64 discarded_bytes = 0;
    1320       47919 :         u64 end = bytenr + num_bytes;
    1321       47919 :         u64 cur = bytenr;
    1322             : 
    1323             :         /*
    1324             :          * Avoid races with device replace and make sure the devices in the
    1325             :          * stripes don't go away while we are discarding.
    1326             :          */
    1327       47919 :         btrfs_bio_counter_inc_blocked(fs_info);
    1328       95837 :         while (cur < end) {
    1329       47920 :                 struct btrfs_discard_stripe *stripes;
    1330       47920 :                 unsigned int num_stripes;
    1331       47920 :                 int i;
    1332             :                 /*
                     :                  * num_bytes is passed by pointer and later used to advance cur,
                     :                  * so it presumably comes back as the length actually mapped -
                     :                  * confirm against btrfs_map_discard().
                     :                  */
    1333       47920 :                 num_bytes = end - cur;
    1334       47920 :                 stripes = btrfs_map_discard(fs_info, cur, &num_bytes, &num_stripes);
    1335       47920 :                 if (IS_ERR(stripes)) {
    1336           0 :                         ret = PTR_ERR(stripes);
    1337           0 :                         if (ret == -EOPNOTSUPP)
    1338           0 :                                 ret = 0;
    1339           2 :                         break;
    1340             :                 }
    1341             : 
    1342      110549 :                 for (i = 0; i < num_stripes; i++) {
    1343       62632 :                         struct btrfs_discard_stripe *stripe = stripes + i;
    1344       62632 :                         u64 bytes;
    1345             :                         /* A stripe without a bdev is only expected on a DEGRADED mount. */
    1346       62632 :                         if (!stripe->dev->bdev) {
    1347           0 :                                 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
    1348           0 :                                 continue;
    1349             :                         }
    1350             : 
    1351       62632 :                         if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
    1352             :                                         &stripe->dev->dev_state))
    1353           0 :                                 continue;
    1354             : 
    1355       62632 :                         ret = do_discard_extent(stripe, &bytes);
    1356       62631 :                         if (ret) {
    1357             :                                 /*
    1358             :                                  * Keep going if discard is not supported by the
    1359             :                                  * device.
    1360             :                                  */
    1361           2 :                                 if (ret != -EOPNOTSUPP)
    1362             :                                         break;
    1363             :                                 ret = 0;
    1364             :                         } else {
    1365       62629 :                                 discarded_bytes += bytes;
    1366             :                         }
    1367             :                 }
    1368       47919 :                 kfree(stripes);
    1369       47920 :                 if (ret)
    1370             :                         break;
    1371       47918 :                 cur += num_bytes;
    1372             :         }
    1373       47919 :         btrfs_bio_counter_dec(fs_info);
    1374       47919 :         if (actual_bytes)
    1375       47881 :                 *actual_bytes = discarded_bytes;
    1376       47919 :         return ret;
    1377             : }
    1378             : 
    1379             : /*
                     :  * Queue a reference increment for the extent described by @generic_ref.
                     :  *
                     :  * Metadata refs are queued with btrfs_add_delayed_tree_ref(), all others
                     :  * with btrfs_add_delayed_data_ref().  btrfs_ref_tree_mod() is called
                     :  * afterwards regardless of the outcome, and the result of the queueing call
                     :  * is returned.
                     :  *
                     :  * The ref must have its type set and a non-zero action (ASSERT), and a
                     :  * metadata ref must not be owned by the tree log root (BUG_ON).
                     :  *
                     :  * Can return -ENOMEM
                     :  */
    1380    35083232 : int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
    1381             :                          struct btrfs_ref *generic_ref)
    1382             : {
    1383    35083232 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1384    35083232 :         int ret;
    1385             : 
    1386    35083232 :         ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
    1387             :                generic_ref->action);
    1388    35083232 :         BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
    1389             :                generic_ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID);
    1390             : 
    1391    35083232 :         if (generic_ref->type == BTRFS_REF_METADATA)
    1392     8715203 :                 ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
    1393             :         else
    1394    26368029 :                 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0);
    1395             : 
    1396    35083268 :         btrfs_ref_tree_mod(fs_info, generic_ref);
    1397             : 
    1398    35083268 :         return ret;
    1399             : }
    1400             : 
    1401             : /*
    1402             :  * __btrfs_inc_extent_ref - insert backreference for a given extent
    1403             :  *
    1404             :  * The counterpart is in __btrfs_free_extent(), with examples and more details
    1405             :  * how it works.
    1406             :  *
    1407             :  * @trans:          Handle of transaction
    1408             :  *
    1409             :  * @node:           The delayed ref node used to get the bytenr/length for
    1410             :  *                  extent whose references are incremented.
    1411             :  *
    1412             :  * @parent:         If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
    1413             :  *                  BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
    1414             :  *                  bytenr of the parent block. Since new extents are always
    1415             :  *                  created with indirect references, this will only be the case
    1416             :  *                  when relocating a shared extent. In that case, root_objectid
    1417             :  *                  will be BTRFS_TREE_RELOC_OBJECTID. Otherwise, parent must
    1418             :  *                  be 0
    1419             :  *
    1420             :  * @root_objectid:  The id of the root where this modification has originated,
    1421             :  *                  this can be either one of the well-known metadata trees or
    1422             :  *                  the subvolume id which references this extent.
    1423             :  *
    1424             :  * @owner:          For data extents it is the inode number of the owning file.
    1425             :  *                  For metadata extents this parameter holds the level in the
    1426             :  *                  tree of the extent.
    1427             :  *
    1428             :  * @offset:         For metadata extents the offset is ignored and is currently
    1429             :  *                  always passed as 0. For data extents it is the fileoffset
    1430             :  *                  this extent belongs to.
    1431             :  *
    1432             :  * @refs_to_add:    Number of references to add
    1433             :  *
    1434             :  * @extent_op:      Pointer to a structure, holding information necessary when
    1435             :  *                  updating a tree block's flags
    1436             :  *
                     :  * Return: -ENOMEM if no path could be allocated.  Otherwise the result of
                     :  * insert_inline_extent_backref() when it is 0 or a negative value other
                     :  * than -EAGAIN.  In the remaining cases the extent item's ref count is
                     :  * updated directly and the result of insert_tree_block_ref() or
                     :  * insert_extent_data_ref() is returned; a non-zero result there also aborts
                     :  * the transaction.
    1437             :  */
    1438    29591605 : static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
    1439             :                                   struct btrfs_delayed_ref_node *node,
    1440             :                                   u64 parent, u64 root_objectid,
    1441             :                                   u64 owner, u64 offset, int refs_to_add,
    1442             :                                   struct btrfs_delayed_extent_op *extent_op)
    1443             : {
    1444    29591605 :         struct btrfs_path *path;
    1445    29591605 :         struct extent_buffer *leaf;
    1446    29591605 :         struct btrfs_extent_item *item;
    1447    29591605 :         struct btrfs_key key;
    1448    29591605 :         u64 bytenr = node->bytenr;
    1449    29591605 :         u64 num_bytes = node->num_bytes;
    1450    29591605 :         u64 refs;
    1451    29591605 :         int ret;
    1452             : 
    1453    29591605 :         path = btrfs_alloc_path();
    1454    29590723 :         if (!path)
    1455             :                 return -ENOMEM;
    1456             : 
    1457             :         /* this will setup the path even if it fails to insert the back ref */
    1458    29590723 :         ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
    1459             :                                            parent, root_objectid, owner,
    1460             :                                            offset, refs_to_add, extent_op);
    1461    29590809 :         if ((ret < 0 && ret != -EAGAIN) || !ret)
    1462    16968441 :                 goto out;
    1463             : 
    1464             :         /*
    1465             :          * Ok we had -EAGAIN which means we didn't have space to insert an
    1466             :          * inline extent ref, so just update the reference count and add a
    1467             :          * normal backref.
    1468             :          */
    1469    12622368 :         leaf = path->nodes[0];
    1470    12622368 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    1471    12622545 :         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1472    12622633 :         refs = btrfs_extent_refs(leaf, item);
    1473    12622638 :         btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
    1474    12622426 :         if (extent_op)
    1475           0 :                 __run_delayed_extent_op(extent_op, leaf, item);
    1476             : 
    1477    12622426 :         btrfs_mark_buffer_dirty(leaf);
    1478    12622857 :         btrfs_release_path(path);
    1479             : 
    1480             :         /* now insert the actual backref */
    1481    12623357 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1482           0 :                 BUG_ON(refs_to_add != 1);
    1483           0 :                 ret = insert_tree_block_ref(trans, path, bytenr, parent,
    1484             :                                             root_objectid);
    1485             :         } else {
    1486    12623357 :                 ret = insert_extent_data_ref(trans, path, bytenr, parent,
    1487             :                                              root_objectid, owner, offset,
    1488             :                                              refs_to_add);
    1489             :         }
    1490    12623366 :         if (ret)
    1491           0 :                 btrfs_abort_transaction(trans, ret);
    1492    12623366 : out:
    1493    29591807 :         btrfs_free_path(path);
    1494    29591807 :         return ret;
    1495             : }
    1496             : /*
                     :  * Apply one delayed data ref node to the extent tree.
                     :  *
                     :  * @trans:           transaction handle
                     :  * @node:            delayed ref node; supplies bytenr, num_bytes, type,
                     :  *                   action and ref_mod
                     :  * @extent_op:       optional extent op passed on to the helpers
                     :  * @insert_reserved: when true and the action is BTRFS_ADD_DELAYED_REF, the
                     :  *                   extent is inserted via alloc_reserved_file_extent()
                     :  *
                     :  * Dispatches on node->action:
                     :  *  - BTRFS_ADD_DELAYED_REF with @insert_reserved:
                     :  *    alloc_reserved_file_extent()
                     :  *  - BTRFS_ADD_DELAYED_REF: __btrfs_inc_extent_ref()
                     :  *  - BTRFS_DROP_DELAYED_REF: __btrfs_free_extent()
                     :  * Any other action hits BUG().
                     :  *
                     :  * Returns the result of the helper that was called.
                     :  */
    1497    32951749 : static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
    1498             :                                 struct btrfs_delayed_ref_node *node,
    1499             :                                 struct btrfs_delayed_extent_op *extent_op,
    1500             :                                 bool insert_reserved)
    1501             : {
    1502    32951749 :         int ret = 0;
    1503    32951749 :         struct btrfs_delayed_data_ref *ref;
    1504    32951749 :         struct btrfs_key ins;
    1505    32951749 :         u64 parent = 0;
    1506    32951749 :         u64 ref_root = 0;
    1507    32951749 :         u64 flags = 0;
    1508             :         /* Key of the extent item, only consumed by alloc_reserved_file_extent() below. */
    1509    32951749 :         ins.objectid = node->bytenr;
    1510    32951749 :         ins.offset = node->num_bytes;
    1511    32951749 :         ins.type = BTRFS_EXTENT_ITEM_KEY;
    1512             : 
    1513    32951749 :         ref = btrfs_delayed_node_to_data_ref(node);
    1514    32951749 :         trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
    1515             :         /* Only shared data refs supply a parent; for every other type it stays 0. */
    1516    32951726 :         if (node->type == BTRFS_SHARED_DATA_REF_KEY)
    1517    16370428 :                 parent = ref->parent;
    1518    32951726 :         ref_root = ref->root;
    1519             : 
    1520    32951726 :         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
    1521     2170181 :                 if (extent_op)
    1522           0 :                         flags |= extent_op->flags_to_set;
    1523     2170181 :                 ret = alloc_reserved_file_extent(trans, parent, ref_root,
    1524             :                                                  flags, ref->objectid,
    1525             :                                                  ref->offset, &ins,
    1526             :                                                  node->ref_mod);
    1527    30781545 :         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
    1528    21751370 :                 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
    1529             :                                              ref->objectid, ref->offset,
    1530             :                                              node->ref_mod, extent_op);
    1531     9030175 :         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
    1532     9030175 :                 ret = __btrfs_free_extent(trans, node, parent,
    1533             :                                           ref_root, ref->objectid,
    1534             :                                           ref->offset, node->ref_mod,
    1535             :                                           extent_op);
    1536             :         } else {
    1537           0 :                 BUG();
    1538             :         }
    1539    32951842 :         return ret;
    1540             : }
    1541             : /*
                     :  * Apply a delayed extent op to an extent item in @leaf.
                     :  *
                     :  * @extent_op: the op; update_flags and update_key select what is changed
                     :  * @leaf:      extent buffer holding the item
                     :  * @ei:        the extent item to modify
                     :  *
                     :  * With update_flags set, flags_to_set is OR-ed into the item's flags.  With
                     :  * update_key set, the op's key is written into the tree block info located
                     :  * directly behind the extent item; the (possibly just updated) flags must
                     :  * then include BTRFS_EXTENT_FLAG_TREE_BLOCK, otherwise BUG_ON() fires.
                     :  */
    1542       69060 : static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
    1543             :                                     struct extent_buffer *leaf,
    1544             :                                     struct btrfs_extent_item *ei)
    1545             : {
    1546       69060 :         u64 flags = btrfs_extent_flags(leaf, ei);
    1547       69060 :         if (extent_op->update_flags) {
    1548       69060 :                 flags |= extent_op->flags_to_set;
    1549       69060 :                 btrfs_set_extent_flags(leaf, ei, flags);
    1550             :         }
    1551             : 
    1552       69060 :         if (extent_op->update_key) {
    1553           0 :                 struct btrfs_tree_block_info *bi;
    1554           0 :                 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
    1555           0 :                 bi = (struct btrfs_tree_block_info *)(ei + 1); /* info sits right after the extent item */
    1556           0 :                 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
    1557             :         }
    1558       69060 : }
    1559             : 
    1560        6932 : static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
    1561             :                                  struct btrfs_delayed_ref_head *head,
    1562             :                                  struct btrfs_delayed_extent_op *extent_op)
    1563             : {
                               /*
                                * Find the extent item for head->bytenr in the extent root and apply
                                * @extent_op to it via __run_delayed_extent_op(), then mark the leaf
                                * dirty.
                                *
                                * Returns 0 on success or if the transaction is already aborted,
                                * -ENOMEM if no path could be allocated, the error returned by
                                * btrfs_search_slot(), -EIO if no matching item exists, or -EINVAL
                                * (after aborting the transaction) if the item is smaller than
                                * struct btrfs_extent_item.
                                */
    1564        6932 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1565        6932 :         struct btrfs_root *root;
    1566        6932 :         struct btrfs_key key;
    1567        6932 :         struct btrfs_path *path;
    1568        6932 :         struct btrfs_extent_item *ei;
    1569        6932 :         struct extent_buffer *leaf;
    1570        6932 :         u32 item_size;
    1571        6932 :         int ret;
    1572        6932 :         int err = 0;
                               /* Non-zero while we look for a METADATA_ITEM (skinny) key. */
    1573        6932 :         int metadata = 1;
    1574             : 
    1575        6932 :         if (TRANS_ABORTED(trans))
    1576             :                 return 0;
    1577             : 
    1578        6932 :         if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
    1579           0 :                 metadata = 0;
    1580             : 
    1581        6932 :         path = btrfs_alloc_path();
    1582        6932 :         if (!path)
    1583             :                 return -ENOMEM;
    1584             : 
    1585        6932 :         key.objectid = head->bytenr;
    1586             : 
                               /*
                                * Skinny metadata items are keyed by (bytenr, METADATA_ITEM, level),
                                * regular extent items by (bytenr, EXTENT_ITEM, num_bytes).
                                */
    1587        6932 :         if (metadata) {
    1588        6932 :                 key.type = BTRFS_METADATA_ITEM_KEY;
    1589        6932 :                 key.offset = extent_op->level;
    1590             :         } else {
    1591           0 :                 key.type = BTRFS_EXTENT_ITEM_KEY;
    1592           0 :                 key.offset = head->num_bytes;
    1593             :         }
    1594             : 
    1595        6932 :         root = btrfs_extent_root(fs_info, key.objectid);
    1596        6932 : again:
    1597        6932 :         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
    1598        6932 :         if (ret < 0) {
    1599           0 :                 err = ret;
    1600           0 :                 goto out;
    1601             :         }
                               /* ret > 0: the exact key was not found. */
    1602        6932 :         if (ret > 0) {
    1603           0 :                 if (metadata) {
                                               /*
                                                * The block may be described by a regular EXTENT_ITEM
                                                * instead; check whether the previous slot holds it.
                                                */
    1604           0 :                         if (path->slots[0] > 0) {
    1605           0 :                                 path->slots[0]--;
    1606           0 :                                 btrfs_item_key_to_cpu(path->nodes[0], &key,
    1607             :                                                       path->slots[0]);
    1608           0 :                                 if (key.objectid == head->bytenr &&
    1609           0 :                                     key.type == BTRFS_EXTENT_ITEM_KEY &&
    1610           0 :                                     key.offset == head->num_bytes)
    1611             :                                         ret = 0;
    1612             :                         }
                                               /*
                                                * Still not found: redo the search once with the
                                                * EXTENT_ITEM key. metadata is cleared, so a second miss
                                                * ends in the -EIO branch below instead of looping.
                                                */
    1613           0 :                         if (ret > 0) {
    1614           0 :                                 btrfs_release_path(path);
    1615           0 :                                 metadata = 0;
    1616             : 
    1617           0 :                                 key.objectid = head->bytenr;
    1618           0 :                                 key.offset = head->num_bytes;
    1619           0 :                                 key.type = BTRFS_EXTENT_ITEM_KEY;
    1620           0 :                                 goto again;
    1621             :                         }
    1622             :                 } else {
    1623           0 :                         err = -EIO;
    1624           0 :                         goto out;
    1625             :                 }
    1626             :         }
    1627             : 
    1628        6932 :         leaf = path->nodes[0];
    1629        6932 :         item_size = btrfs_item_size(leaf, path->slots[0]);
    1630             : 
                               /* An item too small to hold a btrfs_extent_item is rejected. */
    1631        6932 :         if (unlikely(item_size < sizeof(*ei))) {
    1632           0 :                 err = -EINVAL;
    1633           0 :                 btrfs_print_v0_err(fs_info);
    1634           0 :                 btrfs_abort_transaction(trans, err);
    1635           0 :                 goto out;
    1636             :         }
    1637             : 
    1638        6932 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1639        6932 :         __run_delayed_extent_op(extent_op, leaf, ei);
    1640             : 
    1641        6932 :         btrfs_mark_buffer_dirty(leaf);
    1642        6932 : out:
    1643        6932 :         btrfs_free_path(path);
    1644        6932 :         return err;
    1645             : }
    1646             : 
    1647    30852428 : static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
    1648             :                                 struct btrfs_delayed_ref_node *node,
    1649             :                                 struct btrfs_delayed_extent_op *extent_op,
    1650             :                                 bool insert_reserved)
    1651             : {
                               /*
                                * Run one delayed ref that refers to a tree block.
                                *
                                * Depending on node->action and @insert_reserved this either inserts
                                * the extent item for a newly reserved block, adds a reference, or
                                * drops one. Returns the result of the selected helper, or -EIO if
                                * node->ref_mod is not 1.
                                */
    1652    30852428 :         int ret = 0;
    1653    30852428 :         struct btrfs_delayed_tree_ref *ref;
    1654    30852428 :         u64 parent = 0;
    1655    30852428 :         u64 ref_root = 0;
    1656             : 
    1657    30852428 :         ref = btrfs_delayed_node_to_tree_ref(node);
    1658    30852428 :         trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
    1659             : 
                               /* Only shared block refs carry a parent; otherwise parent stays 0. */
    1660    30852448 :         if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
    1661    19265821 :                 parent = ref->parent;
    1662    30852448 :         ref_root = ref->root;
    1663             : 
                               /* Tree block refs are expected to change the count by exactly 1. */
    1664    30852448 :         if (node->ref_mod != 1) {
    1665           0 :                 btrfs_err(trans->fs_info,
    1666             :         "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
    1667             :                           node->bytenr, node->ref_mod, node->action, ref_root,
    1668             :                           parent);
    1669           0 :                 return -EIO;
    1670             :         }
    1671    30852448 :         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
                                       /* A reserved block needs an extent op that supplies its flags. */
    1672     7765117 :                 BUG_ON(!extent_op || !extent_op->update_flags);
    1673     7765117 :                 ret = alloc_reserved_tree_block(trans, node, extent_op);
    1674    23087331 :         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
    1675     7840200 :                 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
    1676     7840200 :                                              ref->level, 0, 1, extent_op);
    1677    15247131 :         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
    1678    15247131 :                 ret = __btrfs_free_extent(trans, node, parent, ref_root,
    1679    15247131 :                                           ref->level, 0, 1, extent_op);
    1680             :         } else {
                                       /* No other action is handled for tree refs. */
    1681           0 :                 BUG();
    1682             :         }
    1683             :         return ret;
    1684             : }
    1685             : 
    1686             : /* Helper that actually processes a single delayed ref entry. */
    1687    63804207 : static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
    1688             :                                struct btrfs_delayed_ref_node *node,
    1689             :                                struct btrfs_delayed_extent_op *extent_op,
    1690             :                                bool insert_reserved)
    1691             : {
                               /*
                                * Dispatches on node->type to run_delayed_tree_ref() or
                                * run_delayed_data_ref() and returns that handler's result.
                                * Returns 0 without running anything if the transaction is aborted.
                                */
    1692    63804207 :         int ret = 0;
    1693             : 
                               /*
                                * On an aborted transaction nothing is run, but a reserved extent is
                                * still handed to btrfs_pin_extent().
                                */
    1694    63804207 :         if (TRANS_ABORTED(trans)) {
    1695           0 :                 if (insert_reserved)
    1696           0 :                         btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
    1697           0 :                 return 0;
    1698             :         }
    1699             : 
    1700    63804207 :         if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
    1701             :             node->type == BTRFS_SHARED_BLOCK_REF_KEY)
    1702    30852448 :                 ret = run_delayed_tree_ref(trans, node, extent_op,
    1703             :                                            insert_reserved);
    1704    32951759 :         else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
    1705             :                  node->type == BTRFS_SHARED_DATA_REF_KEY)
    1706    32951759 :                 ret = run_delayed_data_ref(trans, node, extent_op,
    1707             :                                            insert_reserved);
    1708             :         else
    1709           0 :                 BUG();
                               /* The handler failed for a reserved extent: pin its range. */
    1710    63804339 :         if (ret && insert_reserved)
    1711           1 :                 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
    1712    63804339 :         if (ret < 0)
    1713           1 :                 btrfs_err(trans->fs_info,
    1714             : "failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d",
    1715             :                           node->bytenr, node->num_bytes, node->type,
    1716             :                           node->action, node->ref_mod, ret);
    1717             :         return ret;
    1718             : }
    1719             : 
    1720             : static inline struct btrfs_delayed_ref_node *
    1721             : select_delayed_ref(struct btrfs_delayed_ref_head *head)
    1722             : {
                               /*
                                * Pick the next delayed ref of @head to run.
                                *
                                * Returns NULL when the head's ref tree is empty, otherwise the first
                                * entry of ref_add_list if there is one, otherwise the first node of
                                * the ref tree.
                                */
    1723   113833394 :         struct btrfs_delayed_ref_node *ref;
    1724             : 
    1725   113833394 :         if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
    1726             :                 return NULL;
    1727             : 
    1728             :         /*
    1729             :          * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
    1730             :          * This is to prevent a ref count from going down to zero, which deletes
    1731             :          * the extent item from the extent tree, when there still are references
    1732             :          * to add, which would fail because they would not find the extent item.
    1733             :          */
    1734    63804344 :         if (!list_empty(&head->ref_add_list))
    1735    39526955 :                 return list_first_entry(&head->ref_add_list,
    1736             :                                 struct btrfs_delayed_ref_node, add_list);
    1737             : 
    1738    24277389 :         ref = rb_entry(rb_first_cached(&head->ref_tree),
    1739             :                        struct btrfs_delayed_ref_node, ref_node);
                               /* ref_add_list is empty here, so this ref must not be linked on it. */
    1740    24277389 :         ASSERT(list_empty(&ref->add_list));
    1741    24277389 :         return ref;
    1742             : }
    1743             : 
    1744         173 : static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
    1745             :                                       struct btrfs_delayed_ref_head *head)
    1746             : {
                               /*
                                * Give up processing of @head: under delayed_refs->lock clear its
                                * processing flag and count it as ready again, then release the
                                * head via btrfs_delayed_ref_unlock().
                                */
    1747         173 :         spin_lock(&delayed_refs->lock);
    1748         173 :         head->processing = false;
    1749         173 :         delayed_refs->num_heads_ready++;
    1750         173 :         spin_unlock(&delayed_refs->lock);
    1751         173 :         btrfs_delayed_ref_unlock(head);
    1752         173 : }
    1753             : 
    1754    50573298 : static struct btrfs_delayed_extent_op *cleanup_extent_op(
    1755             :                                 struct btrfs_delayed_ref_head *head)
    1756             : {
                               /*
                                * Return the extent op of @head that still has to be run, or NULL.
                                *
                                * NULL is returned when the head has no extent op, or when
                                * must_insert_reserved is set; in the latter case the op is freed
                                * and head->extent_op is cleared. When an op is returned it is
                                * left attached to the head.
                                */
    1757    50573298 :         struct btrfs_delayed_extent_op *extent_op = head->extent_op;
    1758             : 
    1759    50573298 :         if (!extent_op)
    1760             :                 return NULL;
    1761             : 
    1762      585769 :         if (head->must_insert_reserved) {
    1763      578837 :                 head->extent_op = NULL;
    1764      578837 :                 btrfs_free_delayed_extent_op(extent_op);
    1765      578837 :                 return NULL;
    1766             :         }
    1767             :         return extent_op;
    1768             : }
    1769             : 
    1770    50028998 : static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
    1771             :                                      struct btrfs_delayed_ref_head *head)
    1772             : {
                               /*
                                * Run and free the pending extent op of @head, if there is one.
                                *
                                * Returns 0 if there was nothing to run; head->lock is not touched
                                * on that path. Otherwise head->lock is released before the op is
                                * run, and the return value is the negative error from
                                * run_delayed_extent_op() or 1 on success.
                                */
    1773    50028998 :         struct btrfs_delayed_extent_op *extent_op;
    1774    50028998 :         int ret;
    1775             : 
    1776    50028998 :         extent_op = cleanup_extent_op(head);
    1777    50029006 :         if (!extent_op)
    1778             :                 return 0;
    1779        6932 :         head->extent_op = NULL;
    1780        6932 :         spin_unlock(&head->lock);
    1781        6932 :         ret = run_delayed_extent_op(trans, head, extent_op);
    1782        6932 :         btrfs_free_delayed_extent_op(extent_op);
                               /* Map success to 1 so the caller can tell that the lock was dropped. */
    1783        6932 :         return ret ? ret : 1;
    1784             : }
    1785             : 
    1786    50566652 : void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
    1787             :                                   struct btrfs_delayed_ref_root *delayed_refs,
    1788             :                                   struct btrfs_delayed_ref_head *head)
    1789             : {
                               /*
                                * Release the delayed refs reservation held for @head: one item
                                * for the head itself, plus the csum leaves for head->num_bytes
                                * when the head is a data extent whose total_ref_mod is negative.
                                * In that case pending_csums is also reduced, under
                                * delayed_refs->lock.
                                */
    1790    50566652 :         int nr_items = 1;       /* Dropping this ref head update. */
    1791             : 
    1792             :         /*
    1793             :          * We had csum deletions accounted for in our delayed refs rsv, we need
    1794             :          * to drop the csum leaves for this update from our delayed_refs_rsv.
    1795             :          */
    1796    50566652 :         if (head->total_ref_mod < 0 && head->is_data) {
    1797     7266493 :                 spin_lock(&delayed_refs->lock);
    1798     7266493 :                 delayed_refs->pending_csums -= head->num_bytes;
    1799     7266493 :                 spin_unlock(&delayed_refs->lock);
    1800     7266493 :                 nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
    1801             :         }
    1802             : 
    1803    50566652 :         btrfs_delayed_refs_rsv_release(fs_info, nr_items);
    1804    50566360 : }
    1805             : 
    1806    50029008 : static int cleanup_ref_head(struct btrfs_trans_handle *trans,
    1807             :                             struct btrfs_delayed_ref_head *head)
    1808             : {
    1809             : 
                               /*
                                * Finish a ref head whose delayed refs have all been run.
                                *
                                * Returns:
                                *   < 0  running the pending extent op failed; the head has been
                                *        unselected.
                                *   1    an extent op was run (head->lock was dropped for it), or new
                                *        refs / a new extent op showed up while relocking; the head
                                *        is kept and no lock taken here is left held.
                                *   else the head was deleted and released; the value is 0, or
                                *        whatever btrfs_del_csums() returned for a reserved data
                                *        extent.
                                */
    1810    50029008 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1811    50029008 :         struct btrfs_delayed_ref_root *delayed_refs;
    1812    50029008 :         int ret;
    1813             : 
    1814    50029008 :         delayed_refs = &trans->transaction->delayed_refs;
    1815             : 
    1816    50029008 :         ret = run_and_cleanup_extent_op(trans, head);
    1817    50028946 :         if (ret < 0) {
    1818           0 :                 unselect_delayed_ref_head(delayed_refs, head);
    1819           0 :                 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
    1820           0 :                 return ret;
    1821    50028946 :         } else if (ret) {
    1822             :                 return ret;
    1823             :         }
    1824             : 
    1825             :         /*
    1826             :          * Need to drop our head ref lock and re-acquire the delayed ref lock
    1827             :          * and then re-check to make sure nobody got added.
    1828             :          */
    1829    50022021 :         spin_unlock(&head->lock);
                               /* Lock order used here: delayed_refs->lock first, then head->lock. */
    1830    50022228 :         spin_lock(&delayed_refs->lock);
    1831    50022382 :         spin_lock(&head->lock);
    1832    50022380 :         if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
    1833          11 :                 spin_unlock(&head->lock);
    1834          11 :                 spin_unlock(&delayed_refs->lock);
    1835          11 :                 return 1;
    1836             :         }
    1837    50022369 :         btrfs_delete_ref_head(delayed_refs, head);
    1838    50022370 :         spin_unlock(&head->lock);
    1839    50022370 :         spin_unlock(&delayed_refs->lock);
    1840             : 
                               /*
                                * must_insert_reserved is still set, i.e. no ref consumed the
                                * reservation: pin the extent and, for data, delete its csums.
                                */
    1841    50022369 :         if (head->must_insert_reserved) {
    1842     1677665 :                 btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
    1843     1677665 :                 if (head->is_data) {
    1844     1643086 :                         struct btrfs_root *csum_root;
    1845             : 
    1846     1643086 :                         csum_root = btrfs_csum_root(fs_info, head->bytenr);
    1847     1643086 :                         ret = btrfs_del_csums(trans, csum_root, head->bytenr,
    1848             :                                               head->num_bytes);
    1849             :                 }
    1850             :         }
    1851             : 
    1852    50022369 :         btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
    1853             : 
    1854    50022050 :         trace_run_delayed_ref_head(fs_info, head, 0);
    1855    50022002 :         btrfs_delayed_ref_unlock(head);
    1856    50022207 :         btrfs_put_delayed_ref_head(head);
    1857    50022207 :         return ret;
    1858             : }
    1859             : 
    1860    51038245 : static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
    1861             :                                         struct btrfs_trans_handle *trans)
    1862             : {
                               /*
                                * Select the next ref head of the running transaction and lock it.
                                *
                                * Returns NULL if btrfs_select_ref_head() found no head,
                                * ERR_PTR(-EAGAIN) if btrfs_delayed_ref_lock() returned -EAGAIN,
                                * otherwise the selected head.
                                */
    1863    51038245 :         struct btrfs_delayed_ref_root *delayed_refs =
    1864    51038245 :                 &trans->transaction->delayed_refs;
    1865    51038245 :         struct btrfs_delayed_ref_head *head = NULL;
    1866    51038245 :         int ret;
    1867             : 
    1868    51038245 :         spin_lock(&delayed_refs->lock);
    1869    51038956 :         head = btrfs_select_ref_head(delayed_refs);
    1870    51038956 :         if (!head) {
    1871     1016412 :                 spin_unlock(&delayed_refs->lock);
    1872     1016412 :                 return head;
    1873             :         }
    1874             : 
    1875             :         /*
    1876             :          * Grab the lock that says we are going to process all the refs for
    1877             :          * this head
    1878             :          */
    1879    50022544 :         ret = btrfs_delayed_ref_lock(delayed_refs, head);
    1880    50022544 :         spin_unlock(&delayed_refs->lock);
    1881             : 
    1882             :         /*
    1883             :          * We may have dropped the spin lock to get the head mutex lock, and
    1884             :          * that might have given someone else time to free the head.  If that's
    1885             :          * true, it has been removed from our list and we can move on.
    1886             :          */
    1887    50022542 :         if (ret == -EAGAIN)
    1888           0 :                 head = ERR_PTR(-EAGAIN);
    1889             : 
    1890             :         return head;
    1891             : }
    1892             : 
    1893    50029382 : static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
    1894             :                                            struct btrfs_delayed_ref_head *locked_ref)
    1895             : {
                               /*
                                * Run every delayed ref queued on @locked_ref.
                                *
                                * Must be called with locked_ref->mutex and locked_ref->lock held
                                * (both are asserted below).
                                *
                                * Returns:
                                *   0        all refs were run; locked_ref->lock is held on return.
                                *   -EAGAIN  a ref with a sequence number was held back by
                                *            btrfs_check_delayed_seq(); the spin lock is dropped and
                                *            the head is unselected.
                                *   other    non-zero result of run_one_delayed_ref(); the spin lock
                                *            is already dropped and the head is unselected.
                                */
    1896    50029382 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1897    50029382 :         struct btrfs_delayed_ref_root *delayed_refs;
    1898    50029382 :         struct btrfs_delayed_extent_op *extent_op;
    1899    50029382 :         struct btrfs_delayed_ref_node *ref;
    1900    50029382 :         bool must_insert_reserved;
    1901    50029382 :         int ret;
    1902             : 
    1903    50029382 :         delayed_refs = &trans->transaction->delayed_refs;
    1904             : 
    1905    50029382 :         lockdep_assert_held(&locked_ref->mutex);
    1906    50029382 :         lockdep_assert_held(&locked_ref->lock);
    1907             : 
    1908   177637738 :         while ((ref = select_delayed_ref(locked_ref))) {
    1909    64754269 :                 if (ref->seq &&
    1910      949925 :                     btrfs_check_delayed_seq(fs_info, ref->seq)) {
    1911         172 :                         spin_unlock(&locked_ref->lock);
    1912         172 :                         unselect_delayed_ref_head(delayed_refs, locked_ref);
    1913         172 :                         return -EAGAIN;
    1914             :                 }
    1915             : 
                                       /* Unlink the ref from the head's tree and from the add list. */
    1916    63804172 :                 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
    1917    63804020 :                 RB_CLEAR_NODE(&ref->ref_node);
    1918    63804020 :                 if (!list_empty(&ref->add_list))
    1919    39526713 :                         list_del(&ref->add_list);
    1920             :                 /*
    1921             :                  * When we play the delayed ref, also correct the ref_mod on
    1922             :                  * head
    1923             :                  */
    1924    63803923 :                 switch (ref->action) {
    1925    39526628 :                 case BTRFS_ADD_DELAYED_REF:
    1926             :                 case BTRFS_ADD_DELAYED_EXTENT:
    1927    39526628 :                         locked_ref->ref_mod -= ref->ref_mod;
    1928    39526628 :                         break;
    1929    24277295 :                 case BTRFS_DROP_DELAYED_REF:
    1930    24277295 :                         locked_ref->ref_mod += ref->ref_mod;
    1931    24277295 :                         break;
    1932             :                 default:
    1933           0 :                         WARN_ON(1);
    1934             :                 }
    1935    63803923 :                 atomic_dec(&delayed_refs->num_entries);
    1936             : 
    1937             :                 /*
    1938             :                  * Record the must_insert_reserved flag before we drop the
    1939             :                  * spin lock.
    1940             :                  */
    1941    63804187 :                 must_insert_reserved = locked_ref->must_insert_reserved;
    1942    63804187 :                 locked_ref->must_insert_reserved = false;
    1943             : 
                                       /* Detach the extent op from the head; it is freed below. */
    1944    63804187 :                 extent_op = locked_ref->extent_op;
    1945    63804187 :                 locked_ref->extent_op = NULL;
    1946    63804187 :                 spin_unlock(&locked_ref->lock);
    1947             : 
    1948    63804179 :                 ret = run_one_delayed_ref(trans, ref, extent_op,
    1949             :                                           must_insert_reserved);
    1950             : 
    1951    63804261 :                 btrfs_free_delayed_extent_op(extent_op);
    1952    63804246 :                 if (ret) {
    1953           1 :                         unselect_delayed_ref_head(delayed_refs, locked_ref);
    1954           1 :                         btrfs_put_delayed_ref(ref);
    1955           1 :                         return ret;
    1956             :                 }
    1957             : 
    1958    63804245 :                 btrfs_put_delayed_ref(ref);
    1959    63804227 :                 cond_resched();
    1960             : 
                                       /* Retake the lock and merge again before selecting the next ref. */
    1961    63804197 :                 spin_lock(&locked_ref->lock);
    1962    63804090 :                 btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
    1963             :         }
    1964             : 
    1965             :         return 0;
    1966             : }
    1967             : 
    1968             : /*
                        * Process ref heads of the running transaction one at a time; @nr bounds
                        * how many heads are selected, see the loop condition at the bottom.
                        *
    1969             :  * Returns 0 on success or if called with an already aborted transaction.
    1970             :  * Returns a negative errno on failure.
                        *
                        * NOTE(review): this function does not call btrfs_abort_transaction()
                        * itself; the visible caller, btrfs_run_delayed_refs(), aborts the
                        * transaction when a negative value is returned.
    1971             :  */
    1972     4403876 : static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
    1973             :                                              unsigned long nr)
    1974             : {
    1975     4403876 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1976     4403876 :         struct btrfs_delayed_ref_root *delayed_refs;
    1977     4403876 :         struct btrfs_delayed_ref_head *locked_ref = NULL;
    1978     4403876 :         int ret;
                               /* Number of heads selected so far in this call. */
    1979     4403876 :         unsigned long count = 0;
    1980             : 
    1981     4403876 :         delayed_refs = &trans->transaction->delayed_refs;
    1982    51045293 :         do {
    1983    51045293 :                 if (!locked_ref) {
    1984    51038386 :                         locked_ref = btrfs_obtain_ref_head(trans);
                                               /*
                                                * -EAGAIN: try to select another head. NULL: no head
                                                * was selected, stop.
                                                */
    1985   101061496 :                         if (IS_ERR_OR_NULL(locked_ref)) {
    1986     1016412 :                                 if (PTR_ERR(locked_ref) == -EAGAIN) {
    1987           0 :                                         continue;
    1988             :                                 } else {
    1989             :                                         break;
    1990             :                                 }
    1991             :                         }
    1992    50022542 :                         count++;
    1993             :                 }
    1994             :                 /*
    1995             :                  * We need to try and merge add/drops of the same ref since we
    1996             :                  * can run into issues with relocate dropping the implicit ref
    1997             :                  * and then it being added back again before the drop can
    1998             :                  * finish.  If we merged anything we need to re-loop so we can
    1999             :                  * get a good ref.
    2000             :                  * Or we can get node references of the same type that weren't
    2001             :                  * merged when created due to bumps in the tree mod seq, and
    2002             :                  * we need to merge them to prevent adding an inline extent
    2003             :                  * backref before dropping it (triggering a BUG_ON at
    2004             :                  * insert_inline_extent_backref()).
    2005             :                  */
    2006    50029449 :                 spin_lock(&locked_ref->lock);
    2007    50029465 :                 btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
    2008             : 
    2009    50029381 :                 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref);
    2010    50029188 :                 if (ret < 0 && ret != -EAGAIN) {
    2011             :                         /*
    2012             :                          * Error, btrfs_run_delayed_refs_for_head already
    2013             :                          * unlocked everything so just bail out
    2014             :                          */
    2015           1 :                         return ret;
    2016    50029187 :                 } else if (!ret) {
    2017             :                         /*
    2018             :                          * Success, perform the usual cleanup of a processed
    2019             :                          * head
    2020             :                          */
    2021    50029015 :                         ret = cleanup_ref_head(trans, locked_ref);
    2022    50028662 :                         if (ret > 0 ) {
    2023             :                                 /* We dropped our lock, we need to loop. */
    2024        6943 :                                 ret = 0;
    2025        6943 :                                 continue;
    2026    50021719 :                         } else if (ret) {
    2027           0 :                                 return ret;
    2028             :                         }
    2029             :                 }
    2030             : 
    2031             :                 /*
    2032             :                  * Either success case or btrfs_run_delayed_refs_for_head
    2033             :                  * returned -EAGAIN, meaning we need to select another head
    2034             :                  */
    2035             : 
    2036    50021891 :                 locked_ref = NULL;
    2037    50021891 :                 cond_resched();
                               /*
                                * NOTE(review): when nr is (unsigned long)-1 the first clause is
                                * always false, so the loop only repeats while locked_ref is still
                                * set, i.e. it stops once a head has been fully processed.
                                */
    2038    50028885 :         } while ((nr != -1 && count < nr) || locked_ref);
    2039             : 
    2040             :         return 0;
    2041             : }
    2042             : 
    2043             : #ifdef SCRAMBLE_DELAYED_REFS
    2044             : /*
    2045             :  * Normally delayed refs get processed in ascending bytenr order. This
    2046             :  * correlates in most cases to the order added. To expose dependencies on this
    2047             :  * order, we start to process the tree in the middle instead of the beginning
                        *
                        * Walks down from the root of @root, alternating between the left and the
                        * right child, and returns the bytenr of the last entry visited. Returns 0
                        * when the tree is empty.
    2048             :  */
    2049             : static u64 find_middle(struct rb_root *root)
    2050             : {
    2051             :         struct rb_node *n = root->rb_node;
    2052             :         struct btrfs_delayed_ref_node *entry;
    2053             :         int alt = 1;
                               /*
                                * Start at 0: the walk below does not run for an empty tree, and
                                * the value was previously returned uninitialized in that case.
                                */
    2054             :         u64 middle = 0;
                               /* first/last are computed below but not used for the result. */
    2055             :         u64 first = 0, last = 0;
    2056             : 
    2057             :         n = rb_first(root);
    2058             :         if (n) {
    2059             :                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
    2060             :                 first = entry->bytenr;
    2061             :         }
    2062             :         n = rb_last(root);
    2063             :         if (n) {
    2064             :                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
    2065             :                 last = entry->bytenr;
    2066             :         }
    2067             :         n = root->rb_node;
    2068             : 
                               /* Zig-zag down the tree: left, right, left, ... until a leaf. */
    2069             :         while (n) {
    2070             :                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
    2071             :                 WARN_ON(!entry->in_tree);
    2072             : 
    2073             :                 middle = entry->bytenr;
    2074             : 
    2075             :                 if (alt)
    2076             :                         n = n->rb_left;
    2077             :                 else
    2078             :                         n = n->rb_right;
    2079             : 
    2080             :                 alt = 1 - alt;
    2081             :         }
    2082             :         return middle;
    2083             : }
    2084             : #endif
    2085             : 
    2086             : /*
    2087             :  * this starts processing the delayed reference count updates and
    2088             :  * extent insertions we have queued up so far.  count can be
    2089             :  * 0, which means to process everything in the tree at the start
    2090             :  * of the run (but not newly added entries), or it can be some target
    2091             :  * number you'd like to process.
    2092             :  *
    2093             :  * Returns 0 on success or if called with an aborted transaction
    2094             :  * Returns <0 on error and aborts the transaction
    2095             :  */
    2096     2420353 : int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
    2097             :                            unsigned long count)
    2098             : {
    2099     2420353 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2100     2420353 :         struct rb_node *node;
    2101     2420353 :         struct btrfs_delayed_ref_root *delayed_refs;
    2102     2420353 :         struct btrfs_delayed_ref_head *head;
    2103     2420353 :         int ret;
    2104     2420353 :         int run_all = count == (unsigned long)-1;
    2105             : 
    2106             :         /* We'll clean this up in btrfs_cleanup_transaction */
    2107     2420353 :         if (TRANS_ABORTED(trans))
    2108             :                 return 0;
    2109             : 
    2110     4840704 :         if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
    2111             :                 return 0;
    2112             : 
    2113     2420352 :         delayed_refs = &trans->transaction->delayed_refs;
    2114     2420352 :         if (count == 0)
    2115      595194 :                 count = delayed_refs->num_heads_ready;
    2116             : 
    2117     2420352 : again:
    2118             : #ifdef SCRAMBLE_DELAYED_REFS
    2119             :         delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
    2120             : #endif
    2121     4403878 :         ret = __btrfs_run_delayed_refs(trans, count);
    2122     4403879 :         if (ret < 0) {
    2123           1 :                 btrfs_abort_transaction(trans, ret);
    2124           1 :                 return ret;
    2125             :         }
    2126             : 
    2127     4403878 :         if (run_all) {
    2128     2891497 :                 btrfs_create_pending_block_groups(trans);
    2129             : 
    2130     2891497 :                 spin_lock(&delayed_refs->lock);
    2131     2891497 :                 node = rb_first_cached(&delayed_refs->href_root);
    2132     2891497 :                 if (!node) {
    2133      907971 :                         spin_unlock(&delayed_refs->lock);
    2134      907971 :                         goto out;
    2135             :                 }
    2136     1983526 :                 head = rb_entry(node, struct btrfs_delayed_ref_head,
    2137             :                                 href_node);
    2138     1983526 :                 refcount_inc(&head->refs);
    2139     1983526 :                 spin_unlock(&delayed_refs->lock);
    2140             : 
    2141             :                 /* Mutex was contended, block until it's released and retry. */
    2142     1983526 :                 mutex_lock(&head->mutex);
    2143     1983526 :                 mutex_unlock(&head->mutex);
    2144             : 
    2145     1983526 :                 btrfs_put_delayed_ref_head(head);
    2146     1983526 :                 cond_resched();
    2147     1983526 :                 goto again;
    2148             :         }
    2149     1512381 : out:
    2150             :         return 0;
    2151             : }
    2152             : 
    2153       69362 : int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
    2154             :                                 struct extent_buffer *eb, u64 flags)
    2155             : {
    2156       69362 :         struct btrfs_delayed_extent_op *extent_op;
    2157       69362 :         int level = btrfs_header_level(eb);
    2158       69362 :         int ret;
    2159             : 
    2160       69362 :         extent_op = btrfs_alloc_delayed_extent_op();
    2161       69362 :         if (!extent_op)
    2162             :                 return -ENOMEM;
    2163             : 
    2164       69362 :         extent_op->flags_to_set = flags;
    2165       69362 :         extent_op->update_flags = true;
    2166       69362 :         extent_op->update_key = false;
    2167       69362 :         extent_op->level = level;
    2168             : 
    2169       69362 :         ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
    2170       69362 :         if (ret)
    2171           0 :                 btrfs_free_delayed_extent_op(extent_op);
    2172             :         return ret;
    2173             : }
    2174             : 
    2175      351150 : static noinline int check_delayed_ref(struct btrfs_root *root,
    2176             :                                       struct btrfs_path *path,
    2177             :                                       u64 objectid, u64 offset, u64 bytenr)
    2178             : {
    2179      351150 :         struct btrfs_delayed_ref_head *head;
    2180      351150 :         struct btrfs_delayed_ref_node *ref;
    2181      351150 :         struct btrfs_delayed_data_ref *data_ref;
    2182      351150 :         struct btrfs_delayed_ref_root *delayed_refs;
    2183      351150 :         struct btrfs_transaction *cur_trans;
    2184      351150 :         struct rb_node *node;
    2185      351150 :         int ret = 0;
    2186             : 
    2187      351150 :         spin_lock(&root->fs_info->trans_lock);
    2188      351152 :         cur_trans = root->fs_info->running_transaction;
    2189      351152 :         if (cur_trans)
    2190      349544 :                 refcount_inc(&cur_trans->use_count);
    2191      351152 :         spin_unlock(&root->fs_info->trans_lock);
    2192      351152 :         if (!cur_trans)
    2193             :                 return 0;
    2194             : 
    2195      349544 :         delayed_refs = &cur_trans->delayed_refs;
    2196      349544 :         spin_lock(&delayed_refs->lock);
    2197      349544 :         head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
    2198      349544 :         if (!head) {
    2199       52532 :                 spin_unlock(&delayed_refs->lock);
    2200       52532 :                 btrfs_put_transaction(cur_trans);
    2201       52532 :                 return 0;
    2202             :         }
    2203             : 
    2204      297012 :         if (!mutex_trylock(&head->mutex)) {
    2205           1 :                 if (path->nowait) {
    2206           0 :                         spin_unlock(&delayed_refs->lock);
    2207           0 :                         btrfs_put_transaction(cur_trans);
    2208           0 :                         return -EAGAIN;
    2209             :                 }
    2210             : 
    2211           1 :                 refcount_inc(&head->refs);
    2212           1 :                 spin_unlock(&delayed_refs->lock);
    2213             : 
    2214           1 :                 btrfs_release_path(path);
    2215             : 
    2216             :                 /*
    2217             :                  * Mutex was contended, block until it's released and let
    2218             :                  * caller try again
    2219             :                  */
    2220           1 :                 mutex_lock(&head->mutex);
    2221           1 :                 mutex_unlock(&head->mutex);
    2222           1 :                 btrfs_put_delayed_ref_head(head);
    2223           1 :                 btrfs_put_transaction(cur_trans);
    2224           1 :                 return -EAGAIN;
    2225             :         }
    2226      297011 :         spin_unlock(&delayed_refs->lock);
    2227             : 
    2228      297011 :         spin_lock(&head->lock);
    2229             :         /*
    2230             :          * XXX: We should replace this with a proper search function in the
    2231             :          * future.
    2232             :          */
    2233      556852 :         for (node = rb_first_cached(&head->ref_tree); node;
    2234      259841 :              node = rb_next(node)) {
    2235      324113 :                 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
    2236             :                 /* If it's a shared ref we know a cross reference exists */
    2237      324113 :                 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
    2238             :                         ret = 1;
    2239             :                         break;
    2240             :                 }
    2241             : 
    2242      324113 :                 data_ref = btrfs_delayed_node_to_data_ref(ref);
    2243             : 
    2244             :                 /*
    2245             :                  * If our ref doesn't match the one we're currently looking at
    2246             :                  * then we have a cross reference.
    2247             :                  */
    2248      324113 :                 if (data_ref->root != root->root_key.objectid ||
    2249      324113 :                     data_ref->objectid != objectid ||
    2250      324037 :                     data_ref->offset != offset) {
    2251             :                         ret = 1;
    2252             :                         break;
    2253             :                 }
    2254             :         }
    2255      297011 :         spin_unlock(&head->lock);
    2256      297011 :         mutex_unlock(&head->mutex);
    2257      297011 :         btrfs_put_transaction(cur_trans);
    2258      297011 :         return ret;
    2259             : }
    2260             : 
    2261      356499 : static noinline int check_committed_ref(struct btrfs_root *root,
    2262             :                                         struct btrfs_path *path,
    2263             :                                         u64 objectid, u64 offset, u64 bytenr,
    2264             :                                         bool strict)
    2265             : {
    2266      356499 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2267      356499 :         struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
    2268      356498 :         struct extent_buffer *leaf;
    2269      356498 :         struct btrfs_extent_data_ref *ref;
    2270      356498 :         struct btrfs_extent_inline_ref *iref;
    2271      356498 :         struct btrfs_extent_item *ei;
    2272      356498 :         struct btrfs_key key;
    2273      356498 :         u32 item_size;
    2274      356498 :         int type;
    2275      356498 :         int ret;
    2276             : 
    2277      356498 :         key.objectid = bytenr;
    2278      356498 :         key.offset = (u64)-1;
    2279      356498 :         key.type = BTRFS_EXTENT_ITEM_KEY;
    2280             : 
    2281      356498 :         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
    2282      356499 :         if (ret < 0)
    2283           0 :                 goto out;
    2284      356499 :         BUG_ON(ret == 0); /* Corruption */
    2285             : 
    2286      356499 :         ret = -ENOENT;
    2287      356499 :         if (path->slots[0] == 0)
    2288        2219 :                 goto out;
    2289             : 
    2290      354280 :         path->slots[0]--;
    2291      354280 :         leaf = path->nodes[0];
    2292      354280 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    2293             : 
    2294      354281 :         if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
    2295      263747 :                 goto out;
    2296             : 
    2297       90534 :         ret = 1;
    2298       90534 :         item_size = btrfs_item_size(leaf, path->slots[0]);
    2299       90534 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    2300             : 
    2301             :         /* If extent item has more than 1 inline ref then it's shared */
    2302       90534 :         if (item_size != sizeof(*ei) +
    2303             :             btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
    2304        5249 :                 goto out;
    2305             : 
    2306             :         /*
    2307             :          * If extent created before last snapshot => it's shared unless the
    2308             :          * snapshot has been deleted. Use the heuristic if strict is false.
    2309             :          */
    2310      170556 :         if (!strict &&
    2311             :             (btrfs_extent_generation(leaf, ei) <=
    2312             :              btrfs_root_last_snapshot(&root->root_item)))
    2313           0 :                 goto out;
    2314             : 
    2315       85285 :         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
    2316             : 
    2317             :         /* If this extent has SHARED_DATA_REF then it's shared */
    2318       85285 :         type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
    2319       85285 :         if (type != BTRFS_EXTENT_DATA_REF_KEY)
    2320           0 :                 goto out;
    2321             : 
    2322       85285 :         ref = (struct btrfs_extent_data_ref *)(&iref->offset);
    2323       85285 :         if (btrfs_extent_refs(leaf, ei) !=
    2324       85285 :             btrfs_extent_data_ref_count(leaf, ref) ||
    2325             :             btrfs_extent_data_ref_root(leaf, ref) !=
    2326      170562 :             root->root_key.objectid ||
    2327       85269 :             btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
    2328             :             btrfs_extent_data_ref_offset(leaf, ref) != offset)
    2329          99 :                 goto out;
    2330             : 
    2331             :         ret = 0;
    2332      356500 : out:
    2333      356500 :         return ret;
    2334             : }
    2335             : 
    2336      356497 : int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
    2337             :                           u64 bytenr, bool strict, struct btrfs_path *path)
    2338             : {
    2339      356498 :         int ret;
    2340             : 
    2341      356498 :         do {
    2342      356498 :                 ret = check_committed_ref(root, path, objectid,
    2343             :                                           offset, bytenr, strict);
    2344      356500 :                 if (ret && ret != -ENOENT)
    2345        5348 :                         goto out;
    2346             : 
    2347      351152 :                 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
    2348      351151 :         } while (ret == -EAGAIN);
    2349             : 
    2350      351150 : out:
    2351      356498 :         btrfs_release_path(path);
    2352      356499 :         if (btrfs_is_data_reloc_root(root))
    2353       22096 :                 WARN_ON(ret > 0);
    2354      356499 :         return ret;
    2355             : }
    2356             : 
    2357     4049789 : static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
    2358             :                            struct btrfs_root *root,
    2359             :                            struct extent_buffer *buf,
    2360             :                            int full_backref, int inc)
    2361             : {
    2362     4049789 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2363     4049789 :         u64 bytenr;
    2364     4049789 :         u64 num_bytes;
    2365     4049789 :         u64 parent;
    2366     4049789 :         u64 ref_root;
    2367     4049789 :         u32 nritems;
    2368     4049789 :         struct btrfs_key key;
    2369     4049789 :         struct btrfs_file_extent_item *fi;
    2370     4049789 :         struct btrfs_ref generic_ref = { 0 };
    2371     4049789 :         bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
    2372     4049789 :         int i;
    2373     4049789 :         int action;
    2374     4049789 :         int level;
    2375     4049789 :         int ret = 0;
    2376             : 
    2377     4049789 :         if (btrfs_is_testing(fs_info))
    2378             :                 return 0;
    2379             : 
    2380     4049789 :         ref_root = btrfs_header_owner(buf);
    2381     4049789 :         nritems = btrfs_header_nritems(buf);
    2382     4049789 :         level = btrfs_header_level(buf);
    2383             : 
    2384     4049789 :         if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
    2385             :                 return 0;
    2386             : 
    2387     4049789 :         if (full_backref)
    2388     4014752 :                 parent = buf->start;
    2389             :         else
    2390             :                 parent = 0;
    2391     4049789 :         if (inc)
    2392             :                 action = BTRFS_ADD_DELAYED_REF;
    2393             :         else
    2394     1995949 :                 action = BTRFS_DROP_DELAYED_REF;
    2395             : 
    2396    97797725 :         for (i = 0; i < nritems; i++) {
    2397    93747936 :                 if (level == 0) {
    2398    85850356 :                         btrfs_item_key_to_cpu(buf, &key, i);
    2399    85850453 :                         if (key.type != BTRFS_EXTENT_DATA_KEY)
    2400    68607759 :                                 continue;
    2401    17242694 :                         fi = btrfs_item_ptr(buf, i,
    2402             :                                             struct btrfs_file_extent_item);
    2403    17242696 :                         if (btrfs_file_extent_type(buf, fi) ==
    2404             :                             BTRFS_FILE_EXTENT_INLINE)
    2405      120700 :                                 continue;
    2406    17121997 :                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
    2407    17121991 :                         if (bytenr == 0)
    2408        3253 :                                 continue;
    2409             : 
    2410    17118738 :                         num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
    2411    17118738 :                         key.offset -= btrfs_file_extent_offset(buf, fi);
    2412    17118744 :                         btrfs_init_generic_ref(&generic_ref, action, bytenr,
    2413             :                                                num_bytes, parent);
    2414    17118744 :                         btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
    2415             :                                             key.offset, root->root_key.objectid,
    2416             :                                             for_reloc);
    2417    17118753 :                         if (inc)
    2418     9186654 :                                 ret = btrfs_inc_extent_ref(trans, &generic_ref);
    2419             :                         else
    2420     7932099 :                                 ret = btrfs_free_extent(trans, &generic_ref);
    2421    17118759 :                         if (ret)
    2422           0 :                                 goto fail;
    2423             :                 } else {
    2424     7897580 :                         bytenr = btrfs_node_blockptr(buf, i);
    2425     7897580 :                         num_bytes = fs_info->nodesize;
    2426     7897580 :                         btrfs_init_generic_ref(&generic_ref, action, bytenr,
    2427             :                                                num_bytes, parent);
    2428     7897580 :                         btrfs_init_tree_ref(&generic_ref, level - 1, ref_root,
    2429             :                                             root->root_key.objectid, for_reloc);
    2430     7897580 :                         if (inc)
    2431     7782412 :                                 ret = btrfs_inc_extent_ref(trans, &generic_ref);
    2432             :                         else
    2433      115168 :                                 ret = btrfs_free_extent(trans, &generic_ref);
    2434     7897465 :                         if (ret)
    2435           0 :                                 goto fail;
    2436             :                 }
    2437             :         }
    2438             :         return 0;
    2439             : fail:
    2440             :         return ret;
    2441             : }
    2442             : 
    2443     2053840 : int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    2444             :                   struct extent_buffer *buf, int full_backref)
    2445             : {
    2446     2053840 :         return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
    2447             : }
    2448             : 
    2449       33999 : int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    2450             :                   struct extent_buffer *buf, int full_backref)
    2451             : {
    2452       33999 :         return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
    2453             : }
    2454             : 
    2455    13261791 : static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
    2456             : {
    2457    13261791 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2458    13261791 :         u64 flags;
    2459    13261791 :         u64 ret;
    2460             : 
    2461    13261791 :         if (data)
    2462             :                 flags = BTRFS_BLOCK_GROUP_DATA;
    2463     9449709 :         else if (root == fs_info->chunk_root)
    2464             :                 flags = BTRFS_BLOCK_GROUP_SYSTEM;
    2465             :         else
    2466     9448246 :                 flags = BTRFS_BLOCK_GROUP_METADATA;
    2467             : 
    2468    13261791 :         ret = btrfs_get_alloc_profile(fs_info, flags);
    2469    13262385 :         return ret;
    2470             : }
    2471             : 
    2472    13265745 : static u64 first_logical_byte(struct btrfs_fs_info *fs_info)
    2473             : {
    2474    13265745 :         struct rb_node *leftmost;
    2475    13265745 :         u64 bytenr = 0;
    2476             : 
    2477    13265745 :         read_lock(&fs_info->block_group_cache_lock);
    2478             :         /* Get the block group with the lowest logical start address. */
    2479    13266506 :         leftmost = rb_first_cached(&fs_info->block_group_cache_tree);
    2480    13266506 :         if (leftmost) {
    2481    13266506 :                 struct btrfs_block_group *bg;
    2482             : 
    2483    13266506 :                 bg = rb_entry(leftmost, struct btrfs_block_group, cache_node);
    2484    13266506 :                 bytenr = bg->start;
    2485             :         }
    2486    13266506 :         read_unlock(&fs_info->block_group_cache_lock);
    2487             : 
    2488    13266833 :         return bytenr;
    2489             : }
    2490             : 
    2491     3289078 : static int pin_down_extent(struct btrfs_trans_handle *trans,
    2492             :                            struct btrfs_block_group *cache,
    2493             :                            u64 bytenr, u64 num_bytes, int reserved)
    2494             : {
    2495     3289078 :         struct btrfs_fs_info *fs_info = cache->fs_info;
    2496             : 
    2497     3289078 :         spin_lock(&cache->space_info->lock);
    2498     3289087 :         spin_lock(&cache->lock);
    2499     3289088 :         cache->pinned += num_bytes;
    2500     3289088 :         btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
    2501             :                                              num_bytes);
    2502     3289089 :         if (reserved) {
    2503     3283853 :                 cache->reserved -= num_bytes;
    2504     3283853 :                 cache->space_info->bytes_reserved -= num_bytes;
    2505             :         }
    2506     3289089 :         spin_unlock(&cache->lock);
    2507     3289093 :         spin_unlock(&cache->space_info->lock);
    2508             : 
    2509     3289087 :         set_extent_bit(&trans->transaction->pinned_extents, bytenr,
    2510     3289087 :                        bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
    2511     3289096 :         return 0;
    2512             : }
    2513             : 
    2514     1677666 : int btrfs_pin_extent(struct btrfs_trans_handle *trans,
    2515             :                      u64 bytenr, u64 num_bytes, int reserved)
    2516             : {
    2517     1677666 :         struct btrfs_block_group *cache;
    2518             : 
    2519     1677666 :         cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
    2520     1677666 :         BUG_ON(!cache); /* Logic error */
    2521             : 
    2522     1677666 :         pin_down_extent(trans, cache, bytenr, num_bytes, reserved);
    2523             : 
    2524     1677666 :         btrfs_put_block_group(cache);
    2525     1677666 :         return 0;
    2526             : }
    2527             : 
    2528             : /*
    2529             :  * this function must be called within transaction
    2530             :  */
    2531        5236 : int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
    2532             :                                     u64 bytenr, u64 num_bytes)
    2533             : {
    2534        5236 :         struct btrfs_block_group *cache;
    2535        5236 :         int ret;
    2536             : 
    2537        5236 :         cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
    2538        5236 :         if (!cache)
    2539             :                 return -EINVAL;
    2540             : 
    2541             :         /*
    2542             :          * Fully cache the free space first so that our pin removes the free space
    2543             :          * from the cache.
    2544             :          */
    2545        5236 :         ret = btrfs_cache_block_group(cache, true);
    2546        5236 :         if (ret)
    2547           0 :                 goto out;
    2548             : 
    2549        5236 :         pin_down_extent(trans, cache, bytenr, num_bytes, 0);
    2550             : 
    2551             :         /* remove us from the free space cache (if we're there at all) */
    2552        5236 :         ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
    2553        5236 : out:
    2554        5236 :         btrfs_put_block_group(cache);
    2555        5236 :         return ret;
    2556             : }
    2557             : 
    2558       84010 : static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
    2559             :                                    u64 start, u64 num_bytes)
    2560             : {
    2561       84010 :         int ret;
    2562       84010 :         struct btrfs_block_group *block_group;
    2563             : 
    2564       84010 :         block_group = btrfs_lookup_block_group(fs_info, start);
    2565       84010 :         if (!block_group)
    2566             :                 return -EINVAL;
    2567             : 
    2568       84010 :         ret = btrfs_cache_block_group(block_group, true);
    2569       84010 :         if (ret)
    2570           0 :                 goto out;
    2571             : 
    2572       84010 :         ret = btrfs_remove_free_space(block_group, start, num_bytes);
    2573       84010 : out:
    2574       84010 :         btrfs_put_block_group(block_group);
    2575       84010 :         return ret;
    2576             : }
    2577             : 
    2578        4681 : int btrfs_exclude_logged_extents(struct extent_buffer *eb)
    2579             : {
    2580        4681 :         struct btrfs_fs_info *fs_info = eb->fs_info;
    2581        4681 :         struct btrfs_file_extent_item *item;
    2582        4681 :         struct btrfs_key key;
    2583        4681 :         int found_type;
    2584        4681 :         int i;
    2585        4681 :         int ret = 0;
    2586             : 
    2587        4681 :         if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
    2588             :                 return 0;
    2589             : 
    2590           0 :         for (i = 0; i < btrfs_header_nritems(eb); i++) {
    2591           0 :                 btrfs_item_key_to_cpu(eb, &key, i);
    2592           0 :                 if (key.type != BTRFS_EXTENT_DATA_KEY)
    2593           0 :                         continue;
    2594           0 :                 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
    2595           0 :                 found_type = btrfs_file_extent_type(eb, item);
    2596           0 :                 if (found_type == BTRFS_FILE_EXTENT_INLINE)
    2597           0 :                         continue;
    2598           0 :                 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
    2599           0 :                         continue;
    2600           0 :                 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
    2601           0 :                 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
    2602           0 :                 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
    2603           0 :                 if (ret)
    2604             :                         break;
    2605             :         }
    2606             : 
    2607             :         return ret;
    2608             : }
    2609             : 
    2610             : static void
    2611             : btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
    2612             : {
    2613    13262515 :         atomic_inc(&bg->reservations);
    2614             : }
    2615             : 
    2616             : /*
    2617             :  * Returns the free cluster for the given space info and sets empty_cluster to
    2618             :  * what it should be based on the mount options.
    2619             :  */
    2620             : static struct btrfs_free_cluster *
    2621    17207163 : fetch_cluster_info(struct btrfs_fs_info *fs_info,
    2622             :                    struct btrfs_space_info *space_info, u64 *empty_cluster)
    2623             : {
    2624    17207163 :         struct btrfs_free_cluster *ret = NULL;
    2625             : 
    2626    17207163 :         *empty_cluster = 0;
    2627    17207163 :         if (btrfs_mixed_space_info(space_info))
    2628             :                 return ret;
    2629             : 
    2630    16467603 :         if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
    2631    12548984 :                 ret = &fs_info->meta_alloc_cluster;
    2632    12548984 :                 if (btrfs_test_opt(fs_info, SSD))
    2633        6609 :                         *empty_cluster = SZ_2M;
    2634             :                 else
    2635    12542375 :                         *empty_cluster = SZ_64K;
    2636     3918619 :         } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
    2637     3915696 :                    btrfs_test_opt(fs_info, SSD_SPREAD)) {
    2638           0 :                 *empty_cluster = SZ_2M;
    2639           0 :                 ret = &fs_info->data_alloc_cluster;
    2640             :         }
    2641             : 
    2642             :         return ret;
    2643             : }
    2644             : /*
                     :  * Unpin the inclusive byte range [@start, @end], one block group at a time.
                     :  *
                     :  * For each piece that falls inside a block group, the pinned counters of the
                     :  * block group and of its space info are reduced by the piece length.  A
                     :  * piece in a read-only block group is added to bytes_readonly; otherwise,
                     :  * on a zoned filesystem, it is added to bytes_zone_unusable.  When neither
                     :  * applies and @return_free_space is true, the bytes first refill the global
                     :  * block reserve (if it uses the same space info and is not full) and any
                     :  * remainder triggers btrfs_try_granting_tickets().
                     :  *
                     :  * @return_free_space also controls whether each piece is handed to
                     :  * btrfs_add_free_space().
                     :  *
                     :  * A missing block group for @start hits BUG_ON().  Always returns 0.
                     :  */
    2645     3941114 : static int unpin_extent_range(struct btrfs_fs_info *fs_info,
    2646             :                               u64 start, u64 end,
    2647             :                               const bool return_free_space)
    2648             : {
    2649     3941114 :         struct btrfs_block_group *cache = NULL;
    2650     3941114 :         struct btrfs_space_info *space_info;
    2651     3941114 :         struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
    2652     3941114 :         struct btrfs_free_cluster *cluster = NULL;
    2653     3941114 :         u64 len;
    2654     3941114 :         u64 total_unpinned = 0;
    2655     3941114 :         u64 empty_cluster = 0;
    2656     3941114 :         bool readonly;
    2657             : 
    2658     7882749 :         while (start <= end) {
    2659     3941635 :                 readonly = false;
    2660     3941635 :                 if (!cache ||
    2661         521 :                     start >= cache->start + cache->length) {
    2662     3941635 :                         if (cache)
    2663         521 :                                 btrfs_put_block_group(cache);
    2664     3941635 :                         total_unpinned = 0; /* counted per block group */
    2665     3941635 :                         cache = btrfs_lookup_block_group(fs_info, start);
    2666     3941635 :                         BUG_ON(!cache); /* Logic error */
    2667             : 
    2668     3941635 :                         cluster = fetch_cluster_info(fs_info,
    2669             :                                                      cache->space_info,
    2670             :                                                      &empty_cluster);
    2671     3941635 :                         empty_cluster <<= 1; /* threshold below is twice that size */
    2672             :                 }
    2673             : 
    2674     3941635 :                 len = cache->start + cache->length - start; /* up to block group end */
    2675     3941635 :                 len = min(len, end + 1 - start); /* ... but not past @end */
    2676             : 
    2677     3941635 :                 if (return_free_space)
    2678     3941495 :                         btrfs_add_free_space(cache, start, len);
    2679             : 
    2680     3941635 :                 start += len;
    2681     3941635 :                 total_unpinned += len;
    2682     3941635 :                 space_info = cache->space_info;
    2683             : 
    2684             :                 /*
    2685             :                  * If this space cluster has been marked as fragmented and we've
    2686             :                  * unpinned enough in this block group to potentially allow a
    2687             :                  * cluster to be created inside of it go ahead and clear the
    2688             :                  * fragmented check.
    2689             :                  */
    2690     3941635 :                 if (cluster && cluster->fragmented &&
    2691           0 :                     total_unpinned > empty_cluster) {
    2692           0 :                         spin_lock(&cluster->lock);
    2693           0 :                         cluster->fragmented = 0;
    2694           0 :                         spin_unlock(&cluster->lock);
    2695             :                 }
    2696             : 
    2697     3941635 :                 spin_lock(&space_info->lock); /* held until the end of this iteration */
    2698     3941635 :                 spin_lock(&cache->lock);
    2699     3941635 :                 cache->pinned -= len;
    2700     3941635 :                 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
    2701     3941635 :                 space_info->max_extent_size = 0;
    2702     3941635 :                 if (cache->ro) {
    2703       62354 :                         space_info->bytes_readonly += len;
    2704       62354 :                         readonly = true;
    2705             :                 } else if (btrfs_is_zoned(fs_info)) {
    2706             :                         /* Need reset before reusing in a zoned block group */
    2707             :                         space_info->bytes_zone_unusable += len;
    2708             :                         readonly = true;
    2709             :                 }
    2710     3941635 :                 spin_unlock(&cache->lock);
    2711     3941635 :                 if (!readonly && return_free_space &&
    2712     3879141 :                     global_rsv->space_info == space_info) {
    2713     3675084 :                         spin_lock(&global_rsv->lock);
    2714     3675084 :                         if (!global_rsv->full) {
    2715      290771 :                                 u64 to_add = min(len, global_rsv->size -
    2716             :                                                       global_rsv->reserved);
    2717             : 
    2718      290771 :                                 global_rsv->reserved += to_add;
    2719      290771 :                                 btrfs_space_info_update_bytes_may_use(fs_info,
    2720             :                                                 space_info, to_add);
    2721      290771 :                                 if (global_rsv->reserved >= global_rsv->size)
    2722      143295 :                                         global_rsv->full = 1;
    2723      290771 :                                 len -= to_add; /* only the remainder is checked below */
    2724             :                         }
    2725     3675084 :                         spin_unlock(&global_rsv->lock);
    2726             :                 }
    2727             :                 /* Add to any tickets we may have */
    2728     3941635 :                 if (!readonly && return_free_space && len)
    2729     3630499 :                         btrfs_try_granting_tickets(fs_info, space_info);
    2730     3941635 :                 spin_unlock(&space_info->lock);
    2731             :         }
    2732             : 
    2733     3941114 :         if (cache)
    2734     3941114 :                 btrfs_put_block_group(cache);
    2735     3941114 :         return 0;
    2736             : }
    2737             : /*
                     :  * Unpin every EXTENT_DIRTY range recorded in the transaction's
                     :  * pinned_extents tree, then process the block groups queued on its
                     :  * deleted_bgs list.
                     :  *
                     :  * The unpin loop stops as soon as the transaction is seen aborted or no
                     :  * dirty range is left.  Each range is discarded first when the DISCARD_SYNC
                     :  * option is set, then cleared from the tree and unpinned with its space
                     :  * returned, all under fs_info->unused_bg_unpin_mutex.  With DISCARD_ASYNC
                     :  * the discard delay is recalculated and the discard work scheduled
                     :  * afterwards.
                     :  *
                     :  * Deleted block groups are always removed from the list, unfrozen and
                     :  * released; they are discarded only if the transaction was not aborted.  A
                     :  * discard failure (or the -EROFS used for an aborted transaction) is only
                     :  * logged as a warning.
                     :  *
                     :  * Always returns 0.
                     :  */
    2738      206170 : int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
    2739             : {
    2740      206170 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2741      206170 :         struct btrfs_block_group *block_group, *tmp;
    2742      206170 :         struct list_head *deleted_bgs;
    2743      206170 :         struct extent_io_tree *unpin;
    2744      206170 :         u64 start;
    2745      206170 :         u64 end;
    2746      206170 :         int ret;
    2747             : 
    2748      206170 :         unpin = &trans->transaction->pinned_extents;
    2749             : 
    2750     4147144 :         while (!TRANS_ABORTED(trans)) {
    2751     4147144 :                 struct extent_state *cached_state = NULL;
    2752             : 
    2753     4147144 :                 mutex_lock(&fs_info->unused_bg_unpin_mutex);
    2754     4147144 :                 ret = find_first_extent_bit(unpin, 0, &start, &end,
    2755             :                                             EXTENT_DIRTY, &cached_state);
    2756     4147144 :                 if (ret) { /* nothing dirty left: done */
    2757      206170 :                         mutex_unlock(&fs_info->unused_bg_unpin_mutex);
    2758      206170 :                         break;
    2759             :                 }
    2760             : 
    2761     3940974 :                 if (btrfs_test_opt(fs_info, DISCARD_SYNC))
    2762          36 :                         ret = btrfs_discard_extent(fs_info, start,
    2763          36 :                                                    end + 1 - start, NULL); /* NOTE(review): ret is never examined before being overwritten */
    2764             : 
    2765     3940974 :                 clear_extent_dirty(unpin, start, end, &cached_state);
    2766     3940974 :                 unpin_extent_range(fs_info, start, end, true);
    2767     3940974 :                 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
    2768     3940974 :                 free_extent_state(cached_state);
    2769     3940974 :                 cond_resched();
    2770             :         }
    2771             : 
    2772      206170 :         if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
    2773      206149 :                 btrfs_discard_calc_delay(&fs_info->discard_ctl);
    2774      206149 :                 btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
    2775             :         }
    2776             : 
    2777             :         /*
    2778             :          * Transaction is finished.  We don't need the lock anymore.  We
    2779             :          * do need to clean up the block groups in case of a transaction
    2780             :          * abort.
    2781             :          */
    2782      206170 :         deleted_bgs = &trans->transaction->deleted_bgs;
    2783      206170 :         list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
    2784           0 :                 u64 trimmed = 0;
    2785             : 
    2786           0 :                 ret = -EROFS; /* reported below when the discard is skipped */
    2787           0 :                 if (!TRANS_ABORTED(trans))
    2788           0 :                         ret = btrfs_discard_extent(fs_info,
    2789             :                                                    block_group->start,
    2790             :                                                    block_group->length,
    2791             :                                                    &trimmed);
    2792             : 
    2793           0 :                 list_del_init(&block_group->bg_list);
    2794           0 :                 btrfs_unfreeze_block_group(block_group);
    2795           0 :                 btrfs_put_block_group(block_group);
    2796             : 
    2797           0 :                 if (ret) {
    2798           0 :                         const char *errstr = btrfs_decode_error(ret);
    2799           0 :                         btrfs_warn(fs_info,
    2800             :                            "discard failed while removing blockgroup: errno=%d %s",
    2801             :                                    ret, errstr);
    2802             :                 }
    2803             :         }
    2804             : 
    2805      206170 :         return 0;
    2806             : }
    2807             : /*
                     :  * Bookkeeping for the byte range [@bytenr, @bytenr + @num_bytes) of an
                     :  * extent that is being freed.
                     :  *
                     :  * For data extents (@is_data) the checksums covering the range are deleted
                     :  * from the csum root first.  Then the range is added to the free space tree
                     :  * and the block group is updated via btrfs_update_block_group() with false
                     :  * as its last argument (presumably meaning "not an allocation" - confirm).
                     :  *
                     :  * Returns 0 on success.  The first failing step aborts the transaction and
                     :  * its error code is returned; later steps are not run.
                     :  */
    2808     8563323 : static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
    2809             :                                      u64 bytenr, u64 num_bytes, bool is_data)
    2810             : {
    2811     8563323 :         int ret;
    2812             : 
    2813     8563323 :         if (is_data) {
    2814     1065561 :                 struct btrfs_root *csum_root;
    2815             : 
    2816     1065561 :                 csum_root = btrfs_csum_root(trans->fs_info, bytenr);
    2817     1065560 :                 ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
    2818     1065561 :                 if (ret) {
    2819           0 :                         btrfs_abort_transaction(trans, ret);
    2820           0 :                         return ret;
    2821             :                 }
    2822             :         }
    2823             : 
    2824     8563323 :         ret = add_to_free_space_tree(trans, bytenr, num_bytes);
    2825     8563322 :         if (ret) {
    2826           0 :                 btrfs_abort_transaction(trans, ret);
    2827           0 :                 return ret;
    2828             :         }
    2829             : 
    2830     8563322 :         ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
    2831     8563323 :         if (ret)
    2832           0 :                 btrfs_abort_transaction(trans, ret);
    2833             : 
    2834             :         return ret; /* 0, or the error from btrfs_update_block_group() */
    2835             : }
    2836             : /*
                     :  * Abort @trans with -EUCLEAN, print the leaf at @path->nodes[0] and log a
                     :  * critical message built from @fmt and @args.
                     :  *
                     :  * The macro does not assign any error variable in the caller.  @trans is
                     :  * expanded twice, and @trans and @path are used without parentheses, so
                     :  * pass plain identifiers.
                     :  */
    2837             : #define abort_and_dump(trans, path, fmt, args...)       \
    2838             : ({                                                      \
    2839             :         btrfs_abort_transaction(trans, -EUCLEAN);       \
    2840             :         btrfs_print_leaf(path->nodes[0]);            \
    2841             :         btrfs_crit(trans->fs_info, fmt, ##args);     \
    2842             : })
    2843             : 
    2844             : /*
    2845             :  * Drop one or more refs of @node.
    2846             :  *
    2847             :  * 1. Locate the extent refs.
    2848             :  *    It's either inline in EXTENT/METADATA_ITEM or in keyed SHARED_* item.
    2849             :  *    Locate it, then reduce the refs number or remove the ref line completely.
    2850             :  *
    2851             :  * 2. Update the refs count in EXTENT/METADATA_ITEM
    2852             :  *
    2853             :  * Inline backref case:
    2854             :  *
    2855             :  * in extent tree we have:
    2856             :  *
    2857             :  *      item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
    2858             :  *              refs 2 gen 6 flags DATA
    2859             :  *              extent data backref root FS_TREE objectid 258 offset 0 count 1
    2860             :  *              extent data backref root FS_TREE objectid 257 offset 0 count 1
    2861             :  *
    2862             :  * This function gets called with:
    2863             :  *
    2864             :  *    node->bytenr = 13631488
    2865             :  *    node->num_bytes = 1048576
    2866             :  *    root_objectid = FS_TREE
    2867             :  *    owner_objectid = 257
    2868             :  *    owner_offset = 0
    2869             :  *    refs_to_drop = 1
    2870             :  *
    2871             :  * Then we should get something like:
    2872             :  *
    2873             :  *      item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
    2874             :  *              refs 1 gen 6 flags DATA
    2875             :  *              extent data backref root FS_TREE objectid 258 offset 0 count 1
    2876             :  *
    2877             :  * Keyed backref case:
    2878             :  *
    2879             :  * in extent tree we have:
    2880             :  *
    2881             :  *      item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
    2882             :  *              refs 754 gen 6 flags DATA
    2883             :  *      [...]
    2884             :  *      item 2 key (13631488 EXTENT_DATA_REF <HASH>) itemoff 3915 itemsize 28
    2885             :  *              extent data backref root FS_TREE objectid 866 offset 0 count 1
    2886             :  *
    2887             :  * This function gets called with:
    2888             :  *
    2889             :  *    node->bytenr = 13631488
    2890             :  *    node->num_bytes = 1048576
    2891             :  *    root_objectid = FS_TREE
    2892             :  *    owner_objectid = 866
    2893             :  *    owner_offset = 0
    2894             :  *    refs_to_drop = 1
    2895             :  *
    2896             :  * Then we should get something like:
    2897             :  *
    2898             :  *      item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
    2899             :  *              refs 753 gen 6 flags DATA
    2900             :  *
    2901             :  * And that (13631488 EXTENT_DATA_REF <HASH>) gets removed.
    2902             :  */
    2903    24277306 : static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
    2904             :                                struct btrfs_delayed_ref_node *node, u64 parent,
    2905             :                                u64 root_objectid, u64 owner_objectid,
    2906             :                                u64 owner_offset, int refs_to_drop,
    2907             :                                struct btrfs_delayed_extent_op *extent_op)
    2908             : {
    2909    24277306 :         struct btrfs_fs_info *info = trans->fs_info;
    2910    24277306 :         struct btrfs_key key;
    2911    24277306 :         struct btrfs_path *path;
    2912    24277306 :         struct btrfs_root *extent_root;
    2913    24277306 :         struct extent_buffer *leaf;
    2914    24277306 :         struct btrfs_extent_item *ei;
    2915    24277306 :         struct btrfs_extent_inline_ref *iref;
    2916    24277306 :         int ret;
    2917    24277306 :         int is_data;
    2918    24277306 :         int extent_slot = 0;
    2919    24277306 :         int found_extent = 0;
    2920    24277306 :         int num_to_del = 1;
    2921    24277306 :         u32 item_size;
    2922    24277306 :         u64 refs;
    2923    24277306 :         u64 bytenr = node->bytenr;
    2924    24277306 :         u64 num_bytes = node->num_bytes;
    2925    24277306 :         bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
    2926             : 
    2927    24277306 :         extent_root = btrfs_extent_root(info, bytenr);
    2928    24277307 :         ASSERT(extent_root);
    2929             : 
    2930    24277307 :         path = btrfs_alloc_path();
    2931    24277305 :         if (!path)
    2932             :                 return -ENOMEM;
    2933             : 
    2934    24277305 :         is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
    2935             : 
    2936    24277305 :         if (!is_data && refs_to_drop != 1) {
    2937           0 :                 btrfs_crit(info,
    2938             : "invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
    2939             :                            node->bytenr, refs_to_drop);
    2940           0 :                 ret = -EINVAL;
    2941           0 :                 btrfs_abort_transaction(trans, ret);
    2942           0 :                 goto out;
    2943             :         }
    2944             : 
    2945    24277305 :         if (is_data)
    2946     9030177 :                 skinny_metadata = false;
    2947             : 
    2948    24277305 :         ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
    2949             :                                     parent, root_objectid, owner_objectid,
    2950             :                                     owner_offset);
    2951    24277331 :         if (ret == 0) {
    2952             :                 /*
    2953             :                  * Either the inline backref or the SHARED_DATA_REF/
    2954             :                  * SHARED_BLOCK_REF is found
    2955             :                  *
    2956             :                  * Here is a quick path to locate EXTENT/METADATA_ITEM.
    2957             :                  * It's possible the EXTENT/METADATA_ITEM is near current slot.
    2958             :                  */
    2959    24277331 :                 extent_slot = path->slots[0];
    2960    25200294 :                 while (extent_slot >= 0) {
    2961    25176269 :                         btrfs_item_key_to_cpu(path->nodes[0], &key,
    2962             :                                               extent_slot);
    2963    25176269 :                         if (key.objectid != bytenr)
    2964             :                                 break;
    2965    25176269 :                         if (key.type == BTRFS_EXTENT_ITEM_KEY &&
    2966     8871508 :                             key.offset == num_bytes) {
    2967             :                                 found_extent = 1;
    2968             :                                 break;
    2969             :                         }
    2970    16304761 :                         if (key.type == BTRFS_METADATA_ITEM_KEY &&
    2971    15247144 :                             key.offset == owner_objectid) {
    2972             :                                 found_extent = 1;
    2973             :                                 break;
    2974             :                         }
    2975             : 
    2976             :                         /* Quick path didn't find the EXTENT/METADATA_ITEM */
    2977     1057617 :                         if (path->slots[0] - extent_slot > 5)
    2978             :                                 break;
    2979      922963 :                         extent_slot--;
    2980             :                 }
    2981             : 
    2982    24277331 :                 if (!found_extent) {
    2983      158679 :                         if (iref) {
    2984           0 :                                 abort_and_dump(trans, path,
    2985             : "invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref",
    2986             :                                            path->slots[0]);
    2987           0 :                                 ret = -EUCLEAN;
    2988           0 :                                 goto out;
    2989             :                         }
    2990             :                         /* Must be SHARED_* item, remove the backref first */
    2991      158679 :                         ret = remove_extent_backref(trans, extent_root, path,
    2992             :                                                     NULL, refs_to_drop, is_data);
    2993      158679 :                         if (ret) {
    2994           0 :                                 btrfs_abort_transaction(trans, ret);
    2995           0 :                                 goto out;
    2996             :                         }
    2997      158679 :                         btrfs_release_path(path);
    2998             : 
    2999             :                         /* Slow path to locate EXTENT/METADATA_ITEM */
    3000      158679 :                         key.objectid = bytenr;
    3001      158679 :                         key.type = BTRFS_EXTENT_ITEM_KEY;
    3002      158679 :                         key.offset = num_bytes;
    3003             : 
    3004      158679 :                         if (!is_data && skinny_metadata) {
    3005           0 :                                 key.type = BTRFS_METADATA_ITEM_KEY;
    3006           0 :                                 key.offset = owner_objectid;
    3007             :                         }
    3008             : 
    3009      158679 :                         ret = btrfs_search_slot(trans, extent_root,
    3010             :                                                 &key, path, -1, 1);
    3011      158679 :                         if (ret > 0 && skinny_metadata && path->slots[0]) {
    3012             :                                 /*
    3013             :                                  * Couldn't find our skinny metadata item,
    3014             :                                  * see if we have ye olde extent item.
    3015             :                                  */
    3016           0 :                                 path->slots[0]--;
    3017           0 :                                 btrfs_item_key_to_cpu(path->nodes[0], &key,
    3018             :                                                       path->slots[0]);
    3019           0 :                                 if (key.objectid == bytenr &&
    3020           0 :                                     key.type == BTRFS_EXTENT_ITEM_KEY &&
    3021           0 :                                     key.offset == num_bytes)
    3022           0 :                                         ret = 0;
    3023             :                         }
    3024             : 
    3025      158679 :                         if (ret > 0 && skinny_metadata) {
    3026           0 :                                 skinny_metadata = false;
    3027           0 :                                 key.objectid = bytenr;
    3028           0 :                                 key.type = BTRFS_EXTENT_ITEM_KEY;
    3029           0 :                                 key.offset = num_bytes;
    3030           0 :                                 btrfs_release_path(path);
    3031           0 :                                 ret = btrfs_search_slot(trans, extent_root,
    3032             :                                                         &key, path, -1, 1);
    3033             :                         }
    3034             : 
    3035      158679 :                         if (ret) {
    3036           0 :                                 if (ret > 0)
    3037           0 :                                         btrfs_print_leaf(path->nodes[0]);
    3038           0 :                                 btrfs_err(info,
    3039             :                         "umm, got %d back from search, was looking for %llu, slot %d",
    3040             :                                           ret, bytenr, path->slots[0]);
    3041             :                         }
    3042      158679 :                         if (ret < 0) {
    3043           0 :                                 btrfs_abort_transaction(trans, ret);
    3044           0 :                                 goto out;
    3045             :                         }
    3046      158679 :                         extent_slot = path->slots[0];
    3047             :                 }
    3048           0 :         } else if (WARN_ON(ret == -ENOENT)) {
    3049           0 :                 abort_and_dump(trans, path,
    3050             : "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu slot %d",
    3051             :                                bytenr, parent, root_objectid, owner_objectid,
    3052             :                                owner_offset, path->slots[0]);
    3053           0 :                 goto out;
    3054             :         } else {
    3055           0 :                 btrfs_abort_transaction(trans, ret);
    3056           0 :                 goto out;
    3057             :         }
    3058             : 
    3059    24277331 :         leaf = path->nodes[0];
    3060    24277331 :         item_size = btrfs_item_size(leaf, extent_slot);
    3061    24277331 :         if (unlikely(item_size < sizeof(*ei))) {
    3062           0 :                 ret = -EINVAL;
    3063           0 :                 btrfs_print_v0_err(info);
    3064           0 :                 btrfs_abort_transaction(trans, ret);
    3065           0 :                 goto out;
    3066             :         }
    3067    24277331 :         ei = btrfs_item_ptr(leaf, extent_slot,
    3068             :                             struct btrfs_extent_item);
    3069    24277331 :         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
    3070    15247144 :             key.type == BTRFS_EXTENT_ITEM_KEY) {
    3071           0 :                 struct btrfs_tree_block_info *bi;
    3072             : 
    3073           0 :                 if (item_size < sizeof(*ei) + sizeof(*bi)) {
    3074           0 :                         abort_and_dump(trans, path,
    3075             : "invalid extent item size for key (%llu, %u, %llu) slot %u owner %llu, has %u expect >= %zu",
    3076             :                                        key.objectid, key.type, key.offset,
    3077             :                                        path->slots[0], owner_objectid, item_size,
    3078             :                                        sizeof(*ei) + sizeof(*bi));
    3079           0 :                         ret = -EUCLEAN;
    3080           0 :                         goto out;
    3081             :                 }
    3082           0 :                 bi = (struct btrfs_tree_block_info *)(ei + 1);
    3083           0 :                 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
    3084             :         }
    3085             : 
    3086    24277331 :         refs = btrfs_extent_refs(leaf, ei);
    3087    24277331 :         if (refs < refs_to_drop) {
    3088           0 :                 abort_and_dump(trans, path,
    3089             :                 "trying to drop %d refs but we only have %llu for bytenr %llu slot %u",
    3090             :                                refs_to_drop, refs, bytenr, path->slots[0]);
    3091           0 :                 ret = -EUCLEAN;
    3092           0 :                 goto out;
    3093             :         }
    3094    24277331 :         refs -= refs_to_drop;
    3095             : 
    3096    24277331 :         if (refs > 0) {
    3097    15714008 :                 if (extent_op)
    3098       39294 :                         __run_delayed_extent_op(extent_op, leaf, ei);
    3099             :                 /*
    3100             :                  * In the case of inline back ref, reference count will
    3101             :                  * be updated by remove_extent_backref
    3102             :                  */
    3103    15714008 :                 if (iref) {
    3104    15508566 :                         if (!found_extent) {
    3105           0 :                                 abort_and_dump(trans, path,
    3106             : "invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u",
    3107             :                                                path->slots[0]);
    3108           0 :                                 ret = -EUCLEAN;
    3109           0 :                                 goto out;
    3110             :                         }
    3111             :                 } else {
    3112      205442 :                         btrfs_set_extent_refs(leaf, ei, refs);
    3113      205442 :                         btrfs_mark_buffer_dirty(leaf);
    3114             :                 }
    3115    15714008 :                 if (found_extent) {
    3116    15555349 :                         ret = remove_extent_backref(trans, extent_root, path,
    3117             :                                                     iref, refs_to_drop, is_data);
    3118    15555349 :                         if (ret) {
    3119           0 :                                 btrfs_abort_transaction(trans, ret);
    3120           0 :                                 goto out;
    3121             :                         }
    3122             :                 }
    3123             :         } else {
    3124             :                 /* In this branch refs == 1 */
    3125     8563323 :                 if (found_extent) {
    3126     9628844 :                         if (is_data && refs_to_drop !=
    3127     1065541 :                             extent_data_ref_count(path, iref)) {
    3128           0 :                                 abort_and_dump(trans, path,
    3129             :                 "invalid refs_to_drop, current refs %u refs_to_drop %u slot %u",
    3130             :                                                extent_data_ref_count(path, iref),
    3131             :                                                refs_to_drop, path->slots[0]);
    3132           0 :                                 ret = -EUCLEAN;
    3133           0 :                                 goto out;
    3134             :                         }
    3135     8563303 :                         if (iref) {
    3136     8563079 :                                 if (path->slots[0] != extent_slot) {
    3137           0 :                                         abort_and_dump(trans, path,
    3138             : "invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref",
    3139             :                                                        key.objectid, key.type,
    3140             :                                                        key.offset, path->slots[0]);
    3141           0 :                                         ret = -EUCLEAN;
    3142           0 :                                         goto out;
    3143             :                                 }
    3144             :                         } else {
    3145             :                                 /*
    3146             :                                  * No inline ref, we must be at SHARED_* item,
    3147             :                                  * And it's single ref, it must be:
    3148             :                                  * |    extent_slot       ||extent_slot + 1|
    3149             :                                  * [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ]
    3150             :                                  */
    3151         224 :                                 if (path->slots[0] != extent_slot + 1) {
    3152           0 :                                         abort_and_dump(trans, path,
    3153             :         "invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM",
    3154             :                                                        path->slots[0]);
    3155           0 :                                         ret = -EUCLEAN;
    3156           0 :                                         goto out;
    3157             :                                 }
    3158         224 :                                 path->slots[0] = extent_slot;
    3159         224 :                                 num_to_del = 2;
    3160             :                         }
    3161             :                 }
    3162             : 
    3163     8563323 :                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
    3164             :                                       num_to_del);
    3165     8563323 :                 if (ret) {
    3166           0 :                         btrfs_abort_transaction(trans, ret);
    3167           0 :                         goto out;
    3168             :                 }
    3169     8563323 :                 btrfs_release_path(path);
    3170             : 
    3171     8563323 :                 ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
    3172             :         }
    3173    24277331 :         btrfs_release_path(path);
    3174             : 
    3175    24277317 : out:
    3176    24277317 :         btrfs_free_path(path);
    3177    24277317 :         return ret;
    3178             : }
    3179             : 
    3180             : /*
    3181             :  * When we free a block, it is possible (and likely) that we free the last
    3182             :  * delayed ref for that extent as well.  This searches the delayed ref tree for
    3183             :  * a given extent, and if there are no other delayed refs to be processed, it
    3184             :  * removes it from the tree.
                     :  *
                     :  * Returns 1 only when the ref head for @bytenr was removed and its
                     :  * must_insert_reserved flag was set.  Returns 0 otherwise: no head was
                     :  * found, the head still has refs in its ref_tree, cleanup_extent_op()
                     :  * returned non-NULL, head->mutex could not be taken with a trylock, or the
                     :  * head was removed but must_insert_reserved was clear.
    3185             :  */
    3186      935550 : static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
    3187             :                                       u64 bytenr)
    3188             : {
    3189      935550 :         struct btrfs_delayed_ref_head *head;
    3190      935550 :         struct btrfs_delayed_ref_root *delayed_refs;
    3191      935550 :         int ret = 0;
    3192             : 
    3193      935550 :         delayed_refs = &trans->transaction->delayed_refs;
    3194      935550 :         spin_lock(&delayed_refs->lock);
    3195      935550 :         head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
    3196      935550 :         if (!head)
    3197           3 :                 goto out_delayed_unlock;
    3198             : 
    3199      935547 :         spin_lock(&head->lock);
    3200      935547 :         if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
    3201      391268 :                 goto out;
    3202             : 
    3203      544279 :         if (cleanup_extent_op(head) != NULL)
    3204           0 :                 goto out;
    3205             : 
    3206             :         /*
    3207             :          * Waiting for the lock here would deadlock.  If someone else has it
    3208             :          * locked they are already in the process of dropping it anyway.
    3209             :          */
    3210      544279 :         if (!mutex_trylock(&head->mutex))
    3211          21 :                 goto out;
    3212             : 
    3213      544258 :         btrfs_delete_ref_head(delayed_refs, head);
    3214      544258 :         head->processing = false;
    3215             : 
    3216      544258 :         spin_unlock(&head->lock);
    3217      544258 :         spin_unlock(&delayed_refs->lock);
    3218             : 
    3219      544258 :         BUG_ON(head->extent_op);
    3220      544258 :         if (head->must_insert_reserved)
    3221      544258 :                 ret = 1;
    3222             :         /* Both spinlocks are dropped; head->mutex is still held for the accounting cleanup. */
    3223      544258 :         btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
    3224      544258 :         mutex_unlock(&head->mutex);
    3225      544258 :         btrfs_put_delayed_ref_head(head);
    3226      544258 :         return ret;
    3227      391289 : out:
    3228      391289 :         spin_unlock(&head->lock);
    3229             : 
    3230      391292 : out_delayed_unlock:
    3231      391292 :         spin_unlock(&delayed_refs->lock);
    3232      391292 :         return 0;
    3233             : }
    3234             : /*
                     :  * Free tree block @buf on behalf of root @root_id.
                     :  *
                     :  * Unless @root_id is BTRFS_TREE_LOG_OBJECTID, a BTRFS_DROP_DELAYED_REF tree
                     :  * ref is built for the block and queued with btrfs_add_delayed_tree_ref();
                     :  * a failure to queue it hits BUG_ON().
                     :  *
                     :  * When @last_ref is set and the header generation of @buf equals
                     :  * trans->transid, the block is additionally handled right here:
                     :  *  - non-log roots first call check_ref_cleanup(); if that returns 0 the
                     :  *    buffer is only handed to btrfs_redirty_list_add();
                     :  *  - a buffer with BTRFS_HEADER_FLAG_WRITTEN set is pinned;
                     :  *  - with BTRFS_FS_TREE_MOD_LOG_USERS set, or on a zoned filesystem, the
                     :  *    buffer is handed to btrfs_redirty_list_add() and pinned;
                     :  *  - otherwise the range is passed to btrfs_add_free_space() and
                     :  *    btrfs_free_reserved_bytes().
                     :  *
                     :  * Whenever @last_ref is set, EXTENT_BUFFER_CORRUPT is cleared on @buf before
                     :  * returning.
                     :  */
    3235    12011242 : void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
    3236             :                            u64 root_id,
    3237             :                            struct extent_buffer *buf,
    3238             :                            u64 parent, int last_ref)
    3239             : {
    3240    12011242 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    3241    12011242 :         struct btrfs_ref generic_ref = { 0 };
    3242    12011242 :         int ret;
    3243             : 
    3244    12011242 :         btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
    3245    12011242 :                                buf->start, buf->len, parent);
    3246    12011242 :         btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
    3247             :                             root_id, 0, false);
    3248             : 
    3249    12011242 :         if (root_id != BTRFS_TREE_LOG_OBJECTID) {
    3250    10936247 :                 btrfs_ref_tree_mod(fs_info, &generic_ref);
    3251    10936247 :                 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL);
    3252    10936733 :                 BUG_ON(ret); /* -ENOMEM */
    3253             :         }
    3254             : 
    3255    12011728 :         if (last_ref && btrfs_header_generation(buf) == trans->transid) {
    3256     2010552 :                 struct btrfs_block_group *cache;
    3257     2010552 :                 bool must_pin = false;
    3258             : 
    3259     2010552 :                 if (root_id != BTRFS_TREE_LOG_OBJECTID) {
    3260      935550 :                         ret = check_ref_cleanup(trans, buf->start);
    3261      935548 :                         if (!ret) {
    3262      391292 :                                 btrfs_redirty_list_add(trans->transaction, buf);
    3263      391292 :                                 goto out;
    3264             :                         }
    3265             :                 }
    3266             : 
    3267     1619258 :                 cache = btrfs_lookup_block_group(fs_info, buf->start);
    3268             :                 /*
                     :                  * NOTE(review): cache is passed on below without a NULL
                     :                  * check; presumably a block group always covers
                     :                  * buf->start at this point -- confirm.
                     :                  */
    3269     1619281 :                 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
    3270     1575347 :                         pin_down_extent(trans, cache, buf->start, buf->len, 1);
    3271     1575357 :                         btrfs_put_block_group(cache);
    3272     1575357 :                         goto out;
    3273             :                 }
    3274             : 
    3275             :                 /*
    3276             :                  * If there are tree mod log users we may have recorded mod log
    3277             :                  * operations for this node.  If we re-allocate this node we
    3278             :                  * could replay operations on this node that happened when it
    3279             :                  * existed in a completely different root.  For example if it
    3280             :                  * was part of root A, then was reallocated to root B, and we
    3281             :                  * are doing a btrfs_old_search_slot(root B), we could replay
    3282             :                  * operations that happened when the block was part of root A,
    3283             :                  * giving us an inconsistent view of the btree.
    3284             :                  *
    3285             :                  * We are safe from races here because at this point no other
    3286             :                  * node or root points to this extent buffer, so if after this
    3287             :                  * check a new tree mod log user joins we will not have an
    3288             :                  * existing log of operations on this node that we have to
    3289             :                  * contend with.
    3290             :                  */
    3291       87868 :                 if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
    3292           3 :                         must_pin = true;
    3293             : 
    3294       43934 :                 if (must_pin || btrfs_is_zoned(fs_info)) {
    3295           3 :                         btrfs_redirty_list_add(trans->transaction, buf);
    3296           3 :                         pin_down_extent(trans, cache, buf->start, buf->len, 1);
    3297           3 :                         btrfs_put_block_group(cache);
    3298           3 :                         goto out;
    3299             :                 }
    3300             : 
    3301       43931 :                 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
    3302             : 
    3303       43931 :                 btrfs_add_free_space(cache, buf->start, buf->len);
    3304       43931 :                 btrfs_free_reserved_bytes(cache, buf->len, 0);
    3305       43931 :                 btrfs_put_block_group(cache);
    3306       43931 :                 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
    3307             :         }
    3308    10001176 : out:
    3309    12011759 :         if (last_ref) {
    3310             :                 /*
    3311             :                  * Deleting the buffer, clear the corrupt flag since it doesn't
    3312             :                  * matter anymore.
    3313             :                  */
    3314     9151607 :                 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
    3315             :         }
    3316    12011768 : }
    3317             : 
    3318             : /*
                     :  * Free the extent described by @ref.
                     :  *
                     :  * Does nothing and returns 0 when btrfs_is_testing() is true.  Refs whose
                     :  * owning root is BTRFS_TREE_LOG_OBJECTID are only passed to
                     :  * btrfs_pin_extent() and yield 0.  Any other metadata ref goes to
                     :  * btrfs_add_delayed_tree_ref() and any other ref to
                     :  * btrfs_add_delayed_data_ref(); that call's result is returned, and
                     :  * btrfs_ref_tree_mod() is called afterwards whatever the result was.
                     :  *
                     :  * Can return -ENOMEM.
                     :  */
    3319    17601976 : int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
    3320             : {
    3321    17601976 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    3322    17601976 :         int ret;
    3323             : 
    3324    17601976 :         if (btrfs_is_testing(fs_info))
    3325             :                 return 0;
    3326             : 
    3327             :         /*
    3328             :          * Tree log blocks never actually go into the extent allocation
    3329             :          * tree, just update pinning info and exit early.
    3330             :          */
    3331    17601976 :         if ((ref->type == BTRFS_REF_METADATA &&
    3332    17601976 :              ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
    3333    11837815 :             (ref->type == BTRFS_REF_DATA &&
    3334    11837815 :              ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)) {
    3335             :                 /*
                     :                  * unlocks the pinned mutex
                     :                  * NOTE(review): no mutex is taken in this function; confirm
                     :                  * this remark still matches btrfs_pin_extent().
                     :                  */
    3336           0 :                 btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
    3337           0 :                 ret = 0;
    3338    17601976 :         } else if (ref->type == BTRFS_REF_METADATA) {
    3339     5764161 :                 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL);
    3340             :         } else {
    3341    11837815 :                 ret = btrfs_add_delayed_data_ref(trans, ref, 0);
    3342             :         }
    3343             :         /* Same tree-log test as above, negated: only non-log refs are recorded. */
    3344    17601988 :         if (!((ref->type == BTRFS_REF_METADATA &&
    3345             :                ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
    3346             :               (ref->type == BTRFS_REF_DATA &&
    3347             :                ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)))
    3348             :                 btrfs_ref_tree_mod(fs_info, ref);
    3349             : 
    3350    17601988 :         return ret;
    3351             : }
    3352             : /*
                     :  * Stages compared against find_free_extent_ctl::loop.  Code in this file
                     :  * tests ->loop with relational operators (e.g. "> LOOP_CACHING_NOWAIT",
                     :  * ">= LOOP_NO_EMPTY_SIZE"), so the declaration order is significant.
                     :  */
    3353             : enum btrfs_loop_type {
    3354             :         LOOP_CACHING_NOWAIT,
    3355             :         LOOP_CACHING_WAIT,
    3356             :         LOOP_UNSET_SIZE_CLASS,
    3357             :         LOOP_ALLOC_CHUNK,
    3358             :         LOOP_WRONG_SIZE_CLASS,
    3359             :         LOOP_NO_EMPTY_SIZE,
    3360             : };
    3361             : /* Take cache->data_rwsem for reading when @delalloc is non-zero; otherwise do nothing. */
    3362             : static inline void
    3363             : btrfs_lock_block_group(struct btrfs_block_group *cache,
    3364             :                        int delalloc)
    3365             : {
    3366    11289554 :         if (delalloc)
    3367     2104214 :                 down_read(&cache->data_rwsem);
    3368             : }
    3369             : /*
                     :  * Take a reference on @cache with btrfs_get_block_group() and, when
                     :  * @delalloc is non-zero, also take cache->data_rwsem for reading.
                     :  */
    3370    11519116 : static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
    3371             :                        int delalloc)
    3372             : {
    3373    11519116 :         btrfs_get_block_group(cache);
    3374    11519021 :         if (delalloc)
    3375     8355673 :                 down_read(&cache->data_rwsem);
    3376    11518974 : }
    3377             : /*
                     :  * Lock @cluster and return the block group it currently points at.
                     :  *
                     :  * cluster->refill_lock is taken here and is still held on every return
                     :  * path, including the NULL one.
                     :  *
                     :  * Returns NULL when cluster->block_group is NULL.  Returns @block_group
                     :  * itself, with no extra reference or rwsem taken, when the cluster already
                     :  * points at it.  Otherwise returns the cluster's block group with a
                     :  * reference taken and, when @delalloc is non-zero, its data_rwsem held for
                     :  * reading.
                     :  */
    3378     9100841 : static struct btrfs_block_group *btrfs_lock_cluster(
    3379             :                    struct btrfs_block_group *block_group,
    3380             :                    struct btrfs_free_cluster *cluster,
    3381             :                    int delalloc)
    3382             :         __acquires(&cluster->refill_lock)
    3383             : {
    3384     9100841 :         struct btrfs_block_group *used_bg = NULL;
    3385             : 
    3386     9100841 :         spin_lock(&cluster->refill_lock);
    3387     9101576 :         while (1) {
    3388     9101576 :                 used_bg = cluster->block_group;
    3389     9101576 :                 if (!used_bg)
    3390             :                         return NULL;
    3391             : 
    3392     9091268 :                 if (used_bg == block_group)
    3393     9091176 :                         return used_bg;
    3394             : 
    3395          92 :                 btrfs_get_block_group(used_bg);
    3396             : 
    3397          92 :                 if (!delalloc)
    3398          92 :                         return used_bg;
    3399             : 
    3400           0 :                 if (down_read_trylock(&used_bg->data_rwsem))
    3401           0 :                         return used_bg;
    3402             :                 /* The trylock failed: drop the spinlock before waiting on the rwsem. */
    3403           0 :                 spin_unlock(&cluster->refill_lock);
    3404             : 
    3405             :                 /* We should only have one-level nested. */
    3406           0 :                 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
    3407             : 
    3408           0 :                 spin_lock(&cluster->refill_lock);
    3409           0 :                 if (used_bg == cluster->block_group)
    3410           0 :                         return used_bg;
    3411             :                 /* The cluster changed block group while unlocked: undo and retry. */
    3412           0 :                 up_read(&used_bg->data_rwsem);
    3413           0 :                 btrfs_put_block_group(used_bg);
    3414             :         }
    3415             : }
    3416             : /*
                     :  * Counterpart of btrfs_grab_block_group(): release the read side of
                     :  * cache->data_rwsem when @delalloc is non-zero, then drop a reference on
                     :  * @cache with btrfs_put_block_group().
                     :  */
    3417             : static inline void
    3418    22809343 : btrfs_release_block_group(struct btrfs_block_group *cache,
    3419             :                          int delalloc)
    3420             : {
    3421    22809343 :         if (delalloc)
    3422    10460727 :                 up_read(&cache->data_rwsem);
    3423    22809855 :         btrfs_put_block_group(cache);
    3424    22810175 : }
    3425             : 
    3426             : /*
    3427             :  * Helper function for find_free_extent().
    3428             :  *
    3429             :  * Return -ENOENT to tell the caller to fall back to unclustered mode.
    3430             :  * Return -EAGAIN to tell the caller to re-search this block group.
    3431             :  * Return >0 to tell the caller that nothing was found.
    3432             :  * Return 0 when a location was found and ffe_ctl->found_offset was set.
                     :  *
                     :  * *cluster_bg_ret is written only when the allocation is satisfied from the
                     :  * cluster that was already set up; in that case it is the block group
                     :  * returned by btrfs_lock_cluster(), and whatever reference/rwsem that call
                     :  * took is not released here (presumably the caller releases it -- confirm).
                     :  * last_ptr->refill_lock is released on every return path.
    3433             :  */
    3434     9100960 : static int find_free_extent_clustered(struct btrfs_block_group *bg,
    3435             :                                       struct find_free_extent_ctl *ffe_ctl,
    3436             :                                       struct btrfs_block_group **cluster_bg_ret)
    3437             : {
    3438     9100960 :         struct btrfs_block_group *cluster_bg;
    3439     9100960 :         struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
    3440     9100960 :         u64 aligned_cluster;
    3441     9100960 :         u64 offset;
    3442     9100960 :         int ret;
    3443             : 
    3444     9100960 :         cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
    3445     9101576 :         if (!cluster_bg)
    3446       10308 :                 goto refill_cluster;
    3447     9091268 :         if (cluster_bg != bg && (cluster_bg->ro ||
    3448           2 :             !block_group_bits(cluster_bg, ffe_ctl->flags)))
    3449          90 :                 goto release_cluster;
    3450             : 
    3451     9091178 :         offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
    3452             :                         ffe_ctl->num_bytes, cluster_bg->start,
    3453             :                         &ffe_ctl->max_extent_size);
    3454     9091178 :         if (offset) {
    3455             :                 /* We have a block, we're done */
    3456     9084628 :                 spin_unlock(&last_ptr->refill_lock);
    3457     9084627 :                 trace_btrfs_reserve_extent_cluster(cluster_bg, ffe_ctl);
    3458     9084627 :                 *cluster_bg_ret = cluster_bg;
    3459     9084627 :                 ffe_ctl->found_offset = offset;
    3460     9084627 :                 return 0;
    3461             :         }
    3462        6550 :         WARN_ON(last_ptr->block_group != cluster_bg);
    3463             : 
    3464        6640 : release_cluster:
    3465             :         /*
    3466             :          * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a new cluster, so
    3467             :          * let's just skip it and let the allocator find whatever block it can
    3468             :          * find. If we reach this point, we will have tried the cluster
    3469             :          * allocator plenty of times and not have found anything, so we are
    3470             :          * likely way too fragmented for the clustering stuff to find anything.
    3471             :          *
    3472             :          * However, if the cluster is taken from the current block group,
    3473             :          * release the cluster first, so that we stand a better chance of
    3474             :          * succeeding in the unclustered allocation.
    3475             :          */
    3476        6640 :         if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
    3477           0 :                 spin_unlock(&last_ptr->refill_lock);
    3478           0 :                 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
    3479           0 :                 return -ENOENT;
    3480             :         }
    3481             : 
    3482             :         /* This cluster didn't work out, free it and start over */
    3483        6640 :         btrfs_return_cluster_to_free_space(NULL, last_ptr);
    3484             : 
    3485        6640 :         if (cluster_bg != bg)
    3486          90 :                 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
    3487             : 
    3488        6550 : refill_cluster:
    3489       16948 :         if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
    3490           0 :                 spin_unlock(&last_ptr->refill_lock);
    3491           0 :                 return -ENOENT;
    3492             :         }
    3493             : 
    3494       16948 :         aligned_cluster = max_t(u64,
    3495             :                         ffe_ctl->empty_cluster + ffe_ctl->empty_size,
    3496             :                         bg->full_stripe_len);
    3497       16948 :         ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
    3498             :                         ffe_ctl->num_bytes, aligned_cluster);
    3499       16948 :         if (ret == 0) {
    3500             :                 /* Now pull our allocation out of this cluster */
    3501        9791 :                 offset = btrfs_alloc_from_cluster(bg, last_ptr,
    3502             :                                 ffe_ctl->num_bytes, ffe_ctl->search_start,
    3503             :                                 &ffe_ctl->max_extent_size);
    3504        9791 :                 if (offset) {
    3505             :                         /* We found one, proceed */
    3506        9791 :                         spin_unlock(&last_ptr->refill_lock);
    3507        9791 :                         ffe_ctl->found_offset = offset;
    3508        9791 :                         trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
    3509        9791 :                         return 0;
    3510             :                 }
    3511        7157 :         } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
    3512        2226 :                    !ffe_ctl->retry_clustered) {
    3513        2226 :                 spin_unlock(&last_ptr->refill_lock);
    3514             : 
    3515        2226 :                 ffe_ctl->retry_clustered = true;
    3516        2226 :                 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
    3517        2226 :                                 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
    3518        2226 :                 return -EAGAIN;
    3519             :         }
    3520             :         /*
    3521             :          * At this point we either didn't find a cluster or we weren't able to
    3522             :          * allocate a block from our cluster.  Free the cluster we've been
    3523             :          * trying to use, and go to the next block group.
    3524             :          */
    3525        4931 :         btrfs_return_cluster_to_free_space(NULL, last_ptr);
    3526        4931 :         spin_unlock(&last_ptr->refill_lock);
    3527        4931 :         return 1;
    3528             : }
    3529             : 
    3530             : /*
                     :  * Try to allocate from @bg without using a cluster.
                     :  *
    3531             :  * Return >0 to tell the caller that nothing was found.
    3532             :  * Return 0 when a free extent was found and ffe_ctl->found_offset was set.
    3533             :  * Return -EAGAIN to tell the caller to re-search this block group.
    3534             :  */
    3535     8829911 : static int find_free_extent_unclustered(struct btrfs_block_group *bg,
    3536             :                                         struct find_free_extent_ctl *ffe_ctl)
    3537             : {
    3538     8829911 :         struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
    3539     8829911 :         u64 offset;
    3540             : 
    3541             :         /*
    3542             :          * We are doing an unclustered allocation, set the fragmented flag so
    3543             :          * we don't bother trying to setup a cluster again until we get more
    3544             :          * space.
    3545             :          */
    3546     8829911 :         if (unlikely(last_ptr)) {
    3547           0 :                 spin_lock(&last_ptr->lock);
    3548           0 :                 last_ptr->fragmented = 1;
    3549           0 :                 spin_unlock(&last_ptr->lock);
    3550             :         }
    3551     8829911 :         if (ffe_ctl->cached) {
    3552     8828439 :                 struct btrfs_free_space_ctl *free_space_ctl;
    3553             : 
    3554     8828439 :                 free_space_ctl = bg->free_space_ctl;
    3555     8828439 :                 spin_lock(&free_space_ctl->tree_lock);
    3556     8830204 :                 if (free_space_ctl->free_space <
    3557     8830204 :                     ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
    3558     8830204 :                     ffe_ctl->empty_size) {
    3559     4389193 :                         ffe_ctl->total_free_space = max_t(u64,
    3560             :                                         ffe_ctl->total_free_space,
    3561             :                                         free_space_ctl->free_space);
    3562     4389193 :                         spin_unlock(&free_space_ctl->tree_lock);
    3563     4389193 :                         return 1;
    3564             :                 }
    3565     4441011 :                 spin_unlock(&free_space_ctl->tree_lock);
    3566             :         }
    3567             : 
    3568     4442415 :         offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
    3569             :                         ffe_ctl->num_bytes, ffe_ctl->empty_size,
    3570             :                         &ffe_ctl->max_extent_size);
    3571             : 
    3572             :         /*
    3573             :          * If we didn't find a chunk, and we haven't failed on this block group
    3574             :          * before, and this block group is in the middle of caching and we are
    3575             :          * ok with waiting, then go ahead and wait for progress to be made, and
    3576             :          * set @retry_unclustered to true.
    3577             :          *
    3578             :          * If @retry_unclustered is true then we've already waited on this
    3579             :          * block group once and should move on to the next block group.
    3580             :          */
    3581     4442402 :         if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
    3582         971 :             ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
    3583         585 :                 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
    3584         585 :                                                       ffe_ctl->empty_size);
    3585         585 :                 ffe_ctl->retry_unclustered = true;
    3586         585 :                 return -EAGAIN;
    3587     4441817 :         } else if (!offset) {
    3588             :                 return 1;
    3589             :         }
    3590     4168176 :         ffe_ctl->found_offset = offset;
    3591     4168176 :         return 0;
    3592             : }
    3593             : /*
                     :  * Try the cluster allocator first when ffe_ctl->last_ptr is set and
                     :  * ffe_ctl->use_cluster is true.  Its result is returned as is when it is
                     :  * >= 0 or -EAGAIN; any other result, or skipping the cluster attempt,
                     :  * falls through to find_free_extent_unclustered(), whose result is
                     :  * returned instead.  @bg_ret is only forwarded to the clustered helper.
                     :  */
    3594    17931106 : static int do_allocation_clustered(struct btrfs_block_group *block_group,
    3595             :                                    struct find_free_extent_ctl *ffe_ctl,
    3596             :                                    struct btrfs_block_group **bg_ret)
    3597             : {
    3598    17931106 :         int ret;
    3599             : 
    3600             :         /* We want to try and use the cluster allocator, so let's look there */
    3601    17931106 :         if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
    3602     9100952 :                 ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
    3603     9101575 :                 if (ret >= 0 || ret == -EAGAIN)
    3604             :                         return ret;
    3605             :                 /* ret == -ENOENT case falls through */
    3606             :         }
    3607             : 
    3608     8830155 :         return find_free_extent_unclustered(block_group, ffe_ctl);
    3609             : }
    3610             : 
    3611             : /*
    3612             :  * Tree-log block group locking
    3613             :  * ============================
    3614             :  *
    3615             :  * fs_info::treelog_bg_lock protects the fs_info::treelog_bg which
    3616             :  * indicates the starting address of a block group, which is reserved only
    3617             :  * for tree-log metadata.
    3618             :  *
    3619             :  * Lock nesting
    3620             :  * ============
    3621             :  *
    3622             :  * space_info::lock
    3623             :  *   block_group::lock
    3624             :  *     fs_info::treelog_bg_lock
    3625             :  */
    3626             : 
    3627             : /*
    3628             :  * Simple allocator for sequential-only block group. It only allows sequential
    3629             :  * allocation. No need to play with trees. This function also reserves the
    3630             :  * bytes as in btrfs_add_reserved_bytes.
    3631             :  */
    3632           0 : static int do_allocation_zoned(struct btrfs_block_group *block_group,
    3633             :                                struct find_free_extent_ctl *ffe_ctl,
    3634             :                                struct btrfs_block_group **bg_ret)
    3635             : {
    3636           0 :         struct btrfs_fs_info *fs_info = block_group->fs_info;
    3637           0 :         struct btrfs_space_info *space_info = block_group->space_info;
    3638           0 :         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
    3639           0 :         u64 start = block_group->start;
    3640           0 :         u64 num_bytes = ffe_ctl->num_bytes;
    3641           0 :         u64 avail;
    3642           0 :         u64 bytenr = block_group->start;
    3643           0 :         u64 log_bytenr;
    3644           0 :         u64 data_reloc_bytenr;
    3645           0 :         int ret = 0;
    3646           0 :         bool skip = false;
    3647             : 
    3648           0 :         ASSERT(btrfs_is_zoned(block_group->fs_info));
    3649             : 
    3650             :         /*
    3651             :          * Do not allow non-tree-log blocks in the dedicated tree-log block
    3652             :          * group, and vice versa.
    3653             :          */
    3654           0 :         spin_lock(&fs_info->treelog_bg_lock);
    3655           0 :         log_bytenr = fs_info->treelog_bg;
    3656           0 :         if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
    3657           0 :                            (!ffe_ctl->for_treelog && bytenr == log_bytenr)))
    3658           0 :                 skip = true;
    3659           0 :         spin_unlock(&fs_info->treelog_bg_lock);
    3660           0 :         if (skip)
    3661             :                 return 1;
    3662             : 
    3663             :         /*
    3664             :          * Do not allow non-relocation blocks in the dedicated relocation block
    3665             :          * group, and vice versa.
    3666             :          */
    3667           0 :         spin_lock(&fs_info->relocation_bg_lock);
    3668           0 :         data_reloc_bytenr = fs_info->data_reloc_bg;
    3669           0 :         if (data_reloc_bytenr &&
    3670           0 :             ((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
    3671           0 :              (!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
    3672           0 :                 skip = true;
    3673           0 :         spin_unlock(&fs_info->relocation_bg_lock);
    3674           0 :         if (skip)
    3675             :                 return 1;
    3676             : 
    3677             :         /* Check RO and no space case before trying to activate it */
    3678           0 :         spin_lock(&block_group->lock);
    3679           0 :         if (block_group->ro || btrfs_zoned_bg_is_full(block_group)) {
    3680             :                 ret = 1;
    3681             :                 /*
    3682             :                  * May need to clear fs_info->{treelog,data_reloc}_bg.
    3683             :                  * Return the error after taking the locks.
    3684             :                  */
    3685             :         }
    3686           0 :         spin_unlock(&block_group->lock);
    3687             : 
    3688           0 :         if (!ret && !btrfs_zone_activate(block_group)) {
    3689             :                 ret = 1;
    3690             :                 /*
    3691             :                  * May need to clear fs_info->{treelog,data_reloc}_bg.
    3692             :                  * Return the error after taking the locks.
    3693             :                  */
    3694             :         }
    3695             : 
    3696           0 :         spin_lock(&space_info->lock);
    3697           0 :         spin_lock(&block_group->lock);
    3698           0 :         spin_lock(&fs_info->treelog_bg_lock);
    3699           0 :         spin_lock(&fs_info->relocation_bg_lock);
    3700             : 
    3701           0 :         if (ret)
    3702           0 :                 goto out;
    3703             : 
    3704           0 :         ASSERT(!ffe_ctl->for_treelog ||
    3705             :                block_group->start == fs_info->treelog_bg ||
    3706             :                fs_info->treelog_bg == 0);
    3707           0 :         ASSERT(!ffe_ctl->for_data_reloc ||
    3708             :                block_group->start == fs_info->data_reloc_bg ||
    3709             :                fs_info->data_reloc_bg == 0);
    3710             : 
    3711           0 :         if (block_group->ro ||
    3712           0 :             test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
    3713           0 :                 ret = 1;
    3714           0 :                 goto out;
    3715             :         }
    3716             : 
    3717             :         /*
    3718             :          * Do not allow currently using block group to be tree-log dedicated
    3719             :          * block group.
    3720             :          */
    3721           0 :         if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
    3722           0 :             (block_group->used || block_group->reserved)) {
    3723           0 :                 ret = 1;
    3724           0 :                 goto out;
    3725             :         }
    3726             : 
    3727             :         /*
    3728             :          * Do not allow currently used block group to be the data relocation
    3729             :          * dedicated block group.
    3730             :          */
    3731           0 :         if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
    3732           0 :             (block_group->used || block_group->reserved)) {
    3733           0 :                 ret = 1;
    3734           0 :                 goto out;
    3735             :         }
    3736             : 
    3737           0 :         WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity);
    3738           0 :         avail = block_group->zone_capacity - block_group->alloc_offset;
    3739           0 :         if (avail < num_bytes) {
    3740           0 :                 if (ffe_ctl->max_extent_size < avail) {
    3741             :                         /*
    3742             :                          * With sequential allocator, free space is always
    3743             :                          * contiguous
    3744             :                          */
    3745           0 :                         ffe_ctl->max_extent_size = avail;
    3746           0 :                         ffe_ctl->total_free_space = avail;
    3747             :                 }
    3748           0 :                 ret = 1;
    3749           0 :                 goto out;
    3750             :         }
    3751             : 
    3752           0 :         if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
    3753           0 :                 fs_info->treelog_bg = block_group->start;
    3754             : 
    3755           0 :         if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
    3756           0 :                 fs_info->data_reloc_bg = block_group->start;
    3757             : 
    3758           0 :         ffe_ctl->found_offset = start + block_group->alloc_offset;
    3759           0 :         block_group->alloc_offset += num_bytes;
    3760           0 :         spin_lock(&ctl->tree_lock);
    3761           0 :         ctl->free_space -= num_bytes;
    3762           0 :         spin_unlock(&ctl->tree_lock);
    3763             : 
    3764             :         /*
    3765             :          * We do not check if found_offset is aligned to stripesize. The
    3766             :          * address is anyway rewritten when using zone append writing.
    3767             :          */
    3768             : 
    3769           0 :         ffe_ctl->search_start = ffe_ctl->found_offset;
    3770             : 
    3771           0 : out:
    3772           0 :         if (ret && ffe_ctl->for_treelog)
    3773           0 :                 fs_info->treelog_bg = 0;
    3774           0 :         if (ret && ffe_ctl->for_data_reloc &&
    3775           0 :             fs_info->data_reloc_bg == block_group->start) {
    3776             :                 /*
    3777             :                  * Do not allow further allocations from this block group.
    3778             :                  * Compared to increasing the ->ro, setting the
    3779             :                  * ->zoned_data_reloc_ongoing flag still allows nocow
    3780             :                  *  writers to come in. See btrfs_inc_nocow_writers().
    3781             :                  *
    3782             :                  * We need to disable an allocation to avoid an allocation of
    3783             :                  * regular (non-relocation data) extent. With mix of relocation
    3784             :                  * extents and regular extents, we can dispatch WRITE commands
    3785             :                  * (for relocation extents) and ZONE APPEND commands (for
    3786             :                  * regular extents) at the same time to the same zone, which
    3787             :                  * easily break the write pointer.
    3788             :                  */
    3789           0 :                 set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
    3790           0 :                 fs_info->data_reloc_bg = 0;
    3791             :         }
    3792           0 :         spin_unlock(&fs_info->relocation_bg_lock);
    3793           0 :         spin_unlock(&fs_info->treelog_bg_lock);
    3794           0 :         spin_unlock(&block_group->lock);
    3795           0 :         spin_unlock(&space_info->lock);
    3796           0 :         return ret;
    3797             : }
    3798             : /*
                     :  * Try to allocate from @block_group with the allocator that matches
                     :  * ffe_ctl->policy (clustered or zoned) and return that allocator's result
                     :  * unchanged. Any other policy value is treated as a bug (BUG()).
                     :  */
    3799    17931156 : static int do_allocation(struct btrfs_block_group *block_group,
    3800             :                          struct find_free_extent_ctl *ffe_ctl,
    3801             :                          struct btrfs_block_group **bg_ret)
    3802             : {
    3803    17931156 :         switch (ffe_ctl->policy) {
    3804    17931156 :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
    3805    17931156 :                 return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
    3806           0 :         case BTRFS_EXTENT_ALLOC_ZONED:
    3807           0 :                 return do_allocation_zoned(block_group, ffe_ctl, bg_ret);
    3808           0 :         default:
    3809           0 :                 BUG();
    3810             :         }
    3811             : }
    3812             : /*
                     :  * Finish with @block_group: for the clustered policy clear the
                     :  * retry_clustered/retry_unclustered flags in @ffe_ctl (the zoned policy
                     :  * has nothing to reset), then hand the block group to
                     :  * btrfs_release_block_group(), passing @delalloc through.
                     :  *
                     :  * The RAID index derived from block_group->flags must equal
                     :  * ffe_ctl->index; a mismatch trips BUG_ON().
                     :  */
    3813     9547586 : static void release_block_group(struct btrfs_block_group *block_group,
    3814             :                                 struct find_free_extent_ctl *ffe_ctl,
    3815             :                                 int delalloc)
    3816             : {
    3817     9547586 :         switch (ffe_ctl->policy) {
    3818     9547789 :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
    3819     9547789 :                 ffe_ctl->retry_clustered = false;
    3820     9547789 :                 ffe_ctl->retry_unclustered = false;
    3821     9547789 :                 break;
    3822             :         case BTRFS_EXTENT_ALLOC_ZONED:
    3823             :                 /* Nothing to do */
    3824             :                 break;
    3825           0 :         default:
    3826           0 :                 BUG();
    3827             :         }
    3828             : 
    3829     9547586 :         BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
    3830             :                ffe_ctl->index);
    3831     9547586 :         btrfs_release_block_group(block_group, delalloc);
    3832     9547944 : }
    3833             : /*
                     :  * Clustered-policy bookkeeping once an extent has been found (called
                     :  * from found_extent()). When clustering was not used for this allocation
                     :  * but a free cluster (ffe_ctl->last_ptr) is set, store the start of the
                     :  * found extent (ins->objectid) in the cluster's window_start, under
                     :  * last_ptr->lock. prepare_allocation_clustered() reads window_start back
                     :  * as a search hint.
                     :  */
    3834    13262328 : static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl,
    3835             :                                    struct btrfs_key *ins)
    3836             : {
    3837    13262328 :         struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
    3838             : 
    3839    13262328 :         if (!ffe_ctl->use_cluster && last_ptr) {
    3840           0 :                 spin_lock(&last_ptr->lock);
    3841           0 :                 last_ptr->window_start = ins->objectid;
    3842           0 :                 spin_unlock(&last_ptr->lock);
    3843             :         }
    3844    13262328 : }
    3845             : /*
                     :  * Per-policy hook run when an extent has been found and recorded in @ins.
                     :  * Only the clustered policy has work to do (found_extent_clustered());
                     :  * the zoned policy does nothing and an unknown policy is a BUG().
                     :  */
    3846    13262373 : static void found_extent(struct find_free_extent_ctl *ffe_ctl,
    3847             :                          struct btrfs_key *ins)
    3848             : {
    3849    13262373 :         switch (ffe_ctl->policy) {
    3850    13262373 :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
    3851    13262373 :                 found_extent_clustered(ffe_ctl, ins);
    3852    13262373 :                 break;
    3853             :         case BTRFS_EXTENT_ALLOC_ZONED:
    3854             :                 /* Nothing to do */
    3855             :                 break;
    3856           0 :         default:
    3857           0 :                 BUG();
    3858             :         }
    3859    13262305 : }
    3860             : /*
                     :  * Zoned-policy check made before forcing a chunk allocation.
                     :  *
                     :  * Return 0 if the caller may go ahead and allocate a chunk.
                     :  * Return -ENOSPC if no zone can be activated but some block group still
                     :  * has at least min_alloc_size free (max_extent_size >= min_alloc_size).
                     :  * Return -EAGAIN for data allocations when not even min_alloc_size is
                     :  * left anywhere.
                     :  * Return any negative value reported by btrfs_zone_finish_one_bg().
                     :  */
    3861             : static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
    3862             :                                     struct find_free_extent_ctl *ffe_ctl)
    3863             : {
    3864             :         /* If we can activate a new zone, just allocate a chunk and use it */
    3865             :         if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
    3866             :                 return 0;
    3867             : 
    3868             :         /*
    3869             :          * We already reached the max active zones. Try to finish one block
    3870             :          * group to make room for a new block group. This is only possible
    3871             :          * for a data block group because btrfs_zone_finish() may need to wait
    3872             :          * for a running transaction which can cause a deadlock for metadata
    3873             :          * allocation.
    3874             :          */
    3875             :         if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
    3876             :                 int ret = btrfs_zone_finish_one_bg(fs_info);
    3877             : 
    3878             :                 if (ret == 1) /* NOTE(review): 1 presumably means a block group was finished - confirm */
    3879             :                         return 0;
    3880             :                 else if (ret < 0)
    3881             :                         return ret;
    3882             :         }
    3883             : 
    3884             :         /*
    3885             :          * If we have enough free space left in an already active block group
    3886             :          * and we can't activate any other zone now, do not allow allocating a
    3887             :          * new chunk and let find_free_extent() retry with a smaller size.
    3888             :          */
    3889             :         if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
    3890             :                 return -ENOSPC;
    3891             : 
    3892             :         /*
    3893             :          * Not even min_alloc_size is left in any block group. Since we cannot
    3894             :          * activate a new block group, allocating one may not help. Tell the
    3895             :          * caller to try again and hope it makes progress by writing some
    3896             :          * parts of the region. That is only possible for data block groups,
    3897             :          * where a part of the region can be written.
    3898             :          */
    3899             :         if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA)
    3900             :                 return -EAGAIN;
    3901             : 
    3902             :         /*
    3903             :          * We cannot activate a new block group and there is not enough space
    3904             :          * left in any block group. So, allocating a new block group may not
    3905             :          * help. But there is nothing else to do anyway, so let's go with it.
    3906             :          */
    3907             :         return 0;
    3908             : }
    3909             : /*
                     :  * Ask the allocation policy whether a new chunk may be allocated now.
                     :  * The clustered policy always allows it (returns 0); the zoned policy
                     :  * defers to can_allocate_chunk_zoned(). A non-zero return makes
                     :  * find_free_extent_update_loop() return that value instead of allocating.
                     :  */
    3910             : static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
    3911             :                               struct find_free_extent_ctl *ffe_ctl)
    3912             : {
    3913       54405 :         switch (ffe_ctl->policy) {
    3914             :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
    3915             :                 return 0;
    3916             :         case BTRFS_EXTENT_ALLOC_ZONED:
    3917             :                 return can_allocate_chunk_zoned(fs_info, ffe_ctl);
    3918           0 :         default:
    3919           0 :                 BUG();
    3920             :         }
    3921             : }
    3922             : 
    3923             : /*
                     :  * Advance the search state in @ffe_ctl, or report success when @ins
                     :  * already holds an extent (ins->objectid != 0). Without an extent, the
                     :  * RAID index is stepped first; once all indexes were tried the search
                     :  * moves to the next ffe_ctl->loop stage, forcing a chunk allocation when
                     :  * LOOP_ALLOC_CHUNK is reached.
                     :  *
    3924             :  * Return >0 means caller needs to re-search for free extent
    3925             :  * Return 0 means we have the needed free extent.
    3926             :  * Return <0 means we failed to locate any free extent (-ENOSPC), or an
                     :  * error came back from can_allocate_chunk(), btrfs_join_transaction() or
                     :  * btrfs_chunk_alloc().
    3927             :  */
    3928    15348293 : static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
    3929             :                                         struct btrfs_key *ins,
    3930             :                                         struct find_free_extent_ctl *ffe_ctl,
    3931             :                                         bool full_search)
    3932             : {
    3933    15348293 :         struct btrfs_root *root = fs_info->chunk_root;
    3934    15348293 :         int ret;
    3935             : 
    3936    15348293 :         if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
    3937    13942892 :             ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
    3938        4091 :                 ffe_ctl->orig_have_caching_bg = true;
    3939             : 
    3940    15348293 :         if (ins->objectid) {
    3941    13262416 :                 found_extent(ffe_ctl, ins);
    3942    13262416 :                 return 0;
    3943             :         }
    3944             : 
    3945     2085877 :         if (ffe_ctl->loop >= LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
    3946             :                 return 1; /* re-search with index and stage unchanged */
    3947             : 
    3948     1757686 :         ffe_ctl->index++;
    3949     1757686 :         if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
    3950             :                 return 1;
    3951             : 
    3952             :         /*
    3953             :          * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
    3954             :          *                      caching kthreads as we move along
    3955             :          * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
    3956             :          * LOOP_UNSET_SIZE_CLASS, allow unset size class
    3957             :          * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
    3958             :          * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
    3959             :          *                     again
                     :          *
                     :          * NOTE(review): find_free_extent_check_size_class() also compares
                     :          * ffe_ctl->loop against LOOP_WRONG_SIZE_CLASS, a stage missing from
                     :          * this list; confirm its position against the enum definition.
    3960             :          */
    3961      195302 :         if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
    3962      195302 :                 ffe_ctl->index = 0;
    3963             :                 /*
    3964             :                  * We want to skip the LOOP_CACHING_WAIT step if we don't have
    3965             :                  * any uncached bgs and we've already done a full search
    3966             :                  * through.
    3967             :                  */
    3968      195302 :                 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT &&
    3969       85822 :                     (!ffe_ctl->orig_have_caching_bg && full_search))
    3970        5451 :                         ffe_ctl->loop++;
    3971      195302 :                 ffe_ctl->loop++;
    3972             : 
    3973      195302 :                 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
    3974       54405 :                         struct btrfs_trans_handle *trans;
    3975       54405 :                         int exist = 0;
    3976             : 
    3977             :                         /* Check if allocation policy allows creating a new chunk */
    3978       54405 :                         ret = can_allocate_chunk(fs_info, ffe_ctl);
    3979       54405 :                         if (ret)
    3980             :                                 return ret;
    3981             : 
    3982       54405 :                         trans = current->journal_info; /* reuse this task's transaction handle, if any */
    3983       54405 :                         if (trans)
    3984             :                                 exist = 1;
    3985             :                         else
    3986       54166 :                                 trans = btrfs_join_transaction(root);
    3987             : 
    3988       54405 :                         if (IS_ERR(trans)) {
    3989           0 :                                 ret = PTR_ERR(trans);
    3990           0 :                                 return ret;
    3991             :                         }
    3992             : 
    3993       54405 :                         ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
    3994             :                                                 CHUNK_ALLOC_FORCE_FOR_EXTENT);
    3995             : 
    3996             :                         /* Do not bail out on ENOSPC since we can do more. */
    3997       54405 :                         if (ret == -ENOSPC) {
    3998       53780 :                                 ret = 0;
    3999       53780 :                                 ffe_ctl->loop++; /* no new chunk: advance one more stage */
    4000             :                         }
    4001         625 :                         else if (ret < 0)
    4002           0 :                                 btrfs_abort_transaction(trans, ret);
    4003             :                         else
    4004             :                                 ret = 0;
    4005       54405 :                         if (!exist) /* only end the transaction joined above */
    4006       54164 :                                 btrfs_end_transaction(trans);
    4007       54406 :                         if (ret)
    4008             :                                 return ret;
    4009             :                 }
    4010             : 
    4011      195303 :                 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
    4012        4869 :                         if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED)
    4013             :                                 return -ENOSPC;
    4014             : 
    4015             :                         /*
    4016             :                          * Don't loop again if we already have no empty_size and
    4017             :                          * no empty_cluster.
    4018             :                          */
    4019        4869 :                         if (ffe_ctl->empty_size == 0 &&
    4020        4869 :                             ffe_ctl->empty_cluster == 0)
    4021             :                                 return -ENOSPC;
    4022           0 :                         ffe_ctl->empty_size = 0;
    4023           0 :                         ffe_ctl->empty_cluster = 0;
    4024             :                 }
    4025      190434 :                 return 1;
    4026             :         }
    4027             :         return -ENOSPC;
    4028             : }
    4029             : /*
                     :  * Return true if @bg is acceptable for this allocation as far as size
                     :  * classes are concerned:
                     :  * - always for the zoned policy,
                     :  * - always when btrfs_block_group_should_use_size_class() says the block
                     :  *   group does not use size classes,
                     :  * - always once the search reached LOOP_WRONG_SIZE_CLASS,
                     :  * - from LOOP_UNSET_SIZE_CLASS on, also when @bg has no size class yet
                     :  *   (BTRFS_BG_SZ_NONE),
                     :  * - otherwise only when bg->size_class equals ffe_ctl->size_class.
                     :  */
    4030    22812352 : static bool find_free_extent_check_size_class(struct find_free_extent_ctl *ffe_ctl,
    4031             :                                               struct btrfs_block_group *bg)
    4032             : {
    4033    22812352 :         if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
    4034             :                 return true;
    4035    22812352 :         if (!btrfs_block_group_should_use_size_class(bg))
    4036             :                 return true;
    4037    11699313 :         if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
    4038             :                 return true;
    4039    11423373 :         if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
    4040      507169 :             bg->size_class == BTRFS_BG_SZ_NONE)
    4041             :                 return true;
    4042    11421027 :         return ffe_ctl->size_class == bg->size_class;
    4043             : }
    4044             : /*
                     :  * Set up @ffe_ctl for a clustered allocation.
                     :  *
                     :  * Return -ENOSPC, with ins->offset set to space_info->max_extent_size,
                     :  * when that recorded maximum is non-zero and smaller than the requested
                     :  * ffe_ctl->num_bytes. Return 0 otherwise, after:
                     :  * - disabling clustering if a non-zero max_extent_size is recorded,
                     :  * - fetching the free cluster with fetch_cluster_info() and, if there is
                     :  *   one, taking ffe_ctl->hint_byte from its window_start (when the
                     :  *   cluster has a block group or is fragmented) and disabling clustering
                     :  *   for a fragmented cluster.
                     :  */
    4045    13273840 : static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
    4046             :                                         struct find_free_extent_ctl *ffe_ctl,
    4047             :                                         struct btrfs_space_info *space_info,
    4048             :                                         struct btrfs_key *ins)
    4049             : {
    4050             :         /*
    4051             :          * If our free space is heavily fragmented we may not be able to make
    4052             :          * big contiguous allocations, so instead of doing the expensive search
    4053             :          * for free space, simply return ENOSPC with our max_extent_size so we
    4054             :          * can go ahead and search for a more manageable chunk.
    4055             :          *
    4056             :          * If our max_extent_size is large enough for our allocation simply
    4057             :          * disable clustering since we will likely not be able to find enough
    4058             :          * space to create a cluster and induce latency trying.
    4059             :          */
    4060    13273840 :         if (space_info->max_extent_size) { /* unlocked peek, rechecked under space_info->lock */
    4061       28831 :                 spin_lock(&space_info->lock);
    4062       28832 :                 if (space_info->max_extent_size &&
    4063       28832 :                     ffe_ctl->num_bytes > space_info->max_extent_size) {
    4064        7964 :                         ins->offset = space_info->max_extent_size;
    4065        7964 :                         spin_unlock(&space_info->lock);
    4066        7964 :                         return -ENOSPC;
    4067       20868 :                 } else if (space_info->max_extent_size) {
    4068       20868 :                         ffe_ctl->use_cluster = false;
    4069             :                 }
    4070       20868 :                 spin_unlock(&space_info->lock);
    4071             :         }
    4072             : 
    4073    13265877 :         ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info,
    4074             :                                                &ffe_ctl->empty_cluster);
    4075    13265534 :         if (ffe_ctl->last_ptr) {
    4076     9093874 :                 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
    4077             : 
    4078     9093874 :                 spin_lock(&last_ptr->lock);
    4079     9094419 :                 if (last_ptr->block_group)
    4080     9091255 :                         ffe_ctl->hint_byte = last_ptr->window_start;
    4081     9094419 :                 if (last_ptr->fragmented) {
    4082             :                         /*
    4083             :                          * We still set window_start so we can keep track of the
    4084             :                          * last place we found an allocation to try and save
    4085             :                          * some time.
    4086             :                          */
    4087           0 :                         ffe_ctl->hint_byte = last_ptr->window_start;
    4088           0 :                         ffe_ctl->use_cluster = false;
    4089             :                 }
    4090     9094419 :                 spin_unlock(&last_ptr->lock);
    4091             :         }
    4092             : 
    4093             :         return 0;
    4094             : }
    4095             : /*
                     :  * Per-policy setup done before searching for free space.
                     :  *
                     :  * Clustered: defer to prepare_allocation_clustered() and return its
                     :  * result.
                     :  * Zoned: for tree-log or data relocation allocations, point
                     :  * ffe_ctl->hint_byte at fs_info->treelog_bg / fs_info->data_reloc_bg when
                     :  * that value is non-zero (each read under its own spinlock); always
                     :  * returns 0.
                     :  * Any other policy is a BUG().
                     :  */
    4096    13273908 : static int prepare_allocation(struct btrfs_fs_info *fs_info,
    4097             :                               struct find_free_extent_ctl *ffe_ctl,
    4098             :                               struct btrfs_space_info *space_info,
    4099             :                               struct btrfs_key *ins)
    4100             : {
    4101    13273908 :         switch (ffe_ctl->policy) {
    4102    13273908 :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
    4103    13273908 :                 return prepare_allocation_clustered(fs_info, ffe_ctl,
    4104             :                                                     space_info, ins);
    4105           0 :         case BTRFS_EXTENT_ALLOC_ZONED:
    4106           0 :                 if (ffe_ctl->for_treelog) {
    4107           0 :                         spin_lock(&fs_info->treelog_bg_lock);
    4108           0 :                         if (fs_info->treelog_bg)
    4109           0 :                                 ffe_ctl->hint_byte = fs_info->treelog_bg;
    4110           0 :                         spin_unlock(&fs_info->treelog_bg_lock);
    4111             :                 }
    4112           0 :                 if (ffe_ctl->for_data_reloc) {
    4113           0 :                         spin_lock(&fs_info->relocation_bg_lock);
    4114           0 :                         if (fs_info->data_reloc_bg)
    4115           0 :                                 ffe_ctl->hint_byte = fs_info->data_reloc_bg;
    4116           0 :                         spin_unlock(&fs_info->relocation_bg_lock);
    4117             :                 }
    4118             :                 return 0;
    4119           0 :         default:
    4120           0 :                 BUG();
    4121             :         }
    4122             : }
    4123             : 
    4124             : /*
    4125             :  * Walk the btree of allocated extents and find a hole of a given size.
    4126             :  * The key ins is changed to record the hole:
    4127             :  * ins->objectid == start position
    4128             :  * ins->type == BTRFS_EXTENT_ITEM_KEY
    4129             :  * ins->offset == the size of the hole.
    4130             :  * Any available blocks before search_start are skipped.
    4131             :  *
    4132             :  * If there is no suitable free space, we will record the max size of
    4133             :  * the free space extent currently.
    4134             :  *
    4135             :  * The overall logic and call chain:
    4136             :  *
    4137             :  * find_free_extent()
    4138             :  * |- Iterate through all block groups
    4139             :  * |  |- Get a valid block group
    4140             :  * |  |- Try to do clustered allocation in that block group
    4141             :  * |  |- Try to do unclustered allocation in that block group
    4142             :  * |  |- Check if the result is valid
    4143             :  * |  |  |- If valid, then exit
    4144             :  * |  |- Jump to next block group
    4145             :  * |
    4146             :  * |- Push harder to find free extents
    4147             :  *    |- If not found, re-iterate all block groups
    4148             :  */
    4149    13275048 : static noinline int find_free_extent(struct btrfs_root *root,
    4150             :                                      struct btrfs_key *ins,
    4151             :                                      struct find_free_extent_ctl *ffe_ctl)
    4152             : {
    4153    13275048 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4154    13275048 :         int ret = 0;
    4155    13275048 :         int cache_block_group_error = 0;
    4156    13275048 :         struct btrfs_block_group *block_group = NULL;
    4157    13275048 :         struct btrfs_space_info *space_info;
    4158    13275048 :         bool full_search = false;
    4159             : 
    4160    13275048 :         WARN_ON(ffe_ctl->num_bytes < fs_info->sectorsize);
    4161             : 
    4162    13275048 :         ffe_ctl->search_start = 0;
    4163             :         /* For clustered allocation */
    4164    13275048 :         ffe_ctl->empty_cluster = 0;
    4165    13275048 :         ffe_ctl->last_ptr = NULL;
    4166    13275048 :         ffe_ctl->use_cluster = true;
    4167    13275048 :         ffe_ctl->have_caching_bg = false;
    4168    13275048 :         ffe_ctl->orig_have_caching_bg = false;
    4169    13275048 :         ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
    4170    13275048 :         ffe_ctl->loop = 0;
    4171             :         /* For clustered allocation */
    4172    13275048 :         ffe_ctl->retry_clustered = false;
    4173    13275048 :         ffe_ctl->retry_unclustered = false;
    4174    13275048 :         ffe_ctl->cached = 0;
    4175    13275048 :         ffe_ctl->max_extent_size = 0;
    4176    13275048 :         ffe_ctl->total_free_space = 0;
    4177    13275048 :         ffe_ctl->found_offset = 0;
    4178    13275048 :         ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
    4179    13275048 :         ffe_ctl->size_class = btrfs_calc_block_group_size_class(ffe_ctl->num_bytes);
    4180             : 
    4181    13273891 :         if (btrfs_is_zoned(fs_info))
    4182             :                 ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED;
    4183             : 
    4184    13273891 :         ins->type = BTRFS_EXTENT_ITEM_KEY;
    4185    13273891 :         ins->objectid = 0;
    4186    13273891 :         ins->offset = 0;
    4187             : 
    4188    13273891 :         trace_find_free_extent(root, ffe_ctl);
    4189             : 
    4190    13273681 :         space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags);
    4191    13273905 :         if (!space_info) {
    4192           0 :                 btrfs_err(fs_info, "No space info for %llu", ffe_ctl->flags);
    4193           0 :                 return -ENOSPC;
    4194             :         }
    4195             : 
    4196    13273905 :         ret = prepare_allocation(fs_info, ffe_ctl, space_info, ins);
    4197    13273692 :         if (ret < 0)
    4198             :                 return ret;
    4199             : 
    4200    13265728 :         ffe_ctl->search_start = max(ffe_ctl->search_start,
    4201             :                                     first_logical_byte(fs_info));
    4202    13266828 :         ffe_ctl->search_start = max(ffe_ctl->search_start, ffe_ctl->hint_byte);
    4203    13266828 :         if (ffe_ctl->search_start == ffe_ctl->hint_byte) {
    4204    11291595 :                 block_group = btrfs_lookup_block_group(fs_info,
    4205             :                                                        ffe_ctl->search_start);
    4206             :                 /*
    4207             :                  * we don't want to use the block group if it doesn't match our
    4208             :                  * allocation bits, or if it's not cached.
    4209             :                  *
    4210             :                  * However if we are re-searching with an ideal block group
    4211             :                  * picked out then we don't care that the block group is cached.
    4212             :                  */
    4213    11291151 :                 if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
    4214    11290383 :                     block_group->cached != BTRFS_CACHE_NO) {
    4215    11290321 :                         down_read(&space_info->groups_sem);
    4216    11290550 :                         if (list_empty(&block_group->list) ||
    4217    11290549 :                             block_group->ro) {
    4218             :                                 /*
    4219             :                                  * someone is removing this block group,
    4220             :                                  * we can't jump into the have_block_group
    4221             :                                  * target because our list pointers are not
    4222             :                                  * valid
    4223             :                                  */
    4224         996 :                                 btrfs_put_block_group(block_group);
    4225         838 :                                 up_read(&space_info->groups_sem);
    4226             :                         } else {
    4227    11289554 :                                 ffe_ctl->index = btrfs_bg_flags_to_raid_index(
    4228             :                                                         block_group->flags);
    4229    11289554 :                                 btrfs_lock_block_group(block_group,
    4230             :                                                        ffe_ctl->delalloc);
    4231    11289571 :                                 ffe_ctl->hinted = true;
    4232    11289571 :                                 goto have_block_group;
    4233             :                         }
    4234         830 :                 } else if (block_group) {
    4235         430 :                         btrfs_put_block_group(block_group);
    4236             :                 }
    4237             :         }
    4238     1975633 : search:
    4239     4057895 :         trace_find_free_extent_search_loop(root, ffe_ctl);
    4240     4057895 :         ffe_ctl->have_caching_bg = false;
    4241     4057895 :         if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
    4242             :             ffe_ctl->index == 0)
    4243     2499592 :                 full_search = true;
    4244     4057895 :         down_read(&space_info->groups_sem);
    4245    13612470 :         list_for_each_entry(block_group,
    4246             :                             &space_info->block_groups[ffe_ctl->index], list) {
    4247    11526567 :                 struct btrfs_block_group *bg_ret;
    4248             : 
    4249    11526567 :                 ffe_ctl->hinted = false;
    4250             :                 /* If the block group is read-only, we can skip it entirely. */
    4251    11526567 :                 if (unlikely(block_group->ro)) {
    4252        7408 :                         if (ffe_ctl->for_treelog)
    4253             :                                 btrfs_clear_treelog_bg(block_group);
    4254        7408 :                         if (ffe_ctl->for_data_reloc)
    4255             :                                 btrfs_clear_data_reloc_bg(block_group);
    4256        7408 :                         continue;
    4257             :                 }
    4258             : 
    4259    11519159 :                 btrfs_grab_block_group(block_group, ffe_ctl->delalloc);
    4260    11518414 :                 ffe_ctl->search_start = block_group->start;
    4261             : 
    4262             :                 /*
    4263             :                  * this can happen if we end up cycling through all the
    4264             :                  * raid types, but we want to make sure we only allocate
    4265             :                  * for the proper type.
    4266             :                  */
    4267    11518414 :                 if (!block_group_bits(block_group, ffe_ctl->flags)) {
    4268           0 :                         u64 extra = BTRFS_BLOCK_GROUP_DUP |
    4269             :                                 BTRFS_BLOCK_GROUP_RAID1_MASK |
    4270             :                                 BTRFS_BLOCK_GROUP_RAID56_MASK |
    4271             :                                 BTRFS_BLOCK_GROUP_RAID10;
    4272             : 
    4273             :                         /*
    4274             :                          * if they asked for extra copies and this block group
    4275             :                          * doesn't provide them, bail.  This does allow us to
    4276             :                          * fill raid0 from raid1.
    4277             :                          */
    4278           0 :                         if ((ffe_ctl->flags & extra) && !(block_group->flags & extra))
    4279           0 :                                 goto loop;
    4280             : 
    4281             :                         /*
    4282             :                          * This block group has different flags than we want.
    4283             :                          * It's possible that we have MIXED_GROUP flag but no
    4284             :                          * block group is mixed.  Just skip such block group.
    4285             :                          */
    4286           0 :                         btrfs_release_block_group(block_group, ffe_ctl->delalloc);
    4287           0 :                         continue;
    4288             :                 }
    4289             : 
    4290    22807985 : have_block_group:
    4291    22810796 :                 trace_find_free_extent_have_block_group(root, ffe_ctl, block_group);
    4292    22809418 :                 ffe_ctl->cached = btrfs_block_group_done(block_group);
    4293    22812505 :                 if (unlikely(!ffe_ctl->cached)) {
    4294      556137 :                         ffe_ctl->have_caching_bg = true;
    4295      556137 :                         ret = btrfs_cache_block_group(block_group, false);
    4296             : 
    4297             :                         /*
    4298             :                          * If we get ENOMEM here or something else we want to
    4299             :                          * try other block groups, because it may not be fatal.
    4300             :                          * However if we can't find anything else we need to
    4301             :                          * save our return here so that we return the actual
    4302             :                          * error that caused problems, not ENOSPC.
    4303             :                          */
    4304      556138 :                         if (ret < 0) {
    4305           0 :                                 if (!cache_block_group_error)
    4306           0 :                                         cache_block_group_error = ret;
    4307           0 :                                 ret = 0;
    4308           0 :                                 goto loop;
    4309             :                         }
    4310             :                         ret = 0;
    4311             :                 }
    4312             : 
    4313    22812506 :                 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
    4314           0 :                         goto loop;
    4315             : 
    4316    22812506 :                 if (!find_free_extent_check_size_class(ffe_ctl, block_group))
    4317     4880677 :                         goto loop;
    4318             : 
    4319    17931270 :                 bg_ret = NULL;
    4320    17931270 :                 ret = do_allocation(block_group, ffe_ctl, &bg_ret);
    4321    17932678 :                 if (ret == 0) {
    4322    13262576 :                         if (bg_ret && bg_ret != block_group) {
    4323           2 :                                 btrfs_release_block_group(block_group,
    4324             :                                                           ffe_ctl->delalloc);
    4325           2 :                                 block_group = bg_ret;
    4326             :                         }
    4327     4670102 :                 } else if (ret == -EAGAIN) {
    4328        2811 :                         goto have_block_group;
    4329     4667291 :                 } else if (ret > 0) {
    4330     4667249 :                         goto loop;
    4331             :                 }
    4332             : 
    4333             :                 /* Checks */
    4334    13262618 :                 ffe_ctl->search_start = round_up(ffe_ctl->found_offset,
    4335             :                                                  fs_info->stripesize);
    4336             : 
    4337             :                 /* move on to the next group */
    4338    13262618 :                 if (ffe_ctl->search_start + ffe_ctl->num_bytes >
    4339    13262618 :                     block_group->start + block_group->length) {
    4340           0 :                         btrfs_add_free_space_unused(block_group,
    4341             :                                             ffe_ctl->found_offset,
    4342             :                                             ffe_ctl->num_bytes);
    4343           0 :                         goto loop;
    4344             :                 }
    4345             : 
    4346    13262618 :                 if (ffe_ctl->found_offset < ffe_ctl->search_start)
    4347           0 :                         btrfs_add_free_space_unused(block_group,
    4348             :                                         ffe_ctl->found_offset,
    4349             :                                         ffe_ctl->search_start - ffe_ctl->found_offset);
    4350             : 
    4351    13262618 :                 ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
    4352             :                                                ffe_ctl->num_bytes,
    4353             :                                                ffe_ctl->delalloc,
    4354    13262618 :                                                ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS);
    4355    13262515 :                 if (ret == -EAGAIN) {
    4356           0 :                         btrfs_add_free_space_unused(block_group,
    4357             :                                         ffe_ctl->found_offset,
    4358             :                                         ffe_ctl->num_bytes);
    4359           0 :                         goto loop;
    4360             :                 }
    4361    13262515 :                 btrfs_inc_block_group_reservations(block_group);
    4362             : 
    4363             :                 /* we are all good, let's return */
    4364    13262589 :                 ins->objectid = ffe_ctl->search_start;
    4365    13262589 :                 ins->offset = ffe_ctl->num_bytes;
    4366             : 
    4367    13262589 :                 trace_btrfs_reserve_extent(block_group, ffe_ctl);
    4368    13262571 :                 btrfs_release_block_group(block_group, ffe_ctl->delalloc);
    4369    13262514 :                 break;
    4370     9547926 : loop:
    4371     9547926 :                 release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
    4372     9547833 :                 cond_resched();
    4373             :         }
    4374    15348417 :         up_read(&space_info->groups_sem);
    4375             : 
    4376    15348293 :         ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, full_search);
    4377    15348174 :         if (ret > 0)
    4378     2080994 :                 goto search;
    4379             : 
    4380    13267180 :         if (ret == -ENOSPC && !cache_block_group_error) {
    4381             :                 /*
    4382             :                  * Use ffe_ctl->total_free_space as fallback if we can't find
    4383             :                  * any contiguous hole.
    4384             :                  */
    4385        4869 :                 if (!ffe_ctl->max_extent_size)
    4386        1590 :                         ffe_ctl->max_extent_size = ffe_ctl->total_free_space;
    4387        4869 :                 spin_lock(&space_info->lock);
    4388        4869 :                 space_info->max_extent_size = ffe_ctl->max_extent_size;
    4389        4869 :                 spin_unlock(&space_info->lock);
    4390        4869 :                 ins->offset = ffe_ctl->max_extent_size;
    4391    13262311 :         } else if (ret == -ENOSPC) {
    4392           0 :                 ret = cache_block_group_error;
    4393             :         }
    4394             :         return ret;
    4395             : }
    4396             : 
    4397             : /*
    4398             :  * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
    4399             :  *                        hole that is at least as big as @num_bytes.
    4400             :  *
    4401             :  * @root           -    The root that will contain this extent
    4402             :  *
    4403             :  * @ram_bytes      -    The amount of space in ram that @num_bytes takes. This
    4404             :  *                      is used for accounting purposes. This value differs
    4405             :  *                      from @num_bytes only in the case of compressed extents.
    4406             :  *
    4407             :  * @num_bytes      -    Number of bytes to allocate on-disk.
    4408             :  *
    4409             :  * @min_alloc_size -    Indicates the minimum amount of space that the
    4410             :  *                      allocator should try to satisfy. In some cases
    4411             :  *                      @num_bytes may be larger than what is required and if
    4412             :  *                      the filesystem is fragmented then allocation fails.
    4413             :  *                      However, the presence of @min_alloc_size gives a
    4414             :  *                      chance to try and satisfy the smaller allocation.
    4415             :  *
    4416             :  * @empty_size     -    A hint that you plan on doing more COW. This is the
    4417             :  *                      size in bytes the allocator should try to find free
    4418             :  *                      next to the block it returns.  This is just a hint and
    4419             :  *                      may be ignored by the allocator.
    4420             :  *
    4421             :  * @hint_byte      -    Hint to the allocator to start searching above the byte
    4422             :  *                      address passed. It might be ignored.
    4423             :  *
    4424             :  * @ins            -    This key is modified to record the found hole. It will
    4425             :  *                      have the following values:
    4426             :  *                      ins->objectid == start position
    4427             :  *                      ins->type == BTRFS_EXTENT_ITEM_KEY
    4428             :  *                      ins->offset == the size of the hole.
    4429             :  *
    4430             :  * @is_data        -    Boolean flag indicating whether an extent is
    4431             :  *                      allocated for data (true) or metadata (false)
    4432             :  *
    4433             :  * @delalloc       -    Boolean flag indicating whether this allocation is for
    4434             :  *                      delalloc or not. If 'true' data_rwsem of block groups
    4435             :  *                      is going to be acquired.
    4436             :  *
    4437             :  *
    4438             :  * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
    4439             :  * case -ENOSPC is returned then @ins->offset will contain the size of the
    4440             :  * largest available hole the allocator managed to find.
    4441             :  */
    4442    13262168 : int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
    4443             :                          u64 num_bytes, u64 min_alloc_size,
    4444             :                          u64 empty_size, u64 hint_byte,
    4445             :                          struct btrfs_key *ins, int is_data, int delalloc)
    4446             : {
    4447    13262168 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4448    13262168 :         struct find_free_extent_ctl ffe_ctl = {};
    4449    13262168 :         bool final_tried = num_bytes == min_alloc_size;
    4450    13262168 :         u64 flags;
    4451    13262168 :         int ret;
    4452    13262168 :         bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
    4453    13262168 :         bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data);
    4454             : 
    4455    13262168 :         flags = get_alloc_profile_by_root(root, is_data);
    4456    13275213 : again:
    4457    13275213 :         WARN_ON(num_bytes < fs_info->sectorsize);
    4458             : 
    4459    13275213 :         ffe_ctl.ram_bytes = ram_bytes;
    4460    13275213 :         ffe_ctl.num_bytes = num_bytes;
    4461    13275213 :         ffe_ctl.min_alloc_size = min_alloc_size;
    4462    13275213 :         ffe_ctl.empty_size = empty_size;
    4463    13275213 :         ffe_ctl.flags = flags;
    4464    13275213 :         ffe_ctl.delalloc = delalloc;
    4465    13275213 :         ffe_ctl.hint_byte = hint_byte;
    4466    13275213 :         ffe_ctl.for_treelog = for_treelog;
    4467    13275213 :         ffe_ctl.for_data_reloc = for_data_reloc;
    4468             : 
    4469    13275213 :         ret = find_free_extent(root, ins, &ffe_ctl);
    4470    13275099 :         if (!ret && !is_data) {
    4471     9449662 :                 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
    4472     3825437 :         } else if (ret == -ENOSPC) {
    4473       12833 :                 if (!final_tried && ins->offset) {
    4474       12833 :                         num_bytes = min(num_bytes >> 1, ins->offset);
    4475       12833 :                         num_bytes = round_down(num_bytes,
    4476             :                                                fs_info->sectorsize);
    4477       12833 :                         num_bytes = max(num_bytes, min_alloc_size);
    4478       12833 :                         ram_bytes = num_bytes;
    4479       12833 :                         if (num_bytes == min_alloc_size)
    4480        2491 :                                 final_tried = true;
    4481       12833 :                         goto again;
    4482           0 :                 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
    4483           0 :                         struct btrfs_space_info *sinfo;
    4484             : 
    4485           0 :                         sinfo = btrfs_find_space_info(fs_info, flags);
    4486           0 :                         btrfs_err(fs_info,
    4487             :         "allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
    4488             :                                   flags, num_bytes, for_treelog, for_data_reloc);
    4489           0 :                         if (sinfo)
    4490           0 :                                 btrfs_dump_space_info(fs_info, sinfo,
    4491             :                                                       num_bytes, 1);
    4492             :                 }
    4493             :         }
    4494             : 
    4495    13262446 :         return ret;
    4496             : }
    4497             : 
    4498         175 : int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
    4499             :                                u64 start, u64 len, int delalloc)
    4500             : {
    4501         175 :         struct btrfs_block_group *cache;
    4502             : 
    4503         175 :         cache = btrfs_lookup_block_group(fs_info, start);
    4504         175 :         if (!cache) {
    4505           0 :                 btrfs_err(fs_info, "Unable to find block group for %llu",
    4506             :                           start);
    4507           0 :                 return -ENOSPC;
    4508             :         }
    4509             : 
    4510         175 :         btrfs_add_free_space(cache, start, len);
    4511         175 :         btrfs_free_reserved_bytes(cache, len, delalloc);
    4512         175 :         trace_btrfs_reserved_extent_free(fs_info, start, len);
    4513             : 
    4514         175 :         btrfs_put_block_group(cache);
    4515         175 :         return 0;
    4516             : }
    4517             : 
    4518       30834 : int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
    4519             :                               u64 len)
    4520             : {
    4521       30834 :         struct btrfs_block_group *cache;
    4522       30834 :         int ret = 0;
    4523             : 
    4524       30834 :         cache = btrfs_lookup_block_group(trans->fs_info, start);
    4525       30834 :         if (!cache) {
    4526           0 :                 btrfs_err(trans->fs_info, "unable to find block group for %llu",
    4527             :                           start);
    4528           0 :                 return -ENOSPC;
    4529             :         }
    4530             : 
    4531       30834 :         ret = pin_down_extent(trans, cache, start, len, 1);
    4532       30834 :         btrfs_put_block_group(cache);
    4533       30834 :         return ret;
    4534             : }
    4535             : 
    4536    10019351 : static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
    4537             :                                  u64 num_bytes)
    4538             : {
    4539    10019351 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    4540    10019351 :         int ret;
    4541             : 
    4542    10019351 :         ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
    4543    10019353 :         if (ret)
    4544             :                 return ret;
    4545             : 
    4546    10019353 :         ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
    4547    10019352 :         if (ret) {
    4548           0 :                 ASSERT(!ret);
    4549           0 :                 btrfs_err(fs_info, "update block group failed for %llu %llu",
    4550             :                           bytenr, num_bytes);
    4551           0 :                 return ret;
    4552             :         }
    4553             : 
    4554    10019352 :         trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
    4555    10019352 :         return 0;
    4556             : }
    4557             : 
    4558     2254192 : static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
    4559             :                                       u64 parent, u64 root_objectid,
    4560             :                                       u64 flags, u64 owner, u64 offset,
    4561             :                                       struct btrfs_key *ins, int ref_mod)
    4562             : {
    4563     2254192 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    4564     2254192 :         struct btrfs_root *extent_root;
    4565     2254192 :         int ret;
    4566     2254192 :         struct btrfs_extent_item *extent_item;
    4567     2254192 :         struct btrfs_extent_inline_ref *iref;
    4568     2254192 :         struct btrfs_path *path;
    4569     2254192 :         struct extent_buffer *leaf;
    4570     2254192 :         int type;
    4571     2254192 :         u32 size;
    4572             : 
    4573     2254192 :         if (parent > 0)
    4574             :                 type = BTRFS_SHARED_DATA_REF_KEY;
    4575             :         else
    4576     2254192 :                 type = BTRFS_EXTENT_DATA_REF_KEY;
    4577             : 
    4578     2254192 :         size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
    4579             : 
    4580     2254192 :         path = btrfs_alloc_path();
    4581     2254185 :         if (!path)
    4582             :                 return -ENOMEM;
    4583             : 
    4584     2254185 :         extent_root = btrfs_extent_root(fs_info, ins->objectid);
    4585     2254179 :         ret = btrfs_insert_empty_item(trans, extent_root, path, ins, size);
    4586     2254191 :         if (ret) {
    4587           1 :                 btrfs_free_path(path);
    4588           1 :                 return ret;
    4589             :         }
    4590             : 
    4591     2254190 :         leaf = path->nodes[0];
    4592     2254190 :         extent_item = btrfs_item_ptr(leaf, path->slots[0],
    4593             :                                      struct btrfs_extent_item);
    4594     2254189 :         btrfs_set_extent_refs(leaf, extent_item, ref_mod);
    4595     2254189 :         btrfs_set_extent_generation(leaf, extent_item, trans->transid);
    4596     2254190 :         btrfs_set_extent_flags(leaf, extent_item,
    4597             :                                flags | BTRFS_EXTENT_FLAG_DATA);
    4598             : 
    4599     2254189 :         iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
    4600     2254189 :         btrfs_set_extent_inline_ref_type(leaf, iref, type);
    4601     2254189 :         if (parent > 0) {
    4602           0 :                 struct btrfs_shared_data_ref *ref;
    4603           0 :                 ref = (struct btrfs_shared_data_ref *)(iref + 1);
    4604           0 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
    4605           0 :                 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
    4606             :         } else {
    4607     2254189 :                 struct btrfs_extent_data_ref *ref;
    4608     2254189 :                 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
    4609     2254189 :                 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
    4610     2254189 :                 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
    4611     2254188 :                 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
    4612     2254189 :                 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
    4613             :         }
    4614             : 
    4615     2254187 :         btrfs_mark_buffer_dirty(path->nodes[0]);
    4616     2254189 :         btrfs_free_path(path);
    4617             : 
    4618     2254190 :         return alloc_reserved_extent(trans, ins->objectid, ins->offset);
    4619             : }
    4620             : 
    4621     7765103 : static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
    4622             :                                      struct btrfs_delayed_ref_node *node,
    4623             :                                      struct btrfs_delayed_extent_op *extent_op)
    4624             : {
    4625     7765103 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    4626     7765103 :         struct btrfs_root *extent_root;
    4627     7765103 :         int ret;
    4628     7765103 :         struct btrfs_extent_item *extent_item;
    4629     7765103 :         struct btrfs_key extent_key;
    4630     7765103 :         struct btrfs_tree_block_info *block_info;
    4631     7765103 :         struct btrfs_extent_inline_ref *iref;
    4632     7765103 :         struct btrfs_path *path;
    4633     7765103 :         struct extent_buffer *leaf;
    4634     7765103 :         struct btrfs_delayed_tree_ref *ref;
    4635     7765103 :         u32 size = sizeof(*extent_item) + sizeof(*iref);
    4636     7765103 :         u64 flags = extent_op->flags_to_set;
    4637     7765103 :         bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
    4638             : 
    4639     7765103 :         ref = btrfs_delayed_node_to_tree_ref(node);
    4640             : 
    4641     7765103 :         extent_key.objectid = node->bytenr;
    4642     7765103 :         if (skinny_metadata) {
    4643     7765103 :                 extent_key.offset = ref->level;
    4644     7765103 :                 extent_key.type = BTRFS_METADATA_ITEM_KEY;
    4645             :         } else {
    4646           0 :                 extent_key.offset = node->num_bytes;
    4647           0 :                 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
    4648           0 :                 size += sizeof(*block_info);
    4649             :         }
    4650             : 
    4651     7765103 :         path = btrfs_alloc_path();
    4652     7765111 :         if (!path)
    4653             :                 return -ENOMEM;
    4654             : 
    4655     7765111 :         extent_root = btrfs_extent_root(fs_info, extent_key.objectid);
    4656     7765101 :         ret = btrfs_insert_empty_item(trans, extent_root, path, &extent_key,
    4657             :                                       size);
    4658     7765163 :         if (ret) {
    4659           0 :                 btrfs_free_path(path);
    4660           0 :                 return ret;
    4661             :         }
    4662             : 
    4663     7765163 :         leaf = path->nodes[0];
    4664     7765163 :         extent_item = btrfs_item_ptr(leaf, path->slots[0],
    4665             :                                      struct btrfs_extent_item);
    4666     7765163 :         btrfs_set_extent_refs(leaf, extent_item, 1);
    4667     7765163 :         btrfs_set_extent_generation(leaf, extent_item, trans->transid);
    4668     7765163 :         btrfs_set_extent_flags(leaf, extent_item,
    4669             :                                flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
    4670             : 
    4671     7765163 :         if (skinny_metadata) {
    4672     7765163 :                 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
    4673             :         } else {
    4674           0 :                 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
    4675           0 :                 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
    4676           0 :                 btrfs_set_tree_block_level(leaf, block_info, ref->level);
    4677           0 :                 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
    4678             :         }
    4679             : 
    4680     7765163 :         if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
    4681     1982207 :                 btrfs_set_extent_inline_ref_type(leaf, iref,
    4682             :                                                  BTRFS_SHARED_BLOCK_REF_KEY);
    4683     1982207 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
    4684             :         } else {
    4685     5782956 :                 btrfs_set_extent_inline_ref_type(leaf, iref,
    4686             :                                                  BTRFS_TREE_BLOCK_REF_KEY);
    4687     5782956 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
    4688             :         }
    4689             : 
    4690     7765163 :         btrfs_mark_buffer_dirty(leaf);
    4691     7765163 :         btrfs_free_path(path);
    4692             : 
    4693     7765163 :         return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
    4694             : }
    4695             : 
    4696     3813187 : int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
    4697             :                                      struct btrfs_root *root, u64 owner,
    4698             :                                      u64 offset, u64 ram_bytes,
    4699             :                                      struct btrfs_key *ins)
    4700             : {
    4701     3813187 :         struct btrfs_ref generic_ref = { 0 };
    4702             : 
    4703     3813187 :         BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
    4704             : 
    4705     3813187 :         btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
    4706             :                                ins->objectid, ins->offset, 0);
    4707     3813187 :         btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner,
    4708             :                             offset, 0, false);
    4709     3813187 :         btrfs_ref_tree_mod(root->fs_info, &generic_ref);
    4710             : 
    4711     3813187 :         return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes);
    4712             : }
    4713             : 
    4714             : /*
    4715             :  * this is used by the tree logging recovery code.  It records that
    4716             :  * an extent has been allocated and makes sure to clear the free
    4717             :  * space cache bits as well
    4718             :  */
    4719       84010 : int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
    4720             :                                    u64 root_objectid, u64 owner, u64 offset,
    4721             :                                    struct btrfs_key *ins)
    4722             : {
    4723       84010 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    4724       84010 :         int ret;
    4725       84010 :         struct btrfs_block_group *block_group;
    4726       84010 :         struct btrfs_space_info *space_info;
    4727             : 
    4728             :         /*
    4729             :          * Mixed block groups will exclude before processing the log so we only
    4730             :          * need to do the exclude dance if this fs isn't mixed.
    4731             :          */
    4732       84010 :         if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
    4733       84010 :                 ret = __exclude_logged_extent(fs_info, ins->objectid,
    4734             :                                               ins->offset);
    4735       84010 :                 if (ret)
    4736             :                         return ret;
    4737             :         }
    4738             : 
    4739       84010 :         block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
    4740       84010 :         if (!block_group)
    4741             :                 return -EINVAL;
    4742             : 
    4743       84010 :         space_info = block_group->space_info;
    4744       84010 :         spin_lock(&space_info->lock);
    4745       84010 :         spin_lock(&block_group->lock);
    4746       84010 :         space_info->bytes_reserved += ins->offset;
    4747       84010 :         block_group->reserved += ins->offset;
    4748       84010 :         spin_unlock(&block_group->lock);
    4749       84010 :         spin_unlock(&space_info->lock);
    4750             : 
    4751       84010 :         ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
    4752             :                                          offset, ins, 1);
    4753       84010 :         if (ret)
    4754           0 :                 btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
    4755       84010 :         btrfs_put_block_group(block_group);
    4756       84010 :         return ret;
    4757             : }
    4758             : 
    4759             : static struct extent_buffer *
    4760     9449823 : btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    4761             :                       u64 bytenr, int level, u64 owner,
    4762             :                       enum btrfs_lock_nesting nest)
    4763             : {
    4764     9449823 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4765     9449823 :         struct extent_buffer *buf;
    4766     9449823 :         u64 lockdep_owner = owner;
    4767             : 
    4768     9449823 :         buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
    4769     9449739 :         if (IS_ERR(buf))
    4770             :                 return buf;
    4771             : 
    4772             :         /*
    4773             :          * Extra safety check in case the extent tree is corrupted and extent
    4774             :          * allocator chooses to use a tree block which is already used and
    4775             :          * locked.
    4776             :          */
    4777     9449739 :         if (buf->lock_owner == current->pid) {
    4778           0 :                 btrfs_err_rl(fs_info,
    4779             : "tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
    4780             :                         buf->start, btrfs_header_owner(buf), current->pid);
    4781           0 :                 free_extent_buffer(buf);
    4782           0 :                 return ERR_PTR(-EUCLEAN);
    4783             :         }
    4784             : 
    4785             :         /*
    4786             :          * The reloc trees are just snapshots, so we need them to appear to be
    4787             :          * just like any other fs tree WRT lockdep.
    4788             :          *
    4789             :          * The exception however is in replace_path() in relocation, where we
    4790             :          * hold the lock on the original fs root and then search for the reloc
    4791             :          * root.  At that point we need to make sure any reloc root buffers are
    4792             :          * set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make
    4793             :          * lockdep happy.
    4794             :          */
    4795     9449739 :         if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID &&
    4796     1982207 :             !test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
    4797             :                 lockdep_owner = BTRFS_FS_TREE_OBJECTID;
    4798             : 
    4799             :         /* btrfs_clear_buffer_dirty() accesses generation field. */
    4800     9449739 :         btrfs_set_header_generation(buf, trans->transid);
    4801             : 
    4802             :         /*
    4803             :          * This needs to stay, because we could allocate a freed block from an
    4804             :          * old tree into a new tree, so we need to make sure this new block is
    4805             :          * set to the appropriate level and owner.
    4806             :          */
    4807     9449739 :         btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level);
    4808             : 
    4809     9449739 :         __btrfs_tree_lock(buf, nest);
    4810     9449122 :         btrfs_clear_buffer_dirty(trans, buf);
    4811     9449522 :         clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
    4812     9449696 :         clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);
    4813             : 
    4814     9449585 :         set_extent_buffer_uptodate(buf);
    4815             : 
    4816     9449548 :         memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
    4817     9447300 :         btrfs_set_header_level(buf, level);
    4818     9447300 :         btrfs_set_header_bytenr(buf, buf->start);
    4819     9447300 :         btrfs_set_header_generation(buf, trans->transid);
    4820     9447300 :         btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
    4821     9447466 :         btrfs_set_header_owner(buf, owner);
    4822     9447466 :         write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
    4823     9448482 :         write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
    4824     9448343 :         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
    4825     1105910 :                 buf->log_index = root->log_transid % 2;
    4826             :                 /*
    4827             :                  * we allow two log transactions at a time, use different
    4828             :                  * EXTENT bit to differentiate dirty pages.
    4829             :                  */
    4830     1105910 :                 if (buf->log_index == 0)
    4831      565463 :                         set_extent_bit(&root->dirty_log_pages, buf->start,
    4832      565463 :                                        buf->start + buf->len - 1,
    4833             :                                        EXTENT_DIRTY, NULL);
    4834             :                 else
    4835      540447 :                         set_extent_bit(&root->dirty_log_pages, buf->start,
    4836      540447 :                                        buf->start + buf->len - 1,
    4837             :                                        EXTENT_NEW, NULL);
    4838             :         } else {
    4839     8342433 :                 buf->log_index = -1;
    4840     8342433 :                 set_extent_bit(&trans->transaction->dirty_pages, buf->start,
    4841     8342433 :                                buf->start + buf->len - 1, EXTENT_DIRTY, NULL);
    4842             :         }
    4843             :         /* this returns a buffer locked for blocking */
    4844             :         return buf;
    4845             : }
    4846             : 
    4847             : /*
    4848             :  * finds a free extent and does all the dirty work required for allocation
    4849             :  * returns the tree buffer or an ERR_PTR on error.
    4850             :  */
    4851     9451077 : struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
    4852             :                                              struct btrfs_root *root,
    4853             :                                              u64 parent, u64 root_objectid,
    4854             :                                              const struct btrfs_disk_key *key,
    4855             :                                              int level, u64 hint,
    4856             :                                              u64 empty_size,
    4857             :                                              enum btrfs_lock_nesting nest)
    4858             : {
    4859     9451077 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4860     9451077 :         struct btrfs_key ins;
    4861     9451077 :         struct btrfs_block_rsv *block_rsv;
    4862     9451077 :         struct extent_buffer *buf;
    4863     9451077 :         struct btrfs_delayed_extent_op *extent_op;
    4864     9451077 :         struct btrfs_ref generic_ref = { 0 };
    4865     9451077 :         u64 flags = 0;
    4866     9451077 :         int ret;
    4867     9451077 :         u32 blocksize = fs_info->nodesize;
    4868     9451077 :         bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
    4869             : 
    4870             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
    4871             :         if (btrfs_is_testing(fs_info)) {
    4872             :                 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
    4873             :                                             level, root_objectid, nest);
    4874             :                 if (!IS_ERR(buf))
    4875             :                         root->alloc_bytenr += blocksize;
    4876             :                 return buf;
    4877             :         }
    4878             : #endif
    4879             : 
    4880     9451077 :         block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
    4881     9451110 :         if (IS_ERR(block_rsv))
    4882             :                 return ERR_CAST(block_rsv);
    4883             : 
    4884     9449738 :         ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
    4885             :                                    empty_size, hint, &ins, 0, 0);
    4886     9449834 :         if (ret)
    4887           0 :                 goto out_unuse;
    4888             : 
    4889     9449834 :         buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
    4890             :                                     root_objectid, nest);
    4891     9449868 :         if (IS_ERR(buf)) {
    4892           0 :                 ret = PTR_ERR(buf);
    4893           0 :                 goto out_free_reserved;
    4894             :         }
    4895             : 
    4896     9449868 :         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
    4897     1982207 :                 if (parent == 0)
    4898        2194 :                         parent = ins.objectid;
    4899             :                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
    4900             :         } else
    4901     7467661 :                 BUG_ON(parent > 0);
    4902             : 
    4903     9449868 :         if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
    4904     8344002 :                 extent_op = btrfs_alloc_delayed_extent_op();
    4905     8343987 :                 if (!extent_op) {
    4906           0 :                         ret = -ENOMEM;
    4907           0 :                         goto out_free_buf;
    4908             :                 }
    4909     8343987 :                 if (key)
    4910    16687132 :                         memcpy(&extent_op->key, key, sizeof(extent_op->key));
    4911             :                 else
    4912         842 :                         memset(&extent_op->key, 0, sizeof(extent_op->key));
    4913     8343987 :                 extent_op->flags_to_set = flags;
    4914     8343987 :                 extent_op->update_key = skinny_metadata ? false : true;
    4915     8343987 :                 extent_op->update_flags = true;
    4916     8343987 :                 extent_op->level = level;
    4917             : 
    4918     8343987 :                 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
    4919             :                                        ins.objectid, ins.offset, parent);
    4920     8343987 :                 btrfs_init_tree_ref(&generic_ref, level, root_objectid,
    4921             :                                     root->root_key.objectid, false);
    4922     8343971 :                 btrfs_ref_tree_mod(fs_info, &generic_ref);
    4923     8343971 :                 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
    4924     8343999 :                 if (ret)
    4925           0 :                         goto out_free_delayed;
    4926             :         }
    4927             :         return buf;
    4928             : 
    4929             : out_free_delayed:
    4930           0 :         btrfs_free_delayed_extent_op(extent_op);
    4931           0 : out_free_buf:
    4932           0 :         btrfs_tree_unlock(buf);
    4933           0 :         free_extent_buffer(buf);
    4934           0 : out_free_reserved:
    4935           0 :         btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
    4936           0 : out_unuse:
    4937           0 :         btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
    4938           0 :         return ERR_PTR(ret);
    4939             : }
    4940             : 
    4941             : struct walk_control {
    4942             :         u64 refs[BTRFS_MAX_LEVEL];
    4943             :         u64 flags[BTRFS_MAX_LEVEL];
    4944             :         struct btrfs_key update_progress;
    4945             :         struct btrfs_key drop_progress;
    4946             :         int drop_level;
    4947             :         int stage;
    4948             :         int level;
    4949             :         int shared_level;
    4950             :         int update_ref;
    4951             :         int keep_locks;
    4952             :         int reada_slot;
    4953             :         int reada_count;
    4954             :         int restarted;
    4955             : };
    4956             : 
    4957             : #define DROP_REFERENCE  1
    4958             : #define UPDATE_BACKREF  2
    4959             : 
    4960           0 : static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
    4961             :                                      struct btrfs_root *root,
    4962             :                                      struct walk_control *wc,
    4963             :                                      struct btrfs_path *path)
    4964             : {
    4965           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4966           0 :         u64 bytenr;
    4967           0 :         u64 generation;
    4968           0 :         u64 refs;
    4969           0 :         u64 flags;
    4970           0 :         u32 nritems;
    4971           0 :         struct btrfs_key key;
    4972           0 :         struct extent_buffer *eb;
    4973           0 :         int ret;
    4974           0 :         int slot;
    4975           0 :         int nread = 0;
    4976             : 
    4977           0 :         if (path->slots[wc->level] < wc->reada_slot) {
    4978           0 :                 wc->reada_count = wc->reada_count * 2 / 3;
    4979           0 :                 wc->reada_count = max(wc->reada_count, 2);
    4980             :         } else {
    4981           0 :                 wc->reada_count = wc->reada_count * 3 / 2;
    4982           0 :                 wc->reada_count = min_t(int, wc->reada_count,
    4983             :                                         BTRFS_NODEPTRS_PER_BLOCK(fs_info));
    4984             :         }
    4985             : 
    4986           0 :         eb = path->nodes[wc->level];
    4987           0 :         nritems = btrfs_header_nritems(eb);
    4988             : 
    4989           0 :         for (slot = path->slots[wc->level]; slot < nritems; slot++) {
    4990           0 :                 if (nread >= wc->reada_count)
    4991             :                         break;
    4992             : 
    4993           0 :                 cond_resched();
    4994           0 :                 bytenr = btrfs_node_blockptr(eb, slot);
    4995           0 :                 generation = btrfs_node_ptr_generation(eb, slot);
    4996             : 
    4997           0 :                 if (slot == path->slots[wc->level])
    4998           0 :                         goto reada;
    4999             : 
    5000           0 :                 if (wc->stage == UPDATE_BACKREF &&
    5001           0 :                     generation <= root->root_key.offset)
    5002           0 :                         continue;
    5003             : 
    5004             :                 /* We don't lock the tree block, it's OK to be racy here */
    5005           0 :                 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
    5006           0 :                                                wc->level - 1, 1, &refs,
    5007             :                                                &flags);
    5008             :                 /* We don't care about errors in readahead. */
    5009           0 :                 if (ret < 0)
    5010           0 :                         continue;
    5011           0 :                 BUG_ON(refs == 0);
    5012             : 
    5013           0 :                 if (wc->stage == DROP_REFERENCE) {
    5014           0 :                         if (refs == 1)
    5015           0 :                                 goto reada;
    5016             : 
    5017           0 :                         if (wc->level == 1 &&
    5018           0 :                             (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    5019           0 :                                 continue;
    5020           0 :                         if (!wc->update_ref ||
    5021           0 :                             generation <= root->root_key.offset)
    5022           0 :                                 continue;
    5023           0 :                         btrfs_node_key_to_cpu(eb, &key, slot);
    5024           0 :                         ret = btrfs_comp_cpu_keys(&key,
    5025           0 :                                                   &wc->update_progress);
    5026           0 :                         if (ret < 0)
    5027           0 :                                 continue;
    5028             :                 } else {
    5029           0 :                         if (wc->level == 1 &&
    5030           0 :                             (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    5031           0 :                                 continue;
    5032             :                 }
    5033           0 : reada:
    5034           0 :                 btrfs_readahead_node_child(eb, slot);
    5035           0 :                 nread++;
    5036             :         }
    5037           0 :         wc->reada_slot = slot;
    5038           0 : }
    5039             : 
    5040             : /*
    5041             :  * helper to process tree block while walking down the tree.
    5042             :  *
    5043             :  * when wc->stage == UPDATE_BACKREF, this function updates
    5044             :  * back refs for pointers in the block.
    5045             :  *
    5046             :  * NOTE: return value 1 means we should stop walking down.
    5047             :  */
    5048    10365361 : static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
    5049             :                                    struct btrfs_root *root,
    5050             :                                    struct btrfs_path *path,
    5051             :                                    struct walk_control *wc, int lookup_info)
    5052             : {
    5053    10365361 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5054    10365361 :         int level = wc->level;
    5055    10365361 :         struct extent_buffer *eb = path->nodes[level];
    5056    10365361 :         u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
    5057    10365361 :         int ret;
    5058             : 
    5059    10365361 :         if (wc->stage == UPDATE_BACKREF &&
    5060           0 :             btrfs_header_owner(eb) != root->root_key.objectid)
    5061             :                 return 1;
    5062             : 
    5063             :         /*
    5064             :          * when reference count of tree block is 1, it won't increase
    5065             :          * again. once full backref flag is set, we never clear it.
    5066             :          */
    5067    10365361 :         if (lookup_info &&
    5068     8380990 :             ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
    5069           0 :              (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
    5070      844480 :                 BUG_ON(!path->locks[level]);
    5071      844480 :                 ret = btrfs_lookup_extent_info(trans, fs_info,
    5072             :                                                eb->start, level, 1,
    5073             :                                                &wc->refs[level],
    5074             :                                                &wc->flags[level]);
    5075      844480 :                 BUG_ON(ret == -ENOMEM);
    5076      844480 :                 if (ret)
    5077             :                         return ret;
    5078      844480 :                 BUG_ON(wc->refs[level] == 0);
    5079             :         }
    5080             : 
    5081    10365361 :         if (wc->stage == DROP_REFERENCE) {
    5082    10365361 :                 if (wc->refs[level] > 1)
    5083             :                         return 1;
    5084             : 
    5085     9522520 :                 if (path->locks[level] && !wc->keep_locks) {
    5086     1986010 :                         btrfs_tree_unlock_rw(eb, path->locks[level]);
    5087     1986010 :                         path->locks[level] = 0;
    5088             :                 }
    5089     9522520 :                 return 0;
    5090             :         }
    5091             : 
    5092             :         /* wc->stage == UPDATE_BACKREF */
    5093           0 :         if (!(wc->flags[level] & flag)) {
    5094           0 :                 BUG_ON(!path->locks[level]);
    5095           0 :                 ret = btrfs_inc_ref(trans, root, eb, 1);
    5096           0 :                 BUG_ON(ret); /* -ENOMEM */
    5097           0 :                 ret = btrfs_dec_ref(trans, root, eb, 0);
    5098           0 :                 BUG_ON(ret); /* -ENOMEM */
    5099           0 :                 ret = btrfs_set_disk_extent_flags(trans, eb, flag);
    5100           0 :                 BUG_ON(ret); /* -ENOMEM */
    5101           0 :                 wc->flags[level] |= flag;
    5102             :         }
    5103             : 
    5104             :         /*
    5105             :          * the block is shared by multiple trees, so it's not good to
    5106             :          * keep the tree lock
    5107             :          */
    5108           0 :         if (path->locks[level] && level > 0) {
    5109           0 :                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    5110           0 :                 path->locks[level] = 0;
    5111             :         }
    5112             :         return 0;
    5113             : }
    5114             : 
    5115             : /*
    5116             :  * This is used to verify a ref exists for this root to deal with a bug where we
    5117             :  * would have a drop_progress key that hadn't been updated properly.
    5118             :  */
    5119           0 : static int check_ref_exists(struct btrfs_trans_handle *trans,
    5120             :                             struct btrfs_root *root, u64 bytenr, u64 parent,
    5121             :                             int level)
    5122             : {
    5123           0 :         struct btrfs_path *path;
    5124           0 :         struct btrfs_extent_inline_ref *iref;
    5125           0 :         int ret;
    5126             : 
    5127           0 :         path = btrfs_alloc_path();
    5128           0 :         if (!path)
    5129             :                 return -ENOMEM;
    5130             : 
    5131           0 :         ret = lookup_extent_backref(trans, path, &iref, bytenr,
    5132           0 :                                     root->fs_info->nodesize, parent,
    5133             :                                     root->root_key.objectid, level, 0);
    5134           0 :         btrfs_free_path(path);
    5135           0 :         if (ret == -ENOENT)
    5136             :                 return 0;
    5137           0 :         if (ret < 0)
    5138           0 :                 return ret;
    5139             :         return 1;
    5140             : }
    5141             : 
    5142             : /*
    5143             :  * helper to process tree block pointer.
    5144             :  *
    5145             :  * when wc->stage == DROP_REFERENCE, this function checks
    5146             :  * reference count of the block pointed to. if the block
    5147             :  * is shared and we need update back refs for the subtree
    5148             :  * rooted at the block, this function changes wc->stage to
    5149             :  * UPDATE_BACKREF. if the block is shared and there is no
    5150             :  * need to update back, this function drops the reference
    5151             :  * to the block.
    5152             :  *
    5153             :  * NOTE: return value 1 means we should stop walking down.
    5154             :  */
    5155     7543414 : static noinline int do_walk_down(struct btrfs_trans_handle *trans,
    5156             :                                  struct btrfs_root *root,
    5157             :                                  struct btrfs_path *path,
    5158             :                                  struct walk_control *wc, int *lookup_info)
    5159             : {
    5160     7543414 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5161     7543414 :         u64 bytenr;
    5162     7543414 :         u64 generation;
    5163     7543414 :         u64 parent;
    5164     7543414 :         struct btrfs_tree_parent_check check = { 0 };
    5165     7543414 :         struct btrfs_key key;
    5166     7543414 :         struct btrfs_ref ref = { 0 };
    5167     7543414 :         struct extent_buffer *next;
    5168     7543414 :         int level = wc->level;
    5169     7543414 :         int reada = 0;
    5170     7543414 :         int ret = 0;
    5171     7543414 :         bool need_account = false;
    5172             : 
    5173     7543414 :         generation = btrfs_node_ptr_generation(path->nodes[level],
    5174             :                                                path->slots[level]);
    5175             :         /*
    5176             :          * if the lower level block was created before the snapshot
    5177             :          * was created, we know there is no need to update back refs
    5178             :          * for the subtree
    5179             :          */
    5180     7543414 :         if (wc->stage == UPDATE_BACKREF &&
    5181           0 :             generation <= root->root_key.offset) {
    5182           0 :                 *lookup_info = 1;
    5183           0 :                 return 1;
    5184             :         }
    5185             : 
    5186     7543414 :         bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
    5187             : 
    5188     7543414 :         check.level = level - 1;
    5189     7543414 :         check.transid = generation;
    5190     7543414 :         check.owner_root = root->root_key.objectid;
    5191     7543414 :         check.has_first_key = true;
    5192     7543414 :         btrfs_node_key_to_cpu(path->nodes[level], &check.first_key,
    5193             :                               path->slots[level]);
    5194             : 
    5195     7543414 :         next = find_extent_buffer(fs_info, bytenr);
    5196     7543414 :         if (!next) {
    5197     1437473 :                 next = btrfs_find_create_tree_block(fs_info, bytenr,
    5198             :                                 root->root_key.objectid, level - 1);
    5199     1437473 :                 if (IS_ERR(next))
    5200           0 :                         return PTR_ERR(next);
    5201             :                 reada = 1;
    5202             :         }
    5203     7543414 :         btrfs_tree_lock(next);
    5204             : 
    5205     7543414 :         ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
    5206             :                                        &wc->refs[level - 1],
    5207     7543414 :                                        &wc->flags[level - 1]);
    5208     7543414 :         if (ret < 0)
    5209           0 :                 goto out_unlock;
    5210             : 
    5211     7543414 :         if (unlikely(wc->refs[level - 1] == 0)) {
    5212           0 :                 btrfs_err(fs_info, "Missing references.");
    5213           0 :                 ret = -EIO;
    5214           0 :                 goto out_unlock;
    5215             :         }
    5216     7543414 :         *lookup_info = 0;
    5217             : 
    5218     7543414 :         if (wc->stage == DROP_REFERENCE) {
    5219     7543414 :                 if (wc->refs[level - 1] > 1) {
    5220     5559043 :                         need_account = true;
    5221     5559043 :                         if (level == 1 &&
    5222     5548662 :                             (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    5223     5515735 :                                 goto skip;
    5224             : 
    5225       43308 :                         if (!wc->update_ref ||
    5226       22103 :                             generation <= root->root_key.offset)
    5227       43308 :                                 goto skip;
    5228             : 
    5229           0 :                         btrfs_node_key_to_cpu(path->nodes[level], &key,
    5230             :                                               path->slots[level]);
    5231           0 :                         ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
    5232           0 :                         if (ret < 0)
    5233           0 :                                 goto skip;
    5234             : 
    5235           0 :                         wc->stage = UPDATE_BACKREF;
    5236           0 :                         wc->shared_level = level - 1;
    5237             :                 }
    5238             :         } else {
    5239           0 :                 if (level == 1 &&
    5240           0 :                     (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    5241           0 :                         goto skip;
    5242             :         }
    5243             : 
    5244     1984371 :         if (!btrfs_buffer_uptodate(next, generation, 0)) {
    5245           0 :                 btrfs_tree_unlock(next);
    5246           0 :                 free_extent_buffer(next);
    5247           0 :                 next = NULL;
    5248           0 :                 *lookup_info = 1;
    5249             :         }
    5250             : 
    5251     1984371 :         if (!next) {
    5252           0 :                 if (reada && level == 1)
    5253           0 :                         reada_walk_down(trans, root, wc, path);
    5254           0 :                 next = read_tree_block(fs_info, bytenr, &check);
    5255           0 :                 if (IS_ERR(next)) {
    5256           0 :                         return PTR_ERR(next);
    5257           0 :                 } else if (!extent_buffer_uptodate(next)) {
    5258           0 :                         free_extent_buffer(next);
    5259           0 :                         return -EIO;
    5260             :                 }
    5261           0 :                 btrfs_tree_lock(next);
    5262             :         }
    5263             : 
    5264     1984371 :         level--;
    5265     1984371 :         ASSERT(level == btrfs_header_level(next));
    5266     1984371 :         if (level != btrfs_header_level(next)) {
    5267           0 :                 btrfs_err(root->fs_info, "mismatched level");
    5268           0 :                 ret = -EIO;
    5269           0 :                 goto out_unlock;
    5270             :         }
    5271     1984371 :         path->nodes[level] = next;
    5272     1984371 :         path->slots[level] = 0;
    5273     1984371 :         path->locks[level] = BTRFS_WRITE_LOCK;
    5274     1984371 :         wc->level = level;
    5275     1984371 :         if (wc->level == 1)
    5276       22946 :                 wc->reada_slot = 0;
    5277             :         return 0;
    5278     5559043 : skip:
    5279     5559043 :         wc->refs[level - 1] = 0;
    5280     5559043 :         wc->flags[level - 1] = 0;
    5281     5559043 :         if (wc->stage == DROP_REFERENCE) {
    5282     5559043 :                 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
    5283     5552761 :                         parent = path->nodes[level]->start;
    5284             :                 } else {
    5285        6282 :                         ASSERT(root->root_key.objectid ==
    5286             :                                btrfs_header_owner(path->nodes[level]));
    5287        6282 :                         if (root->root_key.objectid !=
    5288             :                             btrfs_header_owner(path->nodes[level])) {
    5289           0 :                                 btrfs_err(root->fs_info,
    5290             :                                                 "mismatched block owner");
    5291           0 :                                 ret = -EIO;
    5292           0 :                                 goto out_unlock;
    5293             :                         }
    5294             :                         parent = 0;
    5295             :                 }
    5296             : 
    5297             :                 /*
    5298             :                  * If we had a drop_progress we need to verify the refs are set
    5299             :                  * as expected.  If we find our ref then we know that from here
    5300             :                  * on out everything should be correct, and we can clear the
    5301             :                  * ->restarted flag.
    5302             :                  */
    5303     5559043 :                 if (wc->restarted) {
    5304           0 :                         ret = check_ref_exists(trans, root, bytenr, parent,
    5305             :                                                level - 1);
    5306           0 :                         if (ret < 0)
    5307           0 :                                 goto out_unlock;
    5308           0 :                         if (ret == 0)
    5309           0 :                                 goto no_delete;
    5310           0 :                         ret = 0;
    5311           0 :                         wc->restarted = 0;
    5312             :                 }
    5313             : 
    5314             :                 /*
    5315             :                  * Reloc tree doesn't contribute to qgroup numbers, and we have
    5316             :                  * already accounted them at merge time (replace_path),
    5317             :                  * thus we could skip expensive subtree trace here.
    5318             :                  */
    5319     5559043 :                 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
    5320             :                     need_account) {
    5321       33697 :                         ret = btrfs_qgroup_trace_subtree(trans, next,
    5322             :                                                          generation, level - 1);
    5323       33697 :                         if (ret) {
    5324           0 :                                 btrfs_err_rl(fs_info,
    5325             :                                              "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
    5326             :                                              ret);
    5327             :                         }
    5328             :                 }
    5329             : 
    5330             :                 /*
    5331             :                  * We need to update the next key in our walk control so we can
    5332             :                  * update the drop_progress key accordingly.  We don't care if
    5333             :                  * find_next_key doesn't find a key because that means we're at
    5334             :                  * the end and are going to clean up now.
    5335             :                  */
    5336     5559043 :                 wc->drop_level = level;
    5337     5559043 :                 find_next_key(path, level, &wc->drop_progress);
    5338             : 
    5339     5559043 :                 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
    5340     5559043 :                                        fs_info->nodesize, parent);
    5341     5559043 :                 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
    5342             :                                     0, false);
    5343     5559043 :                 ret = btrfs_free_extent(trans, &ref);
    5344     5559043 :                 if (ret)
    5345           0 :                         goto out_unlock;
    5346             :         }
    5347     5559043 : no_delete:
    5348     5559043 :         *lookup_info = 1;
    5349     5559043 :         ret = 1;
    5350             : 
    5351     5559043 : out_unlock:
    5352     5559043 :         btrfs_tree_unlock(next);
    5353     5559043 :         free_extent_buffer(next);
    5354             : 
    5355     5559043 :         return ret;
    5356             : }
    5357             : 
    5358             : /*
    5359             :  * helper to process tree block while walking up the tree.
    5360             :  *
    5361             :  * when wc->stage == DROP_REFERENCE, this function drops
    5362             :  * reference count on the block.
    5363             :  *
    5364             :  * when wc->stage == UPDATE_BACKREF, this function changes
    5365             :  * wc->stage back to DROP_REFERENCE if we changed wc->stage
    5366             :  * to UPDATE_BACKREF previously while processing the block.
    5367             :  *
    5368             :  * NOTE: return value 1 means we should stop walking up.
    5369             :  */
    5370     2828851 : static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
    5371             :                                  struct btrfs_root *root,
    5372             :                                  struct btrfs_path *path,
    5373             :                                  struct walk_control *wc)
    5374             : {
    5375     2828851 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5376     2828851 :         int ret;
    5377     2828851 :         int level = wc->level;
    5378     2828851 :         struct extent_buffer *eb = path->nodes[level];
    5379     2828851 :         u64 parent = 0;
    5380             : 
    5381     2828851 :         if (wc->stage == UPDATE_BACKREF) {
    5382           0 :                 BUG_ON(wc->shared_level < level);
    5383           0 :                 if (level < wc->shared_level)
    5384           0 :                         goto out;
    5385             : 
    5386           0 :                 ret = find_next_key(path, level + 1, &wc->update_progress);
    5387           0 :                 if (ret > 0)
    5388           0 :                         wc->update_ref = 0;
    5389             : 
    5390           0 :                 wc->stage = DROP_REFERENCE;
    5391           0 :                 wc->shared_level = -1;
    5392           0 :                 path->slots[level] = 0;
    5393             : 
    5394             :                 /*
    5395             :                  * check reference count again if the block isn't locked.
    5396             :                  * we should start walking down the tree again if reference
    5397             :                  * count is one.
    5398             :                  */
    5399           0 :                 if (!path->locks[level]) {
    5400           0 :                         BUG_ON(level == 0);
    5401           0 :                         btrfs_tree_lock(eb);
    5402           0 :                         path->locks[level] = BTRFS_WRITE_LOCK;
    5403             : 
    5404           0 :                         ret = btrfs_lookup_extent_info(trans, fs_info,
    5405             :                                                        eb->start, level, 1,
    5406             :                                                        &wc->refs[level],
    5407             :                                                        &wc->flags[level]);
    5408           0 :                         if (ret < 0) {
    5409           0 :                                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    5410           0 :                                 path->locks[level] = 0;
    5411           0 :                                 return ret;
    5412             :                         }
    5413           0 :                         BUG_ON(wc->refs[level] == 0);
    5414           0 :                         if (wc->refs[level] == 1) {
    5415           0 :                                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    5416           0 :                                 path->locks[level] = 0;
    5417           0 :                                 return 1;
    5418             :                         }
    5419             :                 }
    5420             :         }
    5421             : 
    5422             :         /* wc->stage == DROP_REFERENCE */
    5423     2828851 :         BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
    5424             : 
    5425     2828851 :         if (wc->refs[level] == 1) {
    5426     1986010 :                 if (level == 0) {
    5427     1961950 :                         if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
    5428     1961900 :                                 ret = btrfs_dec_ref(trans, root, eb, 1);
    5429             :                         else
    5430          50 :                                 ret = btrfs_dec_ref(trans, root, eb, 0);
    5431     1961950 :                         BUG_ON(ret); /* -ENOMEM */
    5432     1961950 :                         if (is_fstree(root->root_key.objectid)) {
    5433        3201 :                                 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
    5434        3201 :                                 if (ret) {
    5435           0 :                                         btrfs_err_rl(fs_info,
    5436             :         "error %d accounting leaf items, quota is out of sync, rescan required",
    5437             :                                              ret);
    5438             :                                 }
    5439             :                         }
    5440             :                 }
    5441             :                 /* Make block locked assertion in btrfs_clear_buffer_dirty happy. */
    5442     1986010 :                 if (!path->locks[level]) {
    5443     1986010 :                         btrfs_tree_lock(eb);
    5444     1986010 :                         path->locks[level] = BTRFS_WRITE_LOCK;
    5445             :                 }
    5446     1986010 :                 btrfs_clear_buffer_dirty(trans, eb);
    5447             :         }
    5448             : 
    5449     2828851 :         if (eb == root->node) {
    5450        1639 :                 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
    5451        1520 :                         parent = eb->start;
    5452         119 :                 else if (root->root_key.objectid != btrfs_header_owner(eb))
    5453           0 :                         goto owner_mismatch;
    5454             :         } else {
    5455     2827212 :                 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
    5456     2825461 :                         parent = path->nodes[level + 1]->start;
    5457        1751 :                 else if (root->root_key.objectid !=
    5458        1751 :                          btrfs_header_owner(path->nodes[level + 1]))
    5459           0 :                         goto owner_mismatch;
    5460             :         }
    5461             : 
    5462     2828851 :         btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent,
    5463     2828851 :                               wc->refs[level] == 1);
    5464     2828851 : out:
    5465     2828851 :         wc->refs[level] = 0;
    5466     2828851 :         wc->flags[level] = 0;
    5467     2828851 :         return 0;
    5468             : 
    5469           0 : owner_mismatch:
    5470           0 :         btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
    5471             :                      btrfs_header_owner(eb), root->root_key.objectid);
    5472             :         return -EUCLEAN;
    5473             : }
    5474             : 
    5475     2821947 : static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
    5476             :                                    struct btrfs_root *root,
    5477             :                                    struct btrfs_path *path,
    5478             :                                    struct walk_control *wc)
    5479             : {
    5480     2821947 :         int level = wc->level;
    5481     2821947 :         int lookup_info = 1;
    5482     2821947 :         int ret = 0;
    5483             : 
    5484    10365361 :         while (level >= 0) {
    5485    10365361 :                 ret = walk_down_proc(trans, root, path, wc, lookup_info);
    5486    10365361 :                 if (ret)
    5487             :                         break;
    5488             : 
    5489     9522520 :                 if (level == 0)
    5490             :                         break;
    5491             : 
    5492     7560570 :                 if (path->slots[level] >=
    5493     7560570 :                     btrfs_header_nritems(path->nodes[level]))
    5494             :                         break;
    5495             : 
    5496     7543414 :                 ret = do_walk_down(trans, root, path, wc, &lookup_info);
    5497     7543414 :                 if (ret > 0) {
    5498     5559043 :                         path->slots[level]++;
    5499     5559043 :                         continue;
    5500     1984371 :                 } else if (ret < 0)
    5501             :                         break;
    5502     1984371 :                 level = wc->level;
    5503             :         }
    5504     2821947 :         return (ret == 1) ? 0 : ret;
    5505             : }
    5506             : 
    5507     2821947 : static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
    5508             :                                  struct btrfs_root *root,
    5509             :                                  struct btrfs_path *path,
    5510             :                                  struct walk_control *wc, int max_level)
    5511             : {
    5512     2821947 :         int level = wc->level;
    5513     2821947 :         int ret;
    5514             : 
    5515     2821947 :         path->slots[level] = btrfs_header_nritems(path->nodes[level]);
    5516     5650798 :         while (level < max_level && path->nodes[level]) {
    5517     4806318 :                 wc->level = level;
    5518     4806318 :                 if (path->slots[level] + 1 <
    5519     4806318 :                     btrfs_header_nritems(path->nodes[level])) {
    5520     1977467 :                         path->slots[level]++;
    5521     1977467 :                         return 0;
    5522             :                 } else {
    5523     2828851 :                         ret = walk_up_proc(trans, root, path, wc);
    5524     2828851 :                         if (ret > 0)
    5525             :                                 return 0;
    5526     2828851 :                         if (ret < 0)
    5527           0 :                                 return ret;
    5528             : 
    5529     2828851 :                         if (path->locks[level]) {
    5530     2828851 :                                 btrfs_tree_unlock_rw(path->nodes[level],
    5531             :                                                      path->locks[level]);
    5532     2828851 :                                 path->locks[level] = 0;
    5533             :                         }
    5534     2828851 :                         free_extent_buffer(path->nodes[level]);
    5535     2828851 :                         path->nodes[level] = NULL;
    5536     2828851 :                         level++;
    5537             :                 }
    5538             :         }
    5539             :         return 1;
    5540             : }
    5541             : 
    5542             : /*
    5543             :  * drop a subvolume tree.
    5544             :  *
    5545             :  * This function traverses the tree, freeing any blocks that are only
    5546             :  * referenced by the tree.
    5547             :  *
    5548             :  * When a shared tree block is found, this function decreases its
    5549             :  * reference count by one. If update_ref is true, this function
    5550             :  * also makes sure backrefs for the shared block and all lower level
    5551             :  * blocks are properly updated.
    5552             :  *
    5553             :  * If called with for_reloc == 0, may exit early with -EAGAIN
    5554             :  */
    5555        1639 : int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
    5556             : {
    5557        1639 :         const bool is_reloc_root = (root->root_key.objectid ==
    5558             :                                     BTRFS_TREE_RELOC_OBJECTID);
    5559        1639 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5560        1639 :         struct btrfs_path *path;
    5561        1639 :         struct btrfs_trans_handle *trans;
    5562        1639 :         struct btrfs_root *tree_root = fs_info->tree_root;
    5563        1639 :         struct btrfs_root_item *root_item = &root->root_item;
    5564        1639 :         struct walk_control *wc;
    5565        1639 :         struct btrfs_key key;
    5566        1639 :         int err = 0;
    5567        1639 :         int ret;
    5568        1639 :         int level;
    5569        1639 :         bool root_dropped = false;
    5570        1639 :         bool unfinished_drop = false;
    5571             : 
    5572        1639 :         btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
    5573             : 
    5574        1639 :         path = btrfs_alloc_path();
    5575        1639 :         if (!path) {
    5576           0 :                 err = -ENOMEM;
    5577           0 :                 goto out;
    5578             :         }
    5579             : 
    5580        1639 :         wc = kzalloc(sizeof(*wc), GFP_NOFS);
    5581        1639 :         if (!wc) {
    5582           0 :                 btrfs_free_path(path);
    5583           0 :                 err = -ENOMEM;
    5584           0 :                 goto out;
    5585             :         }
    5586             : 
    5587             :         /*
    5588             :          * Use join to avoid potential EINTR from transaction start. See
    5589             :          * wait_reserve_ticket and the whole reservation callchain.
    5590             :          */
    5591        1639 :         if (for_reloc)
    5592        1520 :                 trans = btrfs_join_transaction(tree_root);
    5593             :         else
    5594         119 :                 trans = btrfs_start_transaction(tree_root, 0);
    5595        1639 :         if (IS_ERR(trans)) {
    5596           0 :                 err = PTR_ERR(trans);
    5597           0 :                 goto out_free;
    5598             :         }
    5599             : 
    5600        1639 :         err = btrfs_run_delayed_items(trans);
    5601        1639 :         if (err)
    5602           0 :                 goto out_end_trans;
    5603             : 
    5604             :         /*
    5605             :          * This will help us catch people modifying the fs tree while we're
    5606             :          * dropping it.  It is unsafe to mess with the fs tree while it's being
    5607             :          * dropped as we unlock the root node and parent nodes as we walk down
    5608             :          * the tree, assuming nothing will change.  If something does change
    5609             :          * then we'll have stale information and drop references to blocks we've
    5610             :          * already dropped.
    5611             :          */
    5612        1639 :         set_bit(BTRFS_ROOT_DELETING, &root->state);
    5613        1639 :         unfinished_drop = test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
    5614             : 
    5615        1639 :         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
    5616        1639 :                 level = btrfs_header_level(root->node);
    5617        1639 :                 path->nodes[level] = btrfs_lock_root_node(root);
    5618        1639 :                 path->slots[level] = 0;
    5619        1639 :                 path->locks[level] = BTRFS_WRITE_LOCK;
    5620        3278 :                 memset(&wc->update_progress, 0,
    5621             :                        sizeof(wc->update_progress));
    5622             :         } else {
    5623           0 :                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
    5624           0 :                 memcpy(&wc->update_progress, &key,
    5625             :                        sizeof(wc->update_progress));
    5626             : 
    5627           0 :                 level = btrfs_root_drop_level(root_item);
    5628           0 :                 BUG_ON(level == 0);
    5629           0 :                 path->lowest_level = level;
    5630           0 :                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    5631           0 :                 path->lowest_level = 0;
    5632           0 :                 if (ret < 0) {
    5633           0 :                         err = ret;
    5634           0 :                         goto out_end_trans;
    5635             :                 }
    5636           0 :                 WARN_ON(ret > 0);
    5637             : 
    5638             :                 /*
    5639             :                  * unlock our path, this is safe because only this
    5640             :                  * function is allowed to delete this snapshot
    5641             :                  */
    5642           0 :                 btrfs_unlock_up_safe(path, 0);
    5643             : 
    5644           0 :                 level = btrfs_header_level(root->node);
    5645           0 :                 while (1) {
    5646           0 :                         btrfs_tree_lock(path->nodes[level]);
    5647           0 :                         path->locks[level] = BTRFS_WRITE_LOCK;
    5648             : 
    5649           0 :                         ret = btrfs_lookup_extent_info(trans, fs_info,
    5650           0 :                                                 path->nodes[level]->start,
    5651             :                                                 level, 1, &wc->refs[level],
    5652             :                                                 &wc->flags[level]);
    5653           0 :                         if (ret < 0) {
    5654           0 :                                 err = ret;
    5655           0 :                                 goto out_end_trans;
    5656             :                         }
    5657           0 :                         BUG_ON(wc->refs[level] == 0);
    5658             : 
    5659           0 :                         if (level == btrfs_root_drop_level(root_item))
    5660             :                                 break;
    5661             : 
    5662           0 :                         btrfs_tree_unlock(path->nodes[level]);
    5663           0 :                         path->locks[level] = 0;
    5664           0 :                         WARN_ON(wc->refs[level] != 1);
    5665           0 :                         level--;
    5666             :                 }
    5667             :         }
    5668             : 
    5669        1639 :         wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
    5670        1639 :         wc->level = level;
    5671        1639 :         wc->shared_level = -1;
    5672        1639 :         wc->stage = DROP_REFERENCE;
    5673        1639 :         wc->update_ref = update_ref;
    5674        1639 :         wc->keep_locks = 0;
    5675        1639 :         wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
    5676             : 
    5677     1979106 :         while (1) {
    5678             : 
    5679     1979106 :                 ret = walk_down_tree(trans, root, path, wc);
    5680     1979106 :                 if (ret < 0) {
    5681           0 :                         btrfs_abort_transaction(trans, ret);
    5682           0 :                         err = ret;
    5683           0 :                         break;
    5684             :                 }
    5685             : 
    5686     1979106 :                 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
    5687     1979106 :                 if (ret < 0) {
    5688           0 :                         btrfs_abort_transaction(trans, ret);
    5689           0 :                         err = ret;
    5690           0 :                         break;
    5691             :                 }
    5692             : 
    5693     1979106 :                 if (ret > 0) {
    5694        1639 :                         BUG_ON(wc->stage != DROP_REFERENCE);
    5695             :                         break;
    5696             :                 }
    5697             : 
    5698     1977467 :                 if (wc->stage == DROP_REFERENCE) {
    5699     1977467 :                         wc->drop_level = wc->level;
    5700     1977467 :                         btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
    5701             :                                               &wc->drop_progress,
    5702             :                                               path->slots[wc->drop_level]);
    5703             :                 }
    5704     1977467 :                 btrfs_cpu_key_to_disk(&root_item->drop_progress,
    5705     1977467 :                                       &wc->drop_progress);
    5706     1977467 :                 btrfs_set_root_drop_level(root_item, wc->drop_level);
    5707             : 
    5708     1977467 :                 BUG_ON(wc->level == 0);
    5709     1977467 :                 if (btrfs_should_end_transaction(trans) ||
    5710           8 :                     (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
    5711     1975120 :                         ret = btrfs_update_root(trans, tree_root,
    5712             :                                                 &root->root_key,
    5713             :                                                 root_item);
    5714     1975120 :                         if (ret) {
    5715           0 :                                 btrfs_abort_transaction(trans, ret);
    5716           0 :                                 err = ret;
    5717           0 :                                 goto out_end_trans;
    5718             :                         }
    5719             : 
    5720     1975120 :                         if (!is_reloc_root)
    5721        3120 :                                 btrfs_set_last_root_drop_gen(fs_info, trans->transid);
    5722             : 
    5723     1975120 :                         btrfs_end_transaction_throttle(trans);
    5724     1975120 :                         if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
    5725           0 :                                 btrfs_debug(fs_info,
    5726             :                                             "drop snapshot early exit");
    5727           0 :                                 err = -EAGAIN;
    5728           0 :                                 goto out_free;
    5729             :                         }
    5730             : 
    5731             :                        /*
    5732             :                         * Use join to avoid potential EINTR from transaction
    5733             :                         * start. See wait_reserve_ticket and the whole
    5734             :                         * reservation callchain.
    5735             :                         */
    5736     1975120 :                         if (for_reloc)
    5737     1972000 :                                 trans = btrfs_join_transaction(tree_root);
    5738             :                         else
    5739        3120 :                                 trans = btrfs_start_transaction(tree_root, 0);
    5740     1975120 :                         if (IS_ERR(trans)) {
    5741           0 :                                 err = PTR_ERR(trans);
    5742           0 :                                 goto out_free;
    5743             :                         }
    5744             :                 }
    5745             :         }
    5746        1639 :         btrfs_release_path(path);
    5747        1639 :         if (err)
    5748           0 :                 goto out_end_trans;
    5749             : 
    5750        1639 :         ret = btrfs_del_root(trans, &root->root_key);
    5751        1639 :         if (ret) {
    5752           0 :                 btrfs_abort_transaction(trans, ret);
    5753           0 :                 err = ret;
    5754           0 :                 goto out_end_trans;
    5755             :         }
    5756             : 
    5757        1639 :         if (!is_reloc_root) {
    5758         119 :                 ret = btrfs_find_root(tree_root, &root->root_key, path,
    5759             :                                       NULL, NULL);
    5760         119 :                 if (ret < 0) {
    5761           0 :                         btrfs_abort_transaction(trans, ret);
    5762           0 :                         err = ret;
    5763           0 :                         goto out_end_trans;
    5764         119 :                 } else if (ret > 0) {
    5765             :                         /* if we fail to delete the orphan item this time
    5766             :                          * around, it'll get picked up the next time.
    5767             :                          *
    5768             :                          * The most common failure here is just -ENOENT.
    5769             :                          */
    5770         119 :                         btrfs_del_orphan_item(trans, tree_root,
    5771             :                                               root->root_key.objectid);
    5772             :                 }
    5773             :         }
    5774             : 
    5775             :         /*
    5776             :          * This subvolume is going to be completely dropped, and won't be
    5777             :          * recorded as dirty roots, thus pertrans meta rsv will not be freed at
    5778             :          * commit transaction time.  So free it here manually.
    5779             :          */
    5780        1639 :         btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
    5781        1639 :         btrfs_qgroup_free_meta_all_pertrans(root);
    5782             : 
    5783        3278 :         if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
    5784         119 :                 btrfs_add_dropped_root(trans, root);
    5785             :         else
    5786        1520 :                 btrfs_put_root(root);
    5787             :         root_dropped = true;
    5788        1639 : out_end_trans:
    5789        1639 :         if (!is_reloc_root)
    5790         119 :                 btrfs_set_last_root_drop_gen(fs_info, trans->transid);
    5791             : 
    5792        1639 :         btrfs_end_transaction_throttle(trans);
    5793        1639 : out_free:
    5794        1639 :         kfree(wc);
    5795        1639 :         btrfs_free_path(path);
    5796        1639 : out:
    5797             :         /*
    5798             :          * We were an unfinished drop root, check to see if there are any
    5799             :          * pending, and if not clear and wake up any waiters.
    5800             :          */
    5801        1639 :         if (!err && unfinished_drop)
    5802           0 :                 btrfs_maybe_wake_unfinished_drop(fs_info);
    5803             : 
    5804             :         /*
    5805             :          * So if we need to stop dropping the snapshot for whatever reason we
    5806             :          * need to make sure to add it back to the dead root list so that we
    5807             :          * keep trying to do the work later.  This also cleans up roots if we
    5808             :          * don't have it in the radix (like when we recover after a power fail
    5809             :          * or unmount) so we don't leak memory.
    5810             :          */
    5811        1639 :         if (!for_reloc && !root_dropped)
    5812           0 :                 btrfs_add_dead_root(root);
    5813        1639 :         return err;
    5814             : }
    5815             : 
    5816             : /*
    5817             :  * drop subtree rooted at tree block 'node'.
    5818             :  *
    5819             :  * NOTE: this function will unlock and release tree block 'node'
    5820             :  * only used by relocation code
    5821             :  */
    5822      842841 : int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
    5823             :                         struct btrfs_root *root,
    5824             :                         struct extent_buffer *node,
    5825             :                         struct extent_buffer *parent)
    5826             : {
    5827      842841 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5828      842841 :         struct btrfs_path *path;
    5829      842841 :         struct walk_control *wc;
    5830      842841 :         int level;
    5831      842841 :         int parent_level;
    5832      842841 :         int ret = 0;
    5833      842841 :         int wret;
    5834             : 
    5835      842841 :         BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
    5836             : 
    5837      842841 :         path = btrfs_alloc_path();
    5838      842841 :         if (!path)
    5839             :                 return -ENOMEM;
    5840             : 
    5841      842841 :         wc = kzalloc(sizeof(*wc), GFP_NOFS);
    5842      842841 :         if (!wc) {
    5843           0 :                 btrfs_free_path(path);
    5844           0 :                 return -ENOMEM;
    5845             :         }
    5846             : 
    5847      842841 :         btrfs_assert_tree_write_locked(parent);
    5848      842841 :         parent_level = btrfs_header_level(parent);
    5849      842841 :         atomic_inc(&parent->refs);
    5850      842841 :         path->nodes[parent_level] = parent;
    5851      842841 :         path->slots[parent_level] = btrfs_header_nritems(parent);
    5852             : 
    5853      842841 :         btrfs_assert_tree_write_locked(node);
    5854      842841 :         level = btrfs_header_level(node);
    5855      842841 :         path->nodes[level] = node;
    5856      842841 :         path->slots[level] = 0;
    5857      842841 :         path->locks[level] = BTRFS_WRITE_LOCK;
    5858             : 
    5859      842841 :         wc->refs[parent_level] = 1;
    5860      842841 :         wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
    5861      842841 :         wc->level = level;
    5862      842841 :         wc->shared_level = -1;
    5863      842841 :         wc->stage = DROP_REFERENCE;
    5864      842841 :         wc->update_ref = 0;
    5865      842841 :         wc->keep_locks = 1;
    5866      842841 :         wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
    5867             : 
    5868      842841 :         while (1) {
    5869      842841 :                 wret = walk_down_tree(trans, root, path, wc);
    5870      842841 :                 if (wret < 0) {
    5871             :                         ret = wret;
    5872             :                         break;
    5873             :                 }
    5874             : 
    5875      842841 :                 wret = walk_up_tree(trans, root, path, wc, parent_level);
    5876      842841 :                 if (wret < 0)
    5877           0 :                         ret = wret;
    5878      842841 :                 if (wret != 0)
    5879             :                         break;
    5880             :         }
    5881             : 
    5882      842841 :         kfree(wc);
    5883      842841 :         btrfs_free_path(path);
    5884      842841 :         return ret;
    5885             : }
    5886             : 
    5887         140 : int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
    5888             :                                    u64 start, u64 end)
    5889             : {
    5890         140 :         return unpin_extent_range(fs_info, start, end, false);
    5891             : }
    5892             : 
    5893             : /*
    5894             :  * It used to be that old block groups would be left around forever.
    5895             :  * Iterating over them would be enough to trim unused space.  Since we
    5896             :  * now automatically remove them, we also need to iterate over unallocated
    5897             :  * space.
    5898             :  *
    5899             :  * We don't want a transaction for this since the discard may take a
    5900             :  * substantial amount of time.  We don't require that a transaction be
    5901             :  * running, but we do need to take a running transaction into account
    5902             :  * to ensure that we're not discarding chunks that were released or
    5903             :  * allocated in the current transaction.
    5904             :  *
    5905             :  * Holding the chunks lock will prevent other threads from allocating
    5906             :  * or releasing chunks, but it won't prevent a running transaction
    5907             :  * from committing and releasing the memory that the pending chunks
    5908             :  * list head uses.  For that, we need to take a reference to the
    5909             :  * transaction and hold the commit root sem.  We only need to hold
    5910             :  * it while performing the free space search since we have already
    5911             :  * held back allocations.
    5912             :  */
    5913         843 : static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
    5914             : {
    5915         843 :         u64 start = BTRFS_DEVICE_RANGE_RESERVED, len = 0, end = 0;
    5916         843 :         int ret;
    5917             : 
    5918         843 :         *trimmed = 0;
    5919             : 
    5920             :         /* Discard not supported = nothing to do. */
    5921         843 :         if (!bdev_max_discard_sectors(device->bdev))
    5922             :                 return 0;
    5923             : 
    5924             :         /* Not writable = nothing to do. */
    5925         843 :         if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
    5926             :                 return 0;
    5927             : 
    5928             :         /* No free space = nothing to do. */
    5929         843 :         if (device->total_bytes <= device->bytes_used)
    5930             :                 return 0;
    5931             : 
    5932             :         ret = 0;
    5933             : 
    5934          30 :         while (1) {
    5935         873 :                 struct btrfs_fs_info *fs_info = device->fs_info;
    5936         873 :                 u64 bytes;
    5937             : 
    5938         873 :                 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
    5939         873 :                 if (ret)
    5940             :                         break;
    5941             : 
    5942         873 :                 find_first_clear_extent_bit(&device->alloc_state, start,
    5943             :                                             &start, &end,
    5944             :                                             CHUNK_TRIMMED | CHUNK_ALLOCATED);
    5945             : 
    5946             :                 /* Check if there are any CHUNK_* bits left */
    5947         873 :                 if (start > device->total_bytes) {
    5948           0 :                         WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
    5949           0 :                         btrfs_warn_in_rcu(fs_info,
    5950             : "ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
    5951             :                                           start, end - start + 1,
    5952             :                                           btrfs_dev_name(device),
    5953             :                                           device->total_bytes);
    5954           0 :                         mutex_unlock(&fs_info->chunk_mutex);
    5955           0 :                         ret = 0;
    5956           0 :                         break;
    5957             :                 }
    5958             : 
    5959             :                 /* Ensure we skip the reserved space on each device. */
    5960         873 :                 start = max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
    5961             : 
    5962             :                 /*
    5963             :                  * If find_first_clear_extent_bit find a range that spans the
    5964             :                  * end of the device it will set end to -1, in this case it's up
    5965             :                  * to the caller to trim the value to the size of the device.
    5966             :                  */
    5967         873 :                 end = min(end, device->total_bytes - 1);
    5968             : 
    5969         873 :                 len = end - start + 1;
    5970             : 
    5971             :                 /* We didn't find any extents */
    5972         873 :                 if (!len) {
    5973         843 :                         mutex_unlock(&fs_info->chunk_mutex);
    5974         843 :                         ret = 0;
    5975         843 :                         break;
    5976             :                 }
    5977             : 
    5978          30 :                 ret = btrfs_issue_discard(device->bdev, start, len,
    5979             :                                           &bytes);
    5980          30 :                 if (!ret)
    5981          30 :                         set_extent_bit(&device->alloc_state, start,
    5982          30 :                                        start + bytes - 1, CHUNK_TRIMMED, NULL);
    5983          30 :                 mutex_unlock(&fs_info->chunk_mutex);
    5984             : 
    5985          30 :                 if (ret)
    5986             :                         break;
    5987             : 
    5988          30 :                 start += len;
    5989          30 :                 *trimmed += bytes;
    5990             : 
    5991          30 :                 if (fatal_signal_pending(current)) {
    5992             :                         ret = -ERESTARTSYS;
    5993             :                         break;
    5994             :                 }
    5995             : 
    5996          30 :                 cond_resched();
    5997             :         }
    5998             : 
    5999         843 :         return ret;
    6000             : }
    6001             : 
    6002             : /*
    6003             :  * Trim the whole filesystem by:
    6004             :  * 1) trimming the free space in each block group
    6005             :  * 2) trimming the unallocated space on each device
    6006             :  *
    6007             :  * This will also continue trimming even if a block group or device encounters
    6008             :  * an error.  The return value will be the last error, or 0 if nothing bad
    6009             :  * happens.
    6010             :  */
    6011         848 : int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
    6012             : {
    6013         848 :         struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
    6014         848 :         struct btrfs_block_group *cache = NULL;
    6015         848 :         struct btrfs_device *device;
    6016         848 :         u64 group_trimmed;
    6017         848 :         u64 range_end = U64_MAX;
    6018         848 :         u64 start;
    6019         848 :         u64 end;
    6020         848 :         u64 trimmed = 0;
    6021         848 :         u64 bg_failed = 0;
    6022         848 :         u64 dev_failed = 0;
    6023         848 :         int bg_ret = 0;
    6024         848 :         int dev_ret = 0;
    6025         848 :         int ret = 0;
    6026             : 
    6027         848 :         if (range->start == U64_MAX)
    6028             :                 return -EINVAL;
    6029             : 
    6030             :         /*
    6031             :          * Check range overflow if range->len is set.
    6032             :          * The default range->len is U64_MAX.
    6033             :          */
    6034         843 :         if (range->len != U64_MAX &&
    6035         588 :             check_add_overflow(range->start, range->len, &range_end))
    6036             :                 return -EINVAL;
    6037             : 
    6038         843 :         cache = btrfs_lookup_first_block_group(fs_info, range->start);
    6039        5948 :         for (; cache; cache = btrfs_next_block_group(cache)) {
    6040        4263 :                 if (cache->start >= range_end) {
    6041           1 :                         btrfs_put_block_group(cache);
    6042           1 :                         break;
    6043             :                 }
    6044             : 
    6045        4262 :                 start = max(range->start, cache->start);
    6046        4262 :                 end = min(range_end, cache->start + cache->length);
    6047             : 
    6048        4262 :                 if (end - start >= range->minlen) {
    6049        3895 :                         if (!btrfs_block_group_done(cache)) {
    6050          27 :                                 ret = btrfs_cache_block_group(cache, true);
    6051          27 :                                 if (ret) {
    6052           0 :                                         bg_failed++;
    6053           0 :                                         bg_ret = ret;
    6054           0 :                                         continue;
    6055             :                                 }
    6056             :                         }
    6057        3895 :                         ret = btrfs_trim_block_group(cache,
    6058             :                                                      &group_trimmed,
    6059             :                                                      start,
    6060             :                                                      end,
    6061             :                                                      range->minlen);
    6062             : 
    6063        3895 :                         trimmed += group_trimmed;
    6064        3895 :                         if (ret) {
    6065           5 :                                 bg_failed++;
    6066           5 :                                 bg_ret = ret;
    6067           5 :                                 continue;
    6068             :                         }
    6069             :                 }
    6070             :         }
    6071             : 
    6072         843 :         if (bg_failed)
    6073           1 :                 btrfs_warn(fs_info,
    6074             :                         "failed to trim %llu block group(s), last error %d",
    6075             :                         bg_failed, bg_ret);
    6076             : 
    6077         843 :         mutex_lock(&fs_devices->device_list_mutex);
    6078        1686 :         list_for_each_entry(device, &fs_devices->devices, dev_list) {
    6079        1686 :                 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
    6080           0 :                         continue;
    6081             : 
    6082         843 :                 ret = btrfs_trim_free_extents(device, &group_trimmed);
    6083         843 :                 if (ret) {
    6084             :                         dev_failed++;
    6085             :                         dev_ret = ret;
    6086             :                         break;
    6087             :                 }
    6088             : 
    6089         843 :                 trimmed += group_trimmed;
    6090             :         }
    6091         843 :         mutex_unlock(&fs_devices->device_list_mutex);
    6092             : 
    6093         843 :         if (dev_failed)
    6094           0 :                 btrfs_warn(fs_info,
    6095             :                         "failed to trim %llu device(s), last error %d",
    6096             :                         dev_failed, dev_ret);
    6097         843 :         range->len = trimmed;
    6098         843 :         if (bg_ret)
    6099           1 :                 return bg_ret;
    6100             :         return dev_ret;
    6101             : }

Generated by: LCOV version 1.14