LCOV - code coverage report
Current view: top level - fs/btrfs - extent-tree.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023 Lines: 2281 3008 75.8 %
Date: 2023-07-31 20:08:12 Functions: 95 100 95.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2007 Oracle.  All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/sched.h>
       7             : #include <linux/sched/signal.h>
       8             : #include <linux/pagemap.h>
       9             : #include <linux/writeback.h>
      10             : #include <linux/blkdev.h>
      11             : #include <linux/sort.h>
      12             : #include <linux/rcupdate.h>
      13             : #include <linux/kthread.h>
      14             : #include <linux/slab.h>
      15             : #include <linux/ratelimit.h>
      16             : #include <linux/percpu_counter.h>
      17             : #include <linux/lockdep.h>
      18             : #include <linux/crc32c.h>
      19             : #include "ctree.h"
      20             : #include "extent-tree.h"
      21             : #include "tree-log.h"
      22             : #include "disk-io.h"
      23             : #include "print-tree.h"
      24             : #include "volumes.h"
      25             : #include "raid56.h"
      26             : #include "locking.h"
      27             : #include "free-space-cache.h"
      28             : #include "free-space-tree.h"
      29             : #include "sysfs.h"
      30             : #include "qgroup.h"
      31             : #include "ref-verify.h"
      32             : #include "space-info.h"
      33             : #include "block-rsv.h"
      34             : #include "delalloc-space.h"
      35             : #include "discard.h"
      36             : #include "rcu-string.h"
      37             : #include "zoned.h"
      38             : #include "dev-replace.h"
      39             : #include "fs.h"
      40             : #include "accessors.h"
      41             : #include "root-tree.h"
      42             : #include "file-item.h"
      43             : #include "orphan.h"
      44             : #include "tree-checker.h"
      45             : 
      46             : #undef SCRAMBLE_DELAYED_REFS
      47             : 
      48             : 
      49             : static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
      50             :                                struct btrfs_delayed_ref_node *node, u64 parent,
      51             :                                u64 root_objectid, u64 owner_objectid,
      52             :                                u64 owner_offset, int refs_to_drop,
      53             :                                struct btrfs_delayed_extent_op *extra_op);
      54             : static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
      55             :                                     struct extent_buffer *leaf,
      56             :                                     struct btrfs_extent_item *ei);
      57             : static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
      58             :                                       u64 parent, u64 root_objectid,
      59             :                                       u64 flags, u64 owner, u64 offset,
      60             :                                       struct btrfs_key *ins, int ref_mod);
      61             : static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
      62             :                                      struct btrfs_delayed_ref_node *node,
      63             :                                      struct btrfs_delayed_extent_op *extent_op);
      64             : static int find_next_key(struct btrfs_path *path, int level,
      65             :                          struct btrfs_key *key);
      66             : 
      67             : static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
      68             : {
      69    23685218 :         return (cache->flags & bits) == bits;
      70             : }
      71             : 
      72        3240 : int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
      73             :                               u64 start, u64 num_bytes)
      74             : {
      75        3240 :         u64 end = start + num_bytes - 1;
      76        3240 :         set_extent_bit(&fs_info->excluded_extents, start, end,
      77             :                        EXTENT_UPTODATE, NULL);
      78        3240 :         return 0;
      79             : }
      80             : 
      81       22711 : void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
      82             : {
      83       22711 :         struct btrfs_fs_info *fs_info = cache->fs_info;
      84       22711 :         u64 start, end;
      85             : 
      86       22711 :         start = cache->start;
      87       22711 :         end = start + cache->length - 1;
      88             : 
      89       22711 :         clear_extent_bits(&fs_info->excluded_extents, start, end,
      90             :                           EXTENT_UPTODATE);
      91       22711 : }
      92             : 
      93             : /* simple helper to search for an existing data extent at a given offset */
      94       87872 : int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
      95             : {
      96       87872 :         struct btrfs_root *root = btrfs_extent_root(fs_info, start);
      97       87872 :         int ret;
      98       87872 :         struct btrfs_key key;
      99       87872 :         struct btrfs_path *path;
     100             : 
     101       87872 :         path = btrfs_alloc_path();
     102       87872 :         if (!path)
     103             :                 return -ENOMEM;
     104             : 
     105       87872 :         key.objectid = start;
     106       87872 :         key.offset = len;
     107       87872 :         key.type = BTRFS_EXTENT_ITEM_KEY;
     108       87872 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
     109       87872 :         btrfs_free_path(path);
     110       87872 :         return ret;
     111             : }
     112             : 
     113             : /*
     114             :  * helper function to lookup reference count and flags of a tree block.
     115             :  *
     116             :  * the head node for delayed ref is used to store the sum of all the
     117             :  * reference count modifications queued up in the rbtree. the head
     118             :  * node may also store the extent flags to set. This way you can check
     119             :  * to see what the reference count and extent flags would be once all of
     120             :  * the pending delayed refs are processed.
     121             :  */
     122     8717771 : int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
     123             :                              struct btrfs_fs_info *fs_info, u64 bytenr,
     124             :                              u64 offset, int metadata, u64 *refs, u64 *flags)
     125             : {
     126     8717771 :         struct btrfs_root *extent_root;
     127     8717771 :         struct btrfs_delayed_ref_head *head;
     128     8717771 :         struct btrfs_delayed_ref_root *delayed_refs;
     129     8717771 :         struct btrfs_path *path;
     130     8717771 :         struct btrfs_extent_item *ei;
     131     8717771 :         struct extent_buffer *leaf;
     132     8717771 :         struct btrfs_key key;
     133     8717771 :         u32 item_size;
     134     8717771 :         u64 num_refs;
     135     8717771 :         u64 extent_flags;
     136     8717771 :         int ret;
     137             : 
     138             :         /*
     139             :          * If we don't have skinny metadata, don't bother doing anything
     140             :          * different
     141             :          */
     142     8717771 :         if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
     143           0 :                 offset = fs_info->nodesize;
     144           0 :                 metadata = 0;
     145             :         }
     146             : 
     147     8717771 :         path = btrfs_alloc_path();
     148     8717772 :         if (!path)
     149             :                 return -ENOMEM;
     150             : 
     151     8717772 :         if (!trans) {
     152           0 :                 path->skip_locking = 1;
     153           0 :                 path->search_commit_root = 1;
     154             :         }
     155             : 
     156     8717772 : search_again:
     157     8717822 :         key.objectid = bytenr;
     158     8717822 :         key.offset = offset;
     159     8717822 :         if (metadata)
     160     8717822 :                 key.type = BTRFS_METADATA_ITEM_KEY;
     161             :         else
     162           0 :                 key.type = BTRFS_EXTENT_ITEM_KEY;
     163             : 
     164     8717822 :         extent_root = btrfs_extent_root(fs_info, bytenr);
     165     8717822 :         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
     166     8717821 :         if (ret < 0)
     167           0 :                 goto out_free;
     168             : 
     169     8717821 :         if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
     170         721 :                 if (path->slots[0]) {
     171         721 :                         path->slots[0]--;
     172         721 :                         btrfs_item_key_to_cpu(path->nodes[0], &key,
     173             :                                               path->slots[0]);
     174         721 :                         if (key.objectid == bytenr &&
     175           0 :                             key.type == BTRFS_EXTENT_ITEM_KEY &&
     176           0 :                             key.offset == fs_info->nodesize)
     177             :                                 ret = 0;
     178             :                 }
     179             :         }
     180             : 
     181     8717821 :         if (ret == 0) {
     182     8717100 :                 leaf = path->nodes[0];
     183     8717100 :                 item_size = btrfs_item_size(leaf, path->slots[0]);
     184     8717100 :                 if (item_size >= sizeof(*ei)) {
     185     8717100 :                         ei = btrfs_item_ptr(leaf, path->slots[0],
     186             :                                             struct btrfs_extent_item);
     187     8717100 :                         num_refs = btrfs_extent_refs(leaf, ei);
     188     8717101 :                         extent_flags = btrfs_extent_flags(leaf, ei);
     189             :                 } else {
     190           0 :                         ret = -EINVAL;
     191           0 :                         btrfs_print_v0_err(fs_info);
     192           0 :                         if (trans)
     193           0 :                                 btrfs_abort_transaction(trans, ret);
     194             :                         else
     195           0 :                                 btrfs_handle_fs_error(fs_info, ret, NULL);
     196             : 
     197           0 :                         goto out_free;
     198             :                 }
     199             : 
     200     8717101 :                 BUG_ON(num_refs == 0);
     201             :         } else {
     202             :                 num_refs = 0;
     203             :                 extent_flags = 0;
     204             :                 ret = 0;
     205             :         }
     206             : 
     207     8717822 :         if (!trans)
     208           0 :                 goto out;
     209             : 
     210     8717822 :         delayed_refs = &trans->transaction->delayed_refs;
     211     8717822 :         spin_lock(&delayed_refs->lock);
     212     8717822 :         head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
     213     8717822 :         if (head) {
     214     2520512 :                 if (!mutex_trylock(&head->mutex)) {
     215          50 :                         refcount_inc(&head->refs);
     216          50 :                         spin_unlock(&delayed_refs->lock);
     217             : 
     218          50 :                         btrfs_release_path(path);
     219             : 
     220             :                         /*
     221             :                          * Mutex was contended, block until it's released and try
     222             :                          * again
     223             :                          */
     224          50 :                         mutex_lock(&head->mutex);
     225          50 :                         mutex_unlock(&head->mutex);
     226          50 :                         btrfs_put_delayed_ref_head(head);
     227          50 :                         goto search_again;
     228             :                 }
     229     2520462 :                 spin_lock(&head->lock);
     230     2520462 :                 if (head->extent_op && head->extent_op->update_flags)
     231        2506 :                         extent_flags |= head->extent_op->flags_to_set;
     232             :                 else
     233     2517956 :                         BUG_ON(num_refs == 0);
     234             : 
     235     2520462 :                 num_refs += head->ref_mod;
     236     2520462 :                 spin_unlock(&head->lock);
     237     2520462 :                 mutex_unlock(&head->mutex);
     238             :         }
     239     8717772 :         spin_unlock(&delayed_refs->lock);
     240     8717772 : out:
     241     8717772 :         WARN_ON(num_refs == 0);
     242     8717772 :         if (refs)
     243     8717772 :                 *refs = num_refs;
     244     8717772 :         if (flags)
     245     8717772 :                 *flags = extent_flags;
     246           0 : out_free:
     247     8717772 :         btrfs_free_path(path);
     248     8717772 :         return ret;
     249             : }
     250             : 
     251             : /*
     252             :  * Back reference rules.  Back refs have three main goals:
     253             :  *
     254             :  * 1) differentiate between all holders of references to an extent so that
     255             :  *    when a reference is dropped we can make sure it was a valid reference
     256             :  *    before freeing the extent.
     257             :  *
     258             :  * 2) Provide enough information to quickly find the holders of an extent
     259             :  *    if we notice a given block is corrupted or bad.
     260             :  *
     261             :  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
     262             :  *    maintenance.  This is actually the same as #2, but with a slightly
     263             :  *    different use case.
     264             :  *
     265             :  * There are two kinds of back refs. The implicit back refs is optimized
     266             :  * for pointers in non-shared tree blocks. For a given pointer in a block,
     267             :  * back refs of this kind provide information about the block's owner tree
     268             :  * and the pointer's key. This information allows us to find the block by
     269             :  * b-tree searching. The full back refs is for pointers in tree blocks not
     270             :  * referenced by their owner trees. The location of tree block is recorded
     271             :  * in the back refs. Actually the full back refs is generic, and can be
     272             :  * used in all cases the implicit back refs is used. The major shortcoming
     273             :  * of the full back refs is its overhead. Every time a tree block gets
     274             :  * COWed, we have to update back refs entry for all pointers in it.
     275             :  *
     276             :  * For a newly allocated tree block, we use implicit back refs for
     277             :  * pointers in it. This means most tree related operations only involve
     278             :  * implicit back refs. For a tree block created in old transaction, the
     279             :  * only way to drop a reference to it is COW it. So we can detect the
     280             :  * event that tree block loses its owner tree's reference and do the
     281             :  * back refs conversion.
     282             :  *
     283             :  * When a tree block is COWed through a tree, there are four cases:
     284             :  *
     285             :  * The reference count of the block is one and the tree is the block's
     286             :  * owner tree. Nothing to do in this case.
     287             :  *
     288             :  * The reference count of the block is one and the tree is not the
     289             :  * block's owner tree. In this case, full back refs is used for pointers
     290             :  * in the block. Remove these full back refs, add implicit back refs for
     291             :  * every pointer in the new block.
     292             :  *
     293             :  * The reference count of the block is greater than one and the tree is
     294             :  * the block's owner tree. In this case, implicit back refs is used for
     295             :  * pointers in the block. Add full back refs for every pointer in the
     296             :  * block, increase lower level extents' reference counts. The original
     297             :  * implicit back refs are entailed to the new block.
     298             :  *
     299             :  * The reference count of the block is greater than one and the tree is
     300             :  * not the block's owner tree. Add implicit back refs for every pointer in
     301             :  * the new block, increase lower level extents' reference count.
     302             :  *
     303             :  * Back Reference Key composing:
     304             :  *
     305             :  * The key objectid corresponds to the first byte in the extent,
     306             :  * The key type is used to differentiate between types of back refs.
     307             :  * There are different meanings of the key offset for different types
     308             :  * of back refs.
     309             :  *
     310             :  * File extents can be referenced by:
     311             :  *
     312             :  * - multiple snapshots, subvolumes, or different generations in one subvol
     313             :  * - different files inside a single subvolume
     314             :  * - different offsets inside a file (bookend extents in file.c)
     315             :  *
     316             :  * The extent ref structure for the implicit back refs has fields for:
     317             :  *
     318             :  * - Objectid of the subvolume root
     319             :  * - objectid of the file holding the reference
     320             :  * - original offset in the file
     321             :  * - how many bookend extents
     322             :  *
     323             :  * The key offset for the implicit back refs is hash of the first
     324             :  * three fields.
     325             :  *
     326             :  * The extent ref structure for the full back refs has field for:
     327             :  *
     328             :  * - number of pointers in the tree leaf
     329             :  *
     330             :  * The key offset for the full back refs is the first byte of
     331             :  * the tree leaf
     332             :  *
     333             :  * When a file extent is allocated, the implicit back refs is used.
     334             :  * The fields are filled in:
     335             :  *
     336             :  *     (root_key.objectid, inode objectid, offset in file, 1)
     337             :  *
     338             :  * When a file extent is removed by file truncation, we find the
     339             :  * corresponding implicit back refs and check the following fields:
     340             :  *
     341             :  *     (btrfs_header_owner(leaf), inode objectid, offset in file)
     342             :  *
     343             :  * Btree extents can be referenced by:
     344             :  *
     345             :  * - Different subvolumes
     346             :  *
     347             :  * Both the implicit back refs and the full back refs for tree blocks
     348             :  * only consist of key. The key offset for the implicit back refs is
     349             :  * objectid of block's owner tree. The key offset for the full back refs
     350             :  * is the first byte of parent block.
     351             :  *
     352             :  * When implicit back refs is used, information about the lowest key and
     353             :  * level of the tree block are required. This information is stored in
     354             :  * tree block info structure.
     355             :  */
     356             : 
     357             : /*
     358             :  * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
     359             :  * is_data == BTRFS_REF_TYPE_DATA, data type is required,
     360             :  * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
     361             :  */
     362   344209226 : int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
     363             :                                      struct btrfs_extent_inline_ref *iref,
     364             :                                      enum btrfs_inline_ref_type is_data)
     365             : {
     366   344209226 :         int type = btrfs_extent_inline_ref_type(eb, iref);
     367   344179502 :         u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
     368             : 
     369   344180061 :         if (type == BTRFS_TREE_BLOCK_REF_KEY ||
     370   344180061 :             type == BTRFS_SHARED_BLOCK_REF_KEY ||
     371   291346511 :             type == BTRFS_SHARED_DATA_REF_KEY ||
     372   291346511 :             type == BTRFS_EXTENT_DATA_REF_KEY) {
     373   344183489 :                 if (is_data == BTRFS_REF_TYPE_BLOCK) {
     374    29797488 :                         if (type == BTRFS_TREE_BLOCK_REF_KEY)
     375             :                                 return type;
     376    23562031 :                         if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
     377    23562036 :                                 ASSERT(eb->fs_info);
     378             :                                 /*
     379             :                                  * Every shared one has parent tree block,
     380             :                                  * which must be aligned to sector size.
     381             :                                  */
     382    23562036 :                                 if (offset &&
     383    23562021 :                                     IS_ALIGNED(offset, eb->fs_info->sectorsize))
     384             :                                         return type;
     385             :                         }
     386   314386001 :                 } else if (is_data == BTRFS_REF_TYPE_DATA) {
     387   281806178 :                         if (type == BTRFS_EXTENT_DATA_REF_KEY)
     388             :                                 return type;
     389    40113018 :                         if (type == BTRFS_SHARED_DATA_REF_KEY) {
     390    40113087 :                                 ASSERT(eb->fs_info);
     391             :                                 /*
     392             :                                  * Every shared one has parent tree block,
     393             :                                  * which must be aligned to sector size.
     394             :                                  */
     395    40113087 :                                 if (offset &&
     396    40113152 :                                     IS_ALIGNED(offset, eb->fs_info->sectorsize))
     397             :                                         return type;
     398             :                         }
     399             :                 } else {
     400             :                         ASSERT(is_data == BTRFS_REF_TYPE_ANY);
     401             :                         return type;
     402             :                 }
     403             :         }
     404             : 
     405           0 :         btrfs_print_leaf(eb);
     406           0 :         btrfs_err(eb->fs_info,
     407             :                   "eb %llu iref 0x%lx invalid extent inline ref type %d",
     408             :                   eb->start, (unsigned long)iref, type);
     409           0 :         WARN_ON(1);
     410             : 
     411             :         return BTRFS_REF_TYPE_INVALID;
     412             : }
     413             : 
     414   481407809 : u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
     415             : {
     416   481407809 :         u32 high_crc = ~(u32)0;
     417   481407809 :         u32 low_crc = ~(u32)0;
     418   481407809 :         __le64 lenum;
     419             : 
     420   481407809 :         lenum = cpu_to_le64(root_objectid);
     421   481407809 :         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
     422   481466592 :         lenum = cpu_to_le64(owner);
     423   481466592 :         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
     424   481523809 :         lenum = cpu_to_le64(offset);
     425   481523809 :         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
     426             : 
     427   481556783 :         return ((u64)high_crc << 31) ^ (u64)low_crc;
     428             : }
     429             : 
     430   234209774 : static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
     431             :                                      struct btrfs_extent_data_ref *ref)
     432             : {
     433   468414881 :         return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
     434             :                                     btrfs_extent_data_ref_objectid(leaf, ref),
     435             :                                     btrfs_extent_data_ref_offset(leaf, ref));
     436             : }
     437             : 
     438   235655287 : static int match_extent_data_ref(struct extent_buffer *leaf,
     439             :                                  struct btrfs_extent_data_ref *ref,
     440             :                                  u64 root_objectid, u64 owner, u64 offset)
     441             : {
     442   471158707 :         if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
     443   100344708 :             btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
     444             :             btrfs_extent_data_ref_offset(leaf, ref) != offset)
     445   234207874 :                 return 0;
     446             :         return 1;
     447             : }
     448             : 
     449      223934 : static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
     450             :                                            struct btrfs_path *path,
     451             :                                            u64 bytenr, u64 parent,
     452             :                                            u64 root_objectid,
     453             :                                            u64 owner, u64 offset)
     454             : {
     455      223934 :         struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
     456      223934 :         struct btrfs_key key;
     457      223934 :         struct btrfs_extent_data_ref *ref;
     458      223934 :         struct extent_buffer *leaf;
     459      223934 :         u32 nritems;
     460      223934 :         int ret;
     461      223934 :         int recow;
     462      223934 :         int err = -ENOENT;
     463             : 
     464      223934 :         key.objectid = bytenr;
     465      223934 :         if (parent) {
     466       86511 :                 key.type = BTRFS_SHARED_DATA_REF_KEY;
     467       86511 :                 key.offset = parent;
     468             :         } else {
     469      137423 :                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
     470      137423 :                 key.offset = hash_extent_data_ref(root_objectid,
     471             :                                                   owner, offset);
     472             :         }
     473             : again:
     474      223934 :         recow = 0;
     475      223934 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
     476      223934 :         if (ret < 0) {
     477           0 :                 err = ret;
     478           0 :                 goto fail;
     479             :         }
     480             : 
     481      223934 :         if (parent) {
     482       86511 :                 if (!ret)
     483             :                         return 0;
     484           0 :                 goto fail;
     485             :         }
     486             : 
     487      137423 :         leaf = path->nodes[0];
     488      137423 :         nritems = btrfs_header_nritems(leaf);
     489      137423 :         while (1) {
     490      137423 :                 if (path->slots[0] >= nritems) {
     491           0 :                         ret = btrfs_next_leaf(root, path);
     492           0 :                         if (ret < 0)
     493           0 :                                 err = ret;
     494           0 :                         if (ret)
     495           0 :                                 goto fail;
     496             : 
     497           0 :                         leaf = path->nodes[0];
     498           0 :                         nritems = btrfs_header_nritems(leaf);
     499           0 :                         recow = 1;
     500             :                 }
     501             : 
     502      137423 :                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
     503      137423 :                 if (key.objectid != bytenr ||
     504      137423 :                     key.type != BTRFS_EXTENT_DATA_REF_KEY)
     505           0 :                         goto fail;
     506             : 
     507      137423 :                 ref = btrfs_item_ptr(leaf, path->slots[0],
     508             :                                      struct btrfs_extent_data_ref);
     509             : 
     510      137423 :                 if (match_extent_data_ref(leaf, ref, root_objectid,
     511             :                                           owner, offset)) {
     512      137423 :                         if (recow) {
     513           0 :                                 btrfs_release_path(path);
     514           0 :                                 goto again;
     515             :                         }
     516             :                         err = 0;
     517             :                         break;
     518             :                 }
     519           0 :                 path->slots[0]++;
     520             :         }
     521             : fail:
     522             :         return err;
     523             : }
     524             : 
     525    13662563 : static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
     526             :                                            struct btrfs_path *path,
     527             :                                            u64 bytenr, u64 parent,
     528             :                                            u64 root_objectid, u64 owner,
     529             :                                            u64 offset, int refs_to_add)
     530             : {
                               /*
                                * Add refs_to_add references to the keyed data back ref item of
                                * extent @bytenr in the extent root, creating the item if needed.
                                *
                                * A non-zero @parent selects a BTRFS_SHARED_DATA_REF_KEY item whose
                                * key offset is @parent; otherwise a BTRFS_EXTENT_DATA_REF_KEY item
                                * is used, keyed by hash_extent_data_ref(root_objectid, owner,
                                * offset).
                                *
                                * Returns 0 on success, or the error from btrfs_insert_empty_item()
                                * (other than -EEXIST). @path is released before returning.
                                */
     531    13662563 :         struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
     532    13662501 :         struct btrfs_key key;
     533    13662501 :         struct extent_buffer *leaf;
     534    13662501 :         u32 size;
     535    13662501 :         u32 num_refs;
     536    13662501 :         int ret;
     537             : 
     538    13662501 :         key.objectid = bytenr;
     539    13662501 :         if (parent) {
     540      658303 :                 key.type = BTRFS_SHARED_DATA_REF_KEY;
     541      658303 :                 key.offset = parent;
     542      658303 :                 size = sizeof(struct btrfs_shared_data_ref);
     543             :         } else {
     544    13004198 :                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
     545    13004198 :                 key.offset = hash_extent_data_ref(root_objectid,
     546             :                                                   owner, offset);
     547    13004222 :                 size = sizeof(struct btrfs_extent_data_ref);
     548             :         }
     549             : 
                               /*
                                * -EEXIST is not a failure here: an item with this key already
                                * exists and its count is increased below instead.
                                */
     550    13662525 :         ret = btrfs_insert_empty_item(trans, root, path, &key, size);
     551    13661917 :         if (ret && ret != -EEXIST)
     552           0 :                 goto fail;
     553             : 
     554    13661917 :         leaf = path->nodes[0];
     555    13661917 :         if (parent) {
     556      658277 :                 struct btrfs_shared_data_ref *ref;
     557      658277 :                 ref = btrfs_item_ptr(leaf, path->slots[0],
     558             :                                      struct btrfs_shared_data_ref);
     559      658278 :                 if (ret == 0) {
     560      658246 :                         btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
     561             :                 } else {
     562          32 :                         num_refs = btrfs_shared_data_ref_count(leaf, ref);
     563          32 :                         num_refs += refs_to_add;
     564          32 :                         btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
     565             :                 }
     566             :         } else {
     567             :                 struct btrfs_extent_data_ref *ref;
                                       /*
                                        * The key offset is only a hash, so an existing item with
                                        * the same key may describe a different (root, owner,
                                        * offset). In that case retry with the next key offset
                                        * until a matching item is found or a new one is inserted.
                                        */
     568    13003640 :                 while (ret == -EEXIST) {
     569        3344 :                         ref = btrfs_item_ptr(leaf, path->slots[0],
     570             :                                              struct btrfs_extent_data_ref);
     571        3301 :                         if (match_extent_data_ref(leaf, ref, root_objectid,
     572             :                                                   owner, offset))
     573             :                                 break;
     574           0 :                         btrfs_release_path(path);
     575           0 :                         key.offset++;
     576           0 :                         ret = btrfs_insert_empty_item(trans, root, path, &key,
     577             :                                                       size);
     578           0 :                         if (ret && ret != -EEXIST)
     579           0 :                                 goto fail;
     580             : 
     581           0 :                         leaf = path->nodes[0];
     582             :                 }
     583    13003597 :                 ref = btrfs_item_ptr(leaf, path->slots[0],
     584             :                                      struct btrfs_extent_data_ref);
     585    13003893 :                 if (ret == 0) {
                                               /* Freshly inserted item: fill in every field. */
     586    13000592 :                         btrfs_set_extent_data_ref_root(leaf, ref,
     587             :                                                        root_objectid);
     588    13000065 :                         btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
     589    13000085 :                         btrfs_set_extent_data_ref_offset(leaf, ref, offset);
     590    13000062 :                         btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
     591             :                 } else {
                                               /* Matching item already present: only bump the count. */
     592        3301 :                         num_refs = btrfs_extent_data_ref_count(leaf, ref);
     593        3301 :                         num_refs += refs_to_add;
     594        3301 :                         btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
     595             :                 }
     596             :         }
     597    13661849 :         btrfs_mark_buffer_dirty(leaf);
                               /* ret may still hold -EEXIST from the update case; report success. */
     598    13661849 :         ret = 0;
     599    13662301 : fail:
     600    13662301 :         btrfs_release_path(path);
     601    13662572 :         return ret;
     602             : }
     603             : 
     604      223662 : static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
     605             :                                            struct btrfs_root *root,
     606             :                                            struct btrfs_path *path,
     607             :                                            int refs_to_drop)
     608             : {
                               /*
                                * Drop refs_to_drop references from the keyed data back ref item
                                * that @path points at (path->nodes[0], path->slots[0]).
                                *
                                * The item is deleted once its count reaches zero; otherwise the
                                * reduced count is written back and the leaf is marked dirty.
                                *
                                * Returns 0 or the result of btrfs_del_item(). A
                                * BTRFS_EXTENT_REF_V0_KEY item aborts the transaction and returns
                                * -EINVAL. Any other key type, or dropping more references than
                                * the item holds, triggers BUG().
                                */
     609      223662 :         struct btrfs_key key;
     610      223662 :         struct btrfs_extent_data_ref *ref1 = NULL;
     611      223662 :         struct btrfs_shared_data_ref *ref2 = NULL;
     612      223662 :         struct extent_buffer *leaf;
     613      223662 :         u32 num_refs = 0;
     614      223662 :         int ret = 0;
     615             : 
     616      223662 :         leaf = path->nodes[0];
     617      223662 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
     618             : 
                               /* Read the current count; the item layout depends on the key type. */
     619      223662 :         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
     620      137268 :                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
     621             :                                       struct btrfs_extent_data_ref);
     622      137268 :                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
     623       86394 :         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
     624       86394 :                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
     625             :                                       struct btrfs_shared_data_ref);
     626       86394 :                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
     627           0 :         } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
     628           0 :                 btrfs_print_v0_err(trans->fs_info);
     629           0 :                 btrfs_abort_transaction(trans, -EINVAL);
     630           0 :                 return -EINVAL;
     631             :         } else {
     632           0 :                 BUG();
     633             :         }
     634             : 
     635      223662 :         BUG_ON(num_refs < refs_to_drop);
     636      223662 :         num_refs -= refs_to_drop;
     637             : 
     638      223662 :         if (num_refs == 0) {
     639      220396 :                 ret = btrfs_del_item(trans, root, path);
     640             :         } else {
     641        3266 :                 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
     642        3255 :                         btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
     643          11 :                 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
     644          11 :                         btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
     645        3266 :                 btrfs_mark_buffer_dirty(leaf);
     646             :         }
     647             :         return ret;
     648             : }
     649             : 
     650     1015682 : static noinline u32 extent_data_ref_count(struct btrfs_path *path,
     651             :                                           struct btrfs_extent_inline_ref *iref)
     652             : {
                               /*
                                * Return the reference count stored in a data back ref.
                                *
                                * With a non-NULL @iref the count is read from that inline ref;
                                * otherwise it is read from the keyed item at path->slots[0] of
                                * path->nodes[0]. If that keyed item is neither an extent data ref
                                * nor a shared data ref, a warning is emitted and 0 is returned.
                                */
     653     1015682 :         struct btrfs_key key;
     654     1015682 :         struct extent_buffer *leaf;
     655     1015682 :         struct btrfs_extent_data_ref *ref1;
     656     1015682 :         struct btrfs_shared_data_ref *ref2;
     657     1015682 :         u32 num_refs = 0;
     658     1015682 :         int type;
     659             : 
     660     1015682 :         leaf = path->nodes[0];
     661     1015682 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
     662             : 
     663     1015682 :         BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
     664     1015682 :         if (iref) {
     665             :                 /*
     666             :                  * If type is invalid, we should have bailed out earlier than
     667             :                  * this call.
     668             :                  */
     669     1015410 :                 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
     670     1015410 :                 ASSERT(type != BTRFS_REF_TYPE_INVALID);
     671     1015410 :                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
                                               /* The extent data ref starts at the inline ref's offset field. */
     672      991703 :                         ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
     673      991703 :                         num_refs = btrfs_extent_data_ref_count(leaf, ref1);
     674             :                 } else {
                                               /*
                                                * Every other type is treated as a shared data ref,
                                                * which is read from directly behind the inline ref.
                                                */
     675       23707 :                         ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
     676       23707 :                         num_refs = btrfs_shared_data_ref_count(leaf, ref2);
     677             :                 }
     678         272 :         } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
     679         155 :                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
     680             :                                       struct btrfs_extent_data_ref);
     681         155 :                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
     682         117 :         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
     683         117 :                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
     684             :                                       struct btrfs_shared_data_ref);
     685         117 :                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
     686             :         } else {
     687           0 :                 WARN_ON(1);
     688             :         }
     689     1015682 :         return num_refs;
     690             : }
     691             : 
     692           0 : static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
     693             :                                           struct btrfs_path *path,
     694             :                                           u64 bytenr, u64 parent,
     695             :                                           u64 root_objectid)
     696             : {
                               /*
                                * Search the extent root for the keyed tree block back ref of
                                * @bytenr. A non-zero @parent looks for a
                                * BTRFS_SHARED_BLOCK_REF_KEY item keyed by @parent, otherwise for a
                                * BTRFS_TREE_BLOCK_REF_KEY item keyed by @root_objectid.
                                *
                                * Returns 0 if the exact key exists, -ENOENT if it does not, or
                                * the negative error from btrfs_search_slot(). @path is not
                                * released here.
                                */
     697           0 :         struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
     698           0 :         struct btrfs_key key;
     699           0 :         int ret;
     700             : 
     701           0 :         key.objectid = bytenr;
     702           0 :         if (parent) {
     703           0 :                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
     704           0 :                 key.offset = parent;
     705             :         } else {
     706           0 :                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
     707           0 :                 key.offset = root_objectid;
     708             :         }
     709             : 
                               /*
                                * NOTE(review): the -1 / 1 arguments presumably prepare the path
                                * for a later item removal with COW - confirm against
                                * btrfs_search_slot().
                                */
     710           0 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
                               /* A positive result means "no exact match"; report it as -ENOENT. */
     711           0 :         if (ret > 0)
     712           0 :                 ret = -ENOENT;
     713           0 :         return ret;
     714             : }
     715             : 
     716           0 : static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
     717             :                                           struct btrfs_path *path,
     718             :                                           u64 bytenr, u64 parent,
     719             :                                           u64 root_objectid)
     720             : {
                               /*
                                * Insert a keyed tree block back ref for @bytenr into the extent
                                * root. The key is built the same way as in
                                * lookup_tree_block_ref(): shared block ref keyed by @parent when
                                * @parent is non-zero, tree block ref keyed by @root_objectid
                                * otherwise.
                                *
                                * Returns the result of btrfs_insert_empty_item(); @path is
                                * released before returning.
                                */
     721           0 :         struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
     722           0 :         struct btrfs_key key;
     723           0 :         int ret;
     724             : 
     725           0 :         key.objectid = bytenr;
     726           0 :         if (parent) {
     727           0 :                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
     728           0 :                 key.offset = parent;
     729             :         } else {
     730           0 :                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
     731           0 :                 key.offset = root_objectid;
     732             :         }
     733             : 
                               /* The item carries no payload (size 0); the key alone is the ref. */
     734           0 :         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
     735           0 :         btrfs_release_path(path);
     736           0 :         return ret;
     737             : }
     738             : 
     739             : static inline int extent_ref_type(u64 parent, u64 owner)
     740             : {
                               /*
                                * Map (parent, owner) to the back ref key type:
                                *
                                *   owner <  BTRFS_FIRST_FREE_OBJECTID: tree block ref types
                                *   owner >= BTRFS_FIRST_FREE_OBJECTID: data ref types
                                *
                                * In both groups a non-zero @parent selects the shared variant
                                * (BTRFS_SHARED_BLOCK_REF_KEY / BTRFS_SHARED_DATA_REF_KEY).
                                */
     741    65705321 :         int type;
     742    65705321 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
     743    26245367 :                 if (parent > 0)
     744             :                         type = BTRFS_SHARED_BLOCK_REF_KEY;
     745             :                 else
     746     5625412 :                         type = BTRFS_TREE_BLOCK_REF_KEY;
     747             :         } else {
     748    39459954 :                 if (parent > 0)
     749             :                         type = BTRFS_SHARED_DATA_REF_KEY;
     750             :                 else
     751    16066948 :                         type = BTRFS_EXTENT_DATA_REF_KEY;
     752             :         }
     753    65705321 :         return type;
     754             : }
     755             : 
     756    19924655 : static int find_next_key(struct btrfs_path *path, int level,
     757             :                          struct btrfs_key *key)
     758             : 
     759             : {
                               /*
                                * Find the key that follows the current position of @path,
                                * starting at @level and walking towards the root.
                                *
                                * At the first level whose current slot is not the last one in its
                                * node, the key at slot + 1 is copied to @key and 0 is returned.
                                * Returns 1 (leaving @key untouched) if a level without a node is
                                * reached or all levels up to BTRFS_MAX_LEVEL are exhausted.
                                */
     760    21409883 :         for (; level < BTRFS_MAX_LEVEL; level++) {
     761    21409883 :                 if (!path->nodes[level])
     762             :                         break;
                                       /* Current slot is the last one here: look one level up. */
     763    20975515 :                 if (path->slots[level] + 1 >=
     764             :                     btrfs_header_nritems(path->nodes[level]))
     765     1485228 :                         continue;
                                       /* Level 0 holds item keys, higher levels hold node keys. */
     766    19490287 :                 if (level == 0)
     767    14856565 :                         btrfs_item_key_to_cpu(path->nodes[level], key,
     768             :                                               path->slots[level] + 1);
     769             :                 else
     770     4633722 :                         btrfs_node_key_to_cpu(path->nodes[level], key,
     771             :                                               path->slots[level] + 1);
     772             :                 return 0;
     773             :         }
     774             :         return 1;
     775             : }
     776             : 
     777             : /*
     778             :  * Look for an inline back ref. If the back ref is found, *ref_ret is set
     779             :  * to the address of the inline back ref, and 0 is returned.
     780             :  *
     781             :  * If the back ref isn't found, *ref_ret is set to the address where it
     782             :  * should be inserted, and -ENOENT is returned.
     783             :  *
     784             :  * If insert is true and there are too many inline back refs, the path
     785             :  * points to the extent item, and -EAGAIN is returned.
                     :  * -EAGAIN is also returned (insert only) when the key following the
                     :  * extent item has objectid @bytenr and a type below
                     :  * BTRFS_BLOCK_GROUP_ITEM_KEY, i.e. a keyed item for this extent exists.
     786             :  *
                     :  * Other results: the negative error from btrfs_search_slot(); -ENOENT
                     :  * (insert false) or -EIO (insert true) if the extent item itself is
                     :  * not found; -EINVAL, with the transaction aborted, if the item is
                     :  * smaller than struct btrfs_extent_item; -EUCLEAN for an invalid
                     :  * inline ref type or an item whose inline refs overrun its end.
                     :  * On every path that jumps to "out" (-EAGAIN included) *ref_ret is
                     :  * not written.
                     :  *
     787             :  * NOTE: inline back refs are ordered in the same way that back ref
     788             :  *       items in the tree are ordered.
     789             :  */
     790             : static noinline_for_stack
     791    50673653 : int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
     792             :                                  struct btrfs_path *path,
     793             :                                  struct btrfs_extent_inline_ref **ref_ret,
     794             :                                  u64 bytenr, u64 num_bytes,
     795             :                                  u64 parent, u64 root_objectid,
     796             :                                  u64 owner, u64 offset, int insert)
     797             : {
     798    50673653 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     799    50673653 :         struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr);
     800    50673494 :         struct btrfs_key key;
     801    50673494 :         struct extent_buffer *leaf;
     802    50673494 :         struct btrfs_extent_item *ei;
     803    50673494 :         struct btrfs_extent_inline_ref *iref;
     804    50673494 :         u64 flags;
     805    50673494 :         u64 item_size;
     806    50673494 :         unsigned long ptr;
     807    50673494 :         unsigned long end;
     808    50673494 :         int extra_size;
     809    50673494 :         int type;
     810    50673494 :         int want;
     811    50673494 :         int ret;
     812    50673494 :         int err = 0;
     813    50673494 :         bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
     814    50673494 :         int needed;
     815             : 
     816    50673494 :         key.objectid = bytenr;
     817    50673494 :         key.type = BTRFS_EXTENT_ITEM_KEY;
     818    50673494 :         key.offset = num_bytes;
     819             : 
     820    50673494 :         want = extent_ref_type(parent, owner);
                               /*
                                * extra_size is passed to btrfs_search_slot() below: the size of
                                * one inline ref of the wanted type when inserting, -1 otherwise.
                                * The two path flags set here are cleared again at "out".
                                */
     821    50673494 :         if (insert) {
     822    28764274 :                 extra_size = btrfs_extent_inline_ref_size(want);
     823    28764274 :                 path->search_for_extension = 1;
     824    28764274 :                 path->keep_locks = 1;
     825             :         } else
     826             :                 extra_size = -1;
     827             : 
     828             :         /*
     829             :          * Owner is our level, so we can just add one to get the level for the
     830             :          * block we are interested in.
     831             :          */
     832    50673494 :         if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
     833    19819096 :                 key.type = BTRFS_METADATA_ITEM_KEY;
     834    19819096 :                 key.offset = owner;
     835             :         }
     836             : 
     837    30854398 : again:
     838    50673494 :         ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
     839    50674130 :         if (ret < 0) {
     840           0 :                 err = ret;
     841           0 :                 goto out;
     842             :         }
     843             : 
     844             :         /*
     845             :          * We may be a newly converted file system which still has the old fat
     846             :          * extent entries for metadata, so try and see if we have one of those.
     847             :          */
     848    50674130 :         if (ret > 0 && skinny_metadata) {
     849           0 :                 skinny_metadata = false;
     850           0 :                 if (path->slots[0]) {
     851           0 :                         path->slots[0]--;
     852           0 :                         btrfs_item_key_to_cpu(path->nodes[0], &key,
     853             :                                               path->slots[0]);
     854           0 :                         if (key.objectid == bytenr &&
     855           0 :                             key.type == BTRFS_EXTENT_ITEM_KEY &&
     856           0 :                             key.offset == num_bytes)
     857             :                                 ret = 0;
     858             :                 }
     859           0 :                 if (ret) {
     860           0 :                         key.objectid = bytenr;
     861           0 :                         key.type = BTRFS_EXTENT_ITEM_KEY;
     862           0 :                         key.offset = num_bytes;
     863           0 :                         btrfs_release_path(path);
     864           0 :                         goto again;
     865             :                 }
     866             :         }
     867             : 
     868    50674130 :         if (ret && !insert) {
     869           0 :                 err = -ENOENT;
     870           0 :                 goto out;
     871    50674130 :         } else if (WARN_ON(ret)) {
     872           0 :                 err = -EIO;
     873           0 :                 goto out;
     874             :         }
     875             : 
     876    50674130 :         leaf = path->nodes[0];
     877    50674130 :         item_size = btrfs_item_size(leaf, path->slots[0]);
     878    50674125 :         if (unlikely(item_size < sizeof(*ei))) {
     879           0 :                 err = -EINVAL;
     880           0 :                 btrfs_print_v0_err(fs_info);
     881           0 :                 btrfs_abort_transaction(trans, err);
     882           0 :                 goto out;
     883             :         }
     884             : 
     885    50674125 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
     886    50674126 :         flags = btrfs_extent_flags(leaf, ei);
     887             : 
                               /* The inline refs occupy [ptr, end) behind the extent item header. */
     888    50674127 :         ptr = (unsigned long)(ei + 1);
     889    50674127 :         end = (unsigned long)ei + item_size;
     890             : 
     891    50674127 :         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
     892           0 :                 ptr += sizeof(struct btrfs_tree_block_info);
     893           0 :                 BUG_ON(ptr > end);
     894             :         }
     895             : 
     896    50674127 :         if (owner >= BTRFS_FIRST_FREE_OBJECTID)
     897             :                 needed = BTRFS_REF_TYPE_DATA;
     898             :         else
     899    19819153 :                 needed = BTRFS_REF_TYPE_BLOCK;
     900             : 
                               /*
                                * Scan the inline refs. Entries with a smaller type than the wanted
                                * one are skipped and the scan stops at the first entry with a
                                * larger type. Among entries of the wanted type it stops on a match
                                * (err = 0) or at the first entry whose hash / offset is smaller
                                * than the wanted value. Either way ptr is left at the position
                                * reported through *ref_ret.
                                */
     901    50674127 :         err = -ENOENT;
     902   310181380 :         while (1) {
     903   310181380 :                 if (ptr >= end) {
     904     2163760 :                         if (ptr > end) {
     905           0 :                                 err = -EUCLEAN;
     906           0 :                                 btrfs_print_leaf(path->nodes[0]);
     907           0 :                                 btrfs_crit(fs_info,
     908             : "overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu",
     909             :                                         path->slots[0], root_objectid, owner, offset, parent);
     910             :                         }
     911             :                         break;
     912             :                 }
     913   308017620 :                 iref = (struct btrfs_extent_inline_ref *)ptr;
     914   308017620 :                 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
     915   307978301 :                 if (type == BTRFS_REF_TYPE_INVALID) {
     916           0 :                         err = -EUCLEAN;
     917           0 :                         goto out;
     918             :                 }
     919             : 
     920   307978301 :                 if (want < type)
     921             :                         break;
     922   307900827 :                 if (want > type) {
     923     5598734 :                         ptr += btrfs_extent_inline_ref_size(type);
     924     5598734 :                         continue;
     925             :                 }
     926             : 
     927   302302093 :                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
     928   235517461 :                         struct btrfs_extent_data_ref *dref;
     929   235517461 :                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
     930   235517461 :                         if (match_extent_data_ref(leaf, dref, root_objectid,
     931             :                                                   owner, offset)) {
     932             :                                 err = 0;
     933             :                                 break;
     934             :                         }
     935   468488810 :                         if (hash_extent_data_ref_item(leaf, dref) <
     936   234273108 :                             hash_extent_data_ref(root_objectid, owner, offset))
     937             :                                 break;
     938             :                 } else {
                                               /*
                                                * All other types are matched on the inline ref's
                                                * offset field: against parent when it is non-zero,
                                                * against root_objectid otherwise.
                                                */
     939    66784632 :                         u64 ref_offset;
     940    66784632 :                         ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
     941    66784372 :                         if (parent > 0) {
     942    61262104 :                                 if (parent == ref_offset) {
     943             :                                         err = 0;
     944             :                                         break;
     945             :                                 }
     946    46133926 :                                 if (ref_offset < parent)
     947             :                                         break;
     948             :                         } else {
     949     5522268 :                                 if (root_objectid == ref_offset) {
     950             :                                         err = 0;
     951             :                                         break;
     952             :                                 }
     953      175977 :                                 if (ref_offset < root_objectid)
     954             :                                         break;
     955             :                         }
     956             :                 }
     957   499481773 :                 ptr += btrfs_extent_inline_ref_size(type);
     958             :         }
     959    50673829 :         if (err == -ENOENT && insert) {
                                       /* No room left in the extent item for one more inline ref. */
     960    28694069 :                 if (item_size + extra_size >=
     961    28694069 :                     BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
     962    13229742 :                         err = -EAGAIN;
     963    13229742 :                         goto out;
     964             :                 }
     965             :                 /*
     966             :                  * To add new inline back ref, we have to make sure
     967             :                  * there is no corresponding back ref item.
     968             :                  * For simplicity, we just do not add new inline back
     969             :                  * ref if there is any kind of item for this block
     970             :                  */
     971    15464327 :                 if (find_next_key(path, 0, &key) == 0 &&
     972    15030503 :                     key.objectid == bytenr &&
     973      436318 :                     key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
     974      432512 :                         err = -EAGAIN;
     975      432512 :                         goto out;
     976             :                 }
     977             :         }
     978    37011574 :         *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
     979    50673828 : out:
                               /* Undo the path settings made for the insert case at the top. */
     980    50673828 :         if (insert) {
     981    28764398 :                 path->keep_locks = 0;
     982    28764398 :                 path->search_for_extension = 0;
     983    28764398 :                 btrfs_unlock_up_safe(path, 1);
     984             :         }
     985    50673866 :         return err;
     986             : }
     987             : 
     988             : /*
     989             :  * Helper to add a new inline back ref.
                     :  *
                     :  * The extent item that @path points to is extended by the size of one
                     :  * inline ref of the type chosen by extent_ref_type(parent, owner). The
                     :  * item's reference count is increased by refs_to_add, @extent_op (if
                     :  * not NULL) is applied, and the new ref is written at the position of
                     :  * @iref after the refs behind that position have been moved up.
                     :  *
                     :  * NOTE(review): @iref is presumably the insertion point reported by
                     :  * lookup_inline_extent_backref() - confirm against the callers.
     990             :  */
     991             : static noinline_for_stack
     992    15031827 : void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
     993             :                                  struct btrfs_path *path,
     994             :                                  struct btrfs_extent_inline_ref *iref,
     995             :                                  u64 parent, u64 root_objectid,
     996             :                                  u64 owner, u64 offset, int refs_to_add,
     997             :                                  struct btrfs_delayed_extent_op *extent_op)
     998             : {
     999    15031827 :         struct extent_buffer *leaf;
    1000    15031827 :         struct btrfs_extent_item *ei;
    1001    15031827 :         unsigned long ptr;
    1002    15031827 :         unsigned long end;
    1003    15031827 :         unsigned long item_offset;
    1004    15031827 :         u64 refs;
    1005    15031827 :         int size;
    1006    15031827 :         int type;
    1007             : 
    1008    15031827 :         leaf = path->nodes[0];
    1009    15031827 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
                               /*
                                * Remember the insertion point relative to the item start; the
                                * item pointer is looked up again after btrfs_extend_item().
                                */
    1010    15031827 :         item_offset = (unsigned long)iref - (unsigned long)ei;
    1011             : 
    1012    15031827 :         type = extent_ref_type(parent, owner);
    1013    15031827 :         size = btrfs_extent_inline_ref_size(type);
    1014             : 
    1015    15031827 :         btrfs_extend_item(path, size);
    1016             : 
    1017    15031804 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1018    15031799 :         refs = btrfs_extent_refs(leaf, ei);
    1019    15031793 :         refs += refs_to_add;
    1020    15031793 :         btrfs_set_extent_refs(leaf, ei, refs);
    1021    15031794 :         if (extent_op)
    1022       22148 :                 __run_delayed_extent_op(extent_op, leaf, ei);
    1023             : 
                               /*
                                * Open a gap of size bytes at the insertion point by moving the
                                * refs behind it towards the end of the item. end - size is
                                * presumably where the item ended before the extension.
                                */
    1024    15031794 :         ptr = (unsigned long)ei + item_offset;
    1025    15031794 :         end = (unsigned long)ei + btrfs_item_size(leaf, path->slots[0]);
    1026    15031804 :         if (ptr < end - size)
    1027    13203789 :                 memmove_extent_buffer(leaf, ptr + size, ptr,
    1028             :                                       end - size - ptr);
    1029             : 
                               /* Fill in the new inline ref; its layout depends on the type. */
    1030    15031798 :         iref = (struct btrfs_extent_inline_ref *)ptr;
    1031    15031798 :         btrfs_set_extent_inline_ref_type(leaf, iref, type);
    1032    15031797 :         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
    1033      822193 :                 struct btrfs_extent_data_ref *dref;
    1034      822193 :                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
    1035      822193 :                 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
    1036      822192 :                 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
    1037      822193 :                 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
    1038      822189 :                 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
    1039    14209604 :         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
    1040     7783335 :                 struct btrfs_shared_data_ref *sref;
    1041     7783335 :                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
    1042     7783335 :                 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
    1043     7783333 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
    1044     6426269 :         } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
    1045     6286705 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
    1046             :         } else {
                                       /* Remaining type (tree block ref): offset is the root. */
    1047      139564 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
    1048             :         }
    1049    15031801 :         btrfs_mark_buffer_dirty(leaf);
    1050    15031814 : }
    1051             : 
    1052    21909246 : static int lookup_extent_backref(struct btrfs_trans_handle *trans,
    1053             :                                  struct btrfs_path *path,
    1054             :                                  struct btrfs_extent_inline_ref **ref_ret,
    1055             :                                  u64 bytenr, u64 num_bytes, u64 parent,
    1056             :                                  u64 root_objectid, u64 owner, u64 offset)
    1057             : {
    1058    21909246 :         int ret;
    1059             : 
    1060    21909246 :         ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
    1061             :                                            num_bytes, parent, root_objectid,
    1062             :                                            owner, offset, 0);
    1063    21909286 :         if (ret != -ENOENT)
    1064             :                 return ret;
    1065             : 
    1066      223934 :         btrfs_release_path(path);
    1067      223934 :         *ref_ret = NULL;
    1068             : 
    1069      223934 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1070           0 :                 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
    1071             :                                             root_objectid);
    1072             :         } else {
    1073      223934 :                 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
    1074             :                                              root_objectid, owner, offset);
    1075             :         }
    1076             :         return ret;
    1077             : }
    1078             : 
    1079             : /*
    1080             :  * helper to update/remove inline back ref
    1081             :  */
    1082             : static noinline_for_stack
    1083    13682819 : void update_inline_extent_backref(struct btrfs_path *path,
    1084             :                                   struct btrfs_extent_inline_ref *iref,
    1085             :                                   int refs_to_mod,
    1086             :                                   struct btrfs_delayed_extent_op *extent_op)
    1087             : {
    1088    13682819 :         struct extent_buffer *leaf = path->nodes[0];
    1089    13682819 :         struct btrfs_extent_item *ei;
    1090    13682819 :         struct btrfs_extent_data_ref *dref = NULL;
    1091    13682819 :         struct btrfs_shared_data_ref *sref = NULL;
    1092    13682819 :         unsigned long ptr;
    1093    13682819 :         unsigned long end;
    1094    13682819 :         u32 item_size;
    1095    13682819 :         int size;
    1096    13682819 :         int type;
    1097    13682819 :         u64 refs;
    1098             : 
    1099    13682819 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1100    13682819 :         refs = btrfs_extent_refs(leaf, ei);
    1101    27365636 :         WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
    1102    13682818 :         refs += refs_to_mod;
    1103    13682818 :         btrfs_set_extent_refs(leaf, ei, refs);
    1104    13682818 :         if (extent_op)
    1105           0 :                 __run_delayed_extent_op(extent_op, leaf, ei);
    1106             : 
    1107             :         /*
    1108             :          * If type is invalid, we should have bailed out after
    1109             :          * lookup_inline_extent_backref().
    1110             :          */
    1111    13682818 :         type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
    1112    13682818 :         ASSERT(type != BTRFS_REF_TYPE_INVALID);
    1113             : 
    1114    13682818 :         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
    1115      289610 :                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
    1116      289610 :                 refs = btrfs_extent_data_ref_count(leaf, dref);
    1117    13393208 :         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
    1118     7057879 :                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
    1119     7057879 :                 refs = btrfs_shared_data_ref_count(leaf, sref);
    1120             :         } else {
    1121     6335329 :                 refs = 1;
    1122     6335329 :                 BUG_ON(refs_to_mod != -1);
    1123             :         }
    1124             : 
    1125    13682819 :         BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
    1126    13682819 :         refs += refs_to_mod;
    1127             : 
    1128    13682819 :         if (refs > 0) {
    1129       89086 :                 if (type == BTRFS_EXTENT_DATA_REF_KEY)
    1130       89085 :                         btrfs_set_extent_data_ref_count(leaf, dref, refs);
    1131             :                 else
    1132           1 :                         btrfs_set_shared_data_ref_count(leaf, sref, refs);
    1133             :         } else {
    1134    13593733 :                 size =  btrfs_extent_inline_ref_size(type);
    1135    13593733 :                 item_size = btrfs_item_size(leaf, path->slots[0]);
    1136    13593733 :                 ptr = (unsigned long)iref;
    1137    13593733 :                 end = (unsigned long)ei + item_size;
    1138    13593733 :                 if (ptr + size < end)
    1139     5019234 :                         memmove_extent_buffer(leaf, ptr, ptr + size,
    1140     5019234 :                                               end - ptr - size);
    1141    13593733 :                 item_size -= size;
    1142    13593733 :                 btrfs_truncate_item(path, item_size, 1);
    1143             :         }
    1144    13682819 :         btrfs_mark_buffer_dirty(leaf);
    1145    13682819 : }
    1146             : 
    1147             : static noinline_for_stack
    1148    28764275 : int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
    1149             :                                  struct btrfs_path *path,
    1150             :                                  u64 bytenr, u64 num_bytes, u64 parent,
    1151             :                                  u64 root_objectid, u64 owner,
    1152             :                                  u64 offset, int refs_to_add,
    1153             :                                  struct btrfs_delayed_extent_op *extent_op)
    1154             : {
    1155    28764275 :         struct btrfs_extent_inline_ref *iref;
    1156    28764275 :         int ret;
    1157             : 
    1158    28764275 :         ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
    1159             :                                            num_bytes, parent, root_objectid,
    1160             :                                            owner, offset, 1);
    1161    28764548 :         if (ret == 0) {
    1162             :                 /*
    1163             :                  * We're adding refs to a tree block we already own, this
    1164             :                  * should not happen at all.
    1165             :                  */
    1166       70431 :                 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1167           0 :                         btrfs_print_leaf(path->nodes[0]);
    1168           0 :                         btrfs_crit(trans->fs_info,
    1169             : "adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu slot %u",
    1170             :                                    bytenr, num_bytes, root_objectid, path->slots[0]);
    1171           0 :                         return -EUCLEAN;
    1172             :                 }
    1173       70431 :                 update_inline_extent_backref(path, iref, refs_to_add, extent_op);
    1174    28694117 :         } else if (ret == -ENOENT) {
    1175    15031829 :                 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
    1176             :                                             root_objectid, owner, offset,
    1177             :                                             refs_to_add, extent_op);
    1178    15031829 :                 ret = 0;
    1179             :         }
    1180             :         return ret;
    1181             : }
    1182             : 
    1183    13836050 : static int remove_extent_backref(struct btrfs_trans_handle *trans,
    1184             :                                  struct btrfs_root *root,
    1185             :                                  struct btrfs_path *path,
    1186             :                                  struct btrfs_extent_inline_ref *iref,
    1187             :                                  int refs_to_drop, int is_data)
    1188             : {
    1189    13836050 :         int ret = 0;
    1190             : 
    1191    13836050 :         BUG_ON(!is_data && refs_to_drop != 1);
    1192    13836050 :         if (iref)
    1193    13612388 :                 update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
    1194      223662 :         else if (is_data)
    1195      223662 :                 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
    1196             :         else
    1197           0 :                 ret = btrfs_del_item(trans, root, path);
    1198    13836050 :         return ret;
    1199             : }
    1200             : 
    1201       36347 : static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
    1202             :                                u64 *discarded_bytes)
    1203             : {
    1204       36347 :         int j, ret = 0;
    1205       36347 :         u64 bytes_left, end;
    1206       36347 :         u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);
    1207             : 
    1208       36347 :         if (WARN_ON(start != aligned_start)) {
    1209           0 :                 len -= aligned_start - start;
    1210           0 :                 len = round_down(len, 1 << SECTOR_SHIFT);
    1211           0 :                 start = aligned_start;
    1212             :         }
    1213             : 
    1214       36347 :         *discarded_bytes = 0;
    1215             : 
    1216       36347 :         if (!len)
    1217             :                 return 0;
    1218             : 
    1219       36347 :         end = start + len;
    1220       36347 :         bytes_left = len;
    1221             : 
    1222             :         /* Skip any superblocks on this device. */
    1223      145388 :         for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
    1224      109041 :                 u64 sb_start = btrfs_sb_offset(j);
    1225      109041 :                 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
    1226      109041 :                 u64 size = sb_start - start;
    1227             : 
    1228      109041 :                 if (!in_range(sb_start, start, bytes_left) &&
    1229      109040 :                     !in_range(sb_end, start, bytes_left) &&
    1230       71189 :                     !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
    1231      109040 :                         continue;
    1232             : 
    1233             :                 /*
    1234             :                  * Superblock spans beginning of range.  Adjust start and
    1235             :                  * try again.
    1236             :                  */
    1237           1 :                 if (sb_start <= start) {
    1238           0 :                         start += sb_end - start;
    1239           0 :                         if (start > end) {
    1240             :                                 bytes_left = 0;
    1241             :                                 break;
    1242             :                         }
    1243           0 :                         bytes_left = end - start;
    1244           0 :                         continue;
    1245             :                 }
    1246             : 
    1247           1 :                 if (size) {
    1248           1 :                         ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
    1249             :                                                    size >> SECTOR_SHIFT,
    1250             :                                                    GFP_NOFS);
    1251           1 :                         if (!ret)
    1252           1 :                                 *discarded_bytes += size;
    1253           0 :                         else if (ret != -EOPNOTSUPP)
    1254           0 :                                 return ret;
    1255             :                 }
    1256             : 
    1257           1 :                 start = sb_end;
    1258           1 :                 if (start > end) {
    1259             :                         bytes_left = 0;
    1260             :                         break;
    1261             :                 }
    1262           1 :                 bytes_left = end - start;
    1263             :         }
    1264             : 
    1265       36347 :         if (bytes_left) {
    1266       36347 :                 ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
    1267             :                                            bytes_left >> SECTOR_SHIFT,
    1268             :                                            GFP_NOFS);
    1269       36347 :                 if (!ret)
    1270       36345 :                         *discarded_bytes += bytes_left;
    1271             :         }
    1272             :         return ret;
    1273             : }
    1274             : 
    1275       36316 : static int do_discard_extent(struct btrfs_discard_stripe *stripe, u64 *bytes)
    1276             : {
    1277       36316 :         struct btrfs_device *dev = stripe->dev;
    1278       36316 :         struct btrfs_fs_info *fs_info = dev->fs_info;
    1279       36316 :         struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
    1280       36316 :         u64 phys = stripe->physical;
    1281       36316 :         u64 len = stripe->length;
    1282       36316 :         u64 discarded = 0;
    1283       36316 :         int ret = 0;
    1284             : 
    1285             :         /* Zone reset on a zoned filesystem */
    1286       36316 :         if (btrfs_can_zone_reset(dev, phys, len)) {
    1287           0 :                 u64 src_disc;
    1288             : 
    1289           0 :                 ret = btrfs_reset_device_zone(dev, phys, len, &discarded);
    1290           0 :                 if (ret)
    1291             :                         goto out;
    1292             : 
    1293           0 :                 if (!btrfs_dev_replace_is_ongoing(dev_replace) ||
    1294           0 :                     dev != dev_replace->srcdev)
    1295           0 :                         goto out;
    1296             : 
    1297             :                 src_disc = discarded;
    1298             : 
    1299             :                 /* Send to replace target as well */
    1300             :                 ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
    1301             :                                               &discarded);
    1302             :                 discarded += src_disc;
    1303       36316 :         } else if (bdev_max_discard_sectors(stripe->dev->bdev)) {
    1304       36316 :                 ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
    1305             :         } else {
    1306             :                 ret = 0;
    1307             :                 *bytes = 0;
    1308             :         }
    1309             : 
    1310       36316 : out:
    1311       36316 :         *bytes = discarded;
    1312       36316 :         return ret;
    1313             : }
    1314             : 
    1315       29039 : int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
    1316             :                          u64 num_bytes, u64 *actual_bytes)
    1317             : {
    1318       29039 :         int ret = 0;
    1319       29039 :         u64 discarded_bytes = 0;
    1320       29039 :         u64 end = bytenr + num_bytes;
    1321       29039 :         u64 cur = bytenr;
    1322             : 
    1323             :         /*
    1324             :          * Avoid races with device replace and make sure the devices in the
    1325             :          * stripes don't go away while we are discarding.
    1326             :          */
    1327       29039 :         btrfs_bio_counter_inc_blocked(fs_info);
    1328       58077 :         while (cur < end) {
    1329       29040 :                 struct btrfs_discard_stripe *stripes;
    1330       29040 :                 unsigned int num_stripes;
    1331       29040 :                 int i;
    1332             : 
    1333       29040 :                 num_bytes = end - cur;
    1334       29040 :                 stripes = btrfs_map_discard(fs_info, cur, &num_bytes, &num_stripes);
    1335       29040 :                 if (IS_ERR(stripes)) {
    1336           0 :                         ret = PTR_ERR(stripes);
    1337           0 :                         if (ret == -EOPNOTSUPP)
    1338           0 :                                 ret = 0;
    1339           2 :                         break;
    1340             :                 }
    1341             : 
    1342       65354 :                 for (i = 0; i < num_stripes; i++) {
    1343       36316 :                         struct btrfs_discard_stripe *stripe = stripes + i;
    1344       36316 :                         u64 bytes;
    1345             : 
    1346       36316 :                         if (!stripe->dev->bdev) {
    1347           0 :                                 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
    1348           0 :                                 continue;
    1349             :                         }
    1350             : 
    1351       36316 :                         if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
    1352             :                                         &stripe->dev->dev_state))
    1353           0 :                                 continue;
    1354             : 
    1355       36316 :                         ret = do_discard_extent(stripe, &bytes);
    1356       36316 :                         if (ret) {
    1357             :                                 /*
    1358             :                                  * Keep going if discard is not supported by the
    1359             :                                  * device.
    1360             :                                  */
    1361           2 :                                 if (ret != -EOPNOTSUPP)
    1362             :                                         break;
    1363             :                                 ret = 0;
    1364             :                         } else {
    1365       36314 :                                 discarded_bytes += bytes;
    1366             :                         }
    1367             :                 }
    1368       29040 :                 kfree(stripes);
    1369       29040 :                 if (ret)
    1370             :                         break;
    1371       29038 :                 cur += num_bytes;
    1372             :         }
    1373       29039 :         btrfs_bio_counter_dec(fs_info);
    1374       29039 :         if (actual_bytes)
    1375       29001 :                 *actual_bytes = discarded_bytes;
    1376       29039 :         return ret;
    1377             : }
    1378             : 
    1379             : /* Can return -ENOMEM */
    1380    34293764 : int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
    1381             :                          struct btrfs_ref *generic_ref)
    1382             : {
    1383    34293764 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1384    34293764 :         int ret;
    1385             : 
    1386    34293764 :         ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
    1387             :                generic_ref->action);
    1388    34293764 :         BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
    1389             :                generic_ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID);
    1390             : 
    1391    34293764 :         if (generic_ref->type == BTRFS_REF_METADATA)
    1392     7175378 :                 ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
    1393             :         else
    1394    27118386 :                 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0);
    1395             : 
    1396    34293810 :         btrfs_ref_tree_mod(fs_info, generic_ref);
    1397             : 
    1398    34293810 :         return ret;
    1399             : }
    1400             : 
    1401             : /*
    1402             :  * __btrfs_inc_extent_ref - insert backreference for a given extent
    1403             :  *
    1404             :  * The counterpart is in __btrfs_free_extent(), with examples and more details
    1405             :  * on how it works.
    1406             :  *
    1407             :  * @trans:          Handle of transaction
    1408             :  *
    1409             :  * @node:           The delayed ref node used to get the bytenr/length for
    1410             :  *                  extent whose references are incremented.
    1411             :  *
    1412             :  * @parent:         If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
    1413             :  *                  BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
    1414             :  *                  bytenr of the parent block. Since new extents are always
    1415             :  *                  created with indirect references, this will only be the case
    1416             :  *                  when relocating a shared extent. In that case, root_objectid
    1417             :  *                  will be BTRFS_TREE_RELOC_OBJECTID. Otherwise, parent must
    1418             :  *                  be 0.
    1419             :  *
    1420             :  * @root_objectid:  The id of the root where this modification has originated,
    1421             :  *                  this can be either one of the well-known metadata trees or
    1422             :  *                  the subvolume id which references this extent.
    1423             :  *
    1424             :  * @owner:          For data extents it is the inode number of the owning file.
    1425             :  *                  For metadata extents this parameter holds the level in the
    1426             :  *                  tree of the extent.
    1427             :  *
    1428             :  * @offset:         For metadata extents the offset is ignored and is currently
    1429             :  *                  always passed as 0. For data extents it is the file offset
    1430             :  *                  this extent belongs to.
    1431             :  *
    1432             :  * @refs_to_add:    Number of references to add
    1433             :  *
    1434             :  * @extent_op:      Pointer to a structure, holding information necessary when
    1435             :  *                  updating a tree block's flags
    1436             :  *
    1437             :  */
    1438    28764648 : static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
    1439             :                                   struct btrfs_delayed_ref_node *node,
    1440             :                                   u64 parent, u64 root_objectid,
    1441             :                                   u64 owner, u64 offset, int refs_to_add,
    1442             :                                   struct btrfs_delayed_extent_op *extent_op)
    1443             : {
    1444    28764648 :         struct btrfs_path *path;
    1445    28764648 :         struct extent_buffer *leaf;
    1446    28764648 :         struct btrfs_extent_item *item;
    1447    28764648 :         struct btrfs_key key;
    1448    28764648 :         u64 bytenr = node->bytenr;
    1449    28764648 :         u64 num_bytes = node->num_bytes;
    1450    28764648 :         u64 refs;
    1451    28764648 :         int ret;
    1452             : 
    1453    28764648 :         path = btrfs_alloc_path();
    1454    28764324 :         if (!path)
    1455             :                 return -ENOMEM;
    1456             : 
    1457             :         /* this will set up the path even if it fails to insert the back ref */
    1458    28764324 :         ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
    1459             :                                            parent, root_objectid, owner,
    1460             :                                            offset, refs_to_add, extent_op);
    1461    28763695 :         if ((ret < 0 && ret != -EAGAIN) || !ret)
    1462    15102242 :                 goto out;
    1463             : 
    1464             :         /*
    1465             :          * Ok we had -EAGAIN which means we didn't have space to insert an
    1466             :          * inline extent ref, so just update the reference count and add a
    1467             :          * normal backref.
    1468             :          */
    1469    13661453 :         leaf = path->nodes[0];
    1470    13661453 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    1471    13661656 :         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1472    13661692 :         refs = btrfs_extent_refs(leaf, item);
    1473    13661608 :         btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
    1474    13661405 :         if (extent_op)
    1475           0 :                 __run_delayed_extent_op(extent_op, leaf, item);
    1476             : 
    1477    13661405 :         btrfs_mark_buffer_dirty(leaf);
    1478    13662033 :         btrfs_release_path(path);
    1479             : 
    1480             :         /* now insert the actual backref */
    1481    13662558 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1482           0 :                 BUG_ON(refs_to_add != 1);
    1483           0 :                 ret = insert_tree_block_ref(trans, path, bytenr, parent,
    1484             :                                             root_objectid);
    1485             :         } else {
    1486    13662558 :                 ret = insert_extent_data_ref(trans, path, bytenr, parent,
    1487             :                                              root_objectid, owner, offset,
    1488             :                                              refs_to_add);
    1489             :         }
    1490    13662575 :         if (ret)
    1491           0 :                 btrfs_abort_transaction(trans, ret);
    1492    13662575 : out:
    1493    28764817 :         btrfs_free_path(path);
    1494    28764817 :         return ret;
    1495             : }
    1496             : 
    1497    32774020 : static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
    1498             :                                 struct btrfs_delayed_ref_node *node,
    1499             :                                 struct btrfs_delayed_extent_op *extent_op,
    1500             :                                 bool insert_reserved)
    1501             : {
    1502    32774020 :         int ret = 0;
    1503    32774020 :         struct btrfs_delayed_data_ref *ref;
    1504    32774020 :         struct btrfs_key ins;
    1505    32774020 :         u64 parent = 0;
    1506    32774020 :         u64 ref_root = 0;
    1507    32774020 :         u64 flags = 0;
    1508             : 
    1509    32774020 :         ins.objectid = node->bytenr;
    1510    32774020 :         ins.offset = node->num_bytes;
    1511    32774020 :         ins.type = BTRFS_EXTENT_ITEM_KEY;
    1512             : 
    1513    32774020 :         ref = btrfs_delayed_node_to_data_ref(node);
    1514    32774020 :         trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
    1515             : 
    1516    32773972 :         if (node->type == BTRFS_SHARED_DATA_REF_KEY)
    1517    15609668 :                 parent = ref->parent;
    1518    32773972 :         ref_root = ref->root;
    1519             : 
    1520    32773972 :         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
    1521     1919198 :                 if (extent_op)
    1522           0 :                         flags |= extent_op->flags_to_set;
    1523     1919198 :                 ret = alloc_reserved_file_extent(trans, parent, ref_root,
    1524             :                                                  flags, ref->objectid,
    1525             :                                                  ref->offset, &ins,
    1526             :                                                  node->ref_mod);
    1527    30854774 :         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
    1528    22338393 :                 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
    1529             :                                              ref->objectid, ref->offset,
    1530             :                                              node->ref_mod, extent_op);
    1531     8516381 :         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
    1532     8516381 :                 ret = __btrfs_free_extent(trans, node, parent,
    1533             :                                           ref_root, ref->objectid,
    1534             :                                           ref->offset, node->ref_mod,
    1535             :                                           extent_op);
    1536             :         } else {
    1537           0 :                 BUG();
    1538             :         }
    1539    32774042 :         return ret;
    1540             : }
    1541             : /*
                     :  * Apply the delayed extent op @extent_op to the extent item @ei in @leaf.
                     :  *
                     :  * If update_flags is set, extent_op->flags_to_set is OR-ed into the item's
                     :  * current flags and written back.  If update_key is set, extent_op->key is
                     :  * stored in the btrfs_tree_block_info placed right behind the extent item;
                     :  * this BUGs unless the (possibly just updated) flags contain
                     :  * BTRFS_EXTENT_FLAG_TREE_BLOCK.
                     :  *
                     :  * This helper does not mark @leaf dirty itself.
                     :  */
    1542       71472 : static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
    1543             :                                     struct extent_buffer *leaf,
    1544             :                                     struct btrfs_extent_item *ei)
    1545             : {
    1546       71472 :         u64 flags = btrfs_extent_flags(leaf, ei);
    1547       71472 :         if (extent_op->update_flags) {
    1548       71472 :                 flags |= extent_op->flags_to_set;
    1549       71472 :                 btrfs_set_extent_flags(leaf, ei, flags);
    1550             :         }
    1551             :         /* The key is kept in the tree block info directly after the item. */
    1552       71472 :         if (extent_op->update_key) {
    1553           0 :                 struct btrfs_tree_block_info *bi;
    1554           0 :                 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
    1555           0 :                 bi = (struct btrfs_tree_block_info *)(ei + 1);
    1556           0 :                 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
    1557             :         }
    1558       71472 : }
    1559             : /*
                     :  * Look up the extent item for @head in the extent root and apply @extent_op
                     :  * to it, then mark the leaf dirty.
                     :  *
                     :  * With the SKINNY_METADATA incompat bit the item is first searched as a
                     :  * METADATA_ITEM keyed by extent_op->level.  If that exact key is missing,
                     :  * the previous slot is checked for an EXTENT_ITEM with the same bytenr and
                     :  * num_bytes; failing that, the search is repeated with the EXTENT_ITEM key.
                     :  *
                     :  * Returns 0 on success or if the transaction is already aborted, -ENOMEM if
                     :  * no path could be allocated, a negative error from btrfs_search_slot(),
                     :  * -EIO if the EXTENT_ITEM key is not found, or -EINVAL (after aborting the
                     :  * transaction) if the item is smaller than struct btrfs_extent_item.
                     :  */
    1560        7234 : static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
    1561             :                                  struct btrfs_delayed_ref_head *head,
    1562             :                                  struct btrfs_delayed_extent_op *extent_op)
    1563             : {
    1564        7234 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1565        7234 :         struct btrfs_root *root;
    1566        7234 :         struct btrfs_key key;
    1567        7234 :         struct btrfs_path *path;
    1568        7234 :         struct btrfs_extent_item *ei;
    1569        7234 :         struct extent_buffer *leaf;
    1570        7234 :         u32 item_size;
    1571        7234 :         int ret;
    1572        7234 :         int err = 0;
    1573        7234 :         int metadata = 1;
    1574             : 
    1575        7234 :         if (TRANS_ABORTED(trans))
    1576             :                 return 0;
    1577             :         /* Without SKINNY_METADATA, search with the EXTENT_ITEM key right away. */
    1578        7234 :         if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
    1579           0 :                 metadata = 0;
    1580             : 
    1581        7234 :         path = btrfs_alloc_path();
    1582        7234 :         if (!path)
    1583             :                 return -ENOMEM;
    1584             : 
    1585        7234 :         key.objectid = head->bytenr;
    1586             :         /* METADATA_ITEM keys use the level as offset, EXTENT_ITEM keys the size. */
    1587        7234 :         if (metadata) {
    1588        7234 :                 key.type = BTRFS_METADATA_ITEM_KEY;
    1589        7234 :                 key.offset = extent_op->level;
    1590             :         } else {
    1591           0 :                 key.type = BTRFS_EXTENT_ITEM_KEY;
    1592           0 :                 key.offset = head->num_bytes;
    1593             :         }
    1594             : 
    1595        7234 :         root = btrfs_extent_root(fs_info, key.objectid);
    1596        7234 : again:
    1597        7234 :         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
    1598        7234 :         if (ret < 0) {
    1599           0 :                 err = ret;
    1600           0 :                 goto out;
    1601             :         }
    1602        7234 :         if (ret > 0) {
    1603           0 :                 if (metadata) {
    1604           0 :                         if (path->slots[0] > 0) {
    1605           0 :                                 path->slots[0]--;
    1606           0 :                                 btrfs_item_key_to_cpu(path->nodes[0], &key,
    1607             :                                                       path->slots[0]);
    1608           0 :                                 if (key.objectid == head->bytenr &&
    1609           0 :                                     key.type == BTRFS_EXTENT_ITEM_KEY &&
    1610           0 :                                     key.offset == head->num_bytes)
    1611             :                                         ret = 0;
    1612             :                         }
    1613           0 :                         if (ret > 0) {
    1614           0 :                                 btrfs_release_path(path);
    1615           0 :                                 metadata = 0;
    1616             :                                 /* Fall back to the EXTENT_ITEM key and search again. */
    1617           0 :                                 key.objectid = head->bytenr;
    1618           0 :                                 key.offset = head->num_bytes;
    1619           0 :                                 key.type = BTRFS_EXTENT_ITEM_KEY;
    1620           0 :                                 goto again;
    1621             :                         }
    1622             :                 } else {
    1623           0 :                         err = -EIO;
    1624           0 :                         goto out;
    1625             :                 }
    1626             :         }
    1627             :         /* Found: path->slots[0] in path->nodes[0] is the extent item. */
    1628        7234 :         leaf = path->nodes[0];
    1629        7234 :         item_size = btrfs_item_size(leaf, path->slots[0]);
    1630             : 
    1631        7234 :         if (unlikely(item_size < sizeof(*ei))) {
    1632           0 :                 err = -EINVAL;
    1633           0 :                 btrfs_print_v0_err(fs_info);
    1634           0 :                 btrfs_abort_transaction(trans, err);
    1635           0 :                 goto out;
    1636             :         }
    1637             : 
    1638        7234 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1639        7234 :         __run_delayed_extent_op(extent_op, leaf, ei);
    1640             : 
    1641        7234 :         btrfs_mark_buffer_dirty(leaf);
    1642        7234 : out:
    1643        7234 :         btrfs_free_path(path);
    1644        7234 :         return err;
    1645             : }
    1646             : /*
                     :  * Run one delayed ref node that describes a tree block backref.
                     :  *
                     :  * The node must have ref_mod == 1; anything else is logged and rejected
                     :  * with -EIO.  Depending on node->action and @insert_reserved this calls
                     :  * alloc_reserved_tree_block() (add with @insert_reserved, which requires an
                     :  * @extent_op with update_flags set), __btrfs_inc_extent_ref() (plain add) or
                     :  * __btrfs_free_extent() (drop).  Any other action hits BUG().
                     :  *
                     :  * Returns the result of the helper that was called, or -EIO as above.
                     :  */
    1647    27150216 : static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
    1648             :                                 struct btrfs_delayed_ref_node *node,
    1649             :                                 struct btrfs_delayed_extent_op *extent_op,
    1650             :                                 bool insert_reserved)
    1651             : {
    1652    27150216 :         int ret = 0;
    1653    27150216 :         struct btrfs_delayed_tree_ref *ref;
    1654    27150216 :         u64 parent = 0;
    1655    27150216 :         u64 ref_root = 0;
    1656             : 
    1657    27150216 :         ref = btrfs_delayed_node_to_tree_ref(node);
    1658    27150216 :         trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
    1659             :         /* Only shared block refs carry a parent; otherwise parent stays 0. */
    1660    27150247 :         if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
    1661    16188055 :                 parent = ref->parent;
    1662    27150247 :         ref_root = ref->root;
    1663             :         /* A tree block ref node must change the count by exactly one. */
    1664    27150247 :         if (node->ref_mod != 1) {
    1665           0 :                 btrfs_err(trans->fs_info,
    1666             :         "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
    1667             :                           node->bytenr, node->ref_mod, node->action, ref_root,
    1668             :                           parent);
    1669           0 :                 return -EIO;
    1670             :         }
    1671    27150247 :         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
    1672     7331161 :                 BUG_ON(!extent_op || !extent_op->update_flags);
    1673     7331161 :                 ret = alloc_reserved_tree_block(trans, node, extent_op);
    1674    19819086 :         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
    1675     6426212 :                 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
    1676     6426212 :                                              ref->level, 0, 1, extent_op);
    1677    13392874 :         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
    1678    13392874 :                 ret = __btrfs_free_extent(trans, node, parent, ref_root,
    1679    13392874 :                                           ref->level, 0, 1, extent_op);
    1680             :         } else {
    1681           0 :                 BUG();
    1682             :         }
    1683             :         return ret;
    1684             : }
    1685             : 
    1686             : /*
                     :  * Helper function to actually process a single delayed ref entry.
                     :  *
                     :  * Tree block refs (TREE_BLOCK_REF / SHARED_BLOCK_REF) are handed to
                     :  * run_delayed_tree_ref(), data refs (EXTENT_DATA_REF / SHARED_DATA_REF) to
                     :  * run_delayed_data_ref(); any other node type hits BUG().
                     :  *
                     :  * When @insert_reserved is set and the ref is not run successfully (the
                     :  * transaction is aborted or the handler returned non-zero), the extent is
                     :  * passed to btrfs_pin_extent().
                     :  *
                     :  * Returns 0 on an aborted transaction, otherwise the handler's result;
                     :  * negative results are also logged.
                     :  */
    1687    59924256 : static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
    1688             :                                struct btrfs_delayed_ref_node *node,
    1689             :                                struct btrfs_delayed_extent_op *extent_op,
    1690             :                                bool insert_reserved)
    1691             : {
    1692    59924256 :         int ret = 0;
    1693             :         /* Aborted transaction: run nothing, but still pin a reserved extent. */
    1694    59924256 :         if (TRANS_ABORTED(trans)) {
    1695           0 :                 if (insert_reserved)
    1696           0 :                         btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
    1697           0 :                 return 0;
    1698             :         }
    1699             :         /* Dispatch on the backref type: tree block refs vs. data refs. */
    1700    59924256 :         if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
    1701             :             node->type == BTRFS_SHARED_BLOCK_REF_KEY)
    1702    27150228 :                 ret = run_delayed_tree_ref(trans, node, extent_op,
    1703             :                                            insert_reserved);
    1704    32774028 :         else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
    1705             :                  node->type == BTRFS_SHARED_DATA_REF_KEY)
    1706    32774028 :                 ret = run_delayed_data_ref(trans, node, extent_op,
    1707             :                                            insert_reserved);
    1708             :         else
    1709           0 :                 BUG();
    1710    59924300 :         if (ret && insert_reserved)
    1711           1 :                 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
    1712    59924300 :         if (ret < 0)
    1713           1 :                 btrfs_err(trans->fs_info,
    1714             : "failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d",
    1715             :                           node->bytenr, node->num_bytes, node->type,
    1716             :                           node->action, node->ref_mod, ret);
    1717             :         return ret;
    1718             : }
    1719             : /*
                     :  * Pick the next delayed ref node to run for @head.
                     :  *
                     :  * Returns NULL if the head's ref tree is empty.  Otherwise the first entry
                     :  * of head->ref_add_list is preferred; if that list is empty, the first node
                     :  * of the ref tree is returned (asserted not to be on an add list).
                     :  */
    1720             : static inline struct btrfs_delayed_ref_node *
    1721             : select_delayed_ref(struct btrfs_delayed_ref_head *head)
    1722             : {
    1723   108189323 :         struct btrfs_delayed_ref_node *ref;
    1724             : 
    1725   108189323 :         if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
    1726             :                 return NULL;
    1727             : 
    1728             :         /*
    1729             :          * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
    1730             :          * This is to prevent a ref count from going down to zero, which deletes
    1731             :          * the extent item from the extent tree, when there still are references
    1732             :          * to add, which would fail because they would not find the extent item.
    1733             :          */
    1734    59924215 :         if (!list_empty(&head->ref_add_list))
    1735    38014914 :                 return list_first_entry(&head->ref_add_list,
    1736             :                                 struct btrfs_delayed_ref_node, add_list);
    1737             : 
    1738    21909301 :         ref = rb_entry(rb_first_cached(&head->ref_tree),
    1739             :                        struct btrfs_delayed_ref_node, ref_node);
    1740    21909301 :         ASSERT(list_empty(&ref->add_list));
    1741    21909301 :         return ref;
    1742             : }
    1743             : /*
                     :  * Give @head back without having finished it: under delayed_refs->lock the
                     :  * head's processing flag is cleared and num_heads_ready is incremented, then
                     :  * the head is released with btrfs_delayed_ref_unlock().
                     :  */
    1744          80 : static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
    1745             :                                       struct btrfs_delayed_ref_head *head)
    1746             : {
    1747          80 :         spin_lock(&delayed_refs->lock);
    1748          80 :         head->processing = false;
    1749          80 :         delayed_refs->num_heads_ready++;
    1750          80 :         spin_unlock(&delayed_refs->lock);
    1751          80 :         btrfs_delayed_ref_unlock(head);
    1752          80 : }
    1753             : /*
                     :  * Decide whether @head still has an extent op that needs to be run.
                     :  *
                     :  * Returns NULL if the head has no extent op.  If head->must_insert_reserved
                     :  * is set, the op is detached from the head, freed without being run, and
                     :  * NULL is returned.  Otherwise the op is returned and stays attached to the
                     :  * head.
                     :  */
    1754    48603924 : static struct btrfs_delayed_extent_op *cleanup_extent_op(
    1755             :                                 struct btrfs_delayed_ref_head *head)
    1756             : {
    1757    48603924 :         struct btrfs_delayed_extent_op *extent_op = head->extent_op;
    1758             : 
    1759    48603924 :         if (!extent_op)
    1760             :                 return NULL;
    1761             : 
    1762      378391 :         if (head->must_insert_reserved) {
    1763      371157 :                 head->extent_op = NULL;
    1764      371157 :                 btrfs_free_delayed_extent_op(extent_op);
    1765      371157 :                 return NULL;
    1766             :         }
    1767             :         return extent_op;
    1768             : }
    1769             : /*
                     :  * Run and free the extent op still pending on @head, if any.
                     :  *
                     :  * Returns 0 if cleanup_extent_op() left nothing to run; head->lock is not
                     :  * touched in that case.  Otherwise the op is detached, head->lock is
                     :  * released, the op is run and freed, and the return value is the negative
                     :  * error from run_delayed_extent_op() or 1 on success, so a non-zero return
                     :  * always means head->lock has been dropped.
                     :  */
    1770    48265107 : static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
    1771             :                                      struct btrfs_delayed_ref_head *head)
    1772             : {
    1773    48265107 :         struct btrfs_delayed_extent_op *extent_op;
    1774    48265107 :         int ret;
    1775             : 
    1776    48265107 :         extent_op = cleanup_extent_op(head);
    1777    48265081 :         if (!extent_op)
    1778             :                 return 0;
    1779        7234 :         head->extent_op = NULL;
    1780        7234 :         spin_unlock(&head->lock);
    1781        7234 :         ret = run_delayed_extent_op(trans, head, extent_op);
    1782        7234 :         btrfs_free_delayed_extent_op(extent_op);
    1783        7234 :         return ret ? ret : 1;
    1784             : }
    1785             : /*
                     :  * Release the delayed refs reservation that was held for @head.
                     :  *
                     :  * One item is always released for the head itself.  For a data head whose
                     :  * total_ref_mod is negative, head->num_bytes is also subtracted from
                     :  * delayed_refs->pending_csums (under delayed_refs->lock) and the number of
                     :  * csum leaves computed by btrfs_csum_bytes_to_leaves() is released as well.
                     :  */
    1786    48597070 : void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
    1787             :                                   struct btrfs_delayed_ref_root *delayed_refs,
    1788             :                                   struct btrfs_delayed_ref_head *head)
    1789             : {
    1790    48597070 :         int nr_items = 1;       /* Dropping this ref head update. */
    1791             : 
    1792             :         /*
    1793             :          * We had csum deletions accounted for in our delayed refs rsv, we need
    1794             :          * to drop the csum leaves for this update from our delayed_refs_rsv.
    1795             :          */
    1796    48597070 :         if (head->total_ref_mod < 0 && head->is_data) {
    1797     6806704 :                 spin_lock(&delayed_refs->lock);
    1798     6806704 :                 delayed_refs->pending_csums -= head->num_bytes;
    1799     6806704 :                 spin_unlock(&delayed_refs->lock);
    1800     6806704 :                 nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
    1801             :         }
    1802             : 
    1803    48597070 :         btrfs_delayed_refs_rsv_release(fs_info, nr_items);
    1804    48597016 : }
    1805             : /*
                     :  * Finish a ref head whose delayed refs have all been run.
                     :  *
                     :  * First runs a still pending extent op.  Then, holding both
                     :  * delayed_refs->lock and head->lock, re-checks that no new ref or extent op
                     :  * was attached; if the head is still empty it is deleted from @delayed_refs,
                     :  * its accounting is released, and it is unlocked and put.
                     :  *
                     :  * Returns a negative error if the extent op failed (the head is unselected
                     :  * first), 1 if the caller has to process the head again (an extent op was
                     :  * run, or new work showed up on the head), and otherwise 0 or the result of
                     :  * btrfs_del_csums() for a data head with must_insert_reserved set.
                     :  */
    1806    48265138 : static int cleanup_ref_head(struct btrfs_trans_handle *trans,
    1807             :                             struct btrfs_delayed_ref_head *head)
    1808             : {
    1809             : 
    1810    48265138 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1811    48265138 :         struct btrfs_delayed_ref_root *delayed_refs;
    1812    48265138 :         int ret;
    1813             : 
    1814    48265138 :         delayed_refs = &trans->transaction->delayed_refs;
    1815             : 
    1816    48265138 :         ret = run_and_cleanup_extent_op(trans, head);
    1817    48265071 :         if (ret < 0) {
    1818           0 :                 unselect_delayed_ref_head(delayed_refs, head);
    1819           0 :                 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
    1820           0 :                 return ret;
    1821    48265071 :         } else if (ret) {
    1822             :                 return ret;
    1823             :         }
    1824             : 
    1825             :         /*
    1826             :          * Need to drop our head ref lock and re-acquire the delayed ref lock
    1827             :          * and then re-check to make sure nobody got added.
    1828             :          */
    1829    48257820 :         spin_unlock(&head->lock);
    1830    48258090 :         spin_lock(&delayed_refs->lock);
    1831    48258264 :         spin_lock(&head->lock);
    1832    48258264 :         if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
    1833          13 :                 spin_unlock(&head->lock);
    1834          13 :                 spin_unlock(&delayed_refs->lock);
    1835          13 :                 return 1;
    1836             :         }
    1837    48258251 :         btrfs_delete_ref_head(delayed_refs, head);
    1838    48258251 :         spin_unlock(&head->lock);
    1839    48258250 :         spin_unlock(&delayed_refs->lock);
    1840             :         /* If must_insert_reserved is set: pin the extent and, for data, delete its csums. */
    1841    48258251 :         if (head->must_insert_reserved) {
    1842     1673293 :                 btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
    1843     1673293 :                 if (head->is_data) {
    1844     1640931 :                         struct btrfs_root *csum_root;
    1845             : 
    1846     1640931 :                         csum_root = btrfs_csum_root(fs_info, head->bytenr);
    1847     1640931 :                         ret = btrfs_del_csums(trans, csum_root, head->bytenr,
    1848             :                                               head->num_bytes);
    1849             :                 }
    1850             :         }
    1851             : 
    1852    48258251 :         btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
    1853             : 
    1854    48258165 :         trace_run_delayed_ref_head(fs_info, head, 0);
    1855    48258127 :         btrfs_delayed_ref_unlock(head);
    1856    48258207 :         btrfs_put_delayed_ref_head(head);
    1857    48258207 :         return ret;
    1858             : }
    1859             : /*
                     :  * Select the next ref head of the running transaction and lock it.
                     :  *
                     :  * Returns NULL if btrfs_select_ref_head() finds no head, ERR_PTR(-EAGAIN) if
                     :  * btrfs_delayed_ref_lock() returned -EAGAIN for the selected head, and the
                     :  * head itself otherwise.  delayed_refs->lock is taken and released inside
                     :  * this function.
                     :  */
    1860    49264078 : static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
    1861             :                                         struct btrfs_trans_handle *trans)
    1862             : {
    1863    49264078 :         struct btrfs_delayed_ref_root *delayed_refs =
    1864    49264078 :                 &trans->transaction->delayed_refs;
    1865    49264078 :         struct btrfs_delayed_ref_head *head = NULL;
    1866    49264078 :         int ret;
    1867             : 
    1868    49264078 :         spin_lock(&delayed_refs->lock);
    1869    49265018 :         head = btrfs_select_ref_head(delayed_refs);
    1870    49265017 :         if (!head) {
    1871     1006687 :                 spin_unlock(&delayed_refs->lock);
    1872     1006687 :                 return head;
    1873             :         }
    1874             : 
    1875             :         /*
    1876             :          * Grab the lock that says we are going to process all the refs for
    1877             :          * this head
    1878             :          */
    1879    48258330 :         ret = btrfs_delayed_ref_lock(delayed_refs, head);
    1880    48258330 :         spin_unlock(&delayed_refs->lock);
    1881             : 
    1882             :         /*
    1883             :          * We may have dropped the spin lock to get the head mutex lock, and
    1884             :          * that might have given someone else time to free the head.  If that's
    1885             :          * true, it has been removed from our list and we can move on.
    1886             :          */
    1887    48258329 :         if (ret == -EAGAIN)
    1888           0 :                 head = ERR_PTR(-EAGAIN);
    1889             : 
    1890             :         return head;
    1891             : }
    1892             : /*
                     :  * Run every delayed ref node currently queued on @locked_ref.
                     :  *
                     :  * Must be called with locked_ref->mutex and locked_ref->lock held.  For each
                     :  * selected node the head's lock is dropped while the ref is run and taken
                     :  * again afterwards, followed by another btrfs_merge_delayed_refs() pass.
                     :  *
                     :  * Returns 0 once the head has no refs left; locked_ref->lock is still held
                     :  * then.  Returns -EAGAIN if a ref has a seq for which
                     :  * btrfs_check_delayed_seq() returns non-zero, or the error from
                     :  * run_one_delayed_ref(); in both of these cases the head has been unlocked
                     :  * and unselected before returning.
                     :  */
    1893    48265471 : static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
    1894             :                                            struct btrfs_delayed_ref_head *locked_ref)
    1895             : {
    1896    48265471 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1897    48265471 :         struct btrfs_delayed_ref_root *delayed_refs;
    1898    48265471 :         struct btrfs_delayed_extent_op *extent_op;
    1899    48265471 :         struct btrfs_delayed_ref_node *ref;
    1900    48265471 :         bool must_insert_reserved;
    1901    48265471 :         int ret;
    1902             : 
    1903    48265471 :         delayed_refs = &trans->transaction->delayed_refs;
    1904             : 
    1905    48265471 :         lockdep_assert_held(&locked_ref->mutex);
    1906    48265471 :         lockdep_assert_held(&locked_ref->lock);
    1907             : 
    1908   168113538 :         while ((ref = select_delayed_ref(locked_ref))) {
    1909    60868627 :                 if (ref->seq &&
    1910      944411 :                     btrfs_check_delayed_seq(fs_info, ref->seq)) {
    1911          79 :                         spin_unlock(&locked_ref->lock);
    1912          79 :                         unselect_delayed_ref_head(delayed_refs, locked_ref);
    1913          79 :                         return -EAGAIN;
    1914             :                 }
    1915             :                 /* Detach the ref from the head's tree and, if linked, its add list. */
    1916    59924137 :                 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
    1917    59923949 :                 RB_CLEAR_NODE(&ref->ref_node);
    1918    59923949 :                 if (!list_empty(&ref->add_list))
    1919    38014724 :                         list_del(&ref->add_list);
    1920             :                 /*
    1921             :                  * When we play the delayed ref, also correct the ref_mod on
    1922             :                  * head
    1923             :                  */
    1924    59923925 :                 switch (ref->action) {
    1925    38014695 :                 case BTRFS_ADD_DELAYED_REF:
    1926             :                 case BTRFS_ADD_DELAYED_EXTENT:
    1927    38014695 :                         locked_ref->ref_mod -= ref->ref_mod;
    1928    38014695 :                         break;
    1929    21909230 :                 case BTRFS_DROP_DELAYED_REF:
    1930    21909230 :                         locked_ref->ref_mod += ref->ref_mod;
    1931    21909230 :                         break;
    1932             :                 default:
    1933           0 :                         WARN_ON(1);
    1934             :                 }
    1935    59923925 :                 atomic_dec(&delayed_refs->num_entries);
    1936             : 
    1937             :                 /*
    1938             :                  * Record the must_insert_reserved flag before we drop the
    1939             :                  * spin lock.
    1940             :                  */
    1941    59924229 :                 must_insert_reserved = locked_ref->must_insert_reserved;
    1942    59924229 :                 locked_ref->must_insert_reserved = false;
    1943             :                 /* The head's extent op is detached here and handed to this ref. */
    1944    59924229 :                 extent_op = locked_ref->extent_op;
    1945    59924229 :                 locked_ref->extent_op = NULL;
    1946    59924229 :                 spin_unlock(&locked_ref->lock);
    1947             : 
    1948    59924284 :                 ret = run_one_delayed_ref(trans, ref, extent_op,
    1949             :                                           must_insert_reserved);
    1950             : 
    1951    59924204 :                 btrfs_free_delayed_extent_op(extent_op);
    1952    59924178 :                 if (ret) {
    1953           1 :                         unselect_delayed_ref_head(delayed_refs, locked_ref);
    1954           1 :                         btrfs_put_delayed_ref(ref);
    1955           1 :                         return ret;
    1956             :                 }
    1957             : 
    1958    59924177 :                 btrfs_put_delayed_ref(ref);
    1959    59924251 :                 cond_resched();
    1960             : 
    1961    59924198 :                 spin_lock(&locked_ref->lock);
    1962    59924078 :                 btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
    1963             :         }
    1964             : 
    1965             :         return 0;
    1966             : }
    1967             : 
    1968             : /*
                     :  * Obtain ref heads of the running transaction one at a time and run all
                     :  * delayed refs queued on each of them.
                     :  *
                     :  * The loop goes on while @nr is not -1 and fewer than @nr heads have been
                     :  * obtained, or while a head is still held because it has to be processed
                     :  * again.  It stops early when no further head can be obtained.
                     :  *
    1969             :  * Returns 0 on success or if called with an already aborted transaction.
    1970             :  * Returns a negative error if running or cleaning up a head failed.  This
                     :  * function does not abort the transaction itself; btrfs_run_delayed_refs()
                     :  * does so when it sees a negative return value.
    1971             :  */
    1972     4375260 : static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
    1973             :                                              unsigned long nr)
    1974             : {
    1975     4375260 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1976     4375260 :         struct btrfs_delayed_ref_root *delayed_refs;
    1977     4375260 :         struct btrfs_delayed_ref_head *locked_ref = NULL;
    1978     4375260 :         int ret;
    1979     4375260 :         unsigned long count = 0;
    1980             : 
    1981     4375260 :         delayed_refs = &trans->transaction->delayed_refs;
    1982    49271607 :         do {
    1983    49271607 :                 if (!locked_ref) {
    1984    49264337 :                         locked_ref = btrfs_obtain_ref_head(trans);
    1985    97523345 :                         if (IS_ERR_OR_NULL(locked_ref)) {
    1986     1006687 :                                 if (PTR_ERR(locked_ref) == -EAGAIN) {
                     :                                         /*
                     :                                          * Forget the error pointer before
                     :                                          * looping.  'continue' jumps to the
                     :                                          * loop condition; a non-NULL
                     :                                          * locked_ref would keep the loop
                     :                                          * running and skip the !locked_ref
                     :                                          * branch above, so ERR_PTR(-EAGAIN)
                     :                                          * would be dereferenced by the
                     :                                          * spin_lock() below.  With NULL here
                     :                                          * the loop either selects another
                     :                                          * head or ends and returns 0.
                     :                                          */
                     :                                         locked_ref = NULL;
    1987           0 :                                         continue;
    1988             :                                 } else {
    1989             :                                         break;
    1990             :                                 }
    1991             :                         }
    1992    48258329 :                         count++;
    1993             :                 }
    1994             :                 /*
    1995             :                  * We need to try and merge add/drops of the same ref since we
    1996             :                  * can run into issues with relocate dropping the implicit ref
    1997             :                  * and then it being added back again before the drop can
    1998             :                  * finish.  If we merged anything we need to re-loop so we can
    1999             :                  * get a good ref.
    2000             :                  * Or we can get node references of the same type that weren't
    2001             :                  * merged when created due to bumps in the tree mod seq, and
    2002             :                  * we need to merge them to prevent adding an inline extent
    2003             :                  * backref before dropping it (triggering a BUG_ON at
    2004             :                  * insert_inline_extent_backref()).
    2005             :                  */
    2006    48265599 :                 spin_lock(&locked_ref->lock);
    2007    48265554 :                 btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
    2008             : 
    2009    48265469 :                 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref);
    2010    48265211 :                 if (ret < 0 && ret != -EAGAIN) {
    2011             :                         /*
    2012             :                          * Error, btrfs_run_delayed_refs_for_head already
    2013             :                          * unlocked everything so just bail out
    2014             :                          */
    2015           1 :                         return ret;
    2016    48265210 :                 } else if (!ret) {
    2017             :                         /*
    2018             :                          * Success, perform the usual cleanup of a processed
    2019             :                          * head
    2020             :                          */
    2021    48265131 :                         ret = cleanup_ref_head(trans, locked_ref);
    2022    48264764 :                         if (ret > 0 ) {
    2023             :                                 /* We dropped our lock, we need to loop. */
    2024        7247 :                                 ret = 0;
    2025        7247 :                                 continue;
    2026    48257517 :                         } else if (ret) {
    2027           0 :                                 return ret;
    2028             :                         }
    2029             :                 }
    2030             : 
    2031             :                 /*
    2032             :                  * Either success case or btrfs_run_delayed_refs_for_head
    2033             :                  * returned -EAGAIN, meaning we need to select another head
    2034             :                  */
    2035             : 
    2036    48257596 :                 locked_ref = NULL;
    2037    48257596 :                 cond_resched();
    2038    48264922 :         } while ((nr != -1 && count < nr) || locked_ref);
    2039             : 
    2040             :         return 0;
    2041             : }
    2042             : 
    2043             : #ifdef SCRAMBLE_DELAYED_REFS
    2044             : /*
    2045             :  * Normally delayed refs get processed in ascending bytenr order. This
    2046             :  * correlates in most cases to the order added. To expose dependencies on this
    2047             :  * order, we start to process the tree in the middle instead of the beginning.
                     :  *
                     :  * Walks down from the root node of @root, alternating between the left and
                     :  * the right child (left first), and returns the bytenr of the last entry
                     :  * visited.  Returns 0 if the tree is empty.
    2048             :  */
    2049             : static u64 find_middle(struct rb_root *root)
    2050             : {
    2051             :         struct rb_node *n = root->rb_node;
    2052             :         struct btrfs_delayed_ref_node *entry;
    2053             :         int alt = 1;
    2054             :         u64 middle = 0; /* stays 0 when the walk below visits no node */
    2055             :         u64 first = 0, last = 0;
    2056             :         /* NOTE(review): first/last are filled in below but never read. */
    2057             :         n = rb_first(root);
    2058             :         if (n) {
    2059             :                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
    2060             :                 first = entry->bytenr;
    2061             :         }
    2062             :         n = rb_last(root);
    2063             :         if (n) {
    2064             :                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
    2065             :                 last = entry->bytenr;
    2066             :         }
    2067             :         n = root->rb_node;
    2068             :         /* Zig-zag down the tree; the last node reached supplies the result. */
    2069             :         while (n) {
    2070             :                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
    2071             :                 WARN_ON(!entry->in_tree);
    2072             : 
    2073             :                 middle = entry->bytenr;
    2074             : 
    2075             :                 if (alt)
    2076             :                         n = n->rb_left;
    2077             :                 else
    2078             :                         n = n->rb_right;
    2079             : 
    2080             :                 alt = 1 - alt;
    2081             :         }
    2082             :         return middle;
    2083             : }
    2084             : #endif
    2085             : 
    2086             : /*
    2087             :  * Start processing the delayed reference count updates and extent
    2088             :  * insertions queued up so far.
    2089             :  *
    2090             :  * @count selects how much work to do:
    2091             :  *   0                 - the number of heads ready at the start of the run
    2092             :  *                       (delayed_refs->num_heads_ready), not newly added ones
    2093             :  *   (unsigned long)-1 - keep running until no ref head is left in the tree
    2094             :  *   anything else     - a target number to process
    2095             :  *
                     :  * Returns 0 on success, when called with an aborted transaction, or while
                     :  * BTRFS_FS_CREATING_FREE_SPACE_TREE is set in fs_info->flags.
                     :  * Returns <0 on error and aborts the transaction.
                     :  */
    2096     2400106 : int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
    2097             :                            unsigned long count)
    2098             : {
    2099     2400106 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2100     2400106 :         struct rb_node *node;
    2101     2400106 :         struct btrfs_delayed_ref_root *delayed_refs;
    2102     2400106 :         struct btrfs_delayed_ref_head *head;
    2103     2400106 :         int ret;
    2104     2400106 :         int run_all = count == (unsigned long)-1;
    2105             : 
    2106             :         /* We'll clean this up in btrfs_cleanup_transaction */
    2107     2400106 :         if (TRANS_ABORTED(trans))
    2108             :                 return 0;
    2109             : 
    2110     4800210 :         if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
    2111             :                 return 0;
    2112             : 
    2113     2400105 :         delayed_refs = &trans->transaction->delayed_refs;
    2114     2400105 :         if (count == 0)
    2115      591385 :                 count = delayed_refs->num_heads_ready;
    2116             : 
    2117     2400105 : again:
    2118             : #ifdef SCRAMBLE_DELAYED_REFS
    2119             :         delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
    2120             : #endif
    2121     4375259 :         ret = __btrfs_run_delayed_refs(trans, count);
    2122     4375263 :         if (ret < 0) {
    2123           1 :                 btrfs_abort_transaction(trans, ret);
    2124           1 :                 return ret;
    2125             :         }
    2126             : 
    2127     4375262 :         if (run_all) {
    2128     2868867 :                 btrfs_create_pending_block_groups(trans);
    2129             :                 /* Stop once the head tree is empty, else wait on its first head. */
    2130     2868867 :                 spin_lock(&delayed_refs->lock);
    2131     2868867 :                 node = rb_first_cached(&delayed_refs->href_root);
    2132     2868867 :                 if (!node) {
    2133      893713 :                         spin_unlock(&delayed_refs->lock);
    2134      893713 :                         goto out;
    2135             :                 }
    2136     1975154 :                 head = rb_entry(node, struct btrfs_delayed_ref_head,
    2137             :                                 href_node);
    2138     1975154 :                 refcount_inc(&head->refs);
    2139     1975154 :                 spin_unlock(&delayed_refs->lock);
    2140             : 
    2141             :                 /* Wait for any holder of this head's mutex to release it, then retry. */
    2142     1975154 :                 mutex_lock(&head->mutex);
    2143     1975154 :                 mutex_unlock(&head->mutex);
    2144             : 
    2145     1975154 :                 btrfs_put_delayed_ref_head(head);
    2146     1975154 :                 cond_resched();
    2147     1975154 :                 goto again;
    2148             :         }
    2149     1506395 : out:
    2150             :         return 0;
    2151             : }
    2152             : /*
                     :  * Queue a delayed extent op that sets @flags on the extent of tree block
                     :  * @eb (range eb->start, eb->len). The op is marked as a flags update, not
                     :  * a key update, and records the level read from @eb's header.
                     :  *
                     :  * Returns -ENOMEM if the op cannot be allocated, otherwise the result of
                     :  * btrfs_add_delayed_extent_op(); on a non-zero result the op is freed here.
                     :  */
    2153       71699 : int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
    2154             :                                 struct extent_buffer *eb, u64 flags)
    2155             : {
    2156       71699 :         struct btrfs_delayed_extent_op *extent_op;
    2157       71699 :         int level = btrfs_header_level(eb);
    2158       71699 :         int ret;
    2159             : 
    2160       71699 :         extent_op = btrfs_alloc_delayed_extent_op();
    2161       71699 :         if (!extent_op)
    2162             :                 return -ENOMEM;
    2163             : 
    2164       71699 :         extent_op->flags_to_set = flags;
    2165       71699 :         extent_op->update_flags = true;
    2166       71699 :         extent_op->update_key = false;
    2167       71699 :         extent_op->level = level;
    2168             : 
    2169       71699 :         ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
    2170       71699 :         if (ret)
    2171           0 :                 btrfs_free_delayed_extent_op(extent_op);
    2172             :         return ret;
    2173             : }
    2174             : /*
                     :  * Check the running transaction's delayed refs for a reference to the
                     :  * extent at @bytenr other than the data ref (root, @objectid, @offset).
                     :  *
                     :  * Returns 0 when there is no running transaction, no delayed ref head for
                     :  * @bytenr, or every ref queued on the head matches that data ref.
                     :  * Returns 1 when a ref of a type other than BTRFS_EXTENT_DATA_REF_KEY, or
                     :  * a data ref with a different root/objectid/offset, is queued.
                     :  * Returns -EAGAIN when the head's mutex is contended; unless path->nowait
                     :  * is set, @path is released and the mutex is waited on first.
                     :  */
    2175      355897 : static noinline int check_delayed_ref(struct btrfs_root *root,
    2176             :                                       struct btrfs_path *path,
    2177             :                                       u64 objectid, u64 offset, u64 bytenr)
    2178             : {
    2179      355897 :         struct btrfs_delayed_ref_head *head;
    2180      355897 :         struct btrfs_delayed_ref_node *ref;
    2181      355897 :         struct btrfs_delayed_data_ref *data_ref;
    2182      355897 :         struct btrfs_delayed_ref_root *delayed_refs;
    2183      355897 :         struct btrfs_transaction *cur_trans;
    2184      355897 :         struct rb_node *node;
    2185      355897 :         int ret = 0;
    2186             :         /* Take a reference on the running transaction (if any) under trans_lock. */
    2187      355897 :         spin_lock(&root->fs_info->trans_lock);
    2188      355899 :         cur_trans = root->fs_info->running_transaction;
    2189      355899 :         if (cur_trans)
    2190      354217 :                 refcount_inc(&cur_trans->use_count);
    2191      355899 :         spin_unlock(&root->fs_info->trans_lock);
    2192      355899 :         if (!cur_trans)
    2193             :                 return 0;
    2194             : 
    2195      354217 :         delayed_refs = &cur_trans->delayed_refs;
    2196      354217 :         spin_lock(&delayed_refs->lock);
    2197      354217 :         head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
    2198      354217 :         if (!head) {
    2199       60860 :                 spin_unlock(&delayed_refs->lock);
    2200       60860 :                 btrfs_put_transaction(cur_trans);
    2201       60860 :                 return 0;
    2202             :         }
    2203             : 
    2204      293357 :         if (!mutex_trylock(&head->mutex)) {
    2205           0 :                 if (path->nowait) {
    2206           0 :                         spin_unlock(&delayed_refs->lock);
    2207           0 :                         btrfs_put_transaction(cur_trans);
    2208           0 :                         return -EAGAIN;
    2209             :                 }
    2210             :                 /* Hold a ref on the head so it can be used after the lock is dropped. */
    2211           0 :                 refcount_inc(&head->refs);
    2212           0 :                 spin_unlock(&delayed_refs->lock);
    2213             : 
    2214           0 :                 btrfs_release_path(path);
    2215             : 
    2216             :                 /*
    2217             :                  * Mutex was contended, block until it's released and let
    2218             :                  * caller try again
    2219             :                  */
    2220           0 :                 mutex_lock(&head->mutex);
    2221           0 :                 mutex_unlock(&head->mutex);
    2222           0 :                 btrfs_put_delayed_ref_head(head);
    2223           0 :                 btrfs_put_transaction(cur_trans);
    2224           0 :                 return -EAGAIN;
    2225             :         }
    2226      293357 :         spin_unlock(&delayed_refs->lock);
    2227             :         /* Walk every ref queued on this head with head->lock held. */
    2228      293357 :         spin_lock(&head->lock);
    2229             :         /*
    2230             :          * XXX: We should replace this with a proper search function in the
    2231             :          * future.
    2232             :          */
    2233      549106 :         for (node = rb_first_cached(&head->ref_tree); node;
    2234      255749 :              node = rb_next(node)) {
    2235      319980 :                 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
    2236             :                 /* If it's a shared ref we know a cross reference exists */
    2237      319980 :                 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
    2238             :                         ret = 1;
    2239             :                         break;
    2240             :                 }
    2241             : 
    2242      319980 :                 data_ref = btrfs_delayed_node_to_data_ref(ref);
    2243             : 
    2244             :                 /*
    2245             :                  * If our ref doesn't match the one we're currently looking at
    2246             :                  * then we have a cross reference.
    2247             :                  */
    2248      319980 :                 if (data_ref->root != root->root_key.objectid ||
    2249      319980 :                     data_ref->objectid != objectid ||
    2250      319905 :                     data_ref->offset != offset) {
    2251             :                         ret = 1;
    2252             :                         break;
    2253             :                 }
    2254             :         }
    2255      293357 :         spin_unlock(&head->lock);
    2256      293357 :         mutex_unlock(&head->mutex);
    2257      293357 :         btrfs_put_transaction(cur_trans);
    2258      293357 :         return ret;
    2259             : }
    2260             : /*
                     :  * Check the extent tree for a reference to the extent at @bytenr other
                     :  * than a single inline data ref (root, @objectid, @offset).
                     :  *
                     :  * Returns <0 if btrfs_search_slot() fails.
                     :  * Returns -ENOENT if the search lands on slot 0, or the item in the
                     :  * preceding slot is not the BTRFS_EXTENT_ITEM_KEY item for @bytenr.
                     :  * Returns 1 if the extent item looks shared: its size is not exactly one
                     :  * extent item plus one inline data ref, the inline ref is not of type
                     :  * BTRFS_EXTENT_DATA_REF_KEY, or the ref's count/root/objectid/offset do not
                     :  * match. With @strict false, an extent whose generation is not newer than
                     :  * the root's last snapshot is also reported as shared.
                     :  * Returns 0 if none of the above applies.
                     :  *
                     :  * @path is left holding the search result.
                     :  */
    2261      361234 : static noinline int check_committed_ref(struct btrfs_root *root,
    2262             :                                         struct btrfs_path *path,
    2263             :                                         u64 objectid, u64 offset, u64 bytenr,
    2264             :                                         bool strict)
    2265             : {
    2266      361234 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2267      361234 :         struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
    2268      361236 :         struct extent_buffer *leaf;
    2269      361236 :         struct btrfs_extent_data_ref *ref;
    2270      361236 :         struct btrfs_extent_inline_ref *iref;
    2271      361236 :         struct btrfs_extent_item *ei;
    2272      361236 :         struct btrfs_key key;
    2273      361236 :         u32 item_size;
    2274      361236 :         int type;
    2275      361236 :         int ret;
    2276             :         /* Search with the largest possible offset; an exact hit is treated as corruption. */
    2277      361236 :         key.objectid = bytenr;
    2278      361236 :         key.offset = (u64)-1;
    2279      361236 :         key.type = BTRFS_EXTENT_ITEM_KEY;
    2280             : 
    2281      361236 :         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
    2282      361236 :         if (ret < 0)
    2283           0 :                 goto out;
    2284      361236 :         BUG_ON(ret == 0); /* Corruption */
    2285             : 
    2286      361236 :         ret = -ENOENT;
    2287      361236 :         if (path->slots[0] == 0)
    2288        2220 :                 goto out;
    2289             :         /* Step back one slot and read the key stored there. */
    2290      359016 :         path->slots[0]--;
    2291      359016 :         leaf = path->nodes[0];
    2292      359016 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    2293             : 
    2294      359017 :         if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
    2295      259917 :                 goto out;
    2296             :         /* From here on every early exit reports the extent as shared. */
    2297       99100 :         ret = 1;
    2298       99100 :         item_size = btrfs_item_size(leaf, path->slots[0]);
    2299       99100 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    2300             : 
    2301             :         /* If extent item has more than 1 inline ref then it's shared */
    2302       99099 :         if (item_size != sizeof(*ei) +
    2303             :             btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
    2304        5227 :                 goto out;
    2305             : 
    2306             :         /*
    2307             :          * If extent created before last snapshot => it's shared unless the
    2308             :          * snapshot has been deleted. Use the heuristic if strict is false.
    2309             :          */
    2310      187730 :         if (!strict &&
    2311             :             (btrfs_extent_generation(leaf, ei) <=
    2312             :              btrfs_root_last_snapshot(&root->root_item)))
    2313           0 :                 goto out;
    2314             :         /* The inline ref is located directly after the extent item. */
    2315       93872 :         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
    2316             : 
    2317             :         /* If this extent has SHARED_DATA_REF then it's shared */
    2318       93872 :         type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
    2319       93871 :         if (type != BTRFS_EXTENT_DATA_REF_KEY)
    2320           0 :                 goto out;
    2321             :         /* The data ref must account for all refs and match root/objectid/offset. */
    2322       93871 :         ref = (struct btrfs_extent_data_ref *)(&iref->offset);
    2323       93871 :         if (btrfs_extent_refs(leaf, ei) !=
    2324       93870 :             btrfs_extent_data_ref_count(leaf, ref) ||
    2325             :             btrfs_extent_data_ref_root(leaf, ref) !=
    2326      187742 :             root->root_key.objectid ||
    2327       93849 :             btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
    2328             :             btrfs_extent_data_ref_offset(leaf, ref) != offset)
    2329         111 :                 goto out;
    2330             : 
    2331             :         ret = 0;
    2332      361236 : out:
    2333      361236 :         return ret;
    2334             : }
    2335             : /*
                     :  * Report whether the extent at @bytenr is referenced by anything other
                     :  * than (root, @objectid, @offset).
                     :  *
                     :  * Runs check_committed_ref() and then check_delayed_ref(), repeating both
                     :  * for as long as the latter returns -EAGAIN. A non-zero result other than
                     :  * -ENOENT from check_committed_ref() is returned right away; otherwise the
                     :  * result of check_delayed_ref() is returned. @path is released before
                     :  * returning. A positive result on the data relocation root triggers a
                     :  * warning.
                     :  */
    2336      361234 : int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
    2337             :                           u64 bytenr, bool strict, struct btrfs_path *path)
    2338             : {
    2339      361234 :         int ret;
    2340             : 
    2341      361234 :         do {
    2342      361234 :                 ret = check_committed_ref(root, path, objectid,
    2343             :                                           offset, bytenr, strict);
    2344      361236 :                 if (ret && ret != -ENOENT)
    2345        5338 :                         goto out;
    2346             : 
    2347      355898 :                 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
    2348      355899 :         } while (ret == -EAGAIN);
    2349             : 
    2350      355899 : out:
    2351      361237 :         btrfs_release_path(path);
    2352      361237 :         if (btrfs_is_data_reloc_root(root))
    2353       22660 :                 WARN_ON(ret > 0);
    2354      361237 :         return ret;
    2355             : }
    2356             : /*
                     :  * Add (@inc non-zero) or drop (@inc zero) one reference for every extent
                     :  * that tree block @buf points to.
                     :  *
                     :  * For a leaf (level 0) this covers each BTRFS_EXTENT_DATA_KEY item that is
                     :  * not inline and has a non-zero disk bytenr; for a node it covers each
                     :  * child block pointer, using fs_info->nodesize as the length. When
                     :  * @full_backref is set the refs use buf->start as parent, otherwise 0.
                     :  *
                     :  * Returns 0 on success, and also without doing anything when the fs is a
                     :  * testing fs or @root is not shareable and @buf is a leaf. Otherwise
                     :  * returns the first non-zero result of btrfs_inc_extent_ref() or
                     :  * btrfs_free_extent().
                     :  */
    2357     3801480 : static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
    2358             :                            struct btrfs_root *root,
    2359             :                            struct extent_buffer *buf,
    2360             :                            int full_backref, int inc)
    2361             : {
    2362     3801480 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2363     3801480 :         u64 bytenr;
    2364     3801480 :         u64 num_bytes;
    2365     3801480 :         u64 parent;
    2366     3801480 :         u64 ref_root;
    2367     3801480 :         u32 nritems;
    2368     3801480 :         struct btrfs_key key;
    2369     3801480 :         struct btrfs_file_extent_item *fi;
    2370     3801480 :         struct btrfs_ref generic_ref = { 0 };
    2371     3801480 :         bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
    2372     3801480 :         int i;
    2373     3801480 :         int action;
    2374     3801480 :         int level;
    2375     3801480 :         int ret = 0;
    2376             : 
    2377     3801480 :         if (btrfs_is_testing(fs_info))
    2378             :                 return 0;
    2379             :         /* The ref owner, item count and level all come from the block header. */
    2380     3801480 :         ref_root = btrfs_header_owner(buf);
    2381     3801480 :         nritems = btrfs_header_nritems(buf);
    2382     3801480 :         level = btrfs_header_level(buf);
    2383             : 
    2384     3801480 :         if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
    2385             :                 return 0;
    2386             : 
    2387     3801480 :         if (full_backref)
    2388     3766605 :                 parent = buf->start;
    2389             :         else
    2390             :                 parent = 0;
    2391     3801480 :         if (inc)
    2392             :                 action = BTRFS_ADD_DELAYED_REF;
    2393             :         else
    2394     1872726 :                 action = BTRFS_DROP_DELAYED_REF;
    2395             : 
    2396    91714074 :         for (i = 0; i < nritems; i++) {
    2397    87912594 :                 if (level == 0) {
    2398    81188489 :                         btrfs_item_key_to_cpu(buf, &key, i);
    2399    81188586 :                         if (key.type != BTRFS_EXTENT_DATA_KEY)
    2400    64564499 :                                 continue;
    2401    16624087 :                         fi = btrfs_item_ptr(buf, i,
    2402             :                                             struct btrfs_file_extent_item);
    2403    16624080 :                         if (btrfs_file_extent_type(buf, fi) ==
    2404             :                             BTRFS_FILE_EXTENT_INLINE)
    2405      130282 :                                 continue;
    2406    16493799 :                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
    2407    16493797 :                         if (bytenr == 0)
    2408        3195 :                                 continue;
    2409             :                         /* The data ref offset is the key offset minus the item's extent offset. */
    2410    16490602 :                         num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
    2411    16490606 :                         key.offset -= btrfs_file_extent_offset(buf, fi);
    2412    16490606 :                         btrfs_init_generic_ref(&generic_ref, action, bytenr,
    2413             :                                                num_bytes, parent);
    2414    16490606 :                         btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
    2415             :                                             key.offset, root->root_key.objectid,
    2416             :                                             for_reloc);
    2417    16490613 :                         if (inc)
    2418     9023206 :                                 ret = btrfs_inc_extent_ref(trans, &generic_ref);
    2419             :                         else
    2420     7467407 :                                 ret = btrfs_free_extent(trans, &generic_ref);
    2421    16490621 :                         if (ret)
    2422           0 :                                 goto fail;
    2423             :                 } else {
    2424     6724105 :                         bytenr = btrfs_node_blockptr(buf, i);
    2425     6724105 :                         num_bytes = fs_info->nodesize;
    2426     6724105 :                         btrfs_init_generic_ref(&generic_ref, action, bytenr,
    2427             :                                                num_bytes, parent);
    2428     6724105 :                         btrfs_init_tree_ref(&generic_ref, level - 1, ref_root,
    2429             :                                             root->root_key.objectid, for_reloc);
    2430     6724105 :                         if (inc)
    2431     6583821 :                                 ret = btrfs_inc_extent_ref(trans, &generic_ref);
    2432             :                         else
    2433      140284 :                                 ret = btrfs_free_extent(trans, &generic_ref);
    2434     6723997 :                         if (ret)
    2435           0 :                                 goto fail;
    2436             :                 }
    2437             :         }
    2438             :         return 0;
    2439             : fail:
    2440             :         return ret;
    2441             : }
    2442             : /* Calls __btrfs_mod_ref() with inc set to 1 and returns its result. */
    2443     1928754 : int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    2444             :                   struct extent_buffer *buf, int full_backref)
    2445             : {
    2446     1928754 :         return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
    2447             : }
    2448             : /* Calls __btrfs_mod_ref() with inc set to 0 and returns its result. */
    2449       33901 : int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    2450             :                   struct extent_buffer *buf, int full_backref)
    2451             : {
    2452       33901 :         return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
    2453             : }
    2454             : /*
                     :  * Pick the block group type for an allocation on behalf of @root and
                     :  * return btrfs_get_alloc_profile() for it: DATA when @data is non-zero,
                     :  * SYSTEM when @root is the chunk root, METADATA otherwise.
                     :  */
    2455    12354834 : static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
    2456             : {
    2457    12354834 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2458    12354834 :         u64 flags;
    2459    12354834 :         u64 ret;
    2460             : 
    2461    12354834 :         if (data)
    2462             :                 flags = BTRFS_BLOCK_GROUP_DATA;
    2463     8795172 :         else if (root == fs_info->chunk_root)
    2464             :                 flags = BTRFS_BLOCK_GROUP_SYSTEM;
    2465             :         else
    2466     8793761 :                 flags = BTRFS_BLOCK_GROUP_METADATA;
    2467             : 
    2468    12354834 :         ret = btrfs_get_alloc_profile(fs_info, flags);
    2469    12355313 :         return ret;
    2470             : }
    2471             : /*
                     :  * Return the start of the first block group in
                     :  * fs_info->block_group_cache_tree, or 0 if that tree is empty. The tree is
                     :  * read with block_group_cache_lock held for reading.
                     :  */
    2472    12358356 : static u64 first_logical_byte(struct btrfs_fs_info *fs_info)
    2473             : {
    2474    12358356 :         struct rb_node *leftmost;
    2475    12358356 :         u64 bytenr = 0;
    2476             : 
    2477    12358356 :         read_lock(&fs_info->block_group_cache_lock);
    2478             :         /* Get the block group with the lowest logical start address. */
    2479    12358675 :         leftmost = rb_first_cached(&fs_info->block_group_cache_tree);
    2480    12358675 :         if (leftmost) {
    2481    12358675 :                 struct btrfs_block_group *bg;
    2482             : 
    2483    12358675 :                 bg = rb_entry(leftmost, struct btrfs_block_group, cache_node);
    2484    12358675 :                 bytenr = bg->start;
    2485             :         }
    2486    12358675 :         read_unlock(&fs_info->block_group_cache_lock);
    2487             : 
    2488    12358958 :         return bytenr;
    2489             : }
    2490             : /*
                     :  * Account @num_bytes at @bytenr as pinned in block group @cache and its
                     :  * space_info, then mark the range EXTENT_DIRTY in the transaction's
                     :  * pinned_extents. When @reserved is non-zero the bytes are also subtracted
                     :  * from cache->reserved and space_info->bytes_reserved.
                     :  *
                     :  * Always returns 0; the result of set_extent_bit() is not checked.
                     :  */
    2491     3069205 : static int pin_down_extent(struct btrfs_trans_handle *trans,
    2492             :                            struct btrfs_block_group *cache,
    2493             :                            u64 bytenr, u64 num_bytes, int reserved)
    2494             : {
    2495     3069205 :         struct btrfs_fs_info *fs_info = cache->fs_info;
    2496             :         /* Counters are updated with space_info->lock taken first, then cache->lock. */
    2497     3069205 :         spin_lock(&cache->space_info->lock);
    2498     3069230 :         spin_lock(&cache->lock);
    2499     3069229 :         cache->pinned += num_bytes;
    2500     3069229 :         btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
    2501             :                                              num_bytes);
    2502     3069223 :         if (reserved) {
    2503     3064252 :                 cache->reserved -= num_bytes;
    2504     3064252 :                 cache->space_info->bytes_reserved -= num_bytes;
    2505             :         }
    2506     3069223 :         spin_unlock(&cache->lock);
    2507     3069234 :         spin_unlock(&cache->space_info->lock);
    2508             : 
    2509     3069233 :         set_extent_bit(&trans->transaction->pinned_extents, bytenr,
    2510     3069233 :                        bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
    2511     3069236 :         return 0;
    2512             : }
    2513             : /*
                     :  * Look up the block group containing @bytenr and pin the range there via
                     :  * pin_down_extent(), passing @reserved through. A missing block group
                     :  * hits BUG_ON(). Always returns 0.
                     :  */
    2514     1673294 : int btrfs_pin_extent(struct btrfs_trans_handle *trans,
    2515             :                      u64 bytenr, u64 num_bytes, int reserved)
    2516             : {
    2517     1673294 :         struct btrfs_block_group *cache;
    2518             : 
    2519     1673294 :         cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
    2520     1673294 :         BUG_ON(!cache); /* Logic error */
    2521             : 
    2522     1673294 :         pin_down_extent(trans, cache, bytenr, num_bytes, reserved);
    2523             : 
    2524     1673294 :         btrfs_put_block_group(cache);
    2525     1673294 :         return 0;
    2526             : }
    2527             : 
    2528             : /*
    2529             :  * This function must be called within a transaction.
                     :  *
                     :  * Pin @num_bytes at @bytenr (not counted as reserved) and remove the range
                     :  * from its block group's free space, after fully caching that block group.
                     :  *
                     :  * Returns -EINVAL if no block group contains @bytenr, the error from
                     :  * btrfs_cache_block_group() if caching fails (nothing is pinned then),
                     :  * otherwise the result of btrfs_remove_free_space().
    2530             :  */
    2531        4971 : int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
    2532             :                                     u64 bytenr, u64 num_bytes)
    2533             : {
    2534        4971 :         struct btrfs_block_group *cache;
    2535        4971 :         int ret;
    2536             : 
    2537        4971 :         cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
    2538        4971 :         if (!cache)
    2539             :                 return -EINVAL;
    2540             : 
    2541             :         /*
    2542             :          * Fully cache the free space first so that our pin removes the free space
    2543             :          * from the cache.
    2544             :          */
    2545        4971 :         ret = btrfs_cache_block_group(cache, true);
    2546        4971 :         if (ret)
    2547           0 :                 goto out;
    2548             : 
    2549        4971 :         pin_down_extent(trans, cache, bytenr, num_bytes, 0);
    2550             : 
    2551             :         /* remove us from the free space cache (if we're there at all) */
    2552        4971 :         ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
    2553        4971 : out:
    2554        4971 :         btrfs_put_block_group(cache);
    2555        4971 :         return ret;
    2556             : }
    2557             : /*
                     :  * Remove @num_bytes at @start from the free space of the block group that
                     :  * contains @start, after calling btrfs_cache_block_group() on it.
                     :  *
                     :  * Returns -EINVAL if no block group contains @start, the error from
                     :  * btrfs_cache_block_group() if that fails, otherwise the result of
                     :  * btrfs_remove_free_space().
                     :  */
    2558       76673 : static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
    2559             :                                    u64 start, u64 num_bytes)
    2560             : {
    2561       76673 :         int ret;
    2562       76673 :         struct btrfs_block_group *block_group;
    2563             : 
    2564       76673 :         block_group = btrfs_lookup_block_group(fs_info, start);
    2565       76673 :         if (!block_group)
    2566             :                 return -EINVAL;
    2567             : 
    2568       76673 :         ret = btrfs_cache_block_group(block_group, true);
    2569       76673 :         if (ret)
    2570           0 :                 goto out;
    2571             : 
    2572       76673 :         ret = btrfs_remove_free_space(block_group, start, num_bytes);
    2573       76673 : out:
    2574       76673 :         btrfs_put_block_group(block_group);
    2575       76673 :         return ret;
    2576             : }
    2577             : /*
                     :  * On a filesystem with the MIXED_GROUPS incompat feature, walk the items
                     :  * of @eb and call __exclude_logged_extent() for every file extent item
                     :  * that is not inline and has a non-zero disk bytenr. Without that feature
                     :  * nothing is done.
                     :  *
                     :  * The local key is reused as scratch space for the extent's disk bytenr
                     :  * (objectid) and disk num_bytes (offset).
                     :  *
                     :  * Returns 0 on success, or the first error from __exclude_logged_extent(),
                     :  * at which point the walk stops.
                     :  */
    2578        4431 : int btrfs_exclude_logged_extents(struct extent_buffer *eb)
    2579             : {
    2580        4431 :         struct btrfs_fs_info *fs_info = eb->fs_info;
    2581        4431 :         struct btrfs_file_extent_item *item;
    2582        4431 :         struct btrfs_key key;
    2583        4431 :         int found_type;
    2584        4431 :         int i;
    2585        4431 :         int ret = 0;
    2586             : 
    2587        4431 :         if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
    2588             :                 return 0;
    2589             : 
    2590           0 :         for (i = 0; i < btrfs_header_nritems(eb); i++) {
    2591           0 :                 btrfs_item_key_to_cpu(eb, &key, i);
    2592           0 :                 if (key.type != BTRFS_EXTENT_DATA_KEY)
    2593           0 :                         continue;
    2594           0 :                 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
    2595           0 :                 found_type = btrfs_file_extent_type(eb, item);
    2596           0 :                 if (found_type == BTRFS_FILE_EXTENT_INLINE)
    2597           0 :                         continue;
    2598           0 :                 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
    2599           0 :                         continue;
    2600           0 :                 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
    2601           0 :                 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
    2602           0 :                 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
    2603           0 :                 if (ret)
    2604             :                         break;
    2605             :         }
    2606             : 
    2607             :         return ret;
    2608             : }
    2609             : /* Atomically increment the reservations counter of block group @bg. */
    2610             : static void
    2611             : btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
    2612             : {
    2613    12355409 :         atomic_inc(&bg->reservations);
    2614             : }
    2615             : 
    2616             : /*
    2617             :  * Returns the free cluster for the given space info and sets *empty_cluster
    2618             :  * to what it should be based on the mount options:
                     :  *
                     :  *   mixed space info              - NULL, *empty_cluster = 0
                     :  *   metadata space info           - &fs_info->meta_alloc_cluster,
                     :  *                                   SZ_2M with the SSD option, else SZ_64K
                     :  *   data space info + SSD_SPREAD  - &fs_info->data_alloc_cluster, SZ_2M
                     :  *   anything else                 - NULL, *empty_cluster = 0
    2619             :  */
    2620             : static struct btrfs_free_cluster *
    2621    16012921 : fetch_cluster_info(struct btrfs_fs_info *fs_info,
    2622             :                    struct btrfs_space_info *space_info, u64 *empty_cluster)
    2623             : {
    2624    16012921 :         struct btrfs_free_cluster *ret = NULL;
    2625             : 
    2626    16012921 :         *empty_cluster = 0;
    2627    16012921 :         if (btrfs_mixed_space_info(space_info))
    2628             :                 return ret;
    2629             : 
    2630    15422031 :         if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
    2631    11779375 :                 ret = &fs_info->meta_alloc_cluster;
    2632    11779375 :                 if (btrfs_test_opt(fs_info, SSD))
    2633        6579 :                         *empty_cluster = SZ_2M;
    2634             :                 else
    2635    11772796 :                         *empty_cluster = SZ_64K;
    2636     3642656 :         } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
    2637     3639837 :                    btrfs_test_opt(fs_info, SSD_SPREAD)) {
    2638           0 :                 *empty_cluster = SZ_2M;
    2639           0 :                 ret = &fs_info->data_alloc_cluster;
    2640             :         }
    2641             : 
    2642             :         return ret;
    2643             : }
    2644             : /*
                     :  * Unpin the byte range [start, end] (end is inclusive), processing it one
                     :  * block group at a time.
                     :  *
                     :  * For every piece of the range that falls inside one block group:
                     :  *   - when return_free_space is true, the piece is passed to
                     :  *     btrfs_add_free_space();
                     :  *   - if the cluster reported by fetch_cluster_info() is marked fragmented
                     :  *     and more than twice the reported empty_cluster size has been unpinned
                     :  *     in the current block group, the fragmented flag is cleared;
                     :  *   - the block group's pinned counter is reduced by the piece length, -len
                     :  *     is passed to btrfs_space_info_update_bytes_pinned() and the space
                     :  *     info's max_extent_size is reset to 0;
                     :  *   - a piece in a read-only block group is added to bytes_readonly, and
                     :  *     otherwise on a zoned filesystem to bytes_zone_unusable; in both cases
                     :  *     it is offered neither to the global reserve nor to tickets;
                     :  *   - otherwise, when return_free_space is true, the global block reserve
                     :  *     is topped up first (only if it uses the same space info and is not
                     :  *     full) and, if any length is left, btrfs_try_granting_tickets() is
                     :  *     called.
                     :  *
                     :  * Hits BUG_ON() if no block group is found for the current start offset.
                     :  * Always returns 0.
                     :  */
    2645     3654276 : static int unpin_extent_range(struct btrfs_fs_info *fs_info,
    2646             :                               u64 start, u64 end,
    2647             :                               const bool return_free_space)
    2648             : {
    2649     3654276 :         struct btrfs_block_group *cache = NULL;
    2650     3654276 :         struct btrfs_space_info *space_info;
    2651     3654276 :         struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
    2652     3654276 :         struct btrfs_free_cluster *cluster = NULL;
    2653     3654276 :         u64 len;
    2654     3654276 :         u64 total_unpinned = 0;
    2655     3654276 :         u64 empty_cluster = 0;
    2656     3654276 :         bool readonly;
    2657             : 
    2658     7309036 :         while (start <= end) {
    2659     3654760 :                 readonly = false;
    2660     3654760 :                 if (!cache ||
    2661         484 :                     start >= cache->start + cache->length) {
    2662     3654760 :                         if (cache)
    2663         484 :                                 btrfs_put_block_group(cache);
    2664     3654760 :                         total_unpinned = 0; /* counted per block group */
    2665     3654760 :                         cache = btrfs_lookup_block_group(fs_info, start);
    2666     3654760 :                         BUG_ON(!cache); /* Logic error */
    2667             : 
    2668     3654760 :                         cluster = fetch_cluster_info(fs_info,
    2669             :                                                      cache->space_info,
    2670             :                                                      &empty_cluster);
    2671     3654760 :                         empty_cluster <<= 1; /* threshold below is twice the reported size */
    2672             :                 }
    2673             : 
    2674     3654760 :                 len = cache->start + cache->length - start;
    2675     3654760 :                 len = min(len, end + 1 - start); /* stop at block group end or range end, whichever is first */
    2676             : 
    2677     3654760 :                 if (return_free_space)
    2678     3654616 :                         btrfs_add_free_space(cache, start, len);
    2679             : 
    2680     3654760 :                 start += len;
    2681     3654760 :                 total_unpinned += len;
    2682     3654760 :                 space_info = cache->space_info;
    2683             : 
    2684             :                 /*
    2685             :                  * If this space cluster has been marked as fragmented and we've
    2686             :                  * unpinned enough in this block group to potentially allow a
    2687             :                  * cluster to be created inside of it go ahead and clear the
    2688             :                  * fragmented check.
    2689             :                  */
    2690     3654760 :                 if (cluster && cluster->fragmented &&
    2691           0 :                     total_unpinned > empty_cluster) {
    2692           0 :                         spin_lock(&cluster->lock);
    2693           0 :                         cluster->fragmented = 0;
    2694           0 :                         spin_unlock(&cluster->lock);
    2695             :                 }
    2696             : 
    2697     3654760 :                 spin_lock(&space_info->lock); /* taken before cache->lock and global_rsv->lock */
    2698     3654760 :                 spin_lock(&cache->lock);
    2699     3654760 :                 cache->pinned -= len;
    2700     3654760 :                 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
    2701     3654760 :                 space_info->max_extent_size = 0;
    2702     3654760 :                 if (cache->ro) {
    2703       55494 :                         space_info->bytes_readonly += len;
    2704       55494 :                         readonly = true;
    2705             :                 } else if (btrfs_is_zoned(fs_info)) {
    2706             :                         /* Need reset before reusing in a zoned block group */
    2707             :                         space_info->bytes_zone_unusable += len;
    2708             :                         readonly = true;
    2709             :                 }
    2710     3654760 :                 spin_unlock(&cache->lock);
    2711     3654760 :                 if (!readonly && return_free_space &&
    2712     3599122 :                     global_rsv->space_info == space_info) {
    2713     3438369 :                         spin_lock(&global_rsv->lock);
    2714     3438369 :                         if (!global_rsv->full) {
    2715      291299 :                                 u64 to_add = min(len, global_rsv->size -
    2716             :                                                       global_rsv->reserved);
    2717             : 
    2718      291299 :                                 global_rsv->reserved += to_add;
    2719      291299 :                                 btrfs_space_info_update_bytes_may_use(fs_info,
    2720             :                                                 space_info, to_add);
    2721      291299 :                                 if (global_rsv->reserved >= global_rsv->size)
    2722      142810 :                                         global_rsv->full = 1;
    2723      291299 :                                 len -= to_add; /* only what the reserve did not take remains */
    2724             :                         }
    2725     3438369 :                         spin_unlock(&global_rsv->lock);
    2726             :                 }
    2727             :                 /* Add to any tickets we may have */
    2728     3654760 :                 if (!readonly && return_free_space && len)
    2729     3347676 :                         btrfs_try_granting_tickets(fs_info, space_info);
    2730     3654760 :                 spin_unlock(&space_info->lock);
    2731             :         }
    2732             : 
    2733     3654276 :         if (cache)
    2734     3654276 :                 btrfs_put_block_group(cache);
    2735     3654276 :         return 0;
    2736             : }
    2737             : /*
                     :  * Release the ranges recorded in the transaction's pinned_extents tree and
                     :  * finish off the block groups queued on the transaction's deleted_bgs list.
                     :  *
                     :  * Phase 1: while the transaction is not aborted, each EXTENT_DIRTY range
                     :  * found in pinned_extents is discarded first if the DISCARD_SYNC option is
                     :  * set, then cleared from the tree and handed to unpin_extent_range() with
                     :  * return_free_space set to true.  Each iteration runs under
                     :  * fs_info->unused_bg_unpin_mutex.
                     :  *
                     :  * Phase 2: with DISCARD_ASYNC set, the discard delay is recalculated and the
                     :  * discard work is scheduled.
                     :  *
                     :  * Phase 3: every block group on deleted_bgs is discarded (or, if the
                     :  * transaction was aborted, the discard is skipped and -EROFS is used as the
                     :  * result), taken off the list, unfrozen and put.  A non-zero result is only
                     :  * reported with btrfs_warn().
                     :  *
                     :  * Always returns 0.
                     :  */
    2738      203256 : int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
    2739             : {
    2740      203256 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2741      203256 :         struct btrfs_block_group *block_group, *tmp;
    2742      203256 :         struct list_head *deleted_bgs;
    2743      203256 :         struct extent_io_tree *unpin;
    2744      203256 :         u64 start;
    2745      203256 :         u64 end;
    2746      203256 :         int ret;
    2747             : 
    2748      203256 :         unpin = &trans->transaction->pinned_extents;
    2749             : 
    2750     3857388 :         while (!TRANS_ABORTED(trans)) {
    2751     3857388 :                 struct extent_state *cached_state = NULL;
    2752             : 
    2753     3857388 :                 mutex_lock(&fs_info->unused_bg_unpin_mutex);
    2754     3857388 :                 ret = find_first_extent_bit(unpin, 0, &start, &end,
    2755             :                                             EXTENT_DIRTY, &cached_state);
    2756     3857388 :                 if (ret) {
    2757      203256 :                         mutex_unlock(&fs_info->unused_bg_unpin_mutex);
    2758      203256 :                         break;
    2759             :                 }
    2760             : 
    2761     3654132 :                 if (btrfs_test_opt(fs_info, DISCARD_SYNC))
    2762          36 :                         ret = btrfs_discard_extent(fs_info, start,
    2763          36 :                                                    end + 1 - start, NULL);
    2764             :                 /*
                     :                  * NOTE(review): the discard result stored in ret above is
                     :                  * overwritten by the next find_first_extent_bit() call without
                     :                  * being checked -- presumably discard is best effort here;
                     :                  * confirm.
                     :                  */
    2765     3654132 :                 clear_extent_dirty(unpin, start, end, &cached_state);
    2766     3654132 :                 unpin_extent_range(fs_info, start, end, true);
    2767     3654132 :                 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
    2768     3654132 :                 free_extent_state(cached_state);
    2769     3654132 :                 cond_resched();
    2770             :         }
    2771             : 
    2772      203256 :         if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
    2773      203227 :                 btrfs_discard_calc_delay(&fs_info->discard_ctl);
    2774      203227 :                 btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
    2775             :         }
    2776             : 
    2777             :         /*
    2778             :          * Transaction is finished.  We don't need the lock anymore.  We
    2779             :          * do need to clean up the block groups in case of a transaction
    2780             :          * abort.
    2781             :          */
    2782      203256 :         deleted_bgs = &trans->transaction->deleted_bgs;
    2783      203256 :         list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
    2784           0 :                 u64 trimmed = 0;
    2785             : 
    2786           0 :                 ret = -EROFS; /* result reported when the discard is skipped */
    2787           0 :                 if (!TRANS_ABORTED(trans))
    2788           0 :                         ret = btrfs_discard_extent(fs_info,
    2789             :                                                    block_group->start,
    2790             :                                                    block_group->length,
    2791             :                                                    &trimmed);
    2792             : 
    2793           0 :                 list_del_init(&block_group->bg_list);
    2794           0 :                 btrfs_unfreeze_block_group(block_group);
    2795           0 :                 btrfs_put_block_group(block_group);
    2796             : 
    2797           0 :                 if (ret) {
    2798           0 :                         const char *errstr = btrfs_decode_error(ret);
    2799           0 :                         btrfs_warn(fs_info,
    2800             :                            "discard failed while removing blockgroup: errno=%d %s",
    2801             :                                    ret, errstr);
    2802             :                 }
    2803             :         }
    2804             : 
    2805      203256 :         return 0;
    2806             : }
    2807             : /*
                     :  * Accounting performed by __btrfs_free_extent() after it has deleted the
                     :  * extent item for [bytenr, bytenr + num_bytes).
                     :  *
                     :  * Steps, in order:
                     :  *   1. for data extents only (is_data), call btrfs_del_csums() on the range
                     :  *      using the csum root looked up for bytenr;
                     :  *   2. call add_to_free_space_tree() on the range;
                     :  *   3. call btrfs_update_block_group() on the range.
                     :  *
                     :  * A failing step aborts the transaction and its error code is returned; the
                     :  * remaining steps are not attempted.  Returns 0 when all steps succeed.
                     :  */
    2808     8073269 : static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
    2809             :                                      u64 bytenr, u64 num_bytes, bool is_data)
    2810             : {
    2811     8073269 :         int ret;
    2812             : 
    2813     8073269 :         if (is_data) {
    2814     1015715 :                 struct btrfs_root *csum_root;
    2815             : 
    2816     1015715 :                 csum_root = btrfs_csum_root(trans->fs_info, bytenr);
    2817     1015714 :                 ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
    2818     1015715 :                 if (ret) {
    2819           0 :                         btrfs_abort_transaction(trans, ret);
    2820           0 :                         return ret;
    2821             :                 }
    2822             :         }
    2823             : 
    2824     8073269 :         ret = add_to_free_space_tree(trans, bytenr, num_bytes);
    2825     8073268 :         if (ret) {
    2826           0 :                 btrfs_abort_transaction(trans, ret);
    2827           0 :                 return ret;
    2828             :         }
    2829             : 
    2830     8073268 :         ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
    2831     8073269 :         if (ret)
    2832           0 :                 btrfs_abort_transaction(trans, ret);
    2833             : 
    2834             :         return ret;
    2835             : }
    2836             : /*
                     :  * Abort the transaction with -EUCLEAN, print the leaf at path->nodes[0] and
                     :  * emit a critical message built from fmt and args.
                     :  *
                     :  * The macro does not assign an error code to anything in the caller; callers
                     :  * that need a return value must set it themselves.  The path argument is
                     :  * expanded without parentheses, so pass a plain pointer name.
                     :  */
    2837             : #define abort_and_dump(trans, path, fmt, args...)       \
    2838             : ({                                                      \
    2839             :         btrfs_abort_transaction(trans, -EUCLEAN);       \
    2840             :         btrfs_print_leaf(path->nodes[0]);            \
    2841             :         btrfs_crit(trans->fs_info, fmt, ##args);     \
    2842             : })
    2843             : 
    2844             : /*
    2845             :  * Drop one or more refs of @node.
    2846             :  *
    2847             :  * 1. Locate the extent refs.
    2848             :  *    It's either inline in EXTENT/METADATA_ITEM or in keyed SHARED_* item.
    2849             :  *    Locate it, then reduce the refs number or remove the ref line completely.
    2850             :  *
    2851             :  * 2. Update the refs count in EXTENT/METADATA_ITEM
    2852             :  *
    2853             :  * Inline backref case:
    2854             :  *
    2855             :  * in extent tree we have:
    2856             :  *
    2857             :  *      item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
    2858             :  *              refs 2 gen 6 flags DATA
    2859             :  *              extent data backref root FS_TREE objectid 258 offset 0 count 1
    2860             :  *              extent data backref root FS_TREE objectid 257 offset 0 count 1
    2861             :  *
    2862             :  * This function gets called with:
    2863             :  *
    2864             :  *    node->bytenr = 13631488
    2865             :  *    node->num_bytes = 1048576
    2866             :  *    root_objectid = FS_TREE
    2867             :  *    owner_objectid = 257
    2868             :  *    owner_offset = 0
    2869             :  *    refs_to_drop = 1
    2870             :  *
    2871             :  * Then we should get something like:
    2872             :  *
    2873             :  *      item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
    2874             :  *              refs 1 gen 6 flags DATA
    2875             :  *              extent data backref root FS_TREE objectid 258 offset 0 count 1
    2876             :  *
    2877             :  * Keyed backref case:
    2878             :  *
    2879             :  * in extent tree we have:
    2880             :  *
    2881             :  *      item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
    2882             :  *              refs 754 gen 6 flags DATA
    2883             :  *      [...]
    2884             :  *      item 2 key (13631488 EXTENT_DATA_REF <HASH>) itemoff 3915 itemsize 28
    2885             :  *              extent data backref root FS_TREE objectid 866 offset 0 count 1
    2886             :  *
    2887             :  * This function gets called with:
    2888             :  *
    2889             :  *    node->bytenr = 13631488
    2890             :  *    node->num_bytes = 1048576
    2891             :  *    root_objectid = FS_TREE
    2892             :  *    owner_objectid = 866
    2893             :  *    owner_offset = 0
    2894             :  *    refs_to_drop = 1
    2895             :  *
    2896             :  * Then we should get something like:
    2897             :  *
    2898             :  *      item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
    2899             :  *              refs 753 gen 6 flags DATA
    2900             :  *
    2901             :  * And that (13631488 EXTENT_DATA_REF <HASH>) gets removed.
    2902             :  */
    2903    21909262 : static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
    2904             :                                struct btrfs_delayed_ref_node *node, u64 parent,
    2905             :                                u64 root_objectid, u64 owner_objectid,
    2906             :                                u64 owner_offset, int refs_to_drop,
    2907             :                                struct btrfs_delayed_extent_op *extent_op)
    2908             : {
    2909    21909262 :         struct btrfs_fs_info *info = trans->fs_info;
    2910    21909262 :         struct btrfs_key key;
    2911    21909262 :         struct btrfs_path *path;
    2912    21909262 :         struct btrfs_root *extent_root;
    2913    21909262 :         struct extent_buffer *leaf;
    2914    21909262 :         struct btrfs_extent_item *ei;
    2915    21909262 :         struct btrfs_extent_inline_ref *iref;
    2916    21909262 :         int ret;
    2917    21909262 :         int is_data;
    2918    21909262 :         int extent_slot = 0;
    2919    21909262 :         int found_extent = 0;
    2920    21909262 :         int num_to_del = 1;
    2921    21909262 :         u32 item_size;
    2922    21909262 :         u64 refs;
    2923    21909262 :         u64 bytenr = node->bytenr;
    2924    21909262 :         u64 num_bytes = node->num_bytes;
    2925    21909262 :         bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
    2926             : 
    2927    21909262 :         extent_root = btrfs_extent_root(info, bytenr);
    2928    21909242 :         ASSERT(extent_root);
    2929             : 
    2930    21909242 :         path = btrfs_alloc_path();
    2931    21909255 :         if (!path)
    2932             :                 return -ENOMEM;
    2933             : 
    2934    21909255 :         is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
    2935             : 
    2936    21909255 :         if (!is_data && refs_to_drop != 1) {
    2937           0 :                 btrfs_crit(info,
    2938             : "invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
    2939             :                            node->bytenr, refs_to_drop);
    2940           0 :                 ret = -EINVAL;
    2941           0 :                 btrfs_abort_transaction(trans, ret);
    2942           0 :                 goto out;
    2943             :         }
    2944             : 
    2945    21909255 :         if (is_data)
    2946     8516373 :                 skinny_metadata = false;
    2947             : 
    2948    21909255 :         ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
    2949             :                                     parent, root_objectid, owner_objectid,
    2950             :                                     owner_offset);
    2951    21909286 :         if (ret == 0) {
    2952             :                 /*
    2953             :                  * Either the inline backref or the SHARED_DATA_REF/
    2954             :                  * SHARED_BLOCK_REF is found
    2955             :                  *
    2956             :                  * Here is a quick path to locate EXTENT/METADATA_ITEM.
    2957             :                  * It's possible the EXTENT/METADATA_ITEM is near current slot.
    2958             :                  */
    2959    21909286 :                 extent_slot = path->slots[0];
    2960    22875572 :                 while (extent_slot >= 0) {
    2961    22849000 :                         btrfs_item_key_to_cpu(path->nodes[0], &key,
    2962             :                                               extent_slot);
    2963    22849000 :                         if (key.objectid != bytenr)
    2964             :                                 break;
    2965    22849000 :                         if (key.type == BTRFS_EXTENT_ITEM_KEY &&
    2966     8351722 :                             key.offset == num_bytes) {
    2967             :                                 found_extent = 1;
    2968             :                                 break;
    2969             :                         }
    2970    14497278 :                         if (key.type == BTRFS_METADATA_ITEM_KEY &&
    2971    13392883 :                             key.offset == owner_objectid) {
    2972             :                                 found_extent = 1;
    2973             :                                 break;
    2974             :                         }
    2975             : 
    2976             :                         /* Quick path didn't find the EXTENT/METADATA_ITEM */
    2977     1104395 :                         if (path->slots[0] - extent_slot > 5)
    2978             :                                 break;
    2979      966286 :                         extent_slot--;
    2980             :                 }
    2981             : 
    2982    21909286 :                 if (!found_extent) {
    2983      164681 :                         if (iref) {
    2984           0 :                                 abort_and_dump(trans, path,
    2985             : "invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref",
    2986             :                                            path->slots[0]);
    2987           0 :                                 ret = -EUCLEAN;
    2988           0 :                                 goto out;
    2989             :                         }
    2990             :                         /* Must be SHARED_* item, remove the backref first */
    2991      164681 :                         ret = remove_extent_backref(trans, extent_root, path,
    2992             :                                                     NULL, refs_to_drop, is_data);
    2993      164681 :                         if (ret) {
    2994           0 :                                 btrfs_abort_transaction(trans, ret);
    2995           0 :                                 goto out;
    2996             :                         }
    2997      164681 :                         btrfs_release_path(path);
    2998             : 
    2999             :                         /* Slow path to locate EXTENT/METADATA_ITEM */
    3000      164681 :                         key.objectid = bytenr;
    3001      164681 :                         key.type = BTRFS_EXTENT_ITEM_KEY;
    3002      164681 :                         key.offset = num_bytes;
    3003             : 
    3004      164681 :                         if (!is_data && skinny_metadata) {
    3005           0 :                                 key.type = BTRFS_METADATA_ITEM_KEY;
    3006           0 :                                 key.offset = owner_objectid;
    3007             :                         }
    3008             : 
    3009      164681 :                         ret = btrfs_search_slot(trans, extent_root,
    3010             :                                                 &key, path, -1, 1);
    3011      164681 :                         if (ret > 0 && skinny_metadata && path->slots[0]) {
    3012             :                                 /*
    3013             :                                  * Couldn't find our skinny metadata item,
    3014             :                                  * see if we have ye olde extent item.
    3015             :                                  */
    3016           0 :                                 path->slots[0]--;
    3017           0 :                                 btrfs_item_key_to_cpu(path->nodes[0], &key,
    3018             :                                                       path->slots[0]);
    3019           0 :                                 if (key.objectid == bytenr &&
    3020           0 :                                     key.type == BTRFS_EXTENT_ITEM_KEY &&
    3021           0 :                                     key.offset == num_bytes)
    3022           0 :                                         ret = 0;
    3023             :                         }
    3024             : 
    3025      164681 :                         if (ret > 0 && skinny_metadata) {
    3026           0 :                                 skinny_metadata = false;
    3027           0 :                                 key.objectid = bytenr;
    3028           0 :                                 key.type = BTRFS_EXTENT_ITEM_KEY;
    3029           0 :                                 key.offset = num_bytes;
    3030           0 :                                 btrfs_release_path(path);
    3031           0 :                                 ret = btrfs_search_slot(trans, extent_root,
    3032             :                                                         &key, path, -1, 1);
    3033             :                         }
    3034             : 
    3035      164681 :                         if (ret) {
    3036           0 :                                 if (ret > 0)
    3037           0 :                                         btrfs_print_leaf(path->nodes[0]);
    3038           0 :                                 btrfs_err(info,
    3039             :                         "umm, got %d back from search, was looking for %llu, slot %d",
    3040             :                                           ret, bytenr, path->slots[0]);
    3041             :                         }
    3042      164681 :                         if (ret < 0) {
    3043           0 :                                 btrfs_abort_transaction(trans, ret);
    3044           0 :                                 goto out;
    3045             :                         }
    3046      164681 :                         extent_slot = path->slots[0];
    3047             :                 }
    3048           0 :         } else if (WARN_ON(ret == -ENOENT)) {
    3049           0 :                 abort_and_dump(trans, path,
    3050             : "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu slot %d",
    3051             :                                bytenr, parent, root_objectid, owner_objectid,
    3052             :                                owner_offset, path->slots[0]);
    3053           0 :                 goto out;
    3054             :         } else {
    3055           0 :                 btrfs_abort_transaction(trans, ret);
    3056           0 :                 goto out;
    3057             :         }
    3058             : 
    3059    21909286 :         leaf = path->nodes[0];
    3060    21909286 :         item_size = btrfs_item_size(leaf, extent_slot);
    3061    21909286 :         if (unlikely(item_size < sizeof(*ei))) {
    3062           0 :                 ret = -EINVAL;
    3063           0 :                 btrfs_print_v0_err(info);
    3064           0 :                 btrfs_abort_transaction(trans, ret);
    3065           0 :                 goto out;
    3066             :         }
    3067    21909286 :         ei = btrfs_item_ptr(leaf, extent_slot,
    3068             :                             struct btrfs_extent_item);
    3069    21909286 :         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
    3070    13392883 :             key.type == BTRFS_EXTENT_ITEM_KEY) {
    3071           0 :                 struct btrfs_tree_block_info *bi;
    3072             : 
    3073           0 :                 if (item_size < sizeof(*ei) + sizeof(*bi)) {
    3074           0 :                         abort_and_dump(trans, path,
    3075             : "invalid extent item size for key (%llu, %u, %llu) slot %u owner %llu, has %u expect >= %zu",
    3076             :                                        key.objectid, key.type, key.offset,
    3077             :                                        path->slots[0], owner_objectid, item_size,
    3078             :                                        sizeof(*ei) + sizeof(*bi));
    3079           0 :                         ret = -EUCLEAN;
    3080           0 :                         goto out;
    3081             :                 }
    3082           0 :                 bi = (struct btrfs_tree_block_info *)(ei + 1);
    3083           0 :                 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
    3084             :         }
    3085             : 
    3086    21909286 :         refs = btrfs_extent_refs(leaf, ei);
    3087    21909286 :         if (refs < refs_to_drop) {
    3088           0 :                 abort_and_dump(trans, path,
    3089             :                 "trying to drop %d refs but we only have %llu for bytenr %llu slot %u",
    3090             :                                refs_to_drop, refs, bytenr, path->slots[0]);
    3091           0 :                 ret = -EUCLEAN;
    3092           0 :                 goto out;
    3093             :         }
    3094    21909286 :         refs -= refs_to_drop;
    3095             : 
    3096    21909286 :         if (refs > 0) {
    3097    13836017 :                 if (extent_op)
    3098       42090 :                         __run_delayed_extent_op(extent_op, leaf, ei);
    3099             :                 /*
    3100             :                  * In the case of inline back ref, reference count will
    3101             :                  * be updated by remove_extent_backref
    3102             :                  */
    3103    13836017 :                 if (iref) {
    3104    13612388 :                         if (!found_extent) {
    3105           0 :                                 abort_and_dump(trans, path,
    3106             : "invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u",
    3107             :                                                path->slots[0]);
    3108           0 :                                 ret = -EUCLEAN;
    3109           0 :                                 goto out;
    3110             :                         }
    3111             :                 } else {
    3112      223629 :                         btrfs_set_extent_refs(leaf, ei, refs);
    3113      223629 :                         btrfs_mark_buffer_dirty(leaf);
    3114             :                 }
    3115    13836017 :                 if (found_extent) {
    3116    13671369 :                         ret = remove_extent_backref(trans, extent_root, path,
    3117             :                                                     iref, refs_to_drop, is_data);
    3118    13671369 :                         if (ret) {
    3119           0 :                                 btrfs_abort_transaction(trans, ret);
    3120           0 :                                 goto out;
    3121             :                         }
    3122             :                 }
    3123             :         } else {
    3124             :                 /* In this branch refs == 1 */
    3125     8073269 :                 if (found_extent) {
    3126     9088918 :                         if (is_data && refs_to_drop !=
    3127     1015682 :                             extent_data_ref_count(path, iref)) {
    3128           0 :                                 abort_and_dump(trans, path,
    3129             :                 "invalid refs_to_drop, current refs %u refs_to_drop %u slot %u",
    3130             :                                                extent_data_ref_count(path, iref),
    3131             :                                                refs_to_drop, path->slots[0]);
    3132           0 :                                 ret = -EUCLEAN;
    3133           0 :                                 goto out;
    3134             :                         }
    3135     8073236 :                         if (iref) {
    3136     8072964 :                                 if (path->slots[0] != extent_slot) {
    3137           0 :                                         abort_and_dump(trans, path,
    3138             : "invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref",
    3139             :                                                        key.objectid, key.type,
    3140             :                                                        key.offset, path->slots[0]);
    3141           0 :                                         ret = -EUCLEAN;
    3142           0 :                                         goto out;
    3143             :                                 }
    3144             :                         } else {
    3145             :                                 /*
    3146             :                                  * No inline ref, we must be at SHARED_* item,
    3147             :                                  * And it's single ref, it must be:
    3148             :                                  * |    extent_slot       ||extent_slot + 1|
    3149             :                                  * [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ]
    3150             :                                  */
    3151         272 :                                 if (path->slots[0] != extent_slot + 1) {
    3152           0 :                                         abort_and_dump(trans, path,
    3153             :         "invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM",
    3154             :                                                        path->slots[0]);
    3155           0 :                                         ret = -EUCLEAN;
    3156           0 :                                         goto out;
    3157             :                                 }
    3158         272 :                                 path->slots[0] = extent_slot;
    3159         272 :                                 num_to_del = 2;
    3160             :                         }
    3161             :                 }
    3162             : 
    3163     8073269 :                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
    3164             :                                       num_to_del);
    3165     8073268 :                 if (ret) {
    3166           0 :                         btrfs_abort_transaction(trans, ret);
    3167           0 :                         goto out;
    3168             :                 }
    3169     8073268 :                 btrfs_release_path(path);
    3170             : 
    3171     8073269 :                 ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
    3172             :         }
    3173    21909286 :         btrfs_release_path(path);
    3174             : 
    3175    21909270 : out:
    3176    21909270 :         btrfs_free_path(path);
    3177    21909270 :         return ret;
    3178             : }
    3179             : 
    3180             : /*
    3181             :  * when we free a block, it is possible (and likely) that we free the last
    3182             :  * delayed ref for that extent as well.  This searches the delayed ref tree for
    3183             :  * a given extent, and if there are no other delayed refs to be processed, it
    3184             :  * removes it from the tree.
    3185             :  */
    3186      737141 : static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
    3187             :                                       u64 bytenr)
    3188             : {
    3189      737141 :         struct btrfs_delayed_ref_head *head;
    3190      737141 :         struct btrfs_delayed_ref_root *delayed_refs;
    3191      737141 :         int ret = 0;
    3192             : 
    3193      737141 :         delayed_refs = &trans->transaction->delayed_refs;
    3194      737141 :         spin_lock(&delayed_refs->lock);
    3195      737141 :         head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
    3196      737141 :         if (!head)
    3197           1 :                 goto out_delayed_unlock;
    3198             : 
    3199      737140 :         spin_lock(&head->lock);
    3200      737140 :         if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
    3201      398321 :                 goto out;
    3202             : 
    3203      338819 :         if (cleanup_extent_op(head) != NULL)
    3204           0 :                 goto out;
    3205             : 
    3206             :         /*
    3207             :          * waiting for the lock here would deadlock.  If someone else has it
    3208             :          * locked they are already in the process of dropping it anyway
    3209             :          */
    3210      338819 :         if (!mutex_trylock(&head->mutex))
    3211          24 :                 goto out;
    3212             : 
    3213      338795 :         btrfs_delete_ref_head(delayed_refs, head);
    3214      338795 :         head->processing = false;
    3215             : 
    3216      338795 :         spin_unlock(&head->lock);
    3217      338795 :         spin_unlock(&delayed_refs->lock);
    3218             : 
    3219      338795 :         BUG_ON(head->extent_op);
    3220      338795 :         if (head->must_insert_reserved)
    3221      338795 :                 ret = 1;
    3222             : 
    3223      338795 :         btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
    3224      338795 :         mutex_unlock(&head->mutex);
    3225      338793 :         btrfs_put_delayed_ref_head(head);
    3226      338793 :         return ret;
    3227      398345 : out:
    3228      398345 :         spin_unlock(&head->lock);
    3229             : 
    3230      398346 : out_delayed_unlock:
    3231      398346 :         spin_unlock(&delayed_refs->lock);
    3232      398346 :         return 0;
    3233             : }
    3234             : 
    3235    10895459 : void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
    3236             :                            u64 root_id,
    3237             :                            struct extent_buffer *buf,
    3238             :                            u64 parent, int last_ref)
    3239             : {
    3240    10895459 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    3241    10895459 :         struct btrfs_ref generic_ref = { 0 };
    3242    10895459 :         int ret;
    3243             : 
    3244    10895459 :         btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
    3245    10895459 :                                buf->start, buf->len, parent);
    3246    10895459 :         btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
    3247             :                             root_id, 0, false);
    3248             : 
    3249    10895459 :         if (root_id != BTRFS_TREE_LOG_OBJECTID) {
    3250     9825529 :                 btrfs_ref_tree_mod(fs_info, &generic_ref);
    3251     9825529 :                 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL);
    3252     9825887 :                 BUG_ON(ret); /* -ENOMEM */
    3253             :         }
    3254             : 
    3255    10895817 :         if (last_ref && btrfs_header_generation(buf) == trans->transid) {
    3256     1807073 :                 struct btrfs_block_group *cache;
    3257     1807073 :                 bool must_pin = false;
    3258             : 
    3259     1807073 :                 if (root_id != BTRFS_TREE_LOG_OBJECTID) {
    3260      737141 :                         ret = check_ref_cleanup(trans, buf->start);
    3261      737136 :                         if (!ret) {
    3262      398346 :                                 btrfs_redirty_list_add(trans->transaction, buf);
    3263      398346 :                                 goto out;
    3264             :                         }
    3265             :                 }
    3266             : 
    3267     1408722 :                 cache = btrfs_lookup_block_group(fs_info, buf->start);
    3268             : 
    3269     1408741 :                 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
    3270     1367909 :                         pin_down_extent(trans, cache, buf->start, buf->len, 1);
    3271     1367917 :                         btrfs_put_block_group(cache);
    3272     1367916 :                         goto out;
    3273             :                 }
    3274             : 
    3275             :                 /*
    3276             :                  * If there are tree mod log users we may have recorded mod log
    3277             :                  * operations for this node.  If we re-allocate this node we
    3278             :                  * could replay operations on this node that happened when it
    3279             :                  * existed in a completely different root.  For example if it
    3280             :                  * was part of root A, then was reallocated to root B, and we
    3281             :                  * are doing a btrfs_old_search_slot(root b), we could replay
    3282             :                  * operations that happened when the block was part of root A,
    3283             :                  * giving us an inconsistent view of the btree.
    3284             :                  *
    3285             :                  * We are safe from races here because at this point no other
    3286             :                  * node or root points to this extent buffer, so if after this
    3287             :                  * check a new tree mod log user joins we will not have an
    3288             :                  * existing log of operations on this node that we have to
    3289             :                  * contend with.
    3290             :                  */
    3291       81664 :                 if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
    3292           2 :                         must_pin = true;
    3293             : 
    3294       40832 :                 if (must_pin || btrfs_is_zoned(fs_info)) {
    3295           2 :                         btrfs_redirty_list_add(trans->transaction, buf);
    3296           2 :                         pin_down_extent(trans, cache, buf->start, buf->len, 1);
    3297           2 :                         btrfs_put_block_group(cache);
    3298           2 :                         goto out;
    3299             :                 }
    3300             : 
    3301       40830 :                 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
    3302             : 
    3303       40830 :                 btrfs_add_free_space(cache, buf->start, buf->len);
    3304       40830 :                 btrfs_free_reserved_bytes(cache, buf->len, 0);
    3305       40830 :                 btrfs_put_block_group(cache);
    3306       40830 :                 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
    3307             :         }
    3308     9088744 : out:
    3309    10895838 :         if (last_ref) {
    3310             :                 /*
    3311             :                  * Deleting the buffer, clear the corrupt flag since it doesn't
    3312             :                  * matter anymore.
    3313             :                  */
    3314     8498640 :                 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
    3315             :         }
    3316    10895854 : }
    3317             : 
    3318             : /* Can return -ENOMEM */
    3319    16044375 : int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
    3320             : {
    3321    16044375 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    3322    16044375 :         int ret;
    3323             : 
    3324    16044375 :         if (btrfs_is_testing(fs_info))
    3325             :                 return 0;
    3326             : 
    3327             :         /*
    3328             :          * tree log blocks never actually go into the extent allocation
    3329             :          * tree, just update pinning info and exit early.
    3330             :          */
    3331    16044375 :         if ((ref->type == BTRFS_REF_METADATA &&
    3332    16044375 :              ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
    3333    11357119 :             (ref->type == BTRFS_REF_DATA &&
    3334    11357119 :              ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)) {
    3335             :                 /* unlocks the pinned mutex */
    3336           0 :                 btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
    3337           0 :                 ret = 0;
    3338    16044375 :         } else if (ref->type == BTRFS_REF_METADATA) {
    3339     4687258 :                 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL);
    3340             :         } else {
    3341    11357117 :                 ret = btrfs_add_delayed_data_ref(trans, ref, 0);
    3342             :         }
    3343             : 
    3344    16044382 :         if (!((ref->type == BTRFS_REF_METADATA &&
    3345             :                ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
    3346             :               (ref->type == BTRFS_REF_DATA &&
    3347             :                ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)))
    3348             :                 btrfs_ref_tree_mod(fs_info, ref);
    3349             : 
    3350    16044382 :         return ret;
    3351             : }
    3352             : 
    3353             : enum btrfs_loop_type {
    3354             :         LOOP_CACHING_NOWAIT,
    3355             :         LOOP_CACHING_WAIT,
    3356             :         LOOP_UNSET_SIZE_CLASS,
    3357             :         LOOP_ALLOC_CHUNK,
    3358             :         LOOP_WRONG_SIZE_CLASS,
    3359             :         LOOP_NO_EMPTY_SIZE,
    3360             : };
    3361             : 
    3362             : static inline void
    3363             : btrfs_lock_block_group(struct btrfs_block_group *cache,
    3364             :                        int delalloc)
    3365             : {
    3366    10621478 :         if (delalloc)
    3367     2016844 :                 down_read(&cache->data_rwsem);
    3368             : }
    3369             : 
    3370    13062784 : static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
    3371             :                        int delalloc)
    3372             : {
    3373    13062784 :         btrfs_get_block_group(cache);
    3374    13062298 :         if (delalloc)
    3375     8996044 :                 down_read(&cache->data_rwsem);
    3376    13062738 : }
    3377             : 
    3378     8519835 : static struct btrfs_block_group *btrfs_lock_cluster(
    3379             :                    struct btrfs_block_group *block_group,
    3380             :                    struct btrfs_free_cluster *cluster,
    3381             :                    int delalloc)
    3382             :         __acquires(&cluster->refill_lock)
    3383             : {
    3384     8519835 :         struct btrfs_block_group *used_bg = NULL;
    3385             : 
    3386     8519835 :         spin_lock(&cluster->refill_lock);
    3387     8520473 :         while (1) {
    3388     8520473 :                 used_bg = cluster->block_group;
    3389     8520473 :                 if (!used_bg)
    3390             :                         return NULL;
    3391             : 
    3392     8510333 :                 if (used_bg == block_group)
    3393     8510247 :                         return used_bg;
    3394             : 
    3395          86 :                 btrfs_get_block_group(used_bg);
    3396             : 
    3397          86 :                 if (!delalloc)
    3398          86 :                         return used_bg;
    3399             : 
    3400           0 :                 if (down_read_trylock(&used_bg->data_rwsem))
    3401           0 :                         return used_bg;
    3402             : 
    3403           0 :                 spin_unlock(&cluster->refill_lock);
    3404             : 
    3405             :                 /* We should only have one-level nested. */
    3406           0 :                 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
    3407             : 
    3408           0 :                 spin_lock(&cluster->refill_lock);
    3409           0 :                 if (used_bg == cluster->block_group)
    3410           0 :                         return used_bg;
    3411             : 
    3412           0 :                 up_read(&used_bg->data_rwsem);
    3413           0 :                 btrfs_put_block_group(used_bg);
    3414             :         }
    3415             : }
    3416             : 
    3417             : static inline void
    3418    23684797 : btrfs_release_block_group(struct btrfs_block_group *cache,
    3419             :                          int delalloc)
    3420             : {
    3421    23684797 :         if (delalloc)
    3422    11013762 :                 up_read(&cache->data_rwsem);
    3423    23685090 :         btrfs_put_block_group(cache);
    3424    23685463 : }
    3425             : 
    3426             : /*
    3427             :  * Helper function for find_free_extent().
    3428             :  *
    3429             :  * Return -ENOENT to inform caller that we need fallback to unclustered mode.
    3430             :  * Return -EAGAIN to inform caller that we need to re-search this block group
    3431             :  * Return >0 to inform caller that we find nothing
    3432             :  * Return 0 means we have found a location and set ffe_ctl->found_offset.
    3433             :  */
    3434     8519919 : static int find_free_extent_clustered(struct btrfs_block_group *bg,
    3435             :                                       struct find_free_extent_ctl *ffe_ctl,
    3436             :                                       struct btrfs_block_group **cluster_bg_ret)
    3437             : {
    3438     8519919 :         struct btrfs_block_group *cluster_bg;
    3439     8519919 :         struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
    3440     8519919 :         u64 aligned_cluster;
    3441     8519919 :         u64 offset;
    3442     8519919 :         int ret;
    3443             : 
    3444     8519919 :         cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
    3445     8520473 :         if (!cluster_bg)
    3446       10140 :                 goto refill_cluster;
    3447     8510333 :         if (cluster_bg != bg && (cluster_bg->ro ||
    3448           0 :             !block_group_bits(cluster_bg, ffe_ctl->flags)))
    3449          86 :                 goto release_cluster;
    3450             : 
    3451     8510247 :         offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
    3452             :                         ffe_ctl->num_bytes, cluster_bg->start,
    3453             :                         &ffe_ctl->max_extent_size);
    3454     8510247 :         if (offset) {
    3455             :                 /* We have a block, we're done */
    3456     8503940 :                 spin_unlock(&last_ptr->refill_lock);
    3457     8503940 :                 trace_btrfs_reserve_extent_cluster(cluster_bg, ffe_ctl);
    3458     8503940 :                 *cluster_bg_ret = cluster_bg;
    3459     8503940 :                 ffe_ctl->found_offset = offset;
    3460     8503940 :                 return 0;
    3461             :         }
    3462        6307 :         WARN_ON(last_ptr->block_group != cluster_bg);
    3463             : 
    3464        6393 : release_cluster:
    3465             :         /*
    3466             :          * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a new cluster, so
    3467             :          * let's just skip it and let the allocator find whatever block it can
    3468             :          * find. If we reach this point, we will have tried the cluster
    3469             :          * allocator plenty of times and not have found anything, so we are
    3470             :          * likely way too fragmented for the clustering stuff to find anything.
    3471             :          *
    3472             :          * However, if the cluster is taken from the current block group,
    3473             :          * release the cluster first, so that we stand a better chance of
    3474             :          * succeeding in the unclustered allocation.
    3475             :          */
    3476        6393 :         if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
    3477           0 :                 spin_unlock(&last_ptr->refill_lock);
    3478           0 :                 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
    3479           0 :                 return -ENOENT;
    3480             :         }
    3481             : 
    3482             :         /* This cluster didn't work out, free it and start over */
    3483        6393 :         btrfs_return_cluster_to_free_space(NULL, last_ptr);
    3484             : 
    3485        6393 :         if (cluster_bg != bg)
    3486          86 :                 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
    3487             : 
    3488        6307 : refill_cluster:
    3489       16533 :         if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
    3490           0 :                 spin_unlock(&last_ptr->refill_lock);
    3491           0 :                 return -ENOENT;
    3492             :         }
    3493             : 
    3494       16533 :         aligned_cluster = max_t(u64,
    3495             :                         ffe_ctl->empty_cluster + ffe_ctl->empty_size,
    3496             :                         bg->full_stripe_len);
    3497       16533 :         ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
    3498             :                         ffe_ctl->num_bytes, aligned_cluster);
    3499       16533 :         if (ret == 0) {
    3500             :                 /* Now pull our allocation out of this cluster */
    3501        9539 :                 offset = btrfs_alloc_from_cluster(bg, last_ptr,
    3502             :                                 ffe_ctl->num_bytes, ffe_ctl->search_start,
    3503             :                                 &ffe_ctl->max_extent_size);
    3504        9539 :                 if (offset) {
    3505             :                         /* We found one, proceed */
    3506        9539 :                         spin_unlock(&last_ptr->refill_lock);
    3507        9539 :                         ffe_ctl->found_offset = offset;
    3508        9539 :                         trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
    3509        9539 :                         return 0;
    3510             :                 }
    3511        6994 :         } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
    3512        2254 :                    !ffe_ctl->retry_clustered) {
    3513        2254 :                 spin_unlock(&last_ptr->refill_lock);
    3514             : 
    3515        2254 :                 ffe_ctl->retry_clustered = true;
    3516        2254 :                 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
    3517        2254 :                                 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
    3518        2254 :                 return -EAGAIN;
    3519             :         }
    3520             :         /*
    3521             :          * At this point we either didn't find a cluster or we weren't able to
    3522             :          * allocate a block from our cluster.  Free the cluster we've been
    3523             :          * trying to use, and go to the next block group.
    3524             :          */
    3525        4740 :         btrfs_return_cluster_to_free_space(NULL, last_ptr);
    3526        4740 :         spin_unlock(&last_ptr->refill_lock);
    3527        4740 :         return 1;
    3528             : }
    3529             : 
    3530             : /*
    3531             :  * Return >0 to inform caller that we find nothing
    3532             :  * Return 0 when we found a free extent and set ffe_ctl->found_offset
    3533             :  * Return -EAGAIN to inform caller that we need to re-search this block group
    3534             :  */
    3535     7721784 : static int find_free_extent_unclustered(struct btrfs_block_group *bg,
    3536             :                                         struct find_free_extent_ctl *ffe_ctl)
    3537             : {
    3538     7721784 :         struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
    3539     7721784 :         u64 offset;
    3540             : 
    3541             :         /*
    3542             :          * We are doing an unclustered allocation, set the fragmented flag so
    3543             :          * we don't bother trying to setup a cluster again until we get more
    3544             :          * space.
    3545             :          */
    3546     7721784 :         if (unlikely(last_ptr)) {
    3547           0 :                 spin_lock(&last_ptr->lock);
    3548           0 :                 last_ptr->fragmented = 1;
    3549           0 :                 spin_unlock(&last_ptr->lock);
    3550             :         }
    3551     7721784 :         if (ffe_ctl->cached) {
    3552     7720358 :                 struct btrfs_free_space_ctl *free_space_ctl;
    3553             : 
    3554     7720358 :                 free_space_ctl = bg->free_space_ctl;
    3555     7720358 :                 spin_lock(&free_space_ctl->tree_lock);
    3556     7721339 :                 if (free_space_ctl->free_space <
    3557     7721339 :                     ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
    3558     7721339 :                     ffe_ctl->empty_size) {
    3559     3608667 :                         ffe_ctl->total_free_space = max_t(u64,
    3560             :                                         ffe_ctl->total_free_space,
    3561             :                                         free_space_ctl->free_space);
    3562     3608667 :                         spin_unlock(&free_space_ctl->tree_lock);
    3563     3608667 :                         return 1;
    3564             :                 }
    3565     4112672 :                 spin_unlock(&free_space_ctl->tree_lock);
    3566             :         }
    3567             : 
    3568     4114037 :         offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
    3569             :                         ffe_ctl->num_bytes, ffe_ctl->empty_size,
    3570             :                         &ffe_ctl->max_extent_size);
    3571             : 
    3572             :         /*
    3573             :          * If we didn't find a chunk, and we haven't failed on this block group
    3574             :          * before, and this block group is in the middle of caching and we are
    3575             :          * ok with waiting, then go ahead and wait for progress to be made, and
    3576             :          * set @retry_unclustered to true.
    3577             :          *
    3578             :          * If @retry_unclustered is true then we've already waited on this
    3579             :          * block group once and should move on to the next block group.
    3580             :          */
    3581     4114112 :         if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
    3582        1127 :             ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
    3583         741 :                 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
    3584         741 :                                                       ffe_ctl->empty_size);
    3585         741 :                 ffe_ctl->retry_unclustered = true;
    3586         741 :                 return -EAGAIN;
    3587     4113371 :         } else if (!offset) {
    3588             :                 return 1;
    3589             :         }
    3590     3842004 :         ffe_ctl->found_offset = offset;
    3591     3842004 :         return 0;
    3592             : }
    3593             : 
    3594    16241909 : static int do_allocation_clustered(struct btrfs_block_group *block_group,
    3595             :                                    struct find_free_extent_ctl *ffe_ctl,
    3596             :                                    struct btrfs_block_group **bg_ret)
    3597             : {
    3598    16241909 :         int ret;
    3599             : 
    3600             :         /* We want to try and use the cluster allocator, so let's look there */
    3601    16241909 :         if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
    3602     8519931 :                 ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
    3603     8520473 :                 if (ret >= 0 || ret == -EAGAIN)
    3604             :                         return ret;
    3605             :                 /* ret == -ENOENT case falls through */
    3606             :         }
    3607             : 
    3608     7721979 :         return find_free_extent_unclustered(block_group, ffe_ctl);
    3609             : }
    3610             : 
    3611             : /*
    3612             :  * Tree-log block group locking
    3613             :  * ============================
    3614             :  *
    3615             :  * fs_info::treelog_bg_lock protects the fs_info::treelog_bg which
    3616             :  * indicates the starting address of a block group, which is reserved only
    3617             :  * for tree-log metadata.
    3618             :  *
    3619             :  * Lock nesting
    3620             :  * ============
    3621             :  *
    3622             :  * space_info::lock
    3623             :  *   block_group::lock
    3624             :  *     fs_info::treelog_bg_lock
    3625             :  */
    3626             : 
    3627             : /*
    3628             :  * Simple allocator for sequential-only block group. It only allows sequential
    3629             :  * allocation. No need to play with trees. This function also reserves the
    3630             :  * bytes as in btrfs_add_reserved_bytes.
    3631             :  */
    3632           0 : static int do_allocation_zoned(struct btrfs_block_group *block_group,
    3633             :                                struct find_free_extent_ctl *ffe_ctl,
    3634             :                                struct btrfs_block_group **bg_ret)
    3635             : {
    3636           0 :         struct btrfs_fs_info *fs_info = block_group->fs_info;
    3637           0 :         struct btrfs_space_info *space_info = block_group->space_info;
    3638           0 :         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
    3639           0 :         u64 start = block_group->start;
    3640           0 :         u64 num_bytes = ffe_ctl->num_bytes;
    3641           0 :         u64 avail;
    3642           0 :         u64 bytenr = block_group->start;
    3643           0 :         u64 log_bytenr;
    3644           0 :         u64 data_reloc_bytenr;
    3645           0 :         int ret = 0;
    3646           0 :         bool skip = false;
    3647             : 
    3648           0 :         ASSERT(btrfs_is_zoned(block_group->fs_info));
    3649             : 
    3650             :         /*
    3651             :          * Do not allow non-tree-log blocks in the dedicated tree-log block
    3652             :          * group, and vice versa.
    3653             :          */
    3654           0 :         spin_lock(&fs_info->treelog_bg_lock);
    3655           0 :         log_bytenr = fs_info->treelog_bg;
    3656           0 :         if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
    3657           0 :                            (!ffe_ctl->for_treelog && bytenr == log_bytenr)))
    3658           0 :                 skip = true;
    3659           0 :         spin_unlock(&fs_info->treelog_bg_lock);
    3660           0 :         if (skip)
    3661             :                 return 1;
    3662             : 
    3663             :         /*
    3664             :          * Do not allow non-relocation blocks in the dedicated relocation block
    3665             :          * group, and vice versa.
    3666             :          */
    3667           0 :         spin_lock(&fs_info->relocation_bg_lock);
    3668           0 :         data_reloc_bytenr = fs_info->data_reloc_bg;
    3669           0 :         if (data_reloc_bytenr &&
    3670           0 :             ((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
    3671           0 :              (!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
    3672           0 :                 skip = true;
    3673           0 :         spin_unlock(&fs_info->relocation_bg_lock);
    3674           0 :         if (skip)
    3675             :                 return 1;
    3676             : 
    3677             :         /* Check RO and no space case before trying to activate it */
    3678           0 :         spin_lock(&block_group->lock);
    3679           0 :         if (block_group->ro || btrfs_zoned_bg_is_full(block_group)) {
    3680             :                 ret = 1;
    3681             :                 /*
    3682             :                  * May need to clear fs_info->{treelog,data_reloc}_bg.
    3683             :                  * Return the error after taking the locks.
    3684             :                  */
    3685             :         }
    3686           0 :         spin_unlock(&block_group->lock);
    3687             : 
    3688           0 :         if (!ret && !btrfs_zone_activate(block_group)) {
    3689             :                 ret = 1;
    3690             :                 /*
    3691             :                  * May need to clear fs_info->{treelog,data_reloc}_bg.
    3692             :                  * Return the error after taking the locks.
    3693             :                  */
    3694             :         }
    3695             : 
    3696           0 :         spin_lock(&space_info->lock);
    3697           0 :         spin_lock(&block_group->lock);
    3698           0 :         spin_lock(&fs_info->treelog_bg_lock);
    3699           0 :         spin_lock(&fs_info->relocation_bg_lock);
    3700             : 
    3701           0 :         if (ret)
    3702           0 :                 goto out;
    3703             : 
    3704           0 :         ASSERT(!ffe_ctl->for_treelog ||
    3705             :                block_group->start == fs_info->treelog_bg ||
    3706             :                fs_info->treelog_bg == 0);
    3707           0 :         ASSERT(!ffe_ctl->for_data_reloc ||
    3708             :                block_group->start == fs_info->data_reloc_bg ||
    3709             :                fs_info->data_reloc_bg == 0);
    3710             : 
    3711           0 :         if (block_group->ro ||
    3712           0 :             test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
    3713           0 :                 ret = 1;
    3714           0 :                 goto out;
    3715             :         }
    3716             : 
    3717             :         /*
    3718             :          * Do not allow currently using block group to be tree-log dedicated
    3719             :          * block group.
    3720             :          */
    3721           0 :         if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
    3722           0 :             (block_group->used || block_group->reserved)) {
    3723           0 :                 ret = 1;
    3724           0 :                 goto out;
    3725             :         }
    3726             : 
    3727             :         /*
    3728             :          * Do not allow currently used block group to be the data relocation
    3729             :          * dedicated block group.
    3730             :          */
    3731           0 :         if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
    3732           0 :             (block_group->used || block_group->reserved)) {
    3733           0 :                 ret = 1;
    3734           0 :                 goto out;
    3735             :         }
    3736             : 
    3737           0 :         WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity);
    3738           0 :         avail = block_group->zone_capacity - block_group->alloc_offset;
    3739           0 :         if (avail < num_bytes) {
    3740           0 :                 if (ffe_ctl->max_extent_size < avail) {
    3741             :                         /*
    3742             :                          * With sequential allocator, free space is always
    3743             :                          * contiguous
    3744             :                          */
    3745           0 :                         ffe_ctl->max_extent_size = avail;
    3746           0 :                         ffe_ctl->total_free_space = avail;
    3747             :                 }
    3748           0 :                 ret = 1;
    3749           0 :                 goto out;
    3750             :         }
    3751             : 
    3752           0 :         if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
    3753           0 :                 fs_info->treelog_bg = block_group->start;
    3754             : 
    3755           0 :         if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
    3756           0 :                 fs_info->data_reloc_bg = block_group->start;
    3757             : 
    3758           0 :         ffe_ctl->found_offset = start + block_group->alloc_offset;
    3759           0 :         block_group->alloc_offset += num_bytes;
    3760           0 :         spin_lock(&ctl->tree_lock);
    3761           0 :         ctl->free_space -= num_bytes;
    3762           0 :         spin_unlock(&ctl->tree_lock);
    3763             : 
    3764             :         /*
    3765             :          * We do not check if found_offset is aligned to stripesize. The
    3766             :          * address is anyway rewritten when using zone append writing.
    3767             :          */
    3768             : 
    3769           0 :         ffe_ctl->search_start = ffe_ctl->found_offset;
    3770             : 
    3771           0 : out:
    3772           0 :         if (ret && ffe_ctl->for_treelog)
    3773           0 :                 fs_info->treelog_bg = 0;
    3774           0 :         if (ret && ffe_ctl->for_data_reloc &&
    3775           0 :             fs_info->data_reloc_bg == block_group->start) {
    3776             :                 /*
    3777             :                  * Do not allow further allocations from this block group.
    3778             :                  * Compared to increasing the ->ro, setting the
    3779             :                  * BLOCK_GROUP_FLAG_ZONED_DATA_RELOC flag still allows
    3780             :                  * nocow writers to come in. See btrfs_inc_nocow_writers().
    3781             :                  *
    3782             :                  * We need to disable allocation to avoid allocating a
    3783             :                  * regular (non-relocation data) extent. With a mix of relocation
    3784             :                  * extents and regular extents, we can dispatch WRITE commands
    3785             :                  * (for relocation extents) and ZONE APPEND commands (for
    3786             :                  * regular extents) at the same time to the same zone, which
    3787             :                  * can easily break the write pointer.
    3788             :                  */
    3789           0 :                 set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
    3790           0 :                 fs_info->data_reloc_bg = 0;
    3791             :         }
    3792           0 :         spin_unlock(&fs_info->relocation_bg_lock);
    3793           0 :         spin_unlock(&fs_info->treelog_bg_lock);
    3794           0 :         spin_unlock(&block_group->lock);
    3795           0 :         spin_unlock(&space_info->lock);
    3796           0 :         return ret;
    3797             : }
    3798             : 
    3799    16242004 : static int do_allocation(struct btrfs_block_group *block_group,
    3800             :                          struct find_free_extent_ctl *ffe_ctl,
    3801             :                          struct btrfs_block_group **bg_ret)
    3802             : {
                               /*
                                * Policy dispatcher for one allocation attempt in @block_group:
                                * forwards all three arguments to the helper that matches
                                * ffe_ctl->policy and returns that helper's result unchanged.
                                */
    3803    16242004 :         switch (ffe_ctl->policy) {
    3804    16242004 :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
    3805    16242004 :                 return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
    3806           0 :         case BTRFS_EXTENT_ALLOC_ZONED:
    3807           0 :                 return do_allocation_zoned(block_group, ffe_ctl, bg_ret);
    3808           0 :         default:
                                       /* Only the two policies above are handled. */
    3809           0 :                 BUG();
    3810             :         }
    3811             : }
    3812             : 
    3813    11329727 : static void release_block_group(struct btrfs_block_group *block_group,
    3814             :                                 struct find_free_extent_ctl *ffe_ctl,
    3815             :                                 int delalloc)
    3816             : {
                               /*
                                * Done with @block_group for this search pass: reset the
                                * policy-specific retry state kept in @ffe_ctl, then pass the block
                                * group and @delalloc on to btrfs_release_block_group().
                                */
    3817    11329727 :         switch (ffe_ctl->policy) {
    3818    11329916 :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
                                       /* Clear both retry flags of the clustered allocator. */
    3819    11329916 :                 ffe_ctl->retry_clustered = false;
    3820    11329916 :                 ffe_ctl->retry_unclustered = false;
    3821    11329916 :                 break;
    3822             :         case BTRFS_EXTENT_ALLOC_ZONED:
    3823             :                 /* Nothing to do */
    3824             :                 break;
    3825           0 :         default:
    3826           0 :                 BUG();
    3827             :         }
    3828             : 
                               /* The block group's RAID index must equal ffe_ctl->index. */
    3829    11329727 :         BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
    3830             :                ffe_ctl->index);
    3831    11329727 :         btrfs_release_block_group(block_group, delalloc);
    3832    11330061 : }
    3833             : 
    3834    12355281 : static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl,
    3835             :                                    struct btrfs_key *ins)
    3836             : {
                               /*
                                * Clustered-policy hook called by found_extent() once @ins holds a
                                * found extent. If a free cluster was fetched (ffe_ctl->last_ptr) but
                                * clustering was not used for this search, store ins->objectid in
                                * last_ptr->window_start under last_ptr->lock.
                                * prepare_allocation_clustered() reads window_start back as
                                * ffe_ctl->hint_byte.
                                */
    3837    12355281 :         struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
    3838             : 
    3839    12355281 :         if (!ffe_ctl->use_cluster && last_ptr) {
    3840           0 :                 spin_lock(&last_ptr->lock);
    3841           0 :                 last_ptr->window_start = ins->objectid;
    3842           0 :                 spin_unlock(&last_ptr->lock);
    3843             :         }
    3844    12355281 : }
    3845             : 
    3846    12355304 : static void found_extent(struct find_free_extent_ctl *ffe_ctl,
    3847             :                          struct btrfs_key *ins)
    3848             : {
                               /*
                                * Policy dispatcher for post-processing a found extent (@ins). Only
                                * the clustered policy has work to do here.
                                */
    3849    12355304 :         switch (ffe_ctl->policy) {
    3850    12355304 :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
    3851    12355304 :                 found_extent_clustered(ffe_ctl, ins);
    3852    12355304 :                 break;
    3853             :         case BTRFS_EXTENT_ALLOC_ZONED:
    3854             :                 /* Nothing to do */
    3855             :                 break;
    3856           0 :         default:
    3857           0 :                 BUG();
    3858             :         }
    3859    12355263 : }
    3860             : 
    3861             : static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
    3862             :                                     struct find_free_extent_ctl *ffe_ctl)
    3863             : {
                               /*
                                * Decide whether a new chunk may be allocated under the zoned policy.
                                *
                                * Return 0 to let the caller go ahead with the chunk allocation.
                                * Return -ENOSPC when ffe_ctl->max_extent_size is still at least
                                * ffe_ctl->min_alloc_size, so a retry with a smaller size can succeed.
                                * Return -EAGAIN for a data allocation when not even that is left.
                                * A negative value from btrfs_zone_finish_one_bg() is returned as is.
                                */
    3864             :         /* If we can activate new zone, just allocate a chunk and use it */
    3865             :         if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
    3866             :                 return 0;
    3867             : 
    3868             :         /*
    3869             :          * We already reached the max active zones. Try to finish one block
    3870             :          * group to make room for a new block group. This is only possible
    3871             :          * for a data block group because btrfs_zone_finish() may need to wait
    3872             :          * for a running transaction which can cause a deadlock for metadata
    3873             :          * allocation.
    3874             :          */
    3875             :         if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
    3876             :                 int ret = btrfs_zone_finish_one_bg(fs_info);
    3877             : 
                                       /*
                                        * NOTE(review): 1 presumably means a block group was finished,
                                        * which is why the chunk allocation is allowed. Confirm against
                                        * btrfs_zone_finish_one_bg(). A return of 0 falls through to the
                                        * checks below.
                                        */
    3878             :                 if (ret == 1)
    3879             :                         return 0;
    3880             :                 else if (ret < 0)
    3881             :                         return ret;
    3882             :         }
    3883             : 
    3884             :         /*
    3885             :          * If we have enough free space left in an already active block group
    3886             :          * and we can't activate any other zone now, do not allow allocating a
    3887             :          * new chunk and let find_free_extent() retry with a smaller size.
    3888             :          */
    3889             :         if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
    3890             :                 return -ENOSPC;
    3891             : 
    3892             :         /*
    3893             :          * Even min_alloc_size is not left in any block groups. Since we cannot
    3894             :          * activate a new block group, allocating it may not help. Let's tell the
    3895             :          * caller to try again and hope it makes some progress by writing some
    3896             :          * parts of the region. That is only possible for data block groups,
    3897             :          * where a part of the region can be written.
    3898             :          */
    3899             :         if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA)
    3900             :                 return -EAGAIN;
    3901             : 
    3902             :         /*
    3903             :          * We cannot activate a new block group and there is not enough space
    3904             :          * left in any block groups. So, allocating a new block group may not
    3905             :          * help. But, there is nothing to do anyway, so let's go with it.
    3906             :          */
    3907             :         return 0;
    3908             : }
    3909             : 
    3910             : static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
    3911             :                               struct find_free_extent_ctl *ffe_ctl)
    3912             : {
                               /*
                                * Ask the allocation policy whether a new chunk may be created.
                                * Return 0 if it may; find_free_extent_update_loop() returns any other
                                * value straight to its caller without allocating a chunk.
                                */
    3913       42389 :         switch (ffe_ctl->policy) {
    3914             :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
                                       /* The clustered policy never vetoes a chunk allocation. */
    3915             :                 return 0;
    3916             :         case BTRFS_EXTENT_ALLOC_ZONED:
    3917             :                 return can_allocate_chunk_zoned(fs_info, ffe_ctl);
    3918           0 :         default:
    3919           0 :                 BUG();
    3920             :         }
    3921             : }
    3922             : 
    3923             : /*
                        * Decide what find_free_extent() does after a pass over the block groups:
                        * finish with the extent recorded in @ins, retry with the next RAID index,
                        * or escalate ffe_ctl->loop to the next, more aggressive, search stage.
                        *
    3924             :  * Return >0 means caller needs to re-search for free extent
    3925             :  * Return 0 means we have the needed free extent.
    3926             :  * Return <0 means we failed to locate any free extent.
    3927             :  */
    3928    14767068 : static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
    3929             :                                         struct btrfs_key *ins,
    3930             :                                         struct find_free_extent_ctl *ffe_ctl,
    3931             :                                         bool full_search)
    3932             : {
    3933    14767068 :         struct btrfs_root *root = fs_info->chunk_root;
    3934    14767068 :         int ret;
    3935             : 
                               /*
                                * Remember that a caching block group was seen during the first
                                * (NOWAIT) stage; this is consulted below when deciding whether the
                                * LOOP_CACHING_WAIT stage can be skipped.
                                */
    3936    14767068 :         if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
    3937    13187071 :             ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
    3938        4179 :                 ffe_ctl->orig_have_caching_bg = true;
    3939             : 
                               /* A non-zero ins->objectid means an extent was found: we are done. */
    3940    14767068 :         if (ins->objectid) {
    3941    12355348 :                 found_extent(ffe_ctl, ins);
    3942    12355348 :                 return 0;
    3943             :         }
    3944             : 
                               /*
                                * From LOOP_CACHING_WAIT on, a block group that is still caching makes
                                * us search again without advancing the RAID index or the loop stage.
                                */
    3945     2411720 :         if (ffe_ctl->loop >= LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
    3946             :                 return 1;
    3947             : 
                               /* Try the next RAID index before escalating the loop stage. */
    3948     1707841 :         ffe_ctl->index++;
    3949     1707841 :         if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
    3950             :                 return 1;
    3951             : 
    3952             :         /*
    3953             :          * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
    3954             :          *                      caching kthreads as we move along
    3955             :          * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
    3956             :          * LOOP_UNSET_SIZE_CLASS, allow unset size class
    3957             :          * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
    3958             :          * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
    3959             :          *                     again
                                *
                                * NOTE(review): LOOP_WRONG_SIZE_CLASS, which is tested in
                                * find_free_extent_check_size_class(), is missing from this list.
                                * Confirm its position in the enum and add it here.
    3960             :          */
    3961      189764 :         if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
                                       /* Every RAID index has been tried: restart from index 0. */
    3962      189764 :                 ffe_ctl->index = 0;
    3963             :                 /*
    3964             :                  * We want to skip the LOOP_CACHING_WAIT step if we don't have
    3965             :                  * any uncached bgs and we've already done a full search
    3966             :                  * through.
    3967             :                  */
    3968      189764 :                 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT &&
    3969      104843 :                     (!ffe_ctl->orig_have_caching_bg && full_search))
    3970        5066 :                         ffe_ctl->loop++;
    3971      189764 :                 ffe_ctl->loop++;
    3972             : 
    3973      189764 :                 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
    3974       42389 :                         struct btrfs_trans_handle *trans;
    3975       42389 :                         int exist = 0;
    3976             : 
    3977             :                         /* Check if allocation policy allows to create a new chunk */
    3978       42389 :                         ret = can_allocate_chunk(fs_info, ffe_ctl);
    3979       42389 :                         if (ret)
    3980             :                                 return ret;
    3981             : 
                                               /*
                                                * Reuse the transaction handle already attached to the
                                                * current task, if any; otherwise join one. Only a handle
                                                * joined here (exist == 0) is ended below.
                                                */
    3982       42389 :                         trans = current->journal_info;
    3983       42389 :                         if (trans)
    3984             :                                 exist = 1;
    3985             :                         else
    3986       42175 :                                 trans = btrfs_join_transaction(root);
    3987             : 
    3988       42390 :                         if (IS_ERR(trans)) {
    3989           0 :                                 ret = PTR_ERR(trans);
    3990           0 :                                 return ret;
    3991             :                         }
    3992             : 
    3993       42390 :                         ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
    3994             :                                                 CHUNK_ALLOC_FORCE_FOR_EXTENT);
    3995             : 
    3996             :                         /* Do not bail out on ENOSPC since we can do more. */
    3997       42390 :                         if (ret == -ENOSPC) {
                                                       /* No new chunk: move straight on to the next stage. */
    3998       41805 :                                 ret = 0;
    3999       41805 :                                 ffe_ctl->loop++;
    4000             :                         }
    4001         585 :                         else if (ret < 0)
    4002           0 :                                 btrfs_abort_transaction(trans, ret);
    4003             :                         else
    4004             :                                 ret = 0;
    4005       42390 :                         if (!exist)
    4006       42175 :                                 btrfs_end_transaction(trans);
                                               /* Only a negative error other than -ENOSPC is left in ret here. */
    4007       42388 :                         if (ret)
    4008             :                                 return ret;
    4009             :                 }
    4010             : 
    4011      189763 :                 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
                                               /* This last stage is only attempted for the clustered policy. */
    4012        3947 :                         if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED)
    4013             :                                 return -ENOSPC;
    4014             : 
    4015             :                         /*
    4016             :                          * Don't loop again if we already have no empty_size and
    4017             :                          * no empty_cluster.
    4018             :                          */
    4019        3947 :                         if (ffe_ctl->empty_size == 0 &&
    4020        3947 :                             ffe_ctl->empty_cluster == 0)
    4021             :                                 return -ENOSPC;
    4022           0 :                         ffe_ctl->empty_size = 0;
    4023           0 :                         ffe_ctl->empty_cluster = 0;
    4024             :                 }
    4025      185816 :                 return 1;
    4026             :         }
                               /* Already at the last stage and still nothing found. */
    4027             :         return -ENOSPC;
    4028             : }
    4029             : 
    4030    23687982 : static bool find_free_extent_check_size_class(struct find_free_extent_ctl *ffe_ctl,
    4031             :                                               struct btrfs_block_group *bg)
    4032             : {
                               /*
                                * Return true if @bg is acceptable for this search as far as its size
                                * class is concerned. The requirement is relaxed as ffe_ctl->loop
                                * advances.
                                */
                               /* The zoned policy does not filter by size class. */
    4033    23687982 :         if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
    4034             :                 return true;
    4035    23687982 :         if (!btrfs_block_group_should_use_size_class(bg))
    4036             :                 return true;
                               /* From LOOP_WRONG_SIZE_CLASS on, any size class is accepted. */
    4037    13503883 :         if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
    4038             :                 return true;
                               /* From LOOP_UNSET_SIZE_CLASS on, block groups with no class are accepted. */
    4039    13246791 :         if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
    4040      391110 :             bg->size_class == BTRFS_BG_SZ_NONE)
    4041             :                 return true;
                               /* Otherwise the block group's class must match the requested one. */
    4042    13244458 :         return ffe_ctl->size_class == bg->size_class;
    4043             : }
    4044             : 
    4045    12364385 : static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
    4046             :                                         struct find_free_extent_ctl *ffe_ctl,
    4047             :                                         struct btrfs_space_info *space_info,
    4048             :                                         struct btrfs_key *ins)
    4049             : {
                               /*
                                * Set up @ffe_ctl for a clustered-policy search: decide whether to use
                                * a cluster, fetch the cluster info and seed ffe_ctl->hint_byte from
                                * it.
                                *
                                * Return 0 on success. Return -ENOSPC, with ins->offset set to
                                * space_info->max_extent_size, when the request is larger than that
                                * recorded maximum.
                                */
    4050             :         /*
    4051             :          * If our free space is heavily fragmented we may not be able to make
    4052             :          * big contiguous allocations, so instead of doing the expensive search
    4053             :          * for free space, simply return ENOSPC with our max_extent_size so we
    4054             :          * can go ahead and search for a more manageable chunk.
    4055             :          *
    4056             :          * If our max_extent_size is large enough for our allocation simply
    4057             :          * disable clustering since we will likely not be able to find enough
    4058             :          * space to create a cluster and induce latency trying.
                                *
                                * max_extent_size is first tested without the lock and then tested
                                * again under space_info->lock before it is acted upon.
    4059             :          */
    4060    12364385 :         if (space_info->max_extent_size) {
    4061       22511 :                 spin_lock(&space_info->lock);
    4062       22512 :                 if (space_info->max_extent_size &&
    4063       22510 :                     ffe_ctl->num_bytes > space_info->max_extent_size) {
    4064        5981 :                         ins->offset = space_info->max_extent_size;
    4065        5981 :                         spin_unlock(&space_info->lock);
    4066        5981 :                         return -ENOSPC;
    4067       16531 :                 } else if (space_info->max_extent_size) {
    4068       16529 :                         ffe_ctl->use_cluster = false;
    4069             :                 }
    4070       16531 :                 spin_unlock(&space_info->lock);
    4071             :         }
    4072             : 
    4073    12358405 :         ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info,
    4074             :                                                &ffe_ctl->empty_cluster);
    4075    12358180 :         if (ffe_ctl->last_ptr) {
    4076     8512982 :                 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
    4077             : 
    4078     8512982 :                 spin_lock(&last_ptr->lock);
                                       /* A cluster with a block group attached provides the search hint. */
    4079     8513479 :                 if (last_ptr->block_group)
    4080     8510320 :                         ffe_ctl->hint_byte = last_ptr->window_start;
    4081     8513479 :                 if (last_ptr->fragmented) {
    4082             :                         /*
    4083             :                          * We still set window_start so we can keep track of the
    4084             :                          * last place we found an allocation to try and save
    4085             :                          * some time.
    4086             :                          */
    4087           0 :                         ffe_ctl->hint_byte = last_ptr->window_start;
    4088           0 :                         ffe_ctl->use_cluster = false;
    4089             :                 }
    4090     8513479 :                 spin_unlock(&last_ptr->lock);
    4091             :         }
    4092             : 
    4093             :         return 0;
    4094             : }
    4095             : 
    4096    12364443 : static int prepare_allocation(struct btrfs_fs_info *fs_info,
    4097             :                               struct find_free_extent_ctl *ffe_ctl,
    4098             :                               struct btrfs_space_info *space_info,
    4099             :                               struct btrfs_key *ins)
    4100             : {
                               /*
                                * Policy dispatcher that prepares @ffe_ctl before the block group
                                * search starts. Returns the clustered helper's result, or 0 for the
                                * zoned policy.
                                */
    4101    12364443 :         switch (ffe_ctl->policy) {
    4102    12364443 :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
    4103    12364443 :                 return prepare_allocation_clustered(fs_info, ffe_ctl,
    4104             :                                                     space_info, ins);
    4105           0 :         case BTRFS_EXTENT_ALLOC_ZONED:
                                       /*
                                        * Start the search at the dedicated tree-log or data relocation
                                        * block group when one is currently recorded in fs_info. Each
                                        * value is read under its own lock. If both apply, the data
                                        * relocation hint is written last and wins.
                                        */
    4106           0 :                 if (ffe_ctl->for_treelog) {
    4107           0 :                         spin_lock(&fs_info->treelog_bg_lock);
    4108           0 :                         if (fs_info->treelog_bg)
    4109           0 :                                 ffe_ctl->hint_byte = fs_info->treelog_bg;
    4110           0 :                         spin_unlock(&fs_info->treelog_bg_lock);
    4111             :                 }
    4112           0 :                 if (ffe_ctl->for_data_reloc) {
    4113           0 :                         spin_lock(&fs_info->relocation_bg_lock);
    4114           0 :                         if (fs_info->data_reloc_bg)
    4115           0 :                                 ffe_ctl->hint_byte = fs_info->data_reloc_bg;
    4116           0 :                         spin_unlock(&fs_info->relocation_bg_lock);
    4117             :                 }
    4118             :                 return 0;
    4119           0 :         default:
    4120           0 :                 BUG();
    4121             :         }
    4122             : }
    4123             : 
    4124             : /*
    4125             :  * Walks the btree of allocated extents and finds a hole of a given size.
    4126             :  * The key ins is changed to record the hole:
    4127             :  * ins->objectid == start position
    4128             :  * ins->type == BTRFS_EXTENT_ITEM_KEY
    4129             :  * ins->offset == the size of the hole.
    4130             :  * Any available blocks before search_start are skipped.
    4131             :  *
    4132             :  * If there is no suitable free space, we will record the max size of
    4133             :  * the free space extent currently.
    4134             :  *
    4135             :  * The overall logic and call chain:
    4136             :  *
    4137             :  * find_free_extent()
    4138             :  * |- Iterate through all block groups
    4139             :  * |  |- Get a valid block group
    4140             :  * |  |- Try to do clustered allocation in that block group
    4141             :  * |  |- Try to do unclustered allocation in that block group
    4142             :  * |  |- Check if the result is valid
    4143             :  * |  |  |- If valid, then exit
    4144             :  * |  |- Jump to next block group
    4145             :  * |
    4146             :  * |- Push harder to find free extents
    4147             :  *    |- If not found, re-iterate all block groups
    4148             :  */
    4149    12365114 : static noinline int find_free_extent(struct btrfs_root *root,
    4150             :                                      struct btrfs_key *ins,
    4151             :                                      struct find_free_extent_ctl *ffe_ctl)
    4152             : {
    4153    12365114 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4154    12365114 :         int ret = 0;
    4155    12365114 :         int cache_block_group_error = 0;
    4156    12365114 :         struct btrfs_block_group *block_group = NULL;
    4157    12365114 :         struct btrfs_space_info *space_info;
    4158    12365114 :         bool full_search = false;
    4159             : 
    4160    12365114 :         WARN_ON(ffe_ctl->num_bytes < fs_info->sectorsize);
    4161             : 
    4162    12365114 :         ffe_ctl->search_start = 0;
    4163             :         /* For clustered allocation */
    4164    12365114 :         ffe_ctl->empty_cluster = 0;
    4165    12365114 :         ffe_ctl->last_ptr = NULL;
    4166    12365114 :         ffe_ctl->use_cluster = true;
    4167    12365114 :         ffe_ctl->have_caching_bg = false;
    4168    12365114 :         ffe_ctl->orig_have_caching_bg = false;
    4169    12365114 :         ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
    4170    12365114 :         ffe_ctl->loop = 0;
    4171             :         /* For clustered allocation */
    4172    12365114 :         ffe_ctl->retry_clustered = false;
    4173    12365114 :         ffe_ctl->retry_unclustered = false;
    4174    12365114 :         ffe_ctl->cached = 0;
    4175    12365114 :         ffe_ctl->max_extent_size = 0;
    4176    12365114 :         ffe_ctl->total_free_space = 0;
    4177    12365114 :         ffe_ctl->found_offset = 0;
    4178    12365114 :         ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
    4179    12365114 :         ffe_ctl->size_class = btrfs_calc_block_group_size_class(ffe_ctl->num_bytes);
    4180             : 
    4181    12364539 :         if (btrfs_is_zoned(fs_info))
    4182             :                 ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED;
    4183             : 
    4184    12364539 :         ins->type = BTRFS_EXTENT_ITEM_KEY;
    4185    12364539 :         ins->objectid = 0;
    4186    12364539 :         ins->offset = 0;
    4187             : 
    4188    12364539 :         trace_find_free_extent(root, ffe_ctl);
    4189             : 
    4190    12364422 :         space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags);
    4191    12364469 :         if (!space_info) {
    4192           0 :                 btrfs_err(fs_info, "No space info for %llu", ffe_ctl->flags);
    4193           0 :                 return -ENOSPC;
    4194             :         }
    4195             : 
    4196    12364469 :         ret = prepare_allocation(fs_info, ffe_ctl, space_info, ins);
    4197    12364328 :         if (ret < 0)
    4198             :                 return ret;
    4199             : 
    4200    12358347 :         ffe_ctl->search_start = max(ffe_ctl->search_start,
    4201             :                                     first_logical_byte(fs_info));
    4202    12358932 :         ffe_ctl->search_start = max(ffe_ctl->search_start, ffe_ctl->hint_byte);
    4203    12358932 :         if (ffe_ctl->search_start == ffe_ctl->hint_byte) {
    4204    10623361 :                 block_group = btrfs_lookup_block_group(fs_info,
    4205             :                                                        ffe_ctl->search_start);
    4206             :                 /*
    4207             :                  * we don't want to use the block group if it doesn't match our
    4208             :                  * allocation bits, or if it's not cached.
    4209             :                  *
    4210             :                  * However if we are re-searching with an ideal block group
    4211             :                  * picked out then we don't care that the block group is cached.
    4212             :                  */
    4213    10623027 :                 if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
    4214    10622293 :                     block_group->cached != BTRFS_CACHE_NO) {
    4215    10622236 :                         down_read(&space_info->groups_sem);
    4216    10622266 :                         if (list_empty(&block_group->list) ||
    4217    10622266 :                             block_group->ro) {
    4218             :                                 /*
    4219             :                                  * someone is removing this block group,
    4220             :                                  * we can't jump into the have_block_group
    4221             :                                  * target because our list pointers are not
    4222             :                                  * valid
    4223             :                                  */
    4224         788 :                                 btrfs_put_block_group(block_group);
    4225         616 :                                 up_read(&space_info->groups_sem);
    4226             :                         } else {
    4227    10621478 :                                 ffe_ctl->index = btrfs_bg_flags_to_raid_index(
    4228             :                                                         block_group->flags);
    4229    10621478 :                                 btrfs_lock_block_group(block_group,
    4230             :                                                        ffe_ctl->delalloc);
    4231    10621518 :                                 ffe_ctl->hinted = true;
    4232    10621518 :                                 goto have_block_group;
    4233             :                         }
    4234         791 :                 } else if (block_group) {
    4235         403 :                         btrfs_put_block_group(block_group);
    4236             :                 }
    4237             :         }
    4238     1735959 : search:
    4239     4144734 :         trace_find_free_extent_search_loop(root, ffe_ctl);
    4240     4144770 :         ffe_ctl->have_caching_bg = false;
    4241     4144770 :         if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
    4242             :             ffe_ctl->index == 0)
    4243     2630715 :                 full_search = true;
    4244     4144770 :         down_read(&space_info->groups_sem);
    4245    15482121 :         list_for_each_entry(block_group,
    4246             :                             &space_info->block_groups[ffe_ctl->index], list) {
    4247    13070376 :                 struct btrfs_block_group *bg_ret;
    4248             : 
    4249    13070376 :                 ffe_ctl->hinted = false;
    4250             :                 /* If the block group is read-only, we can skip it entirely. */
    4251    13070376 :                 if (unlikely(block_group->ro)) {
    4252        7476 :                         if (ffe_ctl->for_treelog)
    4253             :                                 btrfs_clear_treelog_bg(block_group);
    4254        7476 :                         if (ffe_ctl->for_data_reloc)
    4255             :                                 btrfs_clear_data_reloc_bg(block_group);
    4256        7476 :                         continue;
    4257             :                 }
    4258             : 
    4259    13062900 :                 btrfs_grab_block_group(block_group, ffe_ctl->delalloc);
    4260    13062579 :                 ffe_ctl->search_start = block_group->start;
    4261             : 
    4262             :                 /*
    4263             :                  * this can happen if we end up cycling through all the
    4264             :                  * raid types, but we want to make sure we only allocate
    4265             :                  * for the proper type.
    4266             :                  */
    4267    13062579 :                 if (!block_group_bits(block_group, ffe_ctl->flags)) {
    4268           0 :                         u64 extra = BTRFS_BLOCK_GROUP_DUP |
    4269             :                                 BTRFS_BLOCK_GROUP_RAID1_MASK |
    4270             :                                 BTRFS_BLOCK_GROUP_RAID56_MASK |
    4271             :                                 BTRFS_BLOCK_GROUP_RAID10;
    4272             : 
    4273             :                         /*
    4274             :                          * if they asked for extra copies and this block group
    4275             :                          * doesn't provide them, bail.  This does allow us to
    4276             :                          * fill raid0 from raid1.
    4277             :                          */
    4278           0 :                         if ((ffe_ctl->flags & extra) && !(block_group->flags & extra))
    4279           0 :                                 goto loop;
    4280             : 
    4281             :                         /*
    4282             :                          * This block group has different flags than we want.
    4283             :                          * It's possible that we have MIXED_GROUP flag but no
    4284             :                          * block group is mixed.  Just skip such block group.
    4285             :                          */
    4286           0 :                         btrfs_release_block_group(block_group, ffe_ctl->delalloc);
    4287           0 :                         continue;
    4288             :                 }
    4289             : 
    4290    23684097 : have_block_group:
    4291    23687092 :                 trace_find_free_extent_have_block_group(root, ffe_ctl, block_group);
    4292    23685966 :                 ffe_ctl->cached = btrfs_block_group_done(block_group);
    4293    23688180 :                 if (unlikely(!ffe_ctl->cached)) {
    4294     1515640 :                         ffe_ctl->have_caching_bg = true;
    4295     1515640 :                         ret = btrfs_cache_block_group(block_group, false);
    4296             : 
    4297             :                         /*
    4298             :                          * If we get ENOMEM here or something else we want to
    4299             :                          * try other block groups, because it may not be fatal.
    4300             :                          * However if we can't find anything else we need to
    4301             :                          * save our return here so that we return the actual
    4302             :                          * error that caused problems, not ENOSPC.
    4303             :                          */
    4304     1515636 :                         if (ret < 0) {
    4305           0 :                                 if (!cache_block_group_error)
    4306           0 :                                         cache_block_group_error = ret;
    4307           0 :                                 ret = 0;
    4308           0 :                                 goto loop;
    4309             :                         }
    4310             :                         ret = 0;
    4311             :                 }
    4312             : 
    4313    23688176 :                 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
    4314           0 :                         goto loop;
    4315             : 
    4316    23688176 :                 if (!find_free_extent_check_size_class(ffe_ctl, block_group))
    4317     7445433 :                         goto loop;
    4318             : 
    4319    16242122 :                 bg_ret = NULL;
    4320    16242122 :                 ret = do_allocation(block_group, ffe_ctl, &bg_ret);
    4321    16243075 :                 if (ret == 0) {
    4322    12355430 :                         if (bg_ret && bg_ret != block_group) {
    4323           0 :                                 btrfs_release_block_group(block_group,
    4324             :                                                           ffe_ctl->delalloc);
    4325           0 :                                 block_group = bg_ret;
    4326             :                         }
    4327     3887645 :                 } else if (ret == -EAGAIN) {
    4328        2995 :                         goto have_block_group;
    4329     3884650 :                 } else if (ret > 0) {
    4330     3884634 :                         goto loop;
    4331             :                 }
    4332             : 
    4333             :                 /* Checks */
    4334    12355446 :                 ffe_ctl->search_start = round_up(ffe_ctl->found_offset,
    4335             :                                                  fs_info->stripesize);
    4336             : 
    4337             :                 /* move on to the next group */
    4338    12355446 :                 if (ffe_ctl->search_start + ffe_ctl->num_bytes >
    4339    12355446 :                     block_group->start + block_group->length) {
    4340           0 :                         btrfs_add_free_space_unused(block_group,
    4341             :                                             ffe_ctl->found_offset,
    4342             :                                             ffe_ctl->num_bytes);
    4343           0 :                         goto loop;
    4344             :                 }
    4345             : 
    4346    12355446 :                 if (ffe_ctl->found_offset < ffe_ctl->search_start)
    4347           0 :                         btrfs_add_free_space_unused(block_group,
    4348             :                                         ffe_ctl->found_offset,
    4349             :                                         ffe_ctl->search_start - ffe_ctl->found_offset);
    4350             : 
    4351    12355446 :                 ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
    4352             :                                                ffe_ctl->num_bytes,
    4353             :                                                ffe_ctl->delalloc,
    4354    12355446 :                                                ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS);
    4355    12355409 :                 if (ret == -EAGAIN) {
    4356           0 :                         btrfs_add_free_space_unused(block_group,
    4357             :                                         ffe_ctl->found_offset,
    4358             :                                         ffe_ctl->num_bytes);
    4359           0 :                         goto loop;
    4360             :                 }
    4361    12355409 :                 btrfs_inc_block_group_reservations(block_group);
    4362             : 
    4363             :                 /* we are all good, let's return */
    4364    12355470 :                 ins->objectid = ffe_ctl->search_start;
    4365    12355470 :                 ins->offset = ffe_ctl->num_bytes;
    4366             : 
    4367    12355470 :                 trace_btrfs_reserve_extent(block_group, ffe_ctl);
    4368    12355449 :                 btrfs_release_block_group(block_group, ffe_ctl->delalloc);
    4369    12355415 :                 break;
    4370    11330067 : loop:
    4371    11330067 :                 release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
    4372    11329967 :                 cond_resched();
    4373             :         }
    4374    14767160 :         up_read(&space_info->groups_sem);
    4375             : 
    4376    14767060 :         ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, full_search);
    4377    14766954 :         if (ret > 0)
    4378     2407756 :                 goto search;
    4379             : 
    4380    12359198 :         if (ret == -ENOSPC && !cache_block_group_error) {
    4381             :                 /*
    4382             :                  * Use ffe_ctl->total_free_space as fallback if we can't find
    4383             :                  * any contiguous hole.
    4384             :                  */
    4385        3947 :                 if (!ffe_ctl->max_extent_size)
    4386        1539 :                         ffe_ctl->max_extent_size = ffe_ctl->total_free_space;
    4387        3947 :                 spin_lock(&space_info->lock);
    4388        3947 :                 space_info->max_extent_size = ffe_ctl->max_extent_size;
    4389        3947 :                 spin_unlock(&space_info->lock);
    4390        3947 :                 ins->offset = ffe_ctl->max_extent_size;
    4391    12355251 :         } else if (ret == -ENOSPC) {
    4392           0 :                 ret = cache_block_group_error;
    4393             :         }
    4394             :         return ret;
    4395             : }
    4396             : 
    4397             : /*
    4398             :  * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
    4399             :  *                        hole that is at least as big as @num_bytes.
    4400             :  *
    4401             :  * @root           -    The root that will contain this extent
    4402             :  *
    4403             :  * @ram_bytes      -    The amount of space in ram that @num_bytes take. This
    4404             :  *                      is used for accounting purposes. This value differs
    4405             :  *                      from @num_bytes only in the case of compressed extents.
    4406             :  *
    4407             :  * @num_bytes      -    Number of bytes to allocate on-disk.
    4408             :  *
    4409             :  * @min_alloc_size -    Indicates the minimum amount of space that the
    4410             :  *                      allocator should try to satisfy. In some cases
    4411             :  *                      @num_bytes may be larger than what is required and if
    4412             :  *                      the filesystem is fragmented then allocation fails.
    4413             :  *                      However, the presence of @min_alloc_size gives a
    4414             :  *                      chance to try and satisfy the smaller allocation.
    4415             :  *
    4416             :  * @empty_size     -    A hint that you plan on doing more COW. This is the
    4417             :  *                      size in bytes the allocator should try to find free
    4418             :  *                      next to the block it returns.  This is just a hint and
    4419             :  *                      may be ignored by the allocator.
    4420             :  *
    4421             :  * @hint_byte      -    Hint to the allocator to start searching above the byte
    4422             :  *                      address passed. It might be ignored.
    4423             :  *
    4424             :  * @ins            -    This key is modified to record the found hole. It will
    4425             :  *                      have the following values:
    4426             :  *                      ins->objectid == start position
    4427             :  *                      ins->type == BTRFS_EXTENT_ITEM_KEY
    4428             :  *                      ins->offset == the size of the hole.
    4429             :  *
    4430             :  * @is_data        -    Boolean flag indicating whether an extent is
    4431             :  *                      allocated for data (true) or metadata (false)
    4432             :  *
    4433             :  * @delalloc       -    Boolean flag indicating whether this allocation is for
    4434             :  *                      delalloc or not. If 'true' data_rwsem of block groups
    4435             :  *                      is going to be acquired.
    4436             :  *
    4437             :  *
    4438             :  * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
    4439             :  * case -ENOSPC is returned then @ins->offset will contain the size of the
    4440             :  * largest available hole the allocator managed to find.
    4441             :  */
    4442    12355062 : int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
    4443             :                          u64 num_bytes, u64 min_alloc_size,
    4444             :                          u64 empty_size, u64 hint_byte,
    4445             :                          struct btrfs_key *ins, int is_data, int delalloc)
    4446             : {
    4447    12355062 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4448    12355062 :         struct find_free_extent_ctl ffe_ctl = {};
    4449    12355062 :         bool final_tried = num_bytes == min_alloc_size;
    4450    12355062 :         u64 flags;
    4451    12355062 :         int ret;
    4452    12355062 :         bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
    4453    12355062 :         bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data);
    4454             : 
    4455    12355062 :         flags = get_alloc_profile_by_root(root, is_data);
    4456    12365242 : again:
    4457    12365242 :         WARN_ON(num_bytes < fs_info->sectorsize);
    4458             : 
    4459    12365242 :         ffe_ctl.ram_bytes = ram_bytes;
    4460    12365242 :         ffe_ctl.num_bytes = num_bytes;
    4461    12365242 :         ffe_ctl.min_alloc_size = min_alloc_size;
    4462    12365242 :         ffe_ctl.empty_size = empty_size;
    4463    12365242 :         ffe_ctl.flags = flags;
    4464    12365242 :         ffe_ctl.delalloc = delalloc;
    4465    12365242 :         ffe_ctl.hint_byte = hint_byte;
    4466    12365242 :         ffe_ctl.for_treelog = for_treelog;
    4467    12365242 :         ffe_ctl.for_data_reloc = for_data_reloc;
    4468             : 
    4469    12365242 :         ret = find_free_extent(root, ins, &ffe_ctl);
    4470    12365188 :         if (!ret && !is_data) {
    4471     8795280 :                 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
    4472     3569908 :         } else if (ret == -ENOSPC) {
    4473        9928 :                 if (!final_tried && ins->offset) {
    4474        9928 :                         num_bytes = min(num_bytes >> 1, ins->offset);
    4475        9928 :                         num_bytes = round_down(num_bytes,
    4476             :                                                fs_info->sectorsize);
    4477        9928 :                         num_bytes = max(num_bytes, min_alloc_size);
    4478        9928 :                         ram_bytes = num_bytes;
    4479        9928 :                         if (num_bytes == min_alloc_size)
    4480        1338 :                                 final_tried = true;
    4481        9928 :                         goto again;
    4482           0 :                 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
    4483           0 :                         struct btrfs_space_info *sinfo;
    4484             : 
    4485           0 :                         sinfo = btrfs_find_space_info(fs_info, flags);
    4486           0 :                         btrfs_err(fs_info,
    4487             :         "allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
    4488             :                                   flags, num_bytes, for_treelog, for_data_reloc);
    4489           0 :                         if (sinfo)
    4490           0 :                                 btrfs_dump_space_info(fs_info, sinfo,
    4491             :                                                       num_bytes, 1);
    4492             :                 }
    4493             :         }
    4494             : 
    4495    12355330 :         return ret;
    4496             : }
    4497             : 
    4498         174 : int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
    4499             :                                u64 start, u64 len, int delalloc)
    4500             : {
    4501         174 :         struct btrfs_block_group *cache;
    4502             : 
    4503         174 :         cache = btrfs_lookup_block_group(fs_info, start);
    4504         174 :         if (!cache) {
    4505           0 :                 btrfs_err(fs_info, "Unable to find block group for %llu",
    4506             :                           start);
    4507           0 :                 return -ENOSPC;
    4508             :         }
    4509             : 
    4510         174 :         btrfs_add_free_space(cache, start, len);
    4511         174 :         btrfs_free_reserved_bytes(cache, len, delalloc);
    4512         174 :         trace_btrfs_reserved_extent_free(fs_info, start, len);
    4513             : 
    4514         174 :         btrfs_put_block_group(cache);
    4515         174 :         return 0;
    4516             : }
    4517             : 
    4518       23052 : int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
    4519             :                               u64 len)
    4520             : {
    4521       23052 :         struct btrfs_block_group *cache;
    4522       23052 :         int ret = 0;
    4523             : 
    4524       23052 :         cache = btrfs_lookup_block_group(trans->fs_info, start);
    4525       23052 :         if (!cache) {
    4526           0 :                 btrfs_err(trans->fs_info, "unable to find block group for %llu",
    4527             :                           start);
    4528           0 :                 return -ENOSPC;
    4529             :         }
    4530             : 
    4531       23052 :         ret = pin_down_extent(trans, cache, start, len, 1);
    4532       23052 :         btrfs_put_block_group(cache);
    4533       23052 :         return ret;
    4534             : }
    4535             : 
    4536     9327087 : static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
    4537             :                                  u64 num_bytes)
    4538             : {
    4539     9327087 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    4540     9327087 :         int ret;
    4541             : 
    4542     9327087 :         ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
    4543     9327087 :         if (ret)
    4544             :                 return ret;
    4545             : 
    4546     9327087 :         ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
    4547     9327084 :         if (ret) {
    4548           0 :                 ASSERT(!ret);
    4549           0 :                 btrfs_err(fs_info, "update block group failed for %llu %llu",
    4550             :                           bytenr, num_bytes);
    4551           0 :                 return ret;
    4552             :         }
    4553             : 
    4554     9327084 :         trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
    4555     9327084 :         return 0;
    4556             : }
    4557             : 
    4558     1995870 : static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
    4559             :                                       u64 parent, u64 root_objectid,
    4560             :                                       u64 flags, u64 owner, u64 offset,
    4561             :                                       struct btrfs_key *ins, int ref_mod)
    4562             : {
    4563     1995870 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    4564     1995870 :         struct btrfs_root *extent_root;
    4565     1995870 :         int ret;
    4566     1995870 :         struct btrfs_extent_item *extent_item;
    4567     1995870 :         struct btrfs_extent_inline_ref *iref;
    4568     1995870 :         struct btrfs_path *path;
    4569     1995870 :         struct extent_buffer *leaf;
    4570     1995870 :         int type;
    4571     1995870 :         u32 size;
    4572             : 
    4573     1995870 :         if (parent > 0)
    4574             :                 type = BTRFS_SHARED_DATA_REF_KEY;
    4575             :         else
    4576     1995870 :                 type = BTRFS_EXTENT_DATA_REF_KEY;
    4577             : 
    4578     1995870 :         size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
    4579             : 
    4580     1995870 :         path = btrfs_alloc_path();
    4581     1995866 :         if (!path)
    4582             :                 return -ENOMEM;
    4583             : 
    4584     1995866 :         extent_root = btrfs_extent_root(fs_info, ins->objectid);
    4585     1995868 :         ret = btrfs_insert_empty_item(trans, extent_root, path, ins, size);
    4586     1995872 :         if (ret) {
    4587           1 :                 btrfs_free_path(path);
    4588           1 :                 return ret;
    4589             :         }
    4590             : 
    4591     1995871 :         leaf = path->nodes[0];
    4592     1995871 :         extent_item = btrfs_item_ptr(leaf, path->slots[0],
    4593             :                                      struct btrfs_extent_item);
    4594     1995871 :         btrfs_set_extent_refs(leaf, extent_item, ref_mod);
    4595     1995870 :         btrfs_set_extent_generation(leaf, extent_item, trans->transid);
    4596     1995871 :         btrfs_set_extent_flags(leaf, extent_item,
    4597             :                                flags | BTRFS_EXTENT_FLAG_DATA);
    4598             : 
    4599     1995871 :         iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
    4600     1995871 :         btrfs_set_extent_inline_ref_type(leaf, iref, type);
    4601     1995871 :         if (parent > 0) {
    4602           0 :                 struct btrfs_shared_data_ref *ref;
    4603           0 :                 ref = (struct btrfs_shared_data_ref *)(iref + 1);
    4604           0 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
    4605           0 :                 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
    4606             :         } else {
    4607     1995871 :                 struct btrfs_extent_data_ref *ref;
    4608     1995871 :                 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
    4609     1995871 :                 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
    4610     1995870 :                 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
    4611     1995871 :                 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
    4612     1995870 :                 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
    4613             :         }
    4614             : 
    4615     1995871 :         btrfs_mark_buffer_dirty(path->nodes[0]);
    4616     1995871 :         btrfs_free_path(path);
    4617             : 
    4618     1995871 :         return alloc_reserved_extent(trans, ins->objectid, ins->offset);
    4619             : }
    4620             : 
    4621     7331147 : static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
    4622             :                                      struct btrfs_delayed_ref_node *node,
    4623             :                                      struct btrfs_delayed_extent_op *extent_op)
    4624             : {
    4625     7331147 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    4626     7331147 :         struct btrfs_root *extent_root;
    4627     7331147 :         int ret;
    4628     7331147 :         struct btrfs_extent_item *extent_item;
    4629     7331147 :         struct btrfs_key extent_key;
    4630     7331147 :         struct btrfs_tree_block_info *block_info;
    4631     7331147 :         struct btrfs_extent_inline_ref *iref;
    4632     7331147 :         struct btrfs_path *path;
    4633     7331147 :         struct extent_buffer *leaf;
    4634     7331147 :         struct btrfs_delayed_tree_ref *ref;
    4635     7331147 :         u32 size = sizeof(*extent_item) + sizeof(*iref);
    4636     7331147 :         u64 flags = extent_op->flags_to_set;
    4637     7331147 :         bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
    4638             : 
    4639     7331147 :         ref = btrfs_delayed_node_to_tree_ref(node);
    4640             : 
    4641     7331147 :         extent_key.objectid = node->bytenr;
    4642     7331147 :         if (skinny_metadata) {
    4643     7331147 :                 extent_key.offset = ref->level;
    4644     7331147 :                 extent_key.type = BTRFS_METADATA_ITEM_KEY;
    4645             :         } else {
    4646           0 :                 extent_key.offset = node->num_bytes;
    4647           0 :                 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
    4648           0 :                 size += sizeof(*block_info);
    4649             :         }
    4650             : 
    4651     7331147 :         path = btrfs_alloc_path();
    4652     7331171 :         if (!path)
    4653             :                 return -ENOMEM;
    4654             : 
    4655     7331171 :         extent_root = btrfs_extent_root(fs_info, extent_key.objectid);
    4656     7331121 :         ret = btrfs_insert_empty_item(trans, extent_root, path, &extent_key,
    4657             :                                       size);
    4658     7331216 :         if (ret) {
    4659           0 :                 btrfs_free_path(path);
    4660           0 :                 return ret;
    4661             :         }
    4662             : 
    4663     7331216 :         leaf = path->nodes[0];
    4664     7331216 :         extent_item = btrfs_item_ptr(leaf, path->slots[0],
    4665             :                                      struct btrfs_extent_item);
    4666     7331216 :         btrfs_set_extent_refs(leaf, extent_item, 1);
    4667     7331216 :         btrfs_set_extent_generation(leaf, extent_item, trans->transid);
    4668     7331216 :         btrfs_set_extent_flags(leaf, extent_item,
    4669             :                                flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
    4670             : 
    4671     7331216 :         if (skinny_metadata) {
    4672     7331216 :                 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
    4673             :         } else {
    4674           0 :                 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
    4675           0 :                 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
    4676           0 :                 btrfs_set_tree_block_level(leaf, block_info, ref->level);
    4677           0 :                 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
    4678             :         }
    4679             : 
    4680     7331216 :         if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
    4681     1854913 :                 btrfs_set_extent_inline_ref_type(leaf, iref,
    4682             :                                                  BTRFS_SHARED_BLOCK_REF_KEY);
    4683     1854913 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
    4684             :         } else {
    4685     5476303 :                 btrfs_set_extent_inline_ref_type(leaf, iref,
    4686             :                                                  BTRFS_TREE_BLOCK_REF_KEY);
    4687     5476303 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
    4688             :         }
    4689             : 
    4690     7331216 :         btrfs_mark_buffer_dirty(leaf);
    4691     7331216 :         btrfs_free_path(path);
    4692             : 
    4693     7331216 :         return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
    4694             : }
    4695             : /*
                     :  * Queue the delayed data reference for a newly allocated file extent.
                     :  *
                     :  * A BTRFS_ADD_DELAYED_EXTENT generic ref is initialized from ins->objectid
                     :  * and ins->offset (presumably the extent start and length - confirm
                     :  * against btrfs_init_generic_ref()), given a data ref built from the
                     :  * root's objectid, @owner and @offset, recorded with
                     :  * btrfs_ref_tree_mod(), and finally handed to
                     :  * btrfs_add_delayed_data_ref() together with @ram_bytes.
                     :  *
                     :  * Must not be called for a tree-log root; this is enforced with BUG_ON().
                     :  *
                     :  * Returns whatever btrfs_add_delayed_data_ref() returns.
                     :  */
    4696     3560088 : int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
    4697             :                                      struct btrfs_root *root, u64 owner,
    4698             :                                      u64 offset, u64 ram_bytes,
    4699             :                                      struct btrfs_key *ins)
    4700             : {
    4701     3560088 :         struct btrfs_ref generic_ref = { 0 };
    4702             : 
    4703     3560088 :         BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
    4704             : 
    4705     3560088 :         btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
    4706             :                                ins->objectid, ins->offset, 0);
    4707     3560088 :         btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner,
    4708             :                             offset, 0, false);
    4709     3560088 :         btrfs_ref_tree_mod(root->fs_info, &generic_ref);
    4710             : 
    4711     3560088 :         return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes);
    4712             : }
    4713             : 
    4714             : /*
    4715             :  * this is used by the tree logging recovery code.  It records that
    4716             :  * an extent has been allocated and makes sure to clear the free
    4717             :  * space cache bits as well
                     :  *
                     :  * Steps, in order:
                     :  *  - unless the fs has the MIXED_GROUPS incompat flag, exclude the logged
                     :  *    range with __exclude_logged_extent();
                     :  *  - look up the block group containing ins->objectid (-EINVAL if there
                     :  *    is none);
                     :  *  - add ins->offset to space_info->bytes_reserved and to
                     :  *    block_group->reserved while holding both spinlocks;
                     :  *  - call alloc_reserved_file_extent(); if that fails, pin the range with
                     :  *    btrfs_pin_extent().
                     :  *
                     :  * Returns the result of alloc_reserved_file_extent(), or the earlier
                     :  * error from __exclude_logged_extent() / the failed block group lookup.
    4718             :  */
    4719       76673 : int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
    4720             :                                    u64 root_objectid, u64 owner, u64 offset,
    4721             :                                    struct btrfs_key *ins)
    4722             : {
    4723       76673 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    4724       76673 :         int ret;
    4725       76673 :         struct btrfs_block_group *block_group;
    4726       76673 :         struct btrfs_space_info *space_info;
    4727             : 
    4728             :         /*
    4729             :          * Mixed block groups will exclude before processing the log so we only
    4730             :          * need to do the exclude dance if this fs isn't mixed.
    4731             :          */
    4732       76673 :         if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
    4733       76673 :                 ret = __exclude_logged_extent(fs_info, ins->objectid,
    4734             :                                               ins->offset);
    4735       76673 :                 if (ret)
    4736             :                         return ret;
    4737             :         }
    4738             : 
    4739       76673 :         block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
    4740       76673 :         if (!block_group)
    4741             :                 return -EINVAL;
    4742             :         /* Lock nesting: space_info->lock is taken outside block_group->lock. */
    4743       76673 :         space_info = block_group->space_info;
    4744       76673 :         spin_lock(&space_info->lock);
    4745       76673 :         spin_lock(&block_group->lock);
    4746       76673 :         space_info->bytes_reserved += ins->offset;
    4747       76673 :         block_group->reserved += ins->offset;
    4748       76673 :         spin_unlock(&block_group->lock);
    4749       76673 :         spin_unlock(&space_info->lock);
    4750             : 
    4751       76673 :         ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
    4752             :                                          offset, ins, 1);
    4753       76673 :         if (ret)
    4754           0 :                 btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
    4755       76673 :         btrfs_put_block_group(block_group); /* pairs with btrfs_lookup_block_group() above */
    4756       76673 :         return ret;
    4757             : }
    4758             : /*
                     :  * Set up the in-memory extent buffer for a freshly allocated tree block.
                     :  *
                     :  * @trans:  transaction; its transid is written as the header generation
                     :  * @root:   root the block is allocated for (supplies fs_info, the lockdep
                     :  *          class decision and the choice of dirty-range tree)
                     :  * @bytenr: block start, passed to btrfs_find_create_tree_block()
                     :  * @level:  tree level written into the header
                     :  * @owner:  objectid written as the header owner
                     :  * @nest:   lock nesting passed to __btrfs_tree_lock()
                     :  *
                     :  * The buffer is obtained from btrfs_find_create_tree_block(), locked,
                     :  * cleaned (dirty state, STALE and NO_CHECK bits), marked uptodate, and
                     :  * its header is zeroed and refilled.  The buffer range is then recorded
                     :  * in root->dirty_log_pages for a tree-log root, or in the transaction's
                     :  * dirty_pages otherwise.
                     :  *
                     :  * Returns the locked buffer, the ERR_PTR value produced by
                     :  * btrfs_find_create_tree_block(), or ERR_PTR(-EUCLEAN) when the buffer
                     :  * is already locked by the current task.
                     :  */
    4759             : static struct extent_buffer *
    4760     8795353 : btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    4761             :                       u64 bytenr, int level, u64 owner,
    4762             :                       enum btrfs_lock_nesting nest)
    4763             : {
    4764     8795353 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4765     8795353 :         struct extent_buffer *buf;
    4766     8795353 :         u64 lockdep_owner = owner;
    4767             : 
    4768     8795353 :         buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
    4769     8795287 :         if (IS_ERR(buf))
    4770             :                 return buf;
    4771             : 
    4772             :         /*
    4773             :          * Extra safety check in case the extent tree is corrupted and extent
    4774             :          * allocator chooses to use a tree block which is already used and
    4775             :          * locked.
    4776             :          */
    4777     8795287 :         if (buf->lock_owner == current->pid) {
    4778           0 :                 btrfs_err_rl(fs_info,
    4779             : "tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
    4780             :                         buf->start, btrfs_header_owner(buf), current->pid);
    4781           0 :                 free_extent_buffer(buf);
    4782           0 :                 return ERR_PTR(-EUCLEAN);
    4783             :         }
    4784             : 
    4785             :         /*
    4786             :          * The reloc trees are just snapshots, so we need them to appear to be
    4787             :          * just like any other fs tree WRT lockdep.
    4788             :          *
    4789             :          * The exception however is in replace_path() in relocation, where we
    4790             :          * hold the lock on the original fs root and then search for the reloc
    4791             :          * root.  At that point we need to make sure any reloc root buffers are
    4792             :          * set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make
    4793             :          * lockdep happy.
    4794             :          */
    4795     8795287 :         if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID &&
    4796     1854913 :             !test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
    4797             :                 lockdep_owner = BTRFS_FS_TREE_OBJECTID;
    4798             : 
    4799             :         /* btrfs_clear_buffer_dirty() accesses generation field. */
    4800     8795287 :         btrfs_set_header_generation(buf, trans->transid);
    4801             : 
    4802             :         /*
    4803             :          * This needs to stay, because we could allocate a freed block from an
    4804             :          * old tree into a new tree, so we need to make sure this new block is
    4805             :          * set to the appropriate level and owner.
    4806             :          */
    4807     8795287 :         btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level);
    4808             : 
    4809     8795287 :         __btrfs_tree_lock(buf, nest);
    4810     8794619 :         btrfs_clear_buffer_dirty(trans, buf);
    4811     8795094 :         clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
    4812     8795266 :         clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);
    4813             : 
    4814     8795160 :         set_extent_buffer_uptodate(buf);
    4815             :         /* The header is rebuilt from scratch; the generation set above is zeroed and set again. */
    4816     8795086 :         memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
    4817     8793684 :         btrfs_set_header_level(buf, level);
    4818     8793684 :         btrfs_set_header_bytenr(buf, buf->start);
    4819     8793684 :         btrfs_set_header_generation(buf, trans->transid);
    4820     8793684 :         btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
    4821     8793930 :         btrfs_set_header_owner(buf, owner);
    4822     8793930 :         write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
    4823     8794508 :         write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
    4824     8794418 :         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
    4825     1093045 :                 buf->log_index = root->log_transid % 2;
    4826             :                 /*
    4827             :                  * we allow two log transactions at a time, use different
    4828             :                  * EXTENT bit to differentiate dirty pages.
    4829             :                  */
    4830     1093045 :                 if (buf->log_index == 0)
    4831      555296 :                         set_extent_bit(&root->dirty_log_pages, buf->start,
    4832      555296 :                                        buf->start + buf->len - 1,
    4833             :                                        EXTENT_DIRTY, NULL);
    4834             :                 else
    4835      537749 :                         set_extent_bit(&root->dirty_log_pages, buf->start,
    4836      537749 :                                        buf->start + buf->len - 1,
    4837             :                                        EXTENT_NEW, NULL);
    4838             :         } else {
    4839     7701373 :                 buf->log_index = -1; /* not a tree-log buffer */
    4840     7701373 :                 set_extent_bit(&trans->transaction->dirty_pages, buf->start,
    4841     7701373 :                                buf->start + buf->len - 1, EXTENT_DIRTY, NULL);
    4842             :         }
    4843             :         /* this returns a buffer locked for blocking */
    4844             :         return buf;
    4845             : }
    4846             : 
    4847             : /*
    4848             :  * finds a free extent and does all the dirty work required for allocation
    4849             :  * returns the tree buffer or an ERR_PTR on error.
                     :  *
                     :  * @parent:        must be 0 unless @root_objectid is
                     :  *                 BTRFS_TREE_RELOC_OBJECTID (BUG_ON otherwise); for a
                     :  *                 reloc block with @parent == 0 the new block's own
                     :  *                 bytenr is used as parent
                     :  * @root_objectid: owner written into the new block's header and used
                     :  *                 for the tree ref
                     :  * @key:           copied into the delayed extent op; may be NULL, in
                     :  *                 which case a zeroed key is stored
                     :  * @level:         tree level of the new block
                     :  * @hint, @empty_size: passed through to btrfs_reserve_extent()
                     :  * @nest:          lock nesting passed through to btrfs_init_new_buffer()
                     :  *
                     :  * One nodesize-sized block is allocated.  For every owner except
                     :  * BTRFS_TREE_LOG_OBJECTID a delayed tree ref with an extent op is queued.
                     :  * On failure the steps already taken are undone in reverse order through
                     :  * the out_* labels.
                     :  *
                     :  * With CONFIG_BTRFS_FS_RUN_SANITY_TESTS and btrfs_is_testing(), the block
                     :  * is simply placed at root->alloc_bytenr, which is then advanced by
                     :  * nodesize; no reservation or delayed ref is made.
    4850             :  */
    4851     8797528 : struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
    4852             :                                              struct btrfs_root *root,
    4853             :                                              u64 parent, u64 root_objectid,
    4854             :                                              const struct btrfs_disk_key *key,
    4855             :                                              int level, u64 hint,
    4856             :                                              u64 empty_size,
    4857             :                                              enum btrfs_lock_nesting nest)
    4858             : {
    4859     8797528 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4860     8797528 :         struct btrfs_key ins;
    4861     8797528 :         struct btrfs_block_rsv *block_rsv;
    4862     8797528 :         struct extent_buffer *buf;
    4863     8797528 :         struct btrfs_delayed_extent_op *extent_op;
    4864     8797528 :         struct btrfs_ref generic_ref = { 0 };
    4865     8797528 :         u64 flags = 0;
    4866     8797528 :         int ret;
    4867     8797528 :         u32 blocksize = fs_info->nodesize;
    4868     8797528 :         bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
    4869             : 
    4870             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
    4871             :         if (btrfs_is_testing(fs_info)) {
    4872             :                 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
    4873             :                                             level, root_objectid, nest);
    4874             :                 if (!IS_ERR(buf))
    4875             :                         root->alloc_bytenr += blocksize;
    4876             :                 return buf;
    4877             :         }
    4878             : #endif
    4879             :         /* Taken first; every later failure path ends in btrfs_unuse_block_rsv(). */
    4880     8797528 :         block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
    4881     8797496 :         if (IS_ERR(block_rsv))
    4882             :                 return ERR_CAST(block_rsv);
    4883             : 
    4884     8795239 :         ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
    4885             :                                    empty_size, hint, &ins, 0, 0);
    4886     8795356 :         if (ret)
    4887           0 :                 goto out_unuse;
    4888             : 
    4889     8795356 :         buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
    4890             :                                     root_objectid, nest);
    4891     8795387 :         if (IS_ERR(buf)) {
    4892           0 :                 ret = PTR_ERR(buf);
    4893           0 :                 goto out_free_reserved;
    4894             :         }
    4895             :         /* Reloc tree blocks always get the FULL_BACKREF flag; nobody else may pass a parent. */
    4896     8795387 :         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
    4897     1854913 :                 if (parent == 0)
    4898        2165 :                         parent = ins.objectid;
    4899             :                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
    4900             :         } else
    4901     6940474 :                 BUG_ON(parent > 0);
    4902             :         /* Tree-log blocks get no delayed extent ref from this function. */
    4903     8795387 :         if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
    4904     7702373 :                 extent_op = btrfs_alloc_delayed_extent_op();
    4905     7702364 :                 if (!extent_op) {
    4906           0 :                         ret = -ENOMEM;
    4907           0 :                         goto out_free_buf;
    4908             :                 }
    4909     7702364 :                 if (key)
    4910    15403898 :                         memcpy(&extent_op->key, key, sizeof(extent_op->key));
    4911             :                 else
    4912         830 :                         memset(&extent_op->key, 0, sizeof(extent_op->key));
    4913     7702364 :                 extent_op->flags_to_set = flags;
    4914     7702364 :                 extent_op->update_key = skinny_metadata ? false : true;
    4915     7702364 :                 extent_op->update_flags = true;
    4916     7702364 :                 extent_op->level = level;
    4917             : 
    4918     7702364 :                 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
    4919             :                                        ins.objectid, ins.offset, parent);
    4920     7702364 :                 btrfs_init_tree_ref(&generic_ref, level, root_objectid,
    4921             :                                     root->root_key.objectid, false);
    4922     7702354 :                 btrfs_ref_tree_mod(fs_info, &generic_ref);
    4923     7702354 :                 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
    4924     7702367 :                 if (ret)
    4925           0 :                         goto out_free_delayed;
    4926             :         }
    4927             :         return buf;
    4928             :         /* Error unwinding: each label undoes one step and falls through to the next. */
    4929             : out_free_delayed:
    4930           0 :         btrfs_free_delayed_extent_op(extent_op);
    4931           0 : out_free_buf:
    4932           0 :         btrfs_tree_unlock(buf);
    4933           0 :         free_extent_buffer(buf);
    4934           0 : out_free_reserved:
    4935           0 :         btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
    4936           0 : out_unuse:
    4937           0 :         btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
    4938           0 :         return ERR_PTR(ret);
    4939             : }
    4940             : /*
                     :  * State shared by the tree-walk helpers in this file (reada_walk_down(),
                     :  * walk_down_proc(), do_walk_down()).  Only fields whose use is visible in
                     :  * those helpers are annotated; the others are left undescribed here.
                     :  */
    4941             : struct walk_control {
    4942             :         u64 refs[BTRFS_MAX_LEVEL];  /* per-level ref count, filled by btrfs_lookup_extent_info() */
    4943             :         u64 flags[BTRFS_MAX_LEVEL]; /* per-level extent flags, filled by the same lookup */
    4944             :         struct btrfs_key update_progress; /* node keys are compared against this in reada_walk_down() */
    4945             :         struct btrfs_key drop_progress;
    4946             :         int drop_level;
    4947             :         int stage;                  /* DROP_REFERENCE or UPDATE_BACKREF */
    4948             :         int level;                  /* tree level currently being processed */
    4949             :         int shared_level;
    4950             :         int update_ref;             /* when 0, reada_walk_down() skips shared children in DROP_REFERENCE */
    4951             :         int keep_locks;             /* when set, walk_down_proc() does not unlock in DROP_REFERENCE */
    4952             :         int reada_slot;             /* slot at which the last reada_walk_down() loop stopped */
    4953             :         int reada_count;            /* current readahead window size, adapted on every call */
    4954             :         int restarted;
    4955             : };
    4956             : 
    4957             : #define DROP_REFERENCE  1 /* walk_control.stage: references to blocks are being dropped */
    4958             : #define UPDATE_BACKREF  2 /* walk_control.stage: back refs of a shared subtree are being updated */
    4959             : /*
                     :  * Issue readahead for children of the node at wc->level, starting at the
                     :  * path's current slot and stopping after wc->reada_count requests.
                     :  *
                     :  * The window adapts on every call: if the current slot is still below
                     :  * wc->reada_slot (where the previous call stopped) it shrinks to 2/3, but
                     :  * never below 2; otherwise it grows by 3/2, capped at
                     :  * BTRFS_NODEPTRS_PER_BLOCK().
                     :  *
                     :  * The child at the current slot is always read ahead.  Other children
                     :  * are skipped according to the stage, their ref count, flags and pointer
                     :  * generation.  Lookup errors are ignored.  On return wc->reada_slot holds
                     :  * the slot at which the loop stopped.
                     :  */
    4960           0 : static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
    4961             :                                      struct btrfs_root *root,
    4962             :                                      struct walk_control *wc,
    4963             :                                      struct btrfs_path *path)
    4964             : {
    4965           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4966           0 :         u64 bytenr;
    4967           0 :         u64 generation;
    4968           0 :         u64 refs;
    4969           0 :         u64 flags;
    4970           0 :         u32 nritems;
    4971           0 :         struct btrfs_key key;
    4972           0 :         struct extent_buffer *eb;
    4973           0 :         int ret;
    4974           0 :         int slot;
    4975           0 :         int nread = 0;
    4976             :         /* Shrink the window while still inside the previously covered range, grow it otherwise. */
    4977           0 :         if (path->slots[wc->level] < wc->reada_slot) {
    4978           0 :                 wc->reada_count = wc->reada_count * 2 / 3;
    4979           0 :                 wc->reada_count = max(wc->reada_count, 2);
    4980             :         } else {
    4981           0 :                 wc->reada_count = wc->reada_count * 3 / 2;
    4982           0 :                 wc->reada_count = min_t(int, wc->reada_count,
    4983             :                                         BTRFS_NODEPTRS_PER_BLOCK(fs_info));
    4984             :         }
    4985             : 
    4986           0 :         eb = path->nodes[wc->level];
    4987           0 :         nritems = btrfs_header_nritems(eb);
    4988             : 
    4989           0 :         for (slot = path->slots[wc->level]; slot < nritems; slot++) {
    4990           0 :                 if (nread >= wc->reada_count)
    4991             :                         break;
    4992             : 
    4993           0 :                 cond_resched();
    4994           0 :                 bytenr = btrfs_node_blockptr(eb, slot);
    4995           0 :                 generation = btrfs_node_ptr_generation(eb, slot);
    4996             :                 /* The child at the current slot is read ahead unconditionally. */
    4997           0 :                 if (slot == path->slots[wc->level])
    4998           0 :                         goto reada;
    4999             :                 /* UPDATE_BACKREF: skip children whose generation is not newer than root_key.offset. */
    5000           0 :                 if (wc->stage == UPDATE_BACKREF &&
    5001           0 :                     generation <= root->root_key.offset)
    5002           0 :                         continue;
    5003             : 
    5004             :                 /* We don't lock the tree block, it's OK to be racy here */
    5005           0 :                 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
    5006           0 :                                                wc->level - 1, 1, &refs,
    5007             :                                                &flags);
    5008             :                 /* We don't care about errors in readahead. */
    5009           0 :                 if (ret < 0)
    5010           0 :                         continue;
    5011           0 :                 BUG_ON(refs == 0);
    5012             : 
    5013           0 :                 if (wc->stage == DROP_REFERENCE) {
    5014           0 :                         if (refs == 1)
    5015           0 :                                 goto reada;
    5016             :                         /* Shared child from here on (refs > 1). */
    5017           0 :                         if (wc->level == 1 &&
    5018           0 :                             (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    5019           0 :                                 continue;
    5020           0 :                         if (!wc->update_ref ||
    5021           0 :                             generation <= root->root_key.offset)
    5022           0 :                                 continue;
    5023           0 :                         btrfs_node_key_to_cpu(eb, &key, slot);
    5024           0 :                         ret = btrfs_comp_cpu_keys(&key,
    5025           0 :                                                   &wc->update_progress);
    5026           0 :                         if (ret < 0)
    5027           0 :                                 continue;
    5028             :                 } else {
    5029           0 :                         if (wc->level == 1 &&
    5030           0 :                             (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    5031           0 :                                 continue;
    5032             :                 }
    5033           0 : reada:
    5034           0 :                 btrfs_readahead_node_child(eb, slot);
    5035           0 :                 nread++;
    5036             :         }
    5037           0 :         wc->reada_slot = slot;
    5038           0 : }
    5039             : 
    5040             : /*
    5041             :  * helper to process tree block while walking down the tree.
    5042             :  *
    5043             :  * when wc->stage == UPDATE_BACKREF, this function updates
    5044             :  * back refs for pointers in the block.
    5045             :  *
    5046             :  * NOTE: return value 1 means we should stop walking down.
                     :  *
                     :  * @lookup_info: when non-zero, wc->refs[level] / wc->flags[level] are
                     :  *               refreshed with btrfs_lookup_extent_info() if the cached
                     :  *               values could still change (refs != 1 in DROP_REFERENCE,
                     :  *               full backref flag not yet set in UPDATE_BACKREF).
                     :  *
                     :  * Returns 1 when the block is not owned by @root in the UPDATE_BACKREF
                     :  * stage or has more than one reference in the DROP_REFERENCE stage, 0 to
                     :  * continue, or the non-zero result of btrfs_lookup_extent_info().
                     :  * Several failures are treated as fatal with BUG_ON().
    5047             :  */
    5048     8674504 : static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
    5049             :                                    struct btrfs_root *root,
    5050             :                                    struct btrfs_path *path,
    5051             :                                    struct walk_control *wc, int lookup_info)
    5052             : {
    5053     8674504 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5054     8674504 :         int level = wc->level;
    5055     8674504 :         struct extent_buffer *eb = path->nodes[level];
    5056     8674504 :         u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
    5057     8674504 :         int ret;
    5058             : 
    5059     8674504 :         if (wc->stage == UPDATE_BACKREF &&
    5060           0 :             btrfs_header_owner(eb) != root->root_key.objectid)
    5061             :                 return 1;
    5062             : 
    5063             :         /*
    5064             :          * when reference count of tree block is 1, it won't increase
    5065             :          * again. once full backref flag is set, we never clear it.
    5066             :          */
    5067     8674504 :         if (lookup_info &&
    5068     6817545 :             ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
    5069           0 :              (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
    5070      506527 :                 BUG_ON(!path->locks[level]);
    5071      506527 :                 ret = btrfs_lookup_extent_info(trans, fs_info,
    5072             :                                                eb->start, level, 1,
    5073             :                                                &wc->refs[level],
    5074             :                                                &wc->flags[level]);
    5075      506527 :                 BUG_ON(ret == -ENOMEM);
    5076      506527 :                 if (ret)
    5077             :                         return ret;
    5078      506527 :                 BUG_ON(wc->refs[level] == 0);
    5079             :         }
    5080             :         /* DROP_REFERENCE: stop at a shared block; otherwise unlock it unless keep_locks is set. */
    5081     8674504 :         if (wc->stage == DROP_REFERENCE) {
    5082     8674504 :                 if (wc->refs[level] > 1)
    5083             :                         return 1;
    5084             : 
    5085     8169650 :                 if (path->locks[level] && !wc->keep_locks) {
    5086     1858577 :                         btrfs_tree_unlock_rw(eb, path->locks[level]);
    5087     1858577 :                         path->locks[level] = 0;
    5088             :                 }
    5089     8169650 :                 return 0;
    5090             :         }
    5091             : 
    5092             :         /* wc->stage == UPDATE_BACKREF */
    5093           0 :         if (!(wc->flags[level] & flag)) {
    5094           0 :                 BUG_ON(!path->locks[level]);
    5095           0 :                 ret = btrfs_inc_ref(trans, root, eb, 1);
    5096           0 :                 BUG_ON(ret); /* -ENOMEM */
    5097           0 :                 ret = btrfs_dec_ref(trans, root, eb, 0);
    5098           0 :                 BUG_ON(ret); /* -ENOMEM */
    5099           0 :                 ret = btrfs_set_disk_extent_flags(trans, eb, flag);
    5100           0 :                 BUG_ON(ret); /* -ENOMEM */
    5101           0 :                 wc->flags[level] |= flag; /* keep the cached flags in step with the flag just set */
    5102             :         }
    5103             : 
    5104             :         /*
    5105             :          * the block is shared by multiple trees, so it's not good to
    5106             :          * keep the tree lock
    5107             :          */
    5108           0 :         if (path->locks[level] && level > 0) {
    5109           0 :                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    5110           0 :                 path->locks[level] = 0;
    5111             :         }
    5112             :         return 0;
    5113             : }
    5114             : 
    5115             : /*
    5116             :  * This is used to verify a ref exists for this root to deal with a bug where we
    5117             :  * would have a drop_progress key that hadn't been updated properly.
                     :  *
                     :  * The lookup is done with lookup_extent_backref() for a nodesize-sized
                     :  * extent at @bytenr, using @parent, the root's objectid and @level.
                     :  *
                     :  * Returns 1 if the lookup did not fail, 0 if it returned -ENOENT, and a
                     :  * negative errno otherwise (-ENOMEM if the path cannot be allocated).
    5118             :  */
    5119           0 : static int check_ref_exists(struct btrfs_trans_handle *trans,
    5120             :                             struct btrfs_root *root, u64 bytenr, u64 parent,
    5121             :                             int level)
    5122             : {
    5123           0 :         struct btrfs_path *path;
    5124           0 :         struct btrfs_extent_inline_ref *iref;
    5125           0 :         int ret;
    5126             : 
    5127           0 :         path = btrfs_alloc_path();
    5128           0 :         if (!path)
    5129             :                 return -ENOMEM;
    5130             : 
    5131           0 :         ret = lookup_extent_backref(trans, path, &iref, bytenr,
    5132           0 :                                     root->fs_info->nodesize, parent,
    5133             :                                     root->root_key.objectid, level, 0);
    5134           0 :         btrfs_free_path(path); /* only the lookup result is needed, so the path is released first */
    5135           0 :         if (ret == -ENOENT)
    5136             :                 return 0;
    5137           0 :         if (ret < 0)
    5138           0 :                 return ret;
    5139             :         return 1;
    5140             : }
    5141             : 
    5142             : /*
    5143             :  * helper to process tree block pointer.
    5144             :  *
    5145             :  * when wc->stage == DROP_REFERENCE, this function checks
    5146             :  * reference count of the block pointed to. if the block
    5147             :  * is shared and we need update back refs for the subtree
    5148             :  * rooted at the block, this function changes wc->stage to
    5149             :  * UPDATE_BACKREF. if the block is shared and there is no
    5150             :  * need to update back refs, this function drops the reference
    5151             :  * to the block.
    5152             :  *
    5153             :  * NOTE: return value 1 means we should stop walking down.
    5154             :  */
    5155     6317285 : static noinline int do_walk_down(struct btrfs_trans_handle *trans,
    5156             :                                  struct btrfs_root *root,
    5157             :                                  struct btrfs_path *path,
    5158             :                                  struct walk_control *wc, int *lookup_info)
    5159             : {
    5160     6317285 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5161     6317285 :         u64 bytenr;
    5162     6317285 :         u64 generation;
    5163     6317285 :         u64 parent;
    5164     6317285 :         struct btrfs_tree_parent_check check = { 0 };
    5165     6317285 :         struct btrfs_key key;
    5166     6317285 :         struct btrfs_ref ref = { 0 };
    5167     6317285 :         struct extent_buffer *next;
    5168     6317285 :         int level = wc->level;
    5169     6317285 :         int reada = 0;
    5170     6317285 :         int ret = 0;
    5171     6317285 :         bool need_account = false;
    5172             : 
    5173     6317285 :         generation = btrfs_node_ptr_generation(path->nodes[level],
    5174             :                                                path->slots[level]);
    5175             :         /*
    5176             :          * if the lower level block was created before the snapshot
    5177             :          * was created, we know there is no need to update back refs
    5178             :          * for the subtree
    5179             :          */
    5180     6317285 :         if (wc->stage == UPDATE_BACKREF &&
    5181           0 :             generation <= root->root_key.offset) {
    5182           0 :                 *lookup_info = 1;
    5183           0 :                 return 1;
    5184             :         }
    5185             : 
    5186     6317285 :         bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
    5187             : 
    5188     6317285 :         check.level = level - 1;
    5189     6317285 :         check.transid = generation;
    5190     6317285 :         check.owner_root = root->root_key.objectid;
    5191     6317285 :         check.has_first_key = true;
    5192     6317285 :         btrfs_node_key_to_cpu(path->nodes[level], &check.first_key,
    5193             :                               path->slots[level]);
    5194             : 
    5195     6317285 :         next = find_extent_buffer(fs_info, bytenr);
    5196     6317285 :         if (!next) {
    5197     1244265 :                 next = btrfs_find_create_tree_block(fs_info, bytenr,
    5198             :                                 root->root_key.objectid, level - 1);
    5199     1244265 :                 if (IS_ERR(next))
    5200           0 :                         return PTR_ERR(next);
    5201             :                 reada = 1;
    5202             :         }
    5203     6317285 :         btrfs_tree_lock(next);
    5204             : 
    5205     6317285 :         ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
    5206             :                                        &wc->refs[level - 1],
    5207     6317285 :                                        &wc->flags[level - 1]);
    5208     6317285 :         if (ret < 0)
    5209           0 :                 goto out_unlock;
    5210             : 
    5211     6317285 :         if (unlikely(wc->refs[level - 1] == 0)) {
    5212           0 :                 btrfs_err(fs_info, "Missing references.");
    5213           0 :                 ret = -EIO;
    5214           0 :                 goto out_unlock;
    5215             :         }
    5216     6317285 :         *lookup_info = 0;
    5217             : 
    5218     6317285 :         if (wc->stage == DROP_REFERENCE) {
    5219     6317285 :                 if (wc->refs[level - 1] > 1) {
    5220     4460326 :                         need_account = true;
    5221     4460326 :                         if (level == 1 &&
    5222     4448408 :                             (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    5223     4416352 :                                 goto skip;
    5224             : 
    5225       43974 :                         if (!wc->update_ref ||
    5226       21681 :                             generation <= root->root_key.offset)
    5227       43974 :                                 goto skip;
    5228             : 
    5229           0 :                         btrfs_node_key_to_cpu(path->nodes[level], &key,
    5230             :                                               path->slots[level]);
    5231           0 :                         ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
    5232           0 :                         if (ret < 0)
    5233           0 :                                 goto skip;
    5234             : 
    5235           0 :                         wc->stage = UPDATE_BACKREF;
    5236           0 :                         wc->shared_level = level - 1;
    5237             :                 }
    5238             :         } else {
    5239           0 :                 if (level == 1 &&
    5240           0 :                     (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    5241           0 :                         goto skip;
    5242             :         }
    5243             : 
    5244     1856959 :         if (!btrfs_buffer_uptodate(next, generation, 0)) {
    5245           0 :                 btrfs_tree_unlock(next);
    5246           0 :                 free_extent_buffer(next);
    5247           0 :                 next = NULL;
    5248           0 :                 *lookup_info = 1;
    5249             :         }
    5250             : 
    5251     1856959 :         if (!next) {
    5252           0 :                 if (reada && level == 1)
    5253           0 :                         reada_walk_down(trans, root, wc, path);
    5254           0 :                 next = read_tree_block(fs_info, bytenr, &check);
    5255           0 :                 if (IS_ERR(next)) {
    5256           0 :                         return PTR_ERR(next);
    5257           0 :                 } else if (!extent_buffer_uptodate(next)) {
    5258           0 :                         free_extent_buffer(next);
    5259           0 :                         return -EIO;
    5260             :                 }
    5261           0 :                 btrfs_tree_lock(next);
    5262             :         }
    5263             : 
    5264     1856959 :         level--;
    5265     1856959 :         ASSERT(level == btrfs_header_level(next));
    5266     1856959 :         if (level != btrfs_header_level(next)) {
    5267           0 :                 btrfs_err(root->fs_info, "mismatched level");
    5268           0 :                 ret = -EIO;
    5269           0 :                 goto out_unlock;
    5270             :         }
    5271     1856959 :         path->nodes[level] = next;
    5272     1856959 :         path->slots[level] = 0;
    5273     1856959 :         path->locks[level] = BTRFS_WRITE_LOCK;
    5274     1856959 :         wc->level = level;
    5275     1856959 :         if (wc->level == 1)
    5276       18673 :                 wc->reada_slot = 0;
    5277             :         return 0;
    5278     4460326 : skip:
    5279     4460326 :         wc->refs[level - 1] = 0;
    5280     4460326 :         wc->flags[level - 1] = 0;
    5281     4460326 :         if (wc->stage == DROP_REFERENCE) {
    5282     4460326 :                 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
    5283     4454128 :                         parent = path->nodes[level]->start;
    5284             :                 } else {
    5285        6198 :                         ASSERT(root->root_key.objectid ==
    5286             :                                btrfs_header_owner(path->nodes[level]));
    5287        6198 :                         if (root->root_key.objectid !=
    5288             :                             btrfs_header_owner(path->nodes[level])) {
    5289           0 :                                 btrfs_err(root->fs_info,
    5290             :                                                 "mismatched block owner");
    5291           0 :                                 ret = -EIO;
    5292           0 :                                 goto out_unlock;
    5293             :                         }
    5294             :                         parent = 0;
    5295             :                 }
    5296             : 
    5297             :                 /*
    5298             :                  * If we had a drop_progress we need to verify the refs are set
    5299             :                  * as expected.  If we find our ref then we know that from here
    5300             :                  * on out everything should be correct, and we can clear the
    5301             :                  * ->restarted flag.
    5302             :                  */
    5303     4460326 :                 if (wc->restarted) {
    5304           0 :                         ret = check_ref_exists(trans, root, bytenr, parent,
    5305             :                                                level - 1);
    5306           0 :                         if (ret < 0)
    5307           0 :                                 goto out_unlock;
    5308           0 :                         if (ret == 0)
    5309           0 :                                 goto no_delete;
    5310           0 :                         ret = 0;
    5311           0 :                         wc->restarted = 0;
    5312             :                 }
    5313             : 
    5314             :                 /*
    5315             :                  * Reloc tree doesn't contribute to qgroup numbers, and we have
    5316             :                  * already accounted them at merge time (replace_path),
    5317             :                  * thus we could skip expensive subtree trace here.
    5318             :                  */
    5319     4460326 :                 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
    5320             :                     need_account) {
    5321       33729 :                         ret = btrfs_qgroup_trace_subtree(trans, next,
    5322             :                                                          generation, level - 1);
    5323       33729 :                         if (ret) {
    5324           0 :                                 btrfs_err_rl(fs_info,
    5325             :                                              "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
    5326             :                                              ret);
    5327             :                         }
    5328             :                 }
    5329             : 
    5330             :                 /*
    5331             :                  * We need to update the next key in our walk control so we can
    5332             :                  * update the drop_progress key accordingly.  We don't care if
    5333             :                  * find_next_key doesn't find a key because that means we're at
    5334             :                  * the end and are going to clean up now.
    5335             :                  */
    5336     4460326 :                 wc->drop_level = level;
    5337     4460326 :                 find_next_key(path, level, &wc->drop_progress);
    5338             : 
    5339     4460326 :                 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
    5340     4460326 :                                        fs_info->nodesize, parent);
    5341     4460326 :                 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
    5342             :                                     0, false);
    5343     4460326 :                 ret = btrfs_free_extent(trans, &ref);
    5344     4460326 :                 if (ret)
    5345           0 :                         goto out_unlock;
    5346             :         }
    5347     4460326 : no_delete:
    5348     4460326 :         *lookup_info = 1;
    5349     4460326 :         ret = 1;
    5350             : 
    5351     4460326 : out_unlock:
    5352     4460326 :         btrfs_tree_unlock(next);
    5353     4460326 :         free_extent_buffer(next);
    5354             : 
    5355     4460326 :         return ret;
    5356             : }
    5357             : 
    5358             : /*
    5359             :  * helper to process tree block while walking up the tree.
    5360             :  *
    5361             :  * when wc->stage == DROP_REFERENCE, this function drops
    5362             :  * reference count on the block.
    5363             :  *
    5364             :  * when wc->stage == UPDATE_BACKREF, this function changes
    5365             :  * wc->stage back to DROP_REFERENCE if we changed wc->stage
    5366             :  * to UPDATE_BACKREF previously while processing the block.
    5367             :  *
    5368             :  * NOTE: return value 1 means we should stop walking up.
    5369             :  */
    5370     2363486 : static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
    5371             :                                  struct btrfs_root *root,
    5372             :                                  struct btrfs_path *path,
    5373             :                                  struct walk_control *wc)
    5374             : {
    5375     2363486 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5376     2363486 :         int ret;
    5377     2363486 :         int level = wc->level;
    5378     2363486 :         struct extent_buffer *eb = path->nodes[level];
    5379     2363486 :         u64 parent = 0;
    5380             : 
    5381     2363486 :         if (wc->stage == UPDATE_BACKREF) {
    5382           0 :                 BUG_ON(wc->shared_level < level);
    5383           0 :                 if (level < wc->shared_level)
    5384           0 :                         goto out;
    5385             : 
    5386           0 :                 ret = find_next_key(path, level + 1, &wc->update_progress);
    5387           0 :                 if (ret > 0)
    5388           0 :                         wc->update_ref = 0;
    5389             : 
    5390           0 :                 wc->stage = DROP_REFERENCE;
    5391           0 :                 wc->shared_level = -1;
    5392           0 :                 path->slots[level] = 0;
    5393             : 
    5394             :                 /*
    5395             :                  * check reference count again if the block isn't locked.
    5396             :                  * we should start walking down the tree again if reference
    5397             :                  * count is one.
    5398             :                  */
    5399           0 :                 if (!path->locks[level]) {
    5400           0 :                         BUG_ON(level == 0);
    5401           0 :                         btrfs_tree_lock(eb);
    5402           0 :                         path->locks[level] = BTRFS_WRITE_LOCK;
    5403             : 
    5404           0 :                         ret = btrfs_lookup_extent_info(trans, fs_info,
    5405             :                                                        eb->start, level, 1,
    5406             :                                                        &wc->refs[level],
    5407             :                                                        &wc->flags[level]);
    5408           0 :                         if (ret < 0) {
    5409           0 :                                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    5410           0 :                                 path->locks[level] = 0;
    5411           0 :                                 return ret;
    5412             :                         }
    5413           0 :                         BUG_ON(wc->refs[level] == 0);
    5414           0 :                         if (wc->refs[level] == 1) {
    5415           0 :                                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    5416           0 :                                 path->locks[level] = 0;
    5417           0 :                                 return 1;
    5418             :                         }
    5419             :                 }
    5420             :         }
    5421             : 
    5422             :         /* wc->stage == DROP_REFERENCE */
    5423     2363486 :         BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
    5424             : 
    5425     2363486 :         if (wc->refs[level] == 1) {
    5426     1858632 :                 if (level == 0) {
    5427     1838825 :                         if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
    5428     1838776 :                                 ret = btrfs_dec_ref(trans, root, eb, 1);
    5429             :                         else
    5430          49 :                                 ret = btrfs_dec_ref(trans, root, eb, 0);
    5431     1838825 :                         BUG_ON(ret); /* -ENOMEM */
    5432     1838825 :                         if (is_fstree(root->root_key.objectid)) {
    5433        3313 :                                 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
    5434        3313 :                                 if (ret) {
    5435           0 :                                         btrfs_err_rl(fs_info,
    5436             :         "error %d accounting leaf items, quota is out of sync, rescan required",
    5437             :                                              ret);
    5438             :                                 }
    5439             :                         }
    5440             :                 }
    5441             :                 /* Make block locked assertion in btrfs_clear_buffer_dirty happy. */
    5442     1858632 :                 if (!path->locks[level]) {
    5443     1858577 :                         btrfs_tree_lock(eb);
    5444     1858577 :                         path->locks[level] = BTRFS_WRITE_LOCK;
    5445             :                 }
    5446     1858632 :                 btrfs_clear_buffer_dirty(trans, eb);
    5447             :         }
    5448             : 
    5449     2363486 :         if (eb == root->node) {
    5450        1618 :                 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
    5451        1500 :                         parent = eb->start;
    5452         118 :                 else if (root->root_key.objectid != btrfs_header_owner(eb))
    5453           0 :                         goto owner_mismatch;
    5454             :         } else {
    5455     2361868 :                 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
    5456     2360097 :                         parent = path->nodes[level + 1]->start;
    5457        1771 :                 else if (root->root_key.objectid !=
    5458        1771 :                          btrfs_header_owner(path->nodes[level + 1]))
    5459           0 :                         goto owner_mismatch;
    5460             :         }
    5461             : 
    5462     2363486 :         btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent,
    5463     2363486 :                               wc->refs[level] == 1);
    5464     2363486 : out:
    5465     2363486 :         wc->refs[level] = 0;
    5466     2363486 :         wc->flags[level] = 0;
    5467     2363486 :         return 0;
    5468             : 
    5469           0 : owner_mismatch:
    5470           0 :         btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
    5471             :                      btrfs_header_owner(eb), root->root_key.objectid);
    5472             :         return -EUCLEAN;
    5473             : }
    5474             : 
    5475     2357219 : static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
    5476             :                                    struct btrfs_root *root,
    5477             :                                    struct btrfs_path *path,
    5478             :                                    struct walk_control *wc)
    5479             : {
    5480     2357219 :         int level = wc->level;
    5481     2357219 :         int lookup_info = 1;
    5482     2357219 :         int ret = 0;
    5483             : 
    5484     8674504 :         while (level >= 0) {
    5485     8674504 :                 ret = walk_down_proc(trans, root, path, wc, lookup_info);
    5486     8674504 :                 if (ret)
    5487             :                         break;
    5488             : 
    5489     8169650 :                 if (level == 0)
    5490             :                         break;
    5491             : 
    5492     6330825 :                 if (path->slots[level] >=
    5493     6330825 :                     btrfs_header_nritems(path->nodes[level]))
    5494             :                         break;
    5495             : 
    5496     6317285 :                 ret = do_walk_down(trans, root, path, wc, &lookup_info);
    5497     6317285 :                 if (ret > 0) {
    5498     4460326 :                         path->slots[level]++;
    5499     4460326 :                         continue;
    5500     1856959 :                 } else if (ret < 0)
    5501             :                         break;
    5502     1856959 :                 level = wc->level;
    5503             :         }
    5504     2357219 :         return (ret == 1) ? 0 : ret;
    5505             : }
    5506             : 
    5507     2357219 : static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
    5508             :                                  struct btrfs_root *root,
    5509             :                                  struct btrfs_path *path,
    5510             :                                  struct walk_control *wc, int max_level)
    5511             : {
    5512     2357219 :         int level = wc->level;
    5513     2357219 :         int ret;
    5514             : 
    5515     2357219 :         path->slots[level] = btrfs_header_nritems(path->nodes[level]);
    5516     4720705 :         while (level < max_level && path->nodes[level]) {
    5517     4214178 :                 wc->level = level;
    5518     4214178 :                 if (path->slots[level] + 1 <
    5519     4214178 :                     btrfs_header_nritems(path->nodes[level])) {
    5520     1850692 :                         path->slots[level]++;
    5521     1850692 :                         return 0;
    5522             :                 } else {
    5523     2363486 :                         ret = walk_up_proc(trans, root, path, wc);
    5524     2363486 :                         if (ret > 0)
    5525             :                                 return 0;
    5526     2363486 :                         if (ret < 0)
    5527           0 :                                 return ret;
    5528             : 
    5529     2363486 :                         if (path->locks[level]) {
    5530     2363486 :                                 btrfs_tree_unlock_rw(path->nodes[level],
    5531             :                                                      path->locks[level]);
    5532     2363486 :                                 path->locks[level] = 0;
    5533             :                         }
    5534     2363486 :                         free_extent_buffer(path->nodes[level]);
    5535     2363486 :                         path->nodes[level] = NULL;
    5536     2363486 :                         level++;
    5537             :                 }
    5538             :         }
    5539             :         return 1;
    5540             : }
    5541             : 
    5542             : /*
    5543             :  * drop a subvolume tree.
    5544             :  *
    5545             :  * this function traverses the tree, freeing any blocks that are only
    5546             :  * referenced by the tree.
    5547             :  *
    5548             :  * when a shared tree block is found, this function decreases its
    5549             :  * reference count by one. if update_ref is true, this function
    5550             :  * also makes sure backrefs for the shared block and all lower level
    5551             :  * blocks are properly updated.
    5552             :  *
    5553             :  * If called with for_reloc == 0, may exit early with -EAGAIN
    5554             :  */
    5555        1618 : int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
    5556             : {
    5557        1618 :         const bool is_reloc_root = (root->root_key.objectid ==
    5558             :                                     BTRFS_TREE_RELOC_OBJECTID);
    5559        1618 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5560        1618 :         struct btrfs_path *path;
    5561        1618 :         struct btrfs_trans_handle *trans;
    5562        1618 :         struct btrfs_root *tree_root = fs_info->tree_root;
    5563        1618 :         struct btrfs_root_item *root_item = &root->root_item;
    5564        1618 :         struct walk_control *wc;
    5565        1618 :         struct btrfs_key key;
    5566        1618 :         int err = 0;
    5567        1618 :         int ret;
    5568        1618 :         int level;
    5569        1618 :         bool root_dropped = false;
    5570        1618 :         bool unfinished_drop = false;
    5571             : 
    5572        1618 :         btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
    5573             : 
    5574        1618 :         path = btrfs_alloc_path();
    5575        1618 :         if (!path) {
    5576           0 :                 err = -ENOMEM;
    5577           0 :                 goto out;
    5578             :         }
    5579             : 
    5580        1618 :         wc = kzalloc(sizeof(*wc), GFP_NOFS);
    5581        1618 :         if (!wc) {
    5582           0 :                 btrfs_free_path(path);
    5583           0 :                 err = -ENOMEM;
    5584           0 :                 goto out;
    5585             :         }
    5586             : 
    5587             :         /*
    5588             :          * Use join to avoid potential EINTR from transaction start. See
    5589             :          * wait_reserve_ticket and the whole reservation callchain.
    5590             :          */
    5591        1618 :         if (for_reloc)
    5592        1500 :                 trans = btrfs_join_transaction(tree_root);
    5593             :         else
    5594         118 :                 trans = btrfs_start_transaction(tree_root, 0);
    5595        1618 :         if (IS_ERR(trans)) {
    5596           0 :                 err = PTR_ERR(trans);
    5597           0 :                 goto out_free;
    5598             :         }
    5599             : 
    5600        1618 :         err = btrfs_run_delayed_items(trans);
    5601        1618 :         if (err)
    5602           0 :                 goto out_end_trans;
    5603             : 
    5604             :         /*
    5605             :          * This will help us catch people modifying the fs tree while we're
    5606             :          * dropping it.  It is unsafe to mess with the fs tree while it's being
    5607             :          * dropped as we unlock the root node and parent nodes as we walk down
    5608             :          * the tree, assuming nothing will change.  If something does change
    5609             :          * then we'll have stale information and drop references to blocks we've
    5610             :          * already dropped.
    5611             :          */
    5612        1618 :         set_bit(BTRFS_ROOT_DELETING, &root->state);
    5613        1618 :         unfinished_drop = test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
    5614             : 
    5615        1618 :         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
    5616        1618 :                 level = btrfs_header_level(root->node);
    5617        1618 :                 path->nodes[level] = btrfs_lock_root_node(root);
    5618        1618 :                 path->slots[level] = 0;
    5619        1618 :                 path->locks[level] = BTRFS_WRITE_LOCK;
    5620        3236 :                 memset(&wc->update_progress, 0,
    5621             :                        sizeof(wc->update_progress));
    5622             :         } else {
    5623           0 :                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
    5624           0 :                 memcpy(&wc->update_progress, &key,
    5625             :                        sizeof(wc->update_progress));
    5626             : 
    5627           0 :                 level = btrfs_root_drop_level(root_item);
    5628           0 :                 BUG_ON(level == 0);
    5629           0 :                 path->lowest_level = level;
    5630           0 :                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    5631           0 :                 path->lowest_level = 0;
    5632           0 :                 if (ret < 0) {
    5633           0 :                         err = ret;
    5634           0 :                         goto out_end_trans;
    5635             :                 }
    5636           0 :                 WARN_ON(ret > 0);
    5637             : 
    5638             :                 /*
    5639             :                  * unlock our path, this is safe because only this
    5640             :                  * function is allowed to delete this snapshot
    5641             :                  */
    5642           0 :                 btrfs_unlock_up_safe(path, 0);
    5643             : 
    5644           0 :                 level = btrfs_header_level(root->node);
    5645           0 :                 while (1) {
    5646           0 :                         btrfs_tree_lock(path->nodes[level]);
    5647           0 :                         path->locks[level] = BTRFS_WRITE_LOCK;
    5648             : 
    5649           0 :                         ret = btrfs_lookup_extent_info(trans, fs_info,
    5650           0 :                                                 path->nodes[level]->start,
    5651             :                                                 level, 1, &wc->refs[level],
    5652             :                                                 &wc->flags[level]);
    5653           0 :                         if (ret < 0) {
    5654           0 :                                 err = ret;
    5655           0 :                                 goto out_end_trans;
    5656             :                         }
    5657           0 :                         BUG_ON(wc->refs[level] == 0);
    5658             : 
    5659           0 :                         if (level == btrfs_root_drop_level(root_item))
    5660             :                                 break;
    5661             : 
    5662           0 :                         btrfs_tree_unlock(path->nodes[level]);
    5663           0 :                         path->locks[level] = 0;
    5664           0 :                         WARN_ON(wc->refs[level] != 1);
    5665           0 :                         level--;
    5666             :                 }
    5667             :         }
    5668             : 
    5669        1618 :         wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
    5670        1618 :         wc->level = level;
    5671        1618 :         wc->shared_level = -1;
    5672        1618 :         wc->stage = DROP_REFERENCE;
    5673        1618 :         wc->update_ref = update_ref;
    5674        1618 :         wc->keep_locks = 0;
    5675        1618 :         wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
    5676             : 
    5677     1852310 :         while (1) {
    5678             : 
    5679     1852310 :                 ret = walk_down_tree(trans, root, path, wc);
    5680     1852310 :                 if (ret < 0) {
    5681           0 :                         btrfs_abort_transaction(trans, ret);
    5682           0 :                         err = ret;
    5683           0 :                         break;
    5684             :                 }
    5685             : 
    5686     1852310 :                 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
    5687     1852310 :                 if (ret < 0) {
    5688           0 :                         btrfs_abort_transaction(trans, ret);
    5689           0 :                         err = ret;
    5690           0 :                         break;
    5691             :                 }
    5692             : 
    5693     1852310 :                 if (ret > 0) {
    5694        1618 :                         BUG_ON(wc->stage != DROP_REFERENCE);
    5695             :                         break;
    5696             :                 }
    5697             : 
    5698     1850692 :                 if (wc->stage == DROP_REFERENCE) {
    5699     1850692 :                         wc->drop_level = wc->level;
    5700     1850692 :                         btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
    5701             :                                               &wc->drop_progress,
    5702             :                                               path->slots[wc->drop_level]);
    5703             :                 }
    5704     1850692 :                 btrfs_cpu_key_to_disk(&root_item->drop_progress,
    5705     1850692 :                                       &wc->drop_progress);
    5706     1850692 :                 btrfs_set_root_drop_level(root_item, wc->drop_level);
    5707             : 
    5708     1850692 :                 BUG_ON(wc->level == 0);
    5709     1850692 :                 if (btrfs_should_end_transaction(trans) ||
    5710           0 :                     (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
    5711     1848545 :                         ret = btrfs_update_root(trans, tree_root,
    5712             :                                                 &root->root_key,
    5713             :                                                 root_item);
    5714     1848545 :                         if (ret) {
    5715           0 :                                 btrfs_abort_transaction(trans, ret);
    5716           0 :                                 err = ret;
    5717           0 :                                 goto out_end_trans;
    5718             :                         }
    5719             : 
    5720     1848545 :                         if (!is_reloc_root)
    5721        3242 :                                 btrfs_set_last_root_drop_gen(fs_info, trans->transid);
    5722             : 
    5723     1848545 :                         btrfs_end_transaction_throttle(trans);
    5724     1848545 :                         if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
    5725           0 :                                 btrfs_debug(fs_info,
    5726             :                                             "drop snapshot early exit");
    5727           0 :                                 err = -EAGAIN;
    5728           0 :                                 goto out_free;
    5729             :                         }
    5730             : 
    5731             :                        /*
    5732             :                         * Use join to avoid potential EINTR from transaction
    5733             :                         * start. See wait_reserve_ticket and the whole
    5734             :                         * reservation callchain.
    5735             :                         */
    5736     1848545 :                         if (for_reloc)
    5737     1845303 :                                 trans = btrfs_join_transaction(tree_root);
    5738             :                         else
    5739        3242 :                                 trans = btrfs_start_transaction(tree_root, 0);
    5740     1848545 :                         if (IS_ERR(trans)) {
    5741           0 :                                 err = PTR_ERR(trans);
    5742           0 :                                 goto out_free;
    5743             :                         }
    5744             :                 }
    5745             :         }
    5746        1618 :         btrfs_release_path(path);
    5747        1618 :         if (err)
    5748           0 :                 goto out_end_trans;
    5749             : 
    5750        1618 :         ret = btrfs_del_root(trans, &root->root_key);
    5751        1618 :         if (ret) {
    5752           0 :                 btrfs_abort_transaction(trans, ret);
    5753           0 :                 err = ret;
    5754           0 :                 goto out_end_trans;
    5755             :         }
    5756             : 
    5757        1618 :         if (!is_reloc_root) {
    5758         118 :                 ret = btrfs_find_root(tree_root, &root->root_key, path,
    5759             :                                       NULL, NULL);
    5760         118 :                 if (ret < 0) {
    5761           0 :                         btrfs_abort_transaction(trans, ret);
    5762           0 :                         err = ret;
    5763           0 :                         goto out_end_trans;
    5764         118 :                 } else if (ret > 0) {
    5765             :                         /* if we fail to delete the orphan item this time
    5766             :                          * around, it'll get picked up the next time.
    5767             :                          *
    5768             :                          * The most common failure here is just -ENOENT.
    5769             :                          */
    5770         118 :                         btrfs_del_orphan_item(trans, tree_root,
    5771             :                                               root->root_key.objectid);
    5772             :                 }
    5773             :         }
    5774             : 
    5775             :         /*
    5776             :          * This subvolume is going to be completely dropped, and won't be
    5777             :          * recorded as dirty roots, thus pertrans meta rsv will not be freed at
    5778             :          * commit transaction time.  So free it here manually.
    5779             :          */
    5780        1618 :         btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
    5781        1618 :         btrfs_qgroup_free_meta_all_pertrans(root);
    5782             : 
    5783        3236 :         if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
    5784         118 :                 btrfs_add_dropped_root(trans, root);
    5785             :         else
    5786        1500 :                 btrfs_put_root(root);
    5787             :         root_dropped = true;
    5788        1618 : out_end_trans:
    5789        1618 :         if (!is_reloc_root)
    5790         118 :                 btrfs_set_last_root_drop_gen(fs_info, trans->transid);
    5791             : 
    5792        1618 :         btrfs_end_transaction_throttle(trans);
    5793        1618 : out_free:
    5794        1618 :         kfree(wc);
    5795        1618 :         btrfs_free_path(path);
    5796        1618 : out:
    5797             :         /*
    5798             :          * We were an unfinished drop root, check to see if there are any
    5799             :          * pending, and if not clear and wake up any waiters.
    5800             :          */
    5801        1618 :         if (!err && unfinished_drop)
    5802           0 :                 btrfs_maybe_wake_unfinished_drop(fs_info);
    5803             : 
    5804             :         /*
    5805             :          * So if we need to stop dropping the snapshot for whatever reason we
    5806             :          * need to make sure to add it back to the dead root list so that we
    5807             :          * keep trying to do the work later.  This also cleans up roots if we
    5808             :          * don't have it in the radix (like when we recover after a power fail
    5809             :          * or unmount) so we don't leak memory.
    5810             :          */
    5811        1618 :         if (!for_reloc && !root_dropped)
    5812           0 :                 btrfs_add_dead_root(root);
    5813        1618 :         return err;
    5814             : }
    5815             : 
    5816             : /*
    5817             :  * drop subtree rooted at tree block 'node'.
    5818             :  *
    5819             :  * NOTE: this function will unlock and release tree block 'node'
    5820             :  * only used by relocation code
    5821             :  */
    5822      504909 : int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
    5823             :                         struct btrfs_root *root,
    5824             :                         struct extent_buffer *node,
    5825             :                         struct extent_buffer *parent)
    5826             : {
    5827      504909 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5828      504909 :         struct btrfs_path *path;
    5829      504909 :         struct walk_control *wc;
    5830      504909 :         int level;
    5831      504909 :         int parent_level;
    5832      504909 :         int ret = 0;
    5833      504909 :         int wret;
    5834             : 
    5835      504909 :         BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
    5836             : 
    5837      504909 :         path = btrfs_alloc_path();
    5838      504909 :         if (!path)
    5839             :                 return -ENOMEM;
    5840             : 
    5841      504909 :         wc = kzalloc(sizeof(*wc), GFP_NOFS);
    5842      504909 :         if (!wc) {
    5843           0 :                 btrfs_free_path(path);
    5844           0 :                 return -ENOMEM;
    5845             :         }
    5846             : 
    5847      504909 :         btrfs_assert_tree_write_locked(parent);
    5848      504909 :         parent_level = btrfs_header_level(parent);
    5849      504909 :         atomic_inc(&parent->refs);
    5850      504909 :         path->nodes[parent_level] = parent;
    5851      504909 :         path->slots[parent_level] = btrfs_header_nritems(parent);
    5852             : 
    5853      504909 :         btrfs_assert_tree_write_locked(node);
    5854      504909 :         level = btrfs_header_level(node);
    5855      504909 :         path->nodes[level] = node;
    5856      504909 :         path->slots[level] = 0;
    5857      504909 :         path->locks[level] = BTRFS_WRITE_LOCK;
    5858             : 
    5859      504909 :         wc->refs[parent_level] = 1;
    5860      504909 :         wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
    5861      504909 :         wc->level = level;
    5862      504909 :         wc->shared_level = -1;
    5863      504909 :         wc->stage = DROP_REFERENCE;
    5864      504909 :         wc->update_ref = 0;
    5865      504909 :         wc->keep_locks = 1;
    5866      504909 :         wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
    5867             : 
    5868      504909 :         while (1) {
    5869      504909 :                 wret = walk_down_tree(trans, root, path, wc);
    5870      504909 :                 if (wret < 0) {
    5871             :                         ret = wret;
    5872             :                         break;
    5873             :                 }
    5874             : 
    5875      504909 :                 wret = walk_up_tree(trans, root, path, wc, parent_level);
    5876      504909 :                 if (wret < 0)
    5877           0 :                         ret = wret;
    5878      504909 :                 if (wret != 0)
    5879             :                         break;
    5880             :         }
    5881             : 
    5882      504909 :         kfree(wc);
    5883      504909 :         btrfs_free_path(path);
    5884      504909 :         return ret;
    5885             : }
    5886             : 
    5887         144 : int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
    5888             :                                    u64 start, u64 end)
    5889             : {
    5890         144 :         return unpin_extent_range(fs_info, start, end, false);
    5891             : }
    5892             : 
    5893             : /*
    5894             :  * It used to be that old block groups would be left around forever.
    5895             :  * Iterating over them would be enough to trim unused space.  Since we
    5896             :  * now automatically remove them, we also need to iterate over unallocated
    5897             :  * space.
    5898             :  *
    5899             :  * We don't want a transaction for this since the discard may take a
    5900             :  * substantial amount of time.  We don't require that a transaction be
    5901             :  * running, but we do need to take a running transaction into account
    5902             :  * to ensure that we're not discarding chunks that were released or
    5903             :  * allocated in the current transaction.
    5904             :  *
    5905             :  * Holding the chunks lock will prevent other threads from allocating
    5906             :  * or releasing chunks, but it won't prevent a running transaction
    5907             :  * from committing and releasing the memory that the pending chunks
    5908             :  * list head uses.  For that, we need to take a reference to the
    5909             :  * transaction and hold the commit root sem.  We only need to hold
    5910             :  * it while performing the free space search since we have already
    5911             :  * held back allocations.
    5912             :  */
    5913         589 : static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
    5914             : {
    5915         589 :         u64 start = BTRFS_DEVICE_RANGE_RESERVED, len = 0, end = 0;
    5916         589 :         int ret;
    5917             : 
    5918         589 :         *trimmed = 0;
    5919             : 
    5920             :         /* Discard not supported = nothing to do. */
    5921         589 :         if (!bdev_max_discard_sectors(device->bdev))
    5922             :                 return 0;
    5923             : 
    5924             :         /* Not writable = nothing to do. */
    5925         589 :         if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
    5926             :                 return 0;
    5927             : 
    5928             :         /* No free space = nothing to do. */
    5929         589 :         if (device->total_bytes <= device->bytes_used)
    5930             :                 return 0;
    5931             : 
    5932             :         ret = 0;
    5933             : 
    5934          31 :         while (1) {
    5935         620 :                 struct btrfs_fs_info *fs_info = device->fs_info;
    5936         620 :                 u64 bytes;
    5937             : 
    5938         620 :                 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
    5939         620 :                 if (ret)
    5940             :                         break;
    5941             : 
    5942         620 :                 find_first_clear_extent_bit(&device->alloc_state, start,
    5943             :                                             &start, &end,
    5944             :                                             CHUNK_TRIMMED | CHUNK_ALLOCATED);
    5945             : 
    5946             :                 /* Check if there are any CHUNK_* bits left */
    5947         620 :                 if (start > device->total_bytes) {
    5948           0 :                         WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
    5949           0 :                         btrfs_warn_in_rcu(fs_info,
    5950             : "ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
    5951             :                                           start, end - start + 1,
    5952             :                                           btrfs_dev_name(device),
    5953             :                                           device->total_bytes);
    5954           0 :                         mutex_unlock(&fs_info->chunk_mutex);
    5955           0 :                         ret = 0;
    5956           0 :                         break;
    5957             :                 }
    5958             : 
    5959             :                 /* Ensure we skip the reserved space on each device. */
    5960         620 :                 start = max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
    5961             : 
    5962             :                 /*
    5963             :                  * If find_first_clear_extent_bit find a range that spans the
    5964             :                  * end of the device it will set end to -1, in this case it's up
    5965             :                  * to the caller to trim the value to the size of the device.
    5966             :                  */
    5967         620 :                 end = min(end, device->total_bytes - 1);
    5968             : 
    5969         620 :                 len = end - start + 1;
    5970             : 
    5971             :                 /* We didn't find any extents */
    5972         620 :                 if (!len) {
    5973         589 :                         mutex_unlock(&fs_info->chunk_mutex);
    5974         589 :                         ret = 0;
    5975         589 :                         break;
    5976             :                 }
    5977             : 
    5978          31 :                 ret = btrfs_issue_discard(device->bdev, start, len,
    5979             :                                           &bytes);
    5980          31 :                 if (!ret)
    5981          31 :                         set_extent_bit(&device->alloc_state, start,
    5982          31 :                                        start + bytes - 1, CHUNK_TRIMMED, NULL);
    5983          31 :                 mutex_unlock(&fs_info->chunk_mutex);
    5984             : 
    5985          31 :                 if (ret)
    5986             :                         break;
    5987             : 
    5988          31 :                 start += len;
    5989          31 :                 *trimmed += bytes;
    5990             : 
    5991          31 :                 if (fatal_signal_pending(current)) {
    5992             :                         ret = -ERESTARTSYS;
    5993             :                         break;
    5994             :                 }
    5995             : 
    5996          31 :                 cond_resched();
    5997             :         }
    5998             : 
    5999         589 :         return ret;
    6000             : }
    6001             : 
    6002             : /*
    6003             :  * Trim the whole filesystem by:
    6004             :  * 1) trimming the free space in each block group
    6005             :  * 2) trimming the unallocated space on each device
    6006             :  *
    6007             :  * This will also continue trimming even if a block group or device encounters
    6008             :  * an error.  The return value will be the last error, or 0 if nothing bad
    6009             :  * happens.
    6010             :  */
    6011         594 : int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
    6012             : {
    6013         594 :         struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
    6014         594 :         struct btrfs_block_group *cache = NULL;
    6015         594 :         struct btrfs_device *device;
    6016         594 :         u64 group_trimmed;
    6017         594 :         u64 range_end = U64_MAX;
    6018         594 :         u64 start;
    6019         594 :         u64 end;
    6020         594 :         u64 trimmed = 0;
    6021         594 :         u64 bg_failed = 0;
    6022         594 :         u64 dev_failed = 0;
    6023         594 :         int bg_ret = 0;
    6024         594 :         int dev_ret = 0;
    6025         594 :         int ret = 0;
    6026             : 
    6027         594 :         if (range->start == U64_MAX)
    6028             :                 return -EINVAL;
    6029             : 
    6030             :         /*
    6031             :          * Check range overflow if range->len is set.
    6032             :          * The default range->len is U64_MAX.
    6033             :          */
    6034         589 :         if (range->len != U64_MAX &&
    6035         449 :             check_add_overflow(range->start, range->len, &range_end))
    6036             :                 return -EINVAL;
    6037             : 
    6038         589 :         cache = btrfs_lookup_first_block_group(fs_info, range->start);
    6039        3335 :         for (; cache; cache = btrfs_next_block_group(cache)) {
    6040        2160 :                 if (cache->start >= range_end) {
    6041           3 :                         btrfs_put_block_group(cache);
    6042           3 :                         break;
    6043             :                 }
    6044             : 
    6045        2157 :                 start = max(range->start, cache->start);
    6046        2157 :                 end = min(range_end, cache->start + cache->length);
    6047             : 
    6048        2157 :                 if (end - start >= range->minlen) {
    6049        1980 :                         if (!btrfs_block_group_done(cache)) {
    6050          27 :                                 ret = btrfs_cache_block_group(cache, true);
    6051          27 :                                 if (ret) {
    6052           0 :                                         bg_failed++;
    6053           0 :                                         bg_ret = ret;
    6054           0 :                                         continue;
    6055             :                                 }
    6056             :                         }
    6057        1980 :                         ret = btrfs_trim_block_group(cache,
    6058             :                                                      &group_trimmed,
    6059             :                                                      start,
    6060             :                                                      end,
    6061             :                                                      range->minlen);
    6062             : 
    6063        1980 :                         trimmed += group_trimmed;
    6064        1980 :                         if (ret) {
    6065           5 :                                 bg_failed++;
    6066           5 :                                 bg_ret = ret;
    6067           5 :                                 continue;
    6068             :                         }
    6069             :                 }
    6070             :         }
    6071             : 
    6072         589 :         if (bg_failed)
    6073           1 :                 btrfs_warn(fs_info,
    6074             :                         "failed to trim %llu block group(s), last error %d",
    6075             :                         bg_failed, bg_ret);
    6076             : 
    6077         589 :         mutex_lock(&fs_devices->device_list_mutex);
    6078        1178 :         list_for_each_entry(device, &fs_devices->devices, dev_list) {
    6079        1178 :                 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
    6080           0 :                         continue;
    6081             : 
    6082         589 :                 ret = btrfs_trim_free_extents(device, &group_trimmed);
    6083         589 :                 if (ret) {
    6084             :                         dev_failed++;
    6085             :                         dev_ret = ret;
    6086             :                         break;
    6087             :                 }
    6088             : 
    6089         589 :                 trimmed += group_trimmed;
    6090             :         }
    6091         589 :         mutex_unlock(&fs_devices->device_list_mutex);
    6092             : 
    6093         589 :         if (dev_failed)
    6094           0 :                 btrfs_warn(fs_info,
    6095             :                         "failed to trim %llu device(s), last error %d",
    6096             :                         dev_failed, dev_ret);
    6097         589 :         range->len = trimmed;
    6098         589 :         if (bg_ret)
    6099           1 :                 return bg_ret;
    6100             :         return dev_ret;
    6101             : }

Generated by: LCOV version 1.14