LCOV - code coverage report
Current view: top level - fs/btrfs - extent-tree.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-acha @ Mon Jul 31 20:08:06 PDT 2023 Lines: 0 3032 0.0 %
Date: 2023-07-31 20:08:07 Functions: 0 103 0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2007 Oracle.  All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/sched.h>
       7             : #include <linux/sched/signal.h>
       8             : #include <linux/pagemap.h>
       9             : #include <linux/writeback.h>
      10             : #include <linux/blkdev.h>
      11             : #include <linux/sort.h>
      12             : #include <linux/rcupdate.h>
      13             : #include <linux/kthread.h>
      14             : #include <linux/slab.h>
      15             : #include <linux/ratelimit.h>
      16             : #include <linux/percpu_counter.h>
      17             : #include <linux/lockdep.h>
      18             : #include <linux/crc32c.h>
      19             : #include "ctree.h"
      20             : #include "extent-tree.h"
      21             : #include "tree-log.h"
      22             : #include "disk-io.h"
      23             : #include "print-tree.h"
      24             : #include "volumes.h"
      25             : #include "raid56.h"
      26             : #include "locking.h"
      27             : #include "free-space-cache.h"
      28             : #include "free-space-tree.h"
      29             : #include "sysfs.h"
      30             : #include "qgroup.h"
      31             : #include "ref-verify.h"
      32             : #include "space-info.h"
      33             : #include "block-rsv.h"
      34             : #include "delalloc-space.h"
      35             : #include "discard.h"
      36             : #include "rcu-string.h"
      37             : #include "zoned.h"
      38             : #include "dev-replace.h"
      39             : #include "fs.h"
      40             : #include "accessors.h"
      41             : #include "root-tree.h"
      42             : #include "file-item.h"
      43             : #include "orphan.h"
      44             : #include "tree-checker.h"
      45             : 
      46             : #undef SCRAMBLE_DELAYED_REFS
      47             : 
      48             : 
      49             : static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
      50             :                                struct btrfs_delayed_ref_node *node, u64 parent,
      51             :                                u64 root_objectid, u64 owner_objectid,
      52             :                                u64 owner_offset, int refs_to_drop,
      53             :                                struct btrfs_delayed_extent_op *extra_op);
      54             : static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
      55             :                                     struct extent_buffer *leaf,
      56             :                                     struct btrfs_extent_item *ei);
      57             : static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
      58             :                                       u64 parent, u64 root_objectid,
      59             :                                       u64 flags, u64 owner, u64 offset,
      60             :                                       struct btrfs_key *ins, int ref_mod);
      61             : static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
      62             :                                      struct btrfs_delayed_ref_node *node,
      63             :                                      struct btrfs_delayed_extent_op *extent_op);
      64             : static int find_next_key(struct btrfs_path *path, int level,
      65             :                          struct btrfs_key *key);
      66             : 
      67             : static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
      68             : {
      69           0 :         return (cache->flags & bits) == bits;
      70             : }
      71             : 
      72           0 : int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
      73             :                               u64 start, u64 num_bytes)
      74             : {
      75           0 :         u64 end = start + num_bytes - 1;
      76           0 :         set_extent_bit(&fs_info->excluded_extents, start, end,
      77             :                        EXTENT_UPTODATE, NULL);
      78           0 :         return 0;
      79             : }
      80             : 
      81           0 : void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
      82             : {
      83           0 :         struct btrfs_fs_info *fs_info = cache->fs_info;
      84           0 :         u64 start, end;
      85             : 
      86           0 :         start = cache->start;
      87           0 :         end = start + cache->length - 1;
      88             : 
      89           0 :         clear_extent_bits(&fs_info->excluded_extents, start, end,
      90             :                           EXTENT_UPTODATE);
      91           0 : }
      92             : 
      93             : /* simple helper to search for an existing data extent at a given offset */
      94           0 : int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
      95             : {
      96           0 :         struct btrfs_root *root = btrfs_extent_root(fs_info, start);
      97           0 :         int ret;
      98           0 :         struct btrfs_key key;
      99           0 :         struct btrfs_path *path;
     100             : 
     101           0 :         path = btrfs_alloc_path();
     102           0 :         if (!path)
     103             :                 return -ENOMEM;
     104             : 
     105           0 :         key.objectid = start;
     106           0 :         key.offset = len;
     107           0 :         key.type = BTRFS_EXTENT_ITEM_KEY;
     108           0 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
     109           0 :         btrfs_free_path(path);
     110           0 :         return ret;
     111             : }
     112             : 
     113             : /*
     114             :  * helper function to lookup reference count and flags of a tree block.
     115             :  *
     116             :  * the head node for delayed ref is used to store the sum of all the
     117             :  * reference count modifications queued up in the rbtree. the head
     118             :  * node may also store the extent flags to set. This way you can check
     119             :  * to see what the reference count and extent flags would be if all of
     120             :  * the delayed refs are not processed.
     121             :  */
     122           0 : int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
     123             :                              struct btrfs_fs_info *fs_info, u64 bytenr,
     124             :                              u64 offset, int metadata, u64 *refs, u64 *flags)
     125             : {
     126           0 :         struct btrfs_root *extent_root;
     127           0 :         struct btrfs_delayed_ref_head *head;
     128           0 :         struct btrfs_delayed_ref_root *delayed_refs;
     129           0 :         struct btrfs_path *path;
     130           0 :         struct btrfs_extent_item *ei;
     131           0 :         struct extent_buffer *leaf;
     132           0 :         struct btrfs_key key;
     133           0 :         u32 item_size;
     134           0 :         u64 num_refs;
     135           0 :         u64 extent_flags;
     136           0 :         int ret;
     137             : 
     138             :         /*
     139             :          * If we don't have skinny metadata, don't bother doing anything
     140             :          * different
     141             :          */
     142           0 :         if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
     143           0 :                 offset = fs_info->nodesize;
     144           0 :                 metadata = 0;
     145             :         }
     146             : 
     147           0 :         path = btrfs_alloc_path();
     148           0 :         if (!path)
     149             :                 return -ENOMEM;
     150             : 
     151           0 :         if (!trans) {
     152           0 :                 path->skip_locking = 1;
     153           0 :                 path->search_commit_root = 1;
     154             :         }
     155             : 
     156           0 : search_again:
     157           0 :         key.objectid = bytenr;
     158           0 :         key.offset = offset;
     159           0 :         if (metadata)
     160           0 :                 key.type = BTRFS_METADATA_ITEM_KEY;
     161             :         else
     162           0 :                 key.type = BTRFS_EXTENT_ITEM_KEY;
     163             : 
     164           0 :         extent_root = btrfs_extent_root(fs_info, bytenr);
     165           0 :         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
     166           0 :         if (ret < 0)
     167           0 :                 goto out_free;
     168             : 
     169           0 :         if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
     170           0 :                 if (path->slots[0]) {
     171           0 :                         path->slots[0]--;
     172           0 :                         btrfs_item_key_to_cpu(path->nodes[0], &key,
     173             :                                               path->slots[0]);
     174           0 :                         if (key.objectid == bytenr &&
     175           0 :                             key.type == BTRFS_EXTENT_ITEM_KEY &&
     176           0 :                             key.offset == fs_info->nodesize)
     177             :                                 ret = 0;
     178             :                 }
     179             :         }
     180             : 
     181           0 :         if (ret == 0) {
     182           0 :                 leaf = path->nodes[0];
     183           0 :                 item_size = btrfs_item_size(leaf, path->slots[0]);
     184           0 :                 if (item_size >= sizeof(*ei)) {
     185           0 :                         ei = btrfs_item_ptr(leaf, path->slots[0],
     186             :                                             struct btrfs_extent_item);
     187           0 :                         num_refs = btrfs_extent_refs(leaf, ei);
     188           0 :                         extent_flags = btrfs_extent_flags(leaf, ei);
     189             :                 } else {
     190           0 :                         ret = -EINVAL;
     191           0 :                         btrfs_print_v0_err(fs_info);
     192           0 :                         if (trans)
     193           0 :                                 btrfs_abort_transaction(trans, ret);
     194             :                         else
     195           0 :                                 btrfs_handle_fs_error(fs_info, ret, NULL);
     196             : 
     197           0 :                         goto out_free;
     198             :                 }
     199             : 
     200           0 :                 BUG_ON(num_refs == 0);
     201             :         } else {
     202             :                 num_refs = 0;
     203             :                 extent_flags = 0;
     204             :                 ret = 0;
     205             :         }
     206             : 
     207           0 :         if (!trans)
     208           0 :                 goto out;
     209             : 
     210           0 :         delayed_refs = &trans->transaction->delayed_refs;
     211           0 :         spin_lock(&delayed_refs->lock);
     212           0 :         head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
     213           0 :         if (head) {
     214           0 :                 if (!mutex_trylock(&head->mutex)) {
     215           0 :                         refcount_inc(&head->refs);
     216           0 :                         spin_unlock(&delayed_refs->lock);
     217             : 
     218           0 :                         btrfs_release_path(path);
     219             : 
     220             :                         /*
     221             :                          * Mutex was contended, block until it's released and try
     222             :                          * again
     223             :                          */
     224           0 :                         mutex_lock(&head->mutex);
     225           0 :                         mutex_unlock(&head->mutex);
     226           0 :                         btrfs_put_delayed_ref_head(head);
     227           0 :                         goto search_again;
     228             :                 }
     229           0 :                 spin_lock(&head->lock);
     230           0 :                 if (head->extent_op && head->extent_op->update_flags)
     231           0 :                         extent_flags |= head->extent_op->flags_to_set;
     232             :                 else
     233           0 :                         BUG_ON(num_refs == 0);
     234             : 
     235           0 :                 num_refs += head->ref_mod;
     236           0 :                 spin_unlock(&head->lock);
     237           0 :                 mutex_unlock(&head->mutex);
     238             :         }
     239           0 :         spin_unlock(&delayed_refs->lock);
     240           0 : out:
     241           0 :         WARN_ON(num_refs == 0);
     242           0 :         if (refs)
     243           0 :                 *refs = num_refs;
     244           0 :         if (flags)
     245           0 :                 *flags = extent_flags;
     246           0 : out_free:
     247           0 :         btrfs_free_path(path);
     248           0 :         return ret;
     249             : }
     250             : 
     251             : /*
     252             :  * Back reference rules.  Back refs have three main goals:
     253             :  *
     254             :  * 1) differentiate between all holders of references to an extent so that
     255             :  *    when a reference is dropped we can make sure it was a valid reference
     256             :  *    before freeing the extent.
     257             :  *
     258             :  * 2) Provide enough information to quickly find the holders of an extent
     259             :  *    if we notice a given block is corrupted or bad.
     260             :  *
     261             :  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
     262             :  *    maintenance.  This is actually the same as #2, but with a slightly
     263             :  *    different use case.
     264             :  *
     265             :  * There are two kinds of back refs. The implicit back refs is optimized
     266             :  * for pointers in non-shared tree blocks. For a given pointer in a block,
     267             :  * back refs of this kind provide information about the block's owner tree
     268             :  * and the pointer's key. These information allow us to find the block by
     269             :  * b-tree searching. The full back refs is for pointers in tree blocks not
     270             :  * referenced by their owner trees. The location of tree block is recorded
     271             :  * in the back refs. Actually the full back refs is generic, and can be
     272             :  * used in all cases the implicit back refs is used. The major shortcoming
     273             :  * of the full back refs is its overhead. Every time a tree block gets
     274             :  * COWed, we have to update back refs entry for all pointers in it.
     275             :  *
     276             :  * For a newly allocated tree block, we use implicit back refs for
     277             :  * pointers in it. This means most tree related operations only involve
     278             :  * implicit back refs. For a tree block created in old transaction, the
     279             :  * only way to drop a reference to it is COW it. So we can detect the
     280             :  * event that tree block loses its owner tree's reference and do the
     281             :  * back refs conversion.
     282             :  *
     283             :  * When a tree block is COWed through a tree, there are four cases:
     284             :  *
     285             :  * The reference count of the block is one and the tree is the block's
     286             :  * owner tree. Nothing to do in this case.
     287             :  *
     288             :  * The reference count of the block is one and the tree is not the
     289             :  * block's owner tree. In this case, full back refs is used for pointers
     290             :  * in the block. Remove these full back refs, add implicit back refs for
     291             :  * every pointers in the new block.
     292             :  *
     293             :  * The reference count of the block is greater than one and the tree is
     294             :  * the block's owner tree. In this case, implicit back refs is used for
     295             :  * pointers in the block. Add full back refs for every pointers in the
     296             :  * block, increase lower level extents' reference counts. The original
     297             :  * implicit back refs are entailed to the new block.
     298             :  *
     299             :  * The reference count of the block is greater than one and the tree is
     300             :  * not the block's owner tree. Add implicit back refs for every pointer in
     301             :  * the new block, increase lower level extents' reference count.
     302             :  *
     303             :  * Back Reference Key composing:
     304             :  *
     305             :  * The key objectid corresponds to the first byte in the extent,
     306             :  * The key type is used to differentiate between types of back refs.
     307             :  * There are different meanings of the key offset for different types
     308             :  * of back refs.
     309             :  *
     310             :  * File extents can be referenced by:
     311             :  *
     312             :  * - multiple snapshots, subvolumes, or different generations in one subvol
     313             :  * - different files inside a single subvolume
     314             :  * - different offsets inside a file (bookend extents in file.c)
     315             :  *
     316             :  * The extent ref structure for the implicit back refs has fields for:
     317             :  *
     318             :  * - Objectid of the subvolume root
     319             :  * - objectid of the file holding the reference
     320             :  * - original offset in the file
     321             :  * - how many bookend extents
     322             :  *
     323             :  * The key offset for the implicit back refs is hash of the first
     324             :  * three fields.
     325             :  *
     326             :  * The extent ref structure for the full back refs has field for:
     327             :  *
     328             :  * - number of pointers in the tree leaf
     329             :  *
     330             :  * The key offset for the implicit back refs is the first byte of
     331             :  * the tree leaf
     332             :  *
     333             :  * When a file extent is allocated, The implicit back refs is used.
     334             :  * the fields are filled in:
     335             :  *
     336             :  *     (root_key.objectid, inode objectid, offset in file, 1)
     337             :  *
     338             :  * When a file extent is removed file truncation, we find the
     339             :  * corresponding implicit back refs and check the following fields:
     340             :  *
     341             :  *     (btrfs_header_owner(leaf), inode objectid, offset in file)
     342             :  *
     343             :  * Btree extents can be referenced by:
     344             :  *
     345             :  * - Different subvolumes
     346             :  *
     347             :  * Both the implicit back refs and the full back refs for tree blocks
     348             :  * only consist of key. The key offset for the implicit back refs is
     349             :  * objectid of block's owner tree. The key offset for the full back refs
     350             :  * is the first byte of parent block.
     351             :  *
     352             :  * When implicit back refs is used, information about the lowest key and
     353             :  * level of the tree block are required. These information are stored in
     354             :  * tree block info structure.
     355             :  */
     356             : 
     357             : /*
     358             :  * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
     359             :  * is_data == BTRFS_REF_TYPE_DATA, data type is requiried,
     360             :  * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
     361             :  */
     362           0 : int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
     363             :                                      struct btrfs_extent_inline_ref *iref,
     364             :                                      enum btrfs_inline_ref_type is_data)
     365             : {
     366           0 :         int type = btrfs_extent_inline_ref_type(eb, iref);
     367           0 :         u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
     368             : 
     369           0 :         if (type == BTRFS_TREE_BLOCK_REF_KEY ||
     370           0 :             type == BTRFS_SHARED_BLOCK_REF_KEY ||
     371           0 :             type == BTRFS_SHARED_DATA_REF_KEY ||
     372           0 :             type == BTRFS_EXTENT_DATA_REF_KEY) {
     373           0 :                 if (is_data == BTRFS_REF_TYPE_BLOCK) {
     374           0 :                         if (type == BTRFS_TREE_BLOCK_REF_KEY)
     375             :                                 return type;
     376           0 :                         if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
     377           0 :                                 ASSERT(eb->fs_info);
     378             :                                 /*
     379             :                                  * Every shared one has parent tree block,
     380             :                                  * which must be aligned to sector size.
     381             :                                  */
     382           0 :                                 if (offset &&
     383           0 :                                     IS_ALIGNED(offset, eb->fs_info->sectorsize))
     384             :                                         return type;
     385             :                         }
     386           0 :                 } else if (is_data == BTRFS_REF_TYPE_DATA) {
     387           0 :                         if (type == BTRFS_EXTENT_DATA_REF_KEY)
     388             :                                 return type;
     389           0 :                         if (type == BTRFS_SHARED_DATA_REF_KEY) {
     390           0 :                                 ASSERT(eb->fs_info);
     391             :                                 /*
     392             :                                  * Every shared one has parent tree block,
     393             :                                  * which must be aligned to sector size.
     394             :                                  */
     395           0 :                                 if (offset &&
     396           0 :                                     IS_ALIGNED(offset, eb->fs_info->sectorsize))
     397             :                                         return type;
     398             :                         }
     399             :                 } else {
     400             :                         ASSERT(is_data == BTRFS_REF_TYPE_ANY);
     401             :                         return type;
     402             :                 }
     403             :         }
     404             : 
     405           0 :         btrfs_print_leaf(eb);
     406           0 :         btrfs_err(eb->fs_info,
     407             :                   "eb %llu iref 0x%lx invalid extent inline ref type %d",
     408             :                   eb->start, (unsigned long)iref, type);
     409           0 :         WARN_ON(1);
     410             : 
     411           0 :         return BTRFS_REF_TYPE_INVALID;
     412             : }
     413             : 
     414           0 : u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
     415             : {
     416           0 :         u32 high_crc = ~(u32)0;
     417           0 :         u32 low_crc = ~(u32)0;
     418           0 :         __le64 lenum;
     419             : 
     420           0 :         lenum = cpu_to_le64(root_objectid);
     421           0 :         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
     422           0 :         lenum = cpu_to_le64(owner);
     423           0 :         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
     424           0 :         lenum = cpu_to_le64(offset);
     425           0 :         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
     426             : 
     427           0 :         return ((u64)high_crc << 31) ^ (u64)low_crc;
     428             : }
     429             : 
     430           0 : static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
     431             :                                      struct btrfs_extent_data_ref *ref)
     432             : {
     433           0 :         return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
     434             :                                     btrfs_extent_data_ref_objectid(leaf, ref),
     435             :                                     btrfs_extent_data_ref_offset(leaf, ref));
     436             : }
     437             : 
     438           0 : static int match_extent_data_ref(struct extent_buffer *leaf,
     439             :                                  struct btrfs_extent_data_ref *ref,
     440             :                                  u64 root_objectid, u64 owner, u64 offset)
     441             : {
     442           0 :         if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
     443           0 :             btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
     444             :             btrfs_extent_data_ref_offset(leaf, ref) != offset)
     445           0 :                 return 0;
     446             :         return 1;
     447             : }
     448             : 
     449           0 : static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
     450             :                                            struct btrfs_path *path,
     451             :                                            u64 bytenr, u64 parent,
     452             :                                            u64 root_objectid,
     453             :                                            u64 owner, u64 offset)
     454             : {
     455           0 :         struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
     456           0 :         struct btrfs_key key;
     457           0 :         struct btrfs_extent_data_ref *ref;
     458           0 :         struct extent_buffer *leaf;
     459           0 :         u32 nritems;
     460           0 :         int ret;
     461           0 :         int recow;
     462           0 :         int err = -ENOENT;
     463             : 
     464           0 :         key.objectid = bytenr;
     465           0 :         if (parent) {
     466           0 :                 key.type = BTRFS_SHARED_DATA_REF_KEY;
     467           0 :                 key.offset = parent;
     468             :         } else {
     469           0 :                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
     470           0 :                 key.offset = hash_extent_data_ref(root_objectid,
     471             :                                                   owner, offset);
     472             :         }
     473             : again:
     474           0 :         recow = 0;
     475           0 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
     476           0 :         if (ret < 0) {
     477           0 :                 err = ret;
     478           0 :                 goto fail;
     479             :         }
     480             : 
     481           0 :         if (parent) {
     482           0 :                 if (!ret)
     483             :                         return 0;
     484           0 :                 goto fail;
     485             :         }
     486             : 
     487           0 :         leaf = path->nodes[0];
     488           0 :         nritems = btrfs_header_nritems(leaf);
     489           0 :         while (1) {
     490           0 :                 if (path->slots[0] >= nritems) {
     491           0 :                         ret = btrfs_next_leaf(root, path);
     492           0 :                         if (ret < 0)
     493           0 :                                 err = ret;
     494           0 :                         if (ret)
     495           0 :                                 goto fail;
     496             : 
     497           0 :                         leaf = path->nodes[0];
     498           0 :                         nritems = btrfs_header_nritems(leaf);
     499           0 :                         recow = 1;
     500             :                 }
     501             : 
     502           0 :                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
     503           0 :                 if (key.objectid != bytenr ||
     504           0 :                     key.type != BTRFS_EXTENT_DATA_REF_KEY)
     505           0 :                         goto fail;
     506             : 
     507           0 :                 ref = btrfs_item_ptr(leaf, path->slots[0],
     508             :                                      struct btrfs_extent_data_ref);
     509             : 
     510           0 :                 if (match_extent_data_ref(leaf, ref, root_objectid,
     511             :                                           owner, offset)) {
     512           0 :                         if (recow) {
     513           0 :                                 btrfs_release_path(path);
     514           0 :                                 goto again;
     515             :                         }
     516             :                         err = 0;
     517             :                         break;
     518             :                 }
     519           0 :                 path->slots[0]++;
     520             :         }
     521             : fail:
     522             :         return err;
     523             : }
     524             : 
     525           0 : static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
     526             :                                            struct btrfs_path *path,
     527             :                                            u64 bytenr, u64 parent,
     528             :                                            u64 root_objectid, u64 owner,
     529             :                                            u64 offset, int refs_to_add)
     530             : {
     531           0 :         struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
     532           0 :         struct btrfs_key key;
     533           0 :         struct extent_buffer *leaf;
     534           0 :         u32 size;
     535           0 :         u32 num_refs;
     536           0 :         int ret;
     537             : 
     538           0 :         key.objectid = bytenr;
     539           0 :         if (parent) {
     540           0 :                 key.type = BTRFS_SHARED_DATA_REF_KEY;
     541           0 :                 key.offset = parent;
     542           0 :                 size = sizeof(struct btrfs_shared_data_ref);
     543             :         } else {
     544           0 :                 key.type = BTRFS_EXTENT_DATA_REF_KEY;
     545           0 :                 key.offset = hash_extent_data_ref(root_objectid,
     546             :                                                   owner, offset);
     547           0 :                 size = sizeof(struct btrfs_extent_data_ref);
     548             :         }
     549             : 
     550           0 :         ret = btrfs_insert_empty_item(trans, root, path, &key, size);
     551           0 :         if (ret && ret != -EEXIST)
     552           0 :                 goto fail;
     553             : 
     554           0 :         leaf = path->nodes[0];
     555           0 :         if (parent) {
     556           0 :                 struct btrfs_shared_data_ref *ref;
     557           0 :                 ref = btrfs_item_ptr(leaf, path->slots[0],
     558             :                                      struct btrfs_shared_data_ref);
     559           0 :                 if (ret == 0) {
     560           0 :                         btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
     561             :                 } else {
     562           0 :                         num_refs = btrfs_shared_data_ref_count(leaf, ref);
     563           0 :                         num_refs += refs_to_add;
     564           0 :                         btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
     565             :                 }
     566             :         } else {
     567             :                 struct btrfs_extent_data_ref *ref;
     568           0 :                 while (ret == -EEXIST) {
     569           0 :                         ref = btrfs_item_ptr(leaf, path->slots[0],
     570             :                                              struct btrfs_extent_data_ref);
     571           0 :                         if (match_extent_data_ref(leaf, ref, root_objectid,
     572             :                                                   owner, offset))
     573             :                                 break;
     574           0 :                         btrfs_release_path(path);
     575           0 :                         key.offset++;
     576           0 :                         ret = btrfs_insert_empty_item(trans, root, path, &key,
     577             :                                                       size);
     578           0 :                         if (ret && ret != -EEXIST)
     579           0 :                                 goto fail;
     580             : 
     581           0 :                         leaf = path->nodes[0];
     582             :                 }
     583           0 :                 ref = btrfs_item_ptr(leaf, path->slots[0],
     584             :                                      struct btrfs_extent_data_ref);
     585           0 :                 if (ret == 0) {
     586           0 :                         btrfs_set_extent_data_ref_root(leaf, ref,
     587             :                                                        root_objectid);
     588           0 :                         btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
     589           0 :                         btrfs_set_extent_data_ref_offset(leaf, ref, offset);
     590           0 :                         btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
     591             :                 } else {
     592           0 :                         num_refs = btrfs_extent_data_ref_count(leaf, ref);
     593           0 :                         num_refs += refs_to_add;
     594           0 :                         btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
     595             :                 }
     596             :         }
     597           0 :         btrfs_mark_buffer_dirty(leaf);
     598           0 :         ret = 0;
     599           0 : fail:
     600           0 :         btrfs_release_path(path);
     601           0 :         return ret;
     602             : }
     603             : 
     604           0 : static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
     605             :                                            struct btrfs_root *root,
     606             :                                            struct btrfs_path *path,
     607             :                                            int refs_to_drop)
     608             : {
     609           0 :         struct btrfs_key key;
     610           0 :         struct btrfs_extent_data_ref *ref1 = NULL;
     611           0 :         struct btrfs_shared_data_ref *ref2 = NULL;
     612           0 :         struct extent_buffer *leaf;
     613           0 :         u32 num_refs = 0;
     614           0 :         int ret = 0;
     615             : 
     616           0 :         leaf = path->nodes[0];
     617           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
     618             : 
     619           0 :         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
     620           0 :                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
     621             :                                       struct btrfs_extent_data_ref);
     622           0 :                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
     623           0 :         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
     624           0 :                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
     625             :                                       struct btrfs_shared_data_ref);
     626           0 :                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
     627           0 :         } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
     628           0 :                 btrfs_print_v0_err(trans->fs_info);
     629           0 :                 btrfs_abort_transaction(trans, -EINVAL);
     630           0 :                 return -EINVAL;
     631             :         } else {
     632           0 :                 BUG();
     633             :         }
     634             : 
     635           0 :         BUG_ON(num_refs < refs_to_drop);
     636           0 :         num_refs -= refs_to_drop;
     637             : 
     638           0 :         if (num_refs == 0) {
     639           0 :                 ret = btrfs_del_item(trans, root, path);
     640             :         } else {
     641           0 :                 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
     642           0 :                         btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
     643           0 :                 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
     644           0 :                         btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
     645           0 :                 btrfs_mark_buffer_dirty(leaf);
     646             :         }
     647             :         return ret;
     648             : }
     649             : 
     650           0 : static noinline u32 extent_data_ref_count(struct btrfs_path *path,
     651             :                                           struct btrfs_extent_inline_ref *iref)
     652             : {
     653           0 :         struct btrfs_key key;
     654           0 :         struct extent_buffer *leaf;
     655           0 :         struct btrfs_extent_data_ref *ref1;
     656           0 :         struct btrfs_shared_data_ref *ref2;
     657           0 :         u32 num_refs = 0;
     658           0 :         int type;
     659             : 
     660           0 :         leaf = path->nodes[0];
     661           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
     662             : 
     663           0 :         BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
     664           0 :         if (iref) {
     665             :                 /*
     666             :                  * If type is invalid, we should have bailed out earlier than
     667             :                  * this call.
     668             :                  */
     669           0 :                 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
     670           0 :                 ASSERT(type != BTRFS_REF_TYPE_INVALID);
     671           0 :                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
     672           0 :                         ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
     673           0 :                         num_refs = btrfs_extent_data_ref_count(leaf, ref1);
     674             :                 } else {
     675           0 :                         ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
     676           0 :                         num_refs = btrfs_shared_data_ref_count(leaf, ref2);
     677             :                 }
     678           0 :         } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
     679           0 :                 ref1 = btrfs_item_ptr(leaf, path->slots[0],
     680             :                                       struct btrfs_extent_data_ref);
     681           0 :                 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
     682           0 :         } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
     683           0 :                 ref2 = btrfs_item_ptr(leaf, path->slots[0],
     684             :                                       struct btrfs_shared_data_ref);
     685           0 :                 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
     686             :         } else {
     687           0 :                 WARN_ON(1);
     688             :         }
     689           0 :         return num_refs;
     690             : }
     691             : 
     692           0 : static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
     693             :                                           struct btrfs_path *path,
     694             :                                           u64 bytenr, u64 parent,
     695             :                                           u64 root_objectid)
     696             : {
     697           0 :         struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
     698           0 :         struct btrfs_key key;
     699           0 :         int ret;
     700             : 
     701           0 :         key.objectid = bytenr;
     702           0 :         if (parent) {
     703           0 :                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
     704           0 :                 key.offset = parent;
     705             :         } else {
     706           0 :                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
     707           0 :                 key.offset = root_objectid;
     708             :         }
     709             : 
     710           0 :         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
     711           0 :         if (ret > 0)
     712           0 :                 ret = -ENOENT;
     713           0 :         return ret;
     714             : }
     715             : 
     716           0 : static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
     717             :                                           struct btrfs_path *path,
     718             :                                           u64 bytenr, u64 parent,
     719             :                                           u64 root_objectid)
     720             : {
     721           0 :         struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
     722           0 :         struct btrfs_key key;
     723           0 :         int ret;
     724             : 
     725           0 :         key.objectid = bytenr;
     726           0 :         if (parent) {
     727           0 :                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
     728           0 :                 key.offset = parent;
     729             :         } else {
     730           0 :                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
     731           0 :                 key.offset = root_objectid;
     732             :         }
     733             : 
     734           0 :         ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
     735           0 :         btrfs_release_path(path);
     736           0 :         return ret;
     737             : }
     738             : 
     739             : static inline int extent_ref_type(u64 parent, u64 owner)
     740             : {
     741           0 :         int type;
     742           0 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
     743           0 :                 if (parent > 0)
     744             :                         type = BTRFS_SHARED_BLOCK_REF_KEY;
     745             :                 else
     746           0 :                         type = BTRFS_TREE_BLOCK_REF_KEY;
     747             :         } else {
     748           0 :                 if (parent > 0)
     749             :                         type = BTRFS_SHARED_DATA_REF_KEY;
     750             :                 else
     751           0 :                         type = BTRFS_EXTENT_DATA_REF_KEY;
     752             :         }
     753           0 :         return type;
     754             : }
     755             : 
     756           0 : static int find_next_key(struct btrfs_path *path, int level,
     757             :                          struct btrfs_key *key)
     758             : 
     759             : {
     760           0 :         for (; level < BTRFS_MAX_LEVEL; level++) {
     761           0 :                 if (!path->nodes[level])
     762             :                         break;
     763           0 :                 if (path->slots[level] + 1 >=
     764             :                     btrfs_header_nritems(path->nodes[level]))
     765           0 :                         continue;
     766           0 :                 if (level == 0)
     767           0 :                         btrfs_item_key_to_cpu(path->nodes[level], key,
     768             :                                               path->slots[level] + 1);
     769             :                 else
     770           0 :                         btrfs_node_key_to_cpu(path->nodes[level], key,
     771             :                                               path->slots[level] + 1);
     772             :                 return 0;
     773             :         }
     774             :         return 1;
     775             : }
     776             : 
     777             : /*
     778             :  * look for inline back ref. if back ref is found, *ref_ret is set
     779             :  * to the address of inline back ref, and 0 is returned.
     780             :  *
     781             :  * if back ref isn't found, *ref_ret is set to the address where it
     782             :  * should be inserted, and -ENOENT is returned.
     783             :  *
     784             :  * if insert is true and there are too many inline back refs, the path
     785             :  * points to the extent item, and -EAGAIN is returned.
     786             :  *
     787             :  * NOTE: inline back refs are ordered in the same way that back ref
     788             :  *       items in the tree are ordered.
     789             :  */
     790             : static noinline_for_stack
     791           0 : int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
     792             :                                  struct btrfs_path *path,
     793             :                                  struct btrfs_extent_inline_ref **ref_ret,
     794             :                                  u64 bytenr, u64 num_bytes,
     795             :                                  u64 parent, u64 root_objectid,
     796             :                                  u64 owner, u64 offset, int insert)
     797             : {
     798           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     799           0 :         struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr);
     800           0 :         struct btrfs_key key;
     801           0 :         struct extent_buffer *leaf;
     802           0 :         struct btrfs_extent_item *ei;
     803           0 :         struct btrfs_extent_inline_ref *iref;
     804           0 :         u64 flags;
     805           0 :         u64 item_size;
     806           0 :         unsigned long ptr;
     807           0 :         unsigned long end;
     808           0 :         int extra_size;
     809           0 :         int type;
     810           0 :         int want;
     811           0 :         int ret;
     812           0 :         int err = 0;
     813           0 :         bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
     814           0 :         int needed;
     815             : 
     816           0 :         key.objectid = bytenr;
     817           0 :         key.type = BTRFS_EXTENT_ITEM_KEY;
     818           0 :         key.offset = num_bytes;
     819             : 
     820           0 :         want = extent_ref_type(parent, owner);
     821           0 :         if (insert) {
     822           0 :                 extra_size = btrfs_extent_inline_ref_size(want);
     823           0 :                 path->search_for_extension = 1;
     824           0 :                 path->keep_locks = 1;
     825             :         } else
     826             :                 extra_size = -1;
     827             : 
     828             :         /*
     829             :          * Owner is our level, so we can just add one to get the level for the
     830             :          * block we are interested in.
     831             :          */
     832           0 :         if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
     833           0 :                 key.type = BTRFS_METADATA_ITEM_KEY;
     834           0 :                 key.offset = owner;
     835             :         }
     836             : 
     837           0 : again:
     838           0 :         ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
     839           0 :         if (ret < 0) {
     840           0 :                 err = ret;
     841           0 :                 goto out;
     842             :         }
     843             : 
     844             :         /*
     845             :          * We may be a newly converted file system which still has the old fat
     846             :          * extent entries for metadata, so try and see if we have one of those.
     847             :          */
     848           0 :         if (ret > 0 && skinny_metadata) {
     849           0 :                 skinny_metadata = false;
     850           0 :                 if (path->slots[0]) {
     851           0 :                         path->slots[0]--;
     852           0 :                         btrfs_item_key_to_cpu(path->nodes[0], &key,
     853             :                                               path->slots[0]);
     854           0 :                         if (key.objectid == bytenr &&
     855           0 :                             key.type == BTRFS_EXTENT_ITEM_KEY &&
     856           0 :                             key.offset == num_bytes)
     857             :                                 ret = 0;
     858             :                 }
     859           0 :                 if (ret) {
     860           0 :                         key.objectid = bytenr;
     861           0 :                         key.type = BTRFS_EXTENT_ITEM_KEY;
     862           0 :                         key.offset = num_bytes;
     863           0 :                         btrfs_release_path(path);
     864           0 :                         goto again;
     865             :                 }
     866             :         }
     867             : 
     868           0 :         if (ret && !insert) {
     869           0 :                 err = -ENOENT;
     870           0 :                 goto out;
     871           0 :         } else if (WARN_ON(ret)) {
     872           0 :                 err = -EIO;
     873           0 :                 goto out;
     874             :         }
     875             : 
     876           0 :         leaf = path->nodes[0];
     877           0 :         item_size = btrfs_item_size(leaf, path->slots[0]);
     878           0 :         if (unlikely(item_size < sizeof(*ei))) {
     879           0 :                 err = -EINVAL;
     880           0 :                 btrfs_print_v0_err(fs_info);
     881           0 :                 btrfs_abort_transaction(trans, err);
     882           0 :                 goto out;
     883             :         }
     884             : 
     885           0 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
     886           0 :         flags = btrfs_extent_flags(leaf, ei);
     887             : 
     888           0 :         ptr = (unsigned long)(ei + 1);
     889           0 :         end = (unsigned long)ei + item_size;
     890             : 
     891           0 :         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
     892           0 :                 ptr += sizeof(struct btrfs_tree_block_info);
     893           0 :                 BUG_ON(ptr > end);
     894             :         }
     895             : 
     896           0 :         if (owner >= BTRFS_FIRST_FREE_OBJECTID)
     897             :                 needed = BTRFS_REF_TYPE_DATA;
     898             :         else
     899           0 :                 needed = BTRFS_REF_TYPE_BLOCK;
     900             : 
     901           0 :         err = -ENOENT;
     902           0 :         while (1) {
     903           0 :                 if (ptr >= end) {
     904           0 :                         if (ptr > end) {
     905           0 :                                 err = -EUCLEAN;
     906           0 :                                 btrfs_print_leaf(path->nodes[0]);
     907           0 :                                 btrfs_crit(fs_info,
     908             : "overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu",
     909             :                                         path->slots[0], root_objectid, owner, offset, parent);
     910             :                         }
     911             :                         break;
     912             :                 }
     913           0 :                 iref = (struct btrfs_extent_inline_ref *)ptr;
     914           0 :                 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
     915           0 :                 if (type == BTRFS_REF_TYPE_INVALID) {
     916           0 :                         err = -EUCLEAN;
     917           0 :                         goto out;
     918             :                 }
     919             : 
     920           0 :                 if (want < type)
     921             :                         break;
     922           0 :                 if (want > type) {
     923           0 :                         ptr += btrfs_extent_inline_ref_size(type);
     924           0 :                         continue;
     925             :                 }
     926             : 
     927           0 :                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
     928           0 :                         struct btrfs_extent_data_ref *dref;
     929           0 :                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
     930           0 :                         if (match_extent_data_ref(leaf, dref, root_objectid,
     931             :                                                   owner, offset)) {
     932             :                                 err = 0;
     933             :                                 break;
     934             :                         }
     935           0 :                         if (hash_extent_data_ref_item(leaf, dref) <
     936           0 :                             hash_extent_data_ref(root_objectid, owner, offset))
     937             :                                 break;
     938             :                 } else {
     939           0 :                         u64 ref_offset;
     940           0 :                         ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
     941           0 :                         if (parent > 0) {
     942           0 :                                 if (parent == ref_offset) {
     943             :                                         err = 0;
     944             :                                         break;
     945             :                                 }
     946           0 :                                 if (ref_offset < parent)
     947             :                                         break;
     948             :                         } else {
     949           0 :                                 if (root_objectid == ref_offset) {
     950             :                                         err = 0;
     951             :                                         break;
     952             :                                 }
     953           0 :                                 if (ref_offset < root_objectid)
     954             :                                         break;
     955             :                         }
     956             :                 }
     957           0 :                 ptr += btrfs_extent_inline_ref_size(type);
     958             :         }
     959           0 :         if (err == -ENOENT && insert) {
     960           0 :                 if (item_size + extra_size >=
     961           0 :                     BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
     962           0 :                         err = -EAGAIN;
     963           0 :                         goto out;
     964             :                 }
     965             :                 /*
     966             :                  * To add new inline back ref, we have to make sure
     967             :                  * there is no corresponding back ref item.
     968             :                  * For simplicity, we just do not add new inline back
     969             :                  * ref if there is any kind of item for this block
     970             :                  */
     971           0 :                 if (find_next_key(path, 0, &key) == 0 &&
     972           0 :                     key.objectid == bytenr &&
     973           0 :                     key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
     974           0 :                         err = -EAGAIN;
     975           0 :                         goto out;
     976             :                 }
     977             :         }
     978           0 :         *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
     979           0 : out:
     980           0 :         if (insert) {
     981           0 :                 path->keep_locks = 0;
     982           0 :                 path->search_for_extension = 0;
     983           0 :                 btrfs_unlock_up_safe(path, 1);
     984             :         }
     985           0 :         return err;
     986             : }
     987             : 
     988             : /*
     989             :  * helper to add new inline back ref
     990             :  */
     991             : static noinline_for_stack
     992           0 : void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
     993             :                                  struct btrfs_path *path,
     994             :                                  struct btrfs_extent_inline_ref *iref,
     995             :                                  u64 parent, u64 root_objectid,
     996             :                                  u64 owner, u64 offset, int refs_to_add,
     997             :                                  struct btrfs_delayed_extent_op *extent_op)
     998             : {
     999           0 :         struct extent_buffer *leaf;
    1000           0 :         struct btrfs_extent_item *ei;
    1001           0 :         unsigned long ptr;
    1002           0 :         unsigned long end;
    1003           0 :         unsigned long item_offset;
    1004           0 :         u64 refs;
    1005           0 :         int size;
    1006           0 :         int type;
    1007             : 
    1008           0 :         leaf = path->nodes[0];
    1009           0 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1010           0 :         item_offset = (unsigned long)iref - (unsigned long)ei;
    1011             : 
    1012           0 :         type = extent_ref_type(parent, owner);
    1013           0 :         size = btrfs_extent_inline_ref_size(type);
    1014             : 
    1015           0 :         btrfs_extend_item(path, size);
    1016             : 
    1017           0 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1018           0 :         refs = btrfs_extent_refs(leaf, ei);
    1019           0 :         refs += refs_to_add;
    1020           0 :         btrfs_set_extent_refs(leaf, ei, refs);
    1021           0 :         if (extent_op)
    1022           0 :                 __run_delayed_extent_op(extent_op, leaf, ei);
    1023             : 
    1024           0 :         ptr = (unsigned long)ei + item_offset;
    1025           0 :         end = (unsigned long)ei + btrfs_item_size(leaf, path->slots[0]);
    1026           0 :         if (ptr < end - size)
    1027           0 :                 memmove_extent_buffer(leaf, ptr + size, ptr,
    1028             :                                       end - size - ptr);
    1029             : 
    1030           0 :         iref = (struct btrfs_extent_inline_ref *)ptr;
    1031           0 :         btrfs_set_extent_inline_ref_type(leaf, iref, type);
    1032           0 :         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
    1033           0 :                 struct btrfs_extent_data_ref *dref;
    1034           0 :                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
    1035           0 :                 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
    1036           0 :                 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
    1037           0 :                 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
    1038           0 :                 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
    1039           0 :         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
    1040           0 :                 struct btrfs_shared_data_ref *sref;
    1041           0 :                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
    1042           0 :                 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
    1043           0 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
    1044           0 :         } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
    1045           0 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
    1046             :         } else {
    1047           0 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
    1048             :         }
    1049           0 :         btrfs_mark_buffer_dirty(leaf);
    1050           0 : }
    1051             : 
    1052           0 : static int lookup_extent_backref(struct btrfs_trans_handle *trans,
    1053             :                                  struct btrfs_path *path,
    1054             :                                  struct btrfs_extent_inline_ref **ref_ret,
    1055             :                                  u64 bytenr, u64 num_bytes, u64 parent,
    1056             :                                  u64 root_objectid, u64 owner, u64 offset)
    1057             : {
    1058           0 :         int ret;
    1059             : 
    1060           0 :         ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
    1061             :                                            num_bytes, parent, root_objectid,
    1062             :                                            owner, offset, 0);
    1063           0 :         if (ret != -ENOENT)
    1064             :                 return ret;
    1065             : 
    1066           0 :         btrfs_release_path(path);
    1067           0 :         *ref_ret = NULL;
    1068             : 
    1069           0 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1070           0 :                 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
    1071             :                                             root_objectid);
    1072             :         } else {
    1073           0 :                 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
    1074             :                                              root_objectid, owner, offset);
    1075             :         }
    1076             :         return ret;
    1077             : }
    1078             : 
    1079             : /*
    1080             :  * helper to update/remove inline back ref
    1081             :  */
    1082             : static noinline_for_stack
    1083           0 : void update_inline_extent_backref(struct btrfs_path *path,
    1084             :                                   struct btrfs_extent_inline_ref *iref,
    1085             :                                   int refs_to_mod,
    1086             :                                   struct btrfs_delayed_extent_op *extent_op)
    1087             : {
    1088           0 :         struct extent_buffer *leaf = path->nodes[0];
    1089           0 :         struct btrfs_extent_item *ei;
    1090           0 :         struct btrfs_extent_data_ref *dref = NULL;
    1091           0 :         struct btrfs_shared_data_ref *sref = NULL;
    1092           0 :         unsigned long ptr;
    1093           0 :         unsigned long end;
    1094           0 :         u32 item_size;
    1095           0 :         int size;
    1096           0 :         int type;
    1097           0 :         u64 refs;
    1098             : 
    1099           0 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1100           0 :         refs = btrfs_extent_refs(leaf, ei);
    1101           0 :         WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
    1102           0 :         refs += refs_to_mod;
    1103           0 :         btrfs_set_extent_refs(leaf, ei, refs);
    1104           0 :         if (extent_op)
    1105           0 :                 __run_delayed_extent_op(extent_op, leaf, ei);
    1106             : 
    1107             :         /*
    1108             :          * If type is invalid, we should have bailed out after
    1109             :          * lookup_inline_extent_backref().
    1110             :          */
    1111           0 :         type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
    1112           0 :         ASSERT(type != BTRFS_REF_TYPE_INVALID);
    1113             : 
    1114           0 :         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
    1115           0 :                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
    1116           0 :                 refs = btrfs_extent_data_ref_count(leaf, dref);
    1117           0 :         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
    1118           0 :                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
    1119           0 :                 refs = btrfs_shared_data_ref_count(leaf, sref);
    1120             :         } else {
    1121           0 :                 refs = 1;
    1122           0 :                 BUG_ON(refs_to_mod != -1);
    1123             :         }
    1124             : 
    1125           0 :         BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
    1126           0 :         refs += refs_to_mod;
    1127             : 
    1128           0 :         if (refs > 0) {
    1129           0 :                 if (type == BTRFS_EXTENT_DATA_REF_KEY)
    1130           0 :                         btrfs_set_extent_data_ref_count(leaf, dref, refs);
    1131             :                 else
    1132           0 :                         btrfs_set_shared_data_ref_count(leaf, sref, refs);
    1133             :         } else {
    1134           0 :                 size =  btrfs_extent_inline_ref_size(type);
    1135           0 :                 item_size = btrfs_item_size(leaf, path->slots[0]);
    1136           0 :                 ptr = (unsigned long)iref;
    1137           0 :                 end = (unsigned long)ei + item_size;
    1138           0 :                 if (ptr + size < end)
    1139           0 :                         memmove_extent_buffer(leaf, ptr, ptr + size,
    1140           0 :                                               end - ptr - size);
    1141           0 :                 item_size -= size;
    1142           0 :                 btrfs_truncate_item(path, item_size, 1);
    1143             :         }
    1144           0 :         btrfs_mark_buffer_dirty(leaf);
    1145           0 : }
    1146             : 
    1147             : static noinline_for_stack
    1148           0 : int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
    1149             :                                  struct btrfs_path *path,
    1150             :                                  u64 bytenr, u64 num_bytes, u64 parent,
    1151             :                                  u64 root_objectid, u64 owner,
    1152             :                                  u64 offset, int refs_to_add,
    1153             :                                  struct btrfs_delayed_extent_op *extent_op)
    1154             : {
    1155           0 :         struct btrfs_extent_inline_ref *iref;
    1156           0 :         int ret;
    1157             : 
    1158           0 :         ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
    1159             :                                            num_bytes, parent, root_objectid,
    1160             :                                            owner, offset, 1);
    1161           0 :         if (ret == 0) {
    1162             :                 /*
    1163             :                  * We're adding refs to a tree block we already own, this
    1164             :                  * should not happen at all.
    1165             :                  */
    1166           0 :                 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1167           0 :                         btrfs_print_leaf(path->nodes[0]);
    1168           0 :                         btrfs_crit(trans->fs_info,
    1169             : "adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu slot %u",
    1170             :                                    bytenr, num_bytes, root_objectid, path->slots[0]);
    1171           0 :                         return -EUCLEAN;
    1172             :                 }
    1173           0 :                 update_inline_extent_backref(path, iref, refs_to_add, extent_op);
    1174           0 :         } else if (ret == -ENOENT) {
    1175           0 :                 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
    1176             :                                             root_objectid, owner, offset,
    1177             :                                             refs_to_add, extent_op);
    1178           0 :                 ret = 0;
    1179             :         }
    1180             :         return ret;
    1181             : }
    1182             : 
    1183           0 : static int remove_extent_backref(struct btrfs_trans_handle *trans,
    1184             :                                  struct btrfs_root *root,
    1185             :                                  struct btrfs_path *path,
    1186             :                                  struct btrfs_extent_inline_ref *iref,
    1187             :                                  int refs_to_drop, int is_data)
    1188             : {
    1189           0 :         int ret = 0;
    1190             : 
    1191           0 :         BUG_ON(!is_data && refs_to_drop != 1);
    1192           0 :         if (iref)
    1193           0 :                 update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
    1194           0 :         else if (is_data)
    1195           0 :                 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
    1196             :         else
    1197           0 :                 ret = btrfs_del_item(trans, root, path);
    1198           0 :         return ret;
    1199             : }
    1200             : 
    1201           0 : static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
    1202             :                                u64 *discarded_bytes)
    1203             : {
    1204           0 :         int j, ret = 0;
    1205           0 :         u64 bytes_left, end;
    1206           0 :         u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);
    1207             : 
    1208           0 :         if (WARN_ON(start != aligned_start)) {
    1209           0 :                 len -= aligned_start - start;
    1210           0 :                 len = round_down(len, 1 << SECTOR_SHIFT);
    1211           0 :                 start = aligned_start;
    1212             :         }
    1213             : 
    1214           0 :         *discarded_bytes = 0;
    1215             : 
    1216           0 :         if (!len)
    1217             :                 return 0;
    1218             : 
    1219           0 :         end = start + len;
    1220           0 :         bytes_left = len;
    1221             : 
    1222             :         /* Skip any superblocks on this device. */
    1223           0 :         for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
    1224           0 :                 u64 sb_start = btrfs_sb_offset(j);
    1225           0 :                 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
    1226           0 :                 u64 size = sb_start - start;
    1227             : 
    1228           0 :                 if (!in_range(sb_start, start, bytes_left) &&
    1229           0 :                     !in_range(sb_end, start, bytes_left) &&
    1230           0 :                     !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
    1231           0 :                         continue;
    1232             : 
    1233             :                 /*
    1234             :                  * Superblock spans beginning of range.  Adjust start and
    1235             :                  * try again.
    1236             :                  */
    1237           0 :                 if (sb_start <= start) {
    1238           0 :                         start += sb_end - start;
    1239           0 :                         if (start > end) {
    1240             :                                 bytes_left = 0;
    1241             :                                 break;
    1242             :                         }
    1243           0 :                         bytes_left = end - start;
    1244           0 :                         continue;
    1245             :                 }
    1246             : 
    1247           0 :                 if (size) {
    1248           0 :                         ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
    1249             :                                                    size >> SECTOR_SHIFT,
    1250             :                                                    GFP_NOFS);
    1251           0 :                         if (!ret)
    1252           0 :                                 *discarded_bytes += size;
    1253           0 :                         else if (ret != -EOPNOTSUPP)
    1254           0 :                                 return ret;
    1255             :                 }
    1256             : 
    1257           0 :                 start = sb_end;
    1258           0 :                 if (start > end) {
    1259             :                         bytes_left = 0;
    1260             :                         break;
    1261             :                 }
    1262           0 :                 bytes_left = end - start;
    1263             :         }
    1264             : 
    1265           0 :         if (bytes_left) {
    1266           0 :                 ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
    1267             :                                            bytes_left >> SECTOR_SHIFT,
    1268             :                                            GFP_NOFS);
    1269           0 :                 if (!ret)
    1270           0 :                         *discarded_bytes += bytes_left;
    1271             :         }
    1272             :         return ret;
    1273             : }
    1274             : 
    1275           0 : static int do_discard_extent(struct btrfs_discard_stripe *stripe, u64 *bytes)
    1276             : {
    1277           0 :         struct btrfs_device *dev = stripe->dev;
    1278           0 :         struct btrfs_fs_info *fs_info = dev->fs_info;
    1279           0 :         struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
    1280           0 :         u64 phys = stripe->physical;
    1281           0 :         u64 len = stripe->length;
    1282           0 :         u64 discarded = 0;
    1283           0 :         int ret = 0;
    1284             : 
    1285             :         /* Zone reset on a zoned filesystem */
    1286           0 :         if (btrfs_can_zone_reset(dev, phys, len)) {
    1287           0 :                 u64 src_disc;
    1288             : 
    1289           0 :                 ret = btrfs_reset_device_zone(dev, phys, len, &discarded);
    1290           0 :                 if (ret)
    1291           0 :                         goto out;
    1292             : 
    1293           0 :                 if (!btrfs_dev_replace_is_ongoing(dev_replace) ||
    1294           0 :                     dev != dev_replace->srcdev)
    1295           0 :                         goto out;
    1296             : 
    1297           0 :                 src_disc = discarded;
    1298             : 
    1299             :                 /* Send to replace target as well */
    1300           0 :                 ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
    1301             :                                               &discarded);
    1302           0 :                 discarded += src_disc;
    1303           0 :         } else if (bdev_max_discard_sectors(stripe->dev->bdev)) {
    1304           0 :                 ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
    1305             :         } else {
    1306             :                 ret = 0;
    1307             :                 *bytes = 0;
    1308             :         }
    1309             : 
    1310           0 : out:
    1311           0 :         *bytes = discarded;
    1312           0 :         return ret;
    1313             : }
    1314             : 
    1315           0 : int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
    1316             :                          u64 num_bytes, u64 *actual_bytes)
    1317             : {
    1318           0 :         int ret = 0;
    1319           0 :         u64 discarded_bytes = 0;
    1320           0 :         u64 end = bytenr + num_bytes;
    1321           0 :         u64 cur = bytenr;
    1322             : 
    1323             :         /*
    1324             :          * Avoid races with device replace and make sure the devices in the
    1325             :          * stripes don't go away while we are discarding.
    1326             :          */
    1327           0 :         btrfs_bio_counter_inc_blocked(fs_info);
    1328           0 :         while (cur < end) {
    1329           0 :                 struct btrfs_discard_stripe *stripes;
    1330           0 :                 unsigned int num_stripes;
    1331           0 :                 int i;
    1332             : 
    1333           0 :                 num_bytes = end - cur;
    1334           0 :                 stripes = btrfs_map_discard(fs_info, cur, &num_bytes, &num_stripes);
    1335           0 :                 if (IS_ERR(stripes)) {
    1336           0 :                         ret = PTR_ERR(stripes);
    1337           0 :                         if (ret == -EOPNOTSUPP)
    1338           0 :                                 ret = 0;
    1339           0 :                         break;
    1340             :                 }
    1341             : 
    1342           0 :                 for (i = 0; i < num_stripes; i++) {
    1343           0 :                         struct btrfs_discard_stripe *stripe = stripes + i;
    1344           0 :                         u64 bytes;
    1345             : 
    1346           0 :                         if (!stripe->dev->bdev) {
    1347           0 :                                 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
    1348           0 :                                 continue;
    1349             :                         }
    1350             : 
    1351           0 :                         if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
    1352             :                                         &stripe->dev->dev_state))
    1353           0 :                                 continue;
    1354             : 
    1355           0 :                         ret = do_discard_extent(stripe, &bytes);
    1356           0 :                         if (ret) {
    1357             :                                 /*
    1358             :                                  * Keep going if discard is not supported by the
    1359             :                                  * device.
    1360             :                                  */
    1361           0 :                                 if (ret != -EOPNOTSUPP)
    1362             :                                         break;
    1363             :                                 ret = 0;
    1364             :                         } else {
    1365           0 :                                 discarded_bytes += bytes;
    1366             :                         }
    1367             :                 }
    1368           0 :                 kfree(stripes);
    1369           0 :                 if (ret)
    1370             :                         break;
    1371           0 :                 cur += num_bytes;
    1372             :         }
    1373           0 :         btrfs_bio_counter_dec(fs_info);
    1374           0 :         if (actual_bytes)
    1375           0 :                 *actual_bytes = discarded_bytes;
    1376           0 :         return ret;
    1377             : }
    1378             : 
    1379             : /* Can return -ENOMEM */
    1380           0 : int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
    1381             :                          struct btrfs_ref *generic_ref)
    1382             : {
    1383           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1384           0 :         int ret;
    1385             : 
    1386           0 :         ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
    1387             :                generic_ref->action);
    1388           0 :         BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
    1389             :                generic_ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID);
    1390             : 
    1391           0 :         if (generic_ref->type == BTRFS_REF_METADATA)
    1392           0 :                 ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
    1393             :         else
    1394           0 :                 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0);
    1395             : 
    1396           0 :         btrfs_ref_tree_mod(fs_info, generic_ref);
    1397             : 
    1398           0 :         return ret;
    1399             : }
    1400             : 
    1401             : /*
    1402             :  * __btrfs_inc_extent_ref - insert backreference for a given extent
    1403             :  *
    1404             :  * The counterpart is in __btrfs_free_extent(), with examples and more details
    1405             :  * how it works.
    1406             :  *
    1407             :  * @trans:          Handle of transaction
    1408             :  *
    1409             :  * @node:           The delayed ref node used to get the bytenr/length for
    1410             :  *                  extent whose references are incremented.
    1411             :  *
    1412             :  * @parent:         If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
    1413             :  *                  BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
    1414             :  *                  bytenr of the parent block. Since new extents are always
    1415             :  *                  created with indirect references, this will only be the case
    1416             :  *                  when relocating a shared extent. In that case, root_objectid
    1417             :  *                  will be BTRFS_TREE_RELOC_OBJECTID. Otherwise, parent must
    1418             :  *                  be 0
    1419             :  *
    1420             :  * @root_objectid:  The id of the root where this modification has originated,
    1421             :  *                  this can be either one of the well-known metadata trees or
    1422             :  *                  the subvolume id which references this extent.
    1423             :  *
    1424             :  * @owner:          For data extents it is the inode number of the owning file.
    1425             :  *                  For metadata extents this parameter holds the level in the
    1426             :  *                  tree of the extent.
    1427             :  *
    1428             :  * @offset:         For metadata extents the offset is ignored and is currently
    1429             :  *                  always passed as 0. For data extents it is the fileoffset
    1430             :  *                  this extent belongs to.
    1431             :  *
    1432             :  * @refs_to_add     Number of references to add
    1433             :  *
    1434             :  * @extent_op       Pointer to a structure, holding information necessary when
    1435             :  *                  updating a tree block's flags
    1436             :  *
    1437             :  */
    1438           0 : static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
    1439             :                                   struct btrfs_delayed_ref_node *node,
    1440             :                                   u64 parent, u64 root_objectid,
    1441             :                                   u64 owner, u64 offset, int refs_to_add,
    1442             :                                   struct btrfs_delayed_extent_op *extent_op)
    1443             : {
    1444           0 :         struct btrfs_path *path;
    1445           0 :         struct extent_buffer *leaf;
    1446           0 :         struct btrfs_extent_item *item;
    1447           0 :         struct btrfs_key key;
    1448           0 :         u64 bytenr = node->bytenr;
    1449           0 :         u64 num_bytes = node->num_bytes;
    1450           0 :         u64 refs;
    1451           0 :         int ret;
    1452             : 
    1453           0 :         path = btrfs_alloc_path();
    1454           0 :         if (!path)
    1455             :                 return -ENOMEM;
    1456             : 
    1457             :         /* this will setup the path even if it fails to insert the back ref */
    1458           0 :         ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
    1459             :                                            parent, root_objectid, owner,
    1460             :                                            offset, refs_to_add, extent_op);
    1461           0 :         if ((ret < 0 && ret != -EAGAIN) || !ret)
    1462           0 :                 goto out;
    1463             : 
    1464             :         /*
    1465             :          * Ok we had -EAGAIN which means we didn't have space to insert and
    1466             :          * inline extent ref, so just update the reference count and add a
    1467             :          * normal backref.
    1468             :          */
    1469           0 :         leaf = path->nodes[0];
    1470           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    1471           0 :         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1472           0 :         refs = btrfs_extent_refs(leaf, item);
    1473           0 :         btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
    1474           0 :         if (extent_op)
    1475           0 :                 __run_delayed_extent_op(extent_op, leaf, item);
    1476             : 
    1477           0 :         btrfs_mark_buffer_dirty(leaf);
    1478           0 :         btrfs_release_path(path);
    1479             : 
    1480             :         /* now insert the actual backref */
    1481           0 :         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
    1482           0 :                 BUG_ON(refs_to_add != 1);
    1483           0 :                 ret = insert_tree_block_ref(trans, path, bytenr, parent,
    1484             :                                             root_objectid);
    1485             :         } else {
    1486           0 :                 ret = insert_extent_data_ref(trans, path, bytenr, parent,
    1487             :                                              root_objectid, owner, offset,
    1488             :                                              refs_to_add);
    1489             :         }
    1490           0 :         if (ret)
    1491           0 :                 btrfs_abort_transaction(trans, ret);
    1492           0 : out:
    1493           0 :         btrfs_free_path(path);
    1494           0 :         return ret;
    1495             : }
    1496             : 
    1497           0 : static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
    1498             :                                 struct btrfs_delayed_ref_node *node,
    1499             :                                 struct btrfs_delayed_extent_op *extent_op,
    1500             :                                 bool insert_reserved)
    1501             : {
    1502           0 :         int ret = 0;
    1503           0 :         struct btrfs_delayed_data_ref *ref;
    1504           0 :         struct btrfs_key ins;
    1505           0 :         u64 parent = 0;
    1506           0 :         u64 ref_root = 0;
    1507           0 :         u64 flags = 0;
    1508             : 
    1509           0 :         ins.objectid = node->bytenr;
    1510           0 :         ins.offset = node->num_bytes;
    1511           0 :         ins.type = BTRFS_EXTENT_ITEM_KEY;
    1512             : 
    1513           0 :         ref = btrfs_delayed_node_to_data_ref(node);
    1514           0 :         trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
    1515             : 
    1516           0 :         if (node->type == BTRFS_SHARED_DATA_REF_KEY)
    1517           0 :                 parent = ref->parent;
    1518           0 :         ref_root = ref->root;
    1519             : 
    1520           0 :         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
    1521           0 :                 if (extent_op)
    1522           0 :                         flags |= extent_op->flags_to_set;
    1523           0 :                 ret = alloc_reserved_file_extent(trans, parent, ref_root,
    1524             :                                                  flags, ref->objectid,
    1525             :                                                  ref->offset, &ins,
    1526             :                                                  node->ref_mod);
    1527           0 :         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
    1528           0 :                 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
    1529             :                                              ref->objectid, ref->offset,
    1530             :                                              node->ref_mod, extent_op);
    1531           0 :         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
    1532           0 :                 ret = __btrfs_free_extent(trans, node, parent,
    1533             :                                           ref_root, ref->objectid,
    1534             :                                           ref->offset, node->ref_mod,
    1535             :                                           extent_op);
    1536             :         } else {
    1537           0 :                 BUG();
    1538             :         }
    1539           0 :         return ret;
    1540             : }
    1541             : 
    1542           0 : static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
    1543             :                                     struct extent_buffer *leaf,
    1544             :                                     struct btrfs_extent_item *ei)
    1545             : {
    1546           0 :         u64 flags = btrfs_extent_flags(leaf, ei);
    1547           0 :         if (extent_op->update_flags) {
    1548           0 :                 flags |= extent_op->flags_to_set;
    1549           0 :                 btrfs_set_extent_flags(leaf, ei, flags);
    1550             :         }
    1551             : 
    1552           0 :         if (extent_op->update_key) {
    1553           0 :                 struct btrfs_tree_block_info *bi;
    1554           0 :                 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
    1555           0 :                 bi = (struct btrfs_tree_block_info *)(ei + 1);
    1556           0 :                 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
    1557             :         }
    1558           0 : }
    1559             : 
    1560           0 : static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
    1561             :                                  struct btrfs_delayed_ref_head *head,
    1562             :                                  struct btrfs_delayed_extent_op *extent_op)
    1563             : {
    1564           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1565           0 :         struct btrfs_root *root;
    1566           0 :         struct btrfs_key key;
    1567           0 :         struct btrfs_path *path;
    1568           0 :         struct btrfs_extent_item *ei;
    1569           0 :         struct extent_buffer *leaf;
    1570           0 :         u32 item_size;
    1571           0 :         int ret;
    1572           0 :         int err = 0;
    1573           0 :         int metadata = 1;
    1574             : 
    1575           0 :         if (TRANS_ABORTED(trans))
    1576             :                 return 0;
    1577             : 
    1578           0 :         if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
    1579           0 :                 metadata = 0;
    1580             : 
    1581           0 :         path = btrfs_alloc_path();
    1582           0 :         if (!path)
    1583             :                 return -ENOMEM;
    1584             : 
    1585           0 :         key.objectid = head->bytenr;
    1586             : 
    1587           0 :         if (metadata) {
    1588           0 :                 key.type = BTRFS_METADATA_ITEM_KEY;
    1589           0 :                 key.offset = extent_op->level;
    1590             :         } else {
    1591           0 :                 key.type = BTRFS_EXTENT_ITEM_KEY;
    1592           0 :                 key.offset = head->num_bytes;
    1593             :         }
    1594             : 
    1595           0 :         root = btrfs_extent_root(fs_info, key.objectid);
    1596           0 : again:
    1597           0 :         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
    1598           0 :         if (ret < 0) {
    1599           0 :                 err = ret;
    1600           0 :                 goto out;
    1601             :         }
    1602           0 :         if (ret > 0) {
    1603           0 :                 if (metadata) {
    1604           0 :                         if (path->slots[0] > 0) {
    1605           0 :                                 path->slots[0]--;
    1606           0 :                                 btrfs_item_key_to_cpu(path->nodes[0], &key,
    1607             :                                                       path->slots[0]);
    1608           0 :                                 if (key.objectid == head->bytenr &&
    1609           0 :                                     key.type == BTRFS_EXTENT_ITEM_KEY &&
    1610           0 :                                     key.offset == head->num_bytes)
    1611             :                                         ret = 0;
    1612             :                         }
    1613           0 :                         if (ret > 0) {
    1614           0 :                                 btrfs_release_path(path);
    1615           0 :                                 metadata = 0;
    1616             : 
    1617           0 :                                 key.objectid = head->bytenr;
    1618           0 :                                 key.offset = head->num_bytes;
    1619           0 :                                 key.type = BTRFS_EXTENT_ITEM_KEY;
    1620           0 :                                 goto again;
    1621             :                         }
    1622             :                 } else {
    1623           0 :                         err = -EIO;
    1624           0 :                         goto out;
    1625             :                 }
    1626             :         }
    1627             : 
    1628           0 :         leaf = path->nodes[0];
    1629           0 :         item_size = btrfs_item_size(leaf, path->slots[0]);
    1630             : 
    1631           0 :         if (unlikely(item_size < sizeof(*ei))) {
    1632           0 :                 err = -EINVAL;
    1633           0 :                 btrfs_print_v0_err(fs_info);
    1634           0 :                 btrfs_abort_transaction(trans, err);
    1635           0 :                 goto out;
    1636             :         }
    1637             : 
    1638           0 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    1639           0 :         __run_delayed_extent_op(extent_op, leaf, ei);
    1640             : 
    1641           0 :         btrfs_mark_buffer_dirty(leaf);
    1642           0 : out:
    1643           0 :         btrfs_free_path(path);
    1644           0 :         return err;
    1645             : }
    1646             : 
    1647           0 : static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
    1648             :                                 struct btrfs_delayed_ref_node *node,
    1649             :                                 struct btrfs_delayed_extent_op *extent_op,
    1650             :                                 bool insert_reserved)
    1651             : {
    1652           0 :         int ret = 0;
    1653           0 :         struct btrfs_delayed_tree_ref *ref;
    1654           0 :         u64 parent = 0;
    1655           0 :         u64 ref_root = 0;
    1656             : 
    1657           0 :         ref = btrfs_delayed_node_to_tree_ref(node);
    1658           0 :         trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
    1659             : 
    1660           0 :         if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
    1661           0 :                 parent = ref->parent;
    1662           0 :         ref_root = ref->root;
    1663             : 
    1664           0 :         if (node->ref_mod != 1) {
    1665           0 :                 btrfs_err(trans->fs_info,
    1666             :         "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
    1667             :                           node->bytenr, node->ref_mod, node->action, ref_root,
    1668             :                           parent);
    1669           0 :                 return -EIO;
    1670             :         }
    1671           0 :         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
    1672           0 :                 BUG_ON(!extent_op || !extent_op->update_flags);
    1673           0 :                 ret = alloc_reserved_tree_block(trans, node, extent_op);
    1674           0 :         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
    1675           0 :                 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
    1676           0 :                                              ref->level, 0, 1, extent_op);
    1677           0 :         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
    1678           0 :                 ret = __btrfs_free_extent(trans, node, parent, ref_root,
    1679           0 :                                           ref->level, 0, 1, extent_op);
    1680             :         } else {
    1681           0 :                 BUG();
    1682             :         }
    1683             :         return ret;
    1684             : }
    1685             : 
    1686             : /* helper function to actually process a single delayed ref entry */
    1687           0 : static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
    1688             :                                struct btrfs_delayed_ref_node *node,
    1689             :                                struct btrfs_delayed_extent_op *extent_op,
    1690             :                                bool insert_reserved)
    1691             : {
    1692           0 :         int ret = 0;
    1693             : 
    1694           0 :         if (TRANS_ABORTED(trans)) {
    1695           0 :                 if (insert_reserved)
    1696           0 :                         btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
    1697           0 :                 return 0;
    1698             :         }
    1699             : 
    1700           0 :         if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
    1701             :             node->type == BTRFS_SHARED_BLOCK_REF_KEY)
    1702           0 :                 ret = run_delayed_tree_ref(trans, node, extent_op,
    1703             :                                            insert_reserved);
    1704           0 :         else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
    1705             :                  node->type == BTRFS_SHARED_DATA_REF_KEY)
    1706           0 :                 ret = run_delayed_data_ref(trans, node, extent_op,
    1707             :                                            insert_reserved);
    1708             :         else
    1709           0 :                 BUG();
    1710           0 :         if (ret && insert_reserved)
    1711           0 :                 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
    1712           0 :         if (ret < 0)
    1713           0 :                 btrfs_err(trans->fs_info,
    1714             : "failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d",
    1715             :                           node->bytenr, node->num_bytes, node->type,
    1716             :                           node->action, node->ref_mod, ret);
    1717             :         return ret;
    1718             : }
    1719             : 
    1720             : static inline struct btrfs_delayed_ref_node *
    1721             : select_delayed_ref(struct btrfs_delayed_ref_head *head)
    1722             : {
    1723           0 :         struct btrfs_delayed_ref_node *ref;
    1724             : 
    1725           0 :         if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
    1726             :                 return NULL;
    1727             : 
    1728             :         /*
    1729             :          * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
    1730             :          * This is to prevent a ref count from going down to zero, which deletes
    1731             :          * the extent item from the extent tree, when there still are references
    1732             :          * to add, which would fail because they would not find the extent item.
    1733             :          */
    1734           0 :         if (!list_empty(&head->ref_add_list))
    1735           0 :                 return list_first_entry(&head->ref_add_list,
    1736             :                                 struct btrfs_delayed_ref_node, add_list);
    1737             : 
    1738           0 :         ref = rb_entry(rb_first_cached(&head->ref_tree),
    1739             :                        struct btrfs_delayed_ref_node, ref_node);
    1740           0 :         ASSERT(list_empty(&ref->add_list));
    1741           0 :         return ref;
    1742             : }
    1743             : 
    1744           0 : static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
    1745             :                                       struct btrfs_delayed_ref_head *head)
    1746             : {
    1747           0 :         spin_lock(&delayed_refs->lock);
    1748           0 :         head->processing = false;
    1749           0 :         delayed_refs->num_heads_ready++;
    1750           0 :         spin_unlock(&delayed_refs->lock);
    1751           0 :         btrfs_delayed_ref_unlock(head);
    1752           0 : }
    1753             : 
    1754           0 : static struct btrfs_delayed_extent_op *cleanup_extent_op(
    1755             :                                 struct btrfs_delayed_ref_head *head)
    1756             : {
    1757           0 :         struct btrfs_delayed_extent_op *extent_op = head->extent_op;
    1758             : 
    1759           0 :         if (!extent_op)
    1760             :                 return NULL;
    1761             : 
    1762           0 :         if (head->must_insert_reserved) {
    1763           0 :                 head->extent_op = NULL;
    1764           0 :                 btrfs_free_delayed_extent_op(extent_op);
    1765           0 :                 return NULL;
    1766             :         }
    1767             :         return extent_op;
    1768             : }
    1769             : 
    1770           0 : static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
    1771             :                                      struct btrfs_delayed_ref_head *head)
    1772             : {
    1773           0 :         struct btrfs_delayed_extent_op *extent_op;
    1774           0 :         int ret;
    1775             : 
    1776           0 :         extent_op = cleanup_extent_op(head);
    1777           0 :         if (!extent_op)
    1778             :                 return 0;
    1779           0 :         head->extent_op = NULL;
    1780           0 :         spin_unlock(&head->lock);
    1781           0 :         ret = run_delayed_extent_op(trans, head, extent_op);
    1782           0 :         btrfs_free_delayed_extent_op(extent_op);
    1783           0 :         return ret ? ret : 1;
    1784             : }
    1785             : 
    1786           0 : void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
    1787             :                                   struct btrfs_delayed_ref_root *delayed_refs,
    1788             :                                   struct btrfs_delayed_ref_head *head)
    1789             : {
    1790           0 :         int nr_items = 1;       /* Dropping this ref head update. */
    1791             : 
    1792             :         /*
    1793             :          * We had csum deletions accounted for in our delayed refs rsv, we need
    1794             :          * to drop the csum leaves for this update from our delayed_refs_rsv.
    1795             :          */
    1796           0 :         if (head->total_ref_mod < 0 && head->is_data) {
    1797           0 :                 spin_lock(&delayed_refs->lock);
    1798           0 :                 delayed_refs->pending_csums -= head->num_bytes;
    1799           0 :                 spin_unlock(&delayed_refs->lock);
    1800           0 :                 nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
    1801             :         }
    1802             : 
    1803           0 :         btrfs_delayed_refs_rsv_release(fs_info, nr_items);
    1804           0 : }
    1805             : 
    1806           0 : static int cleanup_ref_head(struct btrfs_trans_handle *trans,
    1807             :                             struct btrfs_delayed_ref_head *head)
    1808             : {
    1809             : 
    1810           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1811           0 :         struct btrfs_delayed_ref_root *delayed_refs;
    1812           0 :         int ret;
    1813             : 
    1814           0 :         delayed_refs = &trans->transaction->delayed_refs;
    1815             : 
    1816           0 :         ret = run_and_cleanup_extent_op(trans, head);
    1817           0 :         if (ret < 0) {
    1818           0 :                 unselect_delayed_ref_head(delayed_refs, head);
    1819           0 :                 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
    1820           0 :                 return ret;
    1821           0 :         } else if (ret) {
    1822             :                 return ret;
    1823             :         }
    1824             : 
    1825             :         /*
    1826             :          * Need to drop our head ref lock and re-acquire the delayed ref lock
    1827             :          * and then re-check to make sure nobody got added.
    1828             :          */
    1829           0 :         spin_unlock(&head->lock);
    1830           0 :         spin_lock(&delayed_refs->lock);
    1831           0 :         spin_lock(&head->lock);
    1832           0 :         if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
    1833           0 :                 spin_unlock(&head->lock);
    1834           0 :                 spin_unlock(&delayed_refs->lock);
    1835           0 :                 return 1;
    1836             :         }
    1837           0 :         btrfs_delete_ref_head(delayed_refs, head);
    1838           0 :         spin_unlock(&head->lock);
    1839           0 :         spin_unlock(&delayed_refs->lock);
    1840             : 
    1841           0 :         if (head->must_insert_reserved) {
    1842           0 :                 btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
    1843           0 :                 if (head->is_data) {
    1844           0 :                         struct btrfs_root *csum_root;
    1845             : 
    1846           0 :                         csum_root = btrfs_csum_root(fs_info, head->bytenr);
    1847           0 :                         ret = btrfs_del_csums(trans, csum_root, head->bytenr,
    1848             :                                               head->num_bytes);
    1849             :                 }
    1850             :         }
    1851             : 
    1852           0 :         btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
    1853             : 
    1854           0 :         trace_run_delayed_ref_head(fs_info, head, 0);
    1855           0 :         btrfs_delayed_ref_unlock(head);
    1856           0 :         btrfs_put_delayed_ref_head(head);
    1857           0 :         return ret;
    1858             : }
    1859             : 
    1860           0 : static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
    1861             :                                         struct btrfs_trans_handle *trans)
    1862             : {
    1863           0 :         struct btrfs_delayed_ref_root *delayed_refs =
    1864           0 :                 &trans->transaction->delayed_refs;
    1865           0 :         struct btrfs_delayed_ref_head *head = NULL;
    1866           0 :         int ret;
    1867             : 
    1868           0 :         spin_lock(&delayed_refs->lock);
    1869           0 :         head = btrfs_select_ref_head(delayed_refs);
    1870           0 :         if (!head) {
    1871           0 :                 spin_unlock(&delayed_refs->lock);
    1872           0 :                 return head;
    1873             :         }
    1874             : 
    1875             :         /*
    1876             :          * Grab the lock that says we are going to process all the refs for
    1877             :          * this head
    1878             :          */
    1879           0 :         ret = btrfs_delayed_ref_lock(delayed_refs, head);
    1880           0 :         spin_unlock(&delayed_refs->lock);
    1881             : 
    1882             :         /*
    1883             :          * We may have dropped the spin lock to get the head mutex lock, and
    1884             :          * that might have given someone else time to free the head.  If that's
    1885             :          * true, it has been removed from our list and we can move on.
    1886             :          */
    1887           0 :         if (ret == -EAGAIN)
    1888           0 :                 head = ERR_PTR(-EAGAIN);
    1889             : 
    1890             :         return head;
    1891             : }
    1892             : 
    1893           0 : static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
    1894             :                                            struct btrfs_delayed_ref_head *locked_ref)
    1895             : {
    1896           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1897           0 :         struct btrfs_delayed_ref_root *delayed_refs;
    1898           0 :         struct btrfs_delayed_extent_op *extent_op;
    1899           0 :         struct btrfs_delayed_ref_node *ref;
    1900           0 :         bool must_insert_reserved;
    1901           0 :         int ret;
    1902             : 
    1903           0 :         delayed_refs = &trans->transaction->delayed_refs;
    1904             : 
    1905           0 :         lockdep_assert_held(&locked_ref->mutex);
    1906           0 :         lockdep_assert_held(&locked_ref->lock);
    1907             : 
    1908           0 :         while ((ref = select_delayed_ref(locked_ref))) {
    1909           0 :                 if (ref->seq &&
    1910           0 :                     btrfs_check_delayed_seq(fs_info, ref->seq)) {
    1911           0 :                         spin_unlock(&locked_ref->lock);
    1912           0 :                         unselect_delayed_ref_head(delayed_refs, locked_ref);
    1913           0 :                         return -EAGAIN;
    1914             :                 }
    1915             : 
    1916           0 :                 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
    1917           0 :                 RB_CLEAR_NODE(&ref->ref_node);
    1918           0 :                 if (!list_empty(&ref->add_list))
    1919           0 :                         list_del(&ref->add_list);
    1920             :                 /*
    1921             :                  * When we play the delayed ref, also correct the ref_mod on
    1922             :                  * head
    1923             :                  */
    1924           0 :                 switch (ref->action) {
    1925           0 :                 case BTRFS_ADD_DELAYED_REF:
    1926             :                 case BTRFS_ADD_DELAYED_EXTENT:
    1927           0 :                         locked_ref->ref_mod -= ref->ref_mod;
    1928           0 :                         break;
    1929           0 :                 case BTRFS_DROP_DELAYED_REF:
    1930           0 :                         locked_ref->ref_mod += ref->ref_mod;
    1931           0 :                         break;
    1932             :                 default:
    1933           0 :                         WARN_ON(1);
    1934             :                 }
    1935           0 :                 atomic_dec(&delayed_refs->num_entries);
    1936             : 
    1937             :                 /*
    1938             :                  * Record the must_insert_reserved flag before we drop the
    1939             :                  * spin lock.
    1940             :                  */
    1941           0 :                 must_insert_reserved = locked_ref->must_insert_reserved;
    1942           0 :                 locked_ref->must_insert_reserved = false;
    1943             : 
    1944           0 :                 extent_op = locked_ref->extent_op;
    1945           0 :                 locked_ref->extent_op = NULL;
    1946           0 :                 spin_unlock(&locked_ref->lock);
    1947             : 
    1948           0 :                 ret = run_one_delayed_ref(trans, ref, extent_op,
    1949             :                                           must_insert_reserved);
    1950             : 
    1951           0 :                 btrfs_free_delayed_extent_op(extent_op);
    1952           0 :                 if (ret) {
    1953           0 :                         unselect_delayed_ref_head(delayed_refs, locked_ref);
    1954           0 :                         btrfs_put_delayed_ref(ref);
    1955           0 :                         return ret;
    1956             :                 }
    1957             : 
    1958           0 :                 btrfs_put_delayed_ref(ref);
    1959           0 :                 cond_resched();
    1960             : 
    1961           0 :                 spin_lock(&locked_ref->lock);
    1962           0 :                 btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
    1963             :         }
    1964             : 
    1965             :         return 0;
    1966             : }
    1967             : 
    1968             : /*
    1969             :  * Returns 0 on success or if called with an already aborted transaction.
    1970             :  * Returns -ENOMEM or -EIO on failure and will abort the transaction.
    1971             :  */
    1972           0 : static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
    1973             :                                              unsigned long nr)
    1974             : {
    1975           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1976           0 :         struct btrfs_delayed_ref_root *delayed_refs;
    1977           0 :         struct btrfs_delayed_ref_head *locked_ref = NULL;
    1978           0 :         int ret;
    1979           0 :         unsigned long count = 0;
    1980             : 
    1981           0 :         delayed_refs = &trans->transaction->delayed_refs;
    1982           0 :         do {
    1983           0 :                 if (!locked_ref) {
    1984           0 :                         locked_ref = btrfs_obtain_ref_head(trans);
    1985           0 :                         if (IS_ERR_OR_NULL(locked_ref)) {
    1986           0 :                                 if (PTR_ERR(locked_ref) == -EAGAIN) {
    1987           0 :                                         continue;
    1988             :                                 } else {
    1989             :                                         break;
    1990             :                                 }
    1991             :                         }
    1992           0 :                         count++;
    1993             :                 }
    1994             :                 /*
    1995             :                  * We need to try and merge add/drops of the same ref since we
    1996             :                  * can run into issues with relocate dropping the implicit ref
    1997             :                  * and then it being added back again before the drop can
    1998             :                  * finish.  If we merged anything we need to re-loop so we can
    1999             :                  * get a good ref.
    2000             :                  * Or we can get node references of the same type that weren't
    2001             :                  * merged when created due to bumps in the tree mod seq, and
    2002             :                  * we need to merge them to prevent adding an inline extent
    2003             :                  * backref before dropping it (triggering a BUG_ON at
    2004             :                  * insert_inline_extent_backref()).
    2005             :                  */
    2006           0 :                 spin_lock(&locked_ref->lock);
    2007           0 :                 btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
    2008             : 
    2009           0 :                 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref);
    2010           0 :                 if (ret < 0 && ret != -EAGAIN) {
    2011             :                         /*
    2012             :                          * Error, btrfs_run_delayed_refs_for_head already
    2013             :                          * unlocked everything so just bail out
    2014             :                          */
    2015           0 :                         return ret;
    2016           0 :                 } else if (!ret) {
    2017             :                         /*
    2018             :                          * Success, perform the usual cleanup of a processed
    2019             :                          * head
    2020             :                          */
    2021           0 :                         ret = cleanup_ref_head(trans, locked_ref);
    2022           0 :                         if (ret > 0 ) {
    2023             :                                 /* We dropped our lock, we need to loop. */
    2024           0 :                                 ret = 0;
    2025           0 :                                 continue;
    2026           0 :                         } else if (ret) {
    2027           0 :                                 return ret;
    2028             :                         }
    2029             :                 }
    2030             : 
    2031             :                 /*
    2032             :                  * Either success case or btrfs_run_delayed_refs_for_head
    2033             :                  * returned -EAGAIN, meaning we need to select another head
    2034             :                  */
    2035             : 
    2036           0 :                 locked_ref = NULL;
    2037           0 :                 cond_resched();
    2038           0 :         } while ((nr != -1 && count < nr) || locked_ref);
    2039             : 
    2040             :         return 0;
    2041             : }
    2042             : 
    2043             : #ifdef SCRAMBLE_DELAYED_REFS
    2044             : /*
    2045             :  * Normally delayed refs get processed in ascending bytenr order. This
    2046             :  * correlates in most cases to the order added. To expose dependencies on this
    2047             :  * order, we start to process the tree in the middle instead of the beginning
    2048             :  */
    2049             : static u64 find_middle(struct rb_root *root)
    2050             : {
    2051             :         struct rb_node *n = root->rb_node;
    2052             :         struct btrfs_delayed_ref_node *entry;
    2053             :         int alt = 1;
    2054             :         u64 middle;
    2055             :         u64 first = 0, last = 0;
    2056             : 
    2057             :         n = rb_first(root);
    2058             :         if (n) {
    2059             :                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
    2060             :                 first = entry->bytenr;
    2061             :         }
    2062             :         n = rb_last(root);
    2063             :         if (n) {
    2064             :                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
    2065             :                 last = entry->bytenr;
    2066             :         }
    2067             :         n = root->rb_node;
    2068             : 
    2069             :         while (n) {
    2070             :                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
    2071             :                 WARN_ON(!entry->in_tree);
    2072             : 
    2073             :                 middle = entry->bytenr;
    2074             : 
    2075             :                 if (alt)
    2076             :                         n = n->rb_left;
    2077             :                 else
    2078             :                         n = n->rb_right;
    2079             : 
    2080             :                 alt = 1 - alt;
    2081             :         }
    2082             :         return middle;
    2083             : }
    2084             : #endif
    2085             : 
    2086             : /*
    2087             :  * this starts processing the delayed reference count updates and
    2088             :  * extent insertions we have queued up so far.  count can be
    2089             :  * 0, which means to process everything in the tree at the start
    2090             :  * of the run (but not newly added entries), or it can be some target
    2091             :  * number you'd like to process.
    2092             :  *
    2093             :  * Returns 0 on success or if called with an aborted transaction
    2094             :  * Returns <0 on error and aborts the transaction
    2095             :  */
    2096           0 : int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
    2097             :                            unsigned long count)
    2098             : {
    2099           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2100           0 :         struct rb_node *node;
    2101           0 :         struct btrfs_delayed_ref_root *delayed_refs;
    2102           0 :         struct btrfs_delayed_ref_head *head;
    2103           0 :         int ret;
    2104           0 :         int run_all = count == (unsigned long)-1;
    2105             : 
    2106             :         /* We'll clean this up in btrfs_cleanup_transaction */
    2107           0 :         if (TRANS_ABORTED(trans))
    2108             :                 return 0;
    2109             : 
    2110           0 :         if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
    2111             :                 return 0;
    2112             : 
    2113           0 :         delayed_refs = &trans->transaction->delayed_refs;
    2114           0 :         if (count == 0)
    2115           0 :                 count = delayed_refs->num_heads_ready;
    2116             : 
    2117           0 : again:
    2118             : #ifdef SCRAMBLE_DELAYED_REFS
    2119             :         delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
    2120             : #endif
    2121           0 :         ret = __btrfs_run_delayed_refs(trans, count);
    2122           0 :         if (ret < 0) {
    2123           0 :                 btrfs_abort_transaction(trans, ret);
    2124           0 :                 return ret;
    2125             :         }
    2126             : 
    2127           0 :         if (run_all) {
    2128           0 :                 btrfs_create_pending_block_groups(trans);
    2129             : 
    2130           0 :                 spin_lock(&delayed_refs->lock);
    2131           0 :                 node = rb_first_cached(&delayed_refs->href_root);
    2132           0 :                 if (!node) {
    2133           0 :                         spin_unlock(&delayed_refs->lock);
    2134           0 :                         goto out;
    2135             :                 }
    2136           0 :                 head = rb_entry(node, struct btrfs_delayed_ref_head,
    2137             :                                 href_node);
    2138           0 :                 refcount_inc(&head->refs);
    2139           0 :                 spin_unlock(&delayed_refs->lock);
    2140             : 
    2141             :                 /* Mutex was contended, block until it's released and retry. */
    2142           0 :                 mutex_lock(&head->mutex);
    2143           0 :                 mutex_unlock(&head->mutex);
    2144             : 
    2145           0 :                 btrfs_put_delayed_ref_head(head);
    2146           0 :                 cond_resched();
    2147           0 :                 goto again;
    2148             :         }
    2149           0 : out:
    2150             :         return 0;
    2151             : }
    2152             : 
    2153           0 : int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
    2154             :                                 struct extent_buffer *eb, u64 flags)
    2155             : {
    2156           0 :         struct btrfs_delayed_extent_op *extent_op;
    2157           0 :         int level = btrfs_header_level(eb);
    2158           0 :         int ret;
    2159             : 
    2160           0 :         extent_op = btrfs_alloc_delayed_extent_op();
    2161           0 :         if (!extent_op)
    2162             :                 return -ENOMEM;
    2163             : 
    2164           0 :         extent_op->flags_to_set = flags;
    2165           0 :         extent_op->update_flags = true;
    2166           0 :         extent_op->update_key = false;
    2167           0 :         extent_op->level = level;
    2168             : 
    2169           0 :         ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
    2170           0 :         if (ret)
    2171           0 :                 btrfs_free_delayed_extent_op(extent_op);
    2172             :         return ret;
    2173             : }
    2174             : 
    2175           0 : static noinline int check_delayed_ref(struct btrfs_root *root,
    2176             :                                       struct btrfs_path *path,
    2177             :                                       u64 objectid, u64 offset, u64 bytenr)
    2178             : {
    2179           0 :         struct btrfs_delayed_ref_head *head;
    2180           0 :         struct btrfs_delayed_ref_node *ref;
    2181           0 :         struct btrfs_delayed_data_ref *data_ref;
    2182           0 :         struct btrfs_delayed_ref_root *delayed_refs;
    2183           0 :         struct btrfs_transaction *cur_trans;
    2184           0 :         struct rb_node *node;
    2185           0 :         int ret = 0;
    2186             : 
    2187           0 :         spin_lock(&root->fs_info->trans_lock);
    2188           0 :         cur_trans = root->fs_info->running_transaction;
    2189           0 :         if (cur_trans)
    2190           0 :                 refcount_inc(&cur_trans->use_count);
    2191           0 :         spin_unlock(&root->fs_info->trans_lock);
    2192           0 :         if (!cur_trans)
    2193             :                 return 0;
    2194             : 
    2195           0 :         delayed_refs = &cur_trans->delayed_refs;
    2196           0 :         spin_lock(&delayed_refs->lock);
    2197           0 :         head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
    2198           0 :         if (!head) {
    2199           0 :                 spin_unlock(&delayed_refs->lock);
    2200           0 :                 btrfs_put_transaction(cur_trans);
    2201           0 :                 return 0;
    2202             :         }
    2203             : 
    2204           0 :         if (!mutex_trylock(&head->mutex)) {
    2205           0 :                 if (path->nowait) {
    2206           0 :                         spin_unlock(&delayed_refs->lock);
    2207           0 :                         btrfs_put_transaction(cur_trans);
    2208           0 :                         return -EAGAIN;
    2209             :                 }
    2210             : 
    2211           0 :                 refcount_inc(&head->refs);
    2212           0 :                 spin_unlock(&delayed_refs->lock);
    2213             : 
    2214           0 :                 btrfs_release_path(path);
    2215             : 
    2216             :                 /*
    2217             :                  * Mutex was contended, block until it's released and let
    2218             :                  * caller try again
    2219             :                  */
    2220           0 :                 mutex_lock(&head->mutex);
    2221           0 :                 mutex_unlock(&head->mutex);
    2222           0 :                 btrfs_put_delayed_ref_head(head);
    2223           0 :                 btrfs_put_transaction(cur_trans);
    2224           0 :                 return -EAGAIN;
    2225             :         }
    2226           0 :         spin_unlock(&delayed_refs->lock);
    2227             : 
    2228           0 :         spin_lock(&head->lock);
    2229             :         /*
    2230             :          * XXX: We should replace this with a proper search function in the
    2231             :          * future.
    2232             :          */
    2233           0 :         for (node = rb_first_cached(&head->ref_tree); node;
    2234           0 :              node = rb_next(node)) {
    2235           0 :                 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
    2236             :                 /* If it's a shared ref we know a cross reference exists */
    2237           0 :                 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
    2238             :                         ret = 1;
    2239             :                         break;
    2240             :                 }
    2241             : 
    2242           0 :                 data_ref = btrfs_delayed_node_to_data_ref(ref);
    2243             : 
    2244             :                 /*
    2245             :                  * If our ref doesn't match the one we're currently looking at
    2246             :                  * then we have a cross reference.
    2247             :                  */
    2248           0 :                 if (data_ref->root != root->root_key.objectid ||
    2249           0 :                     data_ref->objectid != objectid ||
    2250           0 :                     data_ref->offset != offset) {
    2251             :                         ret = 1;
    2252             :                         break;
    2253             :                 }
    2254             :         }
    2255           0 :         spin_unlock(&head->lock);
    2256           0 :         mutex_unlock(&head->mutex);
    2257           0 :         btrfs_put_transaction(cur_trans);
    2258           0 :         return ret;
    2259             : }
    2260             : 
    2261           0 : static noinline int check_committed_ref(struct btrfs_root *root,
    2262             :                                         struct btrfs_path *path,
    2263             :                                         u64 objectid, u64 offset, u64 bytenr,
    2264             :                                         bool strict)
    2265             : {
    2266           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2267           0 :         struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
    2268           0 :         struct extent_buffer *leaf;
    2269           0 :         struct btrfs_extent_data_ref *ref;
    2270           0 :         struct btrfs_extent_inline_ref *iref;
    2271           0 :         struct btrfs_extent_item *ei;
    2272           0 :         struct btrfs_key key;
    2273           0 :         u32 item_size;
    2274           0 :         int type;
    2275           0 :         int ret;
    2276             : 
    2277           0 :         key.objectid = bytenr;
    2278           0 :         key.offset = (u64)-1;
    2279           0 :         key.type = BTRFS_EXTENT_ITEM_KEY;
    2280             : 
    2281           0 :         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
    2282           0 :         if (ret < 0)
    2283           0 :                 goto out;
    2284           0 :         BUG_ON(ret == 0); /* Corruption */
    2285             : 
    2286           0 :         ret = -ENOENT;
    2287           0 :         if (path->slots[0] == 0)
    2288           0 :                 goto out;
    2289             : 
    2290           0 :         path->slots[0]--;
    2291           0 :         leaf = path->nodes[0];
    2292           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    2293             : 
    2294           0 :         if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
    2295           0 :                 goto out;
    2296             : 
    2297           0 :         ret = 1;
    2298           0 :         item_size = btrfs_item_size(leaf, path->slots[0]);
    2299           0 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
    2300             : 
    2301             :         /* If extent item has more than 1 inline ref then it's shared */
    2302           0 :         if (item_size != sizeof(*ei) +
    2303             :             btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
    2304           0 :                 goto out;
    2305             : 
    2306             :         /*
    2307             :          * If extent created before last snapshot => it's shared unless the
    2308             :          * snapshot has been deleted. Use the heuristic if strict is false.
    2309             :          */
    2310           0 :         if (!strict &&
    2311             :             (btrfs_extent_generation(leaf, ei) <=
    2312             :              btrfs_root_last_snapshot(&root->root_item)))
    2313           0 :                 goto out;
    2314             : 
    2315           0 :         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
    2316             : 
    2317             :         /* If this extent has SHARED_DATA_REF then it's shared */
    2318           0 :         type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
    2319           0 :         if (type != BTRFS_EXTENT_DATA_REF_KEY)
    2320           0 :                 goto out;
    2321             : 
    2322           0 :         ref = (struct btrfs_extent_data_ref *)(&iref->offset);
    2323           0 :         if (btrfs_extent_refs(leaf, ei) !=
    2324           0 :             btrfs_extent_data_ref_count(leaf, ref) ||
    2325             :             btrfs_extent_data_ref_root(leaf, ref) !=
    2326           0 :             root->root_key.objectid ||
    2327           0 :             btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
    2328             :             btrfs_extent_data_ref_offset(leaf, ref) != offset)
    2329           0 :                 goto out;
    2330             : 
    2331             :         ret = 0;
    2332           0 : out:
    2333           0 :         return ret;
    2334             : }
    2335             : 
    2336           0 : int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
    2337             :                           u64 bytenr, bool strict, struct btrfs_path *path)
    2338             : {
    2339           0 :         int ret;
    2340             : 
    2341           0 :         do {
    2342           0 :                 ret = check_committed_ref(root, path, objectid,
    2343             :                                           offset, bytenr, strict);
    2344           0 :                 if (ret && ret != -ENOENT)
    2345           0 :                         goto out;
    2346             : 
    2347           0 :                 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
    2348           0 :         } while (ret == -EAGAIN);
    2349             : 
    2350           0 : out:
    2351           0 :         btrfs_release_path(path);
    2352           0 :         if (btrfs_is_data_reloc_root(root))
    2353           0 :                 WARN_ON(ret > 0);
    2354           0 :         return ret;
    2355             : }
    2356             : 
    2357           0 : static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
    2358             :                            struct btrfs_root *root,
    2359             :                            struct extent_buffer *buf,
    2360             :                            int full_backref, int inc)
    2361             : {
    2362           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2363           0 :         u64 bytenr;
    2364           0 :         u64 num_bytes;
    2365           0 :         u64 parent;
    2366           0 :         u64 ref_root;
    2367           0 :         u32 nritems;
    2368           0 :         struct btrfs_key key;
    2369           0 :         struct btrfs_file_extent_item *fi;
    2370           0 :         struct btrfs_ref generic_ref = { 0 };
    2371           0 :         bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
    2372           0 :         int i;
    2373           0 :         int action;
    2374           0 :         int level;
    2375           0 :         int ret = 0;
    2376             : 
    2377           0 :         if (btrfs_is_testing(fs_info))
    2378             :                 return 0;
    2379             : 
    2380           0 :         ref_root = btrfs_header_owner(buf);
    2381           0 :         nritems = btrfs_header_nritems(buf);
    2382           0 :         level = btrfs_header_level(buf);
    2383             : 
    2384           0 :         if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
    2385             :                 return 0;
    2386             : 
    2387           0 :         if (full_backref)
    2388           0 :                 parent = buf->start;
    2389             :         else
    2390             :                 parent = 0;
    2391           0 :         if (inc)
    2392             :                 action = BTRFS_ADD_DELAYED_REF;
    2393             :         else
    2394           0 :                 action = BTRFS_DROP_DELAYED_REF;
    2395             : 
    2396           0 :         for (i = 0; i < nritems; i++) {
    2397           0 :                 if (level == 0) {
    2398           0 :                         btrfs_item_key_to_cpu(buf, &key, i);
    2399           0 :                         if (key.type != BTRFS_EXTENT_DATA_KEY)
    2400           0 :                                 continue;
    2401           0 :                         fi = btrfs_item_ptr(buf, i,
    2402             :                                             struct btrfs_file_extent_item);
    2403           0 :                         if (btrfs_file_extent_type(buf, fi) ==
    2404             :                             BTRFS_FILE_EXTENT_INLINE)
    2405           0 :                                 continue;
    2406           0 :                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
    2407           0 :                         if (bytenr == 0)
    2408           0 :                                 continue;
    2409             : 
    2410           0 :                         num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
    2411           0 :                         key.offset -= btrfs_file_extent_offset(buf, fi);
    2412           0 :                         btrfs_init_generic_ref(&generic_ref, action, bytenr,
    2413             :                                                num_bytes, parent);
    2414           0 :                         btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
    2415             :                                             key.offset, root->root_key.objectid,
    2416             :                                             for_reloc);
    2417           0 :                         if (inc)
    2418           0 :                                 ret = btrfs_inc_extent_ref(trans, &generic_ref);
    2419             :                         else
    2420           0 :                                 ret = btrfs_free_extent(trans, &generic_ref);
    2421           0 :                         if (ret)
    2422           0 :                                 goto fail;
    2423             :                 } else {
    2424           0 :                         bytenr = btrfs_node_blockptr(buf, i);
    2425           0 :                         num_bytes = fs_info->nodesize;
    2426           0 :                         btrfs_init_generic_ref(&generic_ref, action, bytenr,
    2427             :                                                num_bytes, parent);
    2428           0 :                         btrfs_init_tree_ref(&generic_ref, level - 1, ref_root,
    2429             :                                             root->root_key.objectid, for_reloc);
    2430           0 :                         if (inc)
    2431           0 :                                 ret = btrfs_inc_extent_ref(trans, &generic_ref);
    2432             :                         else
    2433           0 :                                 ret = btrfs_free_extent(trans, &generic_ref);
    2434           0 :                         if (ret)
    2435           0 :                                 goto fail;
    2436             :                 }
    2437             :         }
    2438             :         return 0;
    2439             : fail:
    2440             :         return ret;
    2441             : }
    2442             : 
    2443           0 : int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    2444             :                   struct extent_buffer *buf, int full_backref)
    2445             : {
    2446           0 :         return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
    2447             : }
    2448             : 
    2449           0 : int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    2450             :                   struct extent_buffer *buf, int full_backref)
    2451             : {
    2452           0 :         return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
    2453             : }
    2454             : 
    2455           0 : static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
    2456             : {
    2457           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    2458           0 :         u64 flags;
    2459           0 :         u64 ret;
    2460             : 
    2461           0 :         if (data)
    2462             :                 flags = BTRFS_BLOCK_GROUP_DATA;
    2463           0 :         else if (root == fs_info->chunk_root)
    2464             :                 flags = BTRFS_BLOCK_GROUP_SYSTEM;
    2465             :         else
    2466           0 :                 flags = BTRFS_BLOCK_GROUP_METADATA;
    2467             : 
    2468           0 :         ret = btrfs_get_alloc_profile(fs_info, flags);
    2469           0 :         return ret;
    2470             : }
    2471             : 
    2472           0 : static u64 first_logical_byte(struct btrfs_fs_info *fs_info)
    2473             : {
    2474           0 :         struct rb_node *leftmost;
    2475           0 :         u64 bytenr = 0;
    2476             : 
    2477           0 :         read_lock(&fs_info->block_group_cache_lock);
    2478             :         /* Get the block group with the lowest logical start address. */
    2479           0 :         leftmost = rb_first_cached(&fs_info->block_group_cache_tree);
    2480           0 :         if (leftmost) {
    2481           0 :                 struct btrfs_block_group *bg;
    2482             : 
    2483           0 :                 bg = rb_entry(leftmost, struct btrfs_block_group, cache_node);
    2484           0 :                 bytenr = bg->start;
    2485             :         }
    2486           0 :         read_unlock(&fs_info->block_group_cache_lock);
    2487             : 
    2488           0 :         return bytenr;
    2489             : }
    2490             : 
    2491           0 : static int pin_down_extent(struct btrfs_trans_handle *trans,
    2492             :                            struct btrfs_block_group *cache,
    2493             :                            u64 bytenr, u64 num_bytes, int reserved)
    2494             : {
    2495           0 :         struct btrfs_fs_info *fs_info = cache->fs_info;
    2496             : 
    2497           0 :         spin_lock(&cache->space_info->lock);
    2498           0 :         spin_lock(&cache->lock);
    2499           0 :         cache->pinned += num_bytes;
    2500           0 :         btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
    2501             :                                              num_bytes);
    2502           0 :         if (reserved) {
    2503           0 :                 cache->reserved -= num_bytes;
    2504           0 :                 cache->space_info->bytes_reserved -= num_bytes;
    2505             :         }
    2506           0 :         spin_unlock(&cache->lock);
    2507           0 :         spin_unlock(&cache->space_info->lock);
    2508             : 
    2509           0 :         set_extent_bit(&trans->transaction->pinned_extents, bytenr,
    2510           0 :                        bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
    2511           0 :         return 0;
    2512             : }
    2513             : 
    2514           0 : int btrfs_pin_extent(struct btrfs_trans_handle *trans,
    2515             :                      u64 bytenr, u64 num_bytes, int reserved)
    2516             : {
    2517           0 :         struct btrfs_block_group *cache;
    2518             : 
    2519           0 :         cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
    2520           0 :         BUG_ON(!cache); /* Logic error */
    2521             : 
    2522           0 :         pin_down_extent(trans, cache, bytenr, num_bytes, reserved);
    2523             : 
    2524           0 :         btrfs_put_block_group(cache);
    2525           0 :         return 0;
    2526             : }
    2527             : 
    2528             : /*
    2529             :  * this function must be called within transaction
    2530             :  */
    2531           0 : int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
    2532             :                                     u64 bytenr, u64 num_bytes)
    2533             : {
    2534           0 :         struct btrfs_block_group *cache;
    2535           0 :         int ret;
    2536             : 
    2537           0 :         cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
    2538           0 :         if (!cache)
    2539             :                 return -EINVAL;
    2540             : 
    2541             :         /*
    2542             :          * Fully cache the free space first so that our pin removes the free space
    2543             :          * from the cache.
    2544             :          */
    2545           0 :         ret = btrfs_cache_block_group(cache, true);
    2546           0 :         if (ret)
    2547           0 :                 goto out;
    2548             : 
    2549           0 :         pin_down_extent(trans, cache, bytenr, num_bytes, 0);
    2550             : 
    2551             :         /* remove us from the free space cache (if we're there at all) */
    2552           0 :         ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
    2553           0 : out:
    2554           0 :         btrfs_put_block_group(cache);
    2555           0 :         return ret;
    2556             : }
    2557             : 
    2558           0 : static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
    2559             :                                    u64 start, u64 num_bytes)
    2560             : {
    2561           0 :         int ret;
    2562           0 :         struct btrfs_block_group *block_group;
    2563             : 
    2564           0 :         block_group = btrfs_lookup_block_group(fs_info, start);
    2565           0 :         if (!block_group)
    2566             :                 return -EINVAL;
    2567             : 
    2568           0 :         ret = btrfs_cache_block_group(block_group, true);
    2569           0 :         if (ret)
    2570           0 :                 goto out;
    2571             : 
    2572           0 :         ret = btrfs_remove_free_space(block_group, start, num_bytes);
    2573           0 : out:
    2574           0 :         btrfs_put_block_group(block_group);
    2575           0 :         return ret;
    2576             : }
    2577             : 
    2578           0 : int btrfs_exclude_logged_extents(struct extent_buffer *eb)
    2579             : {
    2580           0 :         struct btrfs_fs_info *fs_info = eb->fs_info;
    2581           0 :         struct btrfs_file_extent_item *item;
    2582           0 :         struct btrfs_key key;
    2583           0 :         int found_type;
    2584           0 :         int i;
    2585           0 :         int ret = 0;
    2586             : 
    2587           0 :         if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
    2588             :                 return 0;
    2589             : 
    2590           0 :         for (i = 0; i < btrfs_header_nritems(eb); i++) {
    2591           0 :                 btrfs_item_key_to_cpu(eb, &key, i);
    2592           0 :                 if (key.type != BTRFS_EXTENT_DATA_KEY)
    2593           0 :                         continue;
    2594           0 :                 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
    2595           0 :                 found_type = btrfs_file_extent_type(eb, item);
    2596           0 :                 if (found_type == BTRFS_FILE_EXTENT_INLINE)
    2597           0 :                         continue;
    2598           0 :                 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
    2599           0 :                         continue;
    2600           0 :                 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
    2601           0 :                 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
    2602           0 :                 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
    2603           0 :                 if (ret)
    2604             :                         break;
    2605             :         }
    2606             : 
    2607             :         return ret;
    2608             : }
    2609             : 
    2610             : static void
    2611           0 : btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
    2612             : {
    2613           0 :         atomic_inc(&bg->reservations);
    2614           0 : }
    2615             : 
    2616             : /*
    2617             :  * Returns the free cluster for the given space info and sets empty_cluster to
    2618             :  * what it should be based on the mount options.
    2619             :  */
    2620             : static struct btrfs_free_cluster *
    2621           0 : fetch_cluster_info(struct btrfs_fs_info *fs_info,
    2622             :                    struct btrfs_space_info *space_info, u64 *empty_cluster)
    2623             : {
    2624           0 :         struct btrfs_free_cluster *ret = NULL;
    2625             : 
    2626           0 :         *empty_cluster = 0;
    2627           0 :         if (btrfs_mixed_space_info(space_info))
    2628             :                 return ret;
    2629             : 
    2630           0 :         if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
    2631           0 :                 ret = &fs_info->meta_alloc_cluster;
    2632           0 :                 if (btrfs_test_opt(fs_info, SSD))
    2633           0 :                         *empty_cluster = SZ_2M;
    2634             :                 else
    2635           0 :                         *empty_cluster = SZ_64K;
    2636           0 :         } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
    2637           0 :                    btrfs_test_opt(fs_info, SSD_SPREAD)) {
    2638           0 :                 *empty_cluster = SZ_2M;
    2639           0 :                 ret = &fs_info->data_alloc_cluster;
    2640             :         }
    2641             : 
    2642             :         return ret;
    2643             : }
    2644             : 
    2645           0 : static int unpin_extent_range(struct btrfs_fs_info *fs_info,
    2646             :                               u64 start, u64 end,
    2647             :                               const bool return_free_space)
    2648             : {
    2649           0 :         struct btrfs_block_group *cache = NULL;
    2650           0 :         struct btrfs_space_info *space_info;
    2651           0 :         struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
    2652           0 :         struct btrfs_free_cluster *cluster = NULL;
    2653           0 :         u64 len;
    2654           0 :         u64 total_unpinned = 0;
    2655           0 :         u64 empty_cluster = 0;
    2656           0 :         bool readonly;
    2657             : 
    2658           0 :         while (start <= end) {
    2659           0 :                 readonly = false;
    2660           0 :                 if (!cache ||
    2661           0 :                     start >= cache->start + cache->length) {
    2662           0 :                         if (cache)
    2663           0 :                                 btrfs_put_block_group(cache);
    2664           0 :                         total_unpinned = 0;
    2665           0 :                         cache = btrfs_lookup_block_group(fs_info, start);
    2666           0 :                         BUG_ON(!cache); /* Logic error */
    2667             : 
    2668           0 :                         cluster = fetch_cluster_info(fs_info,
    2669             :                                                      cache->space_info,
    2670             :                                                      &empty_cluster);
    2671           0 :                         empty_cluster <<= 1;
    2672             :                 }
    2673             : 
    2674           0 :                 len = cache->start + cache->length - start;
    2675           0 :                 len = min(len, end + 1 - start);
    2676             : 
    2677           0 :                 if (return_free_space)
    2678           0 :                         btrfs_add_free_space(cache, start, len);
    2679             : 
    2680           0 :                 start += len;
    2681           0 :                 total_unpinned += len;
    2682           0 :                 space_info = cache->space_info;
    2683             : 
    2684             :                 /*
    2685             :                  * If this space cluster has been marked as fragmented and we've
    2686             :                  * unpinned enough in this block group to potentially allow a
    2687             :                  * cluster to be created inside of it go ahead and clear the
    2688             :                  * fragmented check.
    2689             :                  */
    2690           0 :                 if (cluster && cluster->fragmented &&
    2691           0 :                     total_unpinned > empty_cluster) {
    2692           0 :                         spin_lock(&cluster->lock);
    2693           0 :                         cluster->fragmented = 0;
    2694           0 :                         spin_unlock(&cluster->lock);
    2695             :                 }
    2696             : 
    2697           0 :                 spin_lock(&space_info->lock);
    2698           0 :                 spin_lock(&cache->lock);
    2699           0 :                 cache->pinned -= len;
    2700           0 :                 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
    2701           0 :                 space_info->max_extent_size = 0;
    2702           0 :                 if (cache->ro) {
    2703           0 :                         space_info->bytes_readonly += len;
    2704           0 :                         readonly = true;
    2705           0 :                 } else if (btrfs_is_zoned(fs_info)) {
    2706             :                         /* Need reset before reusing in a zoned block group */
    2707           0 :                         space_info->bytes_zone_unusable += len;
    2708           0 :                         readonly = true;
    2709             :                 }
    2710           0 :                 spin_unlock(&cache->lock);
    2711           0 :                 if (!readonly && return_free_space &&
    2712           0 :                     global_rsv->space_info == space_info) {
    2713           0 :                         spin_lock(&global_rsv->lock);
    2714           0 :                         if (!global_rsv->full) {
    2715           0 :                                 u64 to_add = min(len, global_rsv->size -
    2716             :                                                       global_rsv->reserved);
    2717             : 
    2718           0 :                                 global_rsv->reserved += to_add;
    2719           0 :                                 btrfs_space_info_update_bytes_may_use(fs_info,
    2720             :                                                 space_info, to_add);
    2721           0 :                                 if (global_rsv->reserved >= global_rsv->size)
    2722           0 :                                         global_rsv->full = 1;
    2723           0 :                                 len -= to_add;
    2724             :                         }
    2725           0 :                         spin_unlock(&global_rsv->lock);
    2726             :                 }
    2727             :                 /* Add to any tickets we may have */
    2728           0 :                 if (!readonly && return_free_space && len)
    2729           0 :                         btrfs_try_granting_tickets(fs_info, space_info);
    2730           0 :                 spin_unlock(&space_info->lock);
    2731             :         }
    2732             : 
    2733           0 :         if (cache)
    2734           0 :                 btrfs_put_block_group(cache);
    2735           0 :         return 0;
    2736             : }
    2737             : 
    2738           0 : int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
    2739             : {
    2740           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2741           0 :         struct btrfs_block_group *block_group, *tmp;
    2742           0 :         struct list_head *deleted_bgs;
    2743           0 :         struct extent_io_tree *unpin;
    2744           0 :         u64 start;
    2745           0 :         u64 end;
    2746           0 :         int ret;
    2747             : 
    2748           0 :         unpin = &trans->transaction->pinned_extents;
    2749             : 
    2750           0 :         while (!TRANS_ABORTED(trans)) {
    2751           0 :                 struct extent_state *cached_state = NULL;
    2752             : 
    2753           0 :                 mutex_lock(&fs_info->unused_bg_unpin_mutex);
    2754           0 :                 ret = find_first_extent_bit(unpin, 0, &start, &end,
    2755             :                                             EXTENT_DIRTY, &cached_state);
    2756           0 :                 if (ret) {
    2757           0 :                         mutex_unlock(&fs_info->unused_bg_unpin_mutex);
    2758           0 :                         break;
    2759             :                 }
    2760             : 
    2761           0 :                 if (btrfs_test_opt(fs_info, DISCARD_SYNC))
    2762           0 :                         ret = btrfs_discard_extent(fs_info, start,
    2763           0 :                                                    end + 1 - start, NULL);
    2764             : 
    2765           0 :                 clear_extent_dirty(unpin, start, end, &cached_state);
    2766           0 :                 unpin_extent_range(fs_info, start, end, true);
    2767           0 :                 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
    2768           0 :                 free_extent_state(cached_state);
    2769           0 :                 cond_resched();
    2770             :         }
    2771             : 
    2772           0 :         if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
    2773           0 :                 btrfs_discard_calc_delay(&fs_info->discard_ctl);
    2774           0 :                 btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
    2775             :         }
    2776             : 
    2777             :         /*
    2778             :          * Transaction is finished.  We don't need the lock anymore.  We
    2779             :          * do need to clean up the block groups in case of a transaction
    2780             :          * abort.
    2781             :          */
    2782           0 :         deleted_bgs = &trans->transaction->deleted_bgs;
    2783           0 :         list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
    2784           0 :                 u64 trimmed = 0;
    2785             : 
    2786           0 :                 ret = -EROFS;
    2787           0 :                 if (!TRANS_ABORTED(trans))
    2788           0 :                         ret = btrfs_discard_extent(fs_info,
    2789             :                                                    block_group->start,
    2790             :                                                    block_group->length,
    2791             :                                                    &trimmed);
    2792             : 
    2793           0 :                 list_del_init(&block_group->bg_list);
    2794           0 :                 btrfs_unfreeze_block_group(block_group);
    2795           0 :                 btrfs_put_block_group(block_group);
    2796             : 
    2797           0 :                 if (ret) {
    2798           0 :                         const char *errstr = btrfs_decode_error(ret);
    2799           0 :                         btrfs_warn(fs_info,
    2800             :                            "discard failed while removing blockgroup: errno=%d %s",
    2801             :                                    ret, errstr);
    2802             :                 }
    2803             :         }
    2804             : 
    2805           0 :         return 0;
    2806             : }
    2807             : 
    2808           0 : static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
    2809             :                                      u64 bytenr, u64 num_bytes, bool is_data)
    2810             : {
    2811           0 :         int ret;
    2812             : 
    2813           0 :         if (is_data) {
    2814           0 :                 struct btrfs_root *csum_root;
    2815             : 
    2816           0 :                 csum_root = btrfs_csum_root(trans->fs_info, bytenr);
    2817           0 :                 ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
    2818           0 :                 if (ret) {
    2819           0 :                         btrfs_abort_transaction(trans, ret);
    2820           0 :                         return ret;
    2821             :                 }
    2822             :         }
    2823             : 
    2824           0 :         ret = add_to_free_space_tree(trans, bytenr, num_bytes);
    2825           0 :         if (ret) {
    2826           0 :                 btrfs_abort_transaction(trans, ret);
    2827           0 :                 return ret;
    2828             :         }
    2829             : 
    2830           0 :         ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
    2831           0 :         if (ret)
    2832           0 :                 btrfs_abort_transaction(trans, ret);
    2833             : 
    2834             :         return ret;
    2835             : }
    2836             : 
    2837             : #define abort_and_dump(trans, path, fmt, args...)       \
    2838             : ({                                                      \
    2839             :         btrfs_abort_transaction(trans, -EUCLEAN);       \
    2840             :         btrfs_print_leaf(path->nodes[0]);            \
    2841             :         btrfs_crit(trans->fs_info, fmt, ##args);     \
    2842             : })
    2843             : 
    2844             : /*
    2845             :  * Drop one or more refs of @node.
    2846             :  *
    2847             :  * 1. Locate the extent refs.
    2848             :  *    It's either inline in EXTENT/METADATA_ITEM or in keyed SHARED_* item.
    2849             :  *    Locate it, then reduce the refs number or remove the ref line completely.
    2850             :  *
    2851             :  * 2. Update the refs count in EXTENT/METADATA_ITEM
    2852             :  *
    2853             :  * Inline backref case:
    2854             :  *
    2855             :  * in extent tree we have:
    2856             :  *
    2857             :  *      item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
    2858             :  *              refs 2 gen 6 flags DATA
    2859             :  *              extent data backref root FS_TREE objectid 258 offset 0 count 1
    2860             :  *              extent data backref root FS_TREE objectid 257 offset 0 count 1
    2861             :  *
    2862             :  * This function gets called with:
    2863             :  *
    2864             :  *    node->bytenr = 13631488
    2865             :  *    node->num_bytes = 1048576
    2866             :  *    root_objectid = FS_TREE
    2867             :  *    owner_objectid = 257
    2868             :  *    owner_offset = 0
    2869             :  *    refs_to_drop = 1
    2870             :  *
    2871             :  * Then we should get some like:
    2872             :  *
    2873             :  *      item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
    2874             :  *              refs 1 gen 6 flags DATA
    2875             :  *              extent data backref root FS_TREE objectid 258 offset 0 count 1
    2876             :  *
    2877             :  * Keyed backref case:
    2878             :  *
    2879             :  * in extent tree we have:
    2880             :  *
    2881             :  *      item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
    2882             :  *              refs 754 gen 6 flags DATA
    2883             :  *      [...]
    2884             :  *      item 2 key (13631488 EXTENT_DATA_REF <HASH>) itemoff 3915 itemsize 28
    2885             :  *              extent data backref root FS_TREE objectid 866 offset 0 count 1
    2886             :  *
    2887             :  * This function get called with:
    2888             :  *
    2889             :  *    node->bytenr = 13631488
    2890             :  *    node->num_bytes = 1048576
    2891             :  *    root_objectid = FS_TREE
    2892             :  *    owner_objectid = 866
    2893             :  *    owner_offset = 0
    2894             :  *    refs_to_drop = 1
    2895             :  *
    2896             :  * Then we should get some like:
    2897             :  *
    2898             :  *      item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
    2899             :  *              refs 753 gen 6 flags DATA
    2900             :  *
    2901             :  * And that (13631488 EXTENT_DATA_REF <HASH>) gets removed.
    2902             :  */
    2903           0 : static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
    2904             :                                struct btrfs_delayed_ref_node *node, u64 parent,
    2905             :                                u64 root_objectid, u64 owner_objectid,
    2906             :                                u64 owner_offset, int refs_to_drop,
    2907             :                                struct btrfs_delayed_extent_op *extent_op)
    2908             : {
    2909           0 :         struct btrfs_fs_info *info = trans->fs_info;
    2910           0 :         struct btrfs_key key;
    2911           0 :         struct btrfs_path *path;
    2912           0 :         struct btrfs_root *extent_root;
    2913           0 :         struct extent_buffer *leaf;
    2914           0 :         struct btrfs_extent_item *ei;
    2915           0 :         struct btrfs_extent_inline_ref *iref;
    2916           0 :         int ret;
    2917           0 :         int is_data;
    2918           0 :         int extent_slot = 0;
    2919           0 :         int found_extent = 0;
    2920           0 :         int num_to_del = 1;
    2921           0 :         u32 item_size;
    2922           0 :         u64 refs;
    2923           0 :         u64 bytenr = node->bytenr;
    2924           0 :         u64 num_bytes = node->num_bytes;
    2925           0 :         bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
    2926             : 
    2927           0 :         extent_root = btrfs_extent_root(info, bytenr);
    2928           0 :         ASSERT(extent_root);
    2929             : 
    2930           0 :         path = btrfs_alloc_path();
    2931           0 :         if (!path)
    2932             :                 return -ENOMEM;
    2933             : 
    2934           0 :         is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
    2935             : 
    2936           0 :         if (!is_data && refs_to_drop != 1) {
    2937           0 :                 btrfs_crit(info,
    2938             : "invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
    2939             :                            node->bytenr, refs_to_drop);
    2940           0 :                 ret = -EINVAL;
    2941           0 :                 btrfs_abort_transaction(trans, ret);
    2942           0 :                 goto out;
    2943             :         }
    2944             : 
    2945           0 :         if (is_data)
    2946           0 :                 skinny_metadata = false;
    2947             : 
    2948           0 :         ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
    2949             :                                     parent, root_objectid, owner_objectid,
    2950             :                                     owner_offset);
    2951           0 :         if (ret == 0) {
    2952             :                 /*
    2953             :                  * Either the inline backref or the SHARED_DATA_REF/
    2954             :                  * SHARED_BLOCK_REF is found
    2955             :                  *
    2956             :                  * Here is a quick path to locate EXTENT/METADATA_ITEM.
    2957             :                  * It's possible the EXTENT/METADATA_ITEM is near current slot.
    2958             :                  */
    2959           0 :                 extent_slot = path->slots[0];
    2960           0 :                 while (extent_slot >= 0) {
    2961           0 :                         btrfs_item_key_to_cpu(path->nodes[0], &key,
    2962             :                                               extent_slot);
    2963           0 :                         if (key.objectid != bytenr)
    2964             :                                 break;
    2965           0 :                         if (key.type == BTRFS_EXTENT_ITEM_KEY &&
    2966           0 :                             key.offset == num_bytes) {
    2967             :                                 found_extent = 1;
    2968             :                                 break;
    2969             :                         }
    2970           0 :                         if (key.type == BTRFS_METADATA_ITEM_KEY &&
    2971           0 :                             key.offset == owner_objectid) {
    2972             :                                 found_extent = 1;
    2973             :                                 break;
    2974             :                         }
    2975             : 
    2976             :                         /* Quick path didn't find the EXTEMT/METADATA_ITEM */
    2977           0 :                         if (path->slots[0] - extent_slot > 5)
    2978             :                                 break;
    2979           0 :                         extent_slot--;
    2980             :                 }
    2981             : 
    2982           0 :                 if (!found_extent) {
    2983           0 :                         if (iref) {
    2984           0 :                                 abort_and_dump(trans, path,
    2985             : "invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref",
    2986             :                                            path->slots[0]);
    2987           0 :                                 ret = -EUCLEAN;
    2988           0 :                                 goto out;
    2989             :                         }
    2990             :                         /* Must be SHARED_* item, remove the backref first */
    2991           0 :                         ret = remove_extent_backref(trans, extent_root, path,
    2992             :                                                     NULL, refs_to_drop, is_data);
    2993           0 :                         if (ret) {
    2994           0 :                                 btrfs_abort_transaction(trans, ret);
    2995           0 :                                 goto out;
    2996             :                         }
    2997           0 :                         btrfs_release_path(path);
    2998             : 
    2999             :                         /* Slow path to locate EXTENT/METADATA_ITEM */
    3000           0 :                         key.objectid = bytenr;
    3001           0 :                         key.type = BTRFS_EXTENT_ITEM_KEY;
    3002           0 :                         key.offset = num_bytes;
    3003             : 
    3004           0 :                         if (!is_data && skinny_metadata) {
    3005           0 :                                 key.type = BTRFS_METADATA_ITEM_KEY;
    3006           0 :                                 key.offset = owner_objectid;
    3007             :                         }
    3008             : 
    3009           0 :                         ret = btrfs_search_slot(trans, extent_root,
    3010             :                                                 &key, path, -1, 1);
    3011           0 :                         if (ret > 0 && skinny_metadata && path->slots[0]) {
    3012             :                                 /*
    3013             :                                  * Couldn't find our skinny metadata item,
    3014             :                                  * see if we have ye olde extent item.
    3015             :                                  */
    3016           0 :                                 path->slots[0]--;
    3017           0 :                                 btrfs_item_key_to_cpu(path->nodes[0], &key,
    3018             :                                                       path->slots[0]);
    3019           0 :                                 if (key.objectid == bytenr &&
    3020           0 :                                     key.type == BTRFS_EXTENT_ITEM_KEY &&
    3021           0 :                                     key.offset == num_bytes)
    3022           0 :                                         ret = 0;
    3023             :                         }
    3024             : 
    3025           0 :                         if (ret > 0 && skinny_metadata) {
    3026           0 :                                 skinny_metadata = false;
    3027           0 :                                 key.objectid = bytenr;
    3028           0 :                                 key.type = BTRFS_EXTENT_ITEM_KEY;
    3029           0 :                                 key.offset = num_bytes;
    3030           0 :                                 btrfs_release_path(path);
    3031           0 :                                 ret = btrfs_search_slot(trans, extent_root,
    3032             :                                                         &key, path, -1, 1);
    3033             :                         }
    3034             : 
    3035           0 :                         if (ret) {
    3036           0 :                                 if (ret > 0)
    3037           0 :                                         btrfs_print_leaf(path->nodes[0]);
    3038           0 :                                 btrfs_err(info,
    3039             :                         "umm, got %d back from search, was looking for %llu, slot %d",
    3040             :                                           ret, bytenr, path->slots[0]);
    3041             :                         }
    3042           0 :                         if (ret < 0) {
    3043           0 :                                 btrfs_abort_transaction(trans, ret);
    3044           0 :                                 goto out;
    3045             :                         }
    3046           0 :                         extent_slot = path->slots[0];
    3047             :                 }
    3048           0 :         } else if (WARN_ON(ret == -ENOENT)) {
    3049           0 :                 abort_and_dump(trans, path,
    3050             : "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu slot %d",
    3051             :                                bytenr, parent, root_objectid, owner_objectid,
    3052             :                                owner_offset, path->slots[0]);
    3053           0 :                 goto out;
    3054             :         } else {
    3055           0 :                 btrfs_abort_transaction(trans, ret);
    3056           0 :                 goto out;
    3057             :         }
    3058             : 
    3059           0 :         leaf = path->nodes[0];
    3060           0 :         item_size = btrfs_item_size(leaf, extent_slot);
    3061           0 :         if (unlikely(item_size < sizeof(*ei))) {
    3062           0 :                 ret = -EINVAL;
    3063           0 :                 btrfs_print_v0_err(info);
    3064           0 :                 btrfs_abort_transaction(trans, ret);
    3065           0 :                 goto out;
    3066             :         }
    3067           0 :         ei = btrfs_item_ptr(leaf, extent_slot,
    3068             :                             struct btrfs_extent_item);
    3069           0 :         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
    3070           0 :             key.type == BTRFS_EXTENT_ITEM_KEY) {
    3071           0 :                 struct btrfs_tree_block_info *bi;
    3072             : 
    3073           0 :                 if (item_size < sizeof(*ei) + sizeof(*bi)) {
    3074           0 :                         abort_and_dump(trans, path,
    3075             : "invalid extent item size for key (%llu, %u, %llu) slot %u owner %llu, has %u expect >= %zu",
    3076             :                                        key.objectid, key.type, key.offset,
    3077             :                                        path->slots[0], owner_objectid, item_size,
    3078             :                                        sizeof(*ei) + sizeof(*bi));
    3079           0 :                         ret = -EUCLEAN;
    3080           0 :                         goto out;
    3081             :                 }
    3082           0 :                 bi = (struct btrfs_tree_block_info *)(ei + 1);
    3083           0 :                 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
    3084             :         }
    3085             : 
    3086           0 :         refs = btrfs_extent_refs(leaf, ei);
    3087           0 :         if (refs < refs_to_drop) {
    3088           0 :                 abort_and_dump(trans, path,
    3089             :                 "trying to drop %d refs but we only have %llu for bytenr %llu slot %u",
    3090             :                                refs_to_drop, refs, bytenr, path->slots[0]);
    3091           0 :                 ret = -EUCLEAN;
    3092           0 :                 goto out;
    3093             :         }
    3094           0 :         refs -= refs_to_drop;
    3095             : 
    3096           0 :         if (refs > 0) {
    3097           0 :                 if (extent_op)
    3098           0 :                         __run_delayed_extent_op(extent_op, leaf, ei);
    3099             :                 /*
    3100             :                  * In the case of inline back ref, reference count will
    3101             :                  * be updated by remove_extent_backref
    3102             :                  */
    3103           0 :                 if (iref) {
    3104           0 :                         if (!found_extent) {
    3105           0 :                                 abort_and_dump(trans, path,
    3106             : "invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u",
    3107             :                                                path->slots[0]);
    3108           0 :                                 ret = -EUCLEAN;
    3109           0 :                                 goto out;
    3110             :                         }
    3111             :                 } else {
    3112           0 :                         btrfs_set_extent_refs(leaf, ei, refs);
    3113           0 :                         btrfs_mark_buffer_dirty(leaf);
    3114             :                 }
    3115           0 :                 if (found_extent) {
    3116           0 :                         ret = remove_extent_backref(trans, extent_root, path,
    3117             :                                                     iref, refs_to_drop, is_data);
    3118           0 :                         if (ret) {
    3119           0 :                                 btrfs_abort_transaction(trans, ret);
    3120           0 :                                 goto out;
    3121             :                         }
    3122             :                 }
    3123             :         } else {
    3124             :                 /* In this branch refs == 1 */
    3125           0 :                 if (found_extent) {
    3126           0 :                         if (is_data && refs_to_drop !=
    3127           0 :                             extent_data_ref_count(path, iref)) {
    3128           0 :                                 abort_and_dump(trans, path,
    3129             :                 "invalid refs_to_drop, current refs %u refs_to_drop %u slot %u",
    3130             :                                                extent_data_ref_count(path, iref),
    3131             :                                                refs_to_drop, path->slots[0]);
    3132           0 :                                 ret = -EUCLEAN;
    3133           0 :                                 goto out;
    3134             :                         }
    3135           0 :                         if (iref) {
    3136           0 :                                 if (path->slots[0] != extent_slot) {
    3137           0 :                                         abort_and_dump(trans, path,
    3138             : "invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref",
    3139             :                                                        key.objectid, key.type,
    3140             :                                                        key.offset, path->slots[0]);
    3141           0 :                                         ret = -EUCLEAN;
    3142           0 :                                         goto out;
    3143             :                                 }
    3144             :                         } else {
    3145             :                                 /*
    3146             :                                  * No inline ref, we must be at SHARED_* item,
    3147             :                                  * And it's single ref, it must be:
    3148             :                                  * |    extent_slot       ||extent_slot + 1|
    3149             :                                  * [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ]
    3150             :                                  */
    3151           0 :                                 if (path->slots[0] != extent_slot + 1) {
    3152           0 :                                         abort_and_dump(trans, path,
    3153             :         "invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM",
    3154             :                                                        path->slots[0]);
    3155           0 :                                         ret = -EUCLEAN;
    3156           0 :                                         goto out;
    3157             :                                 }
    3158           0 :                                 path->slots[0] = extent_slot;
    3159           0 :                                 num_to_del = 2;
    3160             :                         }
    3161             :                 }
    3162             : 
    3163           0 :                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
    3164             :                                       num_to_del);
    3165           0 :                 if (ret) {
    3166           0 :                         btrfs_abort_transaction(trans, ret);
    3167           0 :                         goto out;
    3168             :                 }
    3169           0 :                 btrfs_release_path(path);
    3170             : 
    3171           0 :                 ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
    3172             :         }
    3173           0 :         btrfs_release_path(path);
    3174             : 
    3175           0 : out:
    3176           0 :         btrfs_free_path(path);
    3177           0 :         return ret;
    3178             : }
    3179             : 
    3180             : /*
    3181             :  * when we free an block, it is possible (and likely) that we free the last
    3182             :  * delayed ref for that extent as well.  This searches the delayed ref tree for
    3183             :  * a given extent, and if there are no other delayed refs to be processed, it
    3184             :  * removes it from the tree.
    3185             :  */
    3186           0 : static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
    3187             :                                       u64 bytenr)
    3188             : {
    3189           0 :         struct btrfs_delayed_ref_head *head;
    3190           0 :         struct btrfs_delayed_ref_root *delayed_refs;
    3191           0 :         int ret = 0;
    3192             : 
    3193           0 :         delayed_refs = &trans->transaction->delayed_refs;
    3194           0 :         spin_lock(&delayed_refs->lock);
    3195           0 :         head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
    3196           0 :         if (!head)
    3197           0 :                 goto out_delayed_unlock;
    3198             : 
    3199           0 :         spin_lock(&head->lock);
    3200           0 :         if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
    3201           0 :                 goto out;
    3202             : 
    3203           0 :         if (cleanup_extent_op(head) != NULL)
    3204           0 :                 goto out;
    3205             : 
    3206             :         /*
    3207             :          * waiting for the lock here would deadlock.  If someone else has it
    3208             :          * locked they are already in the process of dropping it anyway
    3209             :          */
    3210           0 :         if (!mutex_trylock(&head->mutex))
    3211           0 :                 goto out;
    3212             : 
    3213           0 :         btrfs_delete_ref_head(delayed_refs, head);
    3214           0 :         head->processing = false;
    3215             : 
    3216           0 :         spin_unlock(&head->lock);
    3217           0 :         spin_unlock(&delayed_refs->lock);
    3218             : 
    3219           0 :         BUG_ON(head->extent_op);
    3220           0 :         if (head->must_insert_reserved)
    3221           0 :                 ret = 1;
    3222             : 
    3223           0 :         btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
    3224           0 :         mutex_unlock(&head->mutex);
    3225           0 :         btrfs_put_delayed_ref_head(head);
    3226           0 :         return ret;
    3227           0 : out:
    3228           0 :         spin_unlock(&head->lock);
    3229             : 
    3230           0 : out_delayed_unlock:
    3231           0 :         spin_unlock(&delayed_refs->lock);
    3232           0 :         return 0;
    3233             : }
    3234             : 
    3235           0 : void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
    3236             :                            u64 root_id,
    3237             :                            struct extent_buffer *buf,
    3238             :                            u64 parent, int last_ref)
    3239             : {
    3240           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    3241           0 :         struct btrfs_ref generic_ref = { 0 };
    3242           0 :         int ret;
    3243             : 
    3244           0 :         btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
    3245           0 :                                buf->start, buf->len, parent);
    3246           0 :         btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
    3247             :                             root_id, 0, false);
    3248             : 
    3249           0 :         if (root_id != BTRFS_TREE_LOG_OBJECTID) {
    3250           0 :                 btrfs_ref_tree_mod(fs_info, &generic_ref);
    3251           0 :                 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL);
    3252           0 :                 BUG_ON(ret); /* -ENOMEM */
    3253             :         }
    3254             : 
    3255           0 :         if (last_ref && btrfs_header_generation(buf) == trans->transid) {
    3256           0 :                 struct btrfs_block_group *cache;
    3257           0 :                 bool must_pin = false;
    3258             : 
    3259           0 :                 if (root_id != BTRFS_TREE_LOG_OBJECTID) {
    3260           0 :                         ret = check_ref_cleanup(trans, buf->start);
    3261           0 :                         if (!ret) {
    3262           0 :                                 btrfs_redirty_list_add(trans->transaction, buf);
    3263           0 :                                 goto out;
    3264             :                         }
    3265             :                 }
    3266             : 
    3267           0 :                 cache = btrfs_lookup_block_group(fs_info, buf->start);
    3268             : 
    3269           0 :                 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
    3270           0 :                         pin_down_extent(trans, cache, buf->start, buf->len, 1);
    3271           0 :                         btrfs_put_block_group(cache);
    3272           0 :                         goto out;
    3273             :                 }
    3274             : 
    3275             :                 /*
    3276             :                  * If there are tree mod log users we may have recorded mod log
    3277             :                  * operations for this node.  If we re-allocate this node we
    3278             :                  * could replay operations on this node that happened when it
    3279             :                  * existed in a completely different root.  For example if it
    3280             :                  * was part of root A, then was reallocated to root B, and we
    3281             :                  * are doing a btrfs_old_search_slot(root b), we could replay
    3282             :                  * operations that happened when the block was part of root A,
    3283             :                  * giving us an inconsistent view of the btree.
    3284             :                  *
    3285             :                  * We are safe from races here because at this point no other
    3286             :                  * node or root points to this extent buffer, so if after this
    3287             :                  * check a new tree mod log user joins we will not have an
    3288             :                  * existing log of operations on this node that we have to
    3289             :                  * contend with.
    3290             :                  */
    3291           0 :                 if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
    3292             :                         must_pin = true;
    3293             : 
    3294           0 :                 if (must_pin || btrfs_is_zoned(fs_info)) {
    3295           0 :                         btrfs_redirty_list_add(trans->transaction, buf);
    3296           0 :                         pin_down_extent(trans, cache, buf->start, buf->len, 1);
    3297           0 :                         btrfs_put_block_group(cache);
    3298           0 :                         goto out;
    3299             :                 }
    3300             : 
    3301           0 :                 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
    3302             : 
    3303           0 :                 btrfs_add_free_space(cache, buf->start, buf->len);
    3304           0 :                 btrfs_free_reserved_bytes(cache, buf->len, 0);
    3305           0 :                 btrfs_put_block_group(cache);
    3306           0 :                 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
    3307             :         }
    3308           0 : out:
    3309           0 :         if (last_ref) {
    3310             :                 /*
    3311             :                  * Deleting the buffer, clear the corrupt flag since it doesn't
    3312             :                  * matter anymore.
    3313             :                  */
    3314           0 :                 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
    3315             :         }
    3316           0 : }
    3317             : 
    3318             : /* Can return -ENOMEM */
    3319           0 : int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
    3320             : {
    3321           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    3322           0 :         int ret;
    3323             : 
    3324           0 :         if (btrfs_is_testing(fs_info))
    3325             :                 return 0;
    3326             : 
    3327             :         /*
    3328             :          * tree log blocks never actually go into the extent allocation
    3329             :          * tree, just update pinning info and exit early.
    3330             :          */
    3331           0 :         if ((ref->type == BTRFS_REF_METADATA &&
    3332           0 :              ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
    3333           0 :             (ref->type == BTRFS_REF_DATA &&
    3334           0 :              ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)) {
    3335             :                 /* unlocks the pinned mutex */
    3336           0 :                 btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
    3337           0 :                 ret = 0;
    3338           0 :         } else if (ref->type == BTRFS_REF_METADATA) {
    3339           0 :                 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL);
    3340             :         } else {
    3341           0 :                 ret = btrfs_add_delayed_data_ref(trans, ref, 0);
    3342             :         }
    3343             : 
    3344           0 :         if (!((ref->type == BTRFS_REF_METADATA &&
    3345             :                ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
    3346             :               (ref->type == BTRFS_REF_DATA &&
    3347             :                ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)))
    3348             :                 btrfs_ref_tree_mod(fs_info, ref);
    3349             : 
    3350           0 :         return ret;
    3351             : }
    3352             : 
    3353             : enum btrfs_loop_type {
    3354             :         LOOP_CACHING_NOWAIT,
    3355             :         LOOP_CACHING_WAIT,
    3356             :         LOOP_UNSET_SIZE_CLASS,
    3357             :         LOOP_ALLOC_CHUNK,
    3358             :         LOOP_WRONG_SIZE_CLASS,
    3359             :         LOOP_NO_EMPTY_SIZE,
    3360             : };
    3361             : 
    3362             : static inline void
    3363             : btrfs_lock_block_group(struct btrfs_block_group *cache,
    3364             :                        int delalloc)
    3365             : {
    3366           0 :         if (delalloc)
    3367           0 :                 down_read(&cache->data_rwsem);
    3368             : }
    3369             : 
    3370           0 : static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
    3371             :                        int delalloc)
    3372             : {
    3373           0 :         btrfs_get_block_group(cache);
    3374           0 :         if (delalloc)
    3375           0 :                 down_read(&cache->data_rwsem);
    3376           0 : }
    3377             : 
    3378           0 : static struct btrfs_block_group *btrfs_lock_cluster(
    3379             :                    struct btrfs_block_group *block_group,
    3380             :                    struct btrfs_free_cluster *cluster,
    3381             :                    int delalloc)
    3382             :         __acquires(&cluster->refill_lock)
    3383             : {
    3384           0 :         struct btrfs_block_group *used_bg = NULL;
    3385             : 
    3386           0 :         spin_lock(&cluster->refill_lock);
    3387           0 :         while (1) {
    3388           0 :                 used_bg = cluster->block_group;
    3389           0 :                 if (!used_bg)
    3390             :                         return NULL;
    3391             : 
    3392           0 :                 if (used_bg == block_group)
    3393           0 :                         return used_bg;
    3394             : 
    3395           0 :                 btrfs_get_block_group(used_bg);
    3396             : 
    3397           0 :                 if (!delalloc)
    3398           0 :                         return used_bg;
    3399             : 
    3400           0 :                 if (down_read_trylock(&used_bg->data_rwsem))
    3401           0 :                         return used_bg;
    3402             : 
    3403           0 :                 spin_unlock(&cluster->refill_lock);
    3404             : 
    3405             :                 /* We should only have one-level nested. */
    3406           0 :                 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
    3407             : 
    3408           0 :                 spin_lock(&cluster->refill_lock);
    3409           0 :                 if (used_bg == cluster->block_group)
    3410           0 :                         return used_bg;
    3411             : 
    3412           0 :                 up_read(&used_bg->data_rwsem);
    3413           0 :                 btrfs_put_block_group(used_bg);
    3414             :         }
    3415             : }
    3416             : 
    3417             : static inline void
    3418           0 : btrfs_release_block_group(struct btrfs_block_group *cache,
    3419             :                          int delalloc)
    3420             : {
    3421           0 :         if (delalloc)
    3422           0 :                 up_read(&cache->data_rwsem);
    3423           0 :         btrfs_put_block_group(cache);
    3424           0 : }
    3425             : 
    3426             : /*
    3427             :  * Helper function for find_free_extent().
    3428             :  *
    3429             :  * Return -ENOENT to inform caller that we need fallback to unclustered mode.
    3430             :  * Return -EAGAIN to inform caller that we need to re-search this block group
    3431             :  * Return >0 to inform caller that we find nothing
    3432             :  * Return 0 means we have found a location and set ffe_ctl->found_offset.
    3433             :  */
    3434           0 : static int find_free_extent_clustered(struct btrfs_block_group *bg,
    3435             :                                       struct find_free_extent_ctl *ffe_ctl,
    3436             :                                       struct btrfs_block_group **cluster_bg_ret)
    3437             : {
    3438           0 :         struct btrfs_block_group *cluster_bg;
    3439           0 :         struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
    3440           0 :         u64 aligned_cluster;
    3441           0 :         u64 offset;
    3442           0 :         int ret;
    3443             : 
    3444           0 :         cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
    3445           0 :         if (!cluster_bg)
    3446           0 :                 goto refill_cluster;
    3447           0 :         if (cluster_bg != bg && (cluster_bg->ro ||
    3448           0 :             !block_group_bits(cluster_bg, ffe_ctl->flags)))
    3449           0 :                 goto release_cluster;
    3450             : 
    3451           0 :         offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
    3452             :                         ffe_ctl->num_bytes, cluster_bg->start,
    3453             :                         &ffe_ctl->max_extent_size);
    3454           0 :         if (offset) {
    3455             :                 /* We have a block, we're done */
    3456           0 :                 spin_unlock(&last_ptr->refill_lock);
    3457           0 :                 trace_btrfs_reserve_extent_cluster(cluster_bg, ffe_ctl);
    3458           0 :                 *cluster_bg_ret = cluster_bg;
    3459           0 :                 ffe_ctl->found_offset = offset;
    3460           0 :                 return 0;
    3461             :         }
    3462           0 :         WARN_ON(last_ptr->block_group != cluster_bg);
    3463             : 
    3464           0 : release_cluster:
    3465             :         /*
    3466             :          * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a new clusters, so
    3467             :          * lets just skip it and let the allocator find whatever block it can
    3468             :          * find. If we reach this point, we will have tried the cluster
    3469             :          * allocator plenty of times and not have found anything, so we are
    3470             :          * likely way too fragmented for the clustering stuff to find anything.
    3471             :          *
    3472             :          * However, if the cluster is taken from the current block group,
    3473             :          * release the cluster first, so that we stand a better chance of
    3474             :          * succeeding in the unclustered allocation.
    3475             :          */
    3476           0 :         if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
    3477           0 :                 spin_unlock(&last_ptr->refill_lock);
    3478           0 :                 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
    3479           0 :                 return -ENOENT;
    3480             :         }
    3481             : 
    3482             :         /* This cluster didn't work out, free it and start over */
    3483           0 :         btrfs_return_cluster_to_free_space(NULL, last_ptr);
    3484             : 
    3485           0 :         if (cluster_bg != bg)
    3486           0 :                 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
    3487             : 
    3488           0 : refill_cluster:
    3489           0 :         if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
    3490           0 :                 spin_unlock(&last_ptr->refill_lock);
    3491           0 :                 return -ENOENT;
    3492             :         }
    3493             : 
    3494           0 :         aligned_cluster = max_t(u64,
    3495             :                         ffe_ctl->empty_cluster + ffe_ctl->empty_size,
    3496             :                         bg->full_stripe_len);
    3497           0 :         ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
    3498             :                         ffe_ctl->num_bytes, aligned_cluster);
    3499           0 :         if (ret == 0) {
    3500             :                 /* Now pull our allocation out of this cluster */
    3501           0 :                 offset = btrfs_alloc_from_cluster(bg, last_ptr,
    3502             :                                 ffe_ctl->num_bytes, ffe_ctl->search_start,
    3503             :                                 &ffe_ctl->max_extent_size);
    3504           0 :                 if (offset) {
    3505             :                         /* We found one, proceed */
    3506           0 :                         spin_unlock(&last_ptr->refill_lock);
    3507           0 :                         ffe_ctl->found_offset = offset;
    3508           0 :                         trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
    3509           0 :                         return 0;
    3510             :                 }
    3511           0 :         } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
    3512           0 :                    !ffe_ctl->retry_clustered) {
    3513           0 :                 spin_unlock(&last_ptr->refill_lock);
    3514             : 
    3515           0 :                 ffe_ctl->retry_clustered = true;
    3516           0 :                 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
    3517           0 :                                 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
    3518           0 :                 return -EAGAIN;
    3519             :         }
    3520             :         /*
    3521             :          * At this point we either didn't find a cluster or we weren't able to
    3522             :          * allocate a block from our cluster.  Free the cluster we've been
    3523             :          * trying to use, and go to the next block group.
    3524             :          */
    3525           0 :         btrfs_return_cluster_to_free_space(NULL, last_ptr);
    3526           0 :         spin_unlock(&last_ptr->refill_lock);
    3527           0 :         return 1;
    3528             : }
    3529             : 
    3530             : /*
    3531             :  * Return >0 to inform caller that we find nothing
    3532             :  * Return 0 when we found an free extent and set ffe_ctrl->found_offset
    3533             :  * Return -EAGAIN to inform caller that we need to re-search this block group
    3534             :  */
    3535           0 : static int find_free_extent_unclustered(struct btrfs_block_group *bg,
    3536             :                                         struct find_free_extent_ctl *ffe_ctl)
    3537             : {
    3538           0 :         struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
    3539           0 :         u64 offset;
    3540             : 
    3541             :         /*
    3542             :          * We are doing an unclustered allocation, set the fragmented flag so
    3543             :          * we don't bother trying to setup a cluster again until we get more
    3544             :          * space.
    3545             :          */
    3546           0 :         if (unlikely(last_ptr)) {
    3547           0 :                 spin_lock(&last_ptr->lock);
    3548           0 :                 last_ptr->fragmented = 1;
    3549           0 :                 spin_unlock(&last_ptr->lock);
    3550             :         }
    3551           0 :         if (ffe_ctl->cached) {
    3552           0 :                 struct btrfs_free_space_ctl *free_space_ctl;
    3553             : 
    3554           0 :                 free_space_ctl = bg->free_space_ctl;
    3555           0 :                 spin_lock(&free_space_ctl->tree_lock);
    3556           0 :                 if (free_space_ctl->free_space <
    3557           0 :                     ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
    3558           0 :                     ffe_ctl->empty_size) {
    3559           0 :                         ffe_ctl->total_free_space = max_t(u64,
    3560             :                                         ffe_ctl->total_free_space,
    3561             :                                         free_space_ctl->free_space);
    3562           0 :                         spin_unlock(&free_space_ctl->tree_lock);
    3563           0 :                         return 1;
    3564             :                 }
    3565           0 :                 spin_unlock(&free_space_ctl->tree_lock);
    3566             :         }
    3567             : 
    3568           0 :         offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
    3569             :                         ffe_ctl->num_bytes, ffe_ctl->empty_size,
    3570             :                         &ffe_ctl->max_extent_size);
    3571             : 
    3572             :         /*
    3573             :          * If we didn't find a chunk, and we haven't failed on this block group
    3574             :          * before, and this block group is in the middle of caching and we are
    3575             :          * ok with waiting, then go ahead and wait for progress to be made, and
    3576             :          * set @retry_unclustered to true.
    3577             :          *
    3578             :          * If @retry_unclustered is true then we've already waited on this
    3579             :          * block group once and should move on to the next block group.
    3580             :          */
    3581           0 :         if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
    3582           0 :             ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
    3583           0 :                 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
    3584           0 :                                                       ffe_ctl->empty_size);
    3585           0 :                 ffe_ctl->retry_unclustered = true;
    3586           0 :                 return -EAGAIN;
    3587           0 :         } else if (!offset) {
    3588             :                 return 1;
    3589             :         }
    3590           0 :         ffe_ctl->found_offset = offset;
    3591           0 :         return 0;
    3592             : }
    3593             : 
    3594           0 : static int do_allocation_clustered(struct btrfs_block_group *block_group,
    3595             :                                    struct find_free_extent_ctl *ffe_ctl,
    3596             :                                    struct btrfs_block_group **bg_ret)
    3597             : {
    3598           0 :         int ret;
    3599             : 
    3600             :         /* We want to try and use the cluster allocator, so lets look there */
    3601           0 :         if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
    3602           0 :                 ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
    3603           0 :                 if (ret >= 0 || ret == -EAGAIN)
    3604             :                         return ret;
    3605             :                 /* ret == -ENOENT case falls through */
    3606             :         }
    3607             : 
    3608           0 :         return find_free_extent_unclustered(block_group, ffe_ctl);
    3609             : }
    3610             : 
    3611             : /*
    3612             :  * Tree-log block group locking
    3613             :  * ============================
    3614             :  *
    3615             :  * fs_info::treelog_bg_lock protects the fs_info::treelog_bg which
    3616             :  * indicates the starting address of a block group, which is reserved only
    3617             :  * for tree-log metadata.
    3618             :  *
    3619             :  * Lock nesting
    3620             :  * ============
    3621             :  *
    3622             :  * space_info::lock
    3623             :  *   block_group::lock
    3624             :  *     fs_info::treelog_bg_lock
    3625             :  */
    3626             : 
    3627             : /*
    3628             :  * Simple allocator for sequential-only block group. It only allows sequential
    3629             :  * allocation. No need to play with trees. This function also reserves the
    3630             :  * bytes as in btrfs_add_reserved_bytes.
    3631             :  */
    3632           0 : static int do_allocation_zoned(struct btrfs_block_group *block_group,
    3633             :                                struct find_free_extent_ctl *ffe_ctl,
    3634             :                                struct btrfs_block_group **bg_ret)
    3635             : {
    3636           0 :         struct btrfs_fs_info *fs_info = block_group->fs_info;
    3637           0 :         struct btrfs_space_info *space_info = block_group->space_info;
    3638           0 :         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
    3639           0 :         u64 start = block_group->start;
    3640           0 :         u64 num_bytes = ffe_ctl->num_bytes;
    3641           0 :         u64 avail;
    3642           0 :         u64 bytenr = block_group->start;
    3643           0 :         u64 log_bytenr;
    3644           0 :         u64 data_reloc_bytenr;
    3645           0 :         int ret = 0;
    3646           0 :         bool skip = false;
    3647             : 
    3648           0 :         ASSERT(btrfs_is_zoned(block_group->fs_info));
    3649             : 
    3650             :         /*
    3651             :          * Do not allow non-tree-log blocks in the dedicated tree-log block
    3652             :          * group, and vice versa.
    3653             :          */
    3654           0 :         spin_lock(&fs_info->treelog_bg_lock);
    3655           0 :         log_bytenr = fs_info->treelog_bg;
    3656           0 :         if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
    3657           0 :                            (!ffe_ctl->for_treelog && bytenr == log_bytenr)))
    3658           0 :                 skip = true;
    3659           0 :         spin_unlock(&fs_info->treelog_bg_lock);
    3660           0 :         if (skip)
    3661             :                 return 1;
    3662             : 
    3663             :         /*
    3664             :          * Do not allow non-relocation blocks in the dedicated relocation block
    3665             :          * group, and vice versa.
    3666             :          */
    3667           0 :         spin_lock(&fs_info->relocation_bg_lock);
    3668           0 :         data_reloc_bytenr = fs_info->data_reloc_bg;
    3669           0 :         if (data_reloc_bytenr &&
    3670           0 :             ((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
    3671           0 :              (!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
    3672           0 :                 skip = true;
    3673           0 :         spin_unlock(&fs_info->relocation_bg_lock);
    3674           0 :         if (skip)
    3675             :                 return 1;
    3676             : 
    3677             :         /* Check RO and no space case before trying to activate it */
    3678           0 :         spin_lock(&block_group->lock);
    3679           0 :         if (block_group->ro || btrfs_zoned_bg_is_full(block_group)) {
    3680             :                 ret = 1;
    3681             :                 /*
    3682             :                  * May need to clear fs_info->{treelog,data_reloc}_bg.
    3683             :                  * Return the error after taking the locks.
    3684             :                  */
    3685             :         }
    3686           0 :         spin_unlock(&block_group->lock);
    3687             : 
    3688           0 :         if (!ret && !btrfs_zone_activate(block_group)) {
    3689           0 :                 ret = 1;
    3690             :                 /*
    3691             :                  * May need to clear fs_info->{treelog,data_reloc}_bg.
    3692             :                  * Return the error after taking the locks.
    3693             :                  */
    3694             :         }
    3695             : 
    3696           0 :         spin_lock(&space_info->lock);
    3697           0 :         spin_lock(&block_group->lock);
    3698           0 :         spin_lock(&fs_info->treelog_bg_lock);
    3699           0 :         spin_lock(&fs_info->relocation_bg_lock);
    3700             : 
    3701           0 :         if (ret)
    3702           0 :                 goto out;
    3703             : 
    3704           0 :         ASSERT(!ffe_ctl->for_treelog ||
    3705             :                block_group->start == fs_info->treelog_bg ||
    3706             :                fs_info->treelog_bg == 0);
    3707           0 :         ASSERT(!ffe_ctl->for_data_reloc ||
    3708             :                block_group->start == fs_info->data_reloc_bg ||
    3709             :                fs_info->data_reloc_bg == 0);
    3710             : 
    3711           0 :         if (block_group->ro ||
    3712           0 :             test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
    3713           0 :                 ret = 1;
    3714           0 :                 goto out;
    3715             :         }
    3716             : 
    3717             :         /*
    3718             :          * Do not allow currently using block group to be tree-log dedicated
    3719             :          * block group.
    3720             :          */
    3721           0 :         if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
    3722           0 :             (block_group->used || block_group->reserved)) {
    3723           0 :                 ret = 1;
    3724           0 :                 goto out;
    3725             :         }
    3726             : 
    3727             :         /*
    3728             :          * Do not allow currently used block group to be the data relocation
    3729             :          * dedicated block group.
    3730             :          */
    3731           0 :         if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
    3732           0 :             (block_group->used || block_group->reserved)) {
    3733           0 :                 ret = 1;
    3734           0 :                 goto out;
    3735             :         }
    3736             : 
    3737           0 :         WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity);
    3738           0 :         avail = block_group->zone_capacity - block_group->alloc_offset;
    3739           0 :         if (avail < num_bytes) {
    3740           0 :                 if (ffe_ctl->max_extent_size < avail) {
    3741             :                         /*
    3742             :                          * With sequential allocator, free space is always
    3743             :                          * contiguous
    3744             :                          */
    3745           0 :                         ffe_ctl->max_extent_size = avail;
    3746           0 :                         ffe_ctl->total_free_space = avail;
    3747             :                 }
    3748           0 :                 ret = 1;
    3749           0 :                 goto out;
    3750             :         }
    3751             : 
    3752           0 :         if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
    3753           0 :                 fs_info->treelog_bg = block_group->start;
    3754             : 
    3755           0 :         if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
    3756           0 :                 fs_info->data_reloc_bg = block_group->start;
    3757             : 
    3758           0 :         ffe_ctl->found_offset = start + block_group->alloc_offset;
    3759           0 :         block_group->alloc_offset += num_bytes;
    3760           0 :         spin_lock(&ctl->tree_lock);
    3761           0 :         ctl->free_space -= num_bytes;
    3762           0 :         spin_unlock(&ctl->tree_lock);
    3763             : 
    3764             :         /*
    3765             :          * We do not check if found_offset is aligned to stripesize. The
    3766             :          * address is anyway rewritten when using zone append writing.
    3767             :          */
    3768             : 
    3769           0 :         ffe_ctl->search_start = ffe_ctl->found_offset;
    3770             : 
    3771           0 : out:
    3772           0 :         if (ret && ffe_ctl->for_treelog)
    3773           0 :                 fs_info->treelog_bg = 0;
    3774           0 :         if (ret && ffe_ctl->for_data_reloc &&
    3775           0 :             fs_info->data_reloc_bg == block_group->start) {
    3776             :                 /*
    3777             :                  * Do not allow further allocations from this block group.
    3778             :                  * Compared to increasing the ->ro, setting the
    3779             :                  * ->zoned_data_reloc_ongoing flag still allows nocow
    3780             :                  *  writers to come in. See btrfs_inc_nocow_writers().
    3781             :                  *
    3782             :                  * We need to disable an allocation to avoid an allocation of
    3783             :                  * regular (non-relocation data) extent. With mix of relocation
    3784             :                  * extents and regular extents, we can dispatch WRITE commands
    3785             :                  * (for relocation extents) and ZONE APPEND commands (for
    3786             :                  * regular extents) at the same time to the same zone, which
    3787             :                  * easily break the write pointer.
    3788             :                  */
    3789           0 :                 set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
    3790           0 :                 fs_info->data_reloc_bg = 0;
    3791             :         }
    3792           0 :         spin_unlock(&fs_info->relocation_bg_lock);
    3793           0 :         spin_unlock(&fs_info->treelog_bg_lock);
    3794           0 :         spin_unlock(&block_group->lock);
    3795           0 :         spin_unlock(&space_info->lock);
    3796           0 :         return ret;
    3797             : }
    3798             : 
    3799           0 : static int do_allocation(struct btrfs_block_group *block_group,
    3800             :                          struct find_free_extent_ctl *ffe_ctl,
    3801             :                          struct btrfs_block_group **bg_ret)
    3802             : {
    3803           0 :         switch (ffe_ctl->policy) {
    3804           0 :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
    3805           0 :                 return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
    3806           0 :         case BTRFS_EXTENT_ALLOC_ZONED:
    3807           0 :                 return do_allocation_zoned(block_group, ffe_ctl, bg_ret);
    3808           0 :         default:
    3809           0 :                 BUG();
    3810             :         }
    3811             : }
    3812             : 
    3813           0 : static void release_block_group(struct btrfs_block_group *block_group,
    3814             :                                 struct find_free_extent_ctl *ffe_ctl,
    3815             :                                 int delalloc)
    3816             : {
    3817           0 :         switch (ffe_ctl->policy) {
    3818           0 :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
    3819           0 :                 ffe_ctl->retry_clustered = false;
    3820           0 :                 ffe_ctl->retry_unclustered = false;
    3821           0 :                 break;
    3822             :         case BTRFS_EXTENT_ALLOC_ZONED:
    3823             :                 /* Nothing to do */
    3824             :                 break;
    3825           0 :         default:
    3826           0 :                 BUG();
    3827             :         }
    3828             : 
    3829           0 :         BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
    3830             :                ffe_ctl->index);
    3831           0 :         btrfs_release_block_group(block_group, delalloc);
    3832           0 : }
    3833             : 
    3834           0 : static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl,
    3835             :                                    struct btrfs_key *ins)
    3836             : {
    3837           0 :         struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
    3838             : 
    3839           0 :         if (!ffe_ctl->use_cluster && last_ptr) {
    3840           0 :                 spin_lock(&last_ptr->lock);
    3841           0 :                 last_ptr->window_start = ins->objectid;
    3842           0 :                 spin_unlock(&last_ptr->lock);
    3843             :         }
    3844           0 : }
    3845             : 
    3846           0 : static void found_extent(struct find_free_extent_ctl *ffe_ctl,
    3847             :                          struct btrfs_key *ins)
    3848             : {
    3849           0 :         switch (ffe_ctl->policy) {
    3850           0 :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
    3851           0 :                 found_extent_clustered(ffe_ctl, ins);
    3852           0 :                 break;
    3853             :         case BTRFS_EXTENT_ALLOC_ZONED:
    3854             :                 /* Nothing to do */
    3855             :                 break;
    3856           0 :         default:
    3857           0 :                 BUG();
    3858             :         }
    3859           0 : }
    3860             : 
    3861           0 : static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
    3862             :                                     struct find_free_extent_ctl *ffe_ctl)
    3863             : {
    3864             :         /* If we can activate new zone, just allocate a chunk and use it */
    3865           0 :         if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
    3866             :                 return 0;
    3867             : 
    3868             :         /*
    3869             :          * We already reached the max active zones. Try to finish one block
    3870             :          * group to make a room for a new block group. This is only possible
    3871             :          * for a data block group because btrfs_zone_finish() may need to wait
    3872             :          * for a running transaction which can cause a deadlock for metadata
    3873             :          * allocation.
    3874             :          */
    3875           0 :         if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
    3876           0 :                 int ret = btrfs_zone_finish_one_bg(fs_info);
    3877             : 
    3878           0 :                 if (ret == 1)
    3879             :                         return 0;
    3880           0 :                 else if (ret < 0)
    3881             :                         return ret;
    3882             :         }
    3883             : 
    3884             :         /*
    3885             :          * If we have enough free space left in an already active block group
    3886             :          * and we can't activate any other zone now, do not allow allocating a
    3887             :          * new chunk and let find_free_extent() retry with a smaller size.
    3888             :          */
    3889           0 :         if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
    3890             :                 return -ENOSPC;
    3891             : 
    3892             :         /*
    3893             :          * Even min_alloc_size is not left in any block groups. Since we cannot
    3894             :          * activate a new block group, allocating it may not help. Let's tell a
    3895             :          * caller to try again and hope it progress something by writing some
    3896             :          * parts of the region. That is only possible for data block groups,
    3897             :          * where a part of the region can be written.
    3898             :          */
    3899           0 :         if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA)
    3900           0 :                 return -EAGAIN;
    3901             : 
    3902             :         /*
    3903             :          * We cannot activate a new block group and no enough space left in any
    3904             :          * block groups. So, allocating a new block group may not help. But,
    3905             :          * there is nothing to do anyway, so let's go with it.
    3906             :          */
    3907             :         return 0;
    3908             : }
    3909             : 
    3910           0 : static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
    3911             :                               struct find_free_extent_ctl *ffe_ctl)
    3912             : {
    3913           0 :         switch (ffe_ctl->policy) {
    3914             :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
    3915             :                 return 0;
    3916           0 :         case BTRFS_EXTENT_ALLOC_ZONED:
    3917           0 :                 return can_allocate_chunk_zoned(fs_info, ffe_ctl);
    3918           0 :         default:
    3919           0 :                 BUG();
    3920             :         }
    3921             : }
    3922             : 
    3923             : /*
    3924             :  * Return >0 means caller needs to re-search for free extent
    3925             :  * Return 0 means we have the needed free extent.
    3926             :  * Return <0 means we failed to locate any free extent.
    3927             :  */
    3928           0 : static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
    3929             :                                         struct btrfs_key *ins,
    3930             :                                         struct find_free_extent_ctl *ffe_ctl,
    3931             :                                         bool full_search)
    3932             : {
    3933           0 :         struct btrfs_root *root = fs_info->chunk_root;
    3934           0 :         int ret;
    3935             : 
    3936           0 :         if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
    3937           0 :             ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
    3938           0 :                 ffe_ctl->orig_have_caching_bg = true;
    3939             : 
    3940           0 :         if (ins->objectid) {
    3941           0 :                 found_extent(ffe_ctl, ins);
    3942           0 :                 return 0;
    3943             :         }
    3944             : 
    3945           0 :         if (ffe_ctl->loop >= LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
    3946             :                 return 1;
    3947             : 
    3948           0 :         ffe_ctl->index++;
    3949           0 :         if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
    3950             :                 return 1;
    3951             : 
    3952             :         /*
    3953             :          * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
    3954             :          *                      caching kthreads as we move along
    3955             :          * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
    3956             :          * LOOP_UNSET_SIZE_CLASS, allow unset size class
    3957             :          * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
    3958             :          * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
    3959             :          *                     again
    3960             :          */
    3961           0 :         if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
    3962           0 :                 ffe_ctl->index = 0;
    3963             :                 /*
    3964             :                  * We want to skip the LOOP_CACHING_WAIT step if we don't have
    3965             :                  * any uncached bgs and we've already done a full search
    3966             :                  * through.
    3967             :                  */
    3968           0 :                 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT &&
    3969           0 :                     (!ffe_ctl->orig_have_caching_bg && full_search))
    3970           0 :                         ffe_ctl->loop++;
    3971           0 :                 ffe_ctl->loop++;
    3972             : 
    3973           0 :                 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
    3974           0 :                         struct btrfs_trans_handle *trans;
    3975           0 :                         int exist = 0;
    3976             : 
    3977             :                         /* Check if allocation policy allows to create a new chunk */
    3978           0 :                         ret = can_allocate_chunk(fs_info, ffe_ctl);
    3979           0 :                         if (ret)
    3980             :                                 return ret;
    3981             : 
    3982           0 :                         trans = current->journal_info;
    3983           0 :                         if (trans)
    3984             :                                 exist = 1;
    3985             :                         else
    3986           0 :                                 trans = btrfs_join_transaction(root);
    3987             : 
    3988           0 :                         if (IS_ERR(trans)) {
    3989           0 :                                 ret = PTR_ERR(trans);
    3990           0 :                                 return ret;
    3991             :                         }
    3992             : 
    3993           0 :                         ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
    3994             :                                                 CHUNK_ALLOC_FORCE_FOR_EXTENT);
    3995             : 
    3996             :                         /* Do not bail out on ENOSPC since we can do more. */
    3997           0 :                         if (ret == -ENOSPC) {
    3998           0 :                                 ret = 0;
    3999           0 :                                 ffe_ctl->loop++;
    4000             :                         }
    4001           0 :                         else if (ret < 0)
    4002           0 :                                 btrfs_abort_transaction(trans, ret);
    4003             :                         else
    4004             :                                 ret = 0;
    4005           0 :                         if (!exist)
    4006           0 :                                 btrfs_end_transaction(trans);
    4007           0 :                         if (ret)
    4008             :                                 return ret;
    4009             :                 }
    4010             : 
    4011           0 :                 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
    4012           0 :                         if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED)
    4013             :                                 return -ENOSPC;
    4014             : 
    4015             :                         /*
    4016             :                          * Don't loop again if we already have no empty_size and
    4017             :                          * no empty_cluster.
    4018             :                          */
    4019           0 :                         if (ffe_ctl->empty_size == 0 &&
    4020           0 :                             ffe_ctl->empty_cluster == 0)
    4021             :                                 return -ENOSPC;
    4022           0 :                         ffe_ctl->empty_size = 0;
    4023           0 :                         ffe_ctl->empty_cluster = 0;
    4024             :                 }
    4025           0 :                 return 1;
    4026             :         }
    4027             :         return -ENOSPC;
    4028             : }
    4029             : 
    4030           0 : static bool find_free_extent_check_size_class(struct find_free_extent_ctl *ffe_ctl,
    4031             :                                               struct btrfs_block_group *bg)
    4032             : {
    4033           0 :         if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
    4034             :                 return true;
    4035           0 :         if (!btrfs_block_group_should_use_size_class(bg))
    4036             :                 return true;
    4037           0 :         if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
    4038             :                 return true;
    4039           0 :         if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
    4040           0 :             bg->size_class == BTRFS_BG_SZ_NONE)
    4041             :                 return true;
    4042           0 :         return ffe_ctl->size_class == bg->size_class;
    4043             : }
    4044             : 
    4045           0 : static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
    4046             :                                         struct find_free_extent_ctl *ffe_ctl,
    4047             :                                         struct btrfs_space_info *space_info,
    4048             :                                         struct btrfs_key *ins)
    4049             : {
    4050             :         /*
    4051             :          * If our free space is heavily fragmented we may not be able to make
    4052             :          * big contiguous allocations, so instead of doing the expensive search
    4053             :          * for free space, simply return ENOSPC with our max_extent_size so we
    4054             :          * can go ahead and search for a more manageable chunk.
    4055             :          *
    4056             :          * If our max_extent_size is large enough for our allocation simply
    4057             :          * disable clustering since we will likely not be able to find enough
    4058             :          * space to create a cluster and induce latency trying.
    4059             :          */
    4060           0 :         if (space_info->max_extent_size) {
    4061           0 :                 spin_lock(&space_info->lock);
    4062           0 :                 if (space_info->max_extent_size &&
    4063           0 :                     ffe_ctl->num_bytes > space_info->max_extent_size) {
    4064           0 :                         ins->offset = space_info->max_extent_size;
    4065           0 :                         spin_unlock(&space_info->lock);
    4066           0 :                         return -ENOSPC;
    4067           0 :                 } else if (space_info->max_extent_size) {
    4068           0 :                         ffe_ctl->use_cluster = false;
    4069             :                 }
    4070           0 :                 spin_unlock(&space_info->lock);
    4071             :         }
    4072             : 
    4073           0 :         ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info,
    4074             :                                                &ffe_ctl->empty_cluster);
    4075           0 :         if (ffe_ctl->last_ptr) {
    4076           0 :                 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
    4077             : 
    4078           0 :                 spin_lock(&last_ptr->lock);
    4079           0 :                 if (last_ptr->block_group)
    4080           0 :                         ffe_ctl->hint_byte = last_ptr->window_start;
    4081           0 :                 if (last_ptr->fragmented) {
    4082             :                         /*
    4083             :                          * We still set window_start so we can keep track of the
    4084             :                          * last place we found an allocation to try and save
    4085             :                          * some time.
    4086             :                          */
    4087           0 :                         ffe_ctl->hint_byte = last_ptr->window_start;
    4088           0 :                         ffe_ctl->use_cluster = false;
    4089             :                 }
    4090           0 :                 spin_unlock(&last_ptr->lock);
    4091             :         }
    4092             : 
    4093             :         return 0;
    4094             : }
    4095             : 
    4096           0 : static int prepare_allocation(struct btrfs_fs_info *fs_info,
    4097             :                               struct find_free_extent_ctl *ffe_ctl,
    4098             :                               struct btrfs_space_info *space_info,
    4099             :                               struct btrfs_key *ins)
    4100             : {
    4101           0 :         switch (ffe_ctl->policy) {
    4102           0 :         case BTRFS_EXTENT_ALLOC_CLUSTERED:
    4103           0 :                 return prepare_allocation_clustered(fs_info, ffe_ctl,
    4104             :                                                     space_info, ins);
    4105           0 :         case BTRFS_EXTENT_ALLOC_ZONED:
    4106           0 :                 if (ffe_ctl->for_treelog) {
    4107           0 :                         spin_lock(&fs_info->treelog_bg_lock);
    4108           0 :                         if (fs_info->treelog_bg)
    4109           0 :                                 ffe_ctl->hint_byte = fs_info->treelog_bg;
    4110           0 :                         spin_unlock(&fs_info->treelog_bg_lock);
    4111             :                 }
    4112           0 :                 if (ffe_ctl->for_data_reloc) {
    4113           0 :                         spin_lock(&fs_info->relocation_bg_lock);
    4114           0 :                         if (fs_info->data_reloc_bg)
    4115           0 :                                 ffe_ctl->hint_byte = fs_info->data_reloc_bg;
    4116           0 :                         spin_unlock(&fs_info->relocation_bg_lock);
    4117             :                 }
    4118             :                 return 0;
    4119           0 :         default:
    4120           0 :                 BUG();
    4121             :         }
    4122             : }
    4123             : 
    4124             : /*
    4125             :  * walks the btree of allocated extents and find a hole of a given size.
    4126             :  * The key ins is changed to record the hole:
    4127             :  * ins->objectid == start position
    4128             :  * ins->flags = BTRFS_EXTENT_ITEM_KEY
    4129             :  * ins->offset == the size of the hole.
    4130             :  * Any available blocks before search_start are skipped.
    4131             :  *
    4132             :  * If there is no suitable free space, we will record the max size of
    4133             :  * the free space extent currently.
    4134             :  *
    4135             :  * The overall logic and call chain:
    4136             :  *
    4137             :  * find_free_extent()
    4138             :  * |- Iterate through all block groups
    4139             :  * |  |- Get a valid block group
    4140             :  * |  |- Try to do clustered allocation in that block group
    4141             :  * |  |- Try to do unclustered allocation in that block group
    4142             :  * |  |- Check if the result is valid
    4143             :  * |  |  |- If valid, then exit
    4144             :  * |  |- Jump to next block group
    4145             :  * |
    4146             :  * |- Push harder to find free extents
    4147             :  *    |- If not found, re-iterate all block groups
    4148             :  */
    4149           0 : static noinline int find_free_extent(struct btrfs_root *root,
    4150             :                                      struct btrfs_key *ins,
    4151             :                                      struct find_free_extent_ctl *ffe_ctl)
    4152             : {
    4153           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4154           0 :         int ret = 0;
    4155           0 :         int cache_block_group_error = 0;
    4156           0 :         struct btrfs_block_group *block_group = NULL;
    4157           0 :         struct btrfs_space_info *space_info;
    4158           0 :         bool full_search = false;
    4159             : 
    4160           0 :         WARN_ON(ffe_ctl->num_bytes < fs_info->sectorsize);
    4161             : 
    4162           0 :         ffe_ctl->search_start = 0;
    4163             :         /* For clustered allocation */
    4164           0 :         ffe_ctl->empty_cluster = 0;
    4165           0 :         ffe_ctl->last_ptr = NULL;
    4166           0 :         ffe_ctl->use_cluster = true;
    4167           0 :         ffe_ctl->have_caching_bg = false;
    4168           0 :         ffe_ctl->orig_have_caching_bg = false;
    4169           0 :         ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
    4170           0 :         ffe_ctl->loop = 0;
    4171             :         /* For clustered allocation */
    4172           0 :         ffe_ctl->retry_clustered = false;
    4173           0 :         ffe_ctl->retry_unclustered = false;
    4174           0 :         ffe_ctl->cached = 0;
    4175           0 :         ffe_ctl->max_extent_size = 0;
    4176           0 :         ffe_ctl->total_free_space = 0;
    4177           0 :         ffe_ctl->found_offset = 0;
    4178           0 :         ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
    4179           0 :         ffe_ctl->size_class = btrfs_calc_block_group_size_class(ffe_ctl->num_bytes);
    4180             : 
    4181           0 :         if (btrfs_is_zoned(fs_info))
    4182           0 :                 ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED;
    4183             : 
    4184           0 :         ins->type = BTRFS_EXTENT_ITEM_KEY;
    4185           0 :         ins->objectid = 0;
    4186           0 :         ins->offset = 0;
    4187             : 
    4188           0 :         trace_find_free_extent(root, ffe_ctl);
    4189             : 
    4190           0 :         space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags);
    4191           0 :         if (!space_info) {
    4192           0 :                 btrfs_err(fs_info, "No space info for %llu", ffe_ctl->flags);
    4193           0 :                 return -ENOSPC;
    4194             :         }
    4195             : 
    4196           0 :         ret = prepare_allocation(fs_info, ffe_ctl, space_info, ins);
    4197           0 :         if (ret < 0)
    4198             :                 return ret;
    4199             : 
    4200           0 :         ffe_ctl->search_start = max(ffe_ctl->search_start,
    4201             :                                     first_logical_byte(fs_info));
    4202           0 :         ffe_ctl->search_start = max(ffe_ctl->search_start, ffe_ctl->hint_byte);
    4203           0 :         if (ffe_ctl->search_start == ffe_ctl->hint_byte) {
    4204           0 :                 block_group = btrfs_lookup_block_group(fs_info,
    4205             :                                                        ffe_ctl->search_start);
    4206             :                 /*
    4207             :                  * we don't want to use the block group if it doesn't match our
    4208             :                  * allocation bits, or if its not cached.
    4209             :                  *
    4210             :                  * However if we are re-searching with an ideal block group
    4211             :                  * picked out then we don't care that the block group is cached.
    4212             :                  */
    4213           0 :                 if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
    4214           0 :                     block_group->cached != BTRFS_CACHE_NO) {
    4215           0 :                         down_read(&space_info->groups_sem);
    4216           0 :                         if (list_empty(&block_group->list) ||
    4217           0 :                             block_group->ro) {
    4218             :                                 /*
    4219             :                                  * someone is removing this block group,
    4220             :                                  * we can't jump into the have_block_group
    4221             :                                  * target because our list pointers are not
    4222             :                                  * valid
    4223             :                                  */
    4224           0 :                                 btrfs_put_block_group(block_group);
    4225           0 :                                 up_read(&space_info->groups_sem);
    4226             :                         } else {
    4227           0 :                                 ffe_ctl->index = btrfs_bg_flags_to_raid_index(
    4228             :                                                         block_group->flags);
    4229           0 :                                 btrfs_lock_block_group(block_group,
    4230             :                                                        ffe_ctl->delalloc);
    4231           0 :                                 ffe_ctl->hinted = true;
    4232           0 :                                 goto have_block_group;
    4233             :                         }
    4234           0 :                 } else if (block_group) {
    4235           0 :                         btrfs_put_block_group(block_group);
    4236             :                 }
    4237             :         }
    4238           0 : search:
    4239           0 :         trace_find_free_extent_search_loop(root, ffe_ctl);
    4240           0 :         ffe_ctl->have_caching_bg = false;
    4241           0 :         if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
    4242             :             ffe_ctl->index == 0)
    4243           0 :                 full_search = true;
    4244           0 :         down_read(&space_info->groups_sem);
    4245           0 :         list_for_each_entry(block_group,
    4246             :                             &space_info->block_groups[ffe_ctl->index], list) {
    4247           0 :                 struct btrfs_block_group *bg_ret;
    4248             : 
    4249           0 :                 ffe_ctl->hinted = false;
    4250             :                 /* If the block group is read-only, we can skip it entirely. */
    4251           0 :                 if (unlikely(block_group->ro)) {
    4252           0 :                         if (ffe_ctl->for_treelog)
    4253           0 :                                 btrfs_clear_treelog_bg(block_group);
    4254           0 :                         if (ffe_ctl->for_data_reloc)
    4255           0 :                                 btrfs_clear_data_reloc_bg(block_group);
    4256           0 :                         continue;
    4257             :                 }
    4258             : 
    4259           0 :                 btrfs_grab_block_group(block_group, ffe_ctl->delalloc);
    4260           0 :                 ffe_ctl->search_start = block_group->start;
    4261             : 
    4262             :                 /*
    4263             :                  * this can happen if we end up cycling through all the
    4264             :                  * raid types, but we want to make sure we only allocate
    4265             :                  * for the proper type.
    4266             :                  */
    4267           0 :                 if (!block_group_bits(block_group, ffe_ctl->flags)) {
    4268           0 :                         u64 extra = BTRFS_BLOCK_GROUP_DUP |
    4269             :                                 BTRFS_BLOCK_GROUP_RAID1_MASK |
    4270             :                                 BTRFS_BLOCK_GROUP_RAID56_MASK |
    4271             :                                 BTRFS_BLOCK_GROUP_RAID10;
    4272             : 
    4273             :                         /*
    4274             :                          * if they asked for extra copies and this block group
    4275             :                          * doesn't provide them, bail.  This does allow us to
    4276             :                          * fill raid0 from raid1.
    4277             :                          */
    4278           0 :                         if ((ffe_ctl->flags & extra) && !(block_group->flags & extra))
    4279           0 :                                 goto loop;
    4280             : 
    4281             :                         /*
    4282             :                          * This block group has different flags than we want.
    4283             :                          * It's possible that we have MIXED_GROUP flag but no
    4284             :                          * block group is mixed.  Just skip such block group.
    4285             :                          */
    4286           0 :                         btrfs_release_block_group(block_group, ffe_ctl->delalloc);
    4287           0 :                         continue;
    4288             :                 }
    4289             : 
    4290           0 : have_block_group:
    4291           0 :                 trace_find_free_extent_have_block_group(root, ffe_ctl, block_group);
    4292           0 :                 ffe_ctl->cached = btrfs_block_group_done(block_group);
    4293           0 :                 if (unlikely(!ffe_ctl->cached)) {
    4294           0 :                         ffe_ctl->have_caching_bg = true;
    4295           0 :                         ret = btrfs_cache_block_group(block_group, false);
    4296             : 
    4297             :                         /*
    4298             :                          * If we get ENOMEM here or something else we want to
    4299             :                          * try other block groups, because it may not be fatal.
    4300             :                          * However if we can't find anything else we need to
    4301             :                          * save our return here so that we return the actual
    4302             :                          * error that caused problems, not ENOSPC.
    4303             :                          */
    4304           0 :                         if (ret < 0) {
    4305           0 :                                 if (!cache_block_group_error)
    4306           0 :                                         cache_block_group_error = ret;
    4307           0 :                                 ret = 0;
    4308           0 :                                 goto loop;
    4309             :                         }
    4310             :                         ret = 0;
    4311             :                 }
    4312             : 
    4313           0 :                 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
    4314           0 :                         goto loop;
    4315             : 
    4316           0 :                 if (!find_free_extent_check_size_class(ffe_ctl, block_group))
    4317           0 :                         goto loop;
    4318             : 
    4319           0 :                 bg_ret = NULL;
    4320           0 :                 ret = do_allocation(block_group, ffe_ctl, &bg_ret);
    4321           0 :                 if (ret == 0) {
    4322           0 :                         if (bg_ret && bg_ret != block_group) {
    4323           0 :                                 btrfs_release_block_group(block_group,
    4324             :                                                           ffe_ctl->delalloc);
    4325           0 :                                 block_group = bg_ret;
    4326             :                         }
    4327           0 :                 } else if (ret == -EAGAIN) {
    4328           0 :                         goto have_block_group;
    4329           0 :                 } else if (ret > 0) {
    4330           0 :                         goto loop;
    4331             :                 }
    4332             : 
    4333             :                 /* Checks */
    4334           0 :                 ffe_ctl->search_start = round_up(ffe_ctl->found_offset,
    4335             :                                                  fs_info->stripesize);
    4336             : 
    4337             :                 /* move on to the next group */
    4338           0 :                 if (ffe_ctl->search_start + ffe_ctl->num_bytes >
    4339           0 :                     block_group->start + block_group->length) {
    4340           0 :                         btrfs_add_free_space_unused(block_group,
    4341             :                                             ffe_ctl->found_offset,
    4342             :                                             ffe_ctl->num_bytes);
    4343           0 :                         goto loop;
    4344             :                 }
    4345             : 
    4346           0 :                 if (ffe_ctl->found_offset < ffe_ctl->search_start)
    4347           0 :                         btrfs_add_free_space_unused(block_group,
    4348             :                                         ffe_ctl->found_offset,
    4349             :                                         ffe_ctl->search_start - ffe_ctl->found_offset);
    4350             : 
    4351           0 :                 ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
    4352             :                                                ffe_ctl->num_bytes,
    4353             :                                                ffe_ctl->delalloc,
    4354           0 :                                                ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS);
    4355           0 :                 if (ret == -EAGAIN) {
    4356           0 :                         btrfs_add_free_space_unused(block_group,
    4357             :                                         ffe_ctl->found_offset,
    4358             :                                         ffe_ctl->num_bytes);
    4359           0 :                         goto loop;
    4360             :                 }
    4361           0 :                 btrfs_inc_block_group_reservations(block_group);
    4362             : 
    4363             :                 /* we are all good, lets return */
    4364           0 :                 ins->objectid = ffe_ctl->search_start;
    4365           0 :                 ins->offset = ffe_ctl->num_bytes;
    4366             : 
    4367           0 :                 trace_btrfs_reserve_extent(block_group, ffe_ctl);
    4368           0 :                 btrfs_release_block_group(block_group, ffe_ctl->delalloc);
    4369           0 :                 break;
    4370           0 : loop:
    4371           0 :                 release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
    4372           0 :                 cond_resched();
    4373             :         }
    4374           0 :         up_read(&space_info->groups_sem);
    4375             : 
    4376           0 :         ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, full_search);
    4377           0 :         if (ret > 0)
    4378           0 :                 goto search;
    4379             : 
    4380           0 :         if (ret == -ENOSPC && !cache_block_group_error) {
    4381             :                 /*
    4382             :                  * Use ffe_ctl->total_free_space as fallback if we can't find
    4383             :                  * any contiguous hole.
    4384             :                  */
    4385           0 :                 if (!ffe_ctl->max_extent_size)
    4386           0 :                         ffe_ctl->max_extent_size = ffe_ctl->total_free_space;
    4387           0 :                 spin_lock(&space_info->lock);
    4388           0 :                 space_info->max_extent_size = ffe_ctl->max_extent_size;
    4389           0 :                 spin_unlock(&space_info->lock);
    4390           0 :                 ins->offset = ffe_ctl->max_extent_size;
    4391           0 :         } else if (ret == -ENOSPC) {
    4392           0 :                 ret = cache_block_group_error;
    4393             :         }
    4394             :         return ret;
    4395             : }
    4396             : 
    4397             : /*
    4398             :  * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
    4399             :  *                        hole that is at least as big as @num_bytes.
    4400             :  *
    4401             :  * @root           -    The root that will contain this extent
    4402             :  *
    4403             :  * @ram_bytes      -    The amount of space in ram that @num_bytes take. This
    4404             :  *                      is used for accounting purposes. This value differs
    4405             :  *                      from @num_bytes only in the case of compressed extents.
    4406             :  *
    4407             :  * @num_bytes      -    Number of bytes to allocate on-disk.
    4408             :  *
    4409             :  * @min_alloc_size -    Indicates the minimum amount of space that the
    4410             :  *                      allocator should try to satisfy. In some cases
    4411             :  *                      @num_bytes may be larger than what is required and if
    4412             :  *                      the filesystem is fragmented then allocation fails.
    4413             :  *                      However, the presence of @min_alloc_size gives a
    4414             :  *                      chance to try and satisfy the smaller allocation.
    4415             :  *
    4416             :  * @empty_size     -    A hint that you plan on doing more COW. This is the
    4417             :  *                      size in bytes the allocator should try to find free
    4418             :  *                      next to the block it returns.  This is just a hint and
    4419             :  *                      may be ignored by the allocator.
    4420             :  *
    4421             :  * @hint_byte      -    Hint to the allocator to start searching above the byte
    4422             :  *                      address passed. It might be ignored.
    4423             :  *
    4424             :  * @ins            -    This key is modified to record the found hole. It will
    4425             :  *                      have the following values:
    4426             :  *                      ins->objectid == start position
    4427             :  *                      ins->flags = BTRFS_EXTENT_ITEM_KEY
    4428             :  *                      ins->offset == the size of the hole.
    4429             :  *
    4430             :  * @is_data        -    Boolean flag indicating whether an extent is
    4431             :  *                      allocated for data (true) or metadata (false)
    4432             :  *
    4433             :  * @delalloc       -    Boolean flag indicating whether this allocation is for
    4434             :  *                      delalloc or not. If 'true' data_rwsem of block groups
    4435             :  *                      is going to be acquired.
    4436             :  *
    4437             :  *
    4438             :  * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
    4439             :  * case -ENOSPC is returned then @ins->offset will contain the size of the
    4440             :  * largest available hole the allocator managed to find.
    4441             :  */
    4442           0 : int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
    4443             :                          u64 num_bytes, u64 min_alloc_size,
    4444             :                          u64 empty_size, u64 hint_byte,
    4445             :                          struct btrfs_key *ins, int is_data, int delalloc)
    4446             : {
    4447           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4448           0 :         struct find_free_extent_ctl ffe_ctl = {};
    4449           0 :         bool final_tried = num_bytes == min_alloc_size;
    4450           0 :         u64 flags;
    4451           0 :         int ret;
    4452           0 :         bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
    4453           0 :         bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data);
    4454             : 
    4455           0 :         flags = get_alloc_profile_by_root(root, is_data);
    4456           0 : again:
    4457           0 :         WARN_ON(num_bytes < fs_info->sectorsize);
    4458             : 
    4459           0 :         ffe_ctl.ram_bytes = ram_bytes;
    4460           0 :         ffe_ctl.num_bytes = num_bytes;
    4461           0 :         ffe_ctl.min_alloc_size = min_alloc_size;
    4462           0 :         ffe_ctl.empty_size = empty_size;
    4463           0 :         ffe_ctl.flags = flags;
    4464           0 :         ffe_ctl.delalloc = delalloc;
    4465           0 :         ffe_ctl.hint_byte = hint_byte;
    4466           0 :         ffe_ctl.for_treelog = for_treelog;
    4467           0 :         ffe_ctl.for_data_reloc = for_data_reloc;
    4468             : 
    4469           0 :         ret = find_free_extent(root, ins, &ffe_ctl);
    4470           0 :         if (!ret && !is_data) {
    4471           0 :                 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
    4472           0 :         } else if (ret == -ENOSPC) {
    4473           0 :                 if (!final_tried && ins->offset) {
    4474           0 :                         num_bytes = min(num_bytes >> 1, ins->offset);
    4475           0 :                         num_bytes = round_down(num_bytes,
    4476             :                                                fs_info->sectorsize);
    4477           0 :                         num_bytes = max(num_bytes, min_alloc_size);
    4478           0 :                         ram_bytes = num_bytes;
    4479           0 :                         if (num_bytes == min_alloc_size)
    4480           0 :                                 final_tried = true;
    4481           0 :                         goto again;
    4482           0 :                 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
    4483           0 :                         struct btrfs_space_info *sinfo;
    4484             : 
    4485           0 :                         sinfo = btrfs_find_space_info(fs_info, flags);
    4486           0 :                         btrfs_err(fs_info,
    4487             :         "allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
    4488             :                                   flags, num_bytes, for_treelog, for_data_reloc);
    4489           0 :                         if (sinfo)
    4490           0 :                                 btrfs_dump_space_info(fs_info, sinfo,
    4491             :                                                       num_bytes, 1);
    4492             :                 }
    4493             :         }
    4494             : 
    4495           0 :         return ret;
    4496             : }
    4497             : 
    4498           0 : int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
    4499             :                                u64 start, u64 len, int delalloc)
    4500             : {
    4501           0 :         struct btrfs_block_group *cache;
    4502             : 
    4503           0 :         cache = btrfs_lookup_block_group(fs_info, start);
    4504           0 :         if (!cache) {
    4505           0 :                 btrfs_err(fs_info, "Unable to find block group for %llu",
    4506             :                           start);
    4507           0 :                 return -ENOSPC;
    4508             :         }
    4509             : 
    4510           0 :         btrfs_add_free_space(cache, start, len);
    4511           0 :         btrfs_free_reserved_bytes(cache, len, delalloc);
    4512           0 :         trace_btrfs_reserved_extent_free(fs_info, start, len);
    4513             : 
    4514           0 :         btrfs_put_block_group(cache);
    4515           0 :         return 0;
    4516             : }
    4517             : 
    4518           0 : int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
    4519             :                               u64 len)
    4520             : {
    4521           0 :         struct btrfs_block_group *cache;
    4522           0 :         int ret = 0;
    4523             : 
    4524           0 :         cache = btrfs_lookup_block_group(trans->fs_info, start);
    4525           0 :         if (!cache) {
    4526           0 :                 btrfs_err(trans->fs_info, "unable to find block group for %llu",
    4527             :                           start);
    4528           0 :                 return -ENOSPC;
    4529             :         }
    4530             : 
    4531           0 :         ret = pin_down_extent(trans, cache, start, len, 1);
    4532           0 :         btrfs_put_block_group(cache);
    4533           0 :         return ret;
    4534             : }
    4535             : 
    4536           0 : static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
    4537             :                                  u64 num_bytes)
    4538             : {
    4539           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    4540           0 :         int ret;
    4541             : 
    4542           0 :         ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
    4543           0 :         if (ret)
    4544             :                 return ret;
    4545             : 
    4546           0 :         ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
    4547           0 :         if (ret) {
    4548           0 :                 ASSERT(!ret);
    4549           0 :                 btrfs_err(fs_info, "update block group failed for %llu %llu",
    4550             :                           bytenr, num_bytes);
    4551           0 :                 return ret;
    4552             :         }
    4553             : 
    4554           0 :         trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
    4555           0 :         return 0;
    4556             : }
    4557             : 
    4558           0 : static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
    4559             :                                       u64 parent, u64 root_objectid,
    4560             :                                       u64 flags, u64 owner, u64 offset,
    4561             :                                       struct btrfs_key *ins, int ref_mod)
    4562             : {
    4563           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    4564           0 :         struct btrfs_root *extent_root;
    4565           0 :         int ret;
    4566           0 :         struct btrfs_extent_item *extent_item;
    4567           0 :         struct btrfs_extent_inline_ref *iref;
    4568           0 :         struct btrfs_path *path;
    4569           0 :         struct extent_buffer *leaf;
    4570           0 :         int type;
    4571           0 :         u32 size;
    4572             : 
    4573           0 :         if (parent > 0)
    4574             :                 type = BTRFS_SHARED_DATA_REF_KEY;
    4575             :         else
    4576           0 :                 type = BTRFS_EXTENT_DATA_REF_KEY;
    4577             : 
    4578           0 :         size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
    4579             : 
    4580           0 :         path = btrfs_alloc_path();
    4581           0 :         if (!path)
    4582             :                 return -ENOMEM;
    4583             : 
    4584           0 :         extent_root = btrfs_extent_root(fs_info, ins->objectid);
    4585           0 :         ret = btrfs_insert_empty_item(trans, extent_root, path, ins, size);
    4586           0 :         if (ret) {
    4587           0 :                 btrfs_free_path(path);
    4588           0 :                 return ret;
    4589             :         }
    4590             : 
    4591           0 :         leaf = path->nodes[0];
    4592           0 :         extent_item = btrfs_item_ptr(leaf, path->slots[0],
    4593             :                                      struct btrfs_extent_item);
    4594           0 :         btrfs_set_extent_refs(leaf, extent_item, ref_mod);
    4595           0 :         btrfs_set_extent_generation(leaf, extent_item, trans->transid);
    4596           0 :         btrfs_set_extent_flags(leaf, extent_item,
    4597             :                                flags | BTRFS_EXTENT_FLAG_DATA);
    4598             : 
    4599           0 :         iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
    4600           0 :         btrfs_set_extent_inline_ref_type(leaf, iref, type);
    4601           0 :         if (parent > 0) {
    4602           0 :                 struct btrfs_shared_data_ref *ref;
    4603           0 :                 ref = (struct btrfs_shared_data_ref *)(iref + 1);
    4604           0 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
    4605           0 :                 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
    4606             :         } else {
    4607           0 :                 struct btrfs_extent_data_ref *ref;
    4608           0 :                 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
    4609           0 :                 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
    4610           0 :                 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
    4611           0 :                 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
    4612           0 :                 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
    4613             :         }
    4614             : 
    4615           0 :         btrfs_mark_buffer_dirty(path->nodes[0]);
    4616           0 :         btrfs_free_path(path);
    4617             : 
    4618           0 :         return alloc_reserved_extent(trans, ins->objectid, ins->offset);
    4619             : }
    4620             : 
    4621           0 : static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
    4622             :                                      struct btrfs_delayed_ref_node *node,
    4623             :                                      struct btrfs_delayed_extent_op *extent_op)
    4624             : {
    4625           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    4626           0 :         struct btrfs_root *extent_root;
    4627           0 :         int ret;
    4628           0 :         struct btrfs_extent_item *extent_item;
    4629           0 :         struct btrfs_key extent_key;
    4630           0 :         struct btrfs_tree_block_info *block_info;
    4631           0 :         struct btrfs_extent_inline_ref *iref;
    4632           0 :         struct btrfs_path *path;
    4633           0 :         struct extent_buffer *leaf;
    4634           0 :         struct btrfs_delayed_tree_ref *ref;
    4635           0 :         u32 size = sizeof(*extent_item) + sizeof(*iref);
    4636           0 :         u64 flags = extent_op->flags_to_set;
    4637           0 :         bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
    4638             : 
    4639           0 :         ref = btrfs_delayed_node_to_tree_ref(node);
    4640             : 
    4641           0 :         extent_key.objectid = node->bytenr;
    4642           0 :         if (skinny_metadata) {
    4643           0 :                 extent_key.offset = ref->level;
    4644           0 :                 extent_key.type = BTRFS_METADATA_ITEM_KEY;
    4645             :         } else {
    4646           0 :                 extent_key.offset = node->num_bytes;
    4647           0 :                 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
    4648           0 :                 size += sizeof(*block_info);
    4649             :         }
    4650             : 
    4651           0 :         path = btrfs_alloc_path();
    4652           0 :         if (!path)
    4653             :                 return -ENOMEM;
    4654             : 
    4655           0 :         extent_root = btrfs_extent_root(fs_info, extent_key.objectid);
    4656           0 :         ret = btrfs_insert_empty_item(trans, extent_root, path, &extent_key,
    4657             :                                       size);
    4658           0 :         if (ret) {
    4659           0 :                 btrfs_free_path(path);
    4660           0 :                 return ret;
    4661             :         }
    4662             : 
    4663           0 :         leaf = path->nodes[0];
    4664           0 :         extent_item = btrfs_item_ptr(leaf, path->slots[0],
    4665             :                                      struct btrfs_extent_item);
    4666           0 :         btrfs_set_extent_refs(leaf, extent_item, 1);
    4667           0 :         btrfs_set_extent_generation(leaf, extent_item, trans->transid);
    4668           0 :         btrfs_set_extent_flags(leaf, extent_item,
    4669             :                                flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
    4670             : 
    4671           0 :         if (skinny_metadata) {
    4672           0 :                 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
    4673             :         } else {
    4674           0 :                 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
    4675           0 :                 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
    4676           0 :                 btrfs_set_tree_block_level(leaf, block_info, ref->level);
    4677           0 :                 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
    4678             :         }
    4679             : 
    4680           0 :         if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
    4681           0 :                 btrfs_set_extent_inline_ref_type(leaf, iref,
    4682             :                                                  BTRFS_SHARED_BLOCK_REF_KEY);
    4683           0 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
    4684             :         } else {
    4685           0 :                 btrfs_set_extent_inline_ref_type(leaf, iref,
    4686             :                                                  BTRFS_TREE_BLOCK_REF_KEY);
    4687           0 :                 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
    4688             :         }
    4689             : 
    4690           0 :         btrfs_mark_buffer_dirty(leaf);
    4691           0 :         btrfs_free_path(path);
    4692             : 
    4693           0 :         return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
    4694             : }
    4695             : 
    4696           0 : int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
    4697             :                                      struct btrfs_root *root, u64 owner,
    4698             :                                      u64 offset, u64 ram_bytes,
    4699             :                                      struct btrfs_key *ins)
    4700             : {
    4701           0 :         struct btrfs_ref generic_ref = { 0 };
    4702             : 
    4703           0 :         BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
    4704             : 
    4705           0 :         btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
    4706             :                                ins->objectid, ins->offset, 0);
    4707           0 :         btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner,
    4708             :                             offset, 0, false);
    4709           0 :         btrfs_ref_tree_mod(root->fs_info, &generic_ref);
    4710             : 
    4711           0 :         return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes);
    4712             : }
    4713             : 
    4714             : /*
    4715             :  * this is used by the tree logging recovery code.  It records that
    4716             :  * an extent has been allocated and makes sure to clear the free
    4717             :  * space cache bits as well
    4718             :  */
    4719           0 : int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
    4720             :                                    u64 root_objectid, u64 owner, u64 offset,
    4721             :                                    struct btrfs_key *ins)
    4722             : {
    4723           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    4724           0 :         int ret;
    4725           0 :         struct btrfs_block_group *block_group;
    4726           0 :         struct btrfs_space_info *space_info;
    4727             : 
    4728             :         /*
    4729             :          * Mixed block groups will exclude before processing the log so we only
    4730             :          * need to do the exclude dance if this fs isn't mixed.
    4731             :          */
    4732           0 :         if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
    4733           0 :                 ret = __exclude_logged_extent(fs_info, ins->objectid,
    4734             :                                               ins->offset);
    4735           0 :                 if (ret)
    4736             :                         return ret;
    4737             :         }
    4738             : 
    4739           0 :         block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
    4740           0 :         if (!block_group)
    4741             :                 return -EINVAL;
    4742             : 
    4743           0 :         space_info = block_group->space_info;
    4744           0 :         spin_lock(&space_info->lock);
    4745           0 :         spin_lock(&block_group->lock);
    4746           0 :         space_info->bytes_reserved += ins->offset;
    4747           0 :         block_group->reserved += ins->offset;
    4748           0 :         spin_unlock(&block_group->lock);
    4749           0 :         spin_unlock(&space_info->lock);
    4750             : 
    4751           0 :         ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
    4752             :                                          offset, ins, 1);
    4753           0 :         if (ret)
    4754           0 :                 btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
    4755           0 :         btrfs_put_block_group(block_group);
    4756           0 :         return ret;
    4757             : }
    4758             : 
    4759             : static struct extent_buffer *
    4760           0 : btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
    4761             :                       u64 bytenr, int level, u64 owner,
    4762             :                       enum btrfs_lock_nesting nest)
    4763             : {
    4764           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4765           0 :         struct extent_buffer *buf;
    4766           0 :         u64 lockdep_owner = owner;
    4767             : 
    4768           0 :         buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
    4769           0 :         if (IS_ERR(buf))
    4770             :                 return buf;
    4771             : 
    4772             :         /*
    4773             :          * Extra safety check in case the extent tree is corrupted and extent
    4774             :          * allocator chooses to use a tree block which is already used and
    4775             :          * locked.
    4776             :          */
    4777           0 :         if (buf->lock_owner == current->pid) {
    4778           0 :                 btrfs_err_rl(fs_info,
    4779             : "tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
    4780             :                         buf->start, btrfs_header_owner(buf), current->pid);
    4781           0 :                 free_extent_buffer(buf);
    4782           0 :                 return ERR_PTR(-EUCLEAN);
    4783             :         }
    4784             : 
    4785             :         /*
    4786             :          * The reloc trees are just snapshots, so we need them to appear to be
    4787             :          * just like any other fs tree WRT lockdep.
    4788             :          *
    4789             :          * The exception however is in replace_path() in relocation, where we
    4790             :          * hold the lock on the original fs root and then search for the reloc
    4791             :          * root.  At that point we need to make sure any reloc root buffers are
    4792             :          * set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make
    4793             :          * lockdep happy.
    4794             :          */
    4795           0 :         if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID &&
    4796           0 :             !test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
    4797             :                 lockdep_owner = BTRFS_FS_TREE_OBJECTID;
    4798             : 
    4799             :         /* btrfs_clear_buffer_dirty() accesses generation field. */
    4800           0 :         btrfs_set_header_generation(buf, trans->transid);
    4801             : 
    4802             :         /*
    4803             :          * This needs to stay, because we could allocate a freed block from an
    4804             :          * old tree into a new tree, so we need to make sure this new block is
    4805             :          * set to the appropriate level and owner.
    4806             :          */
    4807           0 :         btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level);
    4808             : 
    4809           0 :         __btrfs_tree_lock(buf, nest);
    4810           0 :         btrfs_clear_buffer_dirty(trans, buf);
    4811           0 :         clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
    4812           0 :         clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);
    4813             : 
    4814           0 :         set_extent_buffer_uptodate(buf);
    4815             : 
    4816           0 :         memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
    4817           0 :         btrfs_set_header_level(buf, level);
    4818           0 :         btrfs_set_header_bytenr(buf, buf->start);
    4819           0 :         btrfs_set_header_generation(buf, trans->transid);
    4820           0 :         btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
    4821           0 :         btrfs_set_header_owner(buf, owner);
    4822           0 :         write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
    4823           0 :         write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
    4824           0 :         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
    4825           0 :                 buf->log_index = root->log_transid % 2;
    4826             :                 /*
    4827             :                  * we allow two log transactions at a time, use different
    4828             :                  * EXTENT bit to differentiate dirty pages.
    4829             :                  */
    4830           0 :                 if (buf->log_index == 0)
    4831           0 :                         set_extent_bit(&root->dirty_log_pages, buf->start,
    4832           0 :                                        buf->start + buf->len - 1,
    4833             :                                        EXTENT_DIRTY, NULL);
    4834             :                 else
    4835           0 :                         set_extent_bit(&root->dirty_log_pages, buf->start,
    4836           0 :                                        buf->start + buf->len - 1,
    4837             :                                        EXTENT_NEW, NULL);
    4838             :         } else {
    4839           0 :                 buf->log_index = -1;
    4840           0 :                 set_extent_bit(&trans->transaction->dirty_pages, buf->start,
    4841           0 :                                buf->start + buf->len - 1, EXTENT_DIRTY, NULL);
    4842             :         }
    4843             :         /* this returns a buffer locked for blocking */
    4844             :         return buf;
    4845             : }
    4846             : 
    4847             : /*
    4848             :  * finds a free extent and does all the dirty work required for allocation
    4849             :  * returns the tree buffer or an ERR_PTR on error.
    4850             :  */
    4851           0 : struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
    4852             :                                              struct btrfs_root *root,
    4853             :                                              u64 parent, u64 root_objectid,
    4854             :                                              const struct btrfs_disk_key *key,
    4855             :                                              int level, u64 hint,
    4856             :                                              u64 empty_size,
    4857             :                                              enum btrfs_lock_nesting nest)
    4858             : {
    4859           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4860           0 :         struct btrfs_key ins;
    4861           0 :         struct btrfs_block_rsv *block_rsv;
    4862           0 :         struct extent_buffer *buf;
    4863           0 :         struct btrfs_delayed_extent_op *extent_op;
    4864           0 :         struct btrfs_ref generic_ref = { 0 };
    4865           0 :         u64 flags = 0;
    4866           0 :         int ret;
    4867           0 :         u32 blocksize = fs_info->nodesize;
    4868           0 :         bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
    4869             : 
    4870             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
    4871             :         if (btrfs_is_testing(fs_info)) {
    4872             :                 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
    4873             :                                             level, root_objectid, nest);
    4874             :                 if (!IS_ERR(buf))
    4875             :                         root->alloc_bytenr += blocksize;
    4876             :                 return buf;
    4877             :         }
    4878             : #endif
    4879             : 
    4880           0 :         block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
    4881           0 :         if (IS_ERR(block_rsv))
    4882             :                 return ERR_CAST(block_rsv);
    4883             : 
    4884           0 :         ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
    4885             :                                    empty_size, hint, &ins, 0, 0);
    4886           0 :         if (ret)
    4887           0 :                 goto out_unuse;
    4888             : 
    4889           0 :         buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
    4890             :                                     root_objectid, nest);
    4891           0 :         if (IS_ERR(buf)) {
    4892           0 :                 ret = PTR_ERR(buf);
    4893           0 :                 goto out_free_reserved;
    4894             :         }
    4895             : 
    4896           0 :         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
    4897           0 :                 if (parent == 0)
    4898           0 :                         parent = ins.objectid;
    4899             :                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
    4900             :         } else
    4901           0 :                 BUG_ON(parent > 0);
    4902             : 
    4903           0 :         if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
    4904           0 :                 extent_op = btrfs_alloc_delayed_extent_op();
    4905           0 :                 if (!extent_op) {
    4906           0 :                         ret = -ENOMEM;
    4907           0 :                         goto out_free_buf;
    4908             :                 }
    4909           0 :                 if (key)
    4910           0 :                         memcpy(&extent_op->key, key, sizeof(extent_op->key));
    4911             :                 else
    4912           0 :                         memset(&extent_op->key, 0, sizeof(extent_op->key));
    4913           0 :                 extent_op->flags_to_set = flags;
    4914           0 :                 extent_op->update_key = skinny_metadata ? false : true;
    4915           0 :                 extent_op->update_flags = true;
    4916           0 :                 extent_op->level = level;
    4917             : 
    4918           0 :                 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
    4919             :                                        ins.objectid, ins.offset, parent);
    4920           0 :                 btrfs_init_tree_ref(&generic_ref, level, root_objectid,
    4921             :                                     root->root_key.objectid, false);
    4922           0 :                 btrfs_ref_tree_mod(fs_info, &generic_ref);
    4923           0 :                 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
    4924           0 :                 if (ret)
    4925           0 :                         goto out_free_delayed;
    4926             :         }
    4927             :         return buf;
    4928             : 
    4929             : out_free_delayed:
    4930           0 :         btrfs_free_delayed_extent_op(extent_op);
    4931           0 : out_free_buf:
    4932           0 :         btrfs_tree_unlock(buf);
    4933           0 :         free_extent_buffer(buf);
    4934           0 : out_free_reserved:
    4935           0 :         btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
    4936           0 : out_unuse:
    4937           0 :         btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
    4938           0 :         return ERR_PTR(ret);
    4939             : }
    4940             : 
    4941             : struct walk_control {
    4942             :         u64 refs[BTRFS_MAX_LEVEL];
    4943             :         u64 flags[BTRFS_MAX_LEVEL];
    4944             :         struct btrfs_key update_progress;
    4945             :         struct btrfs_key drop_progress;
    4946             :         int drop_level;
    4947             :         int stage;
    4948             :         int level;
    4949             :         int shared_level;
    4950             :         int update_ref;
    4951             :         int keep_locks;
    4952             :         int reada_slot;
    4953             :         int reada_count;
    4954             :         int restarted;
    4955             : };
    4956             : 
    4957             : #define DROP_REFERENCE  1
    4958             : #define UPDATE_BACKREF  2
    4959             : 
    4960           0 : static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
    4961             :                                      struct btrfs_root *root,
    4962             :                                      struct walk_control *wc,
    4963             :                                      struct btrfs_path *path)
    4964             : {
    4965           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4966           0 :         u64 bytenr;
    4967           0 :         u64 generation;
    4968           0 :         u64 refs;
    4969           0 :         u64 flags;
    4970           0 :         u32 nritems;
    4971           0 :         struct btrfs_key key;
    4972           0 :         struct extent_buffer *eb;
    4973           0 :         int ret;
    4974           0 :         int slot;
    4975           0 :         int nread = 0;
    4976             : 
    4977           0 :         if (path->slots[wc->level] < wc->reada_slot) {
    4978           0 :                 wc->reada_count = wc->reada_count * 2 / 3;
    4979           0 :                 wc->reada_count = max(wc->reada_count, 2);
    4980             :         } else {
    4981           0 :                 wc->reada_count = wc->reada_count * 3 / 2;
    4982           0 :                 wc->reada_count = min_t(int, wc->reada_count,
    4983             :                                         BTRFS_NODEPTRS_PER_BLOCK(fs_info));
    4984             :         }
    4985             : 
    4986           0 :         eb = path->nodes[wc->level];
    4987           0 :         nritems = btrfs_header_nritems(eb);
    4988             : 
    4989           0 :         for (slot = path->slots[wc->level]; slot < nritems; slot++) {
    4990           0 :                 if (nread >= wc->reada_count)
    4991             :                         break;
    4992             : 
    4993           0 :                 cond_resched();
    4994           0 :                 bytenr = btrfs_node_blockptr(eb, slot);
    4995           0 :                 generation = btrfs_node_ptr_generation(eb, slot);
    4996             : 
    4997           0 :                 if (slot == path->slots[wc->level])
    4998           0 :                         goto reada;
    4999             : 
    5000           0 :                 if (wc->stage == UPDATE_BACKREF &&
    5001           0 :                     generation <= root->root_key.offset)
    5002           0 :                         continue;
    5003             : 
    5004             :                 /* We don't lock the tree block, it's OK to be racy here */
    5005           0 :                 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
    5006           0 :                                                wc->level - 1, 1, &refs,
    5007             :                                                &flags);
    5008             :                 /* We don't care about errors in readahead. */
    5009           0 :                 if (ret < 0)
    5010           0 :                         continue;
    5011           0 :                 BUG_ON(refs == 0);
    5012             : 
    5013           0 :                 if (wc->stage == DROP_REFERENCE) {
    5014           0 :                         if (refs == 1)
    5015           0 :                                 goto reada;
    5016             : 
    5017           0 :                         if (wc->level == 1 &&
    5018           0 :                             (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    5019           0 :                                 continue;
    5020           0 :                         if (!wc->update_ref ||
    5021           0 :                             generation <= root->root_key.offset)
    5022           0 :                                 continue;
    5023           0 :                         btrfs_node_key_to_cpu(eb, &key, slot);
    5024           0 :                         ret = btrfs_comp_cpu_keys(&key,
    5025           0 :                                                   &wc->update_progress);
    5026           0 :                         if (ret < 0)
    5027           0 :                                 continue;
    5028             :                 } else {
    5029           0 :                         if (wc->level == 1 &&
    5030           0 :                             (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    5031           0 :                                 continue;
    5032             :                 }
    5033           0 : reada:
    5034           0 :                 btrfs_readahead_node_child(eb, slot);
    5035           0 :                 nread++;
    5036             :         }
    5037           0 :         wc->reada_slot = slot;
    5038           0 : }
    5039             : 
    5040             : /*
    5041             :  * helper to process tree block while walking down the tree.
    5042             :  *
    5043             :  * when wc->stage == UPDATE_BACKREF, this function updates
    5044             :  * back refs for pointers in the block.
    5045             :  *
    5046             :  * NOTE: return value 1 means we should stop walking down.
    5047             :  */
    5048           0 : static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
    5049             :                                    struct btrfs_root *root,
    5050             :                                    struct btrfs_path *path,
    5051             :                                    struct walk_control *wc, int lookup_info)
    5052             : {
    5053           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5054           0 :         int level = wc->level;
    5055           0 :         struct extent_buffer *eb = path->nodes[level];
    5056           0 :         u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
    5057           0 :         int ret;
    5058             : 
    5059           0 :         if (wc->stage == UPDATE_BACKREF &&
    5060           0 :             btrfs_header_owner(eb) != root->root_key.objectid)
    5061             :                 return 1;
    5062             : 
    5063             :         /*
    5064             :          * when reference count of tree block is 1, it won't increase
    5065             :          * again. once full backref flag is set, we never clear it.
    5066             :          */
    5067           0 :         if (lookup_info &&
    5068           0 :             ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
    5069           0 :              (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
    5070           0 :                 BUG_ON(!path->locks[level]);
    5071           0 :                 ret = btrfs_lookup_extent_info(trans, fs_info,
    5072             :                                                eb->start, level, 1,
    5073             :                                                &wc->refs[level],
    5074             :                                                &wc->flags[level]);
    5075           0 :                 BUG_ON(ret == -ENOMEM);
    5076           0 :                 if (ret)
    5077             :                         return ret;
    5078           0 :                 BUG_ON(wc->refs[level] == 0);
    5079             :         }
    5080             : 
    5081           0 :         if (wc->stage == DROP_REFERENCE) {
    5082           0 :                 if (wc->refs[level] > 1)
    5083             :                         return 1;
    5084             : 
    5085           0 :                 if (path->locks[level] && !wc->keep_locks) {
    5086           0 :                         btrfs_tree_unlock_rw(eb, path->locks[level]);
    5087           0 :                         path->locks[level] = 0;
    5088             :                 }
    5089           0 :                 return 0;
    5090             :         }
    5091             : 
    5092             :         /* wc->stage == UPDATE_BACKREF */
    5093           0 :         if (!(wc->flags[level] & flag)) {
    5094           0 :                 BUG_ON(!path->locks[level]);
    5095           0 :                 ret = btrfs_inc_ref(trans, root, eb, 1);
    5096           0 :                 BUG_ON(ret); /* -ENOMEM */
    5097           0 :                 ret = btrfs_dec_ref(trans, root, eb, 0);
    5098           0 :                 BUG_ON(ret); /* -ENOMEM */
    5099           0 :                 ret = btrfs_set_disk_extent_flags(trans, eb, flag);
    5100           0 :                 BUG_ON(ret); /* -ENOMEM */
    5101           0 :                 wc->flags[level] |= flag;
    5102             :         }
    5103             : 
    5104             :         /*
    5105             :          * the block is shared by multiple trees, so it's not good to
    5106             :          * keep the tree lock
    5107             :          */
    5108           0 :         if (path->locks[level] && level > 0) {
    5109           0 :                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    5110           0 :                 path->locks[level] = 0;
    5111             :         }
    5112             :         return 0;
    5113             : }
    5114             : 
    5115             : /*
    5116             :  * This is used to verify a ref exists for this root to deal with a bug where we
    5117             :  * would have a drop_progress key that hadn't been updated properly.
    5118             :  */
    5119           0 : static int check_ref_exists(struct btrfs_trans_handle *trans,
    5120             :                             struct btrfs_root *root, u64 bytenr, u64 parent,
    5121             :                             int level)
    5122             : {
    5123           0 :         struct btrfs_path *path;
    5124           0 :         struct btrfs_extent_inline_ref *iref;
    5125           0 :         int ret;
    5126             : 
    5127           0 :         path = btrfs_alloc_path();
    5128           0 :         if (!path)
    5129             :                 return -ENOMEM;
    5130             : 
    5131           0 :         ret = lookup_extent_backref(trans, path, &iref, bytenr,
    5132           0 :                                     root->fs_info->nodesize, parent,
    5133             :                                     root->root_key.objectid, level, 0);
    5134           0 :         btrfs_free_path(path);
    5135           0 :         if (ret == -ENOENT)
    5136             :                 return 0;
    5137           0 :         if (ret < 0)
    5138           0 :                 return ret;
    5139             :         return 1;
    5140             : }
    5141             : 
    5142             : /*
    5143             :  * helper to process tree block pointer.
    5144             :  *
    5145             :  * when wc->stage == DROP_REFERENCE, this function checks
    5146             :  * reference count of the block pointed to. if the block
    5147             :  * is shared and we need update back refs for the subtree
    5148             :  * rooted at the block, this function changes wc->stage to
    5149             :  * UPDATE_BACKREF. if the block is shared and there is no
    5150             :  * need to update back, this function drops the reference
    5151             :  * to the block.
    5152             :  *
    5153             :  * NOTE: return value 1 means we should stop walking down.
    5154             :  */
    5155           0 : static noinline int do_walk_down(struct btrfs_trans_handle *trans,
    5156             :                                  struct btrfs_root *root,
    5157             :                                  struct btrfs_path *path,
    5158             :                                  struct walk_control *wc, int *lookup_info)
    5159             : {
    5160           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5161           0 :         u64 bytenr;
    5162           0 :         u64 generation;
    5163           0 :         u64 parent;
    5164           0 :         struct btrfs_tree_parent_check check = { 0 };
    5165           0 :         struct btrfs_key key;
    5166           0 :         struct btrfs_ref ref = { 0 };
    5167           0 :         struct extent_buffer *next;
    5168           0 :         int level = wc->level;
    5169           0 :         int reada = 0;
    5170           0 :         int ret = 0;
    5171           0 :         bool need_account = false;
    5172             : 
    5173           0 :         generation = btrfs_node_ptr_generation(path->nodes[level],
    5174             :                                                path->slots[level]);
    5175             :         /*
    5176             :          * if the lower level block was created before the snapshot
    5177             :          * was created, we know there is no need to update back refs
    5178             :          * for the subtree
    5179             :          */
    5180           0 :         if (wc->stage == UPDATE_BACKREF &&
    5181           0 :             generation <= root->root_key.offset) {
    5182           0 :                 *lookup_info = 1;
    5183           0 :                 return 1;
    5184             :         }
    5185             : 
    5186           0 :         bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
    5187             : 
    5188           0 :         check.level = level - 1;
    5189           0 :         check.transid = generation;
    5190           0 :         check.owner_root = root->root_key.objectid;
    5191           0 :         check.has_first_key = true;
    5192           0 :         btrfs_node_key_to_cpu(path->nodes[level], &check.first_key,
    5193             :                               path->slots[level]);
    5194             : 
    5195           0 :         next = find_extent_buffer(fs_info, bytenr);
    5196           0 :         if (!next) {
    5197           0 :                 next = btrfs_find_create_tree_block(fs_info, bytenr,
    5198             :                                 root->root_key.objectid, level - 1);
    5199           0 :                 if (IS_ERR(next))
    5200           0 :                         return PTR_ERR(next);
    5201             :                 reada = 1;
    5202             :         }
    5203           0 :         btrfs_tree_lock(next);
    5204             : 
    5205           0 :         ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
    5206             :                                        &wc->refs[level - 1],
    5207             :                                        &wc->flags[level - 1]);
    5208           0 :         if (ret < 0)
    5209           0 :                 goto out_unlock;
    5210             : 
    5211           0 :         if (unlikely(wc->refs[level - 1] == 0)) {
    5212           0 :                 btrfs_err(fs_info, "Missing references.");
    5213           0 :                 ret = -EIO;
    5214           0 :                 goto out_unlock;
    5215             :         }
    5216           0 :         *lookup_info = 0;
    5217             : 
    5218           0 :         if (wc->stage == DROP_REFERENCE) {
    5219           0 :                 if (wc->refs[level - 1] > 1) {
    5220           0 :                         need_account = true;
    5221           0 :                         if (level == 1 &&
    5222           0 :                             (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    5223           0 :                                 goto skip;
    5224             : 
    5225           0 :                         if (!wc->update_ref ||
    5226           0 :                             generation <= root->root_key.offset)
    5227           0 :                                 goto skip;
    5228             : 
    5229           0 :                         btrfs_node_key_to_cpu(path->nodes[level], &key,
    5230             :                                               path->slots[level]);
    5231           0 :                         ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
    5232           0 :                         if (ret < 0)
    5233           0 :                                 goto skip;
    5234             : 
    5235           0 :                         wc->stage = UPDATE_BACKREF;
    5236           0 :                         wc->shared_level = level - 1;
    5237             :                 }
    5238             :         } else {
    5239           0 :                 if (level == 1 &&
    5240           0 :                     (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
    5241           0 :                         goto skip;
    5242             :         }
    5243             : 
    5244           0 :         if (!btrfs_buffer_uptodate(next, generation, 0)) {
    5245           0 :                 btrfs_tree_unlock(next);
    5246           0 :                 free_extent_buffer(next);
    5247           0 :                 next = NULL;
    5248           0 :                 *lookup_info = 1;
    5249             :         }
    5250             : 
    5251           0 :         if (!next) {
    5252           0 :                 if (reada && level == 1)
    5253           0 :                         reada_walk_down(trans, root, wc, path);
    5254           0 :                 next = read_tree_block(fs_info, bytenr, &check);
    5255           0 :                 if (IS_ERR(next)) {
    5256           0 :                         return PTR_ERR(next);
    5257           0 :                 } else if (!extent_buffer_uptodate(next)) {
    5258           0 :                         free_extent_buffer(next);
    5259           0 :                         return -EIO;
    5260             :                 }
    5261           0 :                 btrfs_tree_lock(next);
    5262             :         }
    5263             : 
    5264           0 :         level--;
    5265           0 :         ASSERT(level == btrfs_header_level(next));
    5266           0 :         if (level != btrfs_header_level(next)) {
    5267           0 :                 btrfs_err(root->fs_info, "mismatched level");
    5268           0 :                 ret = -EIO;
    5269           0 :                 goto out_unlock;
    5270             :         }
    5271           0 :         path->nodes[level] = next;
    5272           0 :         path->slots[level] = 0;
    5273           0 :         path->locks[level] = BTRFS_WRITE_LOCK;
    5274           0 :         wc->level = level;
    5275           0 :         if (wc->level == 1)
    5276           0 :                 wc->reada_slot = 0;
    5277             :         return 0;
    5278           0 : skip:
    5279           0 :         wc->refs[level - 1] = 0;
    5280           0 :         wc->flags[level - 1] = 0;
    5281           0 :         if (wc->stage == DROP_REFERENCE) {
    5282           0 :                 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
    5283           0 :                         parent = path->nodes[level]->start;
    5284             :                 } else {
    5285           0 :                         ASSERT(root->root_key.objectid ==
    5286             :                                btrfs_header_owner(path->nodes[level]));
    5287           0 :                         if (root->root_key.objectid !=
    5288             :                             btrfs_header_owner(path->nodes[level])) {
    5289           0 :                                 btrfs_err(root->fs_info,
    5290             :                                                 "mismatched block owner");
    5291           0 :                                 ret = -EIO;
    5292           0 :                                 goto out_unlock;
    5293             :                         }
    5294             :                         parent = 0;
    5295             :                 }
    5296             : 
    5297             :                 /*
    5298             :                  * If we had a drop_progress we need to verify the refs are set
    5299             :                  * as expected.  If we find our ref then we know that from here
    5300             :                  * on out everything should be correct, and we can clear the
    5301             :                  * ->restarted flag.
    5302             :                  */
    5303           0 :                 if (wc->restarted) {
    5304           0 :                         ret = check_ref_exists(trans, root, bytenr, parent,
    5305             :                                                level - 1);
    5306           0 :                         if (ret < 0)
    5307           0 :                                 goto out_unlock;
    5308           0 :                         if (ret == 0)
    5309           0 :                                 goto no_delete;
    5310           0 :                         ret = 0;
    5311           0 :                         wc->restarted = 0;
    5312             :                 }
    5313             : 
    5314             :                 /*
    5315             :                  * Reloc tree doesn't contribute to qgroup numbers, and we have
    5316             :                  * already accounted them at merge time (replace_path),
    5317             :                  * thus we could skip expensive subtree trace here.
    5318             :                  */
    5319           0 :                 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
    5320             :                     need_account) {
    5321           0 :                         ret = btrfs_qgroup_trace_subtree(trans, next,
    5322             :                                                          generation, level - 1);
    5323           0 :                         if (ret) {
    5324           0 :                                 btrfs_err_rl(fs_info,
    5325             :                                              "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
    5326             :                                              ret);
    5327             :                         }
    5328             :                 }
    5329             : 
    5330             :                 /*
    5331             :                  * We need to update the next key in our walk control so we can
    5332             :                  * update the drop_progress key accordingly.  We don't care if
    5333             :                  * find_next_key doesn't find a key because that means we're at
    5334             :                  * the end and are going to clean up now.
    5335             :                  */
    5336           0 :                 wc->drop_level = level;
    5337           0 :                 find_next_key(path, level, &wc->drop_progress);
    5338             : 
    5339           0 :                 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
    5340           0 :                                        fs_info->nodesize, parent);
    5341           0 :                 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
    5342             :                                     0, false);
    5343           0 :                 ret = btrfs_free_extent(trans, &ref);
    5344           0 :                 if (ret)
    5345           0 :                         goto out_unlock;
    5346             :         }
    5347           0 : no_delete:
    5348           0 :         *lookup_info = 1;
    5349           0 :         ret = 1;
    5350             : 
    5351           0 : out_unlock:
    5352           0 :         btrfs_tree_unlock(next);
    5353           0 :         free_extent_buffer(next);
    5354             : 
    5355           0 :         return ret;
    5356             : }
    5357             : 
    5358             : /*
    5359             :  * helper to process tree block while walking up the tree.
    5360             :  *
    5361             :  * when wc->stage == DROP_REFERENCE, this function drops
    5362             :  * reference count on the block.
    5363             :  *
    5364             :  * when wc->stage == UPDATE_BACKREF, this function changes
    5365             :  * wc->stage back to DROP_REFERENCE if we changed wc->stage
    5366             :  * to UPDATE_BACKREF previously while processing the block.
    5367             :  *
    5368             :  * NOTE: return value 1 means we should stop walking up.
    5369             :  */
    5370           0 : static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
    5371             :                                  struct btrfs_root *root,
    5372             :                                  struct btrfs_path *path,
    5373             :                                  struct walk_control *wc)
    5374             : {
    5375           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5376           0 :         int ret;
    5377           0 :         int level = wc->level;
    5378           0 :         struct extent_buffer *eb = path->nodes[level];
    5379           0 :         u64 parent = 0;
    5380             : 
    5381           0 :         if (wc->stage == UPDATE_BACKREF) {
    5382           0 :                 BUG_ON(wc->shared_level < level);
    5383           0 :                 if (level < wc->shared_level)
    5384           0 :                         goto out;
    5385             : 
    5386           0 :                 ret = find_next_key(path, level + 1, &wc->update_progress);
    5387           0 :                 if (ret > 0)
    5388           0 :                         wc->update_ref = 0;
    5389             : 
    5390           0 :                 wc->stage = DROP_REFERENCE;
    5391           0 :                 wc->shared_level = -1;
    5392           0 :                 path->slots[level] = 0;
    5393             : 
    5394             :                 /*
    5395             :                  * check reference count again if the block isn't locked.
    5396             :                  * we should start walking down the tree again if reference
    5397             :                  * count is one.
    5398             :                  */
    5399           0 :                 if (!path->locks[level]) {
    5400           0 :                         BUG_ON(level == 0);
    5401           0 :                         btrfs_tree_lock(eb);
    5402           0 :                         path->locks[level] = BTRFS_WRITE_LOCK;
    5403             : 
    5404           0 :                         ret = btrfs_lookup_extent_info(trans, fs_info,
    5405             :                                                        eb->start, level, 1,
    5406             :                                                        &wc->refs[level],
    5407             :                                                        &wc->flags[level]);
    5408           0 :                         if (ret < 0) {
    5409           0 :                                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    5410           0 :                                 path->locks[level] = 0;
    5411           0 :                                 return ret;
    5412             :                         }
    5413           0 :                         BUG_ON(wc->refs[level] == 0);
    5414           0 :                         if (wc->refs[level] == 1) {
    5415           0 :                                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    5416           0 :                                 path->locks[level] = 0;
    5417           0 :                                 return 1;
    5418             :                         }
    5419             :                 }
    5420             :         }
    5421             : 
    5422             :         /* wc->stage == DROP_REFERENCE */
    5423           0 :         BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
    5424             : 
    5425           0 :         if (wc->refs[level] == 1) {
    5426           0 :                 if (level == 0) {
    5427           0 :                         if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
    5428           0 :                                 ret = btrfs_dec_ref(trans, root, eb, 1);
    5429             :                         else
    5430           0 :                                 ret = btrfs_dec_ref(trans, root, eb, 0);
    5431           0 :                         BUG_ON(ret); /* -ENOMEM */
    5432           0 :                         if (is_fstree(root->root_key.objectid)) {
    5433           0 :                                 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
    5434           0 :                                 if (ret) {
    5435           0 :                                         btrfs_err_rl(fs_info,
    5436             :         "error %d accounting leaf items, quota is out of sync, rescan required",
    5437             :                                              ret);
    5438             :                                 }
    5439             :                         }
    5440             :                 }
    5441             :                 /* Make block locked assertion in btrfs_clear_buffer_dirty happy. */
    5442           0 :                 if (!path->locks[level]) {
    5443           0 :                         btrfs_tree_lock(eb);
    5444           0 :                         path->locks[level] = BTRFS_WRITE_LOCK;
    5445             :                 }
    5446           0 :                 btrfs_clear_buffer_dirty(trans, eb);
    5447             :         }
    5448             : 
    5449           0 :         if (eb == root->node) {
    5450           0 :                 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
    5451           0 :                         parent = eb->start;
    5452           0 :                 else if (root->root_key.objectid != btrfs_header_owner(eb))
    5453           0 :                         goto owner_mismatch;
    5454             :         } else {
    5455           0 :                 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
    5456           0 :                         parent = path->nodes[level + 1]->start;
    5457           0 :                 else if (root->root_key.objectid !=
    5458           0 :                          btrfs_header_owner(path->nodes[level + 1]))
    5459           0 :                         goto owner_mismatch;
    5460             :         }
    5461             : 
    5462           0 :         btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent,
    5463           0 :                               wc->refs[level] == 1);
    5464           0 : out:
    5465           0 :         wc->refs[level] = 0;
    5466           0 :         wc->flags[level] = 0;
    5467           0 :         return 0;
    5468             : 
    5469           0 : owner_mismatch:
    5470           0 :         btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
    5471             :                      btrfs_header_owner(eb), root->root_key.objectid);
    5472             :         return -EUCLEAN;
    5473             : }
    5474             : 
    5475           0 : static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
    5476             :                                    struct btrfs_root *root,
    5477             :                                    struct btrfs_path *path,
    5478             :                                    struct walk_control *wc)
    5479             : {
    5480           0 :         int level = wc->level;
    5481           0 :         int lookup_info = 1;
    5482           0 :         int ret = 0;
    5483             : 
    5484           0 :         while (level >= 0) {
    5485           0 :                 ret = walk_down_proc(trans, root, path, wc, lookup_info);
    5486           0 :                 if (ret)
    5487             :                         break;
    5488             : 
    5489           0 :                 if (level == 0)
    5490             :                         break;
    5491             : 
    5492           0 :                 if (path->slots[level] >=
    5493           0 :                     btrfs_header_nritems(path->nodes[level]))
    5494             :                         break;
    5495             : 
    5496           0 :                 ret = do_walk_down(trans, root, path, wc, &lookup_info);
    5497           0 :                 if (ret > 0) {
    5498           0 :                         path->slots[level]++;
    5499           0 :                         continue;
    5500           0 :                 } else if (ret < 0)
    5501             :                         break;
    5502           0 :                 level = wc->level;
    5503             :         }
    5504           0 :         return (ret == 1) ? 0 : ret;
    5505             : }
    5506             : 
    5507           0 : static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
    5508             :                                  struct btrfs_root *root,
    5509             :                                  struct btrfs_path *path,
    5510             :                                  struct walk_control *wc, int max_level)
    5511             : {
    5512           0 :         int level = wc->level;
    5513           0 :         int ret;
    5514             : 
    5515           0 :         path->slots[level] = btrfs_header_nritems(path->nodes[level]);
    5516           0 :         while (level < max_level && path->nodes[level]) {
    5517           0 :                 wc->level = level;
    5518           0 :                 if (path->slots[level] + 1 <
    5519           0 :                     btrfs_header_nritems(path->nodes[level])) {
    5520           0 :                         path->slots[level]++;
    5521           0 :                         return 0;
    5522             :                 } else {
    5523           0 :                         ret = walk_up_proc(trans, root, path, wc);
    5524           0 :                         if (ret > 0)
    5525             :                                 return 0;
    5526           0 :                         if (ret < 0)
    5527           0 :                                 return ret;
    5528             : 
    5529           0 :                         if (path->locks[level]) {
    5530           0 :                                 btrfs_tree_unlock_rw(path->nodes[level],
    5531             :                                                      path->locks[level]);
    5532           0 :                                 path->locks[level] = 0;
    5533             :                         }
    5534           0 :                         free_extent_buffer(path->nodes[level]);
    5535           0 :                         path->nodes[level] = NULL;
    5536           0 :                         level++;
    5537             :                 }
    5538             :         }
    5539             :         return 1;
    5540             : }
    5541             : 
    5542             : /*
    5543             :  * drop a subvolume tree.
    5544             :  *
    5545             :  * this function traverses the tree freeing any blocks that only
    5546             :  * referenced by the tree.
    5547             :  *
    5548             :  * when a shared tree block is found. this function decreases its
    5549             :  * reference count by one. if update_ref is true, this function
    5550             :  * also make sure backrefs for the shared block and all lower level
    5551             :  * blocks are properly updated.
    5552             :  *
    5553             :  * If called with for_reloc == 0, may exit early with -EAGAIN
    5554             :  */
    5555           0 : int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
    5556             : {
    5557           0 :         const bool is_reloc_root = (root->root_key.objectid ==
    5558             :                                     BTRFS_TREE_RELOC_OBJECTID);
    5559           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5560           0 :         struct btrfs_path *path;
    5561           0 :         struct btrfs_trans_handle *trans;
    5562           0 :         struct btrfs_root *tree_root = fs_info->tree_root;
    5563           0 :         struct btrfs_root_item *root_item = &root->root_item;
    5564           0 :         struct walk_control *wc;
    5565           0 :         struct btrfs_key key;
    5566           0 :         int err = 0;
    5567           0 :         int ret;
    5568           0 :         int level;
    5569           0 :         bool root_dropped = false;
    5570           0 :         bool unfinished_drop = false;
    5571             : 
    5572           0 :         btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
    5573             : 
    5574           0 :         path = btrfs_alloc_path();
    5575           0 :         if (!path) {
    5576           0 :                 err = -ENOMEM;
    5577           0 :                 goto out;
    5578             :         }
    5579             : 
    5580           0 :         wc = kzalloc(sizeof(*wc), GFP_NOFS);
    5581           0 :         if (!wc) {
    5582           0 :                 btrfs_free_path(path);
    5583           0 :                 err = -ENOMEM;
    5584           0 :                 goto out;
    5585             :         }
    5586             : 
    5587             :         /*
    5588             :          * Use join to avoid potential EINTR from transaction start. See
    5589             :          * wait_reserve_ticket and the whole reservation callchain.
    5590             :          */
    5591           0 :         if (for_reloc)
    5592           0 :                 trans = btrfs_join_transaction(tree_root);
    5593             :         else
    5594           0 :                 trans = btrfs_start_transaction(tree_root, 0);
    5595           0 :         if (IS_ERR(trans)) {
    5596           0 :                 err = PTR_ERR(trans);
    5597           0 :                 goto out_free;
    5598             :         }
    5599             : 
    5600           0 :         err = btrfs_run_delayed_items(trans);
    5601           0 :         if (err)
    5602           0 :                 goto out_end_trans;
    5603             : 
    5604             :         /*
    5605             :          * This will help us catch people modifying the fs tree while we're
    5606             :          * dropping it.  It is unsafe to mess with the fs tree while it's being
    5607             :          * dropped as we unlock the root node and parent nodes as we walk down
    5608             :          * the tree, assuming nothing will change.  If something does change
    5609             :          * then we'll have stale information and drop references to blocks we've
    5610             :          * already dropped.
    5611             :          */
    5612           0 :         set_bit(BTRFS_ROOT_DELETING, &root->state);
    5613           0 :         unfinished_drop = test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
    5614             : 
    5615           0 :         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
    5616           0 :                 level = btrfs_header_level(root->node);
    5617           0 :                 path->nodes[level] = btrfs_lock_root_node(root);
    5618           0 :                 path->slots[level] = 0;
    5619           0 :                 path->locks[level] = BTRFS_WRITE_LOCK;
    5620           0 :                 memset(&wc->update_progress, 0,
    5621             :                        sizeof(wc->update_progress));
    5622             :         } else {
    5623           0 :                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
    5624           0 :                 memcpy(&wc->update_progress, &key,
    5625             :                        sizeof(wc->update_progress));
    5626             : 
    5627           0 :                 level = btrfs_root_drop_level(root_item);
    5628           0 :                 BUG_ON(level == 0);
    5629           0 :                 path->lowest_level = level;
    5630           0 :                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    5631           0 :                 path->lowest_level = 0;
    5632           0 :                 if (ret < 0) {
    5633           0 :                         err = ret;
    5634           0 :                         goto out_end_trans;
    5635             :                 }
    5636           0 :                 WARN_ON(ret > 0);
    5637             : 
    5638             :                 /*
    5639             :                  * unlock our path, this is safe because only this
    5640             :                  * function is allowed to delete this snapshot
    5641             :                  */
    5642           0 :                 btrfs_unlock_up_safe(path, 0);
    5643             : 
    5644           0 :                 level = btrfs_header_level(root->node);
    5645           0 :                 while (1) {
    5646           0 :                         btrfs_tree_lock(path->nodes[level]);
    5647           0 :                         path->locks[level] = BTRFS_WRITE_LOCK;
    5648             : 
    5649           0 :                         ret = btrfs_lookup_extent_info(trans, fs_info,
    5650           0 :                                                 path->nodes[level]->start,
    5651             :                                                 level, 1, &wc->refs[level],
    5652             :                                                 &wc->flags[level]);
    5653           0 :                         if (ret < 0) {
    5654           0 :                                 err = ret;
    5655           0 :                                 goto out_end_trans;
    5656             :                         }
    5657           0 :                         BUG_ON(wc->refs[level] == 0);
    5658             : 
    5659           0 :                         if (level == btrfs_root_drop_level(root_item))
    5660             :                                 break;
    5661             : 
    5662           0 :                         btrfs_tree_unlock(path->nodes[level]);
    5663           0 :                         path->locks[level] = 0;
    5664           0 :                         WARN_ON(wc->refs[level] != 1);
    5665           0 :                         level--;
    5666             :                 }
    5667             :         }
    5668             : 
    5669           0 :         wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
    5670           0 :         wc->level = level;
    5671           0 :         wc->shared_level = -1;
    5672           0 :         wc->stage = DROP_REFERENCE;
    5673           0 :         wc->update_ref = update_ref;
    5674           0 :         wc->keep_locks = 0;
    5675           0 :         wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
    5676             : 
    5677           0 :         while (1) {
    5678             : 
    5679           0 :                 ret = walk_down_tree(trans, root, path, wc);
    5680           0 :                 if (ret < 0) {
    5681           0 :                         btrfs_abort_transaction(trans, ret);
    5682           0 :                         err = ret;
    5683           0 :                         break;
    5684             :                 }
    5685             : 
    5686           0 :                 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
    5687           0 :                 if (ret < 0) {
    5688           0 :                         btrfs_abort_transaction(trans, ret);
    5689           0 :                         err = ret;
    5690           0 :                         break;
    5691             :                 }
    5692             : 
    5693           0 :                 if (ret > 0) {
    5694           0 :                         BUG_ON(wc->stage != DROP_REFERENCE);
    5695             :                         break;
    5696             :                 }
    5697             : 
    5698           0 :                 if (wc->stage == DROP_REFERENCE) {
    5699           0 :                         wc->drop_level = wc->level;
    5700           0 :                         btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
    5701             :                                               &wc->drop_progress,
    5702             :                                               path->slots[wc->drop_level]);
    5703             :                 }
    5704           0 :                 btrfs_cpu_key_to_disk(&root_item->drop_progress,
    5705           0 :                                       &wc->drop_progress);
    5706           0 :                 btrfs_set_root_drop_level(root_item, wc->drop_level);
    5707             : 
    5708           0 :                 BUG_ON(wc->level == 0);
    5709           0 :                 if (btrfs_should_end_transaction(trans) ||
    5710           0 :                     (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
    5711           0 :                         ret = btrfs_update_root(trans, tree_root,
    5712             :                                                 &root->root_key,
    5713             :                                                 root_item);
    5714           0 :                         if (ret) {
    5715           0 :                                 btrfs_abort_transaction(trans, ret);
    5716           0 :                                 err = ret;
    5717           0 :                                 goto out_end_trans;
    5718             :                         }
    5719             : 
    5720           0 :                         if (!is_reloc_root)
    5721           0 :                                 btrfs_set_last_root_drop_gen(fs_info, trans->transid);
    5722             : 
    5723           0 :                         btrfs_end_transaction_throttle(trans);
    5724           0 :                         if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
    5725           0 :                                 btrfs_debug(fs_info,
    5726             :                                             "drop snapshot early exit");
    5727           0 :                                 err = -EAGAIN;
    5728           0 :                                 goto out_free;
    5729             :                         }
    5730             : 
    5731             :                        /*
    5732             :                         * Use join to avoid potential EINTR from transaction
    5733             :                         * start. See wait_reserve_ticket and the whole
    5734             :                         * reservation callchain.
    5735             :                         */
    5736           0 :                         if (for_reloc)
    5737           0 :                                 trans = btrfs_join_transaction(tree_root);
    5738             :                         else
    5739           0 :                                 trans = btrfs_start_transaction(tree_root, 0);
    5740           0 :                         if (IS_ERR(trans)) {
    5741           0 :                                 err = PTR_ERR(trans);
    5742           0 :                                 goto out_free;
    5743             :                         }
    5744             :                 }
    5745             :         }
    5746           0 :         btrfs_release_path(path);
    5747           0 :         if (err)
    5748           0 :                 goto out_end_trans;
    5749             : 
    5750           0 :         ret = btrfs_del_root(trans, &root->root_key);
    5751           0 :         if (ret) {
    5752           0 :                 btrfs_abort_transaction(trans, ret);
    5753           0 :                 err = ret;
    5754           0 :                 goto out_end_trans;
    5755             :         }
    5756             : 
    5757           0 :         if (!is_reloc_root) {
    5758           0 :                 ret = btrfs_find_root(tree_root, &root->root_key, path,
    5759             :                                       NULL, NULL);
    5760           0 :                 if (ret < 0) {
    5761           0 :                         btrfs_abort_transaction(trans, ret);
    5762           0 :                         err = ret;
    5763           0 :                         goto out_end_trans;
    5764           0 :                 } else if (ret > 0) {
    5765             :                         /* if we fail to delete the orphan item this time
    5766             :                          * around, it'll get picked up the next time.
    5767             :                          *
    5768             :                          * The most common failure here is just -ENOENT.
    5769             :                          */
    5770           0 :                         btrfs_del_orphan_item(trans, tree_root,
    5771             :                                               root->root_key.objectid);
    5772             :                 }
    5773             :         }
    5774             : 
    5775             :         /*
    5776             :          * This subvolume is going to be completely dropped, and won't be
    5777             :          * recorded as dirty roots, thus pertrans meta rsv will not be freed at
    5778             :          * commit transaction time.  So free it here manually.
    5779             :          */
    5780           0 :         btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
    5781           0 :         btrfs_qgroup_free_meta_all_pertrans(root);
    5782             : 
    5783           0 :         if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
    5784           0 :                 btrfs_add_dropped_root(trans, root);
    5785             :         else
    5786           0 :                 btrfs_put_root(root);
    5787             :         root_dropped = true;
    5788           0 : out_end_trans:
    5789           0 :         if (!is_reloc_root)
    5790           0 :                 btrfs_set_last_root_drop_gen(fs_info, trans->transid);
    5791             : 
    5792           0 :         btrfs_end_transaction_throttle(trans);
    5793           0 : out_free:
    5794           0 :         kfree(wc);
    5795           0 :         btrfs_free_path(path);
    5796           0 : out:
    5797             :         /*
    5798             :          * We were an unfinished drop root, check to see if there are any
    5799             :          * pending, and if not clear and wake up any waiters.
    5800             :          */
    5801           0 :         if (!err && unfinished_drop)
    5802           0 :                 btrfs_maybe_wake_unfinished_drop(fs_info);
    5803             : 
    5804             :         /*
    5805             :          * So if we need to stop dropping the snapshot for whatever reason we
    5806             :          * need to make sure to add it back to the dead root list so that we
    5807             :          * keep trying to do the work later.  This also cleans up roots if we
    5808             :          * don't have it in the radix (like when we recover after a power fail
    5809             :          * or unmount) so we don't leak memory.
    5810             :          */
    5811           0 :         if (!for_reloc && !root_dropped)
    5812           0 :                 btrfs_add_dead_root(root);
    5813           0 :         return err;
    5814             : }
    5815             : 
    5816             : /*
    5817             :  * drop subtree rooted at tree block 'node'.
    5818             :  *
    5819             :  * NOTE: this function will unlock and release tree block 'node'
    5820             :  * only used by relocation code
    5821             :  */
    5822           0 : int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
    5823             :                         struct btrfs_root *root,
    5824             :                         struct extent_buffer *node,
    5825             :                         struct extent_buffer *parent)
    5826             : {
    5827           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5828           0 :         struct btrfs_path *path;
    5829           0 :         struct walk_control *wc;
    5830           0 :         int level;
    5831           0 :         int parent_level;
    5832           0 :         int ret = 0;
    5833           0 :         int wret;
    5834             : 
    5835           0 :         BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
    5836             : 
    5837           0 :         path = btrfs_alloc_path();
    5838           0 :         if (!path)
    5839             :                 return -ENOMEM;
    5840             : 
    5841           0 :         wc = kzalloc(sizeof(*wc), GFP_NOFS);
    5842           0 :         if (!wc) {
    5843           0 :                 btrfs_free_path(path);
    5844           0 :                 return -ENOMEM;
    5845             :         }
    5846             : 
    5847           0 :         btrfs_assert_tree_write_locked(parent);
    5848           0 :         parent_level = btrfs_header_level(parent);
    5849           0 :         atomic_inc(&parent->refs);
    5850           0 :         path->nodes[parent_level] = parent;
    5851           0 :         path->slots[parent_level] = btrfs_header_nritems(parent);
    5852             : 
    5853           0 :         btrfs_assert_tree_write_locked(node);
    5854           0 :         level = btrfs_header_level(node);
    5855           0 :         path->nodes[level] = node;
    5856           0 :         path->slots[level] = 0;
    5857           0 :         path->locks[level] = BTRFS_WRITE_LOCK;
    5858             : 
    5859           0 :         wc->refs[parent_level] = 1;
    5860           0 :         wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
    5861           0 :         wc->level = level;
    5862           0 :         wc->shared_level = -1;
    5863           0 :         wc->stage = DROP_REFERENCE;
    5864           0 :         wc->update_ref = 0;
    5865           0 :         wc->keep_locks = 1;
    5866           0 :         wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
    5867             : 
    5868           0 :         while (1) {
    5869           0 :                 wret = walk_down_tree(trans, root, path, wc);
    5870           0 :                 if (wret < 0) {
    5871             :                         ret = wret;
    5872             :                         break;
    5873             :                 }
    5874             : 
    5875           0 :                 wret = walk_up_tree(trans, root, path, wc, parent_level);
    5876           0 :                 if (wret < 0)
    5877           0 :                         ret = wret;
    5878           0 :                 if (wret != 0)
    5879             :                         break;
    5880             :         }
    5881             : 
    5882           0 :         kfree(wc);
    5883           0 :         btrfs_free_path(path);
    5884           0 :         return ret;
    5885             : }
    5886             : 
    5887           0 : int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
    5888             :                                    u64 start, u64 end)
    5889             : {
    5890           0 :         return unpin_extent_range(fs_info, start, end, false);
    5891             : }
    5892             : 
    5893             : /*
    5894             :  * It used to be that old block groups would be left around forever.
    5895             :  * Iterating over them would be enough to trim unused space.  Since we
    5896             :  * now automatically remove them, we also need to iterate over unallocated
    5897             :  * space.
    5898             :  *
    5899             :  * We don't want a transaction for this since the discard may take a
    5900             :  * substantial amount of time.  We don't require that a transaction be
    5901             :  * running, but we do need to take a running transaction into account
    5902             :  * to ensure that we're not discarding chunks that were released or
    5903             :  * allocated in the current transaction.
    5904             :  *
    5905             :  * Holding the chunks lock will prevent other threads from allocating
    5906             :  * or releasing chunks, but it won't prevent a running transaction
    5907             :  * from committing and releasing the memory that the pending chunks
    5908             :  * list head uses.  For that, we need to take a reference to the
    5909             :  * transaction and hold the commit root sem.  We only need to hold
    5910             :  * it while performing the free space search since we have already
    5911             :  * held back allocations.
    5912             :  */
    5913           0 : static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
    5914             : {
    5915           0 :         u64 start = BTRFS_DEVICE_RANGE_RESERVED, len = 0, end = 0;
    5916           0 :         int ret;
    5917             : 
    5918           0 :         *trimmed = 0;
    5919             : 
    5920             :         /* Discard not supported = nothing to do. */
    5921           0 :         if (!bdev_max_discard_sectors(device->bdev))
    5922             :                 return 0;
    5923             : 
    5924             :         /* Not writable = nothing to do. */
    5925           0 :         if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
    5926             :                 return 0;
    5927             : 
    5928             :         /* No free space = nothing to do. */
    5929           0 :         if (device->total_bytes <= device->bytes_used)
    5930             :                 return 0;
    5931             : 
    5932             :         ret = 0;
    5933             : 
    5934           0 :         while (1) {
    5935           0 :                 struct btrfs_fs_info *fs_info = device->fs_info;
    5936           0 :                 u64 bytes;
    5937             : 
    5938           0 :                 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
    5939           0 :                 if (ret)
    5940             :                         break;
    5941             : 
    5942           0 :                 find_first_clear_extent_bit(&device->alloc_state, start,
    5943             :                                             &start, &end,
    5944             :                                             CHUNK_TRIMMED | CHUNK_ALLOCATED);
    5945             : 
    5946             :                 /* Check if there are any CHUNK_* bits left */
    5947           0 :                 if (start > device->total_bytes) {
    5948           0 :                         WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
    5949           0 :                         btrfs_warn_in_rcu(fs_info,
    5950             : "ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
    5951             :                                           start, end - start + 1,
    5952             :                                           btrfs_dev_name(device),
    5953             :                                           device->total_bytes);
    5954           0 :                         mutex_unlock(&fs_info->chunk_mutex);
    5955           0 :                         ret = 0;
    5956           0 :                         break;
    5957             :                 }
    5958             : 
    5959             :                 /* Ensure we skip the reserved space on each device. */
    5960           0 :                 start = max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
    5961             : 
    5962             :                 /*
    5963             :                  * If find_first_clear_extent_bit find a range that spans the
    5964             :                  * end of the device it will set end to -1, in this case it's up
    5965             :                  * to the caller to trim the value to the size of the device.
    5966             :                  */
    5967           0 :                 end = min(end, device->total_bytes - 1);
    5968             : 
    5969           0 :                 len = end - start + 1;
    5970             : 
    5971             :                 /* We didn't find any extents */
    5972           0 :                 if (!len) {
    5973           0 :                         mutex_unlock(&fs_info->chunk_mutex);
    5974           0 :                         ret = 0;
    5975           0 :                         break;
    5976             :                 }
    5977             : 
    5978           0 :                 ret = btrfs_issue_discard(device->bdev, start, len,
    5979             :                                           &bytes);
    5980           0 :                 if (!ret)
    5981           0 :                         set_extent_bit(&device->alloc_state, start,
    5982           0 :                                        start + bytes - 1, CHUNK_TRIMMED, NULL);
    5983           0 :                 mutex_unlock(&fs_info->chunk_mutex);
    5984             : 
    5985           0 :                 if (ret)
    5986             :                         break;
    5987             : 
    5988           0 :                 start += len;
    5989           0 :                 *trimmed += bytes;
    5990             : 
    5991           0 :                 if (fatal_signal_pending(current)) {
    5992             :                         ret = -ERESTARTSYS;
    5993             :                         break;
    5994             :                 }
    5995             : 
    5996           0 :                 cond_resched();
    5997             :         }
    5998             : 
    5999           0 :         return ret;
    6000             : }
    6001             : 
    6002             : /*
    6003             :  * Trim the whole filesystem by:
    6004             :  * 1) trimming the free space in each block group
    6005             :  * 2) trimming the unallocated space on each device
    6006             :  *
    6007             :  * This will also continue trimming even if a block group or device encounters
    6008             :  * an error.  The return value will be the last error, or 0 if nothing bad
    6009             :  * happens.
    6010             :  */
    6011           0 : int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
    6012             : {
    6013           0 :         struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
    6014           0 :         struct btrfs_block_group *cache = NULL;
    6015           0 :         struct btrfs_device *device;
    6016           0 :         u64 group_trimmed;
    6017           0 :         u64 range_end = U64_MAX;
    6018           0 :         u64 start;
    6019           0 :         u64 end;
    6020           0 :         u64 trimmed = 0;
    6021           0 :         u64 bg_failed = 0;
    6022           0 :         u64 dev_failed = 0;
    6023           0 :         int bg_ret = 0;
    6024           0 :         int dev_ret = 0;
    6025           0 :         int ret = 0;
    6026             : 
    6027           0 :         if (range->start == U64_MAX)
    6028             :                 return -EINVAL;
    6029             : 
    6030             :         /*
    6031             :          * Check range overflow if range->len is set.
    6032             :          * The default range->len is U64_MAX.
    6033             :          */
    6034           0 :         if (range->len != U64_MAX &&
    6035           0 :             check_add_overflow(range->start, range->len, &range_end))
    6036             :                 return -EINVAL;
    6037             : 
    6038           0 :         cache = btrfs_lookup_first_block_group(fs_info, range->start);
    6039           0 :         for (; cache; cache = btrfs_next_block_group(cache)) {
    6040           0 :                 if (cache->start >= range_end) {
    6041           0 :                         btrfs_put_block_group(cache);
    6042           0 :                         break;
    6043             :                 }
    6044             : 
    6045           0 :                 start = max(range->start, cache->start);
    6046           0 :                 end = min(range_end, cache->start + cache->length);
    6047             : 
    6048           0 :                 if (end - start >= range->minlen) {
    6049           0 :                         if (!btrfs_block_group_done(cache)) {
    6050           0 :                                 ret = btrfs_cache_block_group(cache, true);
    6051           0 :                                 if (ret) {
    6052           0 :                                         bg_failed++;
    6053           0 :                                         bg_ret = ret;
    6054           0 :                                         continue;
    6055             :                                 }
    6056             :                         }
    6057           0 :                         ret = btrfs_trim_block_group(cache,
    6058             :                                                      &group_trimmed,
    6059             :                                                      start,
    6060             :                                                      end,
    6061             :                                                      range->minlen);
    6062             : 
    6063           0 :                         trimmed += group_trimmed;
    6064           0 :                         if (ret) {
    6065           0 :                                 bg_failed++;
    6066           0 :                                 bg_ret = ret;
    6067           0 :                                 continue;
    6068             :                         }
    6069             :                 }
    6070             :         }
    6071             : 
    6072           0 :         if (bg_failed)
    6073           0 :                 btrfs_warn(fs_info,
    6074             :                         "failed to trim %llu block group(s), last error %d",
    6075             :                         bg_failed, bg_ret);
    6076             : 
    6077           0 :         mutex_lock(&fs_devices->device_list_mutex);
    6078           0 :         list_for_each_entry(device, &fs_devices->devices, dev_list) {
    6079           0 :                 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
    6080           0 :                         continue;
    6081             : 
    6082           0 :                 ret = btrfs_trim_free_extents(device, &group_trimmed);
    6083           0 :                 if (ret) {
    6084             :                         dev_failed++;
    6085             :                         dev_ret = ret;
    6086             :                         break;
    6087             :                 }
    6088             : 
    6089           0 :                 trimmed += group_trimmed;
    6090             :         }
    6091           0 :         mutex_unlock(&fs_devices->device_list_mutex);
    6092             : 
    6093           0 :         if (dev_failed)
    6094           0 :                 btrfs_warn(fs_info,
    6095             :                         "failed to trim %llu device(s), last error %d",
    6096             :                         dev_failed, dev_ret);
    6097           0 :         range->len = trimmed;
    6098           0 :         if (bg_ret)
    6099           0 :                 return bg_ret;
    6100             :         return dev_ret;
    6101             : }

Generated by: LCOV version 1.14