LCOV - code coverage report
Current view:  top level - fs/btrfs - transaction.c (source / functions)
Test:          fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023
Date:          2023-07-31 20:08:34
Coverage:      Lines:     1060 / 1212   (87.5 %)
               Functions:   54 /   56   (96.4 %)

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2007 Oracle.  All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/fs.h>
       7             : #include <linux/slab.h>
       8             : #include <linux/sched.h>
       9             : #include <linux/sched/mm.h>
      10             : #include <linux/writeback.h>
      11             : #include <linux/pagemap.h>
      12             : #include <linux/blkdev.h>
      13             : #include <linux/uuid.h>
      14             : #include <linux/timekeeping.h>
      15             : #include "misc.h"
      16             : #include "ctree.h"
      17             : #include "disk-io.h"
      18             : #include "transaction.h"
      19             : #include "locking.h"
      20             : #include "tree-log.h"
      21             : #include "volumes.h"
      22             : #include "dev-replace.h"
      23             : #include "qgroup.h"
      24             : #include "block-group.h"
      25             : #include "space-info.h"
      26             : #include "zoned.h"
      27             : #include "fs.h"
      28             : #include "accessors.h"
      29             : #include "extent-tree.h"
      30             : #include "root-tree.h"
      31             : #include "defrag.h"
      32             : #include "dir-item.h"
      33             : #include "uuid-tree.h"
      34             : #include "ioctl.h"
      35             : #include "relocation.h"
      36             : #include "scrub.h"
      37             : 
      38             : static struct kmem_cache *btrfs_trans_handle_cachep;
      39             : 
      40             : #define BTRFS_ROOT_TRANS_TAG 0
      41             : 
      42             : /*
      43             :  * Transaction states and transitions
      44             :  *
      45             :  * No running transaction (fs tree blocks are not modified)
      46             :  * |
      47             :  * | To next stage:
      48             :  * |  Call start_transaction() variants. Except btrfs_join_transaction_nostart().
      49             :  * V
      50             :  * Transaction N [[TRANS_STATE_RUNNING]]
      51             :  * |
       52             :  * | New trans handles can be attached to transaction N by calling any of the
       53             :  * | start_transaction() variants.
      54             :  * |
      55             :  * | To next stage:
      56             :  * |  Call btrfs_commit_transaction() on any trans handle attached to
      57             :  * |  transaction N
      58             :  * V
      59             :  * Transaction N [[TRANS_STATE_COMMIT_START]]
      60             :  * |
      61             :  * | Will wait for previous running transaction to completely finish if there
      62             :  * | is one
      63             :  * |
       64             :  * | Then one of the following happens:
      65             :  * | - Wait for all other trans handle holders to release.
      66             :  * |   The btrfs_commit_transaction() caller will do the commit work.
      67             :  * | - Wait for current transaction to be committed by others.
       68             :  * |   Another btrfs_commit_transaction() caller will do the commit work.
      69             :  * |
      70             :  * | At this stage, only btrfs_join_transaction*() variants can attach
      71             :  * | to this running transaction.
      72             :  * | All other variants will wait for current one to finish and attach to
      73             :  * | transaction N+1.
      74             :  * |
      75             :  * | To next stage:
       76             :  * |  Caller is chosen to commit transaction N, and all other trans handles
       77             :  * |  have been released.
      78             :  * V
      79             :  * Transaction N [[TRANS_STATE_COMMIT_DOING]]
      80             :  * |
      81             :  * | The heavy lifting transaction work is started.
      82             :  * | From running delayed refs (modifying extent tree) to creating pending
      83             :  * | snapshots, running qgroups.
      84             :  * | In short, modify supporting trees to reflect modifications of subvolume
      85             :  * | trees.
      86             :  * |
      87             :  * | At this stage, all start_transaction() calls will wait for this
      88             :  * | transaction to finish and attach to transaction N+1.
      89             :  * |
      90             :  * | To next stage:
      91             :  * |  Until all supporting trees are updated.
      92             :  * V
      93             :  * Transaction N [[TRANS_STATE_UNBLOCKED]]
      94             :  * |                                                Transaction N+1
      95             :  * | All needed trees are modified, thus we only    [[TRANS_STATE_RUNNING]]
      96             :  * | need to write them back to disk and update     |
      97             :  * | super blocks.                                  |
      98             :  * |                                                |
       99             :  * | At this stage, a new transaction is allowed to |
     100             :  * | start.                                         |
     101             :  * | All new start_transaction() calls will be      |
     102             :  * | attached to transid N+1.                       |
     103             :  * |                                                |
     104             :  * | To next stage:                                 |
      105             :  * |  Until all tree blocks and super blocks are    |
     106             :  * |  written to block devices                      |
     107             :  * V                                                |
     108             :  * Transaction N [[TRANS_STATE_COMPLETED]]          V
     109             :  *   All tree blocks and super blocks are written.  Transaction N+1
     110             :  *   This transaction is finished and all its       [[TRANS_STATE_COMMIT_START]]
     111             :  *   data structures will be cleaned up.            | Life goes on
     112             :  */
     113             : static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
     114             :         [TRANS_STATE_RUNNING]           = 0U,
     115             :         [TRANS_STATE_COMMIT_START]      = (__TRANS_START | __TRANS_ATTACH),
     116             :         [TRANS_STATE_COMMIT_DOING]      = (__TRANS_START |
     117             :                                            __TRANS_ATTACH |
     118             :                                            __TRANS_JOIN |
     119             :                                            __TRANS_JOIN_NOSTART),
     120             :         [TRANS_STATE_UNBLOCKED]         = (__TRANS_START |
     121             :                                            __TRANS_ATTACH |
     122             :                                            __TRANS_JOIN |
     123             :                                            __TRANS_JOIN_NOLOCK |
     124             :                                            __TRANS_JOIN_NOSTART),
     125             :         [TRANS_STATE_SUPER_COMMITTED]   = (__TRANS_START |
     126             :                                            __TRANS_ATTACH |
     127             :                                            __TRANS_JOIN |
     128             :                                            __TRANS_JOIN_NOLOCK |
     129             :                                            __TRANS_JOIN_NOSTART),
     130             :         [TRANS_STATE_COMPLETED]         = (__TRANS_START |
     131             :                                            __TRANS_ATTACH |
     132             :                                            __TRANS_JOIN |
     133             :                                            __TRANS_JOIN_NOLOCK |
     134             :                                            __TRANS_JOIN_NOSTART),
     135             : };
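
To make the state diagram above concrete, here is a minimal editorial sketch (not part of the covered file, and carrying no coverage data) of how a typical caller walks through these states via the public helpers defined later in this file; example_update_tree() is a hypothetical caller and error handling is trimmed.

	/* Editorial sketch; example_update_tree() is a hypothetical caller. */
	static int example_update_tree(struct btrfs_root *root)
	{
		struct btrfs_trans_handle *trans;

		/*
		 * Joins transaction N (TRANS_STATE_RUNNING) or creates it,
		 * reserving metadata space for two tree items.
		 */
		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans))
			return PTR_ERR(trans);

		/* ... modify subvolume trees under this handle ... */

		/* Detach from transaction N and let another handle commit it ... */
		return btrfs_end_transaction(trans);

		/*
		 * ... or drive COMMIT_START -> COMMIT_DOING -> UNBLOCKED ->
		 * COMPLETED directly with:
		 *
		 *	return btrfs_commit_transaction(trans);
		 */
	}
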
     136             : 
     137    85169741 : void btrfs_put_transaction(struct btrfs_transaction *transaction)
     138             : {
     139    85169741 :         WARN_ON(refcount_read(&transaction->use_count) == 0);
     140    85169741 :         if (refcount_dec_and_test(&transaction->use_count)) {
     141      206187 :                 BUG_ON(!list_empty(&transaction->list));
     142      206187 :                 WARN_ON(!RB_EMPTY_ROOT(
     143             :                                 &transaction->delayed_refs.href_root.rb_root));
     144      206187 :                 WARN_ON(!RB_EMPTY_ROOT(
     145             :                                 &transaction->delayed_refs.dirty_extent_root));
     146      206187 :                 if (transaction->delayed_refs.pending_csums)
     147           0 :                         btrfs_err(transaction->fs_info,
     148             :                                   "pending csums is %llu",
     149             :                                   transaction->delayed_refs.pending_csums);
     150             :                 /*
     151             :                  * If any block groups are found in ->deleted_bgs then it's
     152             :                  * because the transaction was aborted and a commit did not
     153             :                  * happen (things failed before writing the new superblock
     154             :                  * and calling btrfs_finish_extent_commit()), so we can not
     155             :                  * discard the physical locations of the block groups.
     156             :                  */
     157      206187 :                 while (!list_empty(&transaction->deleted_bgs)) {
     158           0 :                         struct btrfs_block_group *cache;
     159             : 
     160           0 :                         cache = list_first_entry(&transaction->deleted_bgs,
     161             :                                                  struct btrfs_block_group,
     162             :                                                  bg_list);
     163           0 :                         list_del_init(&cache->bg_list);
     164           0 :                         btrfs_unfreeze_block_group(cache);
     165           0 :                         btrfs_put_block_group(cache);
     166             :                 }
     167      206187 :                 WARN_ON(!list_empty(&transaction->dev_update_list));
     168      206187 :                 kfree(transaction);
     169             :         }
     170    85092531 : }
     171             : 
     172      206348 : static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
     173             : {
     174      206348 :         struct btrfs_transaction *cur_trans = trans->transaction;
     175      206348 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     176      206348 :         struct btrfs_root *root, *tmp;
     177             : 
     178             :         /*
     179             :          * At this point no one can be using this transaction to modify any tree
     180             :          * and no one can start another transaction to modify any tree either.
     181             :          */
     182      206348 :         ASSERT(cur_trans->state == TRANS_STATE_COMMIT_DOING);
     183             : 
     184      206348 :         down_write(&fs_info->commit_root_sem);
     185             : 
     186      412696 :         if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
     187        3574 :                 fs_info->last_reloc_trans = trans->transid;
     188             : 
     189     1225339 :         list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits,
     190             :                                  dirty_list) {
     191     1018991 :                 list_del_init(&root->dirty_list);
     192     1018991 :                 free_extent_buffer(root->commit_root);
     193     1018991 :                 root->commit_root = btrfs_root_node(root);
     194     1018991 :                 extent_io_tree_release(&root->dirty_log_pages);
     195     1018991 :                 btrfs_qgroup_clean_swapped_blocks(root);
     196             :         }
     197             : 
     198             :         /* We can free old roots now. */
     199      206348 :         spin_lock(&cur_trans->dropped_roots_lock);
     200      206467 :         while (!list_empty(&cur_trans->dropped_roots)) {
     201         119 :                 root = list_first_entry(&cur_trans->dropped_roots,
     202             :                                         struct btrfs_root, root_list);
     203         119 :                 list_del_init(&root->root_list);
     204         119 :                 spin_unlock(&cur_trans->dropped_roots_lock);
     205         119 :                 btrfs_free_log(trans, root);
     206         119 :                 btrfs_drop_and_free_fs_root(fs_info, root);
     207         119 :                 spin_lock(&cur_trans->dropped_roots_lock);
     208             :         }
     209      206348 :         spin_unlock(&cur_trans->dropped_roots_lock);
     210             : 
     211      206348 :         up_write(&fs_info->commit_root_sem);
     212      206348 : }
     213             : 
     214    53318667 : static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
     215             :                                          unsigned int type)
     216             : {
     217    53318667 :         if (type & TRANS_EXTWRITERS)
     218    27886600 :                 atomic_inc(&trans->num_extwriters);
     219    53318667 : }
     220             : 
     221    53511296 : static inline void extwriter_counter_dec(struct btrfs_transaction *trans,
     222             :                                          unsigned int type)
     223             : {
     224    53511296 :         if (type & TRANS_EXTWRITERS)
     225    27949899 :                 atomic_dec(&trans->num_extwriters);
     226    53511553 : }
     227             : 
     228             : static inline void extwriter_counter_init(struct btrfs_transaction *trans,
     229             :                                           unsigned int type)
     230             : {
     231      206186 :         atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 1 : 0));
     232             : }
     233             : 
     234             : static inline int extwriter_counter_read(struct btrfs_transaction *trans)
     235             : {
     236      222787 :         return atomic_read(&trans->num_extwriters);
     237             : }
     238             : 
     239             : /*
     240             :  * To be called after doing the chunk btree updates right after allocating a new
     241             :  * chunk (after btrfs_chunk_alloc_add_chunk_item() is called), when removing a
      242             :  * chunk after all chunk btree updates, and after finishing the second phase of
      243             :  * chunk allocation (btrfs_create_pending_block_groups()), in case some block
     244             :  * group had its chunk item insertion delayed to the second phase.
     245             :  */
     246   110159794 : void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
     247             : {
     248   110159794 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     249             : 
     250   110159794 :         if (!trans->chunk_bytes_reserved)
     251             :                 return;
     252             : 
     253        2120 :         btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv,
     254             :                                 trans->chunk_bytes_reserved, NULL);
     255        2120 :         trans->chunk_bytes_reserved = 0;
     256             : }
     257             : 
     258             : /*
     259             :  * either allocate a new transaction or hop into the existing one
     260             :  */
     261    53524323 : static noinline int join_transaction(struct btrfs_fs_info *fs_info,
     262             :                                      unsigned int type)
     263             : {
     264    53524323 :         struct btrfs_transaction *cur_trans;
     265             : 
     266    53524323 :         spin_lock(&fs_info->trans_lock);
     267    53553280 : loop:
     268             :         /* The file system has been taken offline. No new transactions. */
     269    53553280 :         if (BTRFS_FS_ERROR(fs_info)) {
     270           0 :                 spin_unlock(&fs_info->trans_lock);
     271           0 :                 return -EROFS;
     272             :         }
     273             : 
     274    53553280 :         cur_trans = fs_info->running_transaction;
     275    53553280 :         if (cur_trans) {
     276    53332682 :                 if (TRANS_ABORTED(cur_trans)) {
     277           0 :                         spin_unlock(&fs_info->trans_lock);
     278           0 :                         return cur_trans->aborted;
     279             :                 }
     280    53332682 :                 if (btrfs_blocked_trans_types[cur_trans->state] & type) {
     281       14016 :                         spin_unlock(&fs_info->trans_lock);
     282       14016 :                         return -EBUSY;
     283             :                 }
     284    53318666 :                 refcount_inc(&cur_trans->use_count);
     285    53318669 :                 atomic_inc(&cur_trans->num_writers);
     286    53318669 :                 extwriter_counter_inc(cur_trans, type);
     287    53318667 :                 spin_unlock(&fs_info->trans_lock);
     288    53318667 :                 btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers);
     289    53318667 :                 btrfs_lockdep_acquire(fs_info, btrfs_trans_num_extwriters);
     290    53318667 :                 return 0;
     291             :         }
     292      220598 :         spin_unlock(&fs_info->trans_lock);
     293             : 
     294             :         /*
     295             :          * If we are ATTACH, we just want to catch the current transaction,
     296             :          * and commit it. If there is no transaction, just return ENOENT.
     297             :          */
     298      220576 :         if (type == TRANS_ATTACH)
     299             :                 return -ENOENT;
     300             : 
     301             :         /*
     302             :          * JOIN_NOLOCK only happens during the transaction commit, so
     303             :          * it is impossible that ->running_transaction is NULL
     304             :          */
     305      207619 :         BUG_ON(type == TRANS_JOIN_NOLOCK);
     306             : 
     307      207619 :         cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS);
     308      207589 :         if (!cur_trans)
     309             :                 return -ENOMEM;
     310             : 
     311      207589 :         btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers);
     312      207589 :         btrfs_lockdep_acquire(fs_info, btrfs_trans_num_extwriters);
     313             : 
     314      207589 :         spin_lock(&fs_info->trans_lock);
     315      207635 :         if (fs_info->running_transaction) {
     316             :                 /*
     317             :                  * someone started a transaction after we unlocked.  Make sure
     318             :                  * to redo the checks above
     319             :                  */
     320        1449 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
     321        1449 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
     322        1449 :                 kfree(cur_trans);
     323        1449 :                 goto loop;
     324      206186 :         } else if (BTRFS_FS_ERROR(fs_info)) {
     325           0 :                 spin_unlock(&fs_info->trans_lock);
     326           0 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
     327           0 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
     328           0 :                 kfree(cur_trans);
     329           0 :                 return -EROFS;
     330             :         }
     331             : 
     332      206186 :         cur_trans->fs_info = fs_info;
     333      206186 :         atomic_set(&cur_trans->pending_ordered, 0);
     334      206186 :         init_waitqueue_head(&cur_trans->pending_wait);
     335      206186 :         atomic_set(&cur_trans->num_writers, 1);
     336      206186 :         extwriter_counter_init(cur_trans, type);
     337      206186 :         init_waitqueue_head(&cur_trans->writer_wait);
     338      206186 :         init_waitqueue_head(&cur_trans->commit_wait);
     339      206186 :         cur_trans->state = TRANS_STATE_RUNNING;
     340             :         /*
     341             :          * One for this trans handle, one so it will live on until we
     342             :          * commit the transaction.
     343             :          */
     344      206186 :         refcount_set(&cur_trans->use_count, 2);
     345      206186 :         cur_trans->flags = 0;
     346      206186 :         cur_trans->start_time = ktime_get_seconds();
     347             : 
     348      206186 :         memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
     349             : 
     350      206186 :         cur_trans->delayed_refs.href_root = RB_ROOT_CACHED;
     351      206186 :         cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
     352      206186 :         atomic_set(&cur_trans->delayed_refs.num_entries, 0);
     353             : 
     354             :         /*
     355             :          * although the tree mod log is per file system and not per transaction,
     356             :          * the log must never go across transaction boundaries.
     357             :          */
     358      206186 :         smp_mb();
     359      206186 :         if (!list_empty(&fs_info->tree_mod_seq_list))
     360           0 :                 WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when creating a fresh transaction\n");
     361      206186 :         if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
     362           0 :                 WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when creating a fresh transaction\n");
     363      206186 :         atomic64_set(&fs_info->tree_mod_seq, 0);
     364             : 
     365      206186 :         spin_lock_init(&cur_trans->delayed_refs.lock);
     366             : 
     367      206186 :         INIT_LIST_HEAD(&cur_trans->pending_snapshots);
     368      206186 :         INIT_LIST_HEAD(&cur_trans->dev_update_list);
     369      206186 :         INIT_LIST_HEAD(&cur_trans->switch_commits);
     370      206186 :         INIT_LIST_HEAD(&cur_trans->dirty_bgs);
     371      206186 :         INIT_LIST_HEAD(&cur_trans->io_bgs);
     372      206186 :         INIT_LIST_HEAD(&cur_trans->dropped_roots);
     373      206186 :         mutex_init(&cur_trans->cache_write_mutex);
     374      206186 :         spin_lock_init(&cur_trans->dirty_bgs_lock);
     375      206186 :         INIT_LIST_HEAD(&cur_trans->deleted_bgs);
     376      206186 :         spin_lock_init(&cur_trans->dropped_roots_lock);
     377      206186 :         list_add_tail(&cur_trans->list, &fs_info->trans_list);
     378      206186 :         extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
     379             :                         IO_TREE_TRANS_DIRTY_PAGES);
     380      206186 :         extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
     381             :                         IO_TREE_FS_PINNED_EXTENTS);
     382      206186 :         fs_info->generation++;
     383      206186 :         cur_trans->transid = fs_info->generation;
     384      206186 :         fs_info->running_transaction = cur_trans;
     385      206186 :         cur_trans->aborted = 0;
     386      206186 :         spin_unlock(&fs_info->trans_lock);
     387             : 
     388      206186 :         return 0;
     389             : }
     390             : 
     391             : /*
     392             :  * This does all the record keeping required to make sure that a shareable root
     393             :  * is properly recorded in a given transaction.  This is required to make sure
     394             :  * the old root from before we joined the transaction is deleted when the
     395             :  * transaction commits.
     396             :  */
     397      159168 : static int record_root_in_trans(struct btrfs_trans_handle *trans,
     398             :                                struct btrfs_root *root,
     399             :                                int force)
     400             : {
     401      159168 :         struct btrfs_fs_info *fs_info = root->fs_info;
     402      159168 :         int ret = 0;
     403             : 
     404      159168 :         if ((test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
     405      159168 :             root->last_trans < trans->transid) || force) {
     406      310678 :                 WARN_ON(!force && root->commit_root != root->node);
     407             : 
     408             :                 /*
     409             :                  * see below for IN_TRANS_SETUP usage rules
     410             :                  * we have the reloc mutex held now, so there
     411             :                  * is only one writer in this function
     412             :                  */
     413      155339 :                 set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
     414             : 
     415             :                 /* make sure readers find IN_TRANS_SETUP before
     416             :                  * they find our root->last_trans update
     417             :                  */
     418      155339 :                 smp_wmb();
     419             : 
     420      155339 :                 spin_lock(&fs_info->fs_roots_radix_lock);
     421      155339 :                 if (root->last_trans == trans->transid && !force) {
     422           0 :                         spin_unlock(&fs_info->fs_roots_radix_lock);
     423           0 :                         return 0;
     424             :                 }
     425      155339 :                 radix_tree_tag_set(&fs_info->fs_roots_radix,
     426      155339 :                                    (unsigned long)root->root_key.objectid,
     427             :                                    BTRFS_ROOT_TRANS_TAG);
     428      155339 :                 spin_unlock(&fs_info->fs_roots_radix_lock);
     429      155339 :                 root->last_trans = trans->transid;
     430             : 
     431             :                 /* this is pretty tricky.  We don't want to
     432             :                  * take the relocation lock in btrfs_record_root_in_trans
     433             :                  * unless we're really doing the first setup for this root in
     434             :                  * this transaction.
     435             :                  *
     436             :                  * Normally we'd use root->last_trans as a flag to decide
     437             :                  * if we want to take the expensive mutex.
     438             :                  *
     439             :                  * But, we have to set root->last_trans before we
     440             :                  * init the relocation root, otherwise, we trip over warnings
     441             :                  * in ctree.c.  The solution used here is to flag ourselves
     442             :                  * with root IN_TRANS_SETUP.  When this is 1, we're still
     443             :                  * fixing up the reloc trees and everyone must wait.
     444             :                  *
     445             :                  * When this is zero, they can trust root->last_trans and fly
     446             :                  * through btrfs_record_root_in_trans without having to take the
     447             :                  * lock.  smp_wmb() makes sure that all the writes above are
     448             :                  * done before we pop in the zero below
     449             :                  */
     450      155339 :                 ret = btrfs_init_reloc_root(trans, root);
     451      155339 :                 smp_mb__before_atomic();
     452      155339 :                 clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
     453             :         }
     454             :         return ret;
     455             : }
     456             : 
     457             : 
     458         119 : void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
     459             :                             struct btrfs_root *root)
     460             : {
     461         119 :         struct btrfs_fs_info *fs_info = root->fs_info;
     462         119 :         struct btrfs_transaction *cur_trans = trans->transaction;
     463             : 
     464             :         /* Add ourselves to the transaction dropped list */
     465         119 :         spin_lock(&cur_trans->dropped_roots_lock);
     466         119 :         list_add_tail(&root->root_list, &cur_trans->dropped_roots);
     467         119 :         spin_unlock(&cur_trans->dropped_roots_lock);
     468             : 
     469             :         /* Make sure we don't try to update the root at commit time */
     470         119 :         spin_lock(&fs_info->fs_roots_radix_lock);
     471         119 :         radix_tree_tag_clear(&fs_info->fs_roots_radix,
     472         119 :                              (unsigned long)root->root_key.objectid,
     473             :                              BTRFS_ROOT_TRANS_TAG);
     474         119 :         spin_unlock(&fs_info->fs_roots_radix_lock);
     475         119 : }
     476             : 
     477    53543874 : int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
     478             :                                struct btrfs_root *root)
     479             : {
     480    53543874 :         struct btrfs_fs_info *fs_info = root->fs_info;
     481    53543874 :         int ret;
     482             : 
     483    53543874 :         if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
     484             :                 return 0;
     485             : 
     486             :         /*
     487             :          * see record_root_in_trans for comments about IN_TRANS_SETUP usage
     488             :          * and barriers
     489             :          */
     490    46012854 :         smp_rmb();
     491    46012736 :         if (root->last_trans == trans->transid &&
     492           0 :             !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state))
     493             :                 return 0;
     494             : 
     495      156808 :         mutex_lock(&fs_info->reloc_mutex);
     496      156768 :         ret = record_root_in_trans(trans, root, 0);
     497      156768 :         mutex_unlock(&fs_info->reloc_mutex);
     498             : 
     499      156768 :         return ret;
     500             : }
     501             : 
     502             : static inline int is_transaction_blocked(struct btrfs_transaction *trans)
     503             : {
     504    29157071 :         return (trans->state >= TRANS_STATE_COMMIT_START &&
     505    29156863 :                 trans->state < TRANS_STATE_UNBLOCKED &&
     506       93972 :                 !TRANS_ABORTED(trans));
     507             : }
     508             : 
     509             : /* wait for commit against the current transaction to become unblocked
     510             :  * when this is done, it is safe to start a new transaction, but the current
     511             :  * transaction might not be fully on disk.
     512             :  */
     513    29210608 : static void wait_current_trans(struct btrfs_fs_info *fs_info)
     514             : {
     515    29210608 :         struct btrfs_transaction *cur_trans;
     516             : 
     517    29210608 :         spin_lock(&fs_info->trans_lock);
     518    29212485 :         cur_trans = fs_info->running_transaction;
     519    29212485 :         if (cur_trans && is_transaction_blocked(cur_trans)) {
     520       93971 :                 refcount_inc(&cur_trans->use_count);
     521       93971 :                 spin_unlock(&fs_info->trans_lock);
     522             : 
     523       93971 :                 btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
     524      187697 :                 wait_event(fs_info->transaction_wait,
     525             :                            cur_trans->state >= TRANS_STATE_UNBLOCKED ||
     526             :                            TRANS_ABORTED(cur_trans));
     527       93970 :                 btrfs_put_transaction(cur_trans);
     528             :         } else {
     529    29118514 :                 spin_unlock(&fs_info->trans_lock);
     530             :         }
     531    29212313 : }
     532             : 
     533    53602433 : static int may_wait_transaction(struct btrfs_fs_info *fs_info, int type)
     534             : {
     535   107204866 :         if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
     536             :                 return 0;
     537             : 
     538    53602148 :         if (type == TRANS_START)
     539    27931506 :                 return 1;
     540             : 
     541             :         return 0;
     542             : }
     543             : 
     544    25556688 : static inline bool need_reserve_reloc_root(struct btrfs_root *root)
     545             : {
     546    25556688 :         struct btrfs_fs_info *fs_info = root->fs_info;
     547             : 
     548    25594076 :         if (!fs_info->reloc_ctl ||
     549       37388 :             !test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
     550       15283 :             root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
     551       15283 :             root->reloc_root)
     552    25555009 :                 return false;
     553             : 
     554             :         return true;
     555             : }
     556             : 
     557             : static struct btrfs_trans_handle *
     558    53517750 : start_transaction(struct btrfs_root *root, unsigned int num_items,
     559             :                   unsigned int type, enum btrfs_reserve_flush_enum flush,
     560             :                   bool enforce_qgroups)
     561             : {
     562    53517750 :         struct btrfs_fs_info *fs_info = root->fs_info;
     563    53517750 :         struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
     564    53517750 :         struct btrfs_trans_handle *h;
     565    53517750 :         struct btrfs_transaction *cur_trans;
     566    53517750 :         u64 num_bytes = 0;
     567    53517750 :         u64 qgroup_reserved = 0;
     568    53517750 :         bool reloc_reserved = false;
     569    53517750 :         bool do_chunk_alloc = false;
     570    53517750 :         int ret;
     571             : 
     572    53517750 :         if (BTRFS_FS_ERROR(fs_info))
     573             :                 return ERR_PTR(-EROFS);
     574             : 
     575    53517750 :         if (current->journal_info) {
     576           0 :                 WARN_ON(type & TRANS_EXTWRITERS);
     577           0 :                 h = current->journal_info;
     578           0 :                 refcount_inc(&h->use_count);
     579           0 :                 WARN_ON(refcount_read(&h->use_count) > 2);
     580           0 :                 h->orig_rsv = h->block_rsv;
     581           0 :                 h->block_rsv = NULL;
     582           0 :                 goto got_it;
     583             :         }
     584             : 
     585             :         /*
     586             :          * Do the reservation before we join the transaction so we can do all
     587             :          * the appropriate flushing if need be.
     588             :          */
     589    53517750 :         if (num_items && root != fs_info->chunk_root) {
     590    25566384 :                 struct btrfs_block_rsv *rsv = &fs_info->trans_block_rsv;
     591    25566384 :                 u64 delayed_refs_bytes = 0;
     592             : 
     593    25566384 :                 qgroup_reserved = num_items * fs_info->nodesize;
     594    25566384 :                 ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved,
     595             :                                 enforce_qgroups);
     596    25563778 :                 if (ret)
     597       12160 :                         return ERR_PTR(ret);
     598             : 
     599             :                 /*
     600             :                  * We want to reserve all the bytes we may need all at once, so
     601             :                  * we only do 1 enospc flushing cycle per transaction start.  We
     602             :                  * accomplish this by simply assuming we'll do num_items worth
     603             :                  * of delayed refs updates in this trans handle, and refill that
     604             :                  * amount for whatever is missing in the reserve.
     605             :                  */
     606    25551618 :                 num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
     607    25551618 :                 if (flush == BTRFS_RESERVE_FLUSH_ALL &&
     608             :                     !btrfs_block_rsv_full(delayed_refs_rsv)) {
     609     1413328 :                         delayed_refs_bytes = btrfs_calc_delayed_ref_bytes(fs_info,
     610             :                                                                           num_items);
     611     1413328 :                         num_bytes += delayed_refs_bytes;
     612             :                 }
     613             : 
     614             :                 /*
     615             :                  * Do the reservation for the relocation root creation
     616             :                  */
     617    25551618 :                 if (need_reserve_reloc_root(root)) {
     618        1679 :                         num_bytes += fs_info->nodesize;
     619        1679 :                         reloc_reserved = true;
     620             :                 }
     621             : 
     622    25546226 :                 ret = btrfs_block_rsv_add(fs_info, rsv, num_bytes, flush);
     623    25593449 :                 if (ret)
     624       22910 :                         goto reserve_fail;
     625    25570539 :                 if (delayed_refs_bytes) {
     626     1393572 :                         btrfs_migrate_to_delayed_refs_rsv(fs_info, rsv,
     627             :                                                           delayed_refs_bytes);
     628     1393551 :                         num_bytes -= delayed_refs_bytes;
     629             :                 }
     630             : 
     631    25570518 :                 if (rsv->space_info->force_alloc)
     632           0 :                         do_chunk_alloc = true;
     633    27951366 :         } else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL &&
     634             :                    !btrfs_block_rsv_full(delayed_refs_rsv)) {
     635             :                 /*
     636             :                  * Some people call with btrfs_start_transaction(root, 0)
     637             :                  * because they can be throttled, but have some other mechanism
     638             :                  * for reserving space.  We still want these guys to refill the
      639             :                  * delayed block_rsv so just add 1 item's worth of reservation
     640             :                  * here.
     641             :                  */
     642     2124147 :                 ret = btrfs_delayed_refs_rsv_refill(fs_info, flush);
     643     2124155 :                 if (ret)
     644           0 :                         goto reserve_fail;
     645             :         }
     646    53521892 : again:
     647    53521908 :         h = kmem_cache_zalloc(btrfs_trans_handle_cachep, GFP_NOFS);
     648    53520736 :         if (!h) {
     649           0 :                 ret = -ENOMEM;
     650           0 :                 goto alloc_fail;
     651             :         }
     652             : 
     653             :         /*
     654             :          * If we are JOIN_NOLOCK we're already committing a transaction and
     655             :          * waiting on this guy, so we don't need to do the sb_start_intwrite
     656             :          * because we're already holding a ref.  We need this because we could
     657             :          * have raced in and did an fsync() on a file which can kick a commit
     658             :          * and then we deadlock with somebody doing a freeze.
     659             :          *
     660             :          * If we are ATTACH, it means we just want to catch the current
     661             :          * transaction and commit it, so we needn't do sb_start_intwrite(). 
     662             :          */
     663    53520736 :         if (type & __TRANS_FREEZABLE)
     664    52337271 :                 sb_start_intwrite(fs_info->sb);
     665             : 
     666    53519346 :         if (may_wait_transaction(fs_info, type))
     667    27931273 :                 wait_current_trans(fs_info);
     668             : 
     669    53526705 :         do {
     670    53526705 :                 ret = join_transaction(fs_info, type);
     671    53551768 :                 if (ret == -EBUSY) {
     672       14015 :                         wait_current_trans(fs_info);
     673       14010 :                         if (unlikely(type == TRANS_ATTACH ||
     674             :                                      type == TRANS_JOIN_NOSTART))
     675             :                                 ret = -ENOENT;
     676             :                 }
     677    53550457 :         } while (ret == -EBUSY);
     678             : 
     679    53539057 :         if (ret < 0)
     680       14258 :                 goto join_fail;
     681             : 
     682    53524799 :         cur_trans = fs_info->running_transaction;
     683             : 
     684    53524799 :         h->transid = cur_trans->transid;
     685    53524799 :         h->transaction = cur_trans;
     686    53524799 :         refcount_set(&h->use_count, 1);
     687    53524799 :         h->fs_info = root->fs_info;
     688             : 
     689    53524799 :         h->type = type;
     690    53524799 :         INIT_LIST_HEAD(&h->new_bgs);
     691             : 
     692    53524799 :         smp_mb();
     693    53611409 :         if (cur_trans->state >= TRANS_STATE_COMMIT_START &&
     694       86694 :             may_wait_transaction(fs_info, type)) {
     695          16 :                 current->journal_info = h;
     696          16 :                 btrfs_commit_transaction(h);
     697          16 :                 goto again;
     698             :         }
     699             : 
     700    53524699 :         if (num_bytes) {
     701    25570675 :                 trace_btrfs_space_reservation(fs_info, "transaction",
     702             :                                               h->transid, num_bytes, 1);
     703    25570629 :                 h->block_rsv = &fs_info->trans_block_rsv;
     704    25570629 :                 h->bytes_reserved = num_bytes;
     705    25570629 :                 h->reloc_reserved = reloc_reserved;
     706             :         }
     707             : 
     708    27954024 : got_it:
     709    53524653 :         if (!current->journal_info)
     710    53524526 :                 current->journal_info = h;
     711             : 
     712             :         /*
     713             :          * If the space_info is marked ALLOC_FORCE then we'll get upgraded to
     714             :          * ALLOC_FORCE the first run through, and then we won't allocate for
     715             :          * anybody else who races in later.  We don't care about the return
     716             :          * value here.
     717             :          */
     718    53524653 :         if (do_chunk_alloc && num_bytes) {
     719           0 :                 u64 flags = h->block_rsv->space_info->flags;
     720             : 
     721           0 :                 btrfs_chunk_alloc(h, btrfs_get_alloc_profile(fs_info, flags),
     722             :                                   CHUNK_ALLOC_NO_FORCE);
     723             :         }
     724             : 
     725             :         /*
     726             :          * btrfs_record_root_in_trans() needs to alloc new extents, and may
     727             :          * call btrfs_join_transaction() while we're also starting a
     728             :          * transaction.
     729             :          *
      730             :          * Thus it needs to be called after current->journal_info is initialized,
     731             :          * or we can deadlock.
     732             :          */
     733    53524653 :         ret = btrfs_record_root_in_trans(h, root);
     734    53523373 :         if (ret) {
     735             :                 /*
     736             :                  * The transaction handle is fully initialized and linked with
     737             :                  * other structures so it needs to be ended in case of errors,
     738             :                  * not just freed.
     739             :                  */
     740           0 :                 btrfs_end_transaction(h);
     741           0 :                 return ERR_PTR(ret);
     742             :         }
     743             : 
     744             :         return h;
     745             : 
     746             : join_fail:
     747       14258 :         if (type & __TRANS_FREEZABLE)
     748           0 :                 sb_end_intwrite(fs_info->sb);
     749       14258 :         kmem_cache_free(btrfs_trans_handle_cachep, h);
     750       14258 : alloc_fail:
     751       14258 :         if (num_bytes)
     752           0 :                 btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
     753             :                                         num_bytes, NULL);
     754       14258 : reserve_fail:
     755       37168 :         btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
     756       37156 :         return ERR_PTR(ret);
     757             : }
     758             : 
     759    26553244 : struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
     760             :                                                    unsigned int num_items)
     761             : {
     762    26553244 :         return start_transaction(root, num_items, TRANS_START,
     763             :                                  BTRFS_RESERVE_FLUSH_ALL, true);
     764             : }
     765             : 
     766     1386995 : struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
     767             :                                         struct btrfs_root *root,
     768             :                                         unsigned int num_items)
     769             : {
     770     1386995 :         return start_transaction(root, num_items, TRANS_START,
     771             :                                  BTRFS_RESERVE_FLUSH_ALL_STEAL, false);
     772             : }
     773             : 
     774    24406197 : struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
     775             : {
     776    24406197 :         return start_transaction(root, 0, TRANS_JOIN, BTRFS_RESERVE_NO_FLUSH,
     777             :                                  true);
     778             : }
     779             : 
     780          41 : struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *root)
     781             : {
     782          41 :         return start_transaction(root, 0, TRANS_JOIN_NOLOCK,
     783             :                                  BTRFS_RESERVE_NO_FLUSH, true);
     784             : }
     785             : 
     786             : /*
      787             :  * Similar to regular join but it never starts a transaction: neither when
      788             :  * none is running, nor after waiting for the current one to finish.
     789             :  */
     790     1150843 : struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root)
     791             : {
     792     1150843 :         return start_transaction(root, 0, TRANS_JOIN_NOSTART,
     793             :                                  BTRFS_RESERVE_NO_FLUSH, true);
     794             : }
     795             : 
     796             : /*
     797             :  * btrfs_attach_transaction() - catch the running transaction
     798             :  *
      799             :  * It is used when we want to commit the current transaction, but
     800             :  * don't want to start a new one.
     801             :  *
      802             :  * Note: If this function returns -ENOENT, it just means there is no
      803             :  * running transaction. But it is possible that an inactive transaction
      804             :  * is still in memory, not fully on disk. If you need to be sure there is
      805             :  * no inactive transaction in the fs when -ENOENT is returned, you should
     806             :  * invoke
     807             :  *     btrfs_attach_transaction_barrier()
     808             :  */
     809         944 : struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
     810             : {
     811         944 :         return start_transaction(root, 0, TRANS_ATTACH,
     812             :                                  BTRFS_RESERVE_NO_FLUSH, true);
     813             : }
     814             : 
     815             : /*
     816             :  * btrfs_attach_transaction_barrier() - catch the running transaction
     817             :  *
      818             :  * It is similar to the above function; the difference is that this one
     819             :  * will wait for all the inactive transactions until they fully
     820             :  * complete.
     821             :  */
     822             : struct btrfs_trans_handle *
     823       30724 : btrfs_attach_transaction_barrier(struct btrfs_root *root)
     824             : {
     825       30724 :         struct btrfs_trans_handle *trans;
     826             : 
     827       30724 :         trans = start_transaction(root, 0, TRANS_ATTACH,
     828             :                                   BTRFS_RESERVE_NO_FLUSH, true);
     829       30689 :         if (trans == ERR_PTR(-ENOENT)) {
     830       14115 :                 int ret;
     831             : 
     832       14115 :                 ret = btrfs_wait_for_commit(root->fs_info, 0);
     833       14131 :                 if (ret)
     834           0 :                         return ERR_PTR(ret);
     835             :         }
     836             : 
     837             :         return trans;
     838             : }
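
As a hedged illustration of why a caller would pick the barrier variant over plain attach, the sketch below shows a sync-style user; example_sync_fs() is hypothetical and not part of the covered file, but the -ENOENT handling follows the convention documented above.

	/* Editorial sketch; example_sync_fs() is a hypothetical caller. */
	static int example_sync_fs(struct btrfs_root *root)
	{
		struct btrfs_trans_handle *trans;

		trans = btrfs_attach_transaction_barrier(root);
		if (IS_ERR(trans)) {
			/*
			 * No running transaction, and any inactive commits
			 * have already been waited for by the barrier variant.
			 */
			if (PTR_ERR(trans) == -ENOENT)
				return 0;
			return PTR_ERR(trans);
		}
		return btrfs_commit_transaction(trans);
	}
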
     839             : 
     840             : /* Wait for a transaction commit to reach at least the given state. */
     841        8102 : static noinline void wait_for_commit(struct btrfs_transaction *commit,
     842             :                                      const enum btrfs_trans_state min_state)
     843             : {
     844        8102 :         struct btrfs_fs_info *fs_info = commit->fs_info;
     845        8102 :         u64 transid = commit->transid;
     846        8102 :         bool put = false;
     847             : 
     848             :         /*
     849             :          * At the moment this function is called with min_state either being
     850             :          * TRANS_STATE_COMPLETED or TRANS_STATE_SUPER_COMMITTED.
     851             :          */
     852        8102 :         if (min_state == TRANS_STATE_COMPLETED)
     853             :                 btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
     854             :         else
     855    30998670 :                 btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
     856             : 
     857    30998670 :         while (1) {
     858    31011990 :                 wait_event(commit->commit_wait, commit->state >= min_state);
     859    30997921 :                 if (put)
     860    30990045 :                         btrfs_put_transaction(commit);
     861             : 
     862    30924132 :                 if (min_state < TRANS_STATE_COMPLETED)
     863             :                         break;
     864             : 
     865             :                 /*
     866             :                  * A transaction isn't really completed until all of the
     867             :                  * previous transactions are completed, but with fsync we can
     868             :                  * end up with SUPER_COMMITTED transactions before a COMPLETED
     869             :                  * transaction. Wait for those.
     870             :                  */
     871             : 
     872    30923967 :                 spin_lock(&fs_info->trans_lock);
     873    31005829 :                 commit = list_first_entry_or_null(&fs_info->trans_list,
     874             :                                                   struct btrfs_transaction,
     875             :                                                   list);
     876    31005341 :                 if (!commit || commit->transid > transid) {
     877        7941 :                         spin_unlock(&fs_info->trans_lock);
     878             :                         break;
     879             :                 }
     880    30997888 :                 refcount_inc(&commit->use_count);
     881    30997888 :                 put = true;
     882    30997888 :                 spin_unlock(&fs_info->trans_lock);
     883             :         }
     884        8106 : }
     885             : 
     886       14110 : int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
     887             : {
     888       14110 :         struct btrfs_transaction *cur_trans = NULL, *t;
     889       14110 :         int ret = 0;
     890             : 
     891       14110 :         if (transid) {
     892           5 :                 if (transid <= fs_info->last_trans_committed)
     893           0 :                         goto out;
     894             : 
     895             :                 /* find specified transaction */
     896           5 :                 spin_lock(&fs_info->trans_lock);
     897           5 :                 list_for_each_entry(t, &fs_info->trans_list, list) {
     898           5 :                         if (t->transid == transid) {
     899           5 :                                 cur_trans = t;
     900           5 :                                 refcount_inc(&cur_trans->use_count);
     901           5 :                                 ret = 0;
     902           5 :                                 break;
     903             :                         }
     904           0 :                         if (t->transid > transid) {
     905             :                                 ret = 0;
     906             :                                 break;
     907             :                         }
     908             :                 }
     909           5 :                 spin_unlock(&fs_info->trans_lock);
     910             : 
     911             :                 /*
     912             :                  * The specified transaction doesn't exist, or we
     913             :                  * raced with btrfs_commit_transaction
     914             :                  */
     915           5 :                 if (!cur_trans) {
     916           0 :                         if (transid > fs_info->last_trans_committed)
     917           0 :                                 ret = -EINVAL;
     918           0 :                         goto out;
     919             :                 }
     920             :         } else {
     921             :                 /* find newest transaction that is committing | committed */
     922       14105 :                 spin_lock(&fs_info->trans_lock);
     923       14804 :                 list_for_each_entry_reverse(t, &fs_info->trans_list,
     924             :                                             list) {
     925        2437 :                         if (t->state >= TRANS_STATE_COMMIT_START) {
     926        1770 :                                 if (t->state == TRANS_STATE_COMPLETED)
     927             :                                         break;
     928        1770 :                                 cur_trans = t;
     929        1770 :                                 refcount_inc(&cur_trans->use_count);
     930             :                                 break;
     931             :                         }
     932             :                 }
     933       14137 :                 spin_unlock(&fs_info->trans_lock);
     934       14136 :                 if (!cur_trans)
     935       12366 :                         goto out;  /* nothing committing|committed */
     936             :         }
     937             : 
     938        1775 :         wait_for_commit(cur_trans, TRANS_STATE_COMPLETED);
     939        1772 :         ret = cur_trans->aborted;
     940        1772 :         btrfs_put_transaction(cur_trans);
     941       14140 : out:
     942       14140 :         return ret;
     943             : }
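
A hedged usage sketch for btrfs_wait_for_commit(): record the generation of the transaction a task was attached to, and let any context that only knows that number wait for it to be fully committed (transid == 0 instead waits for the newest committing or committed transaction). The helper name is hypothetical; a synchronous btrfs_commit_transaction() normally returns only after the transaction completes, so the final call mainly illustrates the transid-based interface.

	/* Illustrative sketch; not a real btrfs function. */
	static int example_commit_and_wait(struct btrfs_root *root)
	{
		struct btrfs_fs_info *fs_info = root->fs_info;
		struct btrfs_trans_handle *trans;
		u64 transid;
		int ret;

		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans))
			return PTR_ERR(trans);
		transid = trans->transid;

		/* ... modify trees under this handle ... */

		ret = btrfs_commit_transaction(trans);
		if (ret)
			return ret;

		/* Another task that only knows @transid could wait like this. */
		return btrfs_wait_for_commit(fs_info, transid);
	}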
     944             : 
     945     1265523 : void btrfs_throttle(struct btrfs_fs_info *fs_info)
     946             : {
     947     1265523 :         wait_current_trans(fs_info);
     948     1265523 : }
     949             : 
     950     1980269 : bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
     951             : {
     952     1980269 :         struct btrfs_transaction *cur_trans = trans->transaction;
     953             : 
     954     3960533 :         if (cur_trans->state >= TRANS_STATE_COMMIT_START ||
     955     1980264 :             test_bit(BTRFS_DELAYED_REFS_FLUSHING, &cur_trans->delayed_refs.flags))
     956             :                 return true;
     957             : 
     958     1979861 :         if (btrfs_check_space_for_delayed_refs(trans->fs_info))
     959             :                 return true;
     960             : 
     961        2906 :         return !!btrfs_block_rsv_check(&trans->fs_info->global_block_rsv, 50);
     962             : }
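
The usual caller pattern, sketched below under assumed names (do_one_step() is a hypothetical stand-in for real tree updates): a long-running operation periodically asks btrfs_should_end_transaction() and, when it returns true, detaches from the current transaction and joins a new one so delayed ref flushing and the pending commit can make progress.

	/* Hypothetical per-item work function, standing in for real tree updates. */
	extern int do_one_step(struct btrfs_trans_handle *trans);

	static int example_long_running_op(struct btrfs_root *root, unsigned long nr_items)
	{
		struct btrfs_trans_handle *trans;
		unsigned long i;
		int ret = 0;

		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans))
			return PTR_ERR(trans);

		for (i = 0; i < nr_items; i++) {
			ret = do_one_step(trans);
			if (ret)
				break;

			if (!btrfs_should_end_transaction(trans))
				continue;

			/* Detach so the committer can make progress, then re-join. */
			ret = btrfs_end_transaction(trans);
			if (ret)
				return ret;
			btrfs_btree_balance_dirty(root->fs_info);
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans))
				return PTR_ERR(trans);
		}

		btrfs_end_transaction(trans);
		return ret;
	}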
     963             : 
     964    53519713 : static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans)
     965             : 
     966             : {
     967    53519713 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     968             : 
     969    53519713 :         if (!trans->block_rsv) {
     970             :                 ASSERT(!trans->bytes_reserved);
     971             :                 return;
     972             :         }
     973             : 
     974    35954561 :         if (!trans->bytes_reserved)
     975             :                 return;
     976             : 
     977    29140625 :         ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
     978    29140625 :         trace_btrfs_space_reservation(fs_info, "transaction",
     979             :                                       trans->transid, trans->bytes_reserved, 0);
     980    29140143 :         btrfs_block_rsv_release(fs_info, trans->block_rsv,
     981             :                                 trans->bytes_reserved, NULL);
     982    29142768 :         trans->bytes_reserved = 0;
     983             : }
     984             : 
     985    53309111 : static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
     986             :                                    int throttle)
     987             : {
     988    53309111 :         struct btrfs_fs_info *info = trans->fs_info;
     989    53309111 :         struct btrfs_transaction *cur_trans = trans->transaction;
     990    53309111 :         int err = 0;
     991             : 
     992    53309111 :         if (refcount_read(&trans->use_count) > 1) {
     993           0 :                 refcount_dec(&trans->use_count);
     994           0 :                 trans->block_rsv = trans->orig_rsv;
     995           0 :                 return 0;
     996             :         }
     997             : 
     998    53309111 :         btrfs_trans_release_metadata(trans);
     999    53308439 :         trans->block_rsv = NULL;
    1000             : 
    1001    53308439 :         btrfs_create_pending_block_groups(trans);
    1002             : 
    1003    53303746 :         btrfs_trans_release_chunk_metadata(trans);
    1004             : 
    1005    53302421 :         if (trans->type & __TRANS_FREEZABLE)
    1006    52149676 :                 sb_end_intwrite(info->sb);
    1007             : 
    1008    53300104 :         WARN_ON(cur_trans != info->running_transaction);
    1009    53300104 :         WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
    1010    53300104 :         atomic_dec(&cur_trans->num_writers);
    1011    53305646 :         extwriter_counter_dec(cur_trans, trans->type);
    1012             : 
    1013    53306375 :         cond_wake_up(&cur_trans->writer_wait);
    1014             : 
    1015    53311563 :         btrfs_lockdep_release(info, btrfs_trans_num_extwriters);
    1016    53311563 :         btrfs_lockdep_release(info, btrfs_trans_num_writers);
    1017             : 
    1018    53311563 :         btrfs_put_transaction(cur_trans);
    1019             : 
    1020    53304616 :         if (current->journal_info == trans)
    1021    53304829 :                 current->journal_info = NULL;
    1022             : 
    1023    53304616 :         if (throttle)
    1024     4081479 :                 btrfs_run_delayed_iputs(info);
    1025             : 
    1026    53304616 :         if (TRANS_ABORTED(trans) || BTRFS_FS_ERROR(info)) {
    1027           1 :                 wake_up_process(info->transaction_kthread);
    1028           1 :                 if (TRANS_ABORTED(trans))
    1029           1 :                         err = trans->aborted;
    1030             :                 else
    1031             :                         err = -EROFS;
    1032             :         }
    1033             : 
    1034    53304616 :         kmem_cache_free(btrfs_trans_handle_cachep, trans);
    1035    53304616 :         return err;
    1036             : }
    1037             : 
    1038    49227683 : int btrfs_end_transaction(struct btrfs_trans_handle *trans)
    1039             : {
    1040    49227683 :         return __btrfs_end_transaction(trans, 0);
    1041             : }
    1042             : 
    1043     4081479 : int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans)
    1044             : {
    1045     4081479 :         return __btrfs_end_transaction(trans, 1);
    1046             : }
    1047             : 
    1048             : /*
    1049             :  * when btree blocks are allocated, they have some corresponding bits set for
    1050             :  * them in one of two extent_io trees.  This is used to make sure all of
    1051             :  * those extents are sent to disk but does not wait on them
    1052             :  */
    1053      689524 : int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
    1054             :                                struct extent_io_tree *dirty_pages, int mark)
    1055             : {
    1056      689524 :         int err = 0;
    1057      689524 :         int werr = 0;
    1058      689524 :         struct address_space *mapping = fs_info->btree_inode->i_mapping;
    1059      689524 :         struct extent_state *cached_state = NULL;
    1060      689524 :         u64 start = 0;
    1061      689524 :         u64 end;
    1062             : 
    1063     2984565 :         while (!find_first_extent_bit(dirty_pages, start, &start, &end,
    1064             :                                       mark, &cached_state)) {
    1065     2295025 :                 bool wait_writeback = false;
    1066             : 
    1067     2295025 :                 err = convert_extent_bit(dirty_pages, start, end,
    1068             :                                          EXTENT_NEED_WAIT,
    1069             :                                          mark, &cached_state);
    1070             :                 /*
    1071             :                  * convert_extent_bit can return -ENOMEM, which is most of the
    1072             :                  * time a temporary error. So when it happens, ignore the error
    1073             :                  * and wait for writeback of this range to finish - because we
    1074             :                  * failed to set the bit EXTENT_NEED_WAIT for the range, a call
    1075             :                  * to __btrfs_wait_marked_extents() would not know that
    1076             :                  * writeback for this range started and therefore wouldn't
    1077             :                  * wait for it to finish - we don't want to commit a
    1078             :                  * superblock that points to btree nodes/leaves for which
    1079             :                  * writeback hasn't finished yet (and without errors).
    1080             :                  * We cleanup any entries left in the io tree when committing
    1081             :                  * the transaction (through extent_io_tree_release()).
    1082             :                  */
    1083     2295028 :                 if (err == -ENOMEM) {
    1084             :                         err = 0;
    1085             :                         wait_writeback = true;
    1086             :                 }
    1087     2295028 :                 if (!err)
    1088     2295028 :                         err = filemap_fdatawrite_range(mapping, start, end);
    1089     2295041 :                 if (err)
    1090             :                         werr = err;
    1091     2295041 :                 else if (wait_writeback)
    1092           0 :                         werr = filemap_fdatawait_range(mapping, start, end);
    1093     2295041 :                 free_extent_state(cached_state);
    1094     2295041 :                 cached_state = NULL;
    1095     2295041 :                 cond_resched();
    1096     2295041 :                 start = end + 1;
    1097             :         }
    1098      689573 :         return werr;
    1099             : }
    1100             : 
    1101             : /*
    1102             :  * when btree blocks are allocated, they have some corresponding bits set for
    1103             :  * them in one of two extent_io trees.  This is used to make sure all of
    1104             :  * those extents are on disk for transaction or log commit.  We wait
    1105             :  * on all the pages and clear them from the dirty pages state tree
    1106             :  */
    1107      689574 : static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
    1108             :                                        struct extent_io_tree *dirty_pages)
    1109             : {
    1110      689574 :         int err = 0;
    1111      689574 :         int werr = 0;
    1112      689574 :         struct address_space *mapping = fs_info->btree_inode->i_mapping;
    1113      689574 :         struct extent_state *cached_state = NULL;
    1114      689574 :         u64 start = 0;
    1115      689574 :         u64 end;
    1116             : 
    1117     2984613 :         while (!find_first_extent_bit(dirty_pages, start, &start, &end,
    1118             :                                       EXTENT_NEED_WAIT, &cached_state)) {
    1119             :                 /*
    1120             :                  * Ignore -ENOMEM errors returned by clear_extent_bit().
    1121             :                  * When committing the transaction, we'll remove any entries
    1122             :                  * left in the io tree. For a log commit, we don't remove them
    1123             :                  * after committing the log because the tree can be accessed
    1124             :                  * concurrently - we do it only at transaction commit time when
    1125             :                  * it's safe to do it (through extent_io_tree_release()).
    1126             :                  */
    1127     2295040 :                 err = clear_extent_bit(dirty_pages, start, end,
    1128             :                                        EXTENT_NEED_WAIT, &cached_state);
    1129     2295039 :                 if (err == -ENOMEM)
    1130             :                         err = 0;
    1131     2295039 :                 if (!err)
    1132     2295039 :                         err = filemap_fdatawait_range(mapping, start, end);
    1133     2295040 :                 if (err)
    1134           5 :                         werr = err;
    1135     2295040 :                 free_extent_state(cached_state);
    1136     2295039 :                 cached_state = NULL;
    1137     2295039 :                 cond_resched();
    1138     2295039 :                 start = end + 1;
    1139             :         }
    1140      689573 :         if (err)
    1141           2 :                 werr = err;
    1142      689573 :         return werr;
    1143             : }
    1144             : 
    1145      206348 : static int btrfs_wait_extents(struct btrfs_fs_info *fs_info,
    1146             :                        struct extent_io_tree *dirty_pages)
    1147             : {
    1148      206348 :         bool errors = false;
    1149      206348 :         int err;
    1150             : 
    1151      206348 :         err = __btrfs_wait_marked_extents(fs_info, dirty_pages);
    1152      206348 :         if (test_and_clear_bit(BTRFS_FS_BTREE_ERR, &fs_info->flags))
    1153           4 :                 errors = true;
    1154             : 
    1155      206348 :         if (errors && !err)
    1156           0 :                 err = -EIO;
    1157      206348 :         return err;
    1158             : }
    1159             : 
    1160      483226 : int btrfs_wait_tree_log_extents(struct btrfs_root *log_root, int mark)
    1161             : {
    1162      483226 :         struct btrfs_fs_info *fs_info = log_root->fs_info;
    1163      483226 :         struct extent_io_tree *dirty_pages = &log_root->dirty_log_pages;
    1164      483226 :         bool errors = false;
    1165      483226 :         int err;
    1166             : 
    1167      483226 :         ASSERT(log_root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
    1168             : 
    1169      483226 :         err = __btrfs_wait_marked_extents(fs_info, dirty_pages);
    1170      846540 :         if ((mark & EXTENT_DIRTY) &&
    1171      363314 :             test_and_clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags))
    1172           1 :                 errors = true;
    1173             : 
    1174      844020 :         if ((mark & EXTENT_NEW) &&
    1175      360794 :             test_and_clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags))
    1176           0 :                 errors = true;
    1177             : 
    1178      483226 :         if (errors && !err)
    1179           0 :                 err = -EIO;
    1180      483226 :         return err;
    1181             : }
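
A simplified sketch of the submit-then-wait split these helpers give the log commit: writeback for every block tagged with @mark is started first, other commit work can overlap, and only then does the caller wait for it. The helper name is hypothetical and this is not the real btrfs_sync_log(); the second log tree and its error handling are omitted.

	/* Illustrative sketch, assuming @log_root is a tree-log root. */
	static int example_flush_log_tree(struct btrfs_root *log_root, int mark)
	{
		struct btrfs_fs_info *fs_info = log_root->fs_info;
		int ret;

		/* Kick off writeback for every extent tagged with @mark. */
		ret = btrfs_write_marked_extents(fs_info, &log_root->dirty_log_pages, mark);
		if (ret)
			return ret;

		/* ... other commit work can overlap with the writeback here ... */

		/* Now wait for that writeback and clear the wait bits. */
		return btrfs_wait_tree_log_extents(log_root, mark);
	}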
    1182             : 
    1183             : /*
    1184             :  * When btree blocks are allocated the corresponding extents are marked dirty.
    1185             :  * This function ensures such extents are persisted on disk for transaction or
    1186             :  * log commit.
    1187             :  *
    1188             :  * @trans: transaction whose dirty pages we'd like to write
    1189             :  */
    1190      206348 : static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans)
    1191             : {
    1192      206348 :         int ret;
    1193      206348 :         int ret2;
    1194      206348 :         struct extent_io_tree *dirty_pages = &trans->transaction->dirty_pages;
    1195      206348 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1196      206348 :         struct blk_plug plug;
    1197             : 
    1198      206348 :         blk_start_plug(&plug);
    1199      206348 :         ret = btrfs_write_marked_extents(fs_info, dirty_pages, EXTENT_DIRTY);
    1200      206348 :         blk_finish_plug(&plug);
    1201      206348 :         ret2 = btrfs_wait_extents(fs_info, dirty_pages);
    1202             : 
    1203      206348 :         extent_io_tree_release(&trans->transaction->dirty_pages);
    1204             : 
    1205      206348 :         if (ret)
    1206             :                 return ret;
    1207      206348 :         else if (ret2)
    1208             :                 return ret2;
    1209             :         else
    1210      206344 :                 return 0;
    1211             : }
    1212             : 
    1213             : /*
    1214             :  * this is used to update the root pointer in the tree of tree roots.
    1215             :  *
    1216             :  * But, in the case of the extent allocation tree, updating the root
    1217             :  * pointer may allocate blocks which may change the root of the extent
    1218             :  * allocation tree.
    1219             :  *
    1220             :  * So, this loops and repeats and makes sure the cowonly root didn't
    1221             :  * change while the root pointer was being updated in the metadata.
    1222             :  */
    1223      459024 : static int update_cowonly_root(struct btrfs_trans_handle *trans,
    1224             :                                struct btrfs_root *root)
    1225             : {
    1226      459024 :         int ret;
    1227      459024 :         u64 old_root_bytenr;
    1228      459024 :         u64 old_root_used;
    1229      459024 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1230      459024 :         struct btrfs_root *tree_root = fs_info->tree_root;
    1231             : 
    1232      459024 :         old_root_used = btrfs_root_used(&root->root_item);
    1233             : 
    1234     1377072 :         while (1) {
    1235      918048 :                 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
    1236      918048 :                 if (old_root_bytenr == root->node->start &&
    1237             :                     old_root_used == btrfs_root_used(&root->root_item))
    1238             :                         break;
    1239             : 
    1240      459024 :                 btrfs_set_root_node(&root->root_item, root->node);
    1241      459024 :                 ret = btrfs_update_root(trans, tree_root,
    1242             :                                         &root->root_key,
    1243             :                                         &root->root_item);
    1244      459024 :                 if (ret)
    1245           0 :                         return ret;
    1246             : 
    1247      459024 :                 old_root_used = btrfs_root_used(&root->root_item);
    1248             :         }
    1249             : 
    1250             :         return 0;
    1251             : }
    1252             : 
    1253             : /*
    1254             :  * update all the cowonly tree roots on disk
    1255             :  *
    1256             :  * The error handling in this function may not be obvious. Any of the
    1257             :  * failures will cause the file system to go offline. We still need
    1258             :  * to clean up the delayed refs.
    1259             :  */
    1260      206349 : static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
    1261             : {
    1262      206349 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1263      206349 :         struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
    1264      206349 :         struct list_head *io_bgs = &trans->transaction->io_bgs;
    1265      206349 :         struct list_head *next;
    1266      206349 :         struct extent_buffer *eb;
    1267      206349 :         int ret;
    1268             : 
    1269             :         /*
    1270             :          * At this point no one can be using this transaction to modify any tree
    1271             :          * and no one can start another transaction to modify any tree either.
    1272             :          */
    1273      206349 :         ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING);
    1274             : 
    1275      206349 :         eb = btrfs_lock_root_node(fs_info->tree_root);
    1276      206349 :         ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
    1277             :                               0, &eb, BTRFS_NESTING_COW);
    1278      206349 :         btrfs_tree_unlock(eb);
    1279      206349 :         free_extent_buffer(eb);
    1280             : 
    1281      206349 :         if (ret)
    1282             :                 return ret;
    1283             : 
    1284      206349 :         ret = btrfs_run_dev_stats(trans);
    1285      206349 :         if (ret)
    1286             :                 return ret;
    1287      206349 :         ret = btrfs_run_dev_replace(trans);
    1288      206349 :         if (ret)
    1289             :                 return ret;
    1290      206349 :         ret = btrfs_run_qgroups(trans);
    1291      206349 :         if (ret)
    1292             :                 return ret;
    1293             : 
    1294      206349 :         ret = btrfs_setup_space_cache(trans);
    1295      206349 :         if (ret)
    1296             :                 return ret;
    1297             : 
    1298      206349 : again:
    1299      727955 :         while (!list_empty(&fs_info->dirty_cowonly_roots)) {
    1300      459024 :                 struct btrfs_root *root;
    1301      459024 :                 next = fs_info->dirty_cowonly_roots.next;
    1302      459024 :                 list_del_init(next);
    1303      459024 :                 root = list_entry(next, struct btrfs_root, dirty_list);
    1304      459024 :                 clear_bit(BTRFS_ROOT_DIRTY, &root->state);
    1305             : 
    1306      459024 :                 list_add_tail(&root->dirty_list,
    1307      459024 :                               &trans->transaction->switch_commits);
    1308      459024 :                 ret = update_cowonly_root(trans, root);
    1309      459024 :                 if (ret)
    1310           0 :                         return ret;
    1311             :         }
    1312             : 
    1313             :         /* Now flush any delayed refs generated by updating all of the roots */
    1314      268931 :         ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
    1315      268931 :         if (ret)
    1316           0 :                 return ret;
    1317             : 
    1318      475670 :         while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) {
    1319      206739 :                 ret = btrfs_write_dirty_block_groups(trans);
    1320      206739 :                 if (ret)
    1321           0 :                         return ret;
    1322             : 
    1323             :                 /*
    1324             :                  * We're writing the dirty block groups, which could generate
    1325             :                  * delayed refs, which could generate more dirty block groups,
    1326             :                  * so we want to keep this flushing in this loop to make sure
    1327             :                  * everything gets run.
    1328             :                  */
    1329      206739 :                 ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
    1330      206739 :                 if (ret)
    1331           0 :                         return ret;
    1332             :         }
    1333             : 
    1334      268931 :         if (!list_empty(&fs_info->dirty_cowonly_roots))
    1335       62582 :                 goto again;
    1336             : 
    1337             :         /* Update dev-replace pointer once everything is committed */
    1338      206349 :         fs_info->dev_replace.committed_cursor_left =
    1339      206349 :                 fs_info->dev_replace.cursor_left_last_write_of_item;
    1340             : 
    1341      206349 :         return 0;
    1342             : }
    1343             : 
    1344             : /*
    1345             :  * If we had a pending drop we need to see if there are any others left in our
    1346             :  * dead roots list, and if not clear our bit and wake any waiters.
    1347             :  */
    1348           0 : void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
    1349             : {
    1350             :         /*
    1351             :          * We put the drop in progress roots at the front of the list, so if the
    1352             :          * first entry doesn't have UNFINISHED_DROP set we can wake everybody
    1353             :          * up.
    1354             :          */
    1355           0 :         spin_lock(&fs_info->trans_lock);
    1356           0 :         if (!list_empty(&fs_info->dead_roots)) {
    1357           0 :                 struct btrfs_root *root = list_first_entry(&fs_info->dead_roots,
    1358             :                                                            struct btrfs_root,
    1359             :                                                            root_list);
    1360           0 :                 if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state)) {
    1361           0 :                         spin_unlock(&fs_info->trans_lock);
    1362           0 :                         return;
    1363             :                 }
    1364             :         }
    1365           0 :         spin_unlock(&fs_info->trans_lock);
    1366             : 
    1367           0 :         btrfs_wake_unfinished_drop(fs_info);
    1368             : }
    1369             : 
    1370             : /*
    1371             :  * Dead roots are old snapshots that need to be deleted.  This takes a
    1372             :  * reference on the given root and adds it to the list of dead roots that
    1373             :  * need to be deleted.
    1374             :  */
    1375         368 : void btrfs_add_dead_root(struct btrfs_root *root)
    1376             : {
    1377         368 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1378             : 
    1379         368 :         spin_lock(&fs_info->trans_lock);
    1380         368 :         if (list_empty(&root->root_list)) {
    1381         368 :                 btrfs_grab_root(root);
    1382             : 
    1383             :                 /* We want to process the partially complete drops first. */
    1384         736 :                 if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state))
    1385           0 :                         list_add(&root->root_list, &fs_info->dead_roots);
    1386             :                 else
    1387         368 :                         list_add_tail(&root->root_list, &fs_info->dead_roots);
    1388             :         }
    1389         368 :         spin_unlock(&fs_info->trans_lock);
    1390         368 : }
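
For context, a hedged sketch of the consumer side of this list (the helper name is hypothetical): the cleaner pops one root off fs_info->dead_roots under trans_lock before dropping the tree. The actual tree dropping and reference handling are done by the real consumer, btrfs_clean_one_deleted_snapshot(), and are not repeated here.

	/* Illustrative sketch only; the real consumer does more bookkeeping. */
	static struct btrfs_root *example_grab_one_dead_root(struct btrfs_fs_info *fs_info)
	{
		struct btrfs_root *root;

		spin_lock(&fs_info->trans_lock);
		root = list_first_entry_or_null(&fs_info->dead_roots,
						struct btrfs_root, root_list);
		if (root)
			list_del_init(&root->root_list);
		spin_unlock(&fs_info->trans_lock);

		/*
		 * The caller now owns the reference taken by btrfs_add_dead_root()
		 * and is responsible for dropping the tree and that reference.
		 */
		return root;
	}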
    1391             : 
    1392             : /*
    1393             :  * Update each subvolume root and its relocation root, if it exists, in the tree
    1394             :  * of tree roots. Also free log roots if they exist.
    1395             :  */
    1396      206349 : static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
    1397             : {
    1398      206349 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1399      206349 :         struct btrfs_root *gang[8];
    1400      206349 :         int i;
    1401      206349 :         int ret;
    1402             : 
    1403             :         /*
    1404             :          * At this point no one can be using this transaction to modify any tree
    1405             :          * and no one can start another transaction to modify any tree either.
    1406             :          */
    1407      206349 :         ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING);
    1408             : 
    1409      206349 :         spin_lock(&fs_info->fs_roots_radix_lock);
    1410      355146 :         while (1) {
    1411      355146 :                 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
    1412             :                                                  (void **)gang, 0,
    1413             :                                                  ARRAY_SIZE(gang),
    1414             :                                                  BTRFS_ROOT_TRANS_TAG);
    1415      355146 :                 if (ret == 0)
    1416             :                         break;
    1417      303950 :                 for (i = 0; i < ret; i++) {
    1418      155153 :                         struct btrfs_root *root = gang[i];
    1419      155153 :                         int ret2;
    1420             : 
    1421             :                         /*
    1422             :                          * At this point we can neither have tasks logging inodes
    1423             :                          * from a root nor trying to commit a log tree.
    1424             :                          */
    1425      155153 :                         ASSERT(atomic_read(&root->log_writers) == 0);
    1426      155153 :                         ASSERT(atomic_read(&root->log_commit[0]) == 0);
    1427      155153 :                         ASSERT(atomic_read(&root->log_commit[1]) == 0);
    1428             : 
    1429      155153 :                         radix_tree_tag_clear(&fs_info->fs_roots_radix,
    1430      155153 :                                         (unsigned long)root->root_key.objectid,
    1431             :                                         BTRFS_ROOT_TRANS_TAG);
    1432      155153 :                         spin_unlock(&fs_info->fs_roots_radix_lock);
    1433             : 
    1434      155153 :                         btrfs_free_log(trans, root);
    1435      155153 :                         ret2 = btrfs_update_reloc_root(trans, root);
    1436      155153 :                         if (ret2)
    1437           0 :                                 return ret2;
    1438             : 
    1439             :                         /* see comments in should_cow_block() */
    1440      155153 :                         clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
    1441      155153 :                         smp_mb__after_atomic();
    1442             : 
    1443      155153 :                         if (root->commit_root != root->node) {
    1444      147817 :                                 list_add_tail(&root->dirty_list,
    1445      147817 :                                         &trans->transaction->switch_commits);
    1446      147817 :                                 btrfs_set_root_node(&root->root_item,
    1447             :                                                     root->node);
    1448             :                         }
    1449             : 
    1450      155153 :                         ret2 = btrfs_update_root(trans, fs_info->tree_root,
    1451             :                                                 &root->root_key,
    1452             :                                                 &root->root_item);
    1453      155153 :                         if (ret2)
    1454           0 :                                 return ret2;
    1455      155153 :                         spin_lock(&fs_info->fs_roots_radix_lock);
    1456      155153 :                         btrfs_qgroup_free_meta_all_pertrans(root);
    1457             :                 }
    1458             :         }
    1459      206349 :         spin_unlock(&fs_info->fs_roots_radix_lock);
    1460      206349 :         return 0;
    1461             : }
    1462             : 
    1463             : /*
    1464             :  * defrag a given btree.
    1465             :  * Every leaf in the btree is read and defragged.
    1466             :  */
    1467           2 : int btrfs_defrag_root(struct btrfs_root *root)
    1468             : {
    1469           2 :         struct btrfs_fs_info *info = root->fs_info;
    1470           2 :         struct btrfs_trans_handle *trans;
    1471           2 :         int ret;
    1472             : 
    1473           2 :         if (test_and_set_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state))
    1474             :                 return 0;
    1475             : 
    1476           2 :         while (1) {
    1477           2 :                 trans = btrfs_start_transaction(root, 0);
    1478           2 :                 if (IS_ERR(trans)) {
    1479           0 :                         ret = PTR_ERR(trans);
    1480           0 :                         break;
    1481             :                 }
    1482             : 
    1483           2 :                 ret = btrfs_defrag_leaves(trans, root);
    1484             : 
    1485           2 :                 btrfs_end_transaction(trans);
    1486           2 :                 btrfs_btree_balance_dirty(info);
    1487           2 :                 cond_resched();
    1488             : 
    1489           2 :                 if (btrfs_fs_closing(info) || ret != -EAGAIN)
    1490             :                         break;
    1491             : 
    1492           0 :                 if (btrfs_defrag_cancelled(info)) {
    1493             :                         btrfs_debug(info, "defrag_root cancelled");
    1494             :                         ret = -EAGAIN;
    1495             :                         break;
    1496             :                 }
    1497             :         }
    1498           2 :         clear_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state);
    1499           2 :         return ret;
    1500             : }
    1501             : 
    1502             : /*
    1503             :  * Do all the special snapshot-related qgroup work.
    1504             :  *
    1505             :  * This performs the needed qgroup inheritance and the dirty hack of switching
    1506             :  * commit roots inside one transaction and writing all btree blocks to disk,
    1507             :  * so that qgroup accounting works for the new snapshot.
    1508             :  */
    1509        1026 : static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
    1510             :                                    struct btrfs_root *src,
    1511             :                                    struct btrfs_root *parent,
    1512             :                                    struct btrfs_qgroup_inherit *inherit,
    1513             :                                    u64 dst_objectid)
    1514             : {
    1515        1026 :         struct btrfs_fs_info *fs_info = src->fs_info;
    1516        1026 :         int ret;
    1517             : 
    1518             :         /*
    1519             :          * Skip the work entirely when qgroups are not
    1520             :          * enabled. If this check races with the ioctl, a rescan will
    1521             :          * kick in anyway.
    1522             :          */
    1523        1026 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
    1524             :                 return 0;
    1525             : 
    1526             :         /*
    1527             :          * Ensure the dirty @src will be committed.  Otherwise, after the
    1528             :          * upcoming commit_fs_roots() and switch_commit_roots(), any dirty but
    1529             :          * not recorded root will never be updated again, leaving an outdated root
    1530             :          * item.
    1531             :          */
    1532         174 :         ret = record_root_in_trans(trans, src, 1);
    1533         174 :         if (ret)
    1534             :                 return ret;
    1535             : 
    1536             :         /*
    1537             :          * btrfs_qgroup_inherit relies on a consistent view of the usage for the
    1538             :          * src root, so we must run the delayed refs here.
    1539             :          *
    1540             :          * However this isn't particularly foolproof, because there's no
    1541             :          * synchronization keeping us from changing the tree after this point
    1542             :          * before we do the qgroup_inherit, or even from making changes while
    1543             :          * we're doing the qgroup_inherit.  But that's a problem for the future,
    1544             :          * for now flush the delayed refs to narrow the race window where the
    1545             :          * qgroup counters could end up wrong.
    1546             :          */
    1547         174 :         ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
    1548         174 :         if (ret) {
    1549           0 :                 btrfs_abort_transaction(trans, ret);
    1550           0 :                 return ret;
    1551             :         }
    1552             : 
    1553         174 :         ret = commit_fs_roots(trans);
    1554         174 :         if (ret)
    1555           0 :                 goto out;
    1556         174 :         ret = btrfs_qgroup_account_extents(trans);
    1557         174 :         if (ret < 0)
    1558           0 :                 goto out;
    1559             : 
    1560             :         /* Now qgroup are all updated, we can inherit it to new qgroups */
    1561         174 :         ret = btrfs_qgroup_inherit(trans, src->root_key.objectid, dst_objectid,
    1562             :                                    inherit);
    1563         174 :         if (ret < 0)
    1564           0 :                 goto out;
    1565             : 
    1566             :         /*
    1567             :          * Now we do a simplified commit transaction, which will:
    1568             :          * 1) commit all subvolume and extent trees
    1569             :          *    To ensure every subvolume and extent tree has a valid
    1570             :          *    commit_root for the accounting done by the later insert_dir_item()
    1571             :          * 2) write all btree blocks onto disk
    1572             :          *    This makes sure later btree modifications will be COWed;
    1573             :          *    otherwise the commit_root contents could still change and cause wrong qgroup numbers
    1574             :          * In this simplified commit we don't care about other trees, like the
    1575             :          * chunk and root trees, as they don't affect qgroup accounting.
    1576             :          * And we don't write the super block, to avoid a half-committed state.
    1577             :          */
    1578         174 :         ret = commit_cowonly_roots(trans);
    1579         174 :         if (ret)
    1580           0 :                 goto out;
    1581         174 :         switch_commit_roots(trans);
    1582         174 :         ret = btrfs_write_and_wait_transaction(trans);
    1583         174 :         if (ret)
    1584           0 :                 btrfs_handle_fs_error(fs_info, ret,
    1585             :                         "Error while writing out transaction for qgroup");
    1586             : 
    1587         174 : out:
    1588             :         /*
    1589             :          * Force the parent root to be updated: we recorded it above, so its
    1590             :          * last_trans == cur_transid.
    1591             :          * Otherwise it won't be committed to disk again after the later
    1592             :          * insert_dir_item().
    1593             :          */
    1594         174 :         if (!ret)
    1595         174 :                 ret = record_root_in_trans(trans, parent, 1);
    1596             :         return ret;
    1597             : }
    1598             : 
    1599             : /*
    1600             :  * new snapshots need to be created at a very specific time in the
    1601             :  * transaction commit.  This does the actual creation.
    1602             :  *
    1603             :  * Note:
    1604             :  * If an error occurs that may affect the commit of the current transaction,
    1605             :  * return that error number. If the error only affects the creation of the
    1606             :  * pending snapshots, return 0.
    1607             :  */
    1608        1026 : static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
    1609             :                                    struct btrfs_pending_snapshot *pending)
    1610             : {
    1611             : 
    1612        1026 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1613        1026 :         struct btrfs_key key;
    1614        1026 :         struct btrfs_root_item *new_root_item;
    1615        1026 :         struct btrfs_root *tree_root = fs_info->tree_root;
    1616        1026 :         struct btrfs_root *root = pending->root;
    1617        1026 :         struct btrfs_root *parent_root;
    1618        1026 :         struct btrfs_block_rsv *rsv;
    1619        1026 :         struct inode *parent_inode = pending->dir;
    1620        1026 :         struct btrfs_path *path;
    1621        1026 :         struct btrfs_dir_item *dir_item;
    1622        1026 :         struct extent_buffer *tmp;
    1623        1026 :         struct extent_buffer *old;
    1624        1026 :         struct timespec64 cur_time;
    1625        1026 :         int ret = 0;
    1626        1026 :         u64 to_reserve = 0;
    1627        1026 :         u64 index = 0;
    1628        1026 :         u64 objectid;
    1629        1026 :         u64 root_flags;
    1630        1026 :         unsigned int nofs_flags;
    1631        1026 :         struct fscrypt_name fname;
    1632             : 
    1633        1026 :         ASSERT(pending->path);
    1634        1026 :         path = pending->path;
    1635             : 
    1636        1026 :         ASSERT(pending->root_item);
    1637        1026 :         new_root_item = pending->root_item;
    1638             : 
    1639             :         /*
    1640             :          * We're inside a transaction and must make sure that any potential
    1641             :          * allocations with GFP_KERNEL in fscrypt won't recurse back into the
    1642             :          * filesystem.
    1643             :          */
    1644        1026 :         nofs_flags = memalloc_nofs_save();
    1645        2052 :         pending->error = fscrypt_setup_filename(parent_inode,
    1646        1026 :                                                 &pending->dentry->d_name, 0,
    1647             :                                                 &fname);
    1648        1026 :         memalloc_nofs_restore(nofs_flags);
    1649        1026 :         if (pending->error)
    1650           0 :                 goto free_pending;
    1651             : 
    1652        1026 :         pending->error = btrfs_get_free_objectid(tree_root, &objectid);
    1653        1026 :         if (pending->error)
    1654           0 :                 goto free_fname;
    1655             : 
    1656             :         /*
    1657             :          * Make qgroups skip the new snapshot's qgroupid, as it will be
    1658             :          * accounted for by the later btrfs_qgroup_inherit().
    1659             :          */
    1660        1026 :         btrfs_set_skip_qgroup(trans, objectid);
    1661             : 
    1662        1026 :         btrfs_reloc_pre_snapshot(pending, &to_reserve);
    1663             : 
    1664        1026 :         if (to_reserve > 0) {
    1665           0 :                 pending->error = btrfs_block_rsv_add(fs_info,
    1666             :                                                      &pending->block_rsv,
    1667             :                                                      to_reserve,
    1668             :                                                      BTRFS_RESERVE_NO_FLUSH);
    1669           0 :                 if (pending->error)
    1670           0 :                         goto clear_skip_qgroup;
    1671             :         }
    1672             : 
    1673        1026 :         key.objectid = objectid;
    1674        1026 :         key.offset = (u64)-1;
    1675        1026 :         key.type = BTRFS_ROOT_ITEM_KEY;
    1676             : 
    1677        1026 :         rsv = trans->block_rsv;
    1678        1026 :         trans->block_rsv = &pending->block_rsv;
    1679        1026 :         trans->bytes_reserved = trans->block_rsv->reserved;
    1680        1026 :         trace_btrfs_space_reservation(fs_info, "transaction",
    1681             :                                       trans->transid,
    1682             :                                       trans->bytes_reserved, 1);
    1683        1026 :         parent_root = BTRFS_I(parent_inode)->root;
    1684        1026 :         ret = record_root_in_trans(trans, parent_root, 0);
    1685        1026 :         if (ret)
    1686           0 :                 goto fail;
    1687        1026 :         cur_time = current_time(parent_inode);
    1688             : 
    1689             :         /*
    1690             :          * insert the directory item
    1691             :          */
    1692        1026 :         ret = btrfs_set_inode_index(BTRFS_I(parent_inode), &index);
    1693        1026 :         if (ret) {
    1694           0 :                 btrfs_abort_transaction(trans, ret);
    1695           0 :                 goto fail;
    1696             :         }
    1697             : 
    1698             :         /* check if there is a file/dir which has the same name. */
    1699        1026 :         dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
    1700             :                                          btrfs_ino(BTRFS_I(parent_inode)),
    1701             :                                          &fname.disk_name, 0);
    1702        1026 :         if (dir_item != NULL && !IS_ERR(dir_item)) {
    1703           0 :                 pending->error = -EEXIST;
    1704           0 :                 goto dir_item_existed;
    1705        1026 :         } else if (IS_ERR(dir_item)) {
    1706           0 :                 ret = PTR_ERR(dir_item);
    1707           0 :                 btrfs_abort_transaction(trans, ret);
    1708           0 :                 goto fail;
    1709             :         }
    1710        1026 :         btrfs_release_path(path);
    1711             : 
    1712             :         /*
    1713             :          * Pull in the delayed directory update
    1714             :          * and the delayed inode item;
    1715             :          * otherwise we would corrupt the FS
    1716             :          * during the snapshot.
    1717             :          */
    1718        1026 :         ret = btrfs_run_delayed_items(trans);
    1719        1026 :         if (ret) {      /* Transaction aborted */
    1720           0 :                 btrfs_abort_transaction(trans, ret);
    1721           0 :                 goto fail;
    1722             :         }
    1723             : 
    1724        1026 :         ret = record_root_in_trans(trans, root, 0);
    1725        1026 :         if (ret) {
    1726           0 :                 btrfs_abort_transaction(trans, ret);
    1727           0 :                 goto fail;
    1728             :         }
    1729        1026 :         btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
    1730        2052 :         memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
    1731        1026 :         btrfs_check_and_init_root_item(new_root_item);
    1732             : 
    1733        1026 :         root_flags = btrfs_root_flags(new_root_item);
    1734        1026 :         if (pending->readonly)
    1735         738 :                 root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
    1736             :         else
    1737         288 :                 root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
    1738        1026 :         btrfs_set_root_flags(new_root_item, root_flags);
    1739             : 
    1740        1026 :         btrfs_set_root_generation_v2(new_root_item,
    1741             :                         trans->transid);
    1742        1026 :         generate_random_guid(new_root_item->uuid);
    1743        2052 :         memcpy(new_root_item->parent_uuid, root->root_item.uuid,
    1744             :                         BTRFS_UUID_SIZE);
    1745        1026 :         if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) {
    1746         288 :                 memset(new_root_item->received_uuid, 0,
    1747             :                        sizeof(new_root_item->received_uuid));
    1748         288 :                 memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
    1749         288 :                 memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
    1750         288 :                 btrfs_set_root_stransid(new_root_item, 0);
    1751         288 :                 btrfs_set_root_rtransid(new_root_item, 0);
    1752             :         }
    1753        1026 :         btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec);
    1754        1026 :         btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec);
    1755        1026 :         btrfs_set_root_otransid(new_root_item, trans->transid);
    1756             : 
    1757        1026 :         old = btrfs_lock_root_node(root);
    1758        1026 :         ret = btrfs_cow_block(trans, root, old, NULL, 0, &old,
    1759             :                               BTRFS_NESTING_COW);
    1760        1026 :         if (ret) {
    1761           0 :                 btrfs_tree_unlock(old);
    1762           0 :                 free_extent_buffer(old);
    1763           0 :                 btrfs_abort_transaction(trans, ret);
    1764           0 :                 goto fail;
    1765             :         }
    1766             : 
    1767        1026 :         ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
    1768             :         /* clean up in any case */
    1769        1026 :         btrfs_tree_unlock(old);
    1770        1026 :         free_extent_buffer(old);
    1771        1026 :         if (ret) {
    1772           0 :                 btrfs_abort_transaction(trans, ret);
    1773           0 :                 goto fail;
    1774             :         }
    1775             :         /* see comments in should_cow_block() */
    1776        1026 :         set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
    1777        1026 :         smp_wmb();
    1778             : 
    1779        1026 :         btrfs_set_root_node(new_root_item, tmp);
    1780             :         /* record when the snapshot was created in key.offset */
    1781        1026 :         key.offset = trans->transid;
    1782        1026 :         ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
    1783        1026 :         btrfs_tree_unlock(tmp);
    1784        1026 :         free_extent_buffer(tmp);
    1785        1026 :         if (ret) {
    1786           0 :                 btrfs_abort_transaction(trans, ret);
    1787           0 :                 goto fail;
    1788             :         }
    1789             : 
    1790             :         /*
    1791             :          * insert root back/forward references
    1792             :          */
    1793        1026 :         ret = btrfs_add_root_ref(trans, objectid,
    1794             :                                  parent_root->root_key.objectid,
    1795             :                                  btrfs_ino(BTRFS_I(parent_inode)), index,
    1796             :                                  &fname.disk_name);
    1797        1026 :         if (ret) {
    1798           0 :                 btrfs_abort_transaction(trans, ret);
    1799           0 :                 goto fail;
    1800             :         }
    1801             : 
    1802        1026 :         key.offset = (u64)-1;
    1803        1026 :         pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
    1804        1026 :         if (IS_ERR(pending->snap)) {
    1805           0 :                 ret = PTR_ERR(pending->snap);
    1806           0 :                 pending->snap = NULL;
    1807           0 :                 btrfs_abort_transaction(trans, ret);
    1808           0 :                 goto fail;
    1809             :         }
    1810             : 
    1811        1026 :         ret = btrfs_reloc_post_snapshot(trans, pending);
    1812        1026 :         if (ret) {
    1813           0 :                 btrfs_abort_transaction(trans, ret);
    1814           0 :                 goto fail;
    1815             :         }
    1816             : 
    1817             :         /*
    1818             :          * Do the special qgroup accounting for the snapshot, as we use a
    1819             :          * qgroup hack to make snapshot creation fast.
    1820             :          * To cooperate with that hack we do the accounting here as well;
    1821             :          * otherwise the snapshot would be greatly slowed down by a subtree qgroup rescan.
    1822             :          */
    1823        1026 :         ret = qgroup_account_snapshot(trans, root, parent_root,
    1824             :                                       pending->inherit, objectid);
    1825        1026 :         if (ret < 0)
    1826           0 :                 goto fail;
    1827             : 
    1828        1026 :         ret = btrfs_insert_dir_item(trans, &fname.disk_name,
    1829             :                                     BTRFS_I(parent_inode), &key, BTRFS_FT_DIR,
    1830             :                                     index);
    1831             :         /* We checked the name at the beginning, so a duplicate here is impossible. */
    1832        1026 :         BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
    1833        1026 :         if (ret) {
    1834           0 :                 btrfs_abort_transaction(trans, ret);
    1835           0 :                 goto fail;
    1836             :         }
    1837             : 
    1838        1026 :         btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
    1839        1026 :                                                   fname.disk_name.len * 2);
    1840        1026 :         parent_inode->i_mtime = current_time(parent_inode);
    1841        1026 :         parent_inode->i_ctime = parent_inode->i_mtime;
    1842        1026 :         ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
    1843        1026 :         if (ret) {
    1844           0 :                 btrfs_abort_transaction(trans, ret);
    1845           0 :                 goto fail;
    1846             :         }
    1847        1026 :         ret = btrfs_uuid_tree_add(trans, new_root_item->uuid,
    1848             :                                   BTRFS_UUID_KEY_SUBVOL,
    1849             :                                   objectid);
    1850        1026 :         if (ret) {
    1851           0 :                 btrfs_abort_transaction(trans, ret);
    1852           0 :                 goto fail;
    1853             :         }
    1854        1026 :         if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
    1855           0 :                 ret = btrfs_uuid_tree_add(trans, new_root_item->received_uuid,
    1856             :                                           BTRFS_UUID_KEY_RECEIVED_SUBVOL,
    1857             :                                           objectid);
    1858           0 :                 if (ret && ret != -EEXIST) {
    1859           0 :                         btrfs_abort_transaction(trans, ret);
    1860           0 :                         goto fail;
    1861             :                 }
    1862             :         }
    1863             : 
    1864        1026 : fail:
    1865        1026 :         pending->error = ret;
    1866        1026 : dir_item_existed:
    1867        1026 :         trans->block_rsv = rsv;
    1868        1026 :         trans->bytes_reserved = 0;
    1869        1026 : clear_skip_qgroup:
    1870        1026 :         btrfs_clear_skip_qgroup(trans);
    1871             : free_fname:
    1872             :         fscrypt_free_filename(&fname);
    1873        1026 : free_pending:
    1874        1026 :         kfree(new_root_item);
    1875        1026 :         pending->root_item = NULL;
    1876        1026 :         btrfs_free_path(path);
    1877        1026 :         pending->path = NULL;
    1878             : 
    1879        1026 :         return ret;
    1880             : }
    1881             : 
    1882             : /*
    1883             :  * create all the snapshots we've scheduled for creation
    1884             :  */
    1885      206175 : static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans)
    1886             : {
    1887      206175 :         struct btrfs_pending_snapshot *pending, *next;
    1888      206175 :         struct list_head *head = &trans->transaction->pending_snapshots;
    1889      206175 :         int ret = 0;
    1890             : 
    1891      207201 :         list_for_each_entry_safe(pending, next, head, list) {
    1892        1026 :                 list_del(&pending->list);
    1893        1026 :                 ret = create_pending_snapshot(trans, pending);
    1894        1026 :                 if (ret)
    1895             :                         break;
    1896             :         }
    1897      206175 :         return ret;
    1898             : }
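
create_pending_snapshots() above deletes each entry while it walks the pending list, which is why it uses the _safe iterator variant that caches the next pointer before the current entry can go away. The following is a minimal userspace sketch of that delete-while-iterating pattern; the tiny hand-rolled list and the names in it are illustrative stand-ins, not the kernel's list_head API or anything from transaction.c.

/*
 * Illustrative sketch only (not kernel code): delete-while-iterating over a
 * list, caching the next pointer first, as list_for_each_entry_safe() does.
 */
#include <stdio.h>
#include <stdlib.h>

struct pending {
        int id;
        struct pending *next;
};

int main(void)
{
        struct pending *head = NULL;

        /* Build a small list standing in for the pending snapshots. */
        for (int i = 3; i > 0; i--) {
                struct pending *p = malloc(sizeof(*p));
                if (!p)
                        return 1;
                p->id = i;
                p->next = head;
                head = p;
        }

        /*
         * Remember the next pointer before processing/freeing the current
         * entry, so the walk survives the entry being removed.
         */
        for (struct pending *p = head, *next; p; p = next) {
                next = p->next;
                printf("processing pending snapshot %d\n", p->id);
                free(p);
        }
        return 0;
}
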
    1899             : 
    1900      206174 : static void update_super_roots(struct btrfs_fs_info *fs_info)
    1901             : {
    1902      206174 :         struct btrfs_root_item *root_item;
    1903      206174 :         struct btrfs_super_block *super;
    1904             : 
    1905      206174 :         super = fs_info->super_copy;
    1906             : 
    1907      206174 :         root_item = &fs_info->chunk_root->root_item;
    1908      206174 :         super->chunk_root = root_item->bytenr;
    1909      206174 :         super->chunk_root_generation = root_item->generation;
    1910      206174 :         super->chunk_root_level = root_item->level;
    1911             : 
    1912      206174 :         root_item = &fs_info->tree_root->root_item;
    1913      206174 :         super->root = root_item->bytenr;
    1914      206174 :         super->generation = root_item->generation;
    1915      206174 :         super->root_level = root_item->level;
    1916      206174 :         if (btrfs_test_opt(fs_info, SPACE_CACHE))
    1917          24 :                 super->cache_generation = root_item->generation;
    1918      412300 :         else if (test_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags))
    1919           2 :                 super->cache_generation = 0;
    1920      412348 :         if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
    1921      206137 :                 super->uuid_tree_generation = root_item->generation;
    1922      206174 : }
    1923             : 
    1924    41095696 : int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
    1925             : {
    1926    41095696 :         struct btrfs_transaction *trans;
    1927    41095696 :         int ret = 0;
    1928             : 
    1929    41095696 :         spin_lock(&info->trans_lock);
    1930    41117951 :         trans = info->running_transaction;
    1931    41117951 :         if (trans)
    1932    40787435 :                 ret = (trans->state >= TRANS_STATE_COMMIT_START);
    1933    41117951 :         spin_unlock(&info->trans_lock);
    1934    41109028 :         return ret;
    1935             : }
    1936             : 
    1937        4176 : int btrfs_transaction_blocked(struct btrfs_fs_info *info)
    1938             : {
    1939        4176 :         struct btrfs_transaction *trans;
    1940        4176 :         int ret = 0;
    1941             : 
    1942        4176 :         spin_lock(&info->trans_lock);
    1943        4176 :         trans = info->running_transaction;
    1944        4176 :         if (trans)
    1945         208 :                 ret = is_transaction_blocked(trans);
    1946        4176 :         spin_unlock(&info->trans_lock);
    1947        4176 :         return ret;
    1948             : }
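
Both helpers above sample fs_info->running_transaction and one of its fields only while holding trans_lock. A hedged userspace sketch of that lock-then-sample pattern, using a pthread mutex and made-up names (transaction_state_at_least is not a btrfs function), could look like this; compile with -pthread:

/*
 * Illustrative sketch only: read a shared pointer and a field of the object
 * it points to strictly under the protecting lock, as the helpers above do.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct transaction {
        int state;                      /* stand-in for the TRANS_STATE_* values */
};

static pthread_mutex_t trans_lock = PTHREAD_MUTEX_INITIALIZER;
static struct transaction *running_transaction;        /* may be NULL */

static bool transaction_state_at_least(int min_state)
{
        bool ret = false;

        pthread_mutex_lock(&trans_lock);
        if (running_transaction)
                ret = running_transaction->state >= min_state;
        pthread_mutex_unlock(&trans_lock);
        return ret;
}

int main(void)
{
        struct transaction t = { .state = 2 };

        printf("%d\n", transaction_state_at_least(1));  /* no transaction -> 0 */

        pthread_mutex_lock(&trans_lock);
        running_transaction = &t;
        pthread_mutex_unlock(&trans_lock);

        printf("%d\n", transaction_state_at_least(1));  /* state 2 >= 1 -> 1 */
        return 0;
}
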
    1949             : 
    1950           4 : void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans)
    1951             : {
    1952           4 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1953           4 :         struct btrfs_transaction *cur_trans;
    1954             : 
    1955             :         /* Kick the transaction kthread. */
    1956           4 :         set_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags);
    1957           4 :         wake_up_process(fs_info->transaction_kthread);
    1958             : 
    1959             :         /* take transaction reference */
    1960           4 :         cur_trans = trans->transaction;
    1961           4 :         refcount_inc(&cur_trans->use_count);
    1962             : 
    1963           4 :         btrfs_end_transaction(trans);
    1964             : 
    1965             :         /*
    1966             :          * Wait for the current transaction commit to start and block
    1967             :          * subsequent transaction joins
    1968             :          */
    1969           4 :         btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
    1970           5 :         wait_event(fs_info->transaction_blocked_wait,
    1971             :                    cur_trans->state >= TRANS_STATE_COMMIT_START ||
    1972             :                    TRANS_ABORTED(cur_trans));
    1973           4 :         btrfs_put_transaction(cur_trans);
    1974           4 : }
    1975             : 
    1976          16 : static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
    1977             : {
    1978          16 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1979          16 :         struct btrfs_transaction *cur_trans = trans->transaction;
    1980             : 
    1981          16 :         WARN_ON(refcount_read(&trans->use_count) > 1);
    1982             : 
    1983          16 :         btrfs_abort_transaction(trans, err);
    1984             : 
    1985          16 :         spin_lock(&fs_info->trans_lock);
    1986             : 
    1987             :         /*
    1988             :          * If the transaction is removed from the list, it means this
    1989             :          * transaction has been committed successfully, so it is impossible
    1990             :          * to call the cleanup function.
    1991             :          */
    1992          16 :         BUG_ON(list_empty(&cur_trans->list));
    1993             : 
    1994          16 :         if (cur_trans == fs_info->running_transaction) {
    1995          12 :                 cur_trans->state = TRANS_STATE_COMMIT_DOING;
    1996          12 :                 spin_unlock(&fs_info->trans_lock);
    1997             : 
    1998             :                 /*
    1999             :                  * The thread has already released the lockdep map as reader
    2000             :                  * already in btrfs_commit_transaction().
    2001             :                  */
    2002          12 :                 btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers);
    2003          12 :                 wait_event(cur_trans->writer_wait,
    2004             :                            atomic_read(&cur_trans->num_writers) == 1);
    2005             : 
    2006          12 :                 spin_lock(&fs_info->trans_lock);
    2007             :         }
    2008             : 
    2009             :         /*
    2010             :          * Now that we know no one else is still using the transaction we can
    2011             :          * the transaction from the list of transactions. This prevents the
    2012             :          * transaction kthread from cleaning up the transaction while some
    2013             :          * other task is still using it, which could result in a use-after-free
    2014             :          * on things like log trees, as it forces the transaction kthread to
    2015             :          * wait for this transaction to be cleaned up by us.
    2016             :          */
    2017          16 :         list_del_init(&cur_trans->list);
    2018             : 
    2019          16 :         spin_unlock(&fs_info->trans_lock);
    2020             : 
    2021          16 :         btrfs_cleanup_one_transaction(trans->transaction, fs_info);
    2022             : 
    2023          16 :         spin_lock(&fs_info->trans_lock);
    2024          16 :         if (cur_trans == fs_info->running_transaction)
    2025          12 :                 fs_info->running_transaction = NULL;
    2026          16 :         spin_unlock(&fs_info->trans_lock);
    2027             : 
    2028          16 :         if (trans->type & __TRANS_FREEZABLE)
    2029           2 :                 sb_end_intwrite(fs_info->sb);
    2030          16 :         btrfs_put_transaction(cur_trans);
    2031          16 :         btrfs_put_transaction(cur_trans);
    2032             : 
    2033          16 :         trace_btrfs_transaction_commit(fs_info);
    2034             : 
    2035          16 :         if (current->journal_info == trans)
    2036           0 :                 current->journal_info = NULL;
    2037             : 
    2038             :         /*
    2039             :          * If relocation is running, we can't cancel scrub because that will
    2040             :          * result in a deadlock. Before relocating a block group, relocation
    2041             :          * pauses scrub, then starts and commits a transaction before unpausing
    2042             :          * scrub. If the transaction commit is being done by the relocation
    2043             :          * task or triggered by another task and the relocation task is waiting
    2044             :          * for the commit, and we end up here due to an error in the commit
    2045             :          * path, then calling btrfs_scrub_cancel() will deadlock, as we are
    2046             :          * asking for scrub to stop while having it asked to be paused higher
    2047             :          * above in relocation code.
    2048             :          */
    2049          16 :         if (!test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
    2050          16 :                 btrfs_scrub_cancel(fs_info);
    2051             : 
    2052          16 :         kmem_cache_free(btrfs_trans_handle_cachep, trans);
    2053          16 : }
    2054             : 
    2055             : /*
    2056             :  * Release reserved delayed ref space of all pending block groups of the
    2057             :  * transaction and remove them from the list
    2058             :  */
    2059          16 : static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
    2060             : {
    2061          16 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2062          16 :         struct btrfs_block_group *block_group, *tmp;
    2063             : 
    2064          16 :         list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
    2065           0 :                 btrfs_delayed_refs_rsv_release(fs_info, 1);
    2066           0 :                 list_del_init(&block_group->bg_list);
    2067             :         }
    2068          16 : }
    2069             : 
    2070      206186 : static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
    2071             : {
    2072             :         /*
    2073             :          * We use try_to_writeback_inodes_sb() here because if we used
    2074             :          * btrfs_start_delalloc_roots we would deadlock with fs freeze.
    2075             :          * We are currently holding the fs freeze lock; if we do an async flush
    2076             :          * we'll do btrfs_join_transaction() and deadlock because we need to
    2077             :          * wait for the fs freeze lock.  Using the direct flushing we benefit
    2078             :          * from already being in a transaction and our join_transaction doesn't
    2079             :          * have to re-take the fs freeze lock.
    2080             :          *
    2081             :          * Note that try_to_writeback_inodes_sb() will only trigger writeback
    2082             :          * if it can read lock sb->s_umount. It will always be able to lock it,
    2083             :          * except when the filesystem is being unmounted or being frozen, but in
    2084             :          * those cases sync_filesystem() is called, which results in calling
    2085             :          * writeback_inodes_sb() while holding a write lock on sb->s_umount.
    2086             :          * Note that we don't call writeback_inodes_sb() directly, because it
    2087             :          * will emit a warning if sb->s_umount is not locked.
    2088             :          */
    2089      206186 :         if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
    2090           3 :                 try_to_writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
    2091      206186 :         return 0;
    2092             : }
    2093             : 
    2094      206175 : static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
    2095             : {
    2096      206175 :         if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
    2097           3 :                 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
    2098      206175 : }
    2099             : 
    2100             : /*
    2101             :  * Add the pending snapshot associated with the given transaction handle to
    2102             :  * the transaction's list of pending snapshots. This must be called after the
    2103             :  * transaction commit has started and while holding fs_info->trans_lock.
    2104             :  * This serves to guarantee a caller of btrfs_commit_transaction() that it can
    2105             :  * safely free the pending snapshot pointer in case btrfs_commit_transaction()
    2106             :  * returns an error.
    2107             :  */
    2108      208990 : static void add_pending_snapshot(struct btrfs_trans_handle *trans)
    2109             : {
    2110      208990 :         struct btrfs_transaction *cur_trans = trans->transaction;
    2111             : 
    2112      208990 :         if (!trans->pending_snapshot)
    2113             :                 return;
    2114             : 
    2115        1026 :         lockdep_assert_held(&trans->fs_info->trans_lock);
    2116        1026 :         ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START);
    2117             : 
    2118        1026 :         list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
    2119             : }
    2120             : 
    2121             : static void update_commit_stats(struct btrfs_fs_info *fs_info, ktime_t interval)
    2122             : {
    2123      206170 :         fs_info->commit_stats.commit_count++;
    2124      206170 :         fs_info->commit_stats.last_commit_dur = interval;
    2125      206170 :         fs_info->commit_stats.max_commit_dur =
    2126      206170 :                         max_t(u64, fs_info->commit_stats.max_commit_dur, interval);
    2127      206170 :         fs_info->commit_stats.total_commit_dur += interval;
    2128             : }
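
update_commit_stats() keeps four running values per filesystem: the commit count, the duration of the last commit, the maximum duration seen, and the total. A small userspace sketch of the same bookkeeping (the struct and names are illustrative, not the kernel's) shows how an average commit duration can then be derived as total / count:

/*
 * Illustrative sketch only: running commit-duration bookkeeping plus a derived
 * average; field names mirror the kernel ones but this is not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

struct commit_stats {
        uint64_t commit_count;
        uint64_t last_commit_dur;
        uint64_t max_commit_dur;
        uint64_t total_commit_dur;
};

static void record_commit(struct commit_stats *s, uint64_t interval_ns)
{
        s->commit_count++;
        s->last_commit_dur = interval_ns;
        if (interval_ns > s->max_commit_dur)
                s->max_commit_dur = interval_ns;
        s->total_commit_dur += interval_ns;
}

int main(void)
{
        struct commit_stats s = { 0 };
        const uint64_t samples_ns[] = { 1200000, 800000, 2500000 };

        for (size_t i = 0; i < sizeof(samples_ns) / sizeof(samples_ns[0]); i++)
                record_commit(&s, samples_ns[i]);

        printf("commits: %llu, avg: %llu ns, max: %llu ns\n",
               (unsigned long long)s.commit_count,
               (unsigned long long)(s.total_commit_dur / s.commit_count),
               (unsigned long long)s.max_commit_dur);
        return 0;
}
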
    2129             : 
    2130      208990 : int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
    2131             : {
    2132      208990 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2133      208990 :         struct btrfs_transaction *cur_trans = trans->transaction;
    2134      208990 :         struct btrfs_transaction *prev_trans = NULL;
    2135      208990 :         int ret;
    2136      208990 :         ktime_t start_time;
    2137      208990 :         ktime_t interval;
    2138             : 
    2139      208990 :         ASSERT(refcount_read(&trans->use_count) == 1);
    2140      208990 :         btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2141             : 
    2142      208990 :         clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
    2143             : 
    2144             :         /* Stop the commit early if ->aborted is set */
    2145      208988 :         if (TRANS_ABORTED(cur_trans)) {
    2146           0 :                 ret = cur_trans->aborted;
    2147           0 :                 goto lockdep_trans_commit_start_release;
    2148             :         }
    2149             : 
    2150      208988 :         btrfs_trans_release_metadata(trans);
    2151      208993 :         trans->block_rsv = NULL;
    2152             : 
    2153             :         /*
    2154             :          * We only want one transaction commit doing the flushing so we do not
    2155             :          * waste a bunch of time on lock contention on the extent root node.
    2156             :          */
    2157      208993 :         if (!test_and_set_bit(BTRFS_DELAYED_REFS_FLUSHING,
    2158      208993 :                               &cur_trans->delayed_refs.flags)) {
    2159             :                 /*
    2160             :                  * Make a pass through all the delayed refs we have so far.
    2161             :                  * Any running threads may add more while we are here.
    2162             :                  */
    2163      206187 :                 ret = btrfs_run_delayed_refs(trans, 0);
    2164      206187 :                 if (ret)
    2165           1 :                         goto lockdep_trans_commit_start_release;
    2166             :         }
    2167             : 
    2168      208992 :         btrfs_create_pending_block_groups(trans);
    2169             : 
    2170      209000 :         if (!test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &cur_trans->flags)) {
    2171      206188 :                 int run_it = 0;
    2172             : 
    2173             :                 /* this mutex is also taken before trying to set
    2174             :                  * block groups readonly.  We need to make sure
    2175             :                  * that nobody has set a block group readonly
    2176             :                  * after extents from that block group have been
    2177             :                  * allocated for cache files.  btrfs_set_block_group_ro
    2178             :                  * will wait for the transaction to commit if it
    2179             :                  * finds BTRFS_TRANS_DIRTY_BG_RUN set.
    2180             :                  *
    2181             :                  * The BTRFS_TRANS_DIRTY_BG_RUN flag is also used to make sure
    2182             :                  * only one process starts all the block group IO.  It wouldn't
    2183             :                  * hurt to have more than one go through, but there's no
    2184             :                  * real advantage to it either.
    2185             :                  */
    2186      206188 :                 mutex_lock(&fs_info->ro_block_group_mutex);
    2187      206188 :                 if (!test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN,
    2188             :                                       &cur_trans->flags))
    2189      206186 :                         run_it = 1;
    2190      206188 :                 mutex_unlock(&fs_info->ro_block_group_mutex);
    2191             : 
    2192      206188 :                 if (run_it) {
    2193      206186 :                         ret = btrfs_start_dirty_block_groups(trans);
    2194      206186 :                         if (ret)
    2195           0 :                                 goto lockdep_trans_commit_start_release;
    2196             :                 }
    2197             :         }
    2198             : 
    2199      209000 :         spin_lock(&fs_info->trans_lock);
    2200      209001 :         if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
    2201        2815 :                 enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
    2202             : 
    2203        2815 :                 add_pending_snapshot(trans);
    2204             : 
    2205        2815 :                 spin_unlock(&fs_info->trans_lock);
    2206        2815 :                 refcount_inc(&cur_trans->use_count);
    2207             : 
    2208        2815 :                 if (trans->in_fsync)
    2209         123 :                         want_state = TRANS_STATE_SUPER_COMMITTED;
    2210             : 
    2211        2815 :                 btrfs_trans_state_lockdep_release(fs_info,
    2212             :                                                   BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2213        2815 :                 ret = btrfs_end_transaction(trans);
    2214        2812 :                 wait_for_commit(cur_trans, want_state);
    2215             : 
    2216        2812 :                 if (TRANS_ABORTED(cur_trans))
    2217           1 :                         ret = cur_trans->aborted;
    2218             : 
    2219        2812 :                 btrfs_put_transaction(cur_trans);
    2220             : 
    2221        2812 :                 return ret;
    2222             :         }
    2223             : 
    2224      206186 :         cur_trans->state = TRANS_STATE_COMMIT_START;
    2225      206186 :         wake_up(&fs_info->transaction_blocked_wait);
    2226      206186 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2227             : 
    2228      206186 :         if (cur_trans->list.prev != &fs_info->trans_list) {
    2229        3545 :                 enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
    2230             : 
    2231        3545 :                 if (trans->in_fsync)
    2232          43 :                         want_state = TRANS_STATE_SUPER_COMMITTED;
    2233             : 
    2234        3545 :                 prev_trans = list_entry(cur_trans->list.prev,
    2235             :                                         struct btrfs_transaction, list);
    2236        3545 :                 if (prev_trans->state < want_state) {
    2237        3516 :                         refcount_inc(&prev_trans->use_count);
    2238        3516 :                         spin_unlock(&fs_info->trans_lock);
    2239             : 
    2240        3516 :                         wait_for_commit(prev_trans, want_state);
    2241             : 
    2242        3516 :                         ret = READ_ONCE(prev_trans->aborted);
    2243             : 
    2244        3516 :                         btrfs_put_transaction(prev_trans);
    2245        3516 :                         if (ret)
    2246           0 :                                 goto lockdep_release;
    2247             :                 } else {
    2248          29 :                         spin_unlock(&fs_info->trans_lock);
    2249             :                 }
    2250             :         } else {
    2251      202641 :                 spin_unlock(&fs_info->trans_lock);
    2252             :                 /*
    2253             :                  * The previous transaction was aborted and was already removed
    2254             :                  * from the list of transactions at fs_info->trans_list. So we
    2255             :                  * abort to prevent writing a new superblock that reflects a
    2256             :          * corrupt state (pointing to trees with unwritten nodes/leaves).
    2257             :                  */
    2258      202641 :                 if (BTRFS_FS_ERROR(fs_info)) {
    2259           0 :                         ret = -EROFS;
    2260           0 :                         goto lockdep_release;
    2261             :                 }
    2262             :         }
    2263             : 
    2264             :         /*
    2265             :          * Get the time spent on the work done by the commit thread and not
    2266             :          * the time spent waiting on a previous commit
    2267             :          */
    2268      206186 :         start_time = ktime_get_ns();
    2269             : 
    2270      206186 :         extwriter_counter_dec(cur_trans, trans->type);
    2271             : 
    2272      206186 :         ret = btrfs_start_delalloc_flush(fs_info);
    2273      206186 :         if (ret)
    2274           0 :                 goto lockdep_release;
    2275             : 
    2276      206186 :         ret = btrfs_run_delayed_items(trans);
    2277      206186 :         if (ret)
    2278          11 :                 goto lockdep_release;
    2279             : 
    2280             :         /*
    2281             :          * The thread has started/joined the transaction thus it holds the
    2282             :          * lockdep map as a reader. It has to release it before acquiring the
    2283             :          * lockdep map as a writer.
    2284             :          */
    2285      206175 :         btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
    2286      206175 :         btrfs_might_wait_for_event(fs_info, btrfs_trans_num_extwriters);
    2287      216156 :         wait_event(cur_trans->writer_wait,
    2288             :                    extwriter_counter_read(cur_trans) == 0);
    2289             : 
    2290             :         /* Some pending items might have been added after the previous flush. */
    2291      206175 :         ret = btrfs_run_delayed_items(trans);
    2292      206175 :         if (ret) {
    2293           0 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
    2294           0 :                 goto cleanup_transaction;
    2295             :         }
    2296             : 
    2297      206175 :         btrfs_wait_delalloc_flush(fs_info);
    2298             : 
    2299             :         /*
    2300             :          * Wait for all ordered extents started by a fast fsync that joined this
    2301             :          * transaction. Otherwise if this transaction commits before the ordered
    2302             :          * extents complete we lose logged data after a power failure.
    2303             :          */
    2304      206175 :         btrfs_might_wait_for_event(fs_info, btrfs_trans_pending_ordered);
    2305      206175 :         wait_event(cur_trans->pending_wait,
    2306             :                    atomic_read(&cur_trans->pending_ordered) == 0);
    2307             : 
    2308      206175 :         btrfs_scrub_pause(fs_info);
    2309             :         /*
    2310             :          * Ok now we need to make sure to block out any other joins while we
    2311             :          * commit the transaction.  We could have started a join before setting
    2312             :          * COMMIT_DOING so make sure to wait for num_writers to == 1 again.
    2313             :          */
    2314      206175 :         spin_lock(&fs_info->trans_lock);
    2315      206175 :         add_pending_snapshot(trans);
    2316      206175 :         cur_trans->state = TRANS_STATE_COMMIT_DOING;
    2317      206175 :         spin_unlock(&fs_info->trans_lock);
    2318             : 
    2319             :         /*
    2320             :          * The thread has started/joined the transaction thus it holds the
    2321             :          * lockdep map as a reader. It has to release it before acquiring the
    2322             :          * lockdep map as a writer.
    2323             :          */
    2324      206175 :         btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
    2325      206175 :         btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers);
    2326      207175 :         wait_event(cur_trans->writer_wait,
    2327             :                    atomic_read(&cur_trans->num_writers) == 1);
    2328             : 
    2329             :         /*
    2330             :          * Make lockdep happy by acquiring the state locks after
    2331             :          * btrfs_trans_num_writers is released. If we acquired the state locks
    2332             :          * before releasing the btrfs_trans_num_writers lock then lockdep would
    2333             :          * complain because we did not follow the reverse order unlocking rule.
    2334             :          */
    2335      206175 :         btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
    2336      206175 :         btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
    2337      206175 :         btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2338             : 
    2339             :         /*
    2340             :          * We've started the commit, so clear the flag in case we were triggered to
    2341             :          * do an async commit but somebody else started before the transaction
    2342             :          * kthread could do the work.
    2343             :          */
    2344      206175 :         clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags);
    2345             : 
    2346      206175 :         if (TRANS_ABORTED(cur_trans)) {
    2347           0 :                 ret = cur_trans->aborted;
    2348           0 :                 btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2349           0 :                 goto scrub_continue;
    2350             :         }
    2351             :         /*
    2352             :          * the reloc mutex makes sure that we stop
    2353             :          * the balancing code from coming in and moving
    2354             :          * extents around in the middle of the commit
    2355             :          */
    2356      206175 :         mutex_lock(&fs_info->reloc_mutex);
    2357             : 
    2358             :         /*
    2359             :          * We needn't worry about the delayed items because we will
    2360             :          * deal with them in create_pending_snapshot(), which is the
    2361             :          * core function of the snapshot creation.
    2362             :          */
    2363      206175 :         ret = create_pending_snapshots(trans);
    2364      206175 :         if (ret)
    2365           0 :                 goto unlock_reloc;
    2366             : 
    2367             :         /*
    2368             :          * We insert the dir indexes of the snapshots and update the inode
    2369             :          * of the snapshots' parents after the snapshot creation, so there
    2370             :          * are some delayed items which are not dealt with. Now deal with
    2371             :          * them.
    2372             :          *
    2373             :          * We needn't worry that this operation will corrupt the snapshots,
    2374             :          * because all the tree which are snapshoted will be forced to COW
    2375             :          * because all the trees which were snapshotted will be forced to COW
    2376             :          * their nodes and leaves.
    2377      206175 :         ret = btrfs_run_delayed_items(trans);
    2378      206175 :         if (ret)
    2379           0 :                 goto unlock_reloc;
    2380             : 
    2381      206175 :         ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
    2382      206175 :         if (ret)
    2383           0 :                 goto unlock_reloc;
    2384             : 
    2385             :         /*
    2386             :          * make sure none of the code above managed to slip in a
    2387             :          * delayed item
    2388             :          */
    2389      206175 :         btrfs_assert_delayed_root_empty(fs_info);
    2390             : 
    2391      206175 :         WARN_ON(cur_trans != trans->transaction);
    2392             : 
    2393      206175 :         ret = commit_fs_roots(trans);
    2394      206175 :         if (ret)
    2395           0 :                 goto unlock_reloc;
    2396             : 
    2397             :         /* commit_fs_roots() gets rid of all the tree log roots, so it is now
    2398             :          * safe to free the root of the tree log roots.
    2399             :          */
    2400      206175 :         btrfs_free_log_root_tree(trans, fs_info);
    2401             : 
    2402             :         /*
    2403             :          * Since fs roots are all committed, we can get a quite accurate
    2404             :          * new_roots. So let's do quota accounting.
    2405             :          */
    2406      206175 :         ret = btrfs_qgroup_account_extents(trans);
    2407      206175 :         if (ret < 0)
    2408           0 :                 goto unlock_reloc;
    2409             : 
    2410      206175 :         ret = commit_cowonly_roots(trans);
    2411      206175 :         if (ret)
    2412           0 :                 goto unlock_reloc;
    2413             : 
    2414             :         /*
    2415             :          * The tasks which save the space cache and inode cache may also
    2416             :          * update ->aborted, so check it.
    2417             :          */
    2418      206175 :         if (TRANS_ABORTED(cur_trans)) {
    2419           1 :                 ret = cur_trans->aborted;
    2420           1 :                 goto unlock_reloc;
    2421             :         }
    2422             : 
    2423      206174 :         cur_trans = fs_info->running_transaction;
    2424             : 
    2425      206174 :         btrfs_set_root_node(&fs_info->tree_root->root_item,
    2426      206174 :                             fs_info->tree_root->node);
    2427      206174 :         list_add_tail(&fs_info->tree_root->dirty_list,
    2428             :                       &cur_trans->switch_commits);
    2429             : 
    2430      206174 :         btrfs_set_root_node(&fs_info->chunk_root->root_item,
    2431      206174 :                             fs_info->chunk_root->node);
    2432      206174 :         list_add_tail(&fs_info->chunk_root->dirty_list,
    2433             :                       &cur_trans->switch_commits);
    2434             : 
    2435      206174 :         if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
    2436           0 :                 btrfs_set_root_node(&fs_info->block_group_root->root_item,
    2437           0 :                                     fs_info->block_group_root->node);
    2438           0 :                 list_add_tail(&fs_info->block_group_root->dirty_list,
    2439             :                               &cur_trans->switch_commits);
    2440             :         }
    2441             : 
    2442      206174 :         switch_commit_roots(trans);
    2443             : 
    2444      206174 :         ASSERT(list_empty(&cur_trans->dirty_bgs));
    2445      206174 :         ASSERT(list_empty(&cur_trans->io_bgs));
    2446      206174 :         update_super_roots(fs_info);
    2447             : 
    2448      206174 :         btrfs_set_super_log_root(fs_info->super_copy, 0);
    2449      206174 :         btrfs_set_super_log_root_level(fs_info->super_copy, 0);
    2450      412348 :         memcpy(fs_info->super_for_commit, fs_info->super_copy,
    2451             :                sizeof(*fs_info->super_copy));
    2452             : 
    2453      206174 :         btrfs_commit_device_sizes(cur_trans);
    2454             : 
    2455      206174 :         clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags);
    2456      206174 :         clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags);
    2457             : 
    2458      206174 :         btrfs_trans_release_chunk_metadata(trans);
    2459             : 
    2460             :         /*
    2461             :          * Before changing the transaction state to TRANS_STATE_UNBLOCKED and
    2462             :          * setting fs_info->running_transaction to NULL, lock tree_log_mutex to
    2463             :          * make sure that before we commit our superblock, no other task can
    2464             :          * start a new transaction and commit a log tree before we commit our
    2465             :          * superblock. Anyone trying to commit a log tree locks this mutex before
    2466             :          * writing its superblock.
    2467             :          */
    2468      206174 :         mutex_lock(&fs_info->tree_log_mutex);
    2469             : 
    2470      206174 :         spin_lock(&fs_info->trans_lock);
    2471      206174 :         cur_trans->state = TRANS_STATE_UNBLOCKED;
    2472      206174 :         fs_info->running_transaction = NULL;
    2473      206174 :         spin_unlock(&fs_info->trans_lock);
    2474      206174 :         mutex_unlock(&fs_info->reloc_mutex);
    2475             : 
    2476      206174 :         wake_up(&fs_info->transaction_wait);
    2477      206174 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2478             : 
    2479             :         /* If we have features changed, wake up the cleaner to update sysfs. */
    2480      412348 :         if (test_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags) &&
    2481          41 :             fs_info->cleaner_kthread)
    2482          41 :                 wake_up_process(fs_info->cleaner_kthread);
    2483             : 
    2484      206174 :         ret = btrfs_write_and_wait_transaction(trans);
    2485      206174 :         if (ret) {
    2486           4 :                 btrfs_handle_fs_error(fs_info, ret,
    2487             :                                       "Error while writing out transaction");
    2488           4 :                 mutex_unlock(&fs_info->tree_log_mutex);
    2489           4 :                 goto scrub_continue;
    2490             :         }
    2491             : 
    2492      206170 :         ret = write_all_supers(fs_info, 0);
    2493             :         /*
    2494             :          * The super is written, so we can safely allow the tree-loggers
    2495             :          * to go about their business.
    2496             :          */
    2497      206170 :         mutex_unlock(&fs_info->tree_log_mutex);
    2498      206170 :         if (ret)
    2499           0 :                 goto scrub_continue;
    2500             : 
    2501             :         /*
    2502             :          * We needn't acquire the lock here because there is no other task
    2503             :          * which can change it.
    2504             :          */
    2505      206170 :         cur_trans->state = TRANS_STATE_SUPER_COMMITTED;
    2506      206170 :         wake_up(&cur_trans->commit_wait);
    2507      206170 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
    2508             : 
    2509      206170 :         btrfs_finish_extent_commit(trans);
    2510             : 
    2511      412340 :         if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags))
    2512         531 :                 btrfs_clear_space_info_full(fs_info);
    2513             : 
    2514      206170 :         fs_info->last_trans_committed = cur_trans->transid;
    2515             :         /*
    2516             :          * We needn't acquire the lock here because there is no other task
    2517             :          * which can change it.
    2518             :          */
    2519      206170 :         cur_trans->state = TRANS_STATE_COMPLETED;
    2520      206170 :         wake_up(&cur_trans->commit_wait);
    2521      206170 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
    2522             : 
    2523      206170 :         spin_lock(&fs_info->trans_lock);
    2524      206170 :         list_del_init(&cur_trans->list);
    2525      206170 :         spin_unlock(&fs_info->trans_lock);
    2526             : 
    2527      206170 :         btrfs_put_transaction(cur_trans);
    2528      206170 :         btrfs_put_transaction(cur_trans);
    2529             : 
    2530      206170 :         if (trans->type & __TRANS_FREEZABLE)
    2531      191383 :                 sb_end_intwrite(fs_info->sb);
    2532             : 
    2533      206170 :         trace_btrfs_transaction_commit(fs_info);
    2534             : 
    2535      206170 :         interval = ktime_get_ns() - start_time;
    2536             : 
    2537      206170 :         btrfs_scrub_continue(fs_info);
    2538             : 
    2539      206170 :         if (current->journal_info == trans)
    2540      206170 :                 current->journal_info = NULL;
    2541             : 
    2542      206170 :         kmem_cache_free(btrfs_trans_handle_cachep, trans);
    2543             : 
    2544      206170 :         update_commit_stats(fs_info, interval);
    2545             : 
    2546      206170 :         return ret;
    2547             : 
    2548           1 : unlock_reloc:
    2549           1 :         mutex_unlock(&fs_info->reloc_mutex);
    2550           5 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2551           5 : scrub_continue:
    2552           5 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
    2553           5 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
    2554           5 :         btrfs_scrub_continue(fs_info);
    2555          16 : cleanup_transaction:
    2556          16 :         btrfs_trans_release_metadata(trans);
    2557          16 :         btrfs_cleanup_pending_block_groups(trans);
    2558          16 :         btrfs_trans_release_chunk_metadata(trans);
    2559          16 :         trans->block_rsv = NULL;
    2560          16 :         btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
    2561          16 :         if (current->journal_info == trans)
    2562          16 :                 current->journal_info = NULL;
    2563          16 :         cleanup_transaction(trans, ret);
    2564             : 
    2565          16 :         return ret;
    2566             : 
    2567          11 : lockdep_release:
    2568          11 :         btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
    2569          11 :         btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
    2570          11 :         goto cleanup_transaction;
    2571             : 
    2572           1 : lockdep_trans_commit_start_release:
    2573           1 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2574           1 :         btrfs_end_transaction(trans);
    2575           1 :         return ret;
    2576             : }
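
For orientation, the commit path above moves a transaction through its states in a fixed order: COMMIT_START (blocks further commits and waits on any previous transaction), COMMIT_DOING (waits for num_writers to drop to 1), UNBLOCKED (new transactions may start while the trees are written), SUPER_COMMITTED (superblocks are on disk), and finally COMPLETED. The small table below only restates that order as seen in the function; it is an editorial illustration, not code from this file:

/*
 * Illustrative sketch only: the state order btrfs_commit_transaction() walks,
 * restated as a printable table.  The names mirror the TRANS_STATE_* values
 * used above.
 */
#include <stdio.h>

int main(void)
{
        static const char * const commit_path[] = {
                "TRANS_STATE_RUNNING",
                "TRANS_STATE_COMMIT_START",     /* block new commits, wait for previous */
                "TRANS_STATE_COMMIT_DOING",     /* wait for num_writers == 1 */
                "TRANS_STATE_UNBLOCKED",        /* new transactions may start again */
                "TRANS_STATE_SUPER_COMMITTED",  /* superblocks written */
                "TRANS_STATE_COMPLETED",
        };

        for (size_t i = 0; i < sizeof(commit_path) / sizeof(commit_path[0]); i++)
                printf("%zu: %s\n", i, commit_path[i]);
        return 0;
}
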
    2577             : 
    2578             : /*
    2579             :  * Return < 0 on error,
    2580             :  * 0 if there are no more dead_roots at the time of the call,
    2581             :  * 1 if there are more to be processed, in which case call again.
    2582             :  *
    2583             :  * A return value of 1 indicates there are certainly more snapshots to delete,
    2584             :  * but if a new one arrives during processing, the function may still return 0.
    2585             :  * We don't mind, because btrfs_commit_super() will poke the cleaner thread and
    2586             :  * it will process the new root a few seconds later.
    2587             :  */
    2588       44013 : int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
    2589             : {
    2590       44013 :         struct btrfs_root *root;
    2591       44013 :         int ret;
    2592             : 
    2593       44013 :         spin_lock(&fs_info->trans_lock);
    2594       44013 :         if (list_empty(&fs_info->dead_roots)) {
    2595       43894 :                 spin_unlock(&fs_info->trans_lock);
    2596       43894 :                 return 0;
    2597             :         }
    2598         119 :         root = list_first_entry(&fs_info->dead_roots,
    2599             :                         struct btrfs_root, root_list);
    2600         119 :         list_del_init(&root->root_list);
    2601         119 :         spin_unlock(&fs_info->trans_lock);
    2602             : 
    2603         119 :         btrfs_debug(fs_info, "cleaner removing %llu", root->root_key.objectid);
    2604             : 
    2605         119 :         btrfs_kill_all_delayed_nodes(root);
    2606             : 
    2607         119 :         if (btrfs_header_backref_rev(root->node) <
    2608             :                         BTRFS_MIXED_BACKREF_REV)
    2609           0 :                 ret = btrfs_drop_snapshot(root, 0, 0);
    2610             :         else
    2611         119 :                 ret = btrfs_drop_snapshot(root, 1, 0);
    2612             : 
    2613         119 :         btrfs_put_root(root);
    2614         119 :         return (ret < 0) ? 0 : 1;
    2615             : }
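
The calling convention documented above (0 means nothing left, 1 means call again, and errors are folded into 0) implies a simple drain loop in the caller. A userspace model of such a loop, with a stub standing in for btrfs_clean_one_deleted_snapshot(), might look like this; it is illustrative only:

/*
 * Illustrative sketch only: drain loop over a "call me again" style helper,
 * with a stub in place of btrfs_clean_one_deleted_snapshot().
 */
#include <stdio.h>

static int dead_roots = 3;              /* stand-in for fs_info->dead_roots */

static int clean_one_deleted_snapshot(void)
{
        if (dead_roots == 0)
                return 0;               /* nothing left to do */
        dead_roots--;
        printf("dropped one dead root, %d remaining\n", dead_roots);
        return 1;                       /* there may be more, call again */
}

int main(void)
{
        /* Keep calling until the helper reports it is done. */
        while (clean_one_deleted_snapshot() > 0)
                ;
        return 0;
}
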
    2616             : 
    2617             : /*
    2618             :  * We only mark the transaction aborted and then set the file system read-only.
    2619             :  * This will prevent new transactions from starting or trying to join this
    2620             :  * one.
    2621             :  *
    2622             :  * This means that error recovery at the call site is limited to freeing
    2623             :  * any local memory allocations and passing the error code up without
    2624             :  * further cleanup. The transaction should complete as it normally would
    2625             :  * in the call path but will return -EIO.
    2626             :  *
    2627             :  * We'll complete the cleanup in btrfs_end_transaction and
    2628             :  * btrfs_commit_transaction.
    2629             :  */
    2630          35 : void __cold __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
    2631             :                                       const char *function,
    2632             :                                       unsigned int line, int errno, bool first_hit)
    2633             : {
    2634          35 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2635             : 
    2636          35 :         WRITE_ONCE(trans->aborted, errno);
    2637          35 :         WRITE_ONCE(trans->transaction->aborted, errno);
    2638          35 :         if (first_hit && errno == -ENOSPC)
    2639           0 :                 btrfs_dump_space_info_for_trans_abort(fs_info);
    2640             :         /* Wake up anybody who may be waiting on this transaction */
    2641          35 :         wake_up(&fs_info->transaction_wait);
    2642          35 :         wake_up(&fs_info->transaction_blocked_wait);
    2643          35 :         __btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
    2644          35 : }
    2645             : 
    2646          11 : int __init btrfs_transaction_init(void)
    2647             : {
    2648          11 :         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
    2649             :                         sizeof(struct btrfs_trans_handle), 0,
    2650             :                         SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
    2651          11 :         if (!btrfs_trans_handle_cachep)
    2652           0 :                 return -ENOMEM;
    2653             :         return 0;
    2654             : }
    2655             : 
    2656           0 : void __cold btrfs_transaction_exit(void)
    2657             : {
    2658           0 :         kmem_cache_destroy(btrfs_trans_handle_cachep);
    2659           0 : }

Generated by: LCOV version 1.14