LCOV - code coverage report
Current view: top level - fs/btrfs - transaction.c (source / functions)
Test:         fstests of 6.5.0-rc3-djwx @ Mon Jul 31 20:08:22 PDT 2023
Date:         2023-07-31 20:08:22

                 Hit    Total    Coverage
    Lines:      1058     1209      87.5 %
    Functions:    54       56      96.4 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2007 Oracle.  All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/fs.h>
       7             : #include <linux/slab.h>
       8             : #include <linux/sched.h>
       9             : #include <linux/sched/mm.h>
      10             : #include <linux/writeback.h>
      11             : #include <linux/pagemap.h>
      12             : #include <linux/blkdev.h>
      13             : #include <linux/uuid.h>
      14             : #include <linux/timekeeping.h>
      15             : #include "misc.h"
      16             : #include "ctree.h"
      17             : #include "disk-io.h"
      18             : #include "transaction.h"
      19             : #include "locking.h"
      20             : #include "tree-log.h"
      21             : #include "volumes.h"
      22             : #include "dev-replace.h"
      23             : #include "qgroup.h"
      24             : #include "block-group.h"
      25             : #include "space-info.h"
      26             : #include "zoned.h"
      27             : #include "fs.h"
      28             : #include "accessors.h"
      29             : #include "extent-tree.h"
      30             : #include "root-tree.h"
      31             : #include "defrag.h"
      32             : #include "dir-item.h"
      33             : #include "uuid-tree.h"
      34             : #include "ioctl.h"
      35             : #include "relocation.h"
      36             : #include "scrub.h"
      37             : 
      38             : static struct kmem_cache *btrfs_trans_handle_cachep;
      39             : 
      40             : #define BTRFS_ROOT_TRANS_TAG 0
      41             : 
      42             : /*
      43             :  * Transaction states and transitions
      44             :  *
      45             :  * No running transaction (fs tree blocks are not modified)
      46             :  * |
      47             :  * | To next stage:
      48             :  * |  Call any start_transaction() variant except btrfs_join_transaction_nostart().
      49             :  * V
      50             :  * Transaction N [[TRANS_STATE_RUNNING]]
      51             :  * |
      52             :  * | New trans handles can be attached to transaction N by calling any of the
      53             :  * | start_transaction() variants.
      54             :  * |
      55             :  * | To next stage:
      56             :  * |  Call btrfs_commit_transaction() on any trans handle attached to
      57             :  * |  transaction N
      58             :  * V
      59             :  * Transaction N [[TRANS_STATE_COMMIT_START]]
      60             :  * |
      61             :  * | Will wait for previous running transaction to completely finish if there
      62             :  * | is one
      63             :  * |
      64             :  * | Then one of the following happens:
      65             :  * | - Wait for all other trans handle holders to release.
      66             :  * |   The btrfs_commit_transaction() caller will do the commit work.
      67             :  * | - Wait for current transaction to be committed by others.
      68             :  * |   Other btrfs_commit_transaction() caller will do the commit work.
      69             :  * |
      70             :  * | At this stage, only btrfs_join_transaction*() variants can attach
      71             :  * | to this running transaction.
      72             :  * | All other variants will wait for current one to finish and attach to
      73             :  * | transaction N+1.
      74             :  * |
      75             :  * | To next stage:
      76             :  * |  Caller is chosen to commit transaction N, and all other trans handles
      77             :  * |  have been released.
      78             :  * V
      79             :  * Transaction N [[TRANS_STATE_COMMIT_DOING]]
      80             :  * |
      81             :  * | The heavy lifting transaction work is started.
      82             :  * | From running delayed refs (modifying the extent tree) to creating pending
      83             :  * | snapshots and running qgroups.
      84             :  * | In short, modify supporting trees to reflect modifications of subvolume
      85             :  * | trees.
      86             :  * |
      87             :  * | At this stage, all start_transaction() calls will wait for this
      88             :  * | transaction to finish and attach to transaction N+1.
      89             :  * |
      90             :  * | To next stage:
      91             :  * |  Until all supporting trees are updated.
      92             :  * V
      93             :  * Transaction N [[TRANS_STATE_UNBLOCKED]]
      94             :  * |                                                Transaction N+1
      95             :  * | All needed trees are modified, thus we only    [[TRANS_STATE_RUNNING]]
      96             :  * | need to write them back to disk and update     |
      97             :  * | super blocks.                                  |
      98             :  * |                                                |
      99             :  * | At this stage, new transaction is allowed to   |
     100             :  * | start.                                         |
     101             :  * | All new start_transaction() calls will be      |
     102             :  * | attached to transid N+1.                       |
     103             :  * |                                                |
     104             :  * | To next stage:                                 |
     105             :  * |  Until all tree blocks and super blocks are    |
     106             :  * |  written to block devices                      |
     107             :  * V                                                |
     108             :  * Transaction N [[TRANS_STATE_COMPLETED]]          V
     109             :  *   All tree blocks and super blocks are written.  Transaction N+1
     110             :  *   This transaction is finished and all its       [[TRANS_STATE_COMMIT_START]]
     111             :  *   data structures will be cleaned up.            | Life goes on
     112             :  */
     113             : static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
     114             :         [TRANS_STATE_RUNNING]           = 0U,
     115             :         [TRANS_STATE_COMMIT_START]      = (__TRANS_START | __TRANS_ATTACH),
     116             :         [TRANS_STATE_COMMIT_DOING]      = (__TRANS_START |
     117             :                                            __TRANS_ATTACH |
     118             :                                            __TRANS_JOIN |
     119             :                                            __TRANS_JOIN_NOSTART),
     120             :         [TRANS_STATE_UNBLOCKED]         = (__TRANS_START |
     121             :                                            __TRANS_ATTACH |
     122             :                                            __TRANS_JOIN |
     123             :                                            __TRANS_JOIN_NOLOCK |
     124             :                                            __TRANS_JOIN_NOSTART),
     125             :         [TRANS_STATE_SUPER_COMMITTED]   = (__TRANS_START |
     126             :                                            __TRANS_ATTACH |
     127             :                                            __TRANS_JOIN |
     128             :                                            __TRANS_JOIN_NOLOCK |
     129             :                                            __TRANS_JOIN_NOSTART),
     130             :         [TRANS_STATE_COMPLETED]         = (__TRANS_START |
     131             :                                            __TRANS_ATTACH |
     132             :                                            __TRANS_JOIN |
     133             :                                            __TRANS_JOIN_NOLOCK |
     134             :                                            __TRANS_JOIN_NOSTART),
     135             : };
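
/*
 * Editor's illustrative sketch, not part of transaction.c: how a typical
 * caller drives the state machine documented above.  The function name and
 * the reserved item count are made up for the example; btrfs_start_transaction(),
 * btrfs_end_transaction() and btrfs_commit_transaction() are the real entry
 * points shown further down in this file.  Error handling is trimmed.
 */
static int example_modify_fs_tree(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;

        /* Attach to the running transaction or start N+1 (TRANS_STATE_RUNNING). */
        trans = btrfs_start_transaction(root, 1);
        if (IS_ERR(trans))
                return PTR_ERR(trans);

        /* ... modify fs trees through this handle ... */

        /* Release the handle and let whoever commits do the heavy work ... */
        return btrfs_end_transaction(trans);

        /*
         * ... or drive the commit ourselves, walking the transaction through
         * COMMIT_START, COMMIT_DOING, UNBLOCKED and COMPLETED:
         *
         *      return btrfs_commit_transaction(trans);
         */
}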
     136             : 
     137    89601566 : void btrfs_put_transaction(struct btrfs_transaction *transaction)
     138             : {
     139    89601566 :         WARN_ON(refcount_read(&transaction->use_count) == 0);
     140    89601566 :         if (refcount_dec_and_test(&transaction->use_count)) {
     141      203027 :                 BUG_ON(!list_empty(&transaction->list));
     142      203027 :                 WARN_ON(!RB_EMPTY_ROOT(
     143             :                                 &transaction->delayed_refs.href_root.rb_root));
     144      203027 :                 WARN_ON(!RB_EMPTY_ROOT(
     145             :                                 &transaction->delayed_refs.dirty_extent_root));
     146      203027 :                 if (transaction->delayed_refs.pending_csums)
     147           0 :                         btrfs_err(transaction->fs_info,
     148             :                                   "pending csums is %llu",
     149             :                                   transaction->delayed_refs.pending_csums);
     150             :                 /*
     151             :                  * If any block groups are found in ->deleted_bgs then it's
     152             :                  * because the transaction was aborted and a commit did not
     153             :                  * happen (things failed before writing the new superblock
     154             :                  * and calling btrfs_finish_extent_commit()), so we can not
     155             :                  * discard the physical locations of the block groups.
     156             :                  */
     157      203027 :                 while (!list_empty(&transaction->deleted_bgs)) {
     158           0 :                         struct btrfs_block_group *cache;
     159             : 
     160           0 :                         cache = list_first_entry(&transaction->deleted_bgs,
     161             :                                                  struct btrfs_block_group,
     162             :                                                  bg_list);
     163           0 :                         list_del_init(&cache->bg_list);
     164           0 :                         btrfs_unfreeze_block_group(cache);
     165           0 :                         btrfs_put_block_group(cache);
     166             :                 }
     167      203027 :                 WARN_ON(!list_empty(&transaction->dev_update_list));
     168      203027 :                 kfree(transaction);
     169             :         }
     170    89372797 : }
     171             : 
     172      203187 : static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
     173             : {
     174      203187 :         struct btrfs_transaction *cur_trans = trans->transaction;
     175      203187 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     176      203187 :         struct btrfs_root *root, *tmp;
     177             : 
     178             :         /*
     179             :          * At this point no one can be using this transaction to modify any tree
     180             :          * and no one can start another transaction to modify any tree either.
     181             :          */
     182      203187 :         ASSERT(cur_trans->state == TRANS_STATE_COMMIT_DOING);
     183             : 
     184      203187 :         down_write(&fs_info->commit_root_sem);
     185             : 
     186      406374 :         if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
     187        3567 :                 fs_info->last_reloc_trans = trans->transid;
     188             : 
     189     1205586 :         list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits,
     190             :                                  dirty_list) {
     191     1002399 :                 list_del_init(&root->dirty_list);
     192     1002399 :                 free_extent_buffer(root->commit_root);
     193     1002399 :                 root->commit_root = btrfs_root_node(root);
     194     1002399 :                 extent_io_tree_release(&root->dirty_log_pages);
     195     1002399 :                 btrfs_qgroup_clean_swapped_blocks(root);
     196             :         }
     197             : 
     198             :         /* We can free old roots now. */
     199      203187 :         spin_lock(&cur_trans->dropped_roots_lock);
     200      203305 :         while (!list_empty(&cur_trans->dropped_roots)) {
     201         118 :                 root = list_first_entry(&cur_trans->dropped_roots,
     202             :                                         struct btrfs_root, root_list);
     203         118 :                 list_del_init(&root->root_list);
     204         118 :                 spin_unlock(&cur_trans->dropped_roots_lock);
     205         118 :                 btrfs_free_log(trans, root);
     206         118 :                 btrfs_drop_and_free_fs_root(fs_info, root);
     207         118 :                 spin_lock(&cur_trans->dropped_roots_lock);
     208             :         }
     209      203187 :         spin_unlock(&cur_trans->dropped_roots_lock);
     210             : 
     211      203187 :         up_write(&fs_info->commit_root_sem);
     212      203187 : }
     213             : 
     214    52874365 : static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
     215             :                                          unsigned int type)
     216             : {
     217    52874365 :         if (type & TRANS_EXTWRITERS)
     218    27719951 :                 atomic_inc(&trans->num_extwriters);
     219    52874365 : }
     220             : 
     221    53066273 : static inline void extwriter_counter_dec(struct btrfs_transaction *trans,
     222             :                                          unsigned int type)
     223             : {
     224    53066273 :         if (type & TRANS_EXTWRITERS)
     225    27783641 :                 atomic_dec(&trans->num_extwriters);
     226    53066445 : }
     227             : 
     228             : static inline void extwriter_counter_init(struct btrfs_transaction *trans,
     229             :                                           unsigned int type)
     230             : {
     231      203026 :         atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 1 : 0));
     232             : }
     233             : 
     234             : static inline int extwriter_counter_read(struct btrfs_transaction *trans)
     235             : {
     236      218890 :         return atomic_read(&trans->num_extwriters);
     237             : }
     238             : 
     239             : /*
     240             :  * To be called after doing the chunk btree updates right after allocating a new
     241             :  * chunk (after btrfs_chunk_alloc_add_chunk_item() is called), when removing a
     242             :  * chunk (after all chunk btree updates), and after finishing the second phase
     243             :  * of chunk allocation (btrfs_create_pending_block_groups()) in case some block
     244             :  * group had its chunk item insertion delayed to the second phase.
     245             :  */
     246   109234583 : void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
     247             : {
     248   109234583 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     249             : 
     250   109234583 :         if (!trans->chunk_bytes_reserved)
     251             :                 return;
     252             : 
     253        2081 :         btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv,
     254             :                                 trans->chunk_bytes_reserved, NULL);
     255        2081 :         trans->chunk_bytes_reserved = 0;
     256             : }
     257             : 
     258             : /*
     259             :  * either allocate a new transaction or hop into the existing one
     260             :  */
     261    53079729 : static noinline int join_transaction(struct btrfs_fs_info *fs_info,
     262             :                                      unsigned int type)
     263             : {
     264    53079729 :         struct btrfs_transaction *cur_trans;
     265             : 
     266    53079729 :         spin_lock(&fs_info->trans_lock);
     267    53105019 : loop:
     268             :         /* The file system has been taken offline. No new transactions. */
     269    53105019 :         if (BTRFS_FS_ERROR(fs_info)) {
     270           0 :                 spin_unlock(&fs_info->trans_lock);
     271           0 :                 return -EROFS;
     272             :         }
     273             : 
     274    53105019 :         cur_trans = fs_info->running_transaction;
     275    53105019 :         if (cur_trans) {
     276    52886973 :                 if (TRANS_ABORTED(cur_trans)) {
     277           0 :                         spin_unlock(&fs_info->trans_lock);
     278           0 :                         return cur_trans->aborted;
     279             :                 }
     280    52886973 :                 if (btrfs_blocked_trans_types[cur_trans->state] & type) {
     281       12620 :                         spin_unlock(&fs_info->trans_lock);
     282       12620 :                         return -EBUSY;
     283             :                 }
     284    52874353 :                 refcount_inc(&cur_trans->use_count);
     285    52874363 :                 atomic_inc(&cur_trans->num_writers);
     286    52874366 :                 extwriter_counter_inc(cur_trans, type);
     287    52874365 :                 spin_unlock(&fs_info->trans_lock);
     288    52874365 :                 btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers);
     289    52874365 :                 btrfs_lockdep_acquire(fs_info, btrfs_trans_num_extwriters);
     290    52874365 :                 return 0;
     291             :         }
     292      218046 :         spin_unlock(&fs_info->trans_lock);
     293             : 
     294             :         /*
     295             :          * If we are ATTACH, we just want to catch the current transaction,
     296             :          * and commit it. If there is no transaction, just return ENOENT.
     297             :          */
     298      218015 :         if (type == TRANS_ATTACH)
     299             :                 return -ENOENT;
     300             : 
     301             :         /*
     302             :          * JOIN_NOLOCK only happens during the transaction commit, so
     303             :          * it is impossible that ->running_transaction is NULL
     304             :          */
     305      204236 :         BUG_ON(type == TRANS_JOIN_NOLOCK);
     306             : 
     307      204236 :         cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS);
     308      204223 :         if (!cur_trans)
     309             :                 return -ENOMEM;
     310             : 
     311      204223 :         btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers);
     312      204223 :         btrfs_lockdep_acquire(fs_info, btrfs_trans_num_extwriters);
     313             : 
     314      204223 :         spin_lock(&fs_info->trans_lock);
     315      204257 :         if (fs_info->running_transaction) {
     316             :                 /*
     317             :                  * someone started a transaction after we unlocked.  Make sure
     318             :                  * to redo the checks above
     319             :                  */
     320        1231 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
     321        1231 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
     322        1231 :                 kfree(cur_trans);
     323        1231 :                 goto loop;
     324      203026 :         } else if (BTRFS_FS_ERROR(fs_info)) {
     325           0 :                 spin_unlock(&fs_info->trans_lock);
     326           0 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
     327           0 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
     328           0 :                 kfree(cur_trans);
     329           0 :                 return -EROFS;
     330             :         }
     331             : 
     332      203026 :         cur_trans->fs_info = fs_info;
     333      203026 :         atomic_set(&cur_trans->pending_ordered, 0);
     334      203026 :         init_waitqueue_head(&cur_trans->pending_wait);
     335      203026 :         atomic_set(&cur_trans->num_writers, 1);
     336      203026 :         extwriter_counter_init(cur_trans, type);
     337      203026 :         init_waitqueue_head(&cur_trans->writer_wait);
     338      203026 :         init_waitqueue_head(&cur_trans->commit_wait);
     339      203026 :         cur_trans->state = TRANS_STATE_RUNNING;
     340             :         /*
     341             :          * One for this trans handle, one so it will live on until we
     342             :          * commit the transaction.
     343             :          */
     344      203026 :         refcount_set(&cur_trans->use_count, 2);
     345      203026 :         cur_trans->flags = 0;
     346      203026 :         cur_trans->start_time = ktime_get_seconds();
     347             : 
     348      203026 :         memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
     349             : 
     350      203026 :         cur_trans->delayed_refs.href_root = RB_ROOT_CACHED;
     351      203026 :         cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
     352      203026 :         atomic_set(&cur_trans->delayed_refs.num_entries, 0);
     353             : 
     354             :         /*
     355             :          * although the tree mod log is per file system and not per transaction,
     356             :          * the log must never go across transaction boundaries.
     357             :          */
     358      203026 :         smp_mb();
     359      203026 :         if (!list_empty(&fs_info->tree_mod_seq_list))
     360           0 :                 WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when creating a fresh transaction\n");
     361      203026 :         if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
     362           0 :                 WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when creating a fresh transaction\n");
     363      203026 :         atomic64_set(&fs_info->tree_mod_seq, 0);
     364             : 
     365      203026 :         spin_lock_init(&cur_trans->delayed_refs.lock);
     366             : 
     367      203026 :         INIT_LIST_HEAD(&cur_trans->pending_snapshots);
     368      203026 :         INIT_LIST_HEAD(&cur_trans->dev_update_list);
     369      203026 :         INIT_LIST_HEAD(&cur_trans->switch_commits);
     370      203026 :         INIT_LIST_HEAD(&cur_trans->dirty_bgs);
     371      203026 :         INIT_LIST_HEAD(&cur_trans->io_bgs);
     372      203026 :         INIT_LIST_HEAD(&cur_trans->dropped_roots);
     373      203026 :         mutex_init(&cur_trans->cache_write_mutex);
     374      203026 :         spin_lock_init(&cur_trans->dirty_bgs_lock);
     375      203026 :         INIT_LIST_HEAD(&cur_trans->deleted_bgs);
     376      203026 :         spin_lock_init(&cur_trans->dropped_roots_lock);
     377      203026 :         list_add_tail(&cur_trans->list, &fs_info->trans_list);
     378      203026 :         extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
     379             :                         IO_TREE_TRANS_DIRTY_PAGES);
     380      203026 :         extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
     381             :                         IO_TREE_FS_PINNED_EXTENTS);
     382      203026 :         fs_info->generation++;
     383      203026 :         cur_trans->transid = fs_info->generation;
     384      203026 :         fs_info->running_transaction = cur_trans;
     385      203026 :         cur_trans->aborted = 0;
     386      203026 :         spin_unlock(&fs_info->trans_lock);
     387             : 
     388      203026 :         return 0;
     389             : }
     390             : 
     391             : /*
     392             :  * This does all the record keeping required to make sure that a shareable root
     393             :  * is properly recorded in a given transaction.  This is required to make sure
     394             :  * the old root from before we joined the transaction is deleted when the
     395             :  * transaction commits.
     396             :  */
     397      155917 : static int record_root_in_trans(struct btrfs_trans_handle *trans,
     398             :                                struct btrfs_root *root,
     399             :                                int force)
     400             : {
     401      155917 :         struct btrfs_fs_info *fs_info = root->fs_info;
     402      155917 :         int ret = 0;
     403             : 
     404      155917 :         if ((test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
     405      155917 :             root->last_trans < trans->transid) || force) {
     406      304968 :                 WARN_ON(!force && root->commit_root != root->node);
     407             : 
     408             :                 /*
     409             :                  * see below for IN_TRANS_SETUP usage rules
     410             :                  * we have the reloc mutex held now, so there
     411             :                  * is only one writer in this function
     412             :                  */
     413      152484 :                 set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
     414             : 
     415             :                 /* make sure readers find IN_TRANS_SETUP before
     416             :                  * they find our root->last_trans update
     417             :                  */
     418      152484 :                 smp_wmb();
     419             : 
     420      152484 :                 spin_lock(&fs_info->fs_roots_radix_lock);
     421      152484 :                 if (root->last_trans == trans->transid && !force) {
     422           0 :                         spin_unlock(&fs_info->fs_roots_radix_lock);
     423           0 :                         return 0;
     424             :                 }
     425      152484 :                 radix_tree_tag_set(&fs_info->fs_roots_radix,
     426      152484 :                                    (unsigned long)root->root_key.objectid,
     427             :                                    BTRFS_ROOT_TRANS_TAG);
     428      152484 :                 spin_unlock(&fs_info->fs_roots_radix_lock);
     429      152484 :                 root->last_trans = trans->transid;
     430             : 
     431             :                 /* this is pretty tricky.  We don't want to
     432             :                  * take the relocation lock in btrfs_record_root_in_trans
     433             :                  * unless we're really doing the first setup for this root in
     434             :                  * this transaction.
     435             :                  *
     436             :                  * Normally we'd use root->last_trans as a flag to decide
     437             :                  * if we want to take the expensive mutex.
     438             :                  *
     439             :                  * But, we have to set root->last_trans before we
     440             :                  * init the relocation root, otherwise, we trip over warnings
     441             :                  * in ctree.c.  The solution used here is to flag ourselves
     442             :                  * with root IN_TRANS_SETUP.  When this is 1, we're still
     443             :                  * fixing up the reloc trees and everyone must wait.
     444             :                  *
     445             :                  * When this is zero, they can trust root->last_trans and fly
     446             :                  * through btrfs_record_root_in_trans without having to take the
     447             :                  * lock.  smp_wmb() makes sure that all the writes above are
     448             :                  * done before we pop in the zero below
     449             :                  */
     450      152484 :                 ret = btrfs_init_reloc_root(trans, root);
     451      152484 :                 smp_mb__before_atomic();
     452      152484 :                 clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
     453             :         }
     454             :         return ret;
     455             : }
     456             : 
     457             : 
     458         118 : void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
     459             :                             struct btrfs_root *root)
     460             : {
     461         118 :         struct btrfs_fs_info *fs_info = root->fs_info;
     462         118 :         struct btrfs_transaction *cur_trans = trans->transaction;
     463             : 
     464             :         /* Add ourselves to the transaction dropped list */
     465         118 :         spin_lock(&cur_trans->dropped_roots_lock);
     466         118 :         list_add_tail(&root->root_list, &cur_trans->dropped_roots);
     467         118 :         spin_unlock(&cur_trans->dropped_roots_lock);
     468             : 
     469             :         /* Make sure we don't try to update the root at commit time */
     470         118 :         spin_lock(&fs_info->fs_roots_radix_lock);
     471         118 :         radix_tree_tag_clear(&fs_info->fs_roots_radix,
     472         118 :                              (unsigned long)root->root_key.objectid,
     473             :                              BTRFS_ROOT_TRANS_TAG);
     474         118 :         spin_unlock(&fs_info->fs_roots_radix_lock);
     475         118 : }
     476             : 
     477    53095928 : int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
     478             :                                struct btrfs_root *root)
     479             : {
     480    53095928 :         struct btrfs_fs_info *fs_info = root->fs_info;
     481    53095928 :         int ret;
     482             : 
     483    53095928 :         if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
     484             :                 return 0;
     485             : 
     486             :         /*
     487             :          * see record_root_in_trans for comments about IN_TRANS_SETUP usage
     488             :          * and barriers
     489             :          */
     490    45760405 :         smp_rmb();
     491    45760276 :         if (root->last_trans == trans->transid &&
     492           0 :             !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state))
     493             :                 return 0;
     494             : 
     495      153538 :         mutex_lock(&fs_info->reloc_mutex);
     496      153521 :         ret = record_root_in_trans(trans, root, 0);
     497      153521 :         mutex_unlock(&fs_info->reloc_mutex);
     498             : 
     499      153521 :         return ret;
     500             : }
     501             : 
     502             : static inline int is_transaction_blocked(struct btrfs_transaction *trans)
     503             : {
     504    28995971 :         return (trans->state >= TRANS_STATE_COMMIT_START &&
     505    28995653 :                 trans->state < TRANS_STATE_UNBLOCKED &&
     506       87785 :                 !TRANS_ABORTED(trans));
     507             : }
     508             : 
     509             : /* Wait for a commit against the current transaction to become unblocked.
     510             :  * When this is done, it is safe to start a new transaction, but the current
     511             :  * transaction might not be fully on disk.
     512             :  */
     513    29057153 : static void wait_current_trans(struct btrfs_fs_info *fs_info)
     514             : {
     515    29057153 :         struct btrfs_transaction *cur_trans;
     516             : 
     517    29057153 :         spin_lock(&fs_info->trans_lock);
     518    29058686 :         cur_trans = fs_info->running_transaction;
     519    29058686 :         if (cur_trans && is_transaction_blocked(cur_trans)) {
     520       87784 :                 refcount_inc(&cur_trans->use_count);
     521       87784 :                 spin_unlock(&fs_info->trans_lock);
     522             : 
     523       87783 :                 btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
     524      175339 :                 wait_event(fs_info->transaction_wait,
     525             :                            cur_trans->state >= TRANS_STATE_UNBLOCKED ||
     526             :                            TRANS_ABORTED(cur_trans));
     527       87779 :                 btrfs_put_transaction(cur_trans);
     528             :         } else {
     529    28970902 :                 spin_unlock(&fs_info->trans_lock);
     530             :         }
     531    29058522 : }
     532             : 
     533    53175844 : static int may_wait_transaction(struct btrfs_fs_info *fs_info, int type)
     534             : {
     535   106351688 :         if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
     536             :                 return 0;
     537             : 
     538    53175561 :         if (type == TRANS_START)
     539    27764251 :                 return 1;
     540             : 
     541             :         return 0;
     542             : }
     543             : 
     544    25459422 : static inline bool need_reserve_reloc_root(struct btrfs_root *root)
     545             : {
     546    25459422 :         struct btrfs_fs_info *fs_info = root->fs_info;
     547             : 
     548    25497163 :         if (!fs_info->reloc_ctl ||
     549       37741 :             !test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
     550       14709 :             root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
     551       14709 :             root->reloc_root)
     552    25458751 :                 return false;
     553             : 
     554             :         return true;
     555             : }
     556             : 
     557             : static struct btrfs_trans_handle *
     558    53073691 : start_transaction(struct btrfs_root *root, unsigned int num_items,
     559             :                   unsigned int type, enum btrfs_reserve_flush_enum flush,
     560             :                   bool enforce_qgroups)
     561             : {
     562    53073691 :         struct btrfs_fs_info *fs_info = root->fs_info;
     563    53073691 :         struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
     564    53073691 :         struct btrfs_trans_handle *h;
     565    53073691 :         struct btrfs_transaction *cur_trans;
     566    53073691 :         u64 num_bytes = 0;
     567    53073691 :         u64 qgroup_reserved = 0;
     568    53073691 :         bool reloc_reserved = false;
     569    53073691 :         bool do_chunk_alloc = false;
     570    53073691 :         int ret;
     571             : 
     572    53073691 :         if (BTRFS_FS_ERROR(fs_info))
     573             :                 return ERR_PTR(-EROFS);
     574             : 
     575    53073691 :         if (current->journal_info) {
     576           0 :                 WARN_ON(type & TRANS_EXTWRITERS);
     577           0 :                 h = current->journal_info;
     578           0 :                 refcount_inc(&h->use_count);
     579           0 :                 WARN_ON(refcount_read(&h->use_count) > 2);
     580           0 :                 h->orig_rsv = h->block_rsv;
     581           0 :                 h->block_rsv = NULL;
     582           0 :                 goto got_it;
     583             :         }
     584             : 
     585             :         /*
     586             :          * Do the reservation before we join the transaction so we can do all
     587             :          * the appropriate flushing if need be.
     588             :          */
     589    53073691 :         if (num_items && root != fs_info->chunk_root) {
     590    25463845 :                 struct btrfs_block_rsv *rsv = &fs_info->trans_block_rsv;
     591    25463845 :                 u64 delayed_refs_bytes = 0;
     592             : 
     593    25463845 :                 qgroup_reserved = num_items * fs_info->nodesize;
     594    25463845 :                 ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved,
     595             :                                 enforce_qgroups);
     596    25462246 :                 if (ret)
     597        6168 :                         return ERR_PTR(ret);
     598             : 
     599             :                 /*
     600             :                  * We want to reserve all the bytes we may need all at once, so
     601             :                  * we only do 1 enospc flushing cycle per transaction start.  We
     602             :                  * accomplish this by simply assuming we'll do num_items worth
     603             :                  * of delayed refs updates in this trans handle, and refill that
     604             :                  * amount for whatever is missing in the reserve.
     605             :                  */
     606    25456078 :                 num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
     607    25456078 :                 if (flush == BTRFS_RESERVE_FLUSH_ALL &&
     608             :                     !btrfs_block_rsv_full(delayed_refs_rsv)) {
     609     1425271 :                         delayed_refs_bytes = btrfs_calc_delayed_ref_bytes(fs_info,
     610             :                                                                           num_items);
     611     1425271 :                         num_bytes += delayed_refs_bytes;
     612             :                 }
     613             : 
     614             :                 /*
     615             :                  * Do the reservation for the relocation root creation
     616             :                  */
     617    25456078 :                 if (need_reserve_reloc_root(root)) {
     618         671 :                         num_bytes += fs_info->nodesize;
     619         671 :                         reloc_reserved = true;
     620             :                 }
     621             : 
     622    25447731 :                 ret = btrfs_block_rsv_add(fs_info, rsv, num_bytes, flush);
     623    25491116 :                 if (ret)
     624       23211 :                         goto reserve_fail;
     625    25467905 :                 if (delayed_refs_bytes) {
     626     1404765 :                         btrfs_migrate_to_delayed_refs_rsv(fs_info, rsv,
     627             :                                                           delayed_refs_bytes);
     628     1404755 :                         num_bytes -= delayed_refs_bytes;
     629             :                 }
     630             : 
     631    25467895 :                 if (rsv->space_info->force_alloc)
     632           0 :                         do_chunk_alloc = true;
     633    27609846 :         } else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL &&
     634             :                    !btrfs_block_rsv_full(delayed_refs_rsv)) {
     635             :                 /*
     636             :                  * Some people call with btrfs_start_transaction(root, 0)
     637             :                  * because they can be throttled, but have some other mechanism
     638             :                  * for reserving space.  We still want these guys to refill the
     639             :                  * delayed block_rsv so just add one item's worth of reservation
     640             :                  * here.
     641             :                  */
     642     2062429 :                 ret = btrfs_delayed_refs_rsv_refill(fs_info, flush);
     643     2062430 :                 if (ret)
     644           0 :                         goto reserve_fail;
     645             :         }
     646    53077742 : again:
     647    53077755 :         h = kmem_cache_zalloc(btrfs_trans_handle_cachep, GFP_NOFS);
     648    53076402 :         if (!h) {
     649           0 :                 ret = -ENOMEM;
     650           0 :                 goto alloc_fail;
     651             :         }
     652             : 
     653             :         /*
     654             :          * If we are JOIN_NOLOCK we're already committing a transaction and
     655             :          * waiting on this guy, so we don't need to do the sb_start_intwrite
     656             :          * because we're already holding a ref.  We need this because we could
     657             :          * have raced in and done an fsync() on a file which can kick a commit
     658             :          * and then we deadlock with somebody doing a freeze.
     659             :          *
     660             :          * If we are ATTACH, it means we just want to catch the current
     661             :          * transaction and commit it, so we needn't do sb_start_intwrite(). 
     662             :          */
     663    53076402 :         if (type & __TRANS_FREEZABLE)
     664    51892520 :                 sb_start_intwrite(fs_info->sb);
     665             : 
     666    53072789 :         if (may_wait_transaction(fs_info, type))
     667    27764100 :                 wait_current_trans(fs_info);
     668             : 
     669    53082832 :         do {
     670    53082832 :                 ret = join_transaction(fs_info, type);
     671    53103755 :                 if (ret == -EBUSY) {
     672       12619 :                         wait_current_trans(fs_info);
     673       12612 :                         if (unlikely(type == TRANS_ATTACH ||
     674             :                                      type == TRANS_JOIN_NOSTART))
     675             :                                 ret = -ENOENT;
     676             :                 }
     677    53102644 :         } while (ret == -EBUSY);
     678             : 
     679    53092241 :         if (ret < 0)
     680       14882 :                 goto join_fail;
     681             : 
     682    53077359 :         cur_trans = fs_info->running_transaction;
     683             : 
     684    53077359 :         h->transid = cur_trans->transid;
     685    53077359 :         h->transaction = cur_trans;
     686    53077359 :         refcount_set(&h->use_count, 1);
     687    53077359 :         h->fs_info = root->fs_info;
     688             : 
     689    53077359 :         h->type = type;
     690    53077359 :         INIT_LIST_HEAD(&h->new_bgs);
     691             : 
     692    53077359 :         smp_mb();
     693    53181458 :         if (cur_trans->state >= TRANS_STATE_COMMIT_START &&
     694      104166 :             may_wait_transaction(fs_info, type)) {
     695          13 :                 current->journal_info = h;
     696          13 :                 btrfs_commit_transaction(h);
     697          13 :                 goto again;
     698             :         }
     699             : 
     700    53077279 :         if (num_bytes) {
     701    25468054 :                 trace_btrfs_space_reservation(fs_info, "transaction",
     702             :                                               h->transid, num_bytes, 1);
     703    25467930 :                 h->block_rsv = &fs_info->trans_block_rsv;
     704    25467930 :                 h->bytes_reserved = num_bytes;
     705    25467930 :                 h->reloc_reserved = reloc_reserved;
     706             :         }
     707             : 
     708    27609225 : got_it:
     709    53077155 :         if (!current->journal_info)
     710    53077107 :                 current->journal_info = h;
     711             : 
     712             :         /*
     713             :          * If the space_info is marked ALLOC_FORCE then we'll get upgraded to
     714             :          * ALLOC_FORCE the first run through, and then we won't allocate for
     715             :          * anybody else who races in later.  We don't care about the return
     716             :          * value here.
     717             :          */
     718    53077155 :         if (do_chunk_alloc && num_bytes) {
     719           0 :                 u64 flags = h->block_rsv->space_info->flags;
     720             : 
     721           0 :                 btrfs_chunk_alloc(h, btrfs_get_alloc_profile(fs_info, flags),
     722             :                                   CHUNK_ALLOC_NO_FORCE);
     723             :         }
     724             : 
     725             :         /*
     726             :          * btrfs_record_root_in_trans() needs to alloc new extents, and may
     727             :          * call btrfs_join_transaction() while we're also starting a
     728             :          * transaction.
     729             :          *
     730             :          * Thus it needs to be called after current->journal_info is initialized,
     731             :          * or we can deadlock.
     732             :          */
     733    53077155 :         ret = btrfs_record_root_in_trans(h, root);
     734    53076093 :         if (ret) {
     735             :                 /*
     736             :                  * The transaction handle is fully initialized and linked with
     737             :                  * other structures so it needs to be ended in case of errors,
     738             :                  * not just freed.
     739             :                  */
     740           0 :                 btrfs_end_transaction(h);
     741           0 :                 return ERR_PTR(ret);
     742             :         }
     743             : 
     744             :         return h;
     745             : 
     746             : join_fail:
     747       14882 :         if (type & __TRANS_FREEZABLE)
     748           0 :                 sb_end_intwrite(fs_info->sb);
     749       14882 :         kmem_cache_free(btrfs_trans_handle_cachep, h);
     750       14883 : alloc_fail:
     751       14883 :         if (num_bytes)
     752           0 :                 btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
     753             :                                         num_bytes, NULL);
     754       14883 : reserve_fail:
     755       38094 :         btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
     756       38090 :         return ERR_PTR(ret);
     757             : }
     758             : 
     759    26387642 : struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
     760             :                                                    unsigned int num_items)
     761             : {
     762    26387642 :         return start_transaction(root, num_items, TRANS_START,
     763             :                                  BTRFS_RESERVE_FLUSH_ALL, true);
     764             : }
     765             : 
     766     1384196 : struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
     767             :                                         struct btrfs_root *root,
     768             :                                         unsigned int num_items)
     769             : {
     770     1384196 :         return start_transaction(root, num_items, TRANS_START,
     771             :                                  BTRFS_RESERVE_FLUSH_ALL_STEAL, false);
     772             : }
     773             : 
     774    24128252 : struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
     775             : {
     776    24128252 :         return start_transaction(root, 0, TRANS_JOIN, BTRFS_RESERVE_NO_FLUSH,
     777             :                                  true);
     778             : }
     779             : 
     780          40 : struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *root)
     781             : {
     782          40 :         return start_transaction(root, 0, TRANS_JOIN_NOLOCK,
     783             :                                  BTRFS_RESERVE_NO_FLUSH, true);
     784             : }
     785             : 
     786             : /*
     787             :  * Similar to regular join but it never starts a transaction when none is
     788             :  * running or after waiting for the current one to finish.
     789             :  */
     790     1150909 : struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root)
     791             : {
     792     1150909 :         return start_transaction(root, 0, TRANS_JOIN_NOSTART,
     793             :                                  BTRFS_RESERVE_NO_FLUSH, true);
     794             : }
     795             : 
     796             : /*
     797             :  * btrfs_attach_transaction() - catch the running transaction
     798             :  *
     799             :  * It is used when we want to commit the current transaction, but
     800             :  * don't want to start a new one.
     801             :  *
     802             :  * Note: If this function returns -ENOENT, it just means there is no
     803             :  * running transaction. But it is possible that the inactive transaction
     804             :  * is still in memory, not fully on disk. If you want to be sure there is
     805             :  * no inactive transaction in the fs when -ENOENT is returned, you should
     806             :  * invoke
     807             :  *     btrfs_attach_transaction_barrier()
     808             :  */
     809        2302 : struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
     810             : {
     811        2302 :         return start_transaction(root, 0, TRANS_ATTACH,
     812             :                                  BTRFS_RESERVE_NO_FLUSH, true);
     813             : }
     814             : 
     815             : /*
     816             :  * btrfs_attach_transaction_barrier() - catch the running transaction
     817             :  *
     818             :  * It is similar to the above function; the difference is that this one
     819             :  * will wait for all the inactive transactions until they fully
     820             :  * complete.
     821             :  */
     822             : struct btrfs_trans_handle *
     823       30843 : btrfs_attach_transaction_barrier(struct btrfs_root *root)
     824             : {
     825       30843 :         struct btrfs_trans_handle *trans;
     826             : 
     827       30843 :         trans = start_transaction(root, 0, TRANS_ATTACH,
     828             :                                   BTRFS_RESERVE_NO_FLUSH, true);
     829       30832 :         if (trans == ERR_PTR(-ENOENT))
     830       14608 :                 btrfs_wait_for_commit(root->fs_info, 0);
     831             : 
     832       30838 :         return trans;
     833             : }
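
/*
 * Editor's illustrative sketch, not part of transaction.c: the caller-side
 * difference between the two attach variants above.  The function name is
 * made up; treating -ENOENT as "nothing to commit" follows the comment on
 * btrfs_attach_transaction().
 */
static int example_commit_if_running(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;

        /*
         * Catch the running transaction without starting a new one.  Use
         * btrfs_attach_transaction_barrier() instead when the caller must
         * also wait for an already-committing transaction to fully complete.
         */
        trans = btrfs_attach_transaction(root);
        if (IS_ERR(trans)) {
                if (PTR_ERR(trans) == -ENOENT)
                        return 0;       /* no running transaction, nothing to do */
                return PTR_ERR(trans);
        }

        return btrfs_commit_transaction(trans);
}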
     834             : 
     835             : /* Wait for a transaction commit to reach at least the given state. */
     836        7176 : static noinline void wait_for_commit(struct btrfs_transaction *commit,
     837             :                                      const enum btrfs_trans_state min_state)
     838             : {
     839        7176 :         struct btrfs_fs_info *fs_info = commit->fs_info;
     840        7176 :         u64 transid = commit->transid;
     841        7176 :         bool put = false;
     842             : 
     843             :         /*
     844             :          * At the moment this function is called with min_state either being
     845             :          * TRANS_STATE_COMPLETED or TRANS_STATE_SUPER_COMMITTED.
     846             :          */
     847        7176 :         if (min_state == TRANS_STATE_COMPLETED)
     848             :                 btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
     849             :         else
     850    35876751 :                 btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
     851             : 
     852    35876751 :         while (1) {
     853    35888931 :                 wait_event(commit->commit_wait, commit->state >= min_state);
     854    35874430 :                 if (put)
     855    35868633 :                         btrfs_put_transaction(commit);
     856             : 
     857    35633912 :                 if (min_state < TRANS_STATE_COMPLETED)
     858             :                         break;
     859             : 
     860             :                 /*
     861             :                  * A transaction isn't really completed until all of the
     862             :                  * previous transactions are completed, but with fsync we can
     863             :                  * end up with SUPER_COMMITTED transactions before a COMPLETED
     864             :                  * transaction. Wait for those.
     865             :                  */
     866             : 
     867    35633744 :                 spin_lock(&fs_info->trans_lock);
     868    35876946 :                 commit = list_first_entry_or_null(&fs_info->trans_list,
     869             :                                                   struct btrfs_transaction,
     870             :                                                   list);
     871    35876588 :                 if (!commit || commit->transid > transid) {
     872        7010 :                         spin_unlock(&fs_info->trans_lock);
     873             :                         break;
     874             :                 }
     875    35869936 :                 refcount_inc(&commit->use_count);
     876    35869936 :                 put = true;
     877    35869936 :                 spin_unlock(&fs_info->trans_lock);
     878             :         }
     879        7177 : }
     880             : 
     881       14605 : int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
     882             : {
     883       14605 :         struct btrfs_transaction *cur_trans = NULL, *t;
     884       14605 :         int ret = 0;
     885             : 
     886       14605 :         if (transid) {
     887           4 :                 if (transid <= fs_info->last_trans_committed)
     888           0 :                         goto out;
     889             : 
     890             :                 /* find specified transaction */
     891           4 :                 spin_lock(&fs_info->trans_lock);
     892           4 :                 list_for_each_entry(t, &fs_info->trans_list, list) {
     893           4 :                         if (t->transid == transid) {
     894           4 :                                 cur_trans = t;
     895           4 :                                 refcount_inc(&cur_trans->use_count);
     896           4 :                                 ret = 0;
     897           4 :                                 break;
     898             :                         }
     899           0 :                         if (t->transid > transid) {
     900             :                                 ret = 0;
     901             :                                 break;
     902             :                         }
     903             :                 }
     904           4 :                 spin_unlock(&fs_info->trans_lock);
     905             : 
     906             :                 /*
     907             :                  * The specified transaction doesn't exist, or we
     908             :                  * raced with btrfs_commit_transaction
     909             :                  */
     910           4 :                 if (!cur_trans) {
     911           0 :                         if (transid > fs_info->last_trans_committed)
     912           0 :                                 ret = -EINVAL;
     913           0 :                         goto out;
     914             :                 }
     915             :         } else {
     916             :                 /* find newest transaction that is committing | committed */
     917       14601 :                 spin_lock(&fs_info->trans_lock);
     918       15227 :                 list_for_each_entry_reverse(t, &fs_info->trans_list,
     919             :                                             list) {
     920        1971 :                         if (t->state >= TRANS_STATE_COMMIT_START) {
     921        1360 :                                 if (t->state == TRANS_STATE_COMPLETED)
     922             :                                         break;
     923        1360 :                                 cur_trans = t;
     924        1360 :                                 refcount_inc(&cur_trans->use_count);
     925             :                                 break;
     926             :                         }
     927             :                 }
     928       14616 :                 spin_unlock(&fs_info->trans_lock);
     929       14614 :                 if (!cur_trans)
     930       13254 :                         goto out;  /* nothing committing|committed */
     931             :         }
     932             : 
     933        1364 :         wait_for_commit(cur_trans, TRANS_STATE_COMPLETED);
     934        1363 :         btrfs_put_transaction(cur_trans);
     935       14617 : out:
     936       14617 :         return ret;
     937             : }
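/*
 * Illustrative sketch, not part of transaction.c and not measured by this
 * coverage run: a minimal caller of btrfs_wait_for_commit().  Passing
 * transid == 0 waits for the newest transaction that is currently committing,
 * if any; a non-zero transid waits for that specific transaction and returns
 * -EINVAL if no such transaction ever existed.  example_wait_sync() is a
 * made-up name.
 */
static int example_wait_sync(struct btrfs_fs_info *fs_info, u64 transid)
{
        if (transid == 0)
                /* Wait for whatever transaction is committing right now, if any. */
                return btrfs_wait_for_commit(fs_info, 0);

        /* Wait until the given transaction is fully committed. */
        return btrfs_wait_for_commit(fs_info, transid);
}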
     938             : 
     939     1280501 : void btrfs_throttle(struct btrfs_fs_info *fs_info)
     940             : {
     941     1280501 :         wait_current_trans(fs_info);
     942     1280501 : }
     943             : 
     944     1914790 : bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
     945             : {
     946     1914790 :         struct btrfs_transaction *cur_trans = trans->transaction;
     947             : 
     948     3829572 :         if (cur_trans->state >= TRANS_STATE_COMMIT_START ||
     949     1914782 :             test_bit(BTRFS_DELAYED_REFS_FLUSHING, &cur_trans->delayed_refs.flags))
     950             :                 return true;
     951             : 
     952     1913837 :         if (btrfs_check_space_for_delayed_refs(trans->fs_info))
     953             :                 return true;
     954             : 
     955        2674 :         return !!btrfs_block_rsv_check(&trans->fs_info->global_block_rsv, 50);
     956             : }
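/*
 * Illustrative sketch, not part of transaction.c and not measured by this
 * coverage run: btrfs_should_end_transaction() is meant to be polled by
 * long-running work inside a transaction, so the handle is dropped once the
 * transaction wants to commit or delayed-ref / global-reserve pressure builds
 * up.  example_process_items(), has_more_items() and process_one_item() are
 * made-up names standing in for a caller's own work.
 */
static int example_process_items(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;
        int ret = 0;

        trans = btrfs_start_transaction(root, 1);
        if (IS_ERR(trans))
                return PTR_ERR(trans);

        while (has_more_items(root)) {
                ret = process_one_item(trans, root);
                if (ret)
                        break;

                if (btrfs_should_end_transaction(trans)) {
                        /* Drop the handle, then join a fresh transaction. */
                        ret = btrfs_end_transaction(trans);
                        if (ret)
                                return ret;
                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans))
                                return PTR_ERR(trans);
                }
        }

        if (ret)
                btrfs_end_transaction(trans);
        else
                ret = btrfs_end_transaction(trans);
        return ret;
}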
     957             : 
     958    53070956 : static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans)
     959             : 
     960             : {
     961    53070956 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     962             : 
     963    53070956 :         if (!trans->block_rsv) {
     964             :                 ASSERT(!trans->bytes_reserved);
     965             :                 return;
     966             :         }
     967             : 
     968    35592395 :         if (!trans->bytes_reserved)
     969             :                 return;
     970             : 
     971    29017397 :         ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
     972    29017397 :         trace_btrfs_space_reservation(fs_info, "transaction",
     973             :                                       trans->transid, trans->bytes_reserved, 0);
     974    29016950 :         btrfs_block_rsv_release(fs_info, trans->block_rsv,
     975             :                                 trans->bytes_reserved, NULL);
     976    29019485 :         trans->bytes_reserved = 0;
     977             : }
     978             : 
     979    52864216 : static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
     980             :                                    int throttle)
     981             : {
     982    52864216 :         struct btrfs_fs_info *info = trans->fs_info;
     983    52864216 :         struct btrfs_transaction *cur_trans = trans->transaction;
     984    52864216 :         int err = 0;
     985             : 
     986    52864216 :         if (refcount_read(&trans->use_count) > 1) {
     987           0 :                 refcount_dec(&trans->use_count);
     988           0 :                 trans->block_rsv = trans->orig_rsv;
     989           0 :                 return 0;
     990             :         }
     991             : 
     992    52864216 :         btrfs_trans_release_metadata(trans);
     993    52863113 :         trans->block_rsv = NULL;
     994             : 
     995    52863113 :         btrfs_create_pending_block_groups(trans);
     996             : 
     997    52860897 :         btrfs_trans_release_chunk_metadata(trans);
     998             : 
     999    52859465 :         if (trans->type & __TRANS_FREEZABLE)
    1000    51706147 :                 sb_end_intwrite(info->sb);
    1001             : 
    1002    52859802 :         WARN_ON(cur_trans != info->running_transaction);
    1003    52859802 :         WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
    1004    52859802 :         atomic_dec(&cur_trans->num_writers);
    1005    52863549 :         extwriter_counter_dec(cur_trans, trans->type);
    1006             : 
    1007    52863627 :         cond_wake_up(&cur_trans->writer_wait);
    1008             : 
    1009    52868443 :         btrfs_lockdep_release(info, btrfs_trans_num_extwriters);
    1010    52868443 :         btrfs_lockdep_release(info, btrfs_trans_num_writers);
    1011             : 
    1012    52868443 :         btrfs_put_transaction(cur_trans);
    1013             : 
    1014    52862379 :         if (current->journal_info == trans)
    1015    52862721 :                 current->journal_info = NULL;
    1016             : 
    1017    52862379 :         if (throttle)
    1018     3951362 :                 btrfs_run_delayed_iputs(info);
    1019             : 
    1020    52862379 :         if (TRANS_ABORTED(trans) || BTRFS_FS_ERROR(info)) {
    1021           1 :                 wake_up_process(info->transaction_kthread);
    1022           1 :                 if (TRANS_ABORTED(trans))
    1023           1 :                         err = trans->aborted;
    1024             :                 else
    1025             :                         err = -EROFS;
    1026             :         }
    1027             : 
    1028    52862379 :         kmem_cache_free(btrfs_trans_handle_cachep, trans);
    1029    52862379 :         return err;
    1030             : }
    1031             : 
    1032    48913187 : int btrfs_end_transaction(struct btrfs_trans_handle *trans)
    1033             : {
    1034    48913187 :         return __btrfs_end_transaction(trans, 0);
    1035             : }
    1036             : 
    1037     3951362 : int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans)
    1038             : {
    1039     3951362 :         return __btrfs_end_transaction(trans, 1);
    1040             : }
    1041             : 
    1042             : /*
    1043             :  * when btree blocks are allocated, they have some corresponding bits set for
    1044             :  * them in one of two extent_io trees.  This is used to make sure all of
    1045             :  * those extents are sent to disk but does not wait on them
    1046             :  */
    1047      683769 : int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
    1048             :                                struct extent_io_tree *dirty_pages, int mark)
    1049             : {
    1050      683769 :         int err = 0;
    1051      683769 :         int werr = 0;
    1052      683769 :         struct address_space *mapping = fs_info->btree_inode->i_mapping;
    1053      683769 :         struct extent_state *cached_state = NULL;
    1054      683769 :         u64 start = 0;
    1055      683769 :         u64 end;
    1056             : 
    1057     2944074 :         while (!find_first_extent_bit(dirty_pages, start, &start, &end,
    1058             :                                       mark, &cached_state)) {
    1059     2260272 :                 bool wait_writeback = false;
    1060             : 
    1061     2260272 :                 err = convert_extent_bit(dirty_pages, start, end,
    1062             :                                          EXTENT_NEED_WAIT,
    1063             :                                          mark, &cached_state);
    1064             :                 /*
    1065             :                  * convert_extent_bit can return -ENOMEM, which is most of the
    1066             :                  * time a temporary error. So when it happens, ignore the error
    1067             :                  * and wait for writeback of this range to finish - because we
    1068             :                  * failed to set the bit EXTENT_NEED_WAIT for the range, a call
    1069             :                  * to __btrfs_wait_marked_extents() would not know that
    1070             :                  * writeback for this range started and therefore wouldn't
    1071             :                  * wait for it to finish - we don't want to commit a
    1072             :                  * superblock that points to btree nodes/leaves for which
    1073             :                  * writeback hasn't finished yet (and without errors).
    1074             :                  * We cleanup any entries left in the io tree when committing
    1075             :                  * the transaction (through extent_io_tree_release()).
    1076             :                  */
    1077     2260265 :                 if (err == -ENOMEM) {
    1078             :                         err = 0;
    1079             :                         wait_writeback = true;
    1080             :                 }
    1081     2260265 :                 if (!err)
    1082     2260265 :                         err = filemap_fdatawrite_range(mapping, start, end);
    1083     2260303 :                 if (err)
    1084             :                         werr = err;
    1085     2260303 :                 else if (wait_writeback)
    1086           0 :                         werr = filemap_fdatawait_range(mapping, start, end);
    1087     2260303 :                 free_extent_state(cached_state);
    1088     2260304 :                 cached_state = NULL;
    1089     2260304 :                 cond_resched();
    1090     2260305 :                 start = end + 1;
    1091             :         }
    1092      683821 :         return werr;
    1093             : }
    1094             : 
    1095             : /*
    1096             :  * when btree blocks are allocated, they have some corresponding bits set for
    1097             :  * them in one of two extent_io trees.  This is used to make sure all of
    1098             :  * those extents are on disk for transaction or log commit.  We wait
    1099             :  * on all the pages and clear them from the dirty pages state tree
    1100             :  */
    1101      683821 : static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
    1102             :                                        struct extent_io_tree *dirty_pages)
    1103             : {
    1104      683821 :         int err = 0;
    1105      683821 :         int werr = 0;
    1106      683821 :         struct address_space *mapping = fs_info->btree_inode->i_mapping;
    1107      683821 :         struct extent_state *cached_state = NULL;
    1108      683821 :         u64 start = 0;
    1109      683821 :         u64 end;
    1110             : 
    1111     2944124 :         while (!find_first_extent_bit(dirty_pages, start, &start, &end,
    1112             :                                       EXTENT_NEED_WAIT, &cached_state)) {
    1113             :                 /*
    1114             :                  * Ignore -ENOMEM errors returned by clear_extent_bit().
    1115             :                  * When committing the transaction, we'll remove any entries
    1116             :                  * left in the io tree. For a log commit, we don't remove them
    1117             :                  * after committing the log because the tree can be accessed
    1118             :                  * concurrently - we do it only at transaction commit time when
    1119             :                  * it's safe to do it (through extent_io_tree_release()).
    1120             :                  */
    1121     2260304 :                 err = clear_extent_bit(dirty_pages, start, end,
    1122             :                                        EXTENT_NEED_WAIT, &cached_state);
    1123     2260304 :                 if (err == -ENOMEM)
    1124             :                         err = 0;
    1125     2260304 :                 if (!err)
    1126     2260304 :                         err = filemap_fdatawait_range(mapping, start, end);
    1127     2260304 :                 if (err)
    1128           5 :                         werr = err;
    1129     2260304 :                 free_extent_state(cached_state);
    1130     2260304 :                 cached_state = NULL;
    1131     2260304 :                 cond_resched();
    1132     2260303 :                 start = end + 1;
    1133             :         }
    1134      683821 :         if (err)
    1135           2 :                 werr = err;
    1136      683821 :         return werr;
    1137             : }
    1138             : 
    1139      203187 : static int btrfs_wait_extents(struct btrfs_fs_info *fs_info,
    1140             :                        struct extent_io_tree *dirty_pages)
    1141             : {
    1142      203187 :         bool errors = false;
    1143      203187 :         int err;
    1144             : 
    1145      203187 :         err = __btrfs_wait_marked_extents(fs_info, dirty_pages);
    1146      203187 :         if (test_and_clear_bit(BTRFS_FS_BTREE_ERR, &fs_info->flags))
    1147           4 :                 errors = true;
    1148             : 
    1149      203187 :         if (errors && !err)
    1150           0 :                 err = -EIO;
    1151      203187 :         return err;
    1152             : }
    1153             : 
    1154      480634 : int btrfs_wait_tree_log_extents(struct btrfs_root *log_root, int mark)
    1155             : {
    1156      480634 :         struct btrfs_fs_info *fs_info = log_root->fs_info;
    1157      480634 :         struct extent_io_tree *dirty_pages = &log_root->dirty_log_pages;
    1158      480634 :         bool errors = false;
    1159      480634 :         int err;
    1160             : 
    1161      480634 :         ASSERT(log_root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
    1162             : 
    1163      480634 :         err = __btrfs_wait_marked_extents(fs_info, dirty_pages);
    1164      842168 :         if ((mark & EXTENT_DIRTY) &&
    1165      361534 :             test_and_clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags))
    1166           1 :                 errors = true;
    1167             : 
    1168      839320 :         if ((mark & EXTENT_NEW) &&
    1169      358686 :             test_and_clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags))
    1170           0 :                 errors = true;
    1171             : 
    1172      480634 :         if (errors && !err)
    1173           0 :                 err = -EIO;
    1174      480634 :         return err;
    1175             : }
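/*
 * Illustrative sketch, not part of transaction.c and not measured by this
 * coverage run: btrfs_write_marked_extents() and btrfs_wait_tree_log_extents()
 * are used as a write-then-wait pair, with the same mark (EXTENT_DIRTY or
 * EXTENT_NEW) passed to both.  example_flush_log_tree() is a made-up name and
 * this is much simpler than the real log-commit path in tree-log.c.
 */
static int example_flush_log_tree(struct btrfs_root *log_root, int mark)
{
        struct btrfs_fs_info *fs_info = log_root->fs_info;
        int ret;
        int ret2;

        /* Start writeback for every extent carrying @mark ... */
        ret = btrfs_write_marked_extents(fs_info, &log_root->dirty_log_pages, mark);
        /* ... then wait for it and pick up any error recorded for this log. */
        ret2 = btrfs_wait_tree_log_extents(log_root, mark);

        return ret ? ret : ret2;
}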
    1176             : 
    1177             : /*
    1178             :  * When btree blocks are allocated the corresponding extents are marked dirty.
    1179             :  * This function ensures such extents are persisted on disk for transaction or
    1180             :  * log commit.
    1181             :  *
    1182             :  * @trans: transaction whose dirty pages we'd like to write
    1183             :  */
    1184      203187 : static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans)
    1185             : {
    1186      203187 :         int ret;
    1187      203187 :         int ret2;
    1188      203187 :         struct extent_io_tree *dirty_pages = &trans->transaction->dirty_pages;
    1189      203187 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1190      203187 :         struct blk_plug plug;
    1191             : 
    1192      203187 :         blk_start_plug(&plug);
    1193      203187 :         ret = btrfs_write_marked_extents(fs_info, dirty_pages, EXTENT_DIRTY);
    1194      203187 :         blk_finish_plug(&plug);
    1195      203187 :         ret2 = btrfs_wait_extents(fs_info, dirty_pages);
    1196             : 
    1197      203187 :         extent_io_tree_release(&trans->transaction->dirty_pages);
    1198             : 
    1199      203187 :         if (ret)
    1200             :                 return ret;
    1201      203187 :         else if (ret2)
    1202             :                 return ret2;
    1203             :         else
    1204      203183 :                 return 0;
    1205             : }
    1206             : 
    1207             : /*
    1208             :  * this is used to update the root pointer in the tree of tree roots.
    1209             :  *
    1210             :  * But, in the case of the extent allocation tree, updating the root
    1211             :  * pointer may allocate blocks which may change the root of the extent
    1212             :  * allocation tree.
    1213             :  *
    1214             :  * So, this loops and repeats and makes sure the cowonly root didn't
    1215             :  * change while the root pointer was being updated in the metadata.
    1216             :  */
    1217      449410 : static int update_cowonly_root(struct btrfs_trans_handle *trans,
    1218             :                                struct btrfs_root *root)
    1219             : {
    1220      449410 :         int ret;
    1221      449410 :         u64 old_root_bytenr;
    1222      449410 :         u64 old_root_used;
    1223      449410 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1224      449410 :         struct btrfs_root *tree_root = fs_info->tree_root;
    1225             : 
    1226      449410 :         old_root_used = btrfs_root_used(&root->root_item);
    1227             : 
    1228     1348230 :         while (1) {
    1229      898820 :                 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
    1230      898820 :                 if (old_root_bytenr == root->node->start &&
    1231             :                     old_root_used == btrfs_root_used(&root->root_item))
    1232             :                         break;
    1233             : 
    1234      449410 :                 btrfs_set_root_node(&root->root_item, root->node);
    1235      449410 :                 ret = btrfs_update_root(trans, tree_root,
    1236             :                                         &root->root_key,
    1237             :                                         &root->root_item);
    1238      449410 :                 if (ret)
    1239           0 :                         return ret;
    1240             : 
    1241      449410 :                 old_root_used = btrfs_root_used(&root->root_item);
    1242             :         }
    1243             : 
    1244             :         return 0;
    1245             : }
    1246             : 
    1247             : /*
    1248             :  * update all the cowonly tree roots on disk
    1249             :  *
    1250             :  * The error handling in this function may not be obvious. Any of the
    1251             :  * failures will cause the file system to go offline. We still need
    1252             :  * to clean up the delayed refs.
    1253             :  */
    1254      203188 : static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
    1255             : {
    1256      203188 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1257      203188 :         struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
    1258      203188 :         struct list_head *io_bgs = &trans->transaction->io_bgs;
    1259      203188 :         struct list_head *next;
    1260      203188 :         struct extent_buffer *eb;
    1261      203188 :         int ret;
    1262             : 
    1263             :         /*
    1264             :          * At this point no one can be using this transaction to modify any tree
    1265             :          * and no one can start another transaction to modify any tree either.
    1266             :          */
    1267      203188 :         ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING);
    1268             : 
    1269      203188 :         eb = btrfs_lock_root_node(fs_info->tree_root);
    1270      203188 :         ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
    1271             :                               0, &eb, BTRFS_NESTING_COW);
    1272      203188 :         btrfs_tree_unlock(eb);
    1273      203188 :         free_extent_buffer(eb);
    1274             : 
    1275      203188 :         if (ret)
    1276             :                 return ret;
    1277             : 
    1278      203188 :         ret = btrfs_run_dev_stats(trans);
    1279      203188 :         if (ret)
    1280             :                 return ret;
    1281      203188 :         ret = btrfs_run_dev_replace(trans);
    1282      203188 :         if (ret)
    1283             :                 return ret;
    1284      203188 :         ret = btrfs_run_qgroups(trans);
    1285      203188 :         if (ret)
    1286             :                 return ret;
    1287             : 
    1288      203188 :         ret = btrfs_setup_space_cache(trans);
    1289      203188 :         if (ret)
    1290             :                 return ret;
    1291             : 
    1292      203188 : again:
    1293      712636 :         while (!list_empty(&fs_info->dirty_cowonly_roots)) {
    1294      449410 :                 struct btrfs_root *root;
    1295      449410 :                 next = fs_info->dirty_cowonly_roots.next;
    1296      449410 :                 list_del_init(next);
    1297      449410 :                 root = list_entry(next, struct btrfs_root, dirty_list);
    1298      449410 :                 clear_bit(BTRFS_ROOT_DIRTY, &root->state);
    1299             : 
    1300      449410 :                 list_add_tail(&root->dirty_list,
    1301      449410 :                               &trans->transaction->switch_commits);
    1302      449410 :                 ret = update_cowonly_root(trans, root);
    1303      449410 :                 if (ret)
    1304           0 :                         return ret;
    1305             :         }
    1306             : 
    1307             :         /* Now flush any delayed refs generated by updating all of the roots */
    1308      263226 :         ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
    1309      263226 :         if (ret)
    1310           0 :                 return ret;
    1311             : 
    1312      466600 :         while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) {
    1313      203374 :                 ret = btrfs_write_dirty_block_groups(trans);
    1314      203374 :                 if (ret)
    1315           0 :                         return ret;
    1316             : 
    1317             :                 /*
    1318             :                  * We're writing the dirty block groups, which could generate
    1319             :                  * delayed refs, which could generate more dirty block groups,
    1320             :                  * so we want to keep this flushing in this loop to make sure
    1321             :                  * everything gets run.
    1322             :                  */
    1323      203374 :                 ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
    1324      203374 :                 if (ret)
    1325           0 :                         return ret;
    1326             :         }
    1327             : 
    1328      263226 :         if (!list_empty(&fs_info->dirty_cowonly_roots))
    1329       60038 :                 goto again;
    1330             : 
    1331             :         /* Update dev-replace pointer once everything is committed */
    1332      203188 :         fs_info->dev_replace.committed_cursor_left =
    1333      203188 :                 fs_info->dev_replace.cursor_left_last_write_of_item;
    1334             : 
    1335      203188 :         return 0;
    1336             : }
    1337             : 
    1338             : /*
    1339             :  * If we had a pending drop we need to see if there are any others left in our
    1340             :  * dead roots list, and if not clear our bit and wake any waiters.
    1341             :  */
    1342           0 : void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
    1343             : {
    1344             :         /*
    1345             :          * We put the drop in progress roots at the front of the list, so if the
    1346             :          * first entry doesn't have UNFINISHED_DROP set we can wake everybody
    1347             :          * up.
    1348             :          */
    1349           0 :         spin_lock(&fs_info->trans_lock);
    1350           0 :         if (!list_empty(&fs_info->dead_roots)) {
    1351           0 :                 struct btrfs_root *root = list_first_entry(&fs_info->dead_roots,
    1352             :                                                            struct btrfs_root,
    1353             :                                                            root_list);
    1354           0 :                 if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state)) {
    1355           0 :                         spin_unlock(&fs_info->trans_lock);
    1356           0 :                         return;
    1357             :                 }
    1358             :         }
    1359           0 :         spin_unlock(&fs_info->trans_lock);
    1360             : 
    1361           0 :         btrfs_wake_unfinished_drop(fs_info);
    1362             : }
    1363             : 
    1364             : /*
    1365             :  * Dead roots are old snapshots that need to be deleted.  This grabs a
    1366             :  * reference on the root and adds it to the list of dead roots that need
    1367             :  * to be deleted.
    1368             :  */
    1369         300 : void btrfs_add_dead_root(struct btrfs_root *root)
    1370             : {
    1371         300 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1372             : 
    1373         300 :         spin_lock(&fs_info->trans_lock);
    1374         300 :         if (list_empty(&root->root_list)) {
    1375         300 :                 btrfs_grab_root(root);
    1376             : 
    1377             :                 /* We want to process the partially complete drops first. */
    1378         600 :                 if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state))
    1379           0 :                         list_add(&root->root_list, &fs_info->dead_roots);
    1380             :                 else
    1381         300 :                         list_add_tail(&root->root_list, &fs_info->dead_roots);
    1382             :         }
    1383         300 :         spin_unlock(&fs_info->trans_lock);
    1384         300 : }
    1385             : 
    1386             : /*
    1387             :  * Update each subvolume root and its relocation root, if it exists, in the tree
    1388             :  * of tree roots. Also free log roots if they exist.
    1389             :  */
    1390      203188 : static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
    1391             : {
    1392      203188 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1393      203188 :         struct btrfs_root *gang[8];
    1394      203188 :         int i;
    1395      203188 :         int ret;
    1396             : 
    1397             :         /*
    1398             :          * At this point no one can be using this transaction to modify any tree
    1399             :          * and no one can start another transaction to modify any tree either.
    1400             :          */
    1401      203188 :         ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING);
    1402             : 
    1403      203188 :         spin_lock(&fs_info->fs_roots_radix_lock);
    1404      349168 :         while (1) {
    1405      349168 :                 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
    1406             :                                                  (void **)gang, 0,
    1407             :                                                  ARRAY_SIZE(gang),
    1408             :                                                  BTRFS_ROOT_TRANS_TAG);
    1409      349168 :                 if (ret == 0)
    1410             :                         break;
    1411      298277 :                 for (i = 0; i < ret; i++) {
    1412      152297 :                         struct btrfs_root *root = gang[i];
    1413      152297 :                         int ret2;
    1414             : 
    1415             :                         /*
    1416             :                          * At this point we can neither have tasks logging inodes
    1417             :                          * from a root nor trying to commit a log tree.
    1418             :                          */
    1419      152297 :                         ASSERT(atomic_read(&root->log_writers) == 0);
    1420      152297 :                         ASSERT(atomic_read(&root->log_commit[0]) == 0);
    1421      152297 :                         ASSERT(atomic_read(&root->log_commit[1]) == 0);
    1422             : 
    1423      152297 :                         radix_tree_tag_clear(&fs_info->fs_roots_radix,
    1424      152297 :                                         (unsigned long)root->root_key.objectid,
    1425             :                                         BTRFS_ROOT_TRANS_TAG);
    1426      152297 :                         spin_unlock(&fs_info->fs_roots_radix_lock);
    1427             : 
    1428      152297 :                         btrfs_free_log(trans, root);
    1429      152297 :                         ret2 = btrfs_update_reloc_root(trans, root);
    1430      152297 :                         if (ret2)
    1431           0 :                                 return ret2;
    1432             : 
    1433             :                         /* see comments in should_cow_block() */
    1434      152297 :                         clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
    1435      152297 :                         smp_mb__after_atomic();
    1436             : 
    1437      152297 :                         if (root->commit_root != root->node) {
    1438      147055 :                                 list_add_tail(&root->dirty_list,
    1439      147055 :                                         &trans->transaction->switch_commits);
    1440      147055 :                                 btrfs_set_root_node(&root->root_item,
    1441             :                                                     root->node);
    1442             :                         }
    1443             : 
    1444      152297 :                         ret2 = btrfs_update_root(trans, fs_info->tree_root,
    1445             :                                                 &root->root_key,
    1446             :                                                 &root->root_item);
    1447      152297 :                         if (ret2)
    1448           0 :                                 return ret2;
    1449      152297 :                         spin_lock(&fs_info->fs_roots_radix_lock);
    1450      152297 :                         btrfs_qgroup_free_meta_all_pertrans(root);
    1451             :                 }
    1452             :         }
    1453      203188 :         spin_unlock(&fs_info->fs_roots_radix_lock);
    1454      203188 :         return 0;
    1455             : }
    1456             : 
    1457             : /*
    1458             :  * defrag a given btree.
    1459             :  * Every leaf in the btree is read and defragged.
    1460             :  */
    1461           2 : int btrfs_defrag_root(struct btrfs_root *root)
    1462             : {
    1463           2 :         struct btrfs_fs_info *info = root->fs_info;
    1464           2 :         struct btrfs_trans_handle *trans;
    1465           2 :         int ret;
    1466             : 
    1467           2 :         if (test_and_set_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state))
    1468             :                 return 0;
    1469             : 
    1470           2 :         while (1) {
    1471           2 :                 trans = btrfs_start_transaction(root, 0);
    1472           2 :                 if (IS_ERR(trans)) {
    1473           0 :                         ret = PTR_ERR(trans);
    1474           0 :                         break;
    1475             :                 }
    1476             : 
    1477           2 :                 ret = btrfs_defrag_leaves(trans, root);
    1478             : 
    1479           2 :                 btrfs_end_transaction(trans);
    1480           2 :                 btrfs_btree_balance_dirty(info);
    1481           2 :                 cond_resched();
    1482             : 
    1483           2 :                 if (btrfs_fs_closing(info) || ret != -EAGAIN)
    1484             :                         break;
    1485             : 
    1486           0 :                 if (btrfs_defrag_cancelled(info)) {
    1487             :                         btrfs_debug(info, "defrag_root cancelled");
    1488             :                         ret = -EAGAIN;
    1489             :                         break;
    1490             :                 }
    1491             :         }
    1492           2 :         clear_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state);
    1493           2 :         return ret;
    1494             : }
    1495             : 
    1496             : /*
    1497             :  * Do all the special snapshot-related qgroup work.
    1498             :  *
    1499             :  * This does the needed qgroup inherit and dirty tricks, like switching commit
    1500             :  * roots inside one transaction and writing all btree blocks to disk, so that
    1501             :  * qgroup accounting comes out correct.
    1502             :  */
    1503        1025 : static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
    1504             :                                    struct btrfs_root *src,
    1505             :                                    struct btrfs_root *parent,
    1506             :                                    struct btrfs_qgroup_inherit *inherit,
    1507             :                                    u64 dst_objectid)
    1508             : {
    1509        1025 :         struct btrfs_fs_info *fs_info = src->fs_info;
    1510        1025 :         int ret;
    1511             : 
    1512             :         /*
    1513             :          * Save some performance in the case that qgroups are not
    1514             :          * enabled. If this check races with the ioctl, rescan will
    1515             :          * kick in anyway.
    1516             :          */
    1517        1025 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
    1518             :                 return 0;
    1519             : 
    1520             :         /*
    1521             :          * Ensure the dirty @src will be committed.  Otherwise, after the
    1522             :          * upcoming commit_fs_roots() and switch_commit_roots(), any dirty but
    1523             :          * unrecorded root will never be updated again, leaving an outdated
    1524             :          * root item.
    1525             :          */
    1526         173 :         ret = record_root_in_trans(trans, src, 1);
    1527         173 :         if (ret)
    1528             :                 return ret;
    1529             : 
    1530             :         /*
    1531             :          * btrfs_qgroup_inherit relies on a consistent view of the usage for the
    1532             :          * src root, so we must run the delayed refs here.
    1533             :          *
    1534             :          * However, this isn't particularly foolproof, because there's no
    1535             :          * synchronization keeping us from changing the tree after this point
    1536             :          * before we do the qgroup_inherit, or even from making changes while
    1537             :          * we're doing the qgroup_inherit.  But that's a problem for the future,
    1538             :          * for now flush the delayed refs to narrow the race window where the
    1539             :          * qgroup counters could end up wrong.
    1540             :          */
    1541         173 :         ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
    1542         173 :         if (ret) {
    1543           0 :                 btrfs_abort_transaction(trans, ret);
    1544           0 :                 return ret;
    1545             :         }
    1546             : 
    1547         173 :         ret = commit_fs_roots(trans);
    1548         173 :         if (ret)
    1549           0 :                 goto out;
    1550         173 :         ret = btrfs_qgroup_account_extents(trans);
    1551         173 :         if (ret < 0)
    1552           0 :                 goto out;
    1553             : 
    1554             :         /* Now the qgroups are all updated, we can inherit them into the new qgroup */
    1555         173 :         ret = btrfs_qgroup_inherit(trans, src->root_key.objectid, dst_objectid,
    1556             :                                    inherit);
    1557         173 :         if (ret < 0)
    1558           0 :                 goto out;
    1559             : 
    1560             :         /*
    1561             :          * Now we do a simplified commit transaction, which will:
    1562             :          * 1) commit all subvolume and extent trees,
    1563             :          *    to ensure they all have a valid commit_root for the accounting
    1564             :          *    done by the later insert_dir_item();
    1565             :          * 2) write all btree blocks onto disk,
    1566             :          *    to make sure later btree modifications are COWed again; otherwise
    1567             :          *    the commit_root contents could change and cause wrong qgroup numbers.
    1568             :          * In this simplified commit we don't really care about other trees,
    1569             :          * like the chunk and root trees, as they don't affect qgroups.
    1570             :          * And we don't write the super block, to avoid a half-committed state.
    1571             :          */
    1572         173 :         ret = commit_cowonly_roots(trans);
    1573         173 :         if (ret)
    1574           0 :                 goto out;
    1575         173 :         switch_commit_roots(trans);
    1576         173 :         ret = btrfs_write_and_wait_transaction(trans);
    1577         173 :         if (ret)
    1578           0 :                 btrfs_handle_fs_error(fs_info, ret,
    1579             :                         "Error while writing out transaction for qgroup");
    1580             : 
    1581         173 : out:
    1582             :         /*
    1583             :          * Force the parent root to be updated, as we recorded it before and
    1584             :          * its last_trans == cur_transid.
    1585             :          * Otherwise it won't be committed onto disk again after the later
    1586             :          * insert_dir_item().
    1587             :          */
    1588         173 :         if (!ret)
    1589         173 :                 ret = record_root_in_trans(trans, parent, 1);
    1590             :         return ret;
    1591             : }
    1592             : 
    1593             : /*
    1594             :  * new snapshots need to be created at a very specific time in the
    1595             :  * transaction commit.  This does the actual creation.
    1596             :  *
    1597             :  * Note:
    1598             :  * If an error occurs that may affect the commit of the current transaction,
    1599             :  * return that error number.  If an error only affects the creation of the
    1600             :  * pending snapshots, record it in pending->error and return 0.
    1601             :  */
    1602        1025 : static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
    1603             :                                    struct btrfs_pending_snapshot *pending)
    1604             : {
    1605             : 
    1606        1025 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1607        1025 :         struct btrfs_key key;
    1608        1025 :         struct btrfs_root_item *new_root_item;
    1609        1025 :         struct btrfs_root *tree_root = fs_info->tree_root;
    1610        1025 :         struct btrfs_root *root = pending->root;
    1611        1025 :         struct btrfs_root *parent_root;
    1612        1025 :         struct btrfs_block_rsv *rsv;
    1613        1025 :         struct inode *parent_inode = pending->dir;
    1614        1025 :         struct btrfs_path *path;
    1615        1025 :         struct btrfs_dir_item *dir_item;
    1616        1025 :         struct extent_buffer *tmp;
    1617        1025 :         struct extent_buffer *old;
    1618        1025 :         struct timespec64 cur_time;
    1619        1025 :         int ret = 0;
    1620        1025 :         u64 to_reserve = 0;
    1621        1025 :         u64 index = 0;
    1622        1025 :         u64 objectid;
    1623        1025 :         u64 root_flags;
    1624        1025 :         unsigned int nofs_flags;
    1625        1025 :         struct fscrypt_name fname;
    1626             : 
    1627        1025 :         ASSERT(pending->path);
    1628        1025 :         path = pending->path;
    1629             : 
    1630        1025 :         ASSERT(pending->root_item);
    1631        1025 :         new_root_item = pending->root_item;
    1632             : 
    1633             :         /*
    1634             :          * We're inside a transaction and must make sure that any potential
    1635             :          * allocations with GFP_KERNEL in fscrypt won't recurse back to
    1636             :          * filesystem.
    1637             :          */
    1638        1025 :         nofs_flags = memalloc_nofs_save();
    1639        2050 :         pending->error = fscrypt_setup_filename(parent_inode,
    1640        1025 :                                                 &pending->dentry->d_name, 0,
    1641             :                                                 &fname);
    1642        1025 :         memalloc_nofs_restore(nofs_flags);
    1643        1025 :         if (pending->error)
    1644           0 :                 goto free_pending;
    1645             : 
    1646        1025 :         pending->error = btrfs_get_free_objectid(tree_root, &objectid);
    1647        1025 :         if (pending->error)
    1648           0 :                 goto free_fname;
    1649             : 
    1650             :         /*
    1651             :          * Make the qgroup code skip the new snapshot's qgroupid, as it is
    1652             :          * accounted for by the later btrfs_qgroup_inherit().
    1653             :          */
    1654        1025 :         btrfs_set_skip_qgroup(trans, objectid);
    1655             : 
    1656        1025 :         btrfs_reloc_pre_snapshot(pending, &to_reserve);
    1657             : 
    1658        1025 :         if (to_reserve > 0) {
    1659           0 :                 pending->error = btrfs_block_rsv_add(fs_info,
    1660             :                                                      &pending->block_rsv,
    1661             :                                                      to_reserve,
    1662             :                                                      BTRFS_RESERVE_NO_FLUSH);
    1663           0 :                 if (pending->error)
    1664           0 :                         goto clear_skip_qgroup;
    1665             :         }
    1666             : 
    1667        1025 :         key.objectid = objectid;
    1668        1025 :         key.offset = (u64)-1;
    1669        1025 :         key.type = BTRFS_ROOT_ITEM_KEY;
    1670             : 
    1671        1025 :         rsv = trans->block_rsv;
    1672        1025 :         trans->block_rsv = &pending->block_rsv;
    1673        1025 :         trans->bytes_reserved = trans->block_rsv->reserved;
    1674        1025 :         trace_btrfs_space_reservation(fs_info, "transaction",
    1675             :                                       trans->transid,
    1676             :                                       trans->bytes_reserved, 1);
    1677        1025 :         parent_root = BTRFS_I(parent_inode)->root;
    1678        1025 :         ret = record_root_in_trans(trans, parent_root, 0);
    1679        1025 :         if (ret)
    1680           0 :                 goto fail;
    1681        1025 :         cur_time = current_time(parent_inode);
    1682             : 
    1683             :         /*
    1684             :          * insert the directory item
    1685             :          */
    1686        1025 :         ret = btrfs_set_inode_index(BTRFS_I(parent_inode), &index);
    1687        1025 :         if (ret) {
    1688           0 :                 btrfs_abort_transaction(trans, ret);
    1689           0 :                 goto fail;
    1690             :         }
    1691             : 
    1692             :         /* check if there is a file/dir which has the same name. */
    1693        1025 :         dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
    1694             :                                          btrfs_ino(BTRFS_I(parent_inode)),
    1695             :                                          &fname.disk_name, 0);
    1696        1025 :         if (dir_item != NULL && !IS_ERR(dir_item)) {
    1697           0 :                 pending->error = -EEXIST;
    1698           0 :                 goto dir_item_existed;
    1699        1025 :         } else if (IS_ERR(dir_item)) {
    1700           0 :                 ret = PTR_ERR(dir_item);
    1701           0 :                 btrfs_abort_transaction(trans, ret);
    1702           0 :                 goto fail;
    1703             :         }
    1704        1025 :         btrfs_release_path(path);
    1705             : 
    1706             :         /*
    1707             :          * Pull in the delayed directory update
    1708             :          * and the delayed inode item,
    1709             :          * otherwise we would corrupt the FS
    1710             :          * during the snapshot.
    1711             :          */
    1712        1025 :         ret = btrfs_run_delayed_items(trans);
    1713        1025 :         if (ret) {      /* Transaction aborted */
    1714           0 :                 btrfs_abort_transaction(trans, ret);
    1715           0 :                 goto fail;
    1716             :         }
    1717             : 
    1718        1025 :         ret = record_root_in_trans(trans, root, 0);
    1719        1025 :         if (ret) {
    1720           0 :                 btrfs_abort_transaction(trans, ret);
    1721           0 :                 goto fail;
    1722             :         }
    1723        1025 :         btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
    1724        2050 :         memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
    1725        1025 :         btrfs_check_and_init_root_item(new_root_item);
    1726             : 
    1727        1025 :         root_flags = btrfs_root_flags(new_root_item);
    1728        1025 :         if (pending->readonly)
    1729         738 :                 root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
    1730             :         else
    1731         287 :                 root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
    1732        1025 :         btrfs_set_root_flags(new_root_item, root_flags);
    1733             : 
    1734        1025 :         btrfs_set_root_generation_v2(new_root_item,
    1735             :                         trans->transid);
    1736        1025 :         generate_random_guid(new_root_item->uuid);
    1737        2050 :         memcpy(new_root_item->parent_uuid, root->root_item.uuid,
    1738             :                         BTRFS_UUID_SIZE);
    1739        1025 :         if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) {
    1740         287 :                 memset(new_root_item->received_uuid, 0,
    1741             :                        sizeof(new_root_item->received_uuid));
    1742         287 :                 memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
    1743         287 :                 memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
    1744         287 :                 btrfs_set_root_stransid(new_root_item, 0);
    1745         287 :                 btrfs_set_root_rtransid(new_root_item, 0);
    1746             :         }
    1747        1025 :         btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec);
    1748        1025 :         btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec);
    1749        1025 :         btrfs_set_root_otransid(new_root_item, trans->transid);
    1750             : 
    1751        1025 :         old = btrfs_lock_root_node(root);
    1752        1025 :         ret = btrfs_cow_block(trans, root, old, NULL, 0, &old,
    1753             :                               BTRFS_NESTING_COW);
    1754        1025 :         if (ret) {
    1755           0 :                 btrfs_tree_unlock(old);
    1756           0 :                 free_extent_buffer(old);
    1757           0 :                 btrfs_abort_transaction(trans, ret);
    1758           0 :                 goto fail;
    1759             :         }
    1760             : 
    1761        1025 :         ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
    1762             :         /* clean up in any case */
    1763        1025 :         btrfs_tree_unlock(old);
    1764        1025 :         free_extent_buffer(old);
    1765        1025 :         if (ret) {
    1766           0 :                 btrfs_abort_transaction(trans, ret);
    1767           0 :                 goto fail;
    1768             :         }
    1769             :         /* see comments in should_cow_block() */
    1770        1025 :         set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
    1771        1025 :         smp_wmb();
    1772             : 
    1773        1025 :         btrfs_set_root_node(new_root_item, tmp);
    1774             :         /* record when the snapshot was created in key.offset */
    1775        1025 :         key.offset = trans->transid;
    1776        1025 :         ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
    1777        1025 :         btrfs_tree_unlock(tmp);
    1778        1025 :         free_extent_buffer(tmp);
    1779        1025 :         if (ret) {
    1780           0 :                 btrfs_abort_transaction(trans, ret);
    1781           0 :                 goto fail;
    1782             :         }
    1783             : 
    1784             :         /*
    1785             :          * insert root back/forward references
    1786             :          */
    1787        1025 :         ret = btrfs_add_root_ref(trans, objectid,
    1788             :                                  parent_root->root_key.objectid,
    1789             :                                  btrfs_ino(BTRFS_I(parent_inode)), index,
    1790             :                                  &fname.disk_name);
    1791        1025 :         if (ret) {
    1792           0 :                 btrfs_abort_transaction(trans, ret);
    1793           0 :                 goto fail;
    1794             :         }
    1795             : 
    1796        1025 :         key.offset = (u64)-1;
    1797        1025 :         pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
    1798        1025 :         if (IS_ERR(pending->snap)) {
    1799           0 :                 ret = PTR_ERR(pending->snap);
    1800           0 :                 pending->snap = NULL;
    1801           0 :                 btrfs_abort_transaction(trans, ret);
    1802           0 :                 goto fail;
    1803             :         }
    1804             : 
    1805        1025 :         ret = btrfs_reloc_post_snapshot(trans, pending);
    1806        1025 :         if (ret) {
    1807           0 :                 btrfs_abort_transaction(trans, ret);
    1808           0 :                 goto fail;
    1809             :         }
    1810             : 
    1811             :         /*
    1812             :          * Do the special qgroup accounting for this snapshot, as we use some
    1813             :          * qgroup tricks to make snapshot creation fast.
    1814             :          * To cooperate with those tricks we apply another one here, otherwise
    1815             :          * the snapshot would be greatly slowed down by a subtree qgroup rescan.
    1816             :          */
    1817        1025 :         ret = qgroup_account_snapshot(trans, root, parent_root,
    1818             :                                       pending->inherit, objectid);
    1819        1025 :         if (ret < 0)
    1820           0 :                 goto fail;
    1821             : 
    1822        1025 :         ret = btrfs_insert_dir_item(trans, &fname.disk_name,
    1823             :                                     BTRFS_I(parent_inode), &key, BTRFS_FT_DIR,
    1824             :                                     index);
    1825             :         /* We have checked the name at the beginning, so it is impossible. */
    1826        1025 :         BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
    1827        1025 :         if (ret) {
    1828           0 :                 btrfs_abort_transaction(trans, ret);
    1829           0 :                 goto fail;
    1830             :         }
    1831             : 
    1832        1025 :         btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
    1833        1025 :                                                   fname.disk_name.len * 2);
    1834        1025 :         parent_inode->i_mtime = current_time(parent_inode);
    1835        1025 :         parent_inode->i_ctime = parent_inode->i_mtime;
    1836        1025 :         ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
    1837        1025 :         if (ret) {
    1838           0 :                 btrfs_abort_transaction(trans, ret);
    1839           0 :                 goto fail;
    1840             :         }
    1841        1025 :         ret = btrfs_uuid_tree_add(trans, new_root_item->uuid,
    1842             :                                   BTRFS_UUID_KEY_SUBVOL,
    1843             :                                   objectid);
    1844        1025 :         if (ret) {
    1845           0 :                 btrfs_abort_transaction(trans, ret);
    1846           0 :                 goto fail;
    1847             :         }
    1848        1025 :         if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
    1849           0 :                 ret = btrfs_uuid_tree_add(trans, new_root_item->received_uuid,
    1850             :                                           BTRFS_UUID_KEY_RECEIVED_SUBVOL,
    1851             :                                           objectid);
    1852           0 :                 if (ret && ret != -EEXIST) {
    1853           0 :                         btrfs_abort_transaction(trans, ret);
    1854           0 :                         goto fail;
    1855             :                 }
    1856             :         }
    1857             : 
    1858        1025 : fail:
    1859        1025 :         pending->error = ret;
    1860        1025 : dir_item_existed:
    1861        1025 :         trans->block_rsv = rsv;
    1862        1025 :         trans->bytes_reserved = 0;
    1863        1025 : clear_skip_qgroup:
    1864        1025 :         btrfs_clear_skip_qgroup(trans);
    1865             : free_fname:
    1866             :         fscrypt_free_filename(&fname);
    1867        1025 : free_pending:
    1868        1025 :         kfree(new_root_item);
    1869        1025 :         pending->root_item = NULL;
    1870        1025 :         btrfs_free_path(path);
    1871        1025 :         pending->path = NULL;
    1872             : 
    1873        1025 :         return ret;
    1874             : }
    1875             : 
    1876             : /*
    1877             :  * create all the snapshots we've scheduled for creation
    1878             :  */
    1879      203015 : static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans)
    1880             : {
    1881      203015 :         struct btrfs_pending_snapshot *pending, *next;
    1882      203015 :         struct list_head *head = &trans->transaction->pending_snapshots;
    1883      203015 :         int ret = 0;
    1884             : 
    1885      204040 :         list_for_each_entry_safe(pending, next, head, list) {
    1886        1025 :                 list_del(&pending->list);
    1887        1025 :                 ret = create_pending_snapshot(trans, pending);
    1888        1025 :                 if (ret)
    1889             :                         break;
    1890             :         }
    1891      203015 :         return ret;
    1892             : }
    1893             : 
    1894      203014 : static void update_super_roots(struct btrfs_fs_info *fs_info)
    1895             : {
    1896      203014 :         struct btrfs_root_item *root_item;
    1897      203014 :         struct btrfs_super_block *super;
    1898             : 
    1899      203014 :         super = fs_info->super_copy;
    1900             : 
    1901      203014 :         root_item = &fs_info->chunk_root->root_item;
    1902      203014 :         super->chunk_root = root_item->bytenr;
    1903      203014 :         super->chunk_root_generation = root_item->generation;
    1904      203014 :         super->chunk_root_level = root_item->level;
    1905             : 
    1906      203014 :         root_item = &fs_info->tree_root->root_item;
    1907      203014 :         super->root = root_item->bytenr;
    1908      203014 :         super->generation = root_item->generation;
    1909      203014 :         super->root_level = root_item->level;
    1910      203014 :         if (btrfs_test_opt(fs_info, SPACE_CACHE))
    1911          24 :                 super->cache_generation = root_item->generation;
    1912      405980 :         else if (test_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags))
    1913           2 :                 super->cache_generation = 0;
    1914      406028 :         if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
    1915      202977 :                 super->uuid_tree_generation = root_item->generation;
    1916      203014 : }
    1917             : 
    1918    40148816 : int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
    1919             : {
    1920    40148816 :         struct btrfs_transaction *trans;
    1921    40148816 :         int ret = 0;
    1922             : 
    1923    40148816 :         spin_lock(&info->trans_lock);
    1924    40185103 :         trans = info->running_transaction;
    1925    40185103 :         if (trans)
    1926    39896628 :                 ret = (trans->state >= TRANS_STATE_COMMIT_START);
    1927    40185103 :         spin_unlock(&info->trans_lock);
    1928    40178658 :         return ret;
    1929             : }
    1930             : 
    1931        4484 : int btrfs_transaction_blocked(struct btrfs_fs_info *info)
    1932             : {
    1933        4484 :         struct btrfs_transaction *trans;
    1934        4484 :         int ret = 0;
    1935             : 
    1936        4484 :         spin_lock(&info->trans_lock);
    1937        4484 :         trans = info->running_transaction;
    1938        4484 :         if (trans)
    1939         318 :                 ret = is_transaction_blocked(trans);
    1940        4484 :         spin_unlock(&info->trans_lock);
    1941        4484 :         return ret;
    1942             : }
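
Both helpers above are short polling predicates taken under trans_lock. As a rough, hedged illustration of how a caller might use one of them to back off while a commit is in flight (the helper name is hypothetical and not part of this file):

/*
 * Hedged sketch: back off while a transaction commit is in progress.
 * Built only on btrfs_transaction_in_commit() shown above; msleep() is
 * used for a simple fixed nap between polls.
 */
static void example_backoff_during_commit(struct btrfs_fs_info *fs_info)
{
        while (btrfs_transaction_in_commit(fs_info))
                msleep(20);     /* nap ~20ms, then poll again */
}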
    1943             : 
    1944           4 : void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans)
    1945             : {
    1946           4 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1947           4 :         struct btrfs_transaction *cur_trans;
    1948             : 
    1949             :         /* Kick the transaction kthread. */
    1950           4 :         set_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags);
    1951           4 :         wake_up_process(fs_info->transaction_kthread);
    1952             : 
    1953             :         /* take transaction reference */
    1954           4 :         cur_trans = trans->transaction;
    1955           4 :         refcount_inc(&cur_trans->use_count);
    1956             : 
    1957           4 :         btrfs_end_transaction(trans);
    1958             : 
    1959             :         /*
    1960             :          * Wait for the current transaction commit to start and block
    1961             :          * subsequent transaction joins
    1962             :          */
    1963           4 :         btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
    1964           7 :         wait_event(fs_info->transaction_blocked_wait,
    1965             :                    cur_trans->state >= TRANS_STATE_COMMIT_START ||
    1966             :                    TRANS_ABORTED(cur_trans));
    1967           4 :         btrfs_put_transaction(cur_trans);
    1968           4 : }
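
btrfs_commit_transaction_async() above ends the handle itself and only waits for the commit to reach COMMIT_START. A minimal, hedged sketch of a caller (hypothetical function; real users set up more state first, and btrfs_start_transaction() is assumed to take its usual (root, num_items) arguments):

/*
 * Hedged sketch: start a transaction, queue work for the commit to pick up,
 * then kick an async commit and return once the commit has started.
 */
static int example_async_commit(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;

        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans))
                return PTR_ERR(trans);

        /* ... queue up whatever the commit should pick up ... */

        btrfs_commit_transaction_async(trans);
        return 0;
}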
    1969             : 
    1970          16 : static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
    1971             : {
    1972          16 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1973          16 :         struct btrfs_transaction *cur_trans = trans->transaction;
    1974             : 
    1975          16 :         WARN_ON(refcount_read(&trans->use_count) > 1);
    1976             : 
    1977          16 :         btrfs_abort_transaction(trans, err);
    1978             : 
    1979          16 :         spin_lock(&fs_info->trans_lock);
    1980             : 
    1981             :         /*
    1982             :          * If the transaction has already been removed from the list, it
    1983             :          * was committed successfully, and this cleanup function must never
    1984             :          * be called for it.
    1985             :          */
    1986          16 :         BUG_ON(list_empty(&cur_trans->list));
    1987             : 
    1988          16 :         if (cur_trans == fs_info->running_transaction) {
    1989          12 :                 cur_trans->state = TRANS_STATE_COMMIT_DOING;
    1990          12 :                 spin_unlock(&fs_info->trans_lock);
    1991             : 
    1992             :                 /*
    1993             :                  * The thread has already released the lockdep map as a
    1994             :                  * reader in btrfs_commit_transaction().
    1995             :                  */
    1996          12 :                 btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers);
    1997          12 :                 wait_event(cur_trans->writer_wait,
    1998             :                            atomic_read(&cur_trans->num_writers) == 1);
    1999             : 
    2000          12 :                 spin_lock(&fs_info->trans_lock);
    2001             :         }
    2002             : 
    2003             :         /*
    2004             :          * Now that we know no one else is still using the transaction we can
    2005             :          * remove the transaction from the list of transactions. This prevents
    2006             :          * the transaction kthread from cleaning up the transaction while some
    2007             :          * other task is still using it, which could result in a use-after-free
    2008             :          * on things like log trees, as it forces the transaction kthread to
    2009             :          * wait for this transaction to be cleaned up by us.
    2010             :          */
    2011          16 :         list_del_init(&cur_trans->list);
    2012             : 
    2013          16 :         spin_unlock(&fs_info->trans_lock);
    2014             : 
    2015          16 :         btrfs_cleanup_one_transaction(trans->transaction, fs_info);
    2016             : 
    2017          16 :         spin_lock(&fs_info->trans_lock);
    2018          16 :         if (cur_trans == fs_info->running_transaction)
    2019          12 :                 fs_info->running_transaction = NULL;
    2020          16 :         spin_unlock(&fs_info->trans_lock);
    2021             : 
    2022          16 :         if (trans->type & __TRANS_FREEZABLE)
    2023           2 :                 sb_end_intwrite(fs_info->sb);
    2024          16 :         btrfs_put_transaction(cur_trans);
    2025          16 :         btrfs_put_transaction(cur_trans);
    2026             : 
    2027          16 :         trace_btrfs_transaction_commit(fs_info);
    2028             : 
    2029          16 :         if (current->journal_info == trans)
    2030           0 :                 current->journal_info = NULL;
    2031             : 
    2032             :         /*
    2033             :          * If relocation is running, we can't cancel scrub because that will
    2034             :          * result in a deadlock. Before relocating a block group, relocation
    2035             :          * pauses scrub, then starts and commits a transaction before unpausing
    2036             :          * scrub. If the transaction commit is being done by the relocation
    2037             :          * task or triggered by another task and the relocation task is waiting
    2038             :          * for the commit, and we end up here due to an error in the commit
    2039             :          * path, then calling btrfs_scrub_cancel() will deadlock, as we are
    2040             :          * asking scrub to stop while it has already been asked to pause
    2041             :          * higher up in the relocation code.
    2042             :          */
    2043          16 :         if (!test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
    2044          16 :                 btrfs_scrub_cancel(fs_info);
    2045             : 
    2046          16 :         kmem_cache_free(btrfs_trans_handle_cachep, trans);
    2047          16 : }
    2048             : 
    2049             : /*
    2050             :  * Release reserved delayed ref space of all pending block groups of the
    2051             :  * transaction and remove them from the list
    2052             :  */
    2053          16 : static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
    2054             : {
    2055          16 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2056          16 :         struct btrfs_block_group *block_group, *tmp;
    2057             : 
    2058          16 :         list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
    2059           0 :                 btrfs_delayed_refs_rsv_release(fs_info, 1);
    2060           0 :                 list_del_init(&block_group->bg_list);
    2061             :         }
    2062          16 : }
    2063             : 
    2064      203026 : static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
    2065             : {
    2066             :         /*
    2067             :          * We use try_to_writeback_inodes_sb() here because if we used
    2068             :          * btrfs_start_delalloc_roots we would deadlock with fs freeze.
    2069             :          * We are currently holding the fs freeze lock; if we do an async flush
    2070             :          * we'll do btrfs_join_transaction() and deadlock because we need to
    2071             :          * wait for the fs freeze lock.  Using the direct flushing we benefit
    2072             :          * from already being in a transaction and our join_transaction doesn't
    2073             :          * have to re-take the fs freeze lock.
    2074             :          *
    2075             :          * Note that try_to_writeback_inodes_sb() will only trigger writeback
    2076             :          * if it can read lock sb->s_umount. It will always be able to lock it,
    2077             :          * except when the filesystem is being unmounted or being frozen, but in
    2078             :          * those cases sync_filesystem() is called, which results in calling
    2079             :          * writeback_inodes_sb() while holding a write lock on sb->s_umount.
    2080             :          * Note that we don't call writeback_inodes_sb() directly, because it
    2081             :          * will emit a warning if sb->s_umount is not locked.
    2082             :          */
    2083      203026 :         if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
    2084           3 :                 try_to_writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
    2085      203026 :         return 0;
    2086             : }
    2087             : 
    2088      203015 : static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
    2089             : {
    2090      203015 :         if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
    2091           3 :                 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
    2092      203015 : }
    2093             : 
    2094             : /*
    2095             :  * Add the pending snapshot associated with the given transaction handle to
    2096             :  * the transaction's list of pending snapshots. This must be called after the
    2097             :  * transaction commit has started and while holding fs_info->trans_lock.
    2098             :  * This serves to guarantee a caller of btrfs_commit_transaction() that it can
    2099             :  * safely free the pending snapshot pointer in case btrfs_commit_transaction()
    2100             :  * returns an error.
    2101             :  */
    2102      205648 : static void add_pending_snapshot(struct btrfs_trans_handle *trans)
    2103             : {
    2104      205648 :         struct btrfs_transaction *cur_trans = trans->transaction;
    2105             : 
    2106      205648 :         if (!trans->pending_snapshot)
    2107             :                 return;
    2108             : 
    2109        1025 :         lockdep_assert_held(&trans->fs_info->trans_lock);
    2110        1025 :         ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START);
    2111             : 
    2112        1025 :         list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
    2113             : }
    2114             : 
    2115             : static void update_commit_stats(struct btrfs_fs_info *fs_info, ktime_t interval)
    2116             : {
    2117      203010 :         fs_info->commit_stats.commit_count++;
    2118      203010 :         fs_info->commit_stats.last_commit_dur = interval;
    2119      203010 :         fs_info->commit_stats.max_commit_dur =
    2120      203010 :                         max_t(u64, fs_info->commit_stats.max_commit_dur, interval);
    2121      203010 :         fs_info->commit_stats.total_commit_dur += interval;
    2122             : }
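
update_commit_stats() above keeps four counters: a commit count, the duration of the last commit, the maximum duration seen, and the running total. A small self-contained userspace model of the same bookkeeping, shown only as an illustration (the struct and names are made up, not the kernel's):

#include <stdint.h>

struct example_commit_stats {
        uint64_t commit_count;
        uint64_t last_commit_dur;
        uint64_t max_commit_dur;
        uint64_t total_commit_dur;
};

/* Same accumulation as above: count, last, max (open-coded max_t), total. */
static void example_update_commit_stats(struct example_commit_stats *s,
                                        uint64_t interval_ns)
{
        s->commit_count++;
        s->last_commit_dur = interval_ns;
        if (interval_ns > s->max_commit_dur)
                s->max_commit_dur = interval_ns;
        s->total_commit_dur += interval_ns;
}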
    2123             : 
    2124      205656 : int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
    2125             : {
    2126      205656 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2127      205656 :         struct btrfs_transaction *cur_trans = trans->transaction;
    2128      205656 :         struct btrfs_transaction *prev_trans = NULL;
    2129      205656 :         int ret;
    2130      205656 :         ktime_t start_time;
    2131      205656 :         ktime_t interval;
    2132             : 
    2133      205656 :         ASSERT(refcount_read(&trans->use_count) == 1);
    2134      205656 :         btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2135             : 
    2136      205656 :         clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
    2137             : 
    2138             :         /* Stop the commit early if ->aborted is set */
    2139      205655 :         if (TRANS_ABORTED(cur_trans)) {
    2140           0 :                 ret = cur_trans->aborted;
    2141           0 :                 goto lockdep_trans_commit_start_release;
    2142             :         }
    2143             : 
    2144      205655 :         btrfs_trans_release_metadata(trans);
    2145      205656 :         trans->block_rsv = NULL;
    2146             : 
    2147             :         /*
    2148             :          * We only want one transaction commit doing the flushing so we do not
    2149             :          * waste a bunch of time on lock contention on the extent root node.
    2150             :          */
    2151      205658 :         if (!test_and_set_bit(BTRFS_DELAYED_REFS_FLUSHING,
    2152      205656 :                               &cur_trans->delayed_refs.flags)) {
    2153             :                 /*
    2154             :                  * Make a pass through all the delayed refs we have so far.
    2155             :                  * Any running threads may add more while we are here.
    2156             :                  */
    2157      203027 :                 ret = btrfs_run_delayed_refs(trans, 0);
    2158      203027 :                 if (ret)
    2159           1 :                         goto lockdep_trans_commit_start_release;
    2160             :         }
    2161             : 
    2162      205657 :         btrfs_create_pending_block_groups(trans);
    2163             : 
    2164      205658 :         if (!test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &cur_trans->flags)) {
    2165      203026 :                 int run_it = 0;
    2166             : 
    2167             :                 /* this mutex is also taken before trying to set
    2168             :                  * block groups readonly.  We need to make sure
    2169             :                  * that nobody has set a block group readonly
    2170             :                  * after extents from that block group have been
    2171             :                  * allocated for cache files.  btrfs_set_block_group_ro
    2172             :                  * will wait for the transaction to commit if it
    2173             :                  * finds BTRFS_TRANS_DIRTY_BG_RUN set.
    2174             :                  *
    2175             :                  * The BTRFS_TRANS_DIRTY_BG_RUN flag is also used to make sure
    2176             :                  * only one process starts all the block group IO.  It wouldn't
    2177             :                  * hurt to have more than one go through, but there's no
    2178             :                  * real advantage to it either.
    2179             :                  */
    2180      203026 :                 mutex_lock(&fs_info->ro_block_group_mutex);
    2181      203026 :                 if (!test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN,
    2182             :                                       &cur_trans->flags))
    2183      203026 :                         run_it = 1;
    2184      203026 :                 mutex_unlock(&fs_info->ro_block_group_mutex);
    2185             : 
    2186      203026 :                 if (run_it) {
    2187      203026 :                         ret = btrfs_start_dirty_block_groups(trans);
    2188      203026 :                         if (ret)
    2189           0 :                                 goto lockdep_trans_commit_start_release;
    2190             :                 }
    2191             :         }
    2192             : 
    2193      205658 :         spin_lock(&fs_info->trans_lock);
    2194      205659 :         if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
    2195        2633 :                 enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
    2196             : 
    2197        2633 :                 add_pending_snapshot(trans);
    2198             : 
    2199        2633 :                 spin_unlock(&fs_info->trans_lock);
    2200        2633 :                 refcount_inc(&cur_trans->use_count);
    2201             : 
    2202        2633 :                 if (trans->in_fsync)
    2203         115 :                         want_state = TRANS_STATE_SUPER_COMMITTED;
    2204             : 
    2205        2633 :                 btrfs_trans_state_lockdep_release(fs_info,
    2206             :                                                   BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2207        2633 :                 ret = btrfs_end_transaction(trans);
    2208        2630 :                 wait_for_commit(cur_trans, want_state);
    2209             : 
    2210        2628 :                 if (TRANS_ABORTED(cur_trans))
    2211           1 :                         ret = cur_trans->aborted;
    2212             : 
    2213        2628 :                 btrfs_put_transaction(cur_trans);
    2214             : 
    2215        2628 :                 return ret;
    2216             :         }
    2217             : 
    2218      203026 :         cur_trans->state = TRANS_STATE_COMMIT_START;
    2219      203026 :         wake_up(&fs_info->transaction_blocked_wait);
    2220      203026 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2221             : 
    2222      203026 :         if (cur_trans->list.prev != &fs_info->trans_list) {
    2223        3212 :                 enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
    2224             : 
    2225        3212 :                 if (trans->in_fsync)
    2226          54 :                         want_state = TRANS_STATE_SUPER_COMMITTED;
    2227             : 
    2228        3212 :                 prev_trans = list_entry(cur_trans->list.prev,
    2229             :                                         struct btrfs_transaction, list);
    2230        3212 :                 if (prev_trans->state < want_state) {
    2231        3182 :                         refcount_inc(&prev_trans->use_count);
    2232        3182 :                         spin_unlock(&fs_info->trans_lock);
    2233             : 
    2234        3182 :                         wait_for_commit(prev_trans, want_state);
    2235             : 
    2236        3182 :                         ret = READ_ONCE(prev_trans->aborted);
    2237             : 
    2238        3182 :                         btrfs_put_transaction(prev_trans);
    2239        3182 :                         if (ret)
    2240           0 :                                 goto lockdep_release;
    2241             :                 } else {
    2242          30 :                         spin_unlock(&fs_info->trans_lock);
    2243             :                 }
    2244             :         } else {
    2245      199814 :                 spin_unlock(&fs_info->trans_lock);
    2246             :                 /*
    2247             :                  * The previous transaction was aborted and was already removed
    2248             :                  * from the list of transactions at fs_info->trans_list. So we
    2249             :                  * abort to prevent writing a new superblock that reflects a
    2250             :                  * corrupt state (pointing to trees with unwritten nodes/leaves).
    2251             :                  */
    2252      199814 :                 if (BTRFS_FS_ERROR(fs_info)) {
    2253           0 :                         ret = -EROFS;
    2254           0 :                         goto lockdep_release;
    2255             :                 }
    2256             :         }
    2257             : 
    2258             :         /*
    2259             :          * Get the time spent on the work done by the commit thread and not
    2260             :          * the time spent waiting on a previous commit
    2261             :          */
    2262      203026 :         start_time = ktime_get_ns();
    2263             : 
    2264      203026 :         extwriter_counter_dec(cur_trans, trans->type);
    2265             : 
    2266      203026 :         ret = btrfs_start_delalloc_flush(fs_info);
    2267      203026 :         if (ret)
    2268           0 :                 goto lockdep_release;
    2269             : 
    2270      203026 :         ret = btrfs_run_delayed_items(trans);
    2271      203026 :         if (ret)
    2272          11 :                 goto lockdep_release;
    2273             : 
    2274             :         /*
    2275             :          * The thread has started/joined the transaction thus it holds the
    2276             :          * lockdep map as a reader. It has to release it before acquiring the
    2277             :          * lockdep map as a writer.
    2278             :          */
    2279      203015 :         btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
    2280      203015 :         btrfs_might_wait_for_event(fs_info, btrfs_trans_num_extwriters);
    2281      212702 :         wait_event(cur_trans->writer_wait,
    2282             :                    extwriter_counter_read(cur_trans) == 0);
    2283             : 
    2284             :         /* Some pending delayed items might have been added after the previous flush. */
    2285      203015 :         ret = btrfs_run_delayed_items(trans);
    2286      203015 :         if (ret) {
    2287           0 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
    2288           0 :                 goto cleanup_transaction;
    2289             :         }
    2290             : 
    2291      203015 :         btrfs_wait_delalloc_flush(fs_info);
    2292             : 
    2293             :         /*
    2294             :          * Wait for all ordered extents started by a fast fsync that joined this
    2295             :          * transaction. Otherwise if this transaction commits before the ordered
    2296             :          * extents complete we lose logged data after a power failure.
    2297             :          */
    2298      203015 :         btrfs_might_wait_for_event(fs_info, btrfs_trans_pending_ordered);
    2299      203015 :         wait_event(cur_trans->pending_wait,
    2300             :                    atomic_read(&cur_trans->pending_ordered) == 0);
    2301             : 
    2302      203015 :         btrfs_scrub_pause(fs_info);
    2303             :         /*
    2304             :          * Ok now we need to make sure to block out any other joins while we
    2305             :          * commit the transaction.  We could have started a join before setting
    2306             :          * COMMIT_DOING, so make sure to wait for num_writers to drop to 1 again.
    2307             :          */
    2308      203015 :         spin_lock(&fs_info->trans_lock);
    2309      203015 :         add_pending_snapshot(trans);
    2310      203015 :         cur_trans->state = TRANS_STATE_COMMIT_DOING;
    2311      203015 :         spin_unlock(&fs_info->trans_lock);
    2312             : 
    2313             :         /*
    2314             :          * The thread has started/joined the transaction thus it holds the
    2315             :          * lockdep map as a reader. It has to release it before acquiring the
    2316             :          * lockdep map as a writer.
    2317             :          */
    2318      203015 :         btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
    2319      203015 :         btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers);
    2320      203750 :         wait_event(cur_trans->writer_wait,
    2321             :                    atomic_read(&cur_trans->num_writers) == 1);
    2322             : 
    2323             :         /*
    2324             :          * Make lockdep happy by acquiring the state locks after
    2325             :          * btrfs_trans_num_writers is released. If we acquired the state locks
    2326             :          * before releasing the btrfs_trans_num_writers lock then lockdep would
    2327             :          * complain because we did not follow the reverse order unlocking rule.
    2328             :          */
    2329      203015 :         btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
    2330      203015 :         btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
    2331      203015 :         btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2332             : 
    2333             :         /*
    2334             :          * We've started the commit, clear the flag in case we were triggered to
    2335             :          * do an async commit but somebody else started before the transaction
    2336             :          * kthread could do the work.
    2337             :          */
    2338      203015 :         clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags);
    2339             : 
    2340      203015 :         if (TRANS_ABORTED(cur_trans)) {
    2341           0 :                 ret = cur_trans->aborted;
    2342           0 :                 btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2343           0 :                 goto scrub_continue;
    2344             :         }
    2345             :         /*
    2346             :          * the reloc mutex makes sure that we stop
    2347             :          * the balancing code from coming in and moving
    2348             :          * extents around in the middle of the commit
    2349             :          */
    2350      203015 :         mutex_lock(&fs_info->reloc_mutex);
    2351             : 
    2352             :         /*
    2353             :          * We needn't worry about the delayed items because we will
    2354             :          * deal with them in create_pending_snapshot(), which is the
    2355             :          * core function of the snapshot creation.
    2356             :          */
    2357      203015 :         ret = create_pending_snapshots(trans);
    2358      203015 :         if (ret)
    2359           0 :                 goto unlock_reloc;
    2360             : 
    2361             :         /*
    2362             :          * We insert the dir indexes of the snapshots and update the inode
    2363             :          * of the snapshots' parents after the snapshot creation, so there
    2364             :          * are some delayed items which are not dealt with. Now deal with
    2365             :          * them.
    2366             :          *
    2367             :          * We needn't worry that this operation will corrupt the snapshots,
    2368             :          * because all the trees which are snapshotted will be forced to COW
    2369             :          * their nodes and leaves.
    2370             :          */
    2371      203015 :         ret = btrfs_run_delayed_items(trans);
    2372      203015 :         if (ret)
    2373           0 :                 goto unlock_reloc;
    2374             : 
    2375      203015 :         ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
    2376      203015 :         if (ret)
    2377           0 :                 goto unlock_reloc;
    2378             : 
    2379             :         /*
    2380             :          * make sure none of the code above managed to slip in a
    2381             :          * delayed item
    2382             :          */
    2383      203015 :         btrfs_assert_delayed_root_empty(fs_info);
    2384             : 
    2385      203015 :         WARN_ON(cur_trans != trans->transaction);
    2386             : 
    2387      203015 :         ret = commit_fs_roots(trans);
    2388      203015 :         if (ret)
    2389           0 :                 goto unlock_reloc;
    2390             : 
    2391             :         /* commit_fs_roots() gets rid of all the tree log roots; it is now
    2392             :          * safe to free the log root tree.
    2393             :          */
    2394      203015 :         btrfs_free_log_root_tree(trans, fs_info);
    2395             : 
    2396             :         /*
    2397             :          * Since fs roots are all committed, we can get a quite accurate
    2398             :          * new_roots. So let's do quota accounting.
    2399             :          */
    2400      203015 :         ret = btrfs_qgroup_account_extents(trans);
    2401      203015 :         if (ret < 0)
    2402           0 :                 goto unlock_reloc;
    2403             : 
    2404      203015 :         ret = commit_cowonly_roots(trans);
    2405      203015 :         if (ret)
    2406           0 :                 goto unlock_reloc;
    2407             : 
    2408             :         /*
    2409             :          * The tasks which save the space cache and inode cache may also
    2410             :          * update ->aborted, check it.
    2411             :          */
    2412      203015 :         if (TRANS_ABORTED(cur_trans)) {
    2413           1 :                 ret = cur_trans->aborted;
    2414           1 :                 goto unlock_reloc;
    2415             :         }
    2416             : 
    2417      203014 :         cur_trans = fs_info->running_transaction;
    2418             : 
    2419      203014 :         btrfs_set_root_node(&fs_info->tree_root->root_item,
    2420      203014 :                             fs_info->tree_root->node);
    2421      203014 :         list_add_tail(&fs_info->tree_root->dirty_list,
    2422             :                       &cur_trans->switch_commits);
    2423             : 
    2424      203014 :         btrfs_set_root_node(&fs_info->chunk_root->root_item,
    2425      203014 :                             fs_info->chunk_root->node);
    2426      203014 :         list_add_tail(&fs_info->chunk_root->dirty_list,
    2427             :                       &cur_trans->switch_commits);
    2428             : 
    2429      203014 :         if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
    2430           0 :                 btrfs_set_root_node(&fs_info->block_group_root->root_item,
    2431           0 :                                     fs_info->block_group_root->node);
    2432           0 :                 list_add_tail(&fs_info->block_group_root->dirty_list,
    2433             :                               &cur_trans->switch_commits);
    2434             :         }
    2435             : 
    2436      203014 :         switch_commit_roots(trans);
    2437             : 
    2438      203014 :         ASSERT(list_empty(&cur_trans->dirty_bgs));
    2439      203014 :         ASSERT(list_empty(&cur_trans->io_bgs));
    2440      203014 :         update_super_roots(fs_info);
    2441             : 
    2442      203014 :         btrfs_set_super_log_root(fs_info->super_copy, 0);
    2443      203014 :         btrfs_set_super_log_root_level(fs_info->super_copy, 0);
    2444      406028 :         memcpy(fs_info->super_for_commit, fs_info->super_copy,
    2445             :                sizeof(*fs_info->super_copy));
    2446             : 
    2447      203014 :         btrfs_commit_device_sizes(cur_trans);
    2448             : 
    2449      203014 :         clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags);
    2450      203014 :         clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags);
    2451             : 
    2452      203014 :         btrfs_trans_release_chunk_metadata(trans);
    2453             : 
    2454             :         /*
    2455             :          * Before changing the transaction state to TRANS_STATE_UNBLOCKED and
    2456             :          * setting fs_info->running_transaction to NULL, lock tree_log_mutex to
    2457             :          * make sure that before we commit our superblock, no other task can
    2458             :          * start a new transaction and commit a log tree before we commit our
    2459             :          * superblock. Anyone trying to commit a log tree locks this mutex before
    2460             :          * writing its superblock.
    2461             :          */
    2462      203014 :         mutex_lock(&fs_info->tree_log_mutex);
    2463             : 
    2464      203014 :         spin_lock(&fs_info->trans_lock);
    2465      203014 :         cur_trans->state = TRANS_STATE_UNBLOCKED;
    2466      203014 :         fs_info->running_transaction = NULL;
    2467      203014 :         spin_unlock(&fs_info->trans_lock);
    2468      203014 :         mutex_unlock(&fs_info->reloc_mutex);
    2469             : 
    2470      203014 :         wake_up(&fs_info->transaction_wait);
    2471      203014 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2472             : 
    2473             :         /* If we have features changed, wake up the cleaner to update sysfs. */
    2474      406028 :         if (test_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags) &&
    2475          37 :             fs_info->cleaner_kthread)
    2476          37 :                 wake_up_process(fs_info->cleaner_kthread);
    2477             : 
    2478      203014 :         ret = btrfs_write_and_wait_transaction(trans);
    2479      203014 :         if (ret) {
    2480           4 :                 btrfs_handle_fs_error(fs_info, ret,
    2481             :                                       "Error while writing out transaction");
    2482           4 :                 mutex_unlock(&fs_info->tree_log_mutex);
    2483           4 :                 goto scrub_continue;
    2484             :         }
    2485             : 
    2486      203010 :         ret = write_all_supers(fs_info, 0);
    2487             :         /*
    2488             :          * the super is written, we can safely allow the tree-loggers
    2489             :          * to go about their business
    2490             :          */
    2491      203010 :         mutex_unlock(&fs_info->tree_log_mutex);
    2492      203010 :         if (ret)
    2493           0 :                 goto scrub_continue;
    2494             : 
    2495             :         /*
    2496             :          * We needn't acquire the lock here because there is no other task
    2497             :          * which can change it.
    2498             :          */
    2499      203010 :         cur_trans->state = TRANS_STATE_SUPER_COMMITTED;
    2500      203010 :         wake_up(&cur_trans->commit_wait);
    2501      203010 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
    2502             : 
    2503      203010 :         btrfs_finish_extent_commit(trans);
    2504             : 
    2505      406020 :         if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags))
    2506         531 :                 btrfs_clear_space_info_full(fs_info);
    2507             : 
    2508      203010 :         fs_info->last_trans_committed = cur_trans->transid;
    2509             :         /*
    2510             :          * We needn't acquire the lock here because there is no other task
    2511             :          * which can change it.
    2512             :          */
    2513      203010 :         cur_trans->state = TRANS_STATE_COMPLETED;
    2514      203010 :         wake_up(&cur_trans->commit_wait);
    2515      203010 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
    2516             : 
    2517      203010 :         spin_lock(&fs_info->trans_lock);
    2518      203010 :         list_del_init(&cur_trans->list);
    2519      203010 :         spin_unlock(&fs_info->trans_lock);
    2520             : 
    2521      203010 :         btrfs_put_transaction(cur_trans);
    2522      203010 :         btrfs_put_transaction(cur_trans);
    2523             : 
    2524      203010 :         if (trans->type & __TRANS_FREEZABLE)
    2525      188662 :                 sb_end_intwrite(fs_info->sb);
    2526             : 
    2527      203010 :         trace_btrfs_transaction_commit(fs_info);
    2528             : 
    2529      203010 :         interval = ktime_get_ns() - start_time;
    2530             : 
    2531      203010 :         btrfs_scrub_continue(fs_info);
    2532             : 
    2533      203010 :         if (current->journal_info == trans)
    2534      203010 :                 current->journal_info = NULL;
    2535             : 
    2536      203010 :         kmem_cache_free(btrfs_trans_handle_cachep, trans);
    2537             : 
    2538      203010 :         update_commit_stats(fs_info, interval);
    2539             : 
    2540      203010 :         return ret;
    2541             : 
    2542           1 : unlock_reloc:
    2543           1 :         mutex_unlock(&fs_info->reloc_mutex);
    2544           5 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2545           5 : scrub_continue:
    2546           5 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
    2547           5 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
    2548           5 :         btrfs_scrub_continue(fs_info);
    2549          16 : cleanup_transaction:
    2550          16 :         btrfs_trans_release_metadata(trans);
    2551          16 :         btrfs_cleanup_pending_block_groups(trans);
    2552          16 :         btrfs_trans_release_chunk_metadata(trans);
    2553          16 :         trans->block_rsv = NULL;
    2554          16 :         btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
    2555          16 :         if (current->journal_info == trans)
    2556          16 :                 current->journal_info = NULL;
    2557          16 :         cleanup_transaction(trans, ret);
    2558             : 
    2559          16 :         return ret;
    2560             : 
    2561          11 : lockdep_release:
    2562          11 :         btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
    2563          11 :         btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
    2564          11 :         goto cleanup_transaction;
    2565             : 
    2566           1 : lockdep_trans_commit_start_release:
    2567           1 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2568           1 :         btrfs_end_transaction(trans);
    2569           1 :         return ret;
    2570             : }
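
A caller of btrfs_commit_transaction() owns a handle obtained from one of the start_transaction() variants and gives it up by committing. A minimal, hedged sketch of that pattern (hypothetical function; btrfs_start_transaction() is assumed to take its usual (root, num_items) arguments):

/*
 * Hedged sketch of a typical commit path: obtain a handle, modify trees,
 * then commit. The commit consumes the handle, so there is nothing to free
 * afterwards.
 */
static int example_modify_and_commit(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;

        trans = btrfs_start_transaction(root, 1);
        if (IS_ERR(trans))
                return PTR_ERR(trans);

        /* ... tree modifications covered by the transaction go here ... */

        return btrfs_commit_transaction(trans);
}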
    2571             : 
    2572             : /*
    2573             :  * Return 0 if there are no more dead roots at the time of the call (this is
    2574             :  *        also returned if dropping the current dead root fails), or 1 if
    2575             :  *        there are more to be processed, in which case call this again.
    2576             :  *
    2577             :  * A return value of 1 means there is certainly more to delete, but if a new
    2578             :  * dead root is queued while we run, we may still return 0. We don't mind,
    2579             :  * because btrfs_commit_super() will poke the cleaner thread and it will
    2580             :  * process it a few seconds later.
    2581             :  */
    2582       42109 : int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
    2583             : {
    2584       42109 :         struct btrfs_root *root;
    2585       42109 :         int ret;
    2586             : 
    2587       42109 :         spin_lock(&fs_info->trans_lock);
    2588       42109 :         if (list_empty(&fs_info->dead_roots)) {
    2589       41991 :                 spin_unlock(&fs_info->trans_lock);
    2590       41991 :                 return 0;
    2591             :         }
    2592         118 :         root = list_first_entry(&fs_info->dead_roots,
    2593             :                         struct btrfs_root, root_list);
    2594         118 :         list_del_init(&root->root_list);
    2595         118 :         spin_unlock(&fs_info->trans_lock);
    2596             : 
    2597         118 :         btrfs_debug(fs_info, "cleaner removing %llu", root->root_key.objectid);
    2598             : 
    2599         118 :         btrfs_kill_all_delayed_nodes(root);
    2600             : 
    2601         118 :         if (btrfs_header_backref_rev(root->node) <
    2602             :                         BTRFS_MIXED_BACKREF_REV)
    2603           0 :                 ret = btrfs_drop_snapshot(root, 0, 0);
    2604             :         else
    2605         118 :                 ret = btrfs_drop_snapshot(root, 1, 0);
    2606             : 
    2607         118 :         btrfs_put_root(root);
    2608         118 :         return (ret < 0) ? 0 : 1;
    2609             : }
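
Given the 0/1 return convention documented above, a caller that wants to drain all dead roots can simply loop until nothing is left. A hedged sketch (hypothetical helper, not the actual cleaner kthread loop):

/*
 * Hedged sketch: keep calling the function above while it reports more work.
 * cond_resched() gives the scheduler a chance between snapshot drops.
 */
static void example_drain_dead_roots(struct btrfs_fs_info *fs_info)
{
        while (btrfs_clean_one_deleted_snapshot(fs_info) > 0)
                cond_resched();
}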
    2610             : 
    2611             : /*
    2612             :  * We only mark the transaction aborted and then set the file system read-only.
    2613             :  * This will prevent new transactions from starting or trying to join this
    2614             :  * one.
    2615             :  *
    2616             :  * This means that error recovery at the call site is limited to freeing
    2617             :  * any local memory allocations and passing the error code up without
    2618             :  * further cleanup. The transaction should complete as it normally would
    2619             :  * in the call path but will return -EIO.
    2620             :  *
    2621             :  * We'll complete the cleanup in btrfs_end_transaction and
    2622             :  * btrfs_commit_transaction.
    2623             :  */
    2624          35 : void __cold __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
    2625             :                                       const char *function,
    2626             :                                       unsigned int line, int errno, bool first_hit)
    2627             : {
    2628          35 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2629             : 
    2630          35 :         WRITE_ONCE(trans->aborted, errno);
    2631          35 :         WRITE_ONCE(trans->transaction->aborted, errno);
    2632          35 :         if (first_hit && errno == -ENOSPC)
    2633           0 :                 btrfs_dump_space_info_for_trans_abort(fs_info);
    2634             :         /* Wake up anybody who may be waiting on this transaction */
    2635          35 :         wake_up(&fs_info->transaction_wait);
    2636          35 :         wake_up(&fs_info->transaction_blocked_wait);
    2637          35 :         __btrfs_handle_fs_error(fs_info, function, line, errno, NULL);
    2638          35 : }
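
The btrfs_abort_transaction() calls seen throughout this file hand the callsite down to __btrfs_abort_transaction(). A hedged sketch of such a wrapper (the macro name is made up; the real one lives in a btrfs header and also decides whether this is the first abort hit):

/*
 * Illustrative wrapper in the spirit of the btrfs_abort_transaction() calls
 * above. Assumption: first_hit is simply passed as true here, whereas the
 * real macro tracks whether an abort was already reported.
 */
#define example_abort_transaction(trans, errno)                                \
        __btrfs_abort_transaction((trans), __func__, __LINE__, (errno), true)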
    2639             : 
    2640          11 : int __init btrfs_transaction_init(void)
    2641             : {
    2642          11 :         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
    2643             :                         sizeof(struct btrfs_trans_handle), 0,
    2644             :                         SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
    2645          11 :         if (!btrfs_trans_handle_cachep)
    2646           0 :                 return -ENOMEM;
    2647             :         return 0;
    2648             : }
    2649             : 
    2650           0 : void __cold btrfs_transaction_exit(void)
    2651             : {
    2652           0 :         kmem_cache_destroy(btrfs_trans_handle_cachep);
    2653           0 : }
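
btrfs_trans_handle_cachep has to exist before any transaction handle is allocated, and btrfs_transaction_exit() tears it down again. A hedged sketch of how a module init path might pair the two calls above (hypothetical function names, not the actual btrfs module init):

static int __init example_init(void)
{
        int ret;

        /* Create the handle cache before anything can start a transaction. */
        ret = btrfs_transaction_init();
        if (ret)
                return ret;

        /* ... initialize the remaining subsystems here ... */
        return 0;
}

static void __exit example_exit(void)
{
        btrfs_transaction_exit();
}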

Generated by: LCOV version 1.14