LCOV - code coverage report
Current view: top level - fs/btrfs - transaction.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-acha @ Mon Jul 31 20:08:06 PDT 2023 Lines: 3 1205 0.2 %
Date: 2023-07-31 20:08:07 Functions: 1 56 1.8 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2007 Oracle.  All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/fs.h>
       7             : #include <linux/slab.h>
       8             : #include <linux/sched.h>
       9             : #include <linux/sched/mm.h>
      10             : #include <linux/writeback.h>
      11             : #include <linux/pagemap.h>
      12             : #include <linux/blkdev.h>
      13             : #include <linux/uuid.h>
      14             : #include <linux/timekeeping.h>
      15             : #include "misc.h"
      16             : #include "ctree.h"
      17             : #include "disk-io.h"
      18             : #include "transaction.h"
      19             : #include "locking.h"
      20             : #include "tree-log.h"
      21             : #include "volumes.h"
      22             : #include "dev-replace.h"
      23             : #include "qgroup.h"
      24             : #include "block-group.h"
      25             : #include "space-info.h"
      26             : #include "zoned.h"
      27             : #include "fs.h"
      28             : #include "accessors.h"
      29             : #include "extent-tree.h"
      30             : #include "root-tree.h"
      31             : #include "defrag.h"
      32             : #include "dir-item.h"
      33             : #include "uuid-tree.h"
      34             : #include "ioctl.h"
      35             : #include "relocation.h"
      36             : #include "scrub.h"
      37             : 
      38             : static struct kmem_cache *btrfs_trans_handle_cachep;
      39             : 
      40             : #define BTRFS_ROOT_TRANS_TAG 0
      41             : 
      42             : /*
      43             :  * Transaction states and transitions
      44             :  *
      45             :  * No running transaction (fs tree blocks are not modified)
      46             :  * |
      47             :  * | To next stage:
      48             :  * |  Call start_transaction() variants. Except btrfs_join_transaction_nostart().
      49             :  * V
      50             :  * Transaction N [[TRANS_STATE_RUNNING]]
      51             :  * |
      52             :  * | New trans handles can be attached to transaction N by calling all
      53             :  * | start_transaction() variants.
      54             :  * |
      55             :  * | To next stage:
      56             :  * |  Call btrfs_commit_transaction() on any trans handle attached to
      57             :  * |  transaction N
      58             :  * V
      59             :  * Transaction N [[TRANS_STATE_COMMIT_START]]
      60             :  * |
      61             :  * | Will wait for previous running transaction to completely finish if there
      62             :  * | is one
      63             :  * |
      64             :  * | Then one of the following happens:
      65             :  * | - Wait for all other trans handle holders to release.
      66             :  * |   The btrfs_commit_transaction() caller will do the commit work.
      67             :  * | - Wait for current transaction to be committed by others.
      68             :  * |   Other btrfs_commit_transaction() caller will do the commit work.
      69             :  * |
      70             :  * | At this stage, only btrfs_join_transaction*() variants can attach
      71             :  * | to this running transaction.
      72             :  * | All other variants will wait for current one to finish and attach to
      73             :  * | transaction N+1.
      74             :  * |
      75             :  * | To next stage:
      76             :  * |  Caller is chosen to commit transaction N, and all other trans handles
      77             :  * |  have been released.
      78             :  * V
      79             :  * Transaction N [[TRANS_STATE_COMMIT_DOING]]
      80             :  * |
      81             :  * | The heavy lifting transaction work is started.
      82             :  * | From running delayed refs (modifying extent tree) to creating pending
      83             :  * | snapshots, running qgroups.
      84             :  * | In short, modify supporting trees to reflect modifications of subvolume
      85             :  * | trees.
      86             :  * |
      87             :  * | At this stage, all start_transaction() calls will wait for this
      88             :  * | transaction to finish and attach to transaction N+1.
      89             :  * |
      90             :  * | To next stage:
      91             :  * |  Until all supporting trees are updated.
      92             :  * V
      93             :  * Transaction N [[TRANS_STATE_UNBLOCKED]]
      94             :  * |                                                Transaction N+1
      95             :  * | All needed trees are modified, thus we only    [[TRANS_STATE_RUNNING]]
      96             :  * | need to write them back to disk and update     |
      97             :  * | super blocks.                                  |
      98             :  * |                                                |
      99             :  * | At this stage, new transaction is allowed to   |
     100             :  * | start.                                         |
     101             :  * | All new start_transaction() calls will be      |
     102             :  * | attached to transid N+1.                       |
     103             :  * |                                                |
     104             :  * | To next stage:                                 |
     105             :  * |  Until all tree blocks and super blocks are    |
     106             :  * |  written to block devices                      |
     107             :  * V                                                |
     108             :  * Transaction N [[TRANS_STATE_COMPLETED]]          V
     109             :  *   All tree blocks and super blocks are written.  Transaction N+1
     110             :  *   This transaction is finished and all its       [[TRANS_STATE_COMMIT_START]]
     111             :  *   data structures will be cleaned up.            | Life goes on
     112             :  */
     113             : static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
     114             :         [TRANS_STATE_RUNNING]           = 0U,
     115             :         [TRANS_STATE_COMMIT_START]      = (__TRANS_START | __TRANS_ATTACH),
     116             :         [TRANS_STATE_COMMIT_DOING]      = (__TRANS_START |
     117             :                                            __TRANS_ATTACH |
     118             :                                            __TRANS_JOIN |
     119             :                                            __TRANS_JOIN_NOSTART),
     120             :         [TRANS_STATE_UNBLOCKED]         = (__TRANS_START |
     121             :                                            __TRANS_ATTACH |
     122             :                                            __TRANS_JOIN |
     123             :                                            __TRANS_JOIN_NOLOCK |
     124             :                                            __TRANS_JOIN_NOSTART),
     125             :         [TRANS_STATE_SUPER_COMMITTED]   = (__TRANS_START |
     126             :                                            __TRANS_ATTACH |
     127             :                                            __TRANS_JOIN |
     128             :                                            __TRANS_JOIN_NOLOCK |
     129             :                                            __TRANS_JOIN_NOSTART),
     130             :         [TRANS_STATE_COMPLETED]         = (__TRANS_START |
     131             :                                            __TRANS_ATTACH |
     132             :                                            __TRANS_JOIN |
     133             :                                            __TRANS_JOIN_NOLOCK |
     134             :                                            __TRANS_JOIN_NOSTART),
     135             : };
     136             : 
     137           0 : void btrfs_put_transaction(struct btrfs_transaction *transaction)
     138             : {
     139           0 :         WARN_ON(refcount_read(&transaction->use_count) == 0);
     140           0 :         if (refcount_dec_and_test(&transaction->use_count)) {
     141           0 :                 BUG_ON(!list_empty(&transaction->list));
     142           0 :                 WARN_ON(!RB_EMPTY_ROOT(
     143             :                                 &transaction->delayed_refs.href_root.rb_root));
     144           0 :                 WARN_ON(!RB_EMPTY_ROOT(
     145             :                                 &transaction->delayed_refs.dirty_extent_root));
     146           0 :                 if (transaction->delayed_refs.pending_csums)
     147           0 :                         btrfs_err(transaction->fs_info,
     148             :                                   "pending csums is %llu",
     149             :                                   transaction->delayed_refs.pending_csums);
     150             :                 /*
     151             :                  * If any block groups are found in ->deleted_bgs then it's
     152             :                  * because the transaction was aborted and a commit did not
     153             :                  * happen (things failed before writing the new superblock
     154             :                  * and calling btrfs_finish_extent_commit()), so we can not
     155             :                  * discard the physical locations of the block groups.
     156             :                  */
     157           0 :                 while (!list_empty(&transaction->deleted_bgs)) {
     158           0 :                         struct btrfs_block_group *cache;
     159             : 
     160           0 :                         cache = list_first_entry(&transaction->deleted_bgs,
     161             :                                                  struct btrfs_block_group,
     162             :                                                  bg_list);
     163           0 :                         list_del_init(&cache->bg_list);
     164           0 :                         btrfs_unfreeze_block_group(cache);
     165           0 :                         btrfs_put_block_group(cache);
     166             :                 }
     167           0 :                 WARN_ON(!list_empty(&transaction->dev_update_list));
     168           0 :                 kfree(transaction);
     169             :         }
     170           0 : }
     171             : 
     172           0 : static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
     173             : {
     174           0 :         struct btrfs_transaction *cur_trans = trans->transaction;
     175           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     176           0 :         struct btrfs_root *root, *tmp;
     177             : 
     178             :         /*
     179             :          * At this point no one can be using this transaction to modify any tree
     180             :          * and no one can start another transaction to modify any tree either.
     181             :          */
     182           0 :         ASSERT(cur_trans->state == TRANS_STATE_COMMIT_DOING);
     183             : 
     184           0 :         down_write(&fs_info->commit_root_sem);
     185             : 
     186           0 :         if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
     187           0 :                 fs_info->last_reloc_trans = trans->transid;
     188             : 
     189           0 :         list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits,
     190             :                                  dirty_list) {
     191           0 :                 list_del_init(&root->dirty_list);
     192           0 :                 free_extent_buffer(root->commit_root);
     193           0 :                 root->commit_root = btrfs_root_node(root);
     194           0 :                 extent_io_tree_release(&root->dirty_log_pages);
     195           0 :                 btrfs_qgroup_clean_swapped_blocks(root);
     196             :         }
     197             : 
     198             :         /* We can free old roots now. */
     199           0 :         spin_lock(&cur_trans->dropped_roots_lock);
     200           0 :         while (!list_empty(&cur_trans->dropped_roots)) {
     201           0 :                 root = list_first_entry(&cur_trans->dropped_roots,
     202             :                                         struct btrfs_root, root_list);
     203           0 :                 list_del_init(&root->root_list);
     204           0 :                 spin_unlock(&cur_trans->dropped_roots_lock);
     205           0 :                 btrfs_free_log(trans, root);
     206           0 :                 btrfs_drop_and_free_fs_root(fs_info, root);
     207           0 :                 spin_lock(&cur_trans->dropped_roots_lock);
     208             :         }
     209           0 :         spin_unlock(&cur_trans->dropped_roots_lock);
     210             : 
     211           0 :         up_write(&fs_info->commit_root_sem);
     212           0 : }
     213             : 
     214           0 : static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
     215             :                                          unsigned int type)
     216             : {
     217           0 :         if (type & TRANS_EXTWRITERS)
     218           0 :                 atomic_inc(&trans->num_extwriters);
     219           0 : }
     220             : 
     221           0 : static inline void extwriter_counter_dec(struct btrfs_transaction *trans,
     222             :                                          unsigned int type)
     223             : {
     224           0 :         if (type & TRANS_EXTWRITERS)
     225           0 :                 atomic_dec(&trans->num_extwriters);
     226           0 : }
     227             : 
     228             : static inline void extwriter_counter_init(struct btrfs_transaction *trans,
     229             :                                           unsigned int type)
     230             : {
     231           0 :         atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 1 : 0));
     232             : }
     233             : 
     234             : static inline int extwriter_counter_read(struct btrfs_transaction *trans)
     235             : {
     236           0 :         return atomic_read(&trans->num_extwriters);
     237             : }
     238             : 
     239             : /*
     240             :  * To be called after doing the chunk btree updates right after allocating a new
     241             :  * chunk (after btrfs_chunk_alloc_add_chunk_item() is called), when removing a
     242             :  * chunk after all chunk btree updates and after finishing the second phase of
     243             :  * chunk allocation (btrfs_create_pending_block_groups()) in case some block
     244             :  * group had its chunk item insertion delayed to the second phase.
     245             :  */
     246           0 : void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
     247             : {
     248           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     249             : 
     250           0 :         if (!trans->chunk_bytes_reserved)
     251             :                 return;
     252             : 
     253           0 :         btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv,
     254             :                                 trans->chunk_bytes_reserved, NULL);
     255           0 :         trans->chunk_bytes_reserved = 0;
     256             : }
     257             : 
     258             : /*
     259             :  * either allocate a new transaction or hop into the existing one
     260             :  */
     261           0 : static noinline int join_transaction(struct btrfs_fs_info *fs_info,
     262             :                                      unsigned int type)
     263             : {
     264           0 :         struct btrfs_transaction *cur_trans;
     265             : 
     266           0 :         spin_lock(&fs_info->trans_lock);
     267           0 : loop:
     268             :         /* The file system has been taken offline. No new transactions. */
     269           0 :         if (BTRFS_FS_ERROR(fs_info)) {
     270           0 :                 spin_unlock(&fs_info->trans_lock);
     271           0 :                 return -EROFS;
     272             :         }
     273             : 
     274           0 :         cur_trans = fs_info->running_transaction;
     275           0 :         if (cur_trans) {
     276           0 :                 if (TRANS_ABORTED(cur_trans)) {
     277           0 :                         spin_unlock(&fs_info->trans_lock);
     278           0 :                         return cur_trans->aborted;
     279             :                 }
     280           0 :                 if (btrfs_blocked_trans_types[cur_trans->state] & type) {
     281           0 :                         spin_unlock(&fs_info->trans_lock);
     282           0 :                         return -EBUSY;
     283             :                 }
     284           0 :                 refcount_inc(&cur_trans->use_count);
     285           0 :                 atomic_inc(&cur_trans->num_writers);
     286           0 :                 extwriter_counter_inc(cur_trans, type);
     287           0 :                 spin_unlock(&fs_info->trans_lock);
     288           0 :                 btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers);
     289           0 :                 btrfs_lockdep_acquire(fs_info, btrfs_trans_num_extwriters);
     290           0 :                 return 0;
     291             :         }
     292           0 :         spin_unlock(&fs_info->trans_lock);
     293             : 
     294             :         /*
     295             :          * If we are ATTACH, we just want to catch the current transaction,
     296             :          * and commit it. If there is no transaction, just return ENOENT.
     297             :          */
     298           0 :         if (type == TRANS_ATTACH)
     299             :                 return -ENOENT;
     300             : 
     301             :         /*
     302             :          * JOIN_NOLOCK only happens during the transaction commit, so
     303             :          * it is impossible that ->running_transaction is NULL
     304             :          */
     305           0 :         BUG_ON(type == TRANS_JOIN_NOLOCK);
     306             : 
     307           0 :         cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS);
     308           0 :         if (!cur_trans)
     309             :                 return -ENOMEM;
     310             : 
     311           0 :         btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers);
     312           0 :         btrfs_lockdep_acquire(fs_info, btrfs_trans_num_extwriters);
     313             : 
     314           0 :         spin_lock(&fs_info->trans_lock);
     315           0 :         if (fs_info->running_transaction) {
     316             :                 /*
     317             :                  * someone started a transaction after we unlocked.  Make sure
     318             :                  * to redo the checks above
     319             :                  */
     320           0 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
     321           0 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
     322           0 :                 kfree(cur_trans);
     323           0 :                 goto loop;
     324           0 :         } else if (BTRFS_FS_ERROR(fs_info)) {
     325           0 :                 spin_unlock(&fs_info->trans_lock);
     326           0 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
     327           0 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
     328           0 :                 kfree(cur_trans);
     329           0 :                 return -EROFS;
     330             :         }
     331             : 
     332           0 :         cur_trans->fs_info = fs_info;
     333           0 :         atomic_set(&cur_trans->pending_ordered, 0);
     334           0 :         init_waitqueue_head(&cur_trans->pending_wait);
     335           0 :         atomic_set(&cur_trans->num_writers, 1);
     336           0 :         extwriter_counter_init(cur_trans, type);
     337           0 :         init_waitqueue_head(&cur_trans->writer_wait);
     338           0 :         init_waitqueue_head(&cur_trans->commit_wait);
     339           0 :         cur_trans->state = TRANS_STATE_RUNNING;
     340             :         /*
     341             :          * One for this trans handle, one so it will live on until we
     342             :          * commit the transaction.
     343             :          */
     344           0 :         refcount_set(&cur_trans->use_count, 2);
     345           0 :         cur_trans->flags = 0;
     346           0 :         cur_trans->start_time = ktime_get_seconds();
     347             : 
     348           0 :         memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
     349             : 
     350           0 :         cur_trans->delayed_refs.href_root = RB_ROOT_CACHED;
     351           0 :         cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
     352           0 :         atomic_set(&cur_trans->delayed_refs.num_entries, 0);
     353             : 
     354             :         /*
     355             :          * although the tree mod log is per file system and not per transaction,
     356             :          * the log must never go across transaction boundaries.
     357             :          */
     358           0 :         smp_mb();
     359           0 :         if (!list_empty(&fs_info->tree_mod_seq_list))
     360           0 :                 WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when creating a fresh transaction\n");
     361           0 :         if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
     362           0 :                 WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when creating a fresh transaction\n");
     363           0 :         atomic64_set(&fs_info->tree_mod_seq, 0);
     364             : 
     365           0 :         spin_lock_init(&cur_trans->delayed_refs.lock);
     366             : 
     367           0 :         INIT_LIST_HEAD(&cur_trans->pending_snapshots);
     368           0 :         INIT_LIST_HEAD(&cur_trans->dev_update_list);
     369           0 :         INIT_LIST_HEAD(&cur_trans->switch_commits);
     370           0 :         INIT_LIST_HEAD(&cur_trans->dirty_bgs);
     371           0 :         INIT_LIST_HEAD(&cur_trans->io_bgs);
     372           0 :         INIT_LIST_HEAD(&cur_trans->dropped_roots);
     373           0 :         mutex_init(&cur_trans->cache_write_mutex);
     374           0 :         spin_lock_init(&cur_trans->dirty_bgs_lock);
     375           0 :         INIT_LIST_HEAD(&cur_trans->deleted_bgs);
     376           0 :         spin_lock_init(&cur_trans->dropped_roots_lock);
     377           0 :         list_add_tail(&cur_trans->list, &fs_info->trans_list);
     378           0 :         extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
     379             :                         IO_TREE_TRANS_DIRTY_PAGES);
     380           0 :         extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
     381             :                         IO_TREE_FS_PINNED_EXTENTS);
     382           0 :         fs_info->generation++;
     383           0 :         cur_trans->transid = fs_info->generation;
     384           0 :         fs_info->running_transaction = cur_trans;
     385           0 :         cur_trans->aborted = 0;
     386           0 :         spin_unlock(&fs_info->trans_lock);
     387             : 
     388           0 :         return 0;
     389             : }
     390             : 
     391             : /*
     392             :  * This does all the record keeping required to make sure that a shareable root
     393             :  * is properly recorded in a given transaction.  This is required to make sure
     394             :  * the old root from before we joined the transaction is deleted when the
     395             :  * transaction commits.
     396             :  */
     397           0 : static int record_root_in_trans(struct btrfs_trans_handle *trans,
     398             :                                struct btrfs_root *root,
     399             :                                int force)
     400             : {
     401           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
     402           0 :         int ret = 0;
     403             : 
     404           0 :         if ((test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
     405           0 :             root->last_trans < trans->transid) || force) {
     406           0 :                 WARN_ON(!force && root->commit_root != root->node);
     407             : 
     408             :                 /*
     409             :                  * see below for IN_TRANS_SETUP usage rules
     410             :                  * we have the reloc mutex held now, so there
     411             :                  * is only one writer in this function
     412             :                  */
     413           0 :                 set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
     414             : 
     415             :                 /* make sure readers find IN_TRANS_SETUP before
     416             :                  * they find our root->last_trans update
     417             :                  */
     418           0 :                 smp_wmb();
     419             : 
     420           0 :                 spin_lock(&fs_info->fs_roots_radix_lock);
     421           0 :                 if (root->last_trans == trans->transid && !force) {
     422           0 :                         spin_unlock(&fs_info->fs_roots_radix_lock);
     423           0 :                         return 0;
     424             :                 }
     425           0 :                 radix_tree_tag_set(&fs_info->fs_roots_radix,
     426           0 :                                    (unsigned long)root->root_key.objectid,
     427             :                                    BTRFS_ROOT_TRANS_TAG);
     428           0 :                 spin_unlock(&fs_info->fs_roots_radix_lock);
     429           0 :                 root->last_trans = trans->transid;
     430             : 
     431             :                 /* this is pretty tricky.  We don't want to
     432             :                  * take the relocation lock in btrfs_record_root_in_trans
     433             :                  * unless we're really doing the first setup for this root in
     434             :                  * this transaction.
     435             :                  *
     436             :                  * Normally we'd use root->last_trans as a flag to decide
     437             :                  * if we want to take the expensive mutex.
     438             :                  *
     439             :                  * But, we have to set root->last_trans before we
     440             :                  * init the relocation root, otherwise, we trip over warnings
     441             :                  * in ctree.c.  The solution used here is to flag ourselves
     442             :                  * with root IN_TRANS_SETUP.  When this is 1, we're still
     443             :                  * fixing up the reloc trees and everyone must wait.
     444             :                  *
     445             :                  * When this is zero, they can trust root->last_trans and fly
     446             :                  * through btrfs_record_root_in_trans without having to take the
     447             :                  * lock.  smp_wmb() makes sure that all the writes above are
     448             :                  * done before we pop in the zero below
     449             :                  */
     450           0 :                 ret = btrfs_init_reloc_root(trans, root);
     451           0 :                 smp_mb__before_atomic();
     452           0 :                 clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state);
     453             :         }
     454             :         return ret;
     455             : }
     456             : 
     457             : 
     458           0 : void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
     459             :                             struct btrfs_root *root)
     460             : {
     461           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
     462           0 :         struct btrfs_transaction *cur_trans = trans->transaction;
     463             : 
     464             :         /* Add ourselves to the transaction dropped list */
     465           0 :         spin_lock(&cur_trans->dropped_roots_lock);
     466           0 :         list_add_tail(&root->root_list, &cur_trans->dropped_roots);
     467           0 :         spin_unlock(&cur_trans->dropped_roots_lock);
     468             : 
     469             :         /* Make sure we don't try to update the root at commit time */
     470           0 :         spin_lock(&fs_info->fs_roots_radix_lock);
     471           0 :         radix_tree_tag_clear(&fs_info->fs_roots_radix,
     472           0 :                              (unsigned long)root->root_key.objectid,
     473             :                              BTRFS_ROOT_TRANS_TAG);
     474           0 :         spin_unlock(&fs_info->fs_roots_radix_lock);
     475           0 : }
     476             : 
     477           0 : int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
     478             :                                struct btrfs_root *root)
     479             : {
     480           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
     481           0 :         int ret;
     482             : 
     483           0 :         if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
     484             :                 return 0;
     485             : 
     486             :         /*
     487             :          * see record_root_in_trans for comments about IN_TRANS_SETUP usage
     488             :          * and barriers
     489             :          */
     490           0 :         smp_rmb();
     491           0 :         if (root->last_trans == trans->transid &&
     492           0 :             !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state))
     493             :                 return 0;
     494             : 
     495           0 :         mutex_lock(&fs_info->reloc_mutex);
     496           0 :         ret = record_root_in_trans(trans, root, 0);
     497           0 :         mutex_unlock(&fs_info->reloc_mutex);
     498             : 
     499           0 :         return ret;
     500             : }
     501             : 
     502             : static inline int is_transaction_blocked(struct btrfs_transaction *trans)
     503             : {
     504           0 :         return (trans->state >= TRANS_STATE_COMMIT_START &&
     505           0 :                 trans->state < TRANS_STATE_UNBLOCKED &&
     506           0 :                 !TRANS_ABORTED(trans));
     507             : }
     508             : 
     509             : /* wait for commit against the current transaction to become unblocked
     510             :  * when this is done, it is safe to start a new transaction, but the current
     511             :  * transaction might not be fully on disk.
     512             :  */
     513           0 : static void wait_current_trans(struct btrfs_fs_info *fs_info)
     514             : {
     515           0 :         struct btrfs_transaction *cur_trans;
     516             : 
     517           0 :         spin_lock(&fs_info->trans_lock);
     518           0 :         cur_trans = fs_info->running_transaction;
     519           0 :         if (cur_trans && is_transaction_blocked(cur_trans)) {
     520           0 :                 refcount_inc(&cur_trans->use_count);
     521           0 :                 spin_unlock(&fs_info->trans_lock);
     522             : 
     523           0 :                 btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
     524           0 :                 wait_event(fs_info->transaction_wait,
     525             :                            cur_trans->state >= TRANS_STATE_UNBLOCKED ||
     526             :                            TRANS_ABORTED(cur_trans));
     527           0 :                 btrfs_put_transaction(cur_trans);
     528             :         } else {
     529           0 :                 spin_unlock(&fs_info->trans_lock);
     530             :         }
     531           0 : }
     532             : 
     533           0 : static int may_wait_transaction(struct btrfs_fs_info *fs_info, int type)
     534             : {
     535           0 :         if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
     536             :                 return 0;
     537             : 
     538           0 :         if (type == TRANS_START)
     539           0 :                 return 1;
     540             : 
     541             :         return 0;
     542             : }
     543             : 
     544           0 : static inline bool need_reserve_reloc_root(struct btrfs_root *root)
     545             : {
     546           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
     547             : 
     548           0 :         if (!fs_info->reloc_ctl ||
     549           0 :             !test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
     550           0 :             root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
     551           0 :             root->reloc_root)
     552           0 :                 return false;
     553             : 
     554             :         return true;
     555             : }
     556             : 
     557             : static struct btrfs_trans_handle *
     558           0 : start_transaction(struct btrfs_root *root, unsigned int num_items,
     559             :                   unsigned int type, enum btrfs_reserve_flush_enum flush,
     560             :                   bool enforce_qgroups)
     561             : {
     562           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
     563           0 :         struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
     564           0 :         struct btrfs_trans_handle *h;
     565           0 :         struct btrfs_transaction *cur_trans;
     566           0 :         u64 num_bytes = 0;
     567           0 :         u64 qgroup_reserved = 0;
     568           0 :         bool reloc_reserved = false;
     569           0 :         bool do_chunk_alloc = false;
     570           0 :         int ret;
     571             : 
     572           0 :         if (BTRFS_FS_ERROR(fs_info))
     573             :                 return ERR_PTR(-EROFS);
     574             : 
     575           0 :         if (current->journal_info) {
     576           0 :                 WARN_ON(type & TRANS_EXTWRITERS);
     577           0 :                 h = current->journal_info;
     578           0 :                 refcount_inc(&h->use_count);
     579           0 :                 WARN_ON(refcount_read(&h->use_count) > 2);
     580           0 :                 h->orig_rsv = h->block_rsv;
     581           0 :                 h->block_rsv = NULL;
     582           0 :                 goto got_it;
     583             :         }
     584             : 
     585             :         /*
     586             :          * Do the reservation before we join the transaction so we can do all
     587             :          * the appropriate flushing if need be.
     588             :          */
     589           0 :         if (num_items && root != fs_info->chunk_root) {
     590           0 :                 struct btrfs_block_rsv *rsv = &fs_info->trans_block_rsv;
     591           0 :                 u64 delayed_refs_bytes = 0;
     592             : 
     593           0 :                 qgroup_reserved = num_items * fs_info->nodesize;
     594           0 :                 ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved,
     595             :                                 enforce_qgroups);
     596           0 :                 if (ret)
     597           0 :                         return ERR_PTR(ret);
     598             : 
     599             :                 /*
     600             :                  * We want to reserve all the bytes we may need all at once, so
     601             :                  * we only do 1 enospc flushing cycle per transaction start.  We
     602             :                  * accomplish this by simply assuming we'll do num_items worth
     603             :                  * of delayed refs updates in this trans handle, and refill that
     604             :                  * amount for whatever is missing in the reserve.
     605             :                  */
     606           0 :                 num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
     607           0 :                 if (flush == BTRFS_RESERVE_FLUSH_ALL &&
     608             :                     !btrfs_block_rsv_full(delayed_refs_rsv)) {
     609           0 :                         delayed_refs_bytes = btrfs_calc_delayed_ref_bytes(fs_info,
     610             :                                                                           num_items);
     611           0 :                         num_bytes += delayed_refs_bytes;
     612             :                 }
     613             : 
     614             :                 /*
     615             :                  * Do the reservation for the relocation root creation
     616             :                  */
     617           0 :                 if (need_reserve_reloc_root(root)) {
     618           0 :                         num_bytes += fs_info->nodesize;
     619           0 :                         reloc_reserved = true;
     620             :                 }
     621             : 
     622           0 :                 ret = btrfs_block_rsv_add(fs_info, rsv, num_bytes, flush);
     623           0 :                 if (ret)
     624           0 :                         goto reserve_fail;
     625           0 :                 if (delayed_refs_bytes) {
     626           0 :                         btrfs_migrate_to_delayed_refs_rsv(fs_info, rsv,
     627             :                                                           delayed_refs_bytes);
     628           0 :                         num_bytes -= delayed_refs_bytes;
     629             :                 }
     630             : 
     631           0 :                 if (rsv->space_info->force_alloc)
     632           0 :                         do_chunk_alloc = true;
     633           0 :         } else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL &&
     634             :                    !btrfs_block_rsv_full(delayed_refs_rsv)) {
     635             :                 /*
     636             :                  * Some people call with btrfs_start_transaction(root, 0)
     637             :                  * because they can be throttled, but have some other mechanism
     638             :                  * for reserving space.  We still want these guys to refill the
     639             :                  * delayed block_rsv so just add 1 items worth of reservation
     640             :                  * here.
     641             :                  */
     642           0 :                 ret = btrfs_delayed_refs_rsv_refill(fs_info, flush);
     643           0 :                 if (ret)
     644           0 :                         goto reserve_fail;
     645             :         }
     646           0 : again:
     647           0 :         h = kmem_cache_zalloc(btrfs_trans_handle_cachep, GFP_NOFS);
     648           0 :         if (!h) {
     649           0 :                 ret = -ENOMEM;
     650           0 :                 goto alloc_fail;
     651             :         }
     652             : 
     653             :         /*
     654             :          * If we are JOIN_NOLOCK we're already committing a transaction and
     655             :          * waiting on this guy, so we don't need to do the sb_start_intwrite
     656             :          * because we're already holding a ref.  We need this because we could
     657             :          * have raced in and did an fsync() on a file which can kick a commit
     658             :          * and then we deadlock with somebody doing a freeze.
     659             :          *
     660             :          * If we are ATTACH, it means we just want to catch the current
     661             :          * transaction and commit it, so we needn't do sb_start_intwrite(). 
     662             :          */
     663           0 :         if (type & __TRANS_FREEZABLE)
     664           0 :                 sb_start_intwrite(fs_info->sb);
     665             : 
     666           0 :         if (may_wait_transaction(fs_info, type))
     667           0 :                 wait_current_trans(fs_info);
     668             : 
     669           0 :         do {
     670           0 :                 ret = join_transaction(fs_info, type);
     671           0 :                 if (ret == -EBUSY) {
     672           0 :                         wait_current_trans(fs_info);
     673           0 :                         if (unlikely(type == TRANS_ATTACH ||
     674             :                                      type == TRANS_JOIN_NOSTART))
     675             :                                 ret = -ENOENT;
     676             :                 }
     677           0 :         } while (ret == -EBUSY);
     678             : 
     679           0 :         if (ret < 0)
     680           0 :                 goto join_fail;
     681             : 
     682           0 :         cur_trans = fs_info->running_transaction;
     683             : 
     684           0 :         h->transid = cur_trans->transid;
     685           0 :         h->transaction = cur_trans;
     686           0 :         refcount_set(&h->use_count, 1);
     687           0 :         h->fs_info = root->fs_info;
     688             : 
     689           0 :         h->type = type;
     690           0 :         INIT_LIST_HEAD(&h->new_bgs);
     691             : 
     692           0 :         smp_mb();
     693           0 :         if (cur_trans->state >= TRANS_STATE_COMMIT_START &&
     694           0 :             may_wait_transaction(fs_info, type)) {
     695           0 :                 current->journal_info = h;
     696           0 :                 btrfs_commit_transaction(h);
     697           0 :                 goto again;
     698             :         }
     699             : 
     700           0 :         if (num_bytes) {
     701           0 :                 trace_btrfs_space_reservation(fs_info, "transaction",
     702             :                                               h->transid, num_bytes, 1);
     703           0 :                 h->block_rsv = &fs_info->trans_block_rsv;
     704           0 :                 h->bytes_reserved = num_bytes;
     705           0 :                 h->reloc_reserved = reloc_reserved;
     706             :         }
     707             : 
     708           0 : got_it:
     709           0 :         if (!current->journal_info)
     710           0 :                 current->journal_info = h;
     711             : 
     712             :         /*
     713             :          * If the space_info is marked ALLOC_FORCE then we'll get upgraded to
     714             :          * ALLOC_FORCE the first run through, and then we won't allocate for
     715             :          * anybody else who races in later.  We don't care about the return
     716             :          * value here.
     717             :          */
     718           0 :         if (do_chunk_alloc && num_bytes) {
     719           0 :                 u64 flags = h->block_rsv->space_info->flags;
     720             : 
     721           0 :                 btrfs_chunk_alloc(h, btrfs_get_alloc_profile(fs_info, flags),
     722             :                                   CHUNK_ALLOC_NO_FORCE);
     723             :         }
     724             : 
     725             :         /*
     726             :          * btrfs_record_root_in_trans() needs to alloc new extents, and may
     727             :          * call btrfs_join_transaction() while we're also starting a
     728             :          * transaction.
     729             :          *
     730             :          * Thus it need to be called after current->journal_info initialized,
     731             :          * or we can deadlock.
     732             :          */
     733           0 :         ret = btrfs_record_root_in_trans(h, root);
     734           0 :         if (ret) {
     735             :                 /*
     736             :                  * The transaction handle is fully initialized and linked with
     737             :                  * other structures so it needs to be ended in case of errors,
     738             :                  * not just freed.
     739             :                  */
     740           0 :                 btrfs_end_transaction(h);
     741           0 :                 return ERR_PTR(ret);
     742             :         }
     743             : 
     744             :         return h;
     745             : 
     746             : join_fail:
     747           0 :         if (type & __TRANS_FREEZABLE)
     748           0 :                 sb_end_intwrite(fs_info->sb);
     749           0 :         kmem_cache_free(btrfs_trans_handle_cachep, h);
     750           0 : alloc_fail:
     751           0 :         if (num_bytes)
     752           0 :                 btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
     753             :                                         num_bytes, NULL);
     754           0 : reserve_fail:
     755           0 :         btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
     756           0 :         return ERR_PTR(ret);
     757             : }
     758             : 
     759           0 : struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
     760             :                                                    unsigned int num_items)
     761             : {
     762           0 :         return start_transaction(root, num_items, TRANS_START,
     763             :                                  BTRFS_RESERVE_FLUSH_ALL, true);
     764             : }
     765             : 
     766           0 : struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
     767             :                                         struct btrfs_root *root,
     768             :                                         unsigned int num_items)
     769             : {
     770           0 :         return start_transaction(root, num_items, TRANS_START,
     771             :                                  BTRFS_RESERVE_FLUSH_ALL_STEAL, false);
     772             : }
     773             : 
     774           0 : struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
     775             : {
     776           0 :         return start_transaction(root, 0, TRANS_JOIN, BTRFS_RESERVE_NO_FLUSH,
     777             :                                  true);
     778             : }
     779             : 
     780           0 : struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *root)
     781             : {
     782           0 :         return start_transaction(root, 0, TRANS_JOIN_NOLOCK,
     783             :                                  BTRFS_RESERVE_NO_FLUSH, true);
     784             : }
     785             : 
     786             : /*
     787             :  * Similar to regular join but it never starts a transaction when none is
     788             :  * running or after waiting for the current one to finish.
     789             :  */
     790           0 : struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root)
     791             : {
     792           0 :         return start_transaction(root, 0, TRANS_JOIN_NOSTART,
     793             :                                  BTRFS_RESERVE_NO_FLUSH, true);
     794             : }
     795             : 
     796             : /*
     797             :  * btrfs_attach_transaction() - catch the running transaction
     798             :  *
     799             :  * It is used when we want to commit the current the transaction, but
     800             :  * don't want to start a new one.
     801             :  *
     802             :  * Note: If this function return -ENOENT, it just means there is no
     803             :  * running transaction. But it is possible that the inactive transaction
     804             :  * is still in the memory, not fully on disk. If you hope there is no
     805             :  * inactive transaction in the fs when -ENOENT is returned, you should
     806             :  * invoke
     807             :  *     btrfs_attach_transaction_barrier()
     808             :  */
     809           0 : struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
     810             : {
     811           0 :         return start_transaction(root, 0, TRANS_ATTACH,
     812             :                                  BTRFS_RESERVE_NO_FLUSH, true);
     813             : }
     814             : 
     815             : /*
     816             :  * btrfs_attach_transaction_barrier() - catch the running transaction
     817             :  *
     818             :  * It is similar to the above function, the difference is this one
     819             :  * will wait for all the inactive transactions until they fully
     820             :  * complete.
     821             :  */
     822             : struct btrfs_trans_handle *
     823           0 : btrfs_attach_transaction_barrier(struct btrfs_root *root)
     824             : {
     825           0 :         struct btrfs_trans_handle *trans;
     826             : 
     827           0 :         trans = start_transaction(root, 0, TRANS_ATTACH,
     828             :                                   BTRFS_RESERVE_NO_FLUSH, true);
     829           0 :         if (trans == ERR_PTR(-ENOENT))
     830           0 :                 btrfs_wait_for_commit(root->fs_info, 0);
     831             : 
     832           0 :         return trans;
     833             : }
     834             : 
     835             : /* Wait for a transaction commit to reach at least the given state. */
     836           0 : static noinline void wait_for_commit(struct btrfs_transaction *commit,
     837             :                                      const enum btrfs_trans_state min_state)
     838             : {
     839           0 :         struct btrfs_fs_info *fs_info = commit->fs_info;
     840           0 :         u64 transid = commit->transid;
     841           0 :         bool put = false;
     842             : 
     843             :         /*
     844             :          * At the moment this function is called with min_state either being
     845             :          * TRANS_STATE_COMPLETED or TRANS_STATE_SUPER_COMMITTED.
     846             :          */
     847           0 :         if (min_state == TRANS_STATE_COMPLETED)
     848             :                 btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
     849             :         else
     850           0 :                 btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
     851             : 
     852           0 :         while (1) {
     853           0 :                 wait_event(commit->commit_wait, commit->state >= min_state);
     854           0 :                 if (put)
     855           0 :                         btrfs_put_transaction(commit);
     856             : 
     857           0 :                 if (min_state < TRANS_STATE_COMPLETED)
     858             :                         break;
     859             : 
     860             :                 /*
     861             :                  * A transaction isn't really completed until all of the
     862             :                  * previous transactions are completed, but with fsync we can
     863             :                  * end up with SUPER_COMMITTED transactions before a COMPLETED
     864             :                  * transaction. Wait for those.
     865             :                  */
     866             : 
     867           0 :                 spin_lock(&fs_info->trans_lock);
     868           0 :                 commit = list_first_entry_or_null(&fs_info->trans_list,
     869             :                                                   struct btrfs_transaction,
     870             :                                                   list);
     871           0 :                 if (!commit || commit->transid > transid) {
     872           0 :                         spin_unlock(&fs_info->trans_lock);
     873             :                         break;
     874             :                 }
     875           0 :                 refcount_inc(&commit->use_count);
     876           0 :                 put = true;
     877           0 :                 spin_unlock(&fs_info->trans_lock);
     878             :         }
     879           0 : }
     880             : 
     881           0 : int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
     882             : {
     883           0 :         struct btrfs_transaction *cur_trans = NULL, *t;
     884           0 :         int ret = 0;
     885             : 
     886           0 :         if (transid) {
     887           0 :                 if (transid <= fs_info->last_trans_committed)
     888           0 :                         goto out;
     889             : 
     890             :                 /* find specified transaction */
     891           0 :                 spin_lock(&fs_info->trans_lock);
     892           0 :                 list_for_each_entry(t, &fs_info->trans_list, list) {
     893           0 :                         if (t->transid == transid) {
     894           0 :                                 cur_trans = t;
     895           0 :                                 refcount_inc(&cur_trans->use_count);
     896           0 :                                 ret = 0;
     897           0 :                                 break;
     898             :                         }
     899           0 :                         if (t->transid > transid) {
     900             :                                 ret = 0;
     901             :                                 break;
     902             :                         }
     903             :                 }
     904           0 :                 spin_unlock(&fs_info->trans_lock);
     905             : 
     906             :                 /*
     907             :                  * The specified transaction doesn't exist, or we
     908             :                  * raced with btrfs_commit_transaction
     909             :                  */
     910           0 :                 if (!cur_trans) {
     911           0 :                         if (transid > fs_info->last_trans_committed)
     912           0 :                                 ret = -EINVAL;
     913           0 :                         goto out;
     914             :                 }
     915             :         } else {
     916             :                 /* find newest transaction that is committing | committed */
     917           0 :                 spin_lock(&fs_info->trans_lock);
     918           0 :                 list_for_each_entry_reverse(t, &fs_info->trans_list,
     919             :                                             list) {
     920           0 :                         if (t->state >= TRANS_STATE_COMMIT_START) {
     921           0 :                                 if (t->state == TRANS_STATE_COMPLETED)
     922             :                                         break;
     923           0 :                                 cur_trans = t;
     924           0 :                                 refcount_inc(&cur_trans->use_count);
     925             :                                 break;
     926             :                         }
     927             :                 }
     928           0 :                 spin_unlock(&fs_info->trans_lock);
     929           0 :                 if (!cur_trans)
     930           0 :                         goto out;  /* nothing committing|committed */
     931             :         }
     932             : 
     933           0 :         wait_for_commit(cur_trans, TRANS_STATE_COMPLETED);
     934           0 :         btrfs_put_transaction(cur_trans);
     935           0 : out:
     936           0 :         return ret;
     937             : }
     938             : 
     939           0 : void btrfs_throttle(struct btrfs_fs_info *fs_info)
     940             : {
     941           0 :         wait_current_trans(fs_info);
     942           0 : }
     943             : 
     944           0 : bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
     945             : {
     946           0 :         struct btrfs_transaction *cur_trans = trans->transaction;
     947             : 
     948           0 :         if (cur_trans->state >= TRANS_STATE_COMMIT_START ||
     949           0 :             test_bit(BTRFS_DELAYED_REFS_FLUSHING, &cur_trans->delayed_refs.flags))
     950             :                 return true;
     951             : 
     952           0 :         if (btrfs_check_space_for_delayed_refs(trans->fs_info))
     953             :                 return true;
     954             : 
     955           0 :         return !!btrfs_block_rsv_check(&trans->fs_info->global_block_rsv, 50);
     956             : }
     957             : 
     958           0 : static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans)
     959             : 
     960             : {
     961           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     962             : 
     963           0 :         if (!trans->block_rsv) {
     964             :                 ASSERT(!trans->bytes_reserved);
     965             :                 return;
     966             :         }
     967             : 
     968           0 :         if (!trans->bytes_reserved)
     969             :                 return;
     970             : 
     971           0 :         ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
     972           0 :         trace_btrfs_space_reservation(fs_info, "transaction",
     973             :                                       trans->transid, trans->bytes_reserved, 0);
     974           0 :         btrfs_block_rsv_release(fs_info, trans->block_rsv,
     975             :                                 trans->bytes_reserved, NULL);
     976           0 :         trans->bytes_reserved = 0;
     977             : }
     978             : 
     979           0 : static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
     980             :                                    int throttle)
     981             : {
     982           0 :         struct btrfs_fs_info *info = trans->fs_info;
     983           0 :         struct btrfs_transaction *cur_trans = trans->transaction;
     984           0 :         int err = 0;
     985             : 
     986           0 :         if (refcount_read(&trans->use_count) > 1) {
     987           0 :                 refcount_dec(&trans->use_count);
     988           0 :                 trans->block_rsv = trans->orig_rsv;
     989           0 :                 return 0;
     990             :         }
     991             : 
     992           0 :         btrfs_trans_release_metadata(trans);
     993           0 :         trans->block_rsv = NULL;
     994             : 
     995           0 :         btrfs_create_pending_block_groups(trans);
     996             : 
     997           0 :         btrfs_trans_release_chunk_metadata(trans);
     998             : 
     999           0 :         if (trans->type & __TRANS_FREEZABLE)
    1000           0 :                 sb_end_intwrite(info->sb);
    1001             : 
    1002           0 :         WARN_ON(cur_trans != info->running_transaction);
    1003           0 :         WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
    1004           0 :         atomic_dec(&cur_trans->num_writers);
    1005           0 :         extwriter_counter_dec(cur_trans, trans->type);
    1006             : 
    1007           0 :         cond_wake_up(&cur_trans->writer_wait);
    1008             : 
    1009           0 :         btrfs_lockdep_release(info, btrfs_trans_num_extwriters);
    1010           0 :         btrfs_lockdep_release(info, btrfs_trans_num_writers);
    1011             : 
    1012           0 :         btrfs_put_transaction(cur_trans);
    1013             : 
    1014           0 :         if (current->journal_info == trans)
    1015           0 :                 current->journal_info = NULL;
    1016             : 
    1017           0 :         if (throttle)
    1018           0 :                 btrfs_run_delayed_iputs(info);
    1019             : 
    1020           0 :         if (TRANS_ABORTED(trans) || BTRFS_FS_ERROR(info)) {
    1021           0 :                 wake_up_process(info->transaction_kthread);
    1022           0 :                 if (TRANS_ABORTED(trans))
    1023           0 :                         err = trans->aborted;
    1024             :                 else
    1025             :                         err = -EROFS;
    1026             :         }
    1027             : 
    1028           0 :         kmem_cache_free(btrfs_trans_handle_cachep, trans);
    1029           0 :         return err;
    1030             : }
    1031             : 
    1032           0 : int btrfs_end_transaction(struct btrfs_trans_handle *trans)
    1033             : {
    1034           0 :         return __btrfs_end_transaction(trans, 0);
    1035             : }
    1036             : /*
                     :  * End use of a transaction handle and request throttling.
                     :  *
                     :  * Thin wrapper: forwards @trans to __btrfs_end_transaction() with the throttle
                     :  * argument set to 1 and returns that helper's result unchanged.  With a
                     :  * non-zero throttle argument the helper calls btrfs_run_delayed_iputs()
                     :  * before it returns.
                     :  */
    1037           0 : int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans)
    1038             : {
    1039           0 :         return __btrfs_end_transaction(trans, 1);
    1040             : }
    1041             : 
    1042             : /*
    1043             :  * when btree blocks are allocated, they have some corresponding bits set for
    1044             :  * them in one of two extent_io trees.  This is used to make sure all of
    1045             :  * those extents are sent to disk but does not wait on them
                     :  * (except in the -ENOMEM fallback described inside the loop).
                     :  *
                     :  * @fs_info:     filesystem whose btree inode mapping is written
                     :  * @dirty_pages: io tree holding the ranges tagged with @mark
                     :  * @mark:        extent bit(s) selecting the ranges to write; every range found
                     :  *               is converted from @mark to EXTENT_NEED_WAIT before writeback
                     :  *               is started on it
                     :  *
                     :  * All ranges are processed even if one of them fails.
                     :  *
                     :  * Returns 0 if every range was handled without error, otherwise the error of
                     :  * the last range that failed.  An error is never cleared by a later range
                     :  * that succeeds.
    1046             :  */
    1047           0 : int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
    1048             :                                struct extent_io_tree *dirty_pages, int mark)
    1049             : {
    1050           0 :         int err = 0;
    1051           0 :         int werr = 0;
    1052           0 :         struct address_space *mapping = fs_info->btree_inode->i_mapping;
    1053           0 :         struct extent_state *cached_state = NULL;
    1054           0 :         u64 start = 0;
    1055           0 :         u64 end;
    1056             : 
    1057           0 :         while (!find_first_extent_bit(dirty_pages, start, &start, &end,
    1058             :                                       mark, &cached_state)) {
    1059           0 :                 bool wait_writeback = false;
    1060             : 
    1061           0 :                 err = convert_extent_bit(dirty_pages, start, end,
    1062             :                                          EXTENT_NEED_WAIT,
    1063             :                                          mark, &cached_state);
    1064             :                 /*
    1065             :                  * convert_extent_bit can return -ENOMEM, which is most of the
    1066             :                  * time a temporary error. So when it happens, ignore the error
    1067             :                  * and wait for writeback of this range to finish - because we
    1068             :                  * failed to set the bit EXTENT_NEED_WAIT for the range, a call
    1069             :                  * to __btrfs_wait_marked_extents() would not know that
    1070             :                  * writeback for this range started and therefore wouldn't
    1071             :                  * wait for it to finish - we don't want to commit a
    1072             :                  * superblock that points to btree nodes/leafs for which
    1073             :                  * writeback hasn't finished yet (and without errors).
    1074             :                  * We cleanup any entries left in the io tree when committing
    1075             :                  * the transaction (through extent_io_tree_release()).
                     :                  *
                     :                  * The result of that fallback wait is routed through 'err'
                     :                  * like every other failure below, so a wait that succeeds
                     :                  * cannot reset an error already stored in 'werr' for an
                     :                  * earlier range.
    1076             :                  */
    1077           0 :                 if (err == -ENOMEM) {
    1078             :                         err = 0;
    1079             :                         wait_writeback = true;
    1080             :                 }
    1081           0 :                 if (!err)
    1082           0 :                         err = filemap_fdatawrite_range(mapping, start, end);
    1083             :                 if (!err && wait_writeback)
    1084             :                         err = filemap_fdatawait_range(mapping, start, end);
    1085             :                 if (err)
    1086             :                         werr = err;
    1087           0 :                 free_extent_state(cached_state);
    1088           0 :                 cached_state = NULL;
    1089           0 :                 cond_resched();
    1090           0 :                 start = end + 1;
    1091             :         }
    1092           0 :         return werr;
    1093             : }
    1094             : 
    1095             : /*
    1096             :  * when btree blocks are allocated, they have some corresponding bits set for
    1097             :  * them in one of two extent_io trees.  This is used to make sure all of
    1098             :  * those extents are on disk for transaction or log commit.  We wait
    1099             :  * on all the pages and clear them from the dirty pages state tree
                     :  *
                     :  * @fs_info:     filesystem whose btree inode mapping is waited on
                     :  * @dirty_pages: io tree whose EXTENT_NEED_WAIT ranges are processed
                     :  *
                     :  * For each range the EXTENT_NEED_WAIT bit is cleared first and writeback of
                     :  * the range is then waited for.  If clearing fails with anything other than
                     :  * -ENOMEM, the wait for that range is skipped and the error is recorded.
                     :  * All ranges are visited even after a failure.
                     :  *
                     :  * Returns 0 on success, otherwise the error of the last range that failed.
    1100             :  */
    1101           0 : static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
    1102             :                                        struct extent_io_tree *dirty_pages)
    1103             : {
    1104           0 :         int err = 0;
    1105           0 :         int werr = 0;
    1106           0 :         struct address_space *mapping = fs_info->btree_inode->i_mapping;
    1107           0 :         struct extent_state *cached_state = NULL;
    1108           0 :         u64 start = 0;
    1109           0 :         u64 end;
    1110             : 
    1111           0 :         while (!find_first_extent_bit(dirty_pages, start, &start, &end,
    1112             :                                       EXTENT_NEED_WAIT, &cached_state)) {
    1113             :                 /*
    1114             :                  * Ignore -ENOMEM errors returned by clear_extent_bit().
    1115             :                  * When committing the transaction, we'll remove any entries
    1116             :                  * left in the io tree. For a log commit, we don't remove them
    1117             :                  * after committing the log because the tree can be accessed
    1118             :                  * concurrently - we do it only at transaction commit time when
    1119             :                  * it's safe to do it (through extent_io_tree_release()).
    1120             :                  */
    1121           0 :                 err = clear_extent_bit(dirty_pages, start, end,
    1122             :                                        EXTENT_NEED_WAIT, &cached_state);
    1123           0 :                 if (err == -ENOMEM)
    1124             :                         err = 0;
    1125           0 :                 if (!err)
    1126           0 :                         err = filemap_fdatawait_range(mapping, start, end);
    1127           0 :                 if (err)
    1128           0 :                         werr = err;
    1129           0 :                 free_extent_state(cached_state);
    1130           0 :                 cached_state = NULL;
    1131           0 :                 cond_resched();
    1132           0 :                 start = end + 1;
    1133             :         }
    1134           0 :         if (err)
    1135           0 :                 werr = err; /* same value the loop already stored for the last range */
    1136           0 :         return werr;
    1137             : }
    1138             : /*
                     :  * Wait for the EXTENT_NEED_WAIT ranges of @dirty_pages and fold in the
                     :  * BTRFS_FS_BTREE_ERR flag (set elsewhere; presumably when a btree write
                     :  * fails - confirm against the code that sets it).
                     :  *
                     :  * The flag is tested and cleared in fs_info->flags on every call, whatever
                     :  * the wait returned.
                     :  *
                     :  * Returns the error from __btrfs_wait_marked_extents() if there is one,
                     :  * -EIO if the wait succeeded but the flag was set, and 0 otherwise.
                     :  */
    1139           0 : static int btrfs_wait_extents(struct btrfs_fs_info *fs_info,
    1140             :                        struct extent_io_tree *dirty_pages)
    1141             : {
    1142           0 :         bool errors = false;
    1143           0 :         int err;
    1144             : 
    1145           0 :         err = __btrfs_wait_marked_extents(fs_info, dirty_pages);
    1146           0 :         if (test_and_clear_bit(BTRFS_FS_BTREE_ERR, &fs_info->flags))
    1147           0 :                 errors = true;
    1148             :         /* An error from the wait itself takes precedence over the generic -EIO. */
    1149           0 :         if (errors && !err)
    1150           0 :                 err = -EIO;
    1151           0 :         return err;
    1152             : }
    1153             : /*
                     :  * Wait for the EXTENT_NEED_WAIT ranges recorded in a log root's
                     :  * dirty_log_pages io tree.
                     :  *
                     :  * @log_root: root whose root_key.objectid must be BTRFS_TREE_LOG_OBJECTID
                     :  *            (checked with ASSERT)
                     :  * @mark:     selects which error flags in fs_info->flags are tested and
                     :  *            cleared after the wait: EXTENT_DIRTY selects BTRFS_FS_LOG1_ERR,
                     :  *            EXTENT_NEW selects BTRFS_FS_LOG2_ERR.  @mark is not used to pick
                     :  *            the ranges that are waited on.
                     :  *
                     :  * Returns the error from __btrfs_wait_marked_extents() if there is one,
                     :  * -EIO if the wait succeeded but a selected flag was set, and 0 otherwise.
                     :  */
    1154           0 : int btrfs_wait_tree_log_extents(struct btrfs_root *log_root, int mark)
    1155             : {
    1156           0 :         struct btrfs_fs_info *fs_info = log_root->fs_info;
    1157           0 :         struct extent_io_tree *dirty_pages = &log_root->dirty_log_pages;
    1158           0 :         bool errors = false;
    1159           0 :         int err;
    1160             : 
    1161           0 :         ASSERT(log_root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
    1162             : 
    1163           0 :         err = __btrfs_wait_marked_extents(fs_info, dirty_pages);
    1164           0 :         if ((mark & EXTENT_DIRTY) &&
    1165           0 :             test_and_clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags))
    1166           0 :                 errors = true;
    1167             : 
    1168           0 :         if ((mark & EXTENT_NEW) &&
    1169           0 :             test_and_clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags))
    1170           0 :                 errors = true;
    1171             :         /* An error from the wait itself takes precedence over the generic -EIO. */
    1172           0 :         if (errors && !err)
    1173           0 :                 err = -EIO;
    1174           0 :         return err;
    1175             : }
    1176             : 
    1177             : /*
    1178             :  * When btree blocks are allocated the corresponding extents are marked dirty.
    1179             :  * This function ensures such extents are persisted on disk for transaction or
    1180             :  * log commit.
    1181             :  *
    1182             :  * @trans: transaction whose dirty pages we'd like to write
                     :  *
                     :  * The EXTENT_DIRTY ranges of the transaction's dirty_pages io tree are
                     :  * written inside a block plug, then waited for, and the io tree is released
                     :  * regardless of the outcome.
                     :  *
                     :  * Returns the write error if there was one, else the wait error, else 0.
    1183             :  */
    1184           0 : static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans)
    1185             : {
    1186           0 :         int ret;
    1187           0 :         int ret2;
    1188           0 :         struct extent_io_tree *dirty_pages = &trans->transaction->dirty_pages;
    1189           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1190           0 :         struct blk_plug plug;
    1191             : 
    1192           0 :         blk_start_plug(&plug);
    1193           0 :         ret = btrfs_write_marked_extents(fs_info, dirty_pages, EXTENT_DIRTY);
    1194           0 :         blk_finish_plug(&plug);
    1195           0 :         ret2 = btrfs_wait_extents(fs_info, dirty_pages);
    1196             :         /* The wait runs even if the write failed; so does the release below. */
    1197           0 :         extent_io_tree_release(&trans->transaction->dirty_pages);
    1198             : 
    1199           0 :         if (ret)
    1200             :                 return ret;
    1201           0 :         else if (ret2)
    1202             :                 return ret2;
    1203             :         else
    1204           0 :                 return 0;
    1205             : }
    1206             : 
    1207             : /*
    1208             :  * this is used to update the root pointer in the tree of tree roots.
    1209             :  *
    1210             :  * But, in the case of the extent allocation tree, updating the root
    1211             :  * pointer may allocate blocks which may change the root of the extent
    1212             :  * allocation tree.
    1213             :  *
    1214             :  * So, this loops and repeats and makes sure the cowonly root didn't
    1215             :  * change while the root pointer was being updated in the metadata.
                     :  *
                     :  * @trans: transaction handle passed on to btrfs_update_root()
                     :  * @root:  root whose root_item is written into fs_info->tree_root
                     :  *
                     :  * The loop stops once the bytenr stored in @root's root_item equals
                     :  * root->node->start and the item's used value equals the value sampled
                     :  * before the loop or right after the previous update.
                     :  *
                     :  * Returns 0 on success or the error returned by btrfs_update_root().
    1216             :  */
    1217           0 : static int update_cowonly_root(struct btrfs_trans_handle *trans,
    1218             :                                struct btrfs_root *root)
    1219             : {
    1220           0 :         int ret;
    1221           0 :         u64 old_root_bytenr;
    1222           0 :         u64 old_root_used;
    1223           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1224           0 :         struct btrfs_root *tree_root = fs_info->tree_root;
    1225             : 
    1226           0 :         old_root_used = btrfs_root_used(&root->root_item);
    1227             : 
    1228           0 :         while (1) {
    1229           0 :                 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
    1230           0 :                 if (old_root_bytenr == root->node->start &&
    1231             :                     old_root_used == btrfs_root_used(&root->root_item))
    1232             :                         break;
    1233             :                 /* Point the item at the current root node and write it out. */
    1234           0 :                 btrfs_set_root_node(&root->root_item, root->node);
    1235           0 :                 ret = btrfs_update_root(trans, tree_root,
    1236             :                                         &root->root_key,
    1237             :                                         &root->root_item);
    1238           0 :                 if (ret)
    1239           0 :                         return ret;
    1240             :                 /* Re-sample so the next pass can tell whether the update changed it. */
    1241           0 :                 old_root_used = btrfs_root_used(&root->root_item);
    1242             :         }
    1243             : 
    1244             :         return 0;
    1245             : }
    1246             : 
    1247             : /*
    1248             :  * update all the cowonly tree roots on disk
    1249             :  *
    1250             :  * The error handling in this function may not be obvious. Any of the
    1251             :  * failures will cause the file system to go offline. We still need
    1252             :  * to clean up the delayed refs.
                     :  *
                     :  * @trans: handle of the transaction being committed; its transaction must be
                     :  *         in TRANS_STATE_COMMIT_DOING (checked with ASSERT)
                     :  *
                     :  * Order of work: COW the root node of fs_info->tree_root, run device stats,
                     :  * dev-replace and qgroup updates, set up the space cache, then repeat
                     :  * { update every root on dirty_cowonly_roots, run delayed refs, write dirty
                     :  * block groups and run delayed refs until both block group lists are empty }
                     :  * for as long as dirty_cowonly_roots is non-empty afterwards.
                     :  *
                     :  * Returns 0 on success or the first error hit.  Nothing is undone here on
                     :  * error; the cleanup mentioned above is presumably left to the caller.
    1253             :  */
    1254           0 : static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
    1255             : {
    1256           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1257           0 :         struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
    1258           0 :         struct list_head *io_bgs = &trans->transaction->io_bgs;
    1259           0 :         struct list_head *next;
    1260           0 :         struct extent_buffer *eb;
    1261           0 :         int ret;
    1262             : 
    1263             :         /*
    1264             :          * At this point no one can be using this transaction to modify any tree
    1265             :          * and no one can start another transaction to modify any tree either.
    1266             :          */
    1267           0 :         ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING);
    1268             :         /*
                     :          * COW the root node of the tree of tree roots.  The buffer is unlocked
                     :          * and released before the result of the COW is checked.
                     :          */
    1269           0 :         eb = btrfs_lock_root_node(fs_info->tree_root);
    1270           0 :         ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
    1271             :                               0, &eb, BTRFS_NESTING_COW);
    1272           0 :         btrfs_tree_unlock(eb);
    1273           0 :         free_extent_buffer(eb);
    1274             : 
    1275           0 :         if (ret)
    1276             :                 return ret;
    1277             : 
    1278           0 :         ret = btrfs_run_dev_stats(trans);
    1279           0 :         if (ret)
    1280             :                 return ret;
    1281           0 :         ret = btrfs_run_dev_replace(trans);
    1282           0 :         if (ret)
    1283             :                 return ret;
    1284           0 :         ret = btrfs_run_qgroups(trans);
    1285           0 :         if (ret)
    1286             :                 return ret;
    1287             : 
    1288           0 :         ret = btrfs_setup_space_cache(trans);
    1289           0 :         if (ret)
    1290             :                 return ret;
    1291             : 
    1292           0 : again:
    1293           0 :         while (!list_empty(&fs_info->dirty_cowonly_roots)) {
    1294           0 :                 struct btrfs_root *root;
    1295           0 :                 next = fs_info->dirty_cowonly_roots.next;
    1296           0 :                 list_del_init(next);
    1297           0 :                 root = list_entry(next, struct btrfs_root, dirty_list);
    1298           0 :                 clear_bit(BTRFS_ROOT_DIRTY, &root->state);
    1299             :                 /* The root is queued on switch_commits before its item is updated. */
    1300           0 :                 list_add_tail(&root->dirty_list,
    1301           0 :                               &trans->transaction->switch_commits);
    1302           0 :                 ret = update_cowonly_root(trans, root);
    1303           0 :                 if (ret)
    1304           0 :                         return ret;
    1305             :         }
    1306             : 
    1307             :         /* Now flush any delayed refs generated by updating all of the roots */
    1308           0 :         ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
    1309           0 :         if (ret)
    1310           0 :                 return ret;
    1311             : 
    1312           0 :         while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) {
    1313           0 :                 ret = btrfs_write_dirty_block_groups(trans);
    1314           0 :                 if (ret)
    1315           0 :                         return ret;
    1316             : 
    1317             :                 /*
    1318             :                  * We're writing the dirty block groups, which could generate
    1319             :                  * delayed refs, which could generate more dirty block groups,
    1320             :                  * so we want to keep this flushing in this loop to make sure
    1321             :                  * everything gets run.
    1322             :                  */
    1323           0 :                 ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
    1324           0 :                 if (ret)
    1325           0 :                         return ret;
    1326             :         }
    1327             :         /* Roots queued again during the flushing above get another full pass. */
    1328           0 :         if (!list_empty(&fs_info->dirty_cowonly_roots))
    1329           0 :                 goto again;
    1330             : 
    1331             :         /* Update dev-replace pointer once everything is committed */
    1332           0 :         fs_info->dev_replace.committed_cursor_left =
    1333           0 :                 fs_info->dev_replace.cursor_left_last_write_of_item;
    1334             : 
    1335           0 :         return 0;
    1336             : }
    1337             : 
    1338             : /*
    1339             :  * If we had a pending drop we need to see if there are any others left in our
    1340             :  * dead roots list, and if not clear our bit and wake any waiters.
                     :  *
                     :  * Only the first entry of fs_info->dead_roots is inspected, under
                     :  * fs_info->trans_lock.  If it has BTRFS_ROOT_UNFINISHED_DROP set the function
                     :  * returns without doing anything else.  Otherwise, including when the list is
                     :  * empty, btrfs_wake_unfinished_drop() is called after the lock is dropped.
    1341             :  */
    1342           0 : void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
    1343             : {
    1344             :         /*
    1345             :          * We put the drop in progress roots at the front of the list, so if the
    1346             :          * first entry doesn't have UNFINISHED_DROP set we can wake everybody
    1347             :          * up.
    1348             :          */
    1349           0 :         spin_lock(&fs_info->trans_lock);
    1350           0 :         if (!list_empty(&fs_info->dead_roots)) {
    1351           0 :                 struct btrfs_root *root = list_first_entry(&fs_info->dead_roots,
    1352             :                                                            struct btrfs_root,
    1353             :                                                            root_list);
    1354           0 :                 if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state)) {
    1355           0 :                         spin_unlock(&fs_info->trans_lock);
    1356           0 :                         return;
    1357             :                 }
    1358             :         }
    1359           0 :         spin_unlock(&fs_info->trans_lock);
    1360             : 
    1361           0 :         btrfs_wake_unfinished_drop(fs_info);
    1362             : }
    1363             : 
    1364             : /*
    1365             :  * dead roots are old snapshots that need to be deleted.  This calls
    1366             :  * btrfs_grab_root() on @root (presumably taking a reference) and links it
    1367             :  * into fs_info's list of dead roots that need to be deleted.
                     :  *
                     :  * Nothing is done if @root->root_list is already linked on a list.  Roots
                     :  * with BTRFS_ROOT_UNFINISHED_DROP set are added at the head of the list, all
                     :  * others at the tail.  The check and the insertion happen under
                     :  * fs_info->trans_lock.
    1368             :  */
    1369           0 : void btrfs_add_dead_root(struct btrfs_root *root)
    1370             : {
    1371           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1372             : 
    1373           0 :         spin_lock(&fs_info->trans_lock);
    1374           0 :         if (list_empty(&root->root_list)) {
    1375           0 :                 btrfs_grab_root(root);
    1376             : 
    1377             :                 /* We want to process the partially complete drops first. */
    1378           0 :                 if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state))
    1379           0 :                         list_add(&root->root_list, &fs_info->dead_roots);
    1380             :                 else
    1381           0 :                         list_add_tail(&root->root_list, &fs_info->dead_roots);
    1382             :         }
    1383           0 :         spin_unlock(&fs_info->trans_lock);
    1384           0 : }
    1385             : 
    1386             : /*
    1387             :  * Update each subvolume root and its relocation root, if it exists, in the tree
    1388             :  * of tree roots. Also free log roots if they exist.
                     :  *
                     :  * @trans: handle of the transaction being committed; its transaction must be
                     :  *         in TRANS_STATE_COMMIT_DOING (checked with ASSERT)
                     :  *
                     :  * Roots tagged BTRFS_ROOT_TRANS_TAG in fs_info->fs_roots_radix are looked up
                     :  * in batches of up to ARRAY_SIZE(gang) entries until none are left.  The tag
                     :  * of each root is cleared under fs_roots_radix_lock; the lock is then dropped
                     :  * for the per-root updates and taken again before
                     :  * btrfs_qgroup_free_meta_all_pertrans() is called.
                     :  *
                     :  * Returns 0 on success, or the error from btrfs_update_reloc_root() or
                     :  * btrfs_update_root().  On error the remaining roots are not processed.
    1389             :  */
    1390           0 : static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
    1391             : {
    1392           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1393           0 :         struct btrfs_root *gang[8];
    1394           0 :         int i;
    1395           0 :         int ret;
    1396             : 
    1397             :         /*
    1398             :          * At this point no one can be using this transaction to modify any tree
    1399             :          * and no one can start another transaction to modify any tree either.
    1400             :          */
    1401           0 :         ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING);
    1402             : 
    1403           0 :         spin_lock(&fs_info->fs_roots_radix_lock);
    1404           0 :         while (1) {
    1405           0 :                 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
    1406             :                                                  (void **)gang, 0,
    1407             :                                                  ARRAY_SIZE(gang),
    1408             :                                                  BTRFS_ROOT_TRANS_TAG);
    1409           0 :                 if (ret == 0)
    1410             :                         break;
    1411           0 :                 for (i = 0; i < ret; i++) {
    1412           0 :                         struct btrfs_root *root = gang[i];
    1413           0 :                         int ret2;
    1414             : 
    1415             :                         /*
    1416             :                          * At this point we can neither have tasks logging inodes
    1417             :                          * from a root nor trying to commit a log tree.
    1418             :                          */
    1419           0 :                         ASSERT(atomic_read(&root->log_writers) == 0);
    1420           0 :                         ASSERT(atomic_read(&root->log_commit[0]) == 0);
    1421           0 :                         ASSERT(atomic_read(&root->log_commit[1]) == 0);
    1422             : 
    1423           0 :                         radix_tree_tag_clear(&fs_info->fs_roots_radix,
    1424           0 :                                         (unsigned long)root->root_key.objectid,
    1425             :                                         BTRFS_ROOT_TRANS_TAG);
    1426           0 :                         spin_unlock(&fs_info->fs_roots_radix_lock);
    1427             :                         /*
                     :                          * The radix lock is not held for the work below, so
                     :                          * the error returns leave it unlocked.
                     :                          */
    1428           0 :                         btrfs_free_log(trans, root);
    1429           0 :                         ret2 = btrfs_update_reloc_root(trans, root);
    1430           0 :                         if (ret2)
    1431           0 :                                 return ret2;
    1432             : 
    1433             :                         /* see comments in should_cow_block() */
    1434           0 :                         clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
    1435           0 :                         smp_mb__after_atomic();
    1436             :                         /* Only a root whose node changed is queued and re-pointed. */
    1437           0 :                         if (root->commit_root != root->node) {
    1438           0 :                                 list_add_tail(&root->dirty_list,
    1439           0 :                                         &trans->transaction->switch_commits);
    1440           0 :                                 btrfs_set_root_node(&root->root_item,
    1441             :                                                     root->node);
    1442             :                         }
    1443             : 
    1444           0 :                         ret2 = btrfs_update_root(trans, fs_info->tree_root,
    1445             :                                                 &root->root_key,
    1446             :                                                 &root->root_item);
    1447           0 :                         if (ret2)
    1448           0 :                                 return ret2;
    1449           0 :                         spin_lock(&fs_info->fs_roots_radix_lock);
    1450           0 :                         btrfs_qgroup_free_meta_all_pertrans(root);
    1451             :                 }
    1452             :         }
    1453           0 :         spin_unlock(&fs_info->fs_roots_radix_lock);
    1454           0 :         return 0;
    1455             : }
    1456             : 
    1457             : /*
    1458             :  * defrag a given btree.
    1459             :  * Every leaf in the btree is read and defragged.
                     :  *
                     :  * @root: root of the btree to defrag
                     :  *
                     :  * BTRFS_ROOT_DEFRAG_RUNNING in root->state guards against concurrent runs:
                     :  * if the bit is already set the function returns 0 at once, otherwise the
                     :  * bit is set here and cleared again before returning.
                     :  *
                     :  * Each pass starts a transaction, calls btrfs_defrag_leaves() and ends the
                     :  * transaction (the result of btrfs_end_transaction() is ignored).  Passes
                     :  * repeat only while btrfs_defrag_leaves() returns -EAGAIN, the filesystem is
                     :  * not closing and defrag has not been cancelled.
                     :  *
                     :  * Returns the error from btrfs_start_transaction() if it fails, otherwise
                     :  * the result of the last btrfs_defrag_leaves() call, which is -EAGAIN when
                     :  * the loop was left because of cancellation.
    1460             :  */
    1461           0 : int btrfs_defrag_root(struct btrfs_root *root)
    1462             : {
    1463           0 :         struct btrfs_fs_info *info = root->fs_info;
    1464           0 :         struct btrfs_trans_handle *trans;
    1465           0 :         int ret;
    1466             : 
    1467           0 :         if (test_and_set_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state))
    1468             :                 return 0;
    1469             : 
    1470           0 :         while (1) {
    1471           0 :                 trans = btrfs_start_transaction(root, 0);
    1472           0 :                 if (IS_ERR(trans)) {
    1473           0 :                         ret = PTR_ERR(trans);
    1474           0 :                         break;
    1475             :                 }
    1476             : 
    1477           0 :                 ret = btrfs_defrag_leaves(trans, root);
    1478             : 
    1479           0 :                 btrfs_end_transaction(trans);
    1480           0 :                 btrfs_btree_balance_dirty(info);
    1481           0 :                 cond_resched();
    1482             :                 /* Only -EAGAIN from btrfs_defrag_leaves() asks for another pass. */
    1483           0 :                 if (btrfs_fs_closing(info) || ret != -EAGAIN)
    1484             :                         break;
    1485             : 
    1486           0 :                 if (btrfs_defrag_cancelled(info)) {
    1487             :                         btrfs_debug(info, "defrag_root cancelled");
    1488             :                         ret = -EAGAIN;
    1489             :                         break;
    1490             :                 }
    1491             :         }
    1492           0 :         clear_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state);
    1493             :         return ret;
    1494             : }
    1495             : 
    1496             : /*
    1497             :  * Do all special snapshot related qgroup dirty hack.
    1498             :  *
    1499             :  * Will do all needed qgroup inherit and dirty hack like switch commit
    1500             :  * roots inside one transaction and write all btree into disk, to make
    1501             :  * qgroup works.
                     :  *
                     :  * @trans:        transaction handle used for every step
                     :  * @src:          root recorded in the transaction first; its objectid is the
                     :  *                source passed to btrfs_qgroup_inherit()
                     :  * @parent:       root recorded in the transaction again once all other steps
                     :  *                have succeeded
                     :  * @inherit:      passed through to btrfs_qgroup_inherit()
                     :  * @dst_objectid: destination id passed to btrfs_qgroup_inherit()
                     :  *
                     :  * Returns 0 at once if BTRFS_FS_QUOTA_ENABLED is not set, 0 on success, or
                     :  * the error of the first step that failed.  Only a failure of
                     :  * btrfs_run_delayed_refs() aborts the transaction here; a failure of
                     :  * btrfs_write_and_wait_transaction() is reported through
                     :  * btrfs_handle_fs_error().  @parent is not recorded when a step failed.
    1502             :  */
    1503           0 : static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
    1504             :                                    struct btrfs_root *src,
    1505             :                                    struct btrfs_root *parent,
    1506             :                                    struct btrfs_qgroup_inherit *inherit,
    1507             :                                    u64 dst_objectid)
    1508             : {
    1509           0 :         struct btrfs_fs_info *fs_info = src->fs_info;
    1510           0 :         int ret;
    1511             : 
    1512             :         /*
    1513             :          * Save some performance in the case that qgroups are not
    1514             :          * enabled. If this check races with the ioctl, rescan will
    1515             :          * kick in anyway.
    1516             :          */
    1517           0 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
    1518             :                 return 0;
    1519             : 
    1520             :         /*
    1521             :          * Ensure dirty @src will be committed.  Or, after coming
    1522             :          * commit_fs_roots() and switch_commit_roots(), any dirty but not
    1523             :          * recorded root will never be updated again, causing an outdated root
    1524             :          * item.
    1525             :          */
    1526           0 :         ret = record_root_in_trans(trans, src, 1);
    1527           0 :         if (ret)
    1528             :                 return ret;
    1529             : 
    1530             :         /*
    1531             :          * btrfs_qgroup_inherit relies on a consistent view of the usage for the
    1532             :          * src root, so we must run the delayed refs here.
    1533             :          *
    1534             :          * However this isn't particularly fool proof, because there's no
    1535             :          * synchronization keeping us from changing the tree after this point
    1536             :          * before we do the qgroup_inherit, or even from making changes while
    1537             :          * we're doing the qgroup_inherit.  But that's a problem for the future,
    1538             :          * for now flush the delayed refs to narrow the race window where the
    1539             :          * qgroup counters could end up wrong.
    1540             :          */
    1541           0 :         ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
    1542           0 :         if (ret) {
    1543           0 :                 btrfs_abort_transaction(trans, ret);
    1544           0 :                 return ret;
    1545             :         }
    1546             : 
    1547           0 :         ret = commit_fs_roots(trans);
    1548           0 :         if (ret)
    1549           0 :                 goto out;
    1550           0 :         ret = btrfs_qgroup_account_extents(trans);
    1551           0 :         if (ret < 0)
    1552           0 :                 goto out;
    1553             : 
    1554             :         /* Now qgroup are all updated, we can inherit it to new qgroups */
    1555           0 :         ret = btrfs_qgroup_inherit(trans, src->root_key.objectid, dst_objectid,
    1556             :                                    inherit);
    1557           0 :         if (ret < 0)
    1558           0 :                 goto out;
    1559             : 
    1560             :         /*
    1561             :          * Now we do a simplified commit transaction, which will:
    1562             :          * 1) commit all subvolume and extent tree
    1563             :          *    To ensure all subvolume and extent tree have a valid
    1564             :          *    commit_root to accounting later insert_dir_item()
    1565             :          * 2) write all btree blocks onto disk
    1566             :          *    This is to make sure later btree modification will be cowed
    1567             :          *    Or commit_root can be populated and cause wrong qgroup numbers
    1568             :          * In this simplified commit, we don't really care about other trees
    1569             :          * like chunk and root tree, as they won't affect qgroup.
    1570             :          * And we don't write super to avoid half committed status.
    1571             :          */
    1572           0 :         ret = commit_cowonly_roots(trans);
    1573           0 :         if (ret)
    1574           0 :                 goto out;
    1575           0 :         switch_commit_roots(trans);
    1576           0 :         ret = btrfs_write_and_wait_transaction(trans);
    1577           0 :         if (ret)
    1578           0 :                 btrfs_handle_fs_error(fs_info, ret,
    1579             :                         "Error while writing out transaction for qgroup");
    1580             : 
    1581           0 : out:
    1582             :         /*
    1583             :          * Force parent root to be updated, as we recorded it before so its
    1584             :          * last_trans == cur_transid.
    1585             :          * Or it won't be committed again onto disk after later
    1586             :          * insert_dir_item()
    1587             :          */
    1588           0 :         if (!ret)
    1589           0 :                 ret = record_root_in_trans(trans, parent, 1);
    1590             :         return ret;
    1591             : }
    1592             : 
    1593             : /*
    1594             :  * New snapshots need to be created at a very specific time in the
    1595             :  * transaction commit.  This does the actual creation.
    1596             :  *
    1597             :  * Note:
    1598             :  * If an error happens that may affect the commit of the current transaction,
    1599             :  * return the error number. If the error only affects the creation of this
    1600             :  * pending snapshot, store it in pending->error and return 0.
    1601             :  */
    1602           0 : static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
    1603             :                                    struct btrfs_pending_snapshot *pending)
    1604             : {
    1605             : 
    1606           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1607           0 :         struct btrfs_key key;
    1608           0 :         struct btrfs_root_item *new_root_item;
    1609           0 :         struct btrfs_root *tree_root = fs_info->tree_root;
    1610           0 :         struct btrfs_root *root = pending->root;
    1611           0 :         struct btrfs_root *parent_root;
    1612           0 :         struct btrfs_block_rsv *rsv;
    1613           0 :         struct inode *parent_inode = pending->dir;
    1614           0 :         struct btrfs_path *path;
    1615           0 :         struct btrfs_dir_item *dir_item;
    1616           0 :         struct extent_buffer *tmp;
    1617           0 :         struct extent_buffer *old;
    1618           0 :         struct timespec64 cur_time;
    1619           0 :         int ret = 0;
    1620           0 :         u64 to_reserve = 0;
    1621           0 :         u64 index = 0;
    1622           0 :         u64 objectid;
    1623           0 :         u64 root_flags;
    1624           0 :         unsigned int nofs_flags;
    1625           0 :         struct fscrypt_name fname;
    1626             : 
    1627           0 :         ASSERT(pending->path);
    1628           0 :         path = pending->path;
    1629             : 
    1630           0 :         ASSERT(pending->root_item);
    1631           0 :         new_root_item = pending->root_item;
    1632             : 
    1633             :         /*
    1634             :          * We're inside a transaction and must make sure that any potential
    1635             :          * allocations with GFP_KERNEL in fscrypt won't recurse back to
    1636             :          * filesystem.
    1637             :          */
    1638           0 :         nofs_flags = memalloc_nofs_save();
    1639           0 :         pending->error = fscrypt_setup_filename(parent_inode,
    1640           0 :                                                 &pending->dentry->d_name, 0,
    1641             :                                                 &fname);
    1642           0 :         memalloc_nofs_restore(nofs_flags);
    1643           0 :         if (pending->error)
    1644           0 :                 goto free_pending;
    1645             : 
    1646           0 :         pending->error = btrfs_get_free_objectid(tree_root, &objectid);
    1647           0 :         if (pending->error)
    1648           0 :                 goto free_fname;
    1649             : 
    1650             :         /*
    1651             :          * Make qgroup accounting skip the new snapshot's qgroupid, as it is
    1652             :          * accounted for later by btrfs_qgroup_inherit().
    1653             :          */
    1654           0 :         btrfs_set_skip_qgroup(trans, objectid);
    1655             : 
    1656           0 :         btrfs_reloc_pre_snapshot(pending, &to_reserve);
    1657             : 
    1658           0 :         if (to_reserve > 0) {
    1659           0 :                 pending->error = btrfs_block_rsv_add(fs_info,
    1660             :                                                      &pending->block_rsv,
    1661             :                                                      to_reserve,
    1662             :                                                      BTRFS_RESERVE_NO_FLUSH);
    1663           0 :                 if (pending->error)
    1664           0 :                         goto clear_skip_qgroup;
    1665             :         }
    1666             : 
    1667           0 :         key.objectid = objectid;
    1668           0 :         key.offset = (u64)-1;
    1669           0 :         key.type = BTRFS_ROOT_ITEM_KEY;
    1670             : 
                               /*
                                * Use the snapshot's own block reserve for the rest of this function;
                                * the handle's original reserve is put back at dir_item_existed.
                                */
    1671           0 :         rsv = trans->block_rsv;
    1672           0 :         trans->block_rsv = &pending->block_rsv;
    1673           0 :         trans->bytes_reserved = trans->block_rsv->reserved;
    1674           0 :         trace_btrfs_space_reservation(fs_info, "transaction",
    1675             :                                       trans->transid,
    1676             :                                       trans->bytes_reserved, 1);
    1677           0 :         parent_root = BTRFS_I(parent_inode)->root;
    1678           0 :         ret = record_root_in_trans(trans, parent_root, 0);
    1679           0 :         if (ret)
    1680           0 :                 goto fail;
    1681           0 :         cur_time = current_time(parent_inode);
    1682             : 
    1683             :         /*
    1684             :          * Get the dir index for the new entry; the item is inserted below.
    1685             :          */
    1686           0 :         ret = btrfs_set_inode_index(BTRFS_I(parent_inode), &index);
    1687           0 :         if (ret) {
    1688           0 :                 btrfs_abort_transaction(trans, ret);
    1689           0 :                 goto fail;
    1690             :         }
    1691             : 
    1692             :         /* Check if there is a file/dir which has the same name. */
    1693           0 :         dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
    1694             :                                          btrfs_ino(BTRFS_I(parent_inode)),
    1695             :                                          &fname.disk_name, 0);
    1696           0 :         if (dir_item != NULL && !IS_ERR(dir_item)) {
    1697           0 :                 pending->error = -EEXIST;
    1698           0 :                 goto dir_item_existed;
    1699           0 :         } else if (IS_ERR(dir_item)) {
    1700           0 :                 ret = PTR_ERR(dir_item);
    1701           0 :                 btrfs_abort_transaction(trans, ret);
    1702           0 :                 goto fail;
    1703             :         }
    1704           0 :         btrfs_release_path(path);
    1705             : 
    1706             :         /*
    1707             :          * Pull in the delayed directory update
    1708             :          * and the delayed inode item,
    1709             :          * otherwise we corrupt the FS during
    1710             :          * snapshot.
    1711             :          */
    1712           0 :         ret = btrfs_run_delayed_items(trans);
    1713           0 :         if (ret) {      /* Transaction aborted */
    1714           0 :                 btrfs_abort_transaction(trans, ret);
    1715           0 :                 goto fail;
    1716             :         }
    1717             : 
    1718           0 :         ret = record_root_in_trans(trans, root, 0);
    1719           0 :         if (ret) {
    1720           0 :                 btrfs_abort_transaction(trans, ret);
    1721           0 :                 goto fail;
    1722             :         }
    1723           0 :         btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
    1724           0 :         memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
    1725           0 :         btrfs_check_and_init_root_item(new_root_item);
    1726             : 
    1727           0 :         root_flags = btrfs_root_flags(new_root_item);
    1728           0 :         if (pending->readonly)
    1729           0 :                 root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
    1730             :         else
    1731           0 :                 root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
    1732           0 :         btrfs_set_root_flags(new_root_item, root_flags);
    1733             : 
    1734           0 :         btrfs_set_root_generation_v2(new_root_item,
    1735             :                         trans->transid);
    1736           0 :         generate_random_guid(new_root_item->uuid);
    1737           0 :         memcpy(new_root_item->parent_uuid, root->root_item.uuid,
    1738             :                         BTRFS_UUID_SIZE);
    1739           0 :         if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) {
    1740           0 :                 memset(new_root_item->received_uuid, 0,
    1741             :                        sizeof(new_root_item->received_uuid));
    1742           0 :                 memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
    1743           0 :                 memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
    1744           0 :                 btrfs_set_root_stransid(new_root_item, 0);
    1745           0 :                 btrfs_set_root_rtransid(new_root_item, 0);
    1746             :         }
    1747           0 :         btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec);
    1748           0 :         btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec);
    1749           0 :         btrfs_set_root_otransid(new_root_item, trans->transid);
    1750             : 
    1751           0 :         old = btrfs_lock_root_node(root);
    1752           0 :         ret = btrfs_cow_block(trans, root, old, NULL, 0, &old,
    1753             :                               BTRFS_NESTING_COW);
    1754           0 :         if (ret) {
    1755           0 :                 btrfs_tree_unlock(old);
    1756           0 :                 free_extent_buffer(old);
    1757           0 :                 btrfs_abort_transaction(trans, ret);
    1758           0 :                 goto fail;
    1759             :         }
    1760             : 
    1761           0 :         ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
    1762             :         /* clean up in any case */
    1763           0 :         btrfs_tree_unlock(old);
    1764           0 :         free_extent_buffer(old);
    1765           0 :         if (ret) {
    1766           0 :                 btrfs_abort_transaction(trans, ret);
    1767           0 :                 goto fail;
    1768             :         }
    1769             :         /* see comments in should_cow_block() */
    1770           0 :         set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
    1771           0 :         smp_wmb();
    1772             : 
    1773           0 :         btrfs_set_root_node(new_root_item, tmp);
    1774             :         /* record when the snapshot was created in key.offset */
    1775           0 :         key.offset = trans->transid;
    1776           0 :         ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
    1777           0 :         btrfs_tree_unlock(tmp);
    1778           0 :         free_extent_buffer(tmp);
    1779           0 :         if (ret) {
    1780           0 :                 btrfs_abort_transaction(trans, ret);
    1781           0 :                 goto fail;
    1782             :         }
    1783             : 
    1784             :         /*
    1785             :          * insert root back/forward references
    1786             :          */
    1787           0 :         ret = btrfs_add_root_ref(trans, objectid,
    1788             :                                  parent_root->root_key.objectid,
    1789             :                                  btrfs_ino(BTRFS_I(parent_inode)), index,
    1790             :                                  &fname.disk_name);
    1791           0 :         if (ret) {
    1792           0 :                 btrfs_abort_transaction(trans, ret);
    1793           0 :                 goto fail;
    1794             :         }
    1795             : 
                               /* Reset the offset, key is passed to btrfs_insert_dir_item() below. */
    1796           0 :         key.offset = (u64)-1;
    1797           0 :         pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
    1798           0 :         if (IS_ERR(pending->snap)) {
    1799           0 :                 ret = PTR_ERR(pending->snap);
    1800           0 :                 pending->snap = NULL;
    1801           0 :                 btrfs_abort_transaction(trans, ret);
    1802           0 :                 goto fail;
    1803             :         }
    1804             : 
    1805           0 :         ret = btrfs_reloc_post_snapshot(trans, pending);
    1806           0 :         if (ret) {
    1807           0 :                 btrfs_abort_transaction(trans, ret);
    1808           0 :                 goto fail;
    1809             :         }
    1810             : 
    1811             :         /*
    1812             :          * Do special qgroup accounting for snapshot, as we do some qgroup
    1813             :          * snapshot hack to do fast snapshot.
    1814             :          * To co-operate with that hack, we do hack again.
    1815             :          * Or snapshot will be greatly slowed down by a subtree qgroup rescan
    1816             :          */
    1817           0 :         ret = qgroup_account_snapshot(trans, root, parent_root,
    1818             :                                       pending->inherit, objectid);
    1819           0 :         if (ret < 0)
    1820           0 :                 goto fail;
    1821             : 
    1822           0 :         ret = btrfs_insert_dir_item(trans, &fname.disk_name,
    1823             :                                     BTRFS_I(parent_inode), &key, BTRFS_FT_DIR,
    1824             :                                     index);
    1825             :         /* We have checked the name at the beginning, so it is impossible. */
    1826           0 :         BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
    1827           0 :         if (ret) {
    1828           0 :                 btrfs_abort_transaction(trans, ret);
    1829           0 :                 goto fail;
    1830             :         }
    1831             : 
    1832           0 :         btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
    1833           0 :                                                   fname.disk_name.len * 2);
    1834           0 :         parent_inode->i_mtime = current_time(parent_inode);
    1835           0 :         parent_inode->i_ctime = parent_inode->i_mtime;
    1836           0 :         ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
    1837           0 :         if (ret) {
    1838           0 :                 btrfs_abort_transaction(trans, ret);
    1839           0 :                 goto fail;
    1840             :         }
    1841           0 :         ret = btrfs_uuid_tree_add(trans, new_root_item->uuid,
    1842             :                                   BTRFS_UUID_KEY_SUBVOL,
    1843             :                                   objectid);
    1844           0 :         if (ret) {
    1845           0 :                 btrfs_abort_transaction(trans, ret);
    1846           0 :                 goto fail;
    1847             :         }
    1848           0 :         if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
    1849           0 :                 ret = btrfs_uuid_tree_add(trans, new_root_item->received_uuid,
    1850             :                                           BTRFS_UUID_KEY_RECEIVED_SUBVOL,
    1851             :                                           objectid);
    1852           0 :                 if (ret && ret != -EEXIST) {
    1853           0 :                         btrfs_abort_transaction(trans, ret);
    1854           0 :                         goto fail;
    1855             :                 }
    1856             :         }
    1857             : 
                               /*
                                * The fail label records ret, which is 0 when falling through from
                                * above, in pending->error.  Jumps to dir_item_existed and the labels
                                * after it leave the pending->error value set before the jump.
                                */
    1858           0 : fail:
    1859           0 :         pending->error = ret;
    1860           0 : dir_item_existed:
    1861           0 :         trans->block_rsv = rsv;
    1862           0 :         trans->bytes_reserved = 0;
    1863           0 : clear_skip_qgroup:
    1864           0 :         btrfs_clear_skip_qgroup(trans);
    1865             : free_fname:
    1866             :         fscrypt_free_filename(&fname);
    1867           0 : free_pending:
                               /* The root item and path taken from @pending are released on every path. */
    1868           0 :         kfree(new_root_item);
    1869           0 :         pending->root_item = NULL;
    1870           0 :         btrfs_free_path(path);
    1871           0 :         pending->path = NULL;
    1872             : 
    1873           0 :         return ret;
    1874             : }
    1875             : 
    1876             : /*
    1877             :  * Create all the snapshots we've scheduled for creation.
                        * Each entry is unlinked from the transaction's pending_snapshots list before
                        * create_pending_snapshot() is called for it. Stops at, and returns, the first
                        * non-zero return value; returns 0 otherwise.
    1878             :  */
    1879           0 : static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans)
    1880             : {
    1881           0 :         struct btrfs_pending_snapshot *pending, *next;
    1882           0 :         struct list_head *head = &trans->transaction->pending_snapshots;
    1883           0 :         int ret = 0;
    1884             : 
    1885           0 :         list_for_each_entry_safe(pending, next, head, list) {
    1886           0 :                 list_del(&pending->list);
    1887           0 :                 ret = create_pending_snapshot(trans, pending);
    1888           0 :                 if (ret)
    1889             :                         break;
    1890             :         }
    1891           0 :         return ret;
    1892             : }
    1893             : 
    1894           0 : static void update_super_roots(struct btrfs_fs_info *fs_info)
    1895             : {
                               /*
                                * Copy bytenr, generation and level of the chunk root and tree root
                                * items into fs_info->super_copy.  cache_generation follows the tree
                                * root generation when the SPACE_CACHE option is set and is zeroed
                                * when BTRFS_FS_CLEANUP_SPACE_CACHE_V1 is set instead;
                                * uuid_tree_generation is only updated when
                                * BTRFS_FS_UPDATE_UUID_TREE_GEN is set.
                                */
    1896           0 :         struct btrfs_root_item *root_item;
    1897           0 :         struct btrfs_super_block *super;
    1898             : 
    1899           0 :         super = fs_info->super_copy;
    1900             : 
    1901           0 :         root_item = &fs_info->chunk_root->root_item;
    1902           0 :         super->chunk_root = root_item->bytenr;
    1903           0 :         super->chunk_root_generation = root_item->generation;
    1904           0 :         super->chunk_root_level = root_item->level;
    1905             : 
    1906           0 :         root_item = &fs_info->tree_root->root_item;
    1907           0 :         super->root = root_item->bytenr;
    1908           0 :         super->generation = root_item->generation;
    1909           0 :         super->root_level = root_item->level;
    1910           0 :         if (btrfs_test_opt(fs_info, SPACE_CACHE))
    1911           0 :                 super->cache_generation = root_item->generation;
    1912           0 :         else if (test_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags))
    1913           0 :                 super->cache_generation = 0;
    1914           0 :         if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
    1915           0 :                 super->uuid_tree_generation = root_item->generation;
    1916           0 : }
    1917             : 
    1918           0 : int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
    1919             : {
                               /*
                                * Returns non-zero if there is a running transaction whose state is
                                * TRANS_STATE_COMMIT_START or later, and 0 otherwise, including when
                                * there is no running transaction.  Checked under info->trans_lock.
                                */
    1920           0 :         struct btrfs_transaction *trans;
    1921           0 :         int ret = 0;
    1922             : 
    1923           0 :         spin_lock(&info->trans_lock);
    1924           0 :         trans = info->running_transaction;
    1925           0 :         if (trans)
    1926           0 :                 ret = (trans->state >= TRANS_STATE_COMMIT_START);
    1927           0 :         spin_unlock(&info->trans_lock);
    1928           0 :         return ret;
    1929             : }
    1930             : 
    1931           0 : int btrfs_transaction_blocked(struct btrfs_fs_info *info)
    1932             : {
                               /*
                                * Returns what is_transaction_blocked() reports for the running
                                * transaction, or 0 if there is none.  Checked under info->trans_lock.
                                */
    1933           0 :         struct btrfs_transaction *trans;
    1934           0 :         int ret = 0;
    1935             : 
    1936           0 :         spin_lock(&info->trans_lock);
    1937           0 :         trans = info->running_transaction;
    1938           0 :         if (trans)
    1939           0 :                 ret = is_transaction_blocked(trans);
    1940           0 :         spin_unlock(&info->trans_lock);
    1941           0 :         return ret;
    1942             : }
    1943             : 
    1944           0 : void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans)
    1945             : {
                               /*
                                * Flag BTRFS_FS_COMMIT_TRANS and wake the transaction kthread, end
                                * the caller's handle and return once the transaction has reached
                                * TRANS_STATE_COMMIT_START or has been aborted.
                                */
    1946           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1947           0 :         struct btrfs_transaction *cur_trans;
    1948             : 
    1949             :         /* Kick the transaction kthread. */
    1950           0 :         set_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags);
    1951           0 :         wake_up_process(fs_info->transaction_kthread);
    1952             : 
    1953             :         /* Hold a reference, cur_trans is used after ending the handle. */
    1954           0 :         cur_trans = trans->transaction;
    1955           0 :         refcount_inc(&cur_trans->use_count);
    1956             : 
    1957           0 :         btrfs_end_transaction(trans);
    1958             : 
    1959             :         /*
    1960             :          * Wait for the current transaction commit to start and block
    1961             :          * subsequent transaction joins
    1962             :          */
    1963           0 :         btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
    1964           0 :         wait_event(fs_info->transaction_blocked_wait,
    1965             :                    cur_trans->state >= TRANS_STATE_COMMIT_START ||
    1966             :                    TRANS_ABORTED(cur_trans));
    1967           0 :         btrfs_put_transaction(cur_trans);
    1968           0 : }
    1969             : 
    1970           0 : static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
    1971             : {
                               /*
                                * Tear down the transaction after an error in the commit path: abort
                                * it with @err, wait for the other writers if it is still the running
                                * transaction, unlink it from the transaction list and clean it up.
                                * The handle is freed at the end, so @trans must not be used after
                                * this returns.
                                */
    1972           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1973           0 :         struct btrfs_transaction *cur_trans = trans->transaction;
    1974             : 
    1975           0 :         WARN_ON(refcount_read(&trans->use_count) > 1);
    1976             : 
    1977           0 :         btrfs_abort_transaction(trans, err);
    1978             : 
    1979           0 :         spin_lock(&fs_info->trans_lock);
    1980             : 
    1981             :         /*
    1982             :          * If the transaction is removed from the list, it means this
    1983             :          * transaction has been committed successfully, so it is impossible
    1984             :          * to call the cleanup function.
    1985             :          */
    1986           0 :         BUG_ON(list_empty(&cur_trans->list));
    1987             : 
    1988           0 :         if (cur_trans == fs_info->running_transaction) {
    1989           0 :                 cur_trans->state = TRANS_STATE_COMMIT_DOING;
    1990           0 :                 spin_unlock(&fs_info->trans_lock);
    1991             : 
    1992             :                 /*
    1993             :                  * The thread has already released the lockdep map as reader
    1994             :                  * in btrfs_commit_transaction().
    1995             :                  */
    1996           0 :                 btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers);
    1997           0 :                 wait_event(cur_trans->writer_wait,
    1998             :                            atomic_read(&cur_trans->num_writers) == 1);
    1999             : 
    2000           0 :                 spin_lock(&fs_info->trans_lock);
    2001             :         }
    2002             : 
    2003             :         /*
    2004             :          * Now that we know no one else is still using the transaction we can
    2005             :          * remove the transaction from the list of transactions. This avoids
    2006             :          * the transaction kthread from cleaning up the transaction while some
    2007             :          * other task is still using it, which could result in a use-after-free
    2008             :          * on things like log trees, as it forces the transaction kthread to
    2009             :          * wait for this transaction to be cleaned up by us.
    2010             :          */
    2011           0 :         list_del_init(&cur_trans->list);
    2012             : 
    2013           0 :         spin_unlock(&fs_info->trans_lock);
    2014             : 
    2015           0 :         btrfs_cleanup_one_transaction(trans->transaction, fs_info);
    2016             : 
    2017           0 :         spin_lock(&fs_info->trans_lock);
    2018           0 :         if (cur_trans == fs_info->running_transaction)
    2019           0 :                 fs_info->running_transaction = NULL;
    2020           0 :         spin_unlock(&fs_info->trans_lock);
    2021             : 
    2022           0 :         if (trans->type & __TRANS_FREEZABLE)
    2023           0 :                 sb_end_intwrite(fs_info->sb);
    2024           0 :         btrfs_put_transaction(cur_trans);
    2025           0 :         btrfs_put_transaction(cur_trans);
    2026             : 
    2027           0 :         trace_btrfs_transaction_commit(fs_info);
    2028             : 
    2029           0 :         if (current->journal_info == trans)
    2030           0 :                 current->journal_info = NULL;
    2031             : 
    2032             :         /*
    2033             :          * If relocation is running, we can't cancel scrub because that will
    2034             :          * result in a deadlock. Before relocating a block group, relocation
    2035             :          * pauses scrub, then starts and commits a transaction before unpausing
    2036             :          * scrub. If the transaction commit is being done by the relocation
    2037             :          * task or triggered by another task and the relocation task is waiting
    2038             :          * for the commit, and we end up here due to an error in the commit
    2039             :          * path, then calling btrfs_scrub_cancel() will deadlock, as we are
    2040             :          * asking for scrub to stop while having it asked to be paused higher
    2041             :          * above in relocation code.
    2042             :          */
    2043           0 :         if (!test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
    2044           0 :                 btrfs_scrub_cancel(fs_info);
    2045             : 
    2046           0 :         kmem_cache_free(btrfs_trans_handle_cachep, trans);
    2047           0 : }
    2048             : 
    2049             : /*
    2050             :  * Release the reserved delayed ref space of every pending block group on
    2051             :  * trans->new_bgs and remove each one from that list.
    2052             :  */
    2053           0 : static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
    2054             : {
    2055           0 :        struct btrfs_fs_info *fs_info = trans->fs_info;
    2056           0 :        struct btrfs_block_group *block_group, *tmp;
    2057             : 
    2058           0 :        list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
    2059           0 :                btrfs_delayed_refs_rsv_release(fs_info, 1);
    2060           0 :                list_del_init(&block_group->bg_list);
    2061             :        }
    2062           0 : }
    2063             : 
    2064           0 : static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
    2065             : {
    2066             :         /*
    2067             :          * We use try_to_writeback_inodes_sb() here because if we used
    2068             :          * btrfs_start_delalloc_roots we would deadlock with fs freeze.
    2069             :          * We are currently holding the fs freeze lock, if we do an async flush
    2070             :          * we'll do btrfs_join_transaction() and deadlock because we need to
    2071             :          * wait for the fs freeze lock.  Using the direct flushing we benefit
    2072             :          * from already being in a transaction and our join_transaction doesn't
    2073             :          * have to re-take the fs freeze lock.
    2074             :          *
    2075             :          * Note that try_to_writeback_inodes_sb() will only trigger writeback
    2076             :          * if it can read lock sb->s_umount. It will always be able to lock it,
    2077             :          * except when the filesystem is being unmounted or being frozen, but in
    2078             :          * those cases sync_filesystem() is called, which results in calling
    2079             :          * writeback_inodes_sb() while holding a write lock on sb->s_umount.
    2080             :          * Note that we don't call writeback_inodes_sb() directly, because it
    2081             :          * will emit a warning if sb->s_umount is not locked.
                                *
                                * Writeback is only kicked with the FLUSHONCOMMIT option set; the
                                * return value is always 0.
    2082             :          */
    2083           0 :         if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
    2084           0 :                 try_to_writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
    2085           0 :         return 0;
    2086             : }
    2087             : 
    2088           0 : static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
    2089             : {
                               /*
                                * Like btrfs_start_delalloc_flush(), this only acts when the
                                * FLUSHONCOMMIT option is set, in which case it waits in
                                * btrfs_wait_ordered_roots().
                                */
    2090           0 :         if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
    2091           0 :                 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
    2092           0 : }
    2093             : 
    2094             : /*
    2095             :  * Add the pending snapshot associated with the given transaction handle to the
    2096             :  * pending_snapshots list of the handle's transaction. Does nothing if the
                     :  * handle has no pending snapshot. This must be called after the transaction
                     :  * commit started
    2097             :  * and while holding fs_info->trans_lock.
    2098             :  * This serves to guarantee a caller of btrfs_commit_transaction() that it can
    2099             :  * safely free the pending snapshot pointer in case btrfs_commit_transaction()
    2100             :  * returns an error.
    2101             :  */
    2102           0 : static void add_pending_snapshot(struct btrfs_trans_handle *trans)
    2103             : {
    2104           0 :         struct btrfs_transaction *cur_trans = trans->transaction;
    2105             : 
    2106           0 :         if (!trans->pending_snapshot)
    2107             :                 return;
    2108             :         /* Check both preconditions: trans_lock held and commit already started. */
    2109           0 :         lockdep_assert_held(&trans->fs_info->trans_lock);
    2110           0 :         ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START);
    2111             : 
    2112           0 :         list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
    2113             : }
    2114             : /*
                     :  * Account one commit in fs_info->commit_stats: increment commit_count,
                     :  * store @interval as last_commit_dur, raise max_commit_dur if @interval
                     :  * is larger (compared as u64) and add @interval to total_commit_dur.
                     :  *
                     :  * btrfs_commit_transaction() passes the difference of two ktime_get_ns()
                     :  * readings as @interval.
                     :  */
    2115             : static void update_commit_stats(struct btrfs_fs_info *fs_info, ktime_t interval)
    2116             : {
    2117           0 :         fs_info->commit_stats.commit_count++;
    2118           0 :         fs_info->commit_stats.last_commit_dur = interval;
    2119           0 :         fs_info->commit_stats.max_commit_dur =
    2120           0 :                         max_t(u64, fs_info->commit_stats.max_commit_dur, interval);
    2121           0 :         fs_info->commit_stats.total_commit_dur += interval;
    2122             : }
    2123             : /*
                     :  * Commit the transaction that @trans is attached to.
                     :  *
                     :  * If another task has already moved the transaction to
                     :  * TRANS_STATE_COMMIT_START or later, this handle is ended and the
                     :  * function only waits for that commit (up to TRANS_STATE_COMPLETED, or
                     :  * TRANS_STATE_SUPER_COMMITTED when trans->in_fsync is set). Otherwise
                     :  * this task drives the commit itself through the COMMIT_START,
                     :  * COMMIT_DOING, UNBLOCKED, SUPER_COMMITTED and COMPLETED states.
                     :  *
                     :  * Returns 0 on success. On failure returns the error of the step that
                     :  * failed or the transaction's ->aborted value.
                     :  *
                     :  * On the success path the handle is freed here with kmem_cache_free(); on
                     :  * the other paths it is passed to btrfs_end_transaction() or
                     :  * cleanup_transaction(). NOTE(review): presumably the caller must not
                     :  * touch @trans after this returns in either case - confirm.
                     :  */
    2124           0 : int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
    2125             : {
    2126           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2127           0 :         struct btrfs_transaction *cur_trans = trans->transaction;
    2128           0 :         struct btrfs_transaction *prev_trans = NULL;
    2129           0 :         int ret;
    2130           0 :         ktime_t start_time;
    2131           0 :         ktime_t interval;
    2132             : 
    2133           0 :         ASSERT(refcount_read(&trans->use_count) == 1);
    2134           0 :         btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2135             : 
    2136           0 :         clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
    2137             : 
    2138             :         /* Stop the commit early if ->aborted is set */
    2139           0 :         if (TRANS_ABORTED(cur_trans)) {
    2140           0 :                 ret = cur_trans->aborted;
    2141           0 :                 goto lockdep_trans_commit_start_release;
    2142             :         }
    2143             : 
    2144           0 :         btrfs_trans_release_metadata(trans);
    2145           0 :         trans->block_rsv = NULL;
    2146             : 
    2147             :         /*
    2148             :          * We only want one transaction commit doing the flushing so we do not
    2149             :          * waste a bunch of time on lock contention on the extent root node.
    2150             :          */
    2151           0 :         if (!test_and_set_bit(BTRFS_DELAYED_REFS_FLUSHING,
    2152           0 :                               &cur_trans->delayed_refs.flags)) {
    2153             :                 /*
    2154             :                  * Make a pass through all the delayed refs we have so far.
    2155             :                  * Any running threads may add more while we are here.
    2156             :                  */
    2157           0 :                 ret = btrfs_run_delayed_refs(trans, 0);
    2158           0 :                 if (ret)
    2159           0 :                         goto lockdep_trans_commit_start_release;
    2160             :         }
    2161             : 
    2162           0 :         btrfs_create_pending_block_groups(trans);
    2163             : 
    2164           0 :         if (!test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &cur_trans->flags)) {
    2165           0 :                 int run_it = 0;
    2166             : 
    2167             :                 /* this mutex is also taken before trying to set
    2168             :                  * block groups readonly.  We need to make sure
    2169             :                  * that nobody has set a block group readonly
    2170             :                  * after extents from that block group have been
    2171             :                  * allocated for cache files.  btrfs_set_block_group_ro
    2172             :                  * will wait for the transaction to commit if it
    2173             :                  * finds BTRFS_TRANS_DIRTY_BG_RUN set.
    2174             :                  *
    2175             :                  * The BTRFS_TRANS_DIRTY_BG_RUN flag is also used to make sure
    2176             :                  * only one process starts all the block group IO.  It wouldn't
    2177             :                  * hurt to have more than one go through, but there's no
    2178             :                  * real advantage to it either.
    2179             :                  */
    2180           0 :                 mutex_lock(&fs_info->ro_block_group_mutex);
    2181           0 :                 if (!test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN,
    2182             :                                       &cur_trans->flags))
    2183           0 :                         run_it = 1;
    2184           0 :                 mutex_unlock(&fs_info->ro_block_group_mutex);
    2185             : 
    2186           0 :                 if (run_it) {
    2187           0 :                         ret = btrfs_start_dirty_block_groups(trans);
    2188           0 :                         if (ret)
    2189           0 :                                 goto lockdep_trans_commit_start_release;
    2190             :                 }
    2191             :         }
    2192             :         /*
                     :          * If some other task already started committing this transaction,
                     :          * queue our pending snapshot (if any), drop our handle and just
                     :          * wait for that commit to reach the state we need.
                     :          */
    2193           0 :         spin_lock(&fs_info->trans_lock);
    2194           0 :         if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
    2195           0 :                 enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
    2196             : 
    2197           0 :                 add_pending_snapshot(trans);
    2198             : 
    2199           0 :                 spin_unlock(&fs_info->trans_lock);
    2200           0 :                 refcount_inc(&cur_trans->use_count);
    2201             : 
    2202           0 :                 if (trans->in_fsync)
    2203           0 :                         want_state = TRANS_STATE_SUPER_COMMITTED;
    2204             : 
    2205           0 :                 btrfs_trans_state_lockdep_release(fs_info,
    2206             :                                                   BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2207           0 :                 ret = btrfs_end_transaction(trans);
    2208           0 :                 wait_for_commit(cur_trans, want_state);
    2209             : 
    2210           0 :                 if (TRANS_ABORTED(cur_trans))
    2211           0 :                         ret = cur_trans->aborted;
    2212             : 
    2213           0 :                 btrfs_put_transaction(cur_trans);
    2214             : 
    2215           0 :                 return ret;
    2216             :         }
    2217             : 
    2218           0 :         cur_trans->state = TRANS_STATE_COMMIT_START;
    2219           0 :         wake_up(&fs_info->transaction_blocked_wait);
    2220           0 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2221             :         /*
                     :          * If this is not the first transaction on fs_info->trans_list, make
                     :          * sure the one before it has reached want_state before going on.
                     :          */
    2222           0 :         if (cur_trans->list.prev != &fs_info->trans_list) {
    2223           0 :                 enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
    2224             : 
    2225           0 :                 if (trans->in_fsync)
    2226           0 :                         want_state = TRANS_STATE_SUPER_COMMITTED;
    2227             : 
    2228           0 :                 prev_trans = list_entry(cur_trans->list.prev,
    2229             :                                         struct btrfs_transaction, list);
    2230           0 :                 if (prev_trans->state < want_state) {
    2231           0 :                         refcount_inc(&prev_trans->use_count);
    2232           0 :                         spin_unlock(&fs_info->trans_lock);
    2233             : 
    2234           0 :                         wait_for_commit(prev_trans, want_state);
    2235             : 
    2236           0 :                         ret = READ_ONCE(prev_trans->aborted);
    2237             : 
    2238           0 :                         btrfs_put_transaction(prev_trans);
    2239           0 :                         if (ret)
    2240           0 :                                 goto lockdep_release;
    2241             :                 } else {
    2242           0 :                         spin_unlock(&fs_info->trans_lock);
    2243             :                 }
    2244             :         } else {
    2245           0 :                 spin_unlock(&fs_info->trans_lock);
    2246             :                 /*
    2247             :                  * The previous transaction was aborted and was already removed
    2248             :                  * from the list of transactions at fs_info->trans_list. So we
    2249             :                  * abort to prevent writing a new superblock that reflects a
    2250             :                  * corrupt state (pointing to trees with unwritten nodes/leafs).
    2251             :                  */
    2252           0 :                 if (BTRFS_FS_ERROR(fs_info)) {
    2253           0 :                         ret = -EROFS;
    2254           0 :                         goto lockdep_release;
    2255             :                 }
    2256             :         }
    2257             : 
    2258             :         /*
    2259             :          * Get the time spent on the work done by the commit thread and not
    2260             :          * the time spent waiting on a previous commit
    2261             :          */
    2262           0 :         start_time = ktime_get_ns();
    2263             : 
    2264           0 :         extwriter_counter_dec(cur_trans, trans->type);
    2265             : 
    2266           0 :         ret = btrfs_start_delalloc_flush(fs_info);
    2267           0 :         if (ret)
    2268           0 :                 goto lockdep_release;
    2269             : 
    2270           0 :         ret = btrfs_run_delayed_items(trans);
    2271           0 :         if (ret)
    2272           0 :                 goto lockdep_release;
    2273             : 
    2274             :         /*
    2275             :          * The thread has started/joined the transaction thus it holds the
    2276             :          * lockdep map as a reader. It has to release it before acquiring the
    2277             :          * lockdep map as a writer.
    2278             :          */
    2279           0 :         btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
    2280           0 :         btrfs_might_wait_for_event(fs_info, btrfs_trans_num_extwriters);
    2281           0 :         wait_event(cur_trans->writer_wait,
    2282             :                    extwriter_counter_read(cur_trans) == 0);
    2283             : 
    2284             :         /* Some pending stuff might have been added after the previous flush. */
    2285           0 :         ret = btrfs_run_delayed_items(trans);
    2286           0 :         if (ret) {
    2287           0 :                 btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
    2288           0 :                 goto cleanup_transaction;
    2289             :         }
    2290             : 
    2291           0 :         btrfs_wait_delalloc_flush(fs_info);
    2292             : 
    2293             :         /*
    2294             :          * Wait for all ordered extents started by a fast fsync that joined this
    2295             :          * transaction. Otherwise if this transaction commits before the ordered
    2296             :          * extents complete we lose logged data after a power failure.
    2297             :          */
    2298           0 :         btrfs_might_wait_for_event(fs_info, btrfs_trans_pending_ordered);
    2299           0 :         wait_event(cur_trans->pending_wait,
    2300             :                    atomic_read(&cur_trans->pending_ordered) == 0);
    2301             : 
    2302           0 :         btrfs_scrub_pause(fs_info);
    2303             :         /*
    2304             :          * Ok now we need to make sure to block out any other joins while we
    2305             :          * commit the transaction.  We could have started a join before setting
    2306             :          * COMMIT_DOING so make sure to wait for num_writers to drop back to 1.
    2307             :          */
    2308           0 :         spin_lock(&fs_info->trans_lock);
    2309           0 :         add_pending_snapshot(trans);
    2310           0 :         cur_trans->state = TRANS_STATE_COMMIT_DOING;
    2311           0 :         spin_unlock(&fs_info->trans_lock);
    2312             : 
    2313             :         /*
    2314             :          * The thread has started/joined the transaction thus it holds the
    2315             :          * lockdep map as a reader. It has to release it before acquiring the
    2316             :          * lockdep map as a writer.
    2317             :          */
    2318           0 :         btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
    2319           0 :         btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers);
    2320           0 :         wait_event(cur_trans->writer_wait,
    2321             :                    atomic_read(&cur_trans->num_writers) == 1);
    2322             : 
    2323             :         /*
    2324             :          * Make lockdep happy by acquiring the state locks after
    2325             :          * btrfs_trans_num_writers is released. If we acquired the state locks
    2326             :          * before releasing the btrfs_trans_num_writers lock then lockdep would
    2327             :          * complain because we did not follow the reverse order unlocking rule.
    2328             :          */
    2329           0 :         btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
    2330           0 :         btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
    2331           0 :         btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2332             : 
    2333             :         /*
    2334             :          * We've started the commit, clear the flag in case we were triggered to
    2335             :          * do an async commit but somebody else started before the transaction
    2336             :          * kthread could do the work.
    2337             :          */
    2338           0 :         clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags);
    2339             : 
    2340           0 :         if (TRANS_ABORTED(cur_trans)) {
    2341           0 :                 ret = cur_trans->aborted;
    2342           0 :                 btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2343           0 :                 goto scrub_continue;
    2344             :         }
    2345             :         /*
    2346             :          * the reloc mutex makes sure that we stop
    2347             :          * the balancing code from coming in and moving
    2348             :          * extents around in the middle of the commit
    2349             :          */
    2350           0 :         mutex_lock(&fs_info->reloc_mutex);
    2351             : 
    2352             :         /*
    2353             :          * We needn't worry about the delayed items because we will
    2354             :          * deal with them in create_pending_snapshot(), which is the
    2355             :          * core function of the snapshot creation.
    2356             :          */
    2357           0 :         ret = create_pending_snapshots(trans);
    2358           0 :         if (ret)
    2359           0 :                 goto unlock_reloc;
    2360             : 
    2361             :         /*
    2362             :          * We insert the dir indexes of the snapshots and update the inode
    2363             :          * of the snapshots' parents after the snapshot creation, so there
    2364             :          * are some delayed items which are not dealt with. Now deal with
    2365             :          * them.
    2366             :          *
    2367             :          * We needn't worry that this operation will corrupt the snapshots,
    2368             :          * because all the trees which are snapshotted will be forced to COW
    2369             :          * the nodes and leaves.
    2370             :          */
    2371           0 :         ret = btrfs_run_delayed_items(trans);
    2372           0 :         if (ret)
    2373           0 :                 goto unlock_reloc;
    2374             : 
    2375           0 :         ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
    2376           0 :         if (ret)
    2377           0 :                 goto unlock_reloc;
    2378             : 
    2379             :         /*
    2380             :          * make sure none of the code above managed to slip in a
    2381             :          * delayed item
    2382             :          */
    2383           0 :         btrfs_assert_delayed_root_empty(fs_info);
    2384             : 
    2385           0 :         WARN_ON(cur_trans != trans->transaction);
    2386             : 
    2387           0 :         ret = commit_fs_roots(trans);
    2388           0 :         if (ret)
    2389           0 :                 goto unlock_reloc;
    2390             : 
    2391             :         /* commit_fs_roots gets rid of all the tree log roots, it is now
    2392             :          * safe to free the root of tree log roots
    2393             :          */
    2394           0 :         btrfs_free_log_root_tree(trans, fs_info);
    2395             : 
    2396             :         /*
    2397             :          * Since fs roots are all committed, we can get a quite accurate
    2398             :          * new_roots. So let's do quota accounting.
    2399             :          */
    2400           0 :         ret = btrfs_qgroup_account_extents(trans);
    2401           0 :         if (ret < 0)
    2402           0 :                 goto unlock_reloc;
    2403             : 
    2404           0 :         ret = commit_cowonly_roots(trans);
    2405           0 :         if (ret)
    2406           0 :                 goto unlock_reloc;
    2407             : 
    2408             :         /*
    2409             :          * The tasks which save the space cache and inode cache may also
    2410             :          * update ->aborted, check it.
    2411             :          */
    2412           0 :         if (TRANS_ABORTED(cur_trans)) {
    2413           0 :                 ret = cur_trans->aborted;
    2414           0 :                 goto unlock_reloc;
    2415             :         }
    2416             : 
    2417           0 :         cur_trans = fs_info->running_transaction;
    2418             : 
    2419           0 :         btrfs_set_root_node(&fs_info->tree_root->root_item,
    2420             :                             fs_info->tree_root->node);
    2421           0 :         list_add_tail(&fs_info->tree_root->dirty_list,
    2422             :                       &cur_trans->switch_commits);
    2423             : 
    2424           0 :         btrfs_set_root_node(&fs_info->chunk_root->root_item,
    2425             :                             fs_info->chunk_root->node);
    2426           0 :         list_add_tail(&fs_info->chunk_root->dirty_list,
    2427             :                       &cur_trans->switch_commits);
    2428             : 
    2429           0 :         if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
    2430           0 :                 btrfs_set_root_node(&fs_info->block_group_root->root_item,
    2431             :                                     fs_info->block_group_root->node);
    2432           0 :                 list_add_tail(&fs_info->block_group_root->dirty_list,
    2433             :                               &cur_trans->switch_commits);
    2434             :         }
    2435             : 
    2436           0 :         switch_commit_roots(trans);
    2437             : 
    2438           0 :         ASSERT(list_empty(&cur_trans->dirty_bgs));
    2439           0 :         ASSERT(list_empty(&cur_trans->io_bgs));
    2440           0 :         update_super_roots(fs_info);
    2441             : 
    2442           0 :         btrfs_set_super_log_root(fs_info->super_copy, 0);
    2443           0 :         btrfs_set_super_log_root_level(fs_info->super_copy, 0);
    2444           0 :         memcpy(fs_info->super_for_commit, fs_info->super_copy,
    2445             :                sizeof(*fs_info->super_copy));
    2446             : 
    2447           0 :         btrfs_commit_device_sizes(cur_trans);
    2448             : 
    2449           0 :         clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags);
    2450           0 :         clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags);
    2451             : 
    2452           0 :         btrfs_trans_release_chunk_metadata(trans);
    2453             : 
    2454             :         /*
    2455             :          * Before changing the transaction state to TRANS_STATE_UNBLOCKED and
    2456             :          * setting fs_info->running_transaction to NULL, lock tree_log_mutex to
    2457             :          * make sure that before we commit our superblock, no other task can
    2458             :          * start a new transaction and commit a log tree before we commit our
    2459             :          * superblock. Anyone trying to commit a log tree locks this mutex before
    2460             :          * writing its superblock.
    2461             :          */
    2462           0 :         mutex_lock(&fs_info->tree_log_mutex);
    2463             : 
    2464           0 :         spin_lock(&fs_info->trans_lock);
    2465           0 :         cur_trans->state = TRANS_STATE_UNBLOCKED;
    2466           0 :         fs_info->running_transaction = NULL;
    2467           0 :         spin_unlock(&fs_info->trans_lock);
    2468           0 :         mutex_unlock(&fs_info->reloc_mutex);
    2469             : 
    2470           0 :         wake_up(&fs_info->transaction_wait);
    2471           0 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2472             : 
    2473             :         /* If we have features changed, wake up the cleaner to update sysfs. */
    2474           0 :         if (test_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags) &&
    2475           0 :             fs_info->cleaner_kthread)
    2476           0 :                 wake_up_process(fs_info->cleaner_kthread);
    2477             : 
    2478           0 :         ret = btrfs_write_and_wait_transaction(trans);
    2479           0 :         if (ret) {
    2480           0 :                 btrfs_handle_fs_error(fs_info, ret,
    2481             :                                       "Error while writing out transaction");
    2482           0 :                 mutex_unlock(&fs_info->tree_log_mutex);
    2483           0 :                 goto scrub_continue;
    2484             :         }
    2485             : 
    2486           0 :         ret = write_all_supers(fs_info, 0);
    2487             :         /*
    2488             :          * the super is written, we can safely allow the tree-loggers
    2489             :          * to go about their business
    2490             :          */
    2491           0 :         mutex_unlock(&fs_info->tree_log_mutex);
    2492           0 :         if (ret)
    2493           0 :                 goto scrub_continue;
    2494             : 
    2495             :         /*
    2496             :          * We needn't acquire the lock here because there is no other task
    2497             :          * which can change it.
    2498             :          */
    2499           0 :         cur_trans->state = TRANS_STATE_SUPER_COMMITTED;
    2500           0 :         wake_up(&cur_trans->commit_wait);
    2501           0 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
    2502             : 
    2503           0 :         btrfs_finish_extent_commit(trans);
    2504             : 
    2505           0 :         if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags))
    2506           0 :                 btrfs_clear_space_info_full(fs_info);
    2507             : 
    2508           0 :         fs_info->last_trans_committed = cur_trans->transid;
    2509             :         /*
    2510             :          * We needn't acquire the lock here because there is no other task
    2511             :          * which can change it.
    2512             :          */
    2513           0 :         cur_trans->state = TRANS_STATE_COMPLETED;
    2514           0 :         wake_up(&cur_trans->commit_wait);
    2515           0 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
    2516             : 
    2517           0 :         spin_lock(&fs_info->trans_lock);
    2518           0 :         list_del_init(&cur_trans->list);
    2519           0 :         spin_unlock(&fs_info->trans_lock);
    2520             :         /*
                     :          * NOTE(review): two references are dropped here - presumably the
                     :          * one held by the transaction list and the one held by this
                     :          * handle; confirm against where use_count is taken.
                     :          */
    2521           0 :         btrfs_put_transaction(cur_trans);
    2522           0 :         btrfs_put_transaction(cur_trans);
    2523             : 
    2524           0 :         if (trans->type & __TRANS_FREEZABLE)
    2525           0 :                 sb_end_intwrite(fs_info->sb);
    2526             : 
    2527           0 :         trace_btrfs_transaction_commit(fs_info);
    2528             : 
    2529           0 :         interval = ktime_get_ns() - start_time;
    2530             : 
    2531           0 :         btrfs_scrub_continue(fs_info);
    2532             : 
    2533           0 :         if (current->journal_info == trans)
    2534           0 :                 current->journal_info = NULL;
    2535             :         /* The handle is freed here; only fs_info, interval and ret are used below. */
    2536           0 :         kmem_cache_free(btrfs_trans_handle_cachep, trans);
    2537             : 
    2538           0 :         update_commit_stats(fs_info, interval);
    2539             : 
    2540           0 :         return ret;
    2541             :         /*
                     :          * Error unwinding. unlock_reloc falls through to scrub_continue, which
                     :          * falls through to cleanup_transaction; the two lockdep labels further
                     :          * down are reached only by goto.
                     :          */
    2542           0 : unlock_reloc:
    2543           0 :         mutex_unlock(&fs_info->reloc_mutex);
    2544           0 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2545           0 : scrub_continue:
    2546           0 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
    2547           0 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
    2548           0 :         btrfs_scrub_continue(fs_info);
    2549           0 : cleanup_transaction:
    2550           0 :         btrfs_trans_release_metadata(trans);
    2551           0 :         btrfs_cleanup_pending_block_groups(trans);
    2552           0 :         btrfs_trans_release_chunk_metadata(trans);
    2553           0 :         trans->block_rsv = NULL;
    2554           0 :         btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
    2555           0 :         if (current->journal_info == trans)
    2556           0 :                 current->journal_info = NULL;
    2557           0 :         cleanup_transaction(trans, ret);
    2558             : 
    2559           0 :         return ret;
    2560             : 
    2561           0 : lockdep_release:
    2562           0 :         btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
    2563           0 :         btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
    2564           0 :         goto cleanup_transaction;
    2565             : 
    2566           0 : lockdep_trans_commit_start_release:
    2567           0 :         btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2568           0 :         btrfs_end_transaction(trans);
    2569           0 :         return ret;
    2570             : }
    2571             : 
    2572             : /*
    2573             :  * Take the first root off fs_info->dead_roots and drop it.
                     :  *
                     :  * Returns:
    2574             :  * 0 if there are no more dead_roots at the time of call, or if
                     :  *   btrfs_drop_snapshot() failed (its error code is not propagated)
    2575             :  * 1 if a root was dropped; there may be more to be processed, call me again
    2576             :  *
    2577             :  * A return value of 1 tells the caller to call again, but
    2578             :  * if there comes a new one during processing, it may return 0. We don't mind,
    2579             :  * because btrfs_commit_super will poke cleaner thread and it will process it a
    2580             :  * few seconds later.
    2581             :  */
    2582           0 : int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
    2583             : {
    2584           0 :         struct btrfs_root *root;
    2585           0 :         int ret;
    2586             : 
    2587           0 :         spin_lock(&fs_info->trans_lock);
    2588           0 :         if (list_empty(&fs_info->dead_roots)) {
    2589           0 :                 spin_unlock(&fs_info->trans_lock);
    2590           0 :                 return 0;
    2591             :         }
    2592           0 :         root = list_first_entry(&fs_info->dead_roots,
    2593             :                         struct btrfs_root, root_list);
    2594           0 :         list_del_init(&root->root_list);
    2595           0 :         spin_unlock(&fs_info->trans_lock);
    2596             : 
    2597           0 :         btrfs_debug(fs_info, "cleaner removing %llu", root->root_key.objectid);
    2598             : 
    2599           0 :         btrfs_kill_all_delayed_nodes(root);
    2600             :         /* Roots older than BTRFS_MIXED_BACKREF_REV pass 0 as the second argument, others pass 1. */
    2601           0 :         if (btrfs_header_backref_rev(root->node) <
    2602             :                         BTRFS_MIXED_BACKREF_REV)
    2603           0 :                 ret = btrfs_drop_snapshot(root, 0, 0);
    2604             :         else
    2605           0 :                 ret = btrfs_drop_snapshot(root, 1, 0);
    2606             : 
    2607           0 :         btrfs_put_root(root);
    2608           0 :         return (ret < 0) ? 0 : 1;       /* errors are folded into 0, never returned as < 0 */
    2609             : }
    2610             : 
    2611             : /*
    2612             :  * We only mark the transaction aborted and then set the file system read-only.
    2613             :  * This will prevent new transactions from starting or trying to join this
    2614             :  * one.
    2615             :  *
    2616             :  * This means that error recovery at the call site is limited to freeing
    2617             :  * any local memory allocations and passing the error code up without
    2618             :  * further cleanup. The transaction should complete as it normally would
    2619             :  * in the call path but will return -EIO.
    2620             :  *
    2621             :  * We'll complete the cleanup in btrfs_end_transaction and
    2622             :  * btrfs_commit_transaction.
    2623             :  */
    2624           0 : void __cold __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
    2625             :                                       const char *function,
    2626             :                                       unsigned int line, int errno, bool first_hit)
    2627             : { /* Record errno on the handle and on its transaction, wake waiters, then report through __btrfs_handle_fs_error(). */
    2628           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2629             : 
    2630           0 :         WRITE_ONCE(trans->aborted, errno); /* NOTE(review): WRITE_ONCE suggests lockless readers of ->aborted - confirm */
    2631           0 :         WRITE_ONCE(trans->transaction->aborted, errno);
    2632           0 :         if (first_hit && errno == -ENOSPC) /* space info is dumped only when first_hit is set and the error is -ENOSPC */
    2633           0 :                 btrfs_dump_space_info_for_trans_abort(fs_info);
    2634             :         /* Wake up anybody who may be waiting on this transaction */
    2635           0 :         wake_up(&fs_info->transaction_wait);
    2636           0 :         wake_up(&fs_info->transaction_blocked_wait);
    2637           0 :         __btrfs_handle_fs_error(fs_info, function, line, errno, NULL); /* forwards the caller-supplied function/line; NOTE(review): trailing NULL is presumably an optional format string - confirm */
    2638           0 : }
    2639             : 
    2640           2 : int __init btrfs_transaction_init(void)
    2641             : { /* Create the "btrfs_trans_handle" slab cache, sized for struct btrfs_trans_handle; returns 0 on success, -ENOMEM on failure. */
    2642           2 :         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
    2643             :                         sizeof(struct btrfs_trans_handle), 0,
    2644             :                         SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL); /* NOTE(review): the 0 and NULL are presumably alignment and constructor - confirm against kmem_cache_create() */
    2645           2 :         if (!btrfs_trans_handle_cachep)
    2646           0 :                 return -ENOMEM; /* cache creation failed */
    2647             :         return 0;
    2648             : }
    2649             : 
    2650           0 : void __cold btrfs_transaction_exit(void)
    2651             : { /* Destroy the slab cache held in btrfs_trans_handle_cachep (the one btrfs_transaction_init() creates). */
    2652           0 :         kmem_cache_destroy(btrfs_trans_handle_cachep);
    2653           0 : }

Generated by: LCOV version 1.14